aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2020-01-20 22:15:45 +0100
committer Mike Pall <mike> 2020-01-20 22:15:45 +0100
commit 94d0b53004a5fa368defa4307a17edcdb87fe727 (patch)
tree 2468fb7d60f39ccadcd696d333c83ef49f3dfc02
parent dfa692b746c9de067857d5fc992a41730be3d99a (diff)
download luajit-94d0b53004a5fa368defa4307a17edcdb87fe727.tar.gz
luajit-94d0b53004a5fa368defa4307a17edcdb87fe727.tar.bz2
luajit-94d0b53004a5fa368defa4307a17edcdb87fe727.zip
MIPS: Add MIPS64 R6 port.
Contributed by Hua Zhang, YunQiang Su from Wave Computing, and Radovan Birdic from RT-RK. Sponsored by Wave Computing.
-rw-r--r-- dynasm/dasm_mips.h 13
-rw-r--r-- dynasm/dasm_mips.lua 625
-rw-r--r-- dynasm/dynasm.lua 1
-rw-r--r-- src/Makefile 3
-rw-r--r-- src/jit/bcsave.lua 84
-rw-r--r-- src/jit/dis_mips.lua 293
-rw-r--r-- src/jit/dis_mips64r6.lua 17
-rw-r--r-- src/jit/dis_mips64r6el.lua 17
-rw-r--r-- src/lj_arch.h 29
-rw-r--r-- src/lj_asm.c 2
-rw-r--r-- src/lj_asm_mips.h 114
-rw-r--r-- src/lj_emit_mips.h 15
-rw-r--r-- src/lj_jit.h 8
-rw-r--r-- src/lj_target_mips.h 52
-rw-r--r-- src/vm_mips64.dasc 370
15 files changed, 1296 insertions, 347 deletions
diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
index 71a835b2..7d06aa72 100644
--- a/dynasm/dasm_mips.h
+++ b/dynasm/dasm_mips.h
@@ -355,14 +355,15 @@ int dasm_encode(Dst_DECL, void *buffer)
355 CK(n >= 0, UNDEF_PC); 355 CK(n >= 0, UNDEF_PC);
356 n = *DASM_POS2PTR(D, n); 356 n = *DASM_POS2PTR(D, n);
357 if (ins & 2048) 357 if (ins & 2048)
358 n = n - (int)((char *)cp - base);
359 else
360 n = (n + (int)(size_t)base) & 0x0fffffff; 358 n = (n + (int)(size_t)base) & 0x0fffffff;
361 patchrel: 359 else
360 n = n - (int)((char *)cp - base);
361 patchrel: {
362 unsigned int e = 16 + ((ins >> 12) & 15);
362 CK((n & 3) == 0 && 363 CK((n & 3) == 0 &&
363 ((n + ((ins & 2048) ? 0x00020000 : 0)) >> 364 ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL);
364 ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); 365 cp[-1] |= ((n>>2) & ((1<<e)-1));
365 cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff)); 366 }
366 break; 367 break;
367 case DASM_LABEL_LG: 368 case DASM_LABEL_LG:
368 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); 369 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
index bd2a2b43..ccdc53cd 100644
--- a/dynasm/dasm_mips.lua
+++ b/dynasm/dasm_mips.lua
@@ -6,6 +6,7 @@
6------------------------------------------------------------------------------ 6------------------------------------------------------------------------------
7 7
8local mips64 = mips64 8local mips64 = mips64
9local mipsr6 = _map_def.MIPSR6
9 10
10-- Module information: 11-- Module information:
11local _info = { 12local _info = {
@@ -238,7 +239,6 @@ local map_op = {
238 bne_3 = "14000000STB", 239 bne_3 = "14000000STB",
239 blez_2 = "18000000SB", 240 blez_2 = "18000000SB",
240 bgtz_2 = "1c000000SB", 241 bgtz_2 = "1c000000SB",
241 addi_3 = "20000000TSI",
242 li_2 = "24000000TI", 242 li_2 = "24000000TI",
243 addiu_3 = "24000000TSI", 243 addiu_3 = "24000000TSI",
244 slti_3 = "28000000TSI", 244 slti_3 = "28000000TSI",
@@ -248,40 +248,22 @@ local map_op = {
248 ori_3 = "34000000TSU", 248 ori_3 = "34000000TSU",
249 xori_3 = "38000000TSU", 249 xori_3 = "38000000TSU",
250 lui_2 = "3c000000TU", 250 lui_2 = "3c000000TU",
251 beqzl_2 = "50000000SB",
252 beql_3 = "50000000STB",
253 bnezl_2 = "54000000SB",
254 bnel_3 = "54000000STB",
255 blezl_2 = "58000000SB",
256 bgtzl_2 = "5c000000SB",
257 daddi_3 = mips64 and "60000000TSI",
258 daddiu_3 = mips64 and "64000000TSI", 251 daddiu_3 = mips64 and "64000000TSI",
259 ldl_2 = mips64 and "68000000TO", 252 ldl_2 = mips64 and "68000000TO",
260 ldr_2 = mips64 and "6c000000TO", 253 ldr_2 = mips64 and "6c000000TO",
261 lb_2 = "80000000TO", 254 lb_2 = "80000000TO",
262 lh_2 = "84000000TO", 255 lh_2 = "84000000TO",
263 lwl_2 = "88000000TO",
264 lw_2 = "8c000000TO", 256 lw_2 = "8c000000TO",
265 lbu_2 = "90000000TO", 257 lbu_2 = "90000000TO",
266 lhu_2 = "94000000TO", 258 lhu_2 = "94000000TO",
267 lwr_2 = "98000000TO",
268 lwu_2 = mips64 and "9c000000TO", 259 lwu_2 = mips64 and "9c000000TO",
269 sb_2 = "a0000000TO", 260 sb_2 = "a0000000TO",
270 sh_2 = "a4000000TO", 261 sh_2 = "a4000000TO",
271 swl_2 = "a8000000TO",
272 sw_2 = "ac000000TO", 262 sw_2 = "ac000000TO",
273 sdl_2 = mips64 and "b0000000TO",
274 sdr_2 = mips64 and "b1000000TO",
275 swr_2 = "b8000000TO",
276 cache_2 = "bc000000NO",
277 ll_2 = "c0000000TO",
278 lwc1_2 = "c4000000HO", 263 lwc1_2 = "c4000000HO",
279 pref_2 = "cc000000NO",
280 ldc1_2 = "d4000000HO", 264 ldc1_2 = "d4000000HO",
281 ld_2 = mips64 and "dc000000TO", 265 ld_2 = mips64 and "dc000000TO",
282 sc_2 = "e0000000TO",
283 swc1_2 = "e4000000HO", 266 swc1_2 = "e4000000HO",
284 scd_2 = mips64 and "f0000000TO",
285 sdc1_2 = "f4000000HO", 267 sdc1_2 = "f4000000HO",
286 sd_2 = mips64 and "fc000000TO", 268 sd_2 = mips64 and "fc000000TO",
287 269
@@ -289,10 +271,6 @@ local map_op = {
289 nop_0 = "00000000", 271 nop_0 = "00000000",
290 sll_3 = "00000000DTA", 272 sll_3 = "00000000DTA",
291 sextw_2 = "00000000DT", 273 sextw_2 = "00000000DT",
292 movf_2 = "00000001DS",
293 movf_3 = "00000001DSC",
294 movt_2 = "00010001DS",
295 movt_3 = "00010001DSC",
296 srl_3 = "00000002DTA", 274 srl_3 = "00000002DTA",
297 rotr_3 = "00200002DTA", 275 rotr_3 = "00200002DTA",
298 sra_3 = "00000003DTA", 276 sra_3 = "00000003DTA",
@@ -301,31 +279,16 @@ local map_op = {
301 rotrv_3 = "00000046DTS", 279 rotrv_3 = "00000046DTS",
302 drotrv_3 = mips64 and "00000056DTS", 280 drotrv_3 = mips64 and "00000056DTS",
303 srav_3 = "00000007DTS", 281 srav_3 = "00000007DTS",
304 jr_1 = "00000008S",
305 jalr_1 = "0000f809S", 282 jalr_1 = "0000f809S",
306 jalr_2 = "00000009DS", 283 jalr_2 = "00000009DS",
307 movz_3 = "0000000aDST",
308 movn_3 = "0000000bDST",
309 syscall_0 = "0000000c", 284 syscall_0 = "0000000c",
310 syscall_1 = "0000000cY", 285 syscall_1 = "0000000cY",
311 break_0 = "0000000d", 286 break_0 = "0000000d",
312 break_1 = "0000000dY", 287 break_1 = "0000000dY",
313 sync_0 = "0000000f", 288 sync_0 = "0000000f",
314 mfhi_1 = "00000010D",
315 mthi_1 = "00000011S",
316 mflo_1 = "00000012D",
317 mtlo_1 = "00000013S",
318 dsllv_3 = mips64 and "00000014DTS", 289 dsllv_3 = mips64 and "00000014DTS",
319 dsrlv_3 = mips64 and "00000016DTS", 290 dsrlv_3 = mips64 and "00000016DTS",
320 dsrav_3 = mips64 and "00000017DTS", 291 dsrav_3 = mips64 and "00000017DTS",
321 mult_2 = "00000018ST",
322 multu_2 = "00000019ST",
323 div_2 = "0000001aST",
324 divu_2 = "0000001bST",
325 dmult_2 = mips64 and "0000001cST",
326 dmultu_2 = mips64 and "0000001dST",
327 ddiv_2 = mips64 and "0000001eST",
328 ddivu_2 = mips64 and "0000001fST",
329 add_3 = "00000020DST", 292 add_3 = "00000020DST",
330 move_2 = mips64 and "00000025DS" or "00000021DS", 293 move_2 = mips64 and "00000025DS" or "00000021DS",
331 addu_3 = "00000021DST", 294 addu_3 = "00000021DST",
@@ -369,32 +332,9 @@ local map_op = {
369 bgez_2 = "04010000SB", 332 bgez_2 = "04010000SB",
370 bltzl_2 = "04020000SB", 333 bltzl_2 = "04020000SB",
371 bgezl_2 = "04030000SB", 334 bgezl_2 = "04030000SB",
372 tgei_2 = "04080000SI",
373 tgeiu_2 = "04090000SI",
374 tlti_2 = "040a0000SI",
375 tltiu_2 = "040b0000SI",
376 teqi_2 = "040c0000SI",
377 tnei_2 = "040e0000SI",
378 bltzal_2 = "04100000SB",
379 bal_1 = "04110000B", 335 bal_1 = "04110000B",
380 bgezal_2 = "04110000SB",
381 bltzall_2 = "04120000SB",
382 bgezall_2 = "04130000SB",
383 synci_1 = "041f0000O", 336 synci_1 = "041f0000O",
384 337
385 -- Opcode SPECIAL2.
386 madd_2 = "70000000ST",
387 maddu_2 = "70000001ST",
388 mul_3 = "70000002DST",
389 msub_2 = "70000004ST",
390 msubu_2 = "70000005ST",
391 clz_2 = "70000020DS=",
392 clo_2 = "70000021DS=",
393 dclz_2 = mips64 and "70000024DS=",
394 dclo_2 = mips64 and "70000025DS=",
395 sdbbp_0 = "7000003f",
396 sdbbp_1 = "7000003fY",
397
398 -- Opcode SPECIAL3. 338 -- Opcode SPECIAL3.
399 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 339 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
400 dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 340 dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
@@ -445,15 +385,6 @@ local map_op = {
445 ctc1_2 = "44c00000TG", 385 ctc1_2 = "44c00000TG",
446 mthc1_2 = "44e00000TG", 386 mthc1_2 = "44e00000TG",
447 387
448 bc1f_1 = "45000000B",
449 bc1f_2 = "45000000CB",
450 bc1t_1 = "45010000B",
451 bc1t_2 = "45010000CB",
452 bc1fl_1 = "45020000B",
453 bc1fl_2 = "45020000CB",
454 bc1tl_1 = "45030000B",
455 bc1tl_2 = "45030000CB",
456
457 ["add.s_3"] = "46000000FGH", 388 ["add.s_3"] = "46000000FGH",
458 ["sub.s_3"] = "46000001FGH", 389 ["sub.s_3"] = "46000001FGH",
459 ["mul.s_3"] = "46000002FGH", 390 ["mul.s_3"] = "46000002FGH",
@@ -470,51 +401,11 @@ local map_op = {
470 ["trunc.w.s_2"] = "4600000dFG", 401 ["trunc.w.s_2"] = "4600000dFG",
471 ["ceil.w.s_2"] = "4600000eFG", 402 ["ceil.w.s_2"] = "4600000eFG",
472 ["floor.w.s_2"] = "4600000fFG", 403 ["floor.w.s_2"] = "4600000fFG",
473 ["movf.s_2"] = "46000011FG",
474 ["movf.s_3"] = "46000011FGC",
475 ["movt.s_2"] = "46010011FG",
476 ["movt.s_3"] = "46010011FGC",
477 ["movz.s_3"] = "46000012FGT",
478 ["movn.s_3"] = "46000013FGT",
479 ["recip.s_2"] = "46000015FG", 404 ["recip.s_2"] = "46000015FG",
480 ["rsqrt.s_2"] = "46000016FG", 405 ["rsqrt.s_2"] = "46000016FG",
481 ["cvt.d.s_2"] = "46000021FG", 406 ["cvt.d.s_2"] = "46000021FG",
482 ["cvt.w.s_2"] = "46000024FG", 407 ["cvt.w.s_2"] = "46000024FG",
483 ["cvt.l.s_2"] = "46000025FG", 408 ["cvt.l.s_2"] = "46000025FG",
484 ["cvt.ps.s_3"] = "46000026FGH",
485 ["c.f.s_2"] = "46000030GH",
486 ["c.f.s_3"] = "46000030VGH",
487 ["c.un.s_2"] = "46000031GH",
488 ["c.un.s_3"] = "46000031VGH",
489 ["c.eq.s_2"] = "46000032GH",
490 ["c.eq.s_3"] = "46000032VGH",
491 ["c.ueq.s_2"] = "46000033GH",
492 ["c.ueq.s_3"] = "46000033VGH",
493 ["c.olt.s_2"] = "46000034GH",
494 ["c.olt.s_3"] = "46000034VGH",
495 ["c.ult.s_2"] = "46000035GH",
496 ["c.ult.s_3"] = "46000035VGH",
497 ["c.ole.s_2"] = "46000036GH",
498 ["c.ole.s_3"] = "46000036VGH",
499 ["c.ule.s_2"] = "46000037GH",
500 ["c.ule.s_3"] = "46000037VGH",
501 ["c.sf.s_2"] = "46000038GH",
502 ["c.sf.s_3"] = "46000038VGH",
503 ["c.ngle.s_2"] = "46000039GH",
504 ["c.ngle.s_3"] = "46000039VGH",
505 ["c.seq.s_2"] = "4600003aGH",
506 ["c.seq.s_3"] = "4600003aVGH",
507 ["c.ngl.s_2"] = "4600003bGH",
508 ["c.ngl.s_3"] = "4600003bVGH",
509 ["c.lt.s_2"] = "4600003cGH",
510 ["c.lt.s_3"] = "4600003cVGH",
511 ["c.nge.s_2"] = "4600003dGH",
512 ["c.nge.s_3"] = "4600003dVGH",
513 ["c.le.s_2"] = "4600003eGH",
514 ["c.le.s_3"] = "4600003eVGH",
515 ["c.ngt.s_2"] = "4600003fGH",
516 ["c.ngt.s_3"] = "4600003fVGH",
517
518 ["add.d_3"] = "46200000FGH", 409 ["add.d_3"] = "46200000FGH",
519 ["sub.d_3"] = "46200001FGH", 410 ["sub.d_3"] = "46200001FGH",
520 ["mul.d_3"] = "46200002FGH", 411 ["mul.d_3"] = "46200002FGH",
@@ -531,130 +422,410 @@ local map_op = {
531 ["trunc.w.d_2"] = "4620000dFG", 422 ["trunc.w.d_2"] = "4620000dFG",
532 ["ceil.w.d_2"] = "4620000eFG", 423 ["ceil.w.d_2"] = "4620000eFG",
533 ["floor.w.d_2"] = "4620000fFG", 424 ["floor.w.d_2"] = "4620000fFG",
534 ["movf.d_2"] = "46200011FG",
535 ["movf.d_3"] = "46200011FGC",
536 ["movt.d_2"] = "46210011FG",
537 ["movt.d_3"] = "46210011FGC",
538 ["movz.d_3"] = "46200012FGT",
539 ["movn.d_3"] = "46200013FGT",
540 ["recip.d_2"] = "46200015FG", 425 ["recip.d_2"] = "46200015FG",
541 ["rsqrt.d_2"] = "46200016FG", 426 ["rsqrt.d_2"] = "46200016FG",
542 ["cvt.s.d_2"] = "46200020FG", 427 ["cvt.s.d_2"] = "46200020FG",
543 ["cvt.w.d_2"] = "46200024FG", 428 ["cvt.w.d_2"] = "46200024FG",
544 ["cvt.l.d_2"] = "46200025FG", 429 ["cvt.l.d_2"] = "46200025FG",
545 ["c.f.d_2"] = "46200030GH",
546 ["c.f.d_3"] = "46200030VGH",
547 ["c.un.d_2"] = "46200031GH",
548 ["c.un.d_3"] = "46200031VGH",
549 ["c.eq.d_2"] = "46200032GH",
550 ["c.eq.d_3"] = "46200032VGH",
551 ["c.ueq.d_2"] = "46200033GH",
552 ["c.ueq.d_3"] = "46200033VGH",
553 ["c.olt.d_2"] = "46200034GH",
554 ["c.olt.d_3"] = "46200034VGH",
555 ["c.ult.d_2"] = "46200035GH",
556 ["c.ult.d_3"] = "46200035VGH",
557 ["c.ole.d_2"] = "46200036GH",
558 ["c.ole.d_3"] = "46200036VGH",
559 ["c.ule.d_2"] = "46200037GH",
560 ["c.ule.d_3"] = "46200037VGH",
561 ["c.sf.d_2"] = "46200038GH",
562 ["c.sf.d_3"] = "46200038VGH",
563 ["c.ngle.d_2"] = "46200039GH",
564 ["c.ngle.d_3"] = "46200039VGH",
565 ["c.seq.d_2"] = "4620003aGH",
566 ["c.seq.d_3"] = "4620003aVGH",
567 ["c.ngl.d_2"] = "4620003bGH",
568 ["c.ngl.d_3"] = "4620003bVGH",
569 ["c.lt.d_2"] = "4620003cGH",
570 ["c.lt.d_3"] = "4620003cVGH",
571 ["c.nge.d_2"] = "4620003dGH",
572 ["c.nge.d_3"] = "4620003dVGH",
573 ["c.le.d_2"] = "4620003eGH",
574 ["c.le.d_3"] = "4620003eVGH",
575 ["c.ngt.d_2"] = "4620003fGH",
576 ["c.ngt.d_3"] = "4620003fVGH",
577
578 ["add.ps_3"] = "46c00000FGH",
579 ["sub.ps_3"] = "46c00001FGH",
580 ["mul.ps_3"] = "46c00002FGH",
581 ["abs.ps_2"] = "46c00005FG",
582 ["mov.ps_2"] = "46c00006FG",
583 ["neg.ps_2"] = "46c00007FG",
584 ["movf.ps_2"] = "46c00011FG",
585 ["movf.ps_3"] = "46c00011FGC",
586 ["movt.ps_2"] = "46c10011FG",
587 ["movt.ps_3"] = "46c10011FGC",
588 ["movz.ps_3"] = "46c00012FGT",
589 ["movn.ps_3"] = "46c00013FGT",
590 ["cvt.s.pu_2"] = "46c00020FG",
591 ["cvt.s.pl_2"] = "46c00028FG",
592 ["pll.ps_3"] = "46c0002cFGH",
593 ["plu.ps_3"] = "46c0002dFGH",
594 ["pul.ps_3"] = "46c0002eFGH",
595 ["puu.ps_3"] = "46c0002fFGH",
596 ["c.f.ps_2"] = "46c00030GH",
597 ["c.f.ps_3"] = "46c00030VGH",
598 ["c.un.ps_2"] = "46c00031GH",
599 ["c.un.ps_3"] = "46c00031VGH",
600 ["c.eq.ps_2"] = "46c00032GH",
601 ["c.eq.ps_3"] = "46c00032VGH",
602 ["c.ueq.ps_2"] = "46c00033GH",
603 ["c.ueq.ps_3"] = "46c00033VGH",
604 ["c.olt.ps_2"] = "46c00034GH",
605 ["c.olt.ps_3"] = "46c00034VGH",
606 ["c.ult.ps_2"] = "46c00035GH",
607 ["c.ult.ps_3"] = "46c00035VGH",
608 ["c.ole.ps_2"] = "46c00036GH",
609 ["c.ole.ps_3"] = "46c00036VGH",
610 ["c.ule.ps_2"] = "46c00037GH",
611 ["c.ule.ps_3"] = "46c00037VGH",
612 ["c.sf.ps_2"] = "46c00038GH",
613 ["c.sf.ps_3"] = "46c00038VGH",
614 ["c.ngle.ps_2"] = "46c00039GH",
615 ["c.ngle.ps_3"] = "46c00039VGH",
616 ["c.seq.ps_2"] = "46c0003aGH",
617 ["c.seq.ps_3"] = "46c0003aVGH",
618 ["c.ngl.ps_2"] = "46c0003bGH",
619 ["c.ngl.ps_3"] = "46c0003bVGH",
620 ["c.lt.ps_2"] = "46c0003cGH",
621 ["c.lt.ps_3"] = "46c0003cVGH",
622 ["c.nge.ps_2"] = "46c0003dGH",
623 ["c.nge.ps_3"] = "46c0003dVGH",
624 ["c.le.ps_2"] = "46c0003eGH",
625 ["c.le.ps_3"] = "46c0003eVGH",
626 ["c.ngt.ps_2"] = "46c0003fGH",
627 ["c.ngt.ps_3"] = "46c0003fVGH",
628
629 ["cvt.s.w_2"] = "46800020FG", 430 ["cvt.s.w_2"] = "46800020FG",
630 ["cvt.d.w_2"] = "46800021FG", 431 ["cvt.d.w_2"] = "46800021FG",
631
632 ["cvt.s.l_2"] = "46a00020FG", 432 ["cvt.s.l_2"] = "46a00020FG",
633 ["cvt.d.l_2"] = "46a00021FG", 433 ["cvt.d.l_2"] = "46a00021FG",
634
635 -- Opcode COP1X.
636 lwxc1_2 = "4c000000FX",
637 ldxc1_2 = "4c000001FX",
638 luxc1_2 = "4c000005FX",
639 swxc1_2 = "4c000008FX",
640 sdxc1_2 = "4c000009FX",
641 suxc1_2 = "4c00000dFX",
642 prefx_2 = "4c00000fMX",
643 ["alnv.ps_4"] = "4c00001eFGHS",
644 ["madd.s_4"] = "4c000020FRGH",
645 ["madd.d_4"] = "4c000021FRGH",
646 ["madd.ps_4"] = "4c000026FRGH",
647 ["msub.s_4"] = "4c000028FRGH",
648 ["msub.d_4"] = "4c000029FRGH",
649 ["msub.ps_4"] = "4c00002eFRGH",
650 ["nmadd.s_4"] = "4c000030FRGH",
651 ["nmadd.d_4"] = "4c000031FRGH",
652 ["nmadd.ps_4"] = "4c000036FRGH",
653 ["nmsub.s_4"] = "4c000038FRGH",
654 ["nmsub.d_4"] = "4c000039FRGH",
655 ["nmsub.ps_4"] = "4c00003eFRGH",
656} 434}
657 435
436if mipsr6 then -- Instructions added with MIPSR6.
437
438 for k,v in pairs({
439
440 -- Add immediate to upper bits.
441 aui_3 = "3c000000TSI",
442 daui_3 = mips64 and "74000000TSI",
443 dahi_2 = mips64 and "04060000SI",
444 dati_2 = mips64 and "041e0000SI",
445
446 -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc.
447
448 -- Compact branches.
449 blezalc_2 = "18000000TB", -- rt != 0.
450 bgezalc_2 = "18000000T=SB", -- rt != 0.
451 bgtzalc_2 = "1c000000TB", -- rt != 0.
452 bltzalc_2 = "1c000000T=SB", -- rt != 0.
453
454 blezc_2 = "58000000TB", -- rt != 0.
455 bgezc_2 = "58000000T=SB", -- rt != 0.
456 bgec_3 = "58000000STB", -- rs != rt.
457 blec_3 = "58000000TSB", -- rt != rs.
458
459 bgtzc_2 = "5c000000TB", -- rt != 0.
460 bltzc_2 = "5c000000T=SB", -- rt != 0.
461 bltc_3 = "5c000000STB", -- rs != rt.
462 bgtc_3 = "5c000000TSB", -- rt != rs.
463
464 bgeuc_3 = "18000000STB", -- rs != rt.
465 bleuc_3 = "18000000TSB", -- rt != rs.
466 bltuc_3 = "1c000000STB", -- rs != rt.
467 bgtuc_3 = "1c000000TSB", -- rt != rs.
468
469 beqzalc_2 = "20000000TB", -- rt != 0.
470 bnezalc_2 = "60000000TB", -- rt != 0.
471 beqc_3 = "20000000STB", -- rs < rt.
472 bnec_3 = "60000000STB", -- rs < rt.
473 bovc_3 = "20000000STB", -- rs >= rt.
474 bnvc_3 = "60000000STB", -- rs >= rt.
475
476 beqzc_2 = "d8000000SK", -- rs != 0.
477 bnezc_2 = "f8000000SK", -- rs != 0.
478 jic_2 = "d8000000TI",
479 jialc_2 = "f8000000TI",
480 bc_1 = "c8000000L",
481 balc_1 = "e8000000L",
482
483 -- Opcode SPECIAL.
484 jr_1 = "00000009S",
485 sdbbp_0 = "0000000e",
486 sdbbp_1 = "0000000eY",
487 lsa_4 = "00000005DSTA",
488 dlsa_4 = mips64 and "00000015DSTA",
489 seleqz_3 = "00000035DST",
490 selnez_3 = "00000037DST",
491 clz_2 = "00000050DS",
492 clo_2 = "00000051DS",
493 dclz_2 = mips64 and "00000052DS",
494 dclo_2 = mips64 and "00000053DS",
495 mul_3 = "00000098DST",
496 muh_3 = "000000d8DST",
497 mulu_3 = "00000099DST",
498 muhu_3 = "000000d9DST",
499 div_3 = "0000009aDST",
500 mod_3 = "000000daDST",
501 divu_3 = "0000009bDST",
502 modu_3 = "000000dbDST",
503 dmul_3 = mips64 and "0000009cDST",
504 dmuh_3 = mips64 and "000000dcDST",
505 dmulu_3 = mips64 and "0000009dDST",
506 dmuhu_3 = mips64 and "000000ddDST",
507 ddiv_3 = mips64 and "0000009eDST",
508 dmod_3 = mips64 and "000000deDST",
509 ddivu_3 = mips64 and "0000009fDST",
510 dmodu_3 = mips64 and "000000dfDST",
511
512 -- Opcode SPECIAL3.
513 align_4 = "7c000220DSTA",
514 dalign_4 = mips64 and "7c000224DSTA",
515 bitswap_2 = "7c000020DT",
516 dbitswap_2 = mips64 and "7c000024DT",
517
518 -- Opcode COP1.
519 bc1eqz_2 = "45200000HB",
520 bc1nez_2 = "45a00000HB",
521
522 ["sel.s_3"] = "46000010FGH",
523 ["seleqz.s_3"] = "46000014FGH",
524 ["selnez.s_3"] = "46000017FGH",
525 ["maddf.s_3"] = "46000018FGH",
526 ["msubf.s_3"] = "46000019FGH",
527 ["rint.s_2"] = "4600001aFG",
528 ["class.s_2"] = "4600001bFG",
529 ["min.s_3"] = "4600001cFGH",
530 ["mina.s_3"] = "4600001dFGH",
531 ["max.s_3"] = "4600001eFGH",
532 ["maxa.s_3"] = "4600001fFGH",
533 ["cmp.af.s_3"] = "46800000FGH",
534 ["cmp.un.s_3"] = "46800001FGH",
535 ["cmp.or.s_3"] = "46800011FGH",
536 ["cmp.eq.s_3"] = "46800002FGH",
537 ["cmp.une.s_3"] = "46800012FGH",
538 ["cmp.ueq.s_3"] = "46800003FGH",
539 ["cmp.ne.s_3"] = "46800013FGH",
540 ["cmp.lt.s_3"] = "46800004FGH",
541 ["cmp.ult.s_3"] = "46800005FGH",
542 ["cmp.le.s_3"] = "46800006FGH",
543 ["cmp.ule.s_3"] = "46800007FGH",
544 ["cmp.saf.s_3"] = "46800008FGH",
545 ["cmp.sun.s_3"] = "46800009FGH",
546 ["cmp.sor.s_3"] = "46800019FGH",
547 ["cmp.seq.s_3"] = "4680000aFGH",
548 ["cmp.sune.s_3"] = "4680001aFGH",
549 ["cmp.sueq.s_3"] = "4680000bFGH",
550 ["cmp.sne.s_3"] = "4680001bFGH",
551 ["cmp.slt.s_3"] = "4680000cFGH",
552 ["cmp.sult.s_3"] = "4680000dFGH",
553 ["cmp.sle.s_3"] = "4680000eFGH",
554 ["cmp.sule.s_3"] = "4680000fFGH",
555
556 ["sel.d_3"] = "46200010FGH",
557 ["seleqz.d_3"] = "46200014FGH",
558 ["selnez.d_3"] = "46200017FGH",
559 ["maddf.d_3"] = "46200018FGH",
560 ["msubf.d_3"] = "46200019FGH",
561 ["rint.d_2"] = "4620001aFG",
562 ["class.d_2"] = "4620001bFG",
563 ["min.d_3"] = "4620001cFGH",
564 ["mina.d_3"] = "4620001dFGH",
565 ["max.d_3"] = "4620001eFGH",
566 ["maxa.d_3"] = "4620001fFGH",
567 ["cmp.af.d_3"] = "46a00000FGH",
568 ["cmp.un.d_3"] = "46a00001FGH",
569 ["cmp.or.d_3"] = "46a00011FGH",
570 ["cmp.eq.d_3"] = "46a00002FGH",
571 ["cmp.une.d_3"] = "46a00012FGH",
572 ["cmp.ueq.d_3"] = "46a00003FGH",
573 ["cmp.ne.d_3"] = "46a00013FGH",
574 ["cmp.lt.d_3"] = "46a00004FGH",
575 ["cmp.ult.d_3"] = "46a00005FGH",
576 ["cmp.le.d_3"] = "46a00006FGH",
577 ["cmp.ule.d_3"] = "46a00007FGH",
578 ["cmp.saf.d_3"] = "46a00008FGH",
579 ["cmp.sun.d_3"] = "46a00009FGH",
580 ["cmp.sor.d_3"] = "46a00019FGH",
581 ["cmp.seq.d_3"] = "46a0000aFGH",
582 ["cmp.sune.d_3"] = "46a0001aFGH",
583 ["cmp.sueq.d_3"] = "46a0000bFGH",
584 ["cmp.sne.d_3"] = "46a0001bFGH",
585 ["cmp.slt.d_3"] = "46a0000cFGH",
586 ["cmp.sult.d_3"] = "46a0000dFGH",
587 ["cmp.sle.d_3"] = "46a0000eFGH",
588 ["cmp.sule.d_3"] = "46a0000fFGH",
589
590 }) do map_op[k] = v end
591
592else -- Instructions removed by MIPSR6.
593
594 for k,v in pairs({
595 -- Traps, don't use.
596 addi_3 = "20000000TSI",
597 daddi_3 = mips64 and "60000000TSI",
598
599 -- Branch on likely, don't use.
600 beqzl_2 = "50000000SB",
601 beql_3 = "50000000STB",
602 bnezl_2 = "54000000SB",
603 bnel_3 = "54000000STB",
604 blezl_2 = "58000000SB",
605 bgtzl_2 = "5c000000SB",
606
607 lwl_2 = "88000000TO",
608 lwr_2 = "98000000TO",
609 swl_2 = "a8000000TO",
610 sdl_2 = mips64 and "b0000000TO",
611 sdr_2 = mips64 and "b1000000TO",
612 swr_2 = "b8000000TO",
613 cache_2 = "bc000000NO",
614 ll_2 = "c0000000TO",
615 pref_2 = "cc000000NO",
616 sc_2 = "e0000000TO",
617 scd_2 = mips64 and "f0000000TO",
618
619 -- Opcode SPECIAL.
620 movf_2 = "00000001DS",
621 movf_3 = "00000001DSC",
622 movt_2 = "00010001DS",
623 movt_3 = "00010001DSC",
624 jr_1 = "00000008S",
625 movz_3 = "0000000aDST",
626 movn_3 = "0000000bDST",
627 mfhi_1 = "00000010D",
628 mthi_1 = "00000011S",
629 mflo_1 = "00000012D",
630 mtlo_1 = "00000013S",
631 mult_2 = "00000018ST",
632 multu_2 = "00000019ST",
633 div_3 = "0000001aST",
634 divu_3 = "0000001bST",
635 ddiv_3 = mips64 and "0000001eST",
636 ddivu_3 = mips64 and "0000001fST",
637 dmult_2 = mips64 and "0000001cST",
638 dmultu_2 = mips64 and "0000001dST",
639
640 -- Opcode REGIMM.
641 tgei_2 = "04080000SI",
642 tgeiu_2 = "04090000SI",
643 tlti_2 = "040a0000SI",
644 tltiu_2 = "040b0000SI",
645 teqi_2 = "040c0000SI",
646 tnei_2 = "040e0000SI",
647 bltzal_2 = "04100000SB",
648 bgezal_2 = "04110000SB",
649 bltzall_2 = "04120000SB",
650 bgezall_2 = "04130000SB",
651
652 -- Opcode SPECIAL2.
653 madd_2 = "70000000ST",
654 maddu_2 = "70000001ST",
655 mul_3 = "70000002DST",
656 msub_2 = "70000004ST",
657 msubu_2 = "70000005ST",
658 clz_2 = "70000020D=TS",
659 clo_2 = "70000021D=TS",
660 dclz_2 = mips64 and "70000024D=TS",
661 dclo_2 = mips64 and "70000025D=TS",
662 sdbbp_0 = "7000003f",
663 sdbbp_1 = "7000003fY",
664
665 -- Opcode COP1.
666 bc1f_1 = "45000000B",
667 bc1f_2 = "45000000CB",
668 bc1t_1 = "45010000B",
669 bc1t_2 = "45010000CB",
670 bc1fl_1 = "45020000B",
671 bc1fl_2 = "45020000CB",
672 bc1tl_1 = "45030000B",
673 bc1tl_2 = "45030000CB",
674
675 ["movf.s_2"] = "46000011FG",
676 ["movf.s_3"] = "46000011FGC",
677 ["movt.s_2"] = "46010011FG",
678 ["movt.s_3"] = "46010011FGC",
679 ["movz.s_3"] = "46000012FGT",
680 ["movn.s_3"] = "46000013FGT",
681 ["cvt.ps.s_3"] = "46000026FGH",
682 ["c.f.s_2"] = "46000030GH",
683 ["c.f.s_3"] = "46000030VGH",
684 ["c.un.s_2"] = "46000031GH",
685 ["c.un.s_3"] = "46000031VGH",
686 ["c.eq.s_2"] = "46000032GH",
687 ["c.eq.s_3"] = "46000032VGH",
688 ["c.ueq.s_2"] = "46000033GH",
689 ["c.ueq.s_3"] = "46000033VGH",
690 ["c.olt.s_2"] = "46000034GH",
691 ["c.olt.s_3"] = "46000034VGH",
692 ["c.ult.s_2"] = "46000035GH",
693 ["c.ult.s_3"] = "46000035VGH",
694 ["c.ole.s_2"] = "46000036GH",
695 ["c.ole.s_3"] = "46000036VGH",
696 ["c.ule.s_2"] = "46000037GH",
697 ["c.ule.s_3"] = "46000037VGH",
698 ["c.sf.s_2"] = "46000038GH",
699 ["c.sf.s_3"] = "46000038VGH",
700 ["c.ngle.s_2"] = "46000039GH",
701 ["c.ngle.s_3"] = "46000039VGH",
702 ["c.seq.s_2"] = "4600003aGH",
703 ["c.seq.s_3"] = "4600003aVGH",
704 ["c.ngl.s_2"] = "4600003bGH",
705 ["c.ngl.s_3"] = "4600003bVGH",
706 ["c.lt.s_2"] = "4600003cGH",
707 ["c.lt.s_3"] = "4600003cVGH",
708 ["c.nge.s_2"] = "4600003dGH",
709 ["c.nge.s_3"] = "4600003dVGH",
710 ["c.le.s_2"] = "4600003eGH",
711 ["c.le.s_3"] = "4600003eVGH",
712 ["c.ngt.s_2"] = "4600003fGH",
713 ["c.ngt.s_3"] = "4600003fVGH",
714 ["movf.d_2"] = "46200011FG",
715 ["movf.d_3"] = "46200011FGC",
716 ["movt.d_2"] = "46210011FG",
717 ["movt.d_3"] = "46210011FGC",
718 ["movz.d_3"] = "46200012FGT",
719 ["movn.d_3"] = "46200013FGT",
720 ["c.f.d_2"] = "46200030GH",
721 ["c.f.d_3"] = "46200030VGH",
722 ["c.un.d_2"] = "46200031GH",
723 ["c.un.d_3"] = "46200031VGH",
724 ["c.eq.d_2"] = "46200032GH",
725 ["c.eq.d_3"] = "46200032VGH",
726 ["c.ueq.d_2"] = "46200033GH",
727 ["c.ueq.d_3"] = "46200033VGH",
728 ["c.olt.d_2"] = "46200034GH",
729 ["c.olt.d_3"] = "46200034VGH",
730 ["c.ult.d_2"] = "46200035GH",
731 ["c.ult.d_3"] = "46200035VGH",
732 ["c.ole.d_2"] = "46200036GH",
733 ["c.ole.d_3"] = "46200036VGH",
734 ["c.ule.d_2"] = "46200037GH",
735 ["c.ule.d_3"] = "46200037VGH",
736 ["c.sf.d_2"] = "46200038GH",
737 ["c.sf.d_3"] = "46200038VGH",
738 ["c.ngle.d_2"] = "46200039GH",
739 ["c.ngle.d_3"] = "46200039VGH",
740 ["c.seq.d_2"] = "4620003aGH",
741 ["c.seq.d_3"] = "4620003aVGH",
742 ["c.ngl.d_2"] = "4620003bGH",
743 ["c.ngl.d_3"] = "4620003bVGH",
744 ["c.lt.d_2"] = "4620003cGH",
745 ["c.lt.d_3"] = "4620003cVGH",
746 ["c.nge.d_2"] = "4620003dGH",
747 ["c.nge.d_3"] = "4620003dVGH",
748 ["c.le.d_2"] = "4620003eGH",
749 ["c.le.d_3"] = "4620003eVGH",
750 ["c.ngt.d_2"] = "4620003fGH",
751 ["c.ngt.d_3"] = "4620003fVGH",
752 ["add.ps_3"] = "46c00000FGH",
753 ["sub.ps_3"] = "46c00001FGH",
754 ["mul.ps_3"] = "46c00002FGH",
755 ["abs.ps_2"] = "46c00005FG",
756 ["mov.ps_2"] = "46c00006FG",
757 ["neg.ps_2"] = "46c00007FG",
758 ["movf.ps_2"] = "46c00011FG",
759 ["movf.ps_3"] = "46c00011FGC",
760 ["movt.ps_2"] = "46c10011FG",
761 ["movt.ps_3"] = "46c10011FGC",
762 ["movz.ps_3"] = "46c00012FGT",
763 ["movn.ps_3"] = "46c00013FGT",
764 ["cvt.s.pu_2"] = "46c00020FG",
765 ["cvt.s.pl_2"] = "46c00028FG",
766 ["pll.ps_3"] = "46c0002cFGH",
767 ["plu.ps_3"] = "46c0002dFGH",
768 ["pul.ps_3"] = "46c0002eFGH",
769 ["puu.ps_3"] = "46c0002fFGH",
770 ["c.f.ps_2"] = "46c00030GH",
771 ["c.f.ps_3"] = "46c00030VGH",
772 ["c.un.ps_2"] = "46c00031GH",
773 ["c.un.ps_3"] = "46c00031VGH",
774 ["c.eq.ps_2"] = "46c00032GH",
775 ["c.eq.ps_3"] = "46c00032VGH",
776 ["c.ueq.ps_2"] = "46c00033GH",
777 ["c.ueq.ps_3"] = "46c00033VGH",
778 ["c.olt.ps_2"] = "46c00034GH",
779 ["c.olt.ps_3"] = "46c00034VGH",
780 ["c.ult.ps_2"] = "46c00035GH",
781 ["c.ult.ps_3"] = "46c00035VGH",
782 ["c.ole.ps_2"] = "46c00036GH",
783 ["c.ole.ps_3"] = "46c00036VGH",
784 ["c.ule.ps_2"] = "46c00037GH",
785 ["c.ule.ps_3"] = "46c00037VGH",
786 ["c.sf.ps_2"] = "46c00038GH",
787 ["c.sf.ps_3"] = "46c00038VGH",
788 ["c.ngle.ps_2"] = "46c00039GH",
789 ["c.ngle.ps_3"] = "46c00039VGH",
790 ["c.seq.ps_2"] = "46c0003aGH",
791 ["c.seq.ps_3"] = "46c0003aVGH",
792 ["c.ngl.ps_2"] = "46c0003bGH",
793 ["c.ngl.ps_3"] = "46c0003bVGH",
794 ["c.lt.ps_2"] = "46c0003cGH",
795 ["c.lt.ps_3"] = "46c0003cVGH",
796 ["c.nge.ps_2"] = "46c0003dGH",
797 ["c.nge.ps_3"] = "46c0003dVGH",
798 ["c.le.ps_2"] = "46c0003eGH",
799 ["c.le.ps_3"] = "46c0003eVGH",
800 ["c.ngt.ps_2"] = "46c0003fGH",
801 ["c.ngt.ps_3"] = "46c0003fVGH",
802
803 -- Opcode COP1X.
804 lwxc1_2 = "4c000000FX",
805 ldxc1_2 = "4c000001FX",
806 luxc1_2 = "4c000005FX",
807 swxc1_2 = "4c000008FX",
808 sdxc1_2 = "4c000009FX",
809 suxc1_2 = "4c00000dFX",
810 prefx_2 = "4c00000fMX",
811 ["alnv.ps_4"] = "4c00001eFGHS",
812 ["madd.s_4"] = "4c000020FRGH",
813 ["madd.d_4"] = "4c000021FRGH",
814 ["madd.ps_4"] = "4c000026FRGH",
815 ["msub.s_4"] = "4c000028FRGH",
816 ["msub.d_4"] = "4c000029FRGH",
817 ["msub.ps_4"] = "4c00002eFRGH",
818 ["nmadd.s_4"] = "4c000030FRGH",
819 ["nmadd.d_4"] = "4c000031FRGH",
820 ["nmadd.ps_4"] = "4c000036FRGH",
821 ["nmsub.s_4"] = "4c000038FRGH",
822 ["nmsub.d_4"] = "4c000039FRGH",
823 ["nmsub.ps_4"] = "4c00003eFRGH",
824
825 }) do map_op[k] = v end
826
827end
828
658------------------------------------------------------------------------------ 829------------------------------------------------------------------------------
659 830
660local function parse_gpr(expr) 831local function parse_gpr(expr)
@@ -808,9 +979,11 @@ map_op[".template__"] = function(params, template, nparams)
808 op = op + parse_disp(params[n]); n = n + 1 979 op = op + parse_disp(params[n]); n = n + 1
809 elseif p == "X" then 980 elseif p == "X" then
810 op = op + parse_index(params[n]); n = n + 1 981 op = op + parse_index(params[n]); n = n + 1
811 elseif p == "B" or p == "J" then 982 elseif p == "B" or p == "J" or p == "K" or p == "L" then
812 local mode, m, s = parse_label(params[n], false) 983 local mode, m, s = parse_label(params[n], false)
813 if p == "B" then m = m + 2048 end 984 if p == "J" then m = m + 0xa800
985 elseif p == "K" then m = m + 0x5000
986 elseif p == "L" then m = m + 0xa000 end
814 waction("REL_"..mode, m, s, 1) 987 waction("REL_"..mode, m, s, 1)
815 n = n + 1 988 n = n + 1
816 elseif p == "A" then 989 elseif p == "A" then
@@ -833,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams)
833 elseif p == "Z" then 1006 elseif p == "Z" then
834 op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 1007 op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1
835 elseif p == "=" then 1008 elseif p == "=" then
836 op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. 1009 n = n - 1 -- Re-use previous parameter for next template char.
837 else 1010 else
838 assert(false) 1011 assert(false)
839 end 1012 end
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
index 5ec21a79..46ebfca8 100644
--- a/dynasm/dynasm.lua
+++ b/dynasm/dynasm.lua
@@ -630,6 +630,7 @@ end
630-- Load architecture-specific module. 630-- Load architecture-specific module.
631local function loadarch(arch) 631local function loadarch(arch)
632 if not match(arch, "^[%w_]+$") then return "bad arch name" end 632 if not match(arch, "^[%w_]+$") then return "bad arch name" end
633 _G._map_def = map_def
633 local ok, m_arch = pcall(require, "dasm_"..arch) 634 local ok, m_arch = pcall(require, "dasm_"..arch)
634 if not ok then return "cannot load module: "..m_arch end 635 if not ok then return "cannot load module: "..m_arch end
635 g_arch = m_arch 636 g_arch = m_arch
diff --git a/src/Makefile b/src/Makefile
index ad80642b..386f279f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -445,6 +445,9 @@ ifeq (arm,$(TARGET_LJARCH))
445 DASM_AFLAGS+= -D IOS 445 DASM_AFLAGS+= -D IOS
446 endif 446 endif
447else 447else
448ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH)))
449 DASM_AFLAGS+= -D MIPSR6
450endif
448ifeq (ppc,$(TARGET_LJARCH)) 451ifeq (ppc,$(TARGET_LJARCH))
449 ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) 452 ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH)))
450 DASM_AFLAGS+= -D SQRT 453 DASM_AFLAGS+= -D SQRT
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 2553d97e..41081184 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -17,6 +17,10 @@ local bit = require("bit")
17-- Symbol name prefix for LuaJIT bytecode. 17-- Symbol name prefix for LuaJIT bytecode.
18local LJBC_PREFIX = "luaJIT_BC_" 18local LJBC_PREFIX = "luaJIT_BC_"
19 19
20local type, assert = type, assert
21local format = string.format
22local tremove, tconcat = table.remove, table.concat
23
20------------------------------------------------------------------------------ 24------------------------------------------------------------------------------
21 25
22local function usage() 26local function usage()
@@ -63,8 +67,18 @@ local map_type = {
63} 67}
64 68
65local map_arch = { 69local map_arch = {
66 x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, 70 x86 = { e = "le", b = 32, m = 3, p = 0x14c, },
67 ppc = true, mips = true, mipsel = true, 71 x64 = { e = "le", b = 64, m = 62, p = 0x8664, },
72 arm = { e = "le", b = 32, m = 40, p = 0x1c0, },
73 arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, },
74 arm64be = { e = "be", b = 64, m = 183, },
75 ppc = { e = "be", b = 32, m = 20, },
76 mips = { e = "be", b = 32, m = 8, f = 0x50001006, },
77 mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, },
78 mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, },
79 mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
80 mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
81 mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
68} 82}
69 83
70local map_os = { 84local map_os = {
@@ -73,33 +87,33 @@ local map_os = {
73} 87}
74 88
75local function checkarg(str, map, err) 89local function checkarg(str, map, err)
76 str = string.lower(str) 90 str = str:lower()
77 local s = check(map[str], "unknown ", err) 91 local s = check(map[str], "unknown ", err)
78 return s == true and str or s 92 return type(s) == "string" and s or str
79end 93end
80 94
81local function detecttype(str) 95local function detecttype(str)
82 local ext = string.match(string.lower(str), "%.(%a+)$") 96 local ext = str:lower():match("%.(%a+)$")
83 return map_type[ext] or "raw" 97 return map_type[ext] or "raw"
84end 98end
85 99
86local function checkmodname(str) 100local function checkmodname(str)
87 check(string.match(str, "^[%w_.%-]+$"), "bad module name") 101 check(str:match("^[%w_.%-]+$"), "bad module name")
88 return string.gsub(str, "[%.%-]", "_") 102 return str:gsub("[%.%-]", "_")
89end 103end
90 104
91local function detectmodname(str) 105local function detectmodname(str)
92 if type(str) == "string" then 106 if type(str) == "string" then
93 local tail = string.match(str, "[^/\\]+$") 107 local tail = str:match("[^/\\]+$")
94 if tail then str = tail end 108 if tail then str = tail end
95 local head = string.match(str, "^(.*)%.[^.]*$") 109 local head = str:match("^(.*)%.[^.]*$")
96 if head then str = head end 110 if head then str = head end
97 str = string.match(str, "^[%w_.%-]+") 111 str = str:match("^[%w_.%-]+")
98 else 112 else
99 str = nil 113 str = nil
100 end 114 end
101 check(str, "cannot derive module name, use -n name") 115 check(str, "cannot derive module name, use -n name")
102 return string.gsub(str, "[%.%-]", "_") 116 return str:gsub("[%.%-]", "_")
103end 117end
104 118
105------------------------------------------------------------------------------ 119------------------------------------------------------------------------------
@@ -118,7 +132,7 @@ end
118local function bcsave_c(ctx, output, s) 132local function bcsave_c(ctx, output, s)
119 local fp = savefile(output, "w") 133 local fp = savefile(output, "w")
120 if ctx.type == "c" then 134 if ctx.type == "c" then
121 fp:write(string.format([[ 135 fp:write(format([[
122#ifdef _cplusplus 136#ifdef _cplusplus
123extern "C" 137extern "C"
124#endif 138#endif
@@ -128,7 +142,7 @@ __declspec(dllexport)
128const unsigned char %s%s[] = { 142const unsigned char %s%s[] = {
129]], LJBC_PREFIX, ctx.modname)) 143]], LJBC_PREFIX, ctx.modname))
130 else 144 else
131 fp:write(string.format([[ 145 fp:write(format([[
132#define %s%s_SIZE %d 146#define %s%s_SIZE %d
133static const unsigned char %s%s[] = { 147static const unsigned char %s%s[] = {
134]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) 148]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
@@ -138,13 +152,13 @@ static const unsigned char %s%s[] = {
138 local b = tostring(string.byte(s, i)) 152 local b = tostring(string.byte(s, i))
139 m = m + #b + 1 153 m = m + #b + 1
140 if m > 78 then 154 if m > 78 then
141 fp:write(table.concat(t, ",", 1, n), ",\n") 155 fp:write(tconcat(t, ",", 1, n), ",\n")
142 n, m = 0, #b + 1 156 n, m = 0, #b + 1
143 end 157 end
144 n = n + 1 158 n = n + 1
145 t[n] = b 159 t[n] = b
146 end 160 end
147 bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") 161 bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n")
148end 162end
149 163
150local function bcsave_elfobj(ctx, output, s, ffi) 164local function bcsave_elfobj(ctx, output, s, ffi)
@@ -199,12 +213,8 @@ typedef struct {
199} ELF64obj; 213} ELF64obj;
200]] 214]]
201 local symname = LJBC_PREFIX..ctx.modname 215 local symname = LJBC_PREFIX..ctx.modname
202 local is64, isbe = false, false 216 local ai = assert(map_arch[ctx.arch])
203 if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then 217 local is64, isbe = ai.b == 64, ai.e == "be"
204 is64 = true
205 elseif ctx.arch == "ppc" or ctx.arch == "mips" then
206 isbe = true
207 end
208 218
209 -- Handle different host/target endianess. 219 -- Handle different host/target endianess.
210 local function f32(x) return x end 220 local function f32(x) return x end
@@ -237,10 +247,8 @@ typedef struct {
237 hdr.eendian = isbe and 2 or 1 247 hdr.eendian = isbe and 2 or 1
238 hdr.eversion = 1 248 hdr.eversion = 1
239 hdr.type = f16(1) 249 hdr.type = f16(1)
240 hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) 250 hdr.machine = f16(ai.m)
241 if ctx.arch == "mips" or ctx.arch == "mipsel" then 251 hdr.flags = f32(ai.f or 0)
242 hdr.flags = f32(0x50001006)
243 end
244 hdr.version = f32(1) 252 hdr.version = f32(1)
245 hdr.shofs = fofs(ffi.offsetof(o, "sect")) 253 hdr.shofs = fofs(ffi.offsetof(o, "sect"))
246 hdr.ehsize = f16(ffi.sizeof(hdr)) 254 hdr.ehsize = f16(ffi.sizeof(hdr))
@@ -336,12 +344,8 @@ typedef struct {
336} PEobj; 344} PEobj;
337]] 345]]
338 local symname = LJBC_PREFIX..ctx.modname 346 local symname = LJBC_PREFIX..ctx.modname
339 local is64 = false 347 local ai = assert(map_arch[ctx.arch])
340 if ctx.arch == "x86" then 348 local is64 = ai.b == 64
341 symname = "_"..symname
342 elseif ctx.arch == "x64" then
343 is64 = true
344 end
345 local symexport = " /EXPORT:"..symname..",DATA " 349 local symexport = " /EXPORT:"..symname..",DATA "
346 350
347 -- The file format is always little-endian. Swap if the host is big-endian. 351 -- The file format is always little-endian. Swap if the host is big-endian.
@@ -355,7 +359,7 @@ typedef struct {
355 -- Create PE object and fill in header. 359 -- Create PE object and fill in header.
356 local o = ffi.new("PEobj") 360 local o = ffi.new("PEobj")
357 local hdr = o.hdr 361 local hdr = o.hdr
358 hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) 362 hdr.arch = f16(assert(ai.p))
359 hdr.nsects = f16(2) 363 hdr.nsects = f16(2)
360 hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) 364 hdr.symtabofs = f32(ffi.offsetof(o, "sym0"))
361 hdr.nsyms = f32(6) 365 hdr.nsyms = f32(6)
@@ -605,16 +609,16 @@ local function docmd(...)
605 local n = 1 609 local n = 1
606 local list = false 610 local list = false
607 local ctx = { 611 local ctx = {
608 strip = true, arch = jit.arch, os = string.lower(jit.os), 612 strip = true, arch = jit.arch, os = jit.os:lower(),
609 type = false, modname = false, 613 type = false, modname = false,
610 } 614 }
611 while n <= #arg do 615 while n <= #arg do
612 local a = arg[n] 616 local a = arg[n]
613 if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then 617 if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
614 table.remove(arg, n) 618 tremove(arg, n)
615 if a == "--" then break end 619 if a == "--" then break end
616 for m=2,#a do 620 for m=2,#a do
617 local opt = string.sub(a, m, m) 621 local opt = a:sub(m, m)
618 if opt == "l" then 622 if opt == "l" then
619 list = true 623 list = true
620 elseif opt == "s" then 624 elseif opt == "s" then
@@ -627,13 +631,13 @@ local function docmd(...)
627 if n ~= 1 then usage() end 631 if n ~= 1 then usage() end
628 arg[1] = check(loadstring(arg[1])) 632 arg[1] = check(loadstring(arg[1]))
629 elseif opt == "n" then 633 elseif opt == "n" then
630 ctx.modname = checkmodname(table.remove(arg, n)) 634 ctx.modname = checkmodname(tremove(arg, n))
631 elseif opt == "t" then 635 elseif opt == "t" then
632 ctx.type = checkarg(table.remove(arg, n), map_type, "file type") 636 ctx.type = checkarg(tremove(arg, n), map_type, "file type")
633 elseif opt == "a" then 637 elseif opt == "a" then
634 ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") 638 ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture")
635 elseif opt == "o" then 639 elseif opt == "o" then
636 ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") 640 ctx.os = checkarg(tremove(arg, n), map_os, "OS name")
637 else 641 else
638 usage() 642 usage()
639 end 643 end
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index a12b8e62..c003b984 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex
19local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift 19local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
20 20
21------------------------------------------------------------------------------ 21------------------------------------------------------------------------------
22-- Primary and extended opcode maps 22-- Extended opcode maps common to all MIPS releases
23------------------------------------------------------------------------------ 23------------------------------------------------------------------------------
24 24
25local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
26local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } 25local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", }
27local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } 26local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", }
28 27
28local map_cop0 = {
29 shift = 25, mask = 1,
30 [0] = {
31 shift = 21, mask = 15,
32 [0] = "mfc0TDW", [4] = "mtc0TDW",
33 [10] = "rdpgprDT",
34 [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
35 [14] = "wrpgprDT",
36 }, {
37 shift = 0, mask = 63,
38 [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
39 [24] = "eret", [31] = "deret",
40 [32] = "wait",
41 },
42}
43
44------------------------------------------------------------------------------
45-- Primary and extended opcode maps for MIPS R1-R5
46------------------------------------------------------------------------------
47
48local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
49
29local map_special = { 50local map_special = {
30 shift = 0, mask = 63, 51 shift = 0, mask = 63,
31 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, 52 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
@@ -87,22 +108,6 @@ local map_regimm = {
87 false, false, false, "synciSO", 108 false, false, false, "synciSO",
88} 109}
89 110
90local map_cop0 = {
91 shift = 25, mask = 1,
92 [0] = {
93 shift = 21, mask = 15,
94 [0] = "mfc0TDW", [4] = "mtc0TDW",
95 [10] = "rdpgprDT",
96 [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
97 [14] = "wrpgprDT",
98 }, {
99 shift = 0, mask = 63,
100 [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
101 [24] = "eret", [31] = "deret",
102 [32] = "wait",
103 },
104}
105
106local map_cop1s = { 111local map_cop1s = {
107 shift = 0, mask = 63, 112 shift = 0, mask = 63,
108 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", 113 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
@@ -234,6 +239,208 @@ local map_pri = {
234} 239}
235 240
236------------------------------------------------------------------------------ 241------------------------------------------------------------------------------
242-- Primary and extended opcode maps for MIPS R6
243------------------------------------------------------------------------------
244
245local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" }
246local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" }
247local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" }
248local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" }
249local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" }
250local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" }
251local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" }
252local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" }
253
254local map_special_r6 = {
255 shift = 0, mask = 63,
256 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
257 false, map_srl, "sraDTA",
258 "sllvDTS", false, map_srlv, "sravDTS",
259 "jrS", "jalrD1S", false, false,
260 "syscallY", "breakY", false, "sync",
261 "clzDS", "cloDS", "dclzDS", "dcloDS",
262 "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST",
263 map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6,
264 map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6,
265 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
266 "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
267 false, false, "sltDST", "sltuDST",
268 "daddDST", "dadduDST", "dsubDST", "dsubuDST",
269 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
270 "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST",
271 "dsllDTA", false, "dsrlDTA", "dsraDTA",
272 "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
273}
274
275local map_bshfl_r6 = {
276 shift = 9, mask = 3,
277 [1] = "alignDSTa",
278 _ = {
279 shift = 6, mask = 31,
280 [0] = "bitswapDT",
281 [2] = "wsbhDT",
282 [16] = "sebDT",
283 [24] = "sehDT",
284 }
285}
286
287local map_dbshfl_r6 = {
288 shift = 9, mask = 3,
289 [1] = "dalignDSTa",
290 _ = {
291 shift = 6, mask = 31,
292 [0] = "dbitswapDT",
293 [2] = "dsbhDT",
294 [5] = "dshdDT",
295 }
296}
297
298local map_special3_r6 = {
299 shift = 0, mask = 63,
300 [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
301 [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
302 [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD",
303}
304
305local map_regimm_r6 = {
306 shift = 16, mask = 31,
307 [0] = "bltzSB", [1] = "bgezSB",
308 [6] = "dahiSI", [30] = "datiSI",
309 [23] = "sigrieI", [31] = "synciSO",
310}
311
312local map_pcrel_r6 = {
313 shift = 19, mask = 3,
314 [0] = "addiupcS2", "lwpcS2", "lwupcS2", {
315 shift = 18, mask = 1,
316 [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" }
317 }
318}
319
320local map_cop1s_r6 = {
321 shift = 0, mask = 63,
322 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
323 "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG",
324 "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG",
325 "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG",
326 "sel.sFGH", false, false, false,
327 "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH",
328 "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG",
329 "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH",
330 false, "cvt.d.sFG", false, false,
331 "cvt.w.sFG", "cvt.l.sFG",
332}
333
334local map_cop1d_r6 = {
335 shift = 0, mask = 63,
336 [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH",
337 "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG",
338 "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG",
339 "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG",
340 "sel.dFGH", false, false, false,
341 "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH",
342 "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG",
343 "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH",
344 "cvt.s.dFG", false, false, false,
345 "cvt.w.dFG", "cvt.l.dFG",
346}
347
348local map_cop1w_r6 = {
349 shift = 0, mask = 63,
350 [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH",
351 "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH",
352 "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH",
353 "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH",
354 false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH",
355 false, false, false, false,
356 false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH",
357 false, false, false, false,
358 "cvt.s.wFG", "cvt.d.wFG",
359}
360
361local map_cop1l_r6 = {
362 shift = 0, mask = 63,
363 [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH",
364 "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH",
365 "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH",
366 "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH",
367 false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH",
368 false, false, false, false,
369 false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH",
370 false, false, false, false,
371 "cvt.s.lFG", "cvt.d.lFG",
372}
373
374local map_cop1_r6 = {
375 shift = 21, mask = 31,
376 [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
377 "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
378 false, "bc1eqzHB", false, false,
379 false, "bc1nezHB", false, false,
380 map_cop1s_r6, map_cop1d_r6, false, false,
381 map_cop1w_r6, map_cop1l_r6,
382}
383
384local function maprs_popTS(rs, rt)
385 if rt == 0 then return 0 elseif rs == 0 then return 1
386 elseif rs == rt then return 2 else return 3 end
387end
388
389local map_pop06_r6 = {
390 maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB"
391}
392local map_pop07_r6 = {
393 maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB"
394}
395local map_pop26_r6 = {
396 maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB"
397}
398local map_pop27_r6 = {
399 maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB"
400}
401
402local function maprs_popS(rs, rt)
403 if rs == 0 then return 0 else return 1 end
404end
405
406local map_pop66_r6 = {
407 maprs = maprs_popS, [0] = "jicTI", "beqzcSb"
408}
409local map_pop76_r6 = {
410 maprs = maprs_popS, [0] = "jialcTI", "bnezcSb"
411}
412
413local function maprs_popST(rs, rt)
414 if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end
415end
416
417local map_pop10_r6 = {
418 maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB"
419}
420local map_pop30_r6 = {
421 maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB"
422}
423
424local map_pri_r6 = {
425 [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ",
426 "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6,
427 map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI",
428 "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U",
429 map_cop0, map_cop1_r6, false, false,
430 false, false, map_pop26_r6, map_pop27_r6,
431 map_pop30_r6, "daddiuTSI", false, false,
432 false, "dauiTSI", false, map_special3_r6,
433 "lbTSO", "lhTSO", false, "lwTSO",
434 "lbuTSO", "lhuTSO", false, false,
435 "sbTSO", "shTSO", false, "swTSO",
436 false, false, false, false,
437 false, "lwc1HSO", "bc#", false,
438 false, "ldc1HSO", map_pop66_r6, "ldTSO",
439 false, "swc1HSO", "balc#", map_pcrel_r6,
440 false, "sdc1HSO", map_pop76_r6, "sdTSO",
441}
442
443------------------------------------------------------------------------------
237 444
238local map_gpr = { 445local map_gpr = {
239 [0] = "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", 446 [0] = "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
@@ -287,10 +494,14 @@ local function disass_ins(ctx)
287 ctx.op = op 494 ctx.op = op
288 ctx.rel = nil 495 ctx.rel = nil
289 496
290 local opat = map_pri[rshift(op, 26)] 497 local opat = ctx.map_pri[rshift(op, 26)]
291 while type(opat) ~= "string" do 498 while type(opat) ~= "string" do
292 if not opat then return unknown(ctx) end 499 if not opat then return unknown(ctx) end
293 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ 500 if opat.maprs then
501 opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))]
502 else
503 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
504 end
294 end 505 end
295 local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") 506 local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
296 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") 507 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
@@ -314,6 +525,8 @@ local function disass_ins(ctx)
314 x = "f"..band(rshift(op, 21), 31) 525 x = "f"..band(rshift(op, 21), 31)
315 elseif p == "A" then 526 elseif p == "A" then
316 x = band(rshift(op, 6), 31) 527 x = band(rshift(op, 6), 31)
528 elseif p == "a" then
529 x = band(rshift(op, 6), 7)
317 elseif p == "E" then 530 elseif p == "E" then
318 x = band(rshift(op, 6), 31) + 32 531 x = band(rshift(op, 6), 31) + 32
319 elseif p == "M" then 532 elseif p == "M" then
@@ -333,6 +546,10 @@ local function disass_ins(ctx)
333 x = band(rshift(op, 11), 31) - last + 33 546 x = band(rshift(op, 11), 31) - last + 33
334 elseif p == "I" then 547 elseif p == "I" then
335 x = arshift(lshift(op, 16), 16) 548 x = arshift(lshift(op, 16), 16)
549 elseif p == "2" then
550 x = arshift(lshift(op, 13), 11)
551 elseif p == "3" then
552 x = arshift(lshift(op, 14), 11)
336 elseif p == "U" then 553 elseif p == "U" then
337 x = band(op, 0xffff) 554 x = band(op, 0xffff)
338 elseif p == "O" then 555 elseif p == "O" then
@@ -342,7 +559,15 @@ local function disass_ins(ctx)
342 local index = map_gpr[band(rshift(op, 16), 31)] 559 local index = map_gpr[band(rshift(op, 16), 31)]
343 operands[#operands] = format("%s(%s)", index, last) 560 operands[#operands] = format("%s(%s)", index, last)
344 elseif p == "B" then 561 elseif p == "B" then
345 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 562 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4
563 ctx.rel = x
564 x = format("0x%08x", x)
565 elseif p == "b" then
566 x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4
567 ctx.rel = x
568 x = format("0x%08x", x)
569 elseif p == "#" then
570 x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4
346 ctx.rel = x 571 ctx.rel = x
347 x = format("0x%08x", x) 572 x = format("0x%08x", x)
348 elseif p == "J" then 573 elseif p == "J" then
@@ -408,6 +633,7 @@ local function create(code, addr, out)
408 ctx.disass = disass_block 633 ctx.disass = disass_block
409 ctx.hexdump = 8 634 ctx.hexdump = 8
410 ctx.get = get_be 635 ctx.get = get_be
636 ctx.map_pri = map_pri
411 return ctx 637 return ctx
412end 638end
413 639
@@ -417,6 +643,19 @@ local function create_el(code, addr, out)
417 return ctx 643 return ctx
418end 644end
419 645
646local function create_r6(code, addr, out)
647 local ctx = create(code, addr, out)
648 ctx.map_pri = map_pri_r6
649 return ctx
650end
651
652local function create_r6_el(code, addr, out)
653 local ctx = create(code, addr, out)
654 ctx.get = get_le
655 ctx.map_pri = map_pri_r6
656 return ctx
657end
658
420-- Simple API: disassemble code (a string) at address and output via out. 659-- Simple API: disassemble code (a string) at address and output via out.
421local function disass(code, addr, out) 660local function disass(code, addr, out)
422 create(code, addr, out):disass() 661 create(code, addr, out):disass()
@@ -426,6 +665,14 @@ local function disass_el(code, addr, out)
426 create_el(code, addr, out):disass() 665 create_el(code, addr, out):disass()
427end 666end
428 667
668local function disass_r6(code, addr, out)
669 create_r6(code, addr, out):disass()
670end
671
672local function disass_r6_el(code, addr, out)
673 create_r6_el(code, addr, out):disass()
674end
675
429-- Return register name for RID. 676-- Return register name for RID.
430local function regname(r) 677local function regname(r)
431 if r < 32 then return map_gpr[r] end 678 if r < 32 then return map_gpr[r] end
@@ -436,8 +683,12 @@ end
436return { 683return {
437 create = create, 684 create = create,
438 create_el = create_el, 685 create_el = create_el,
686 create_r6 = create_r6,
687 create_r6_el = create_r6_el,
439 disass = disass, 688 disass = disass,
440 disass_el = disass_el, 689 disass_el = disass_el,
690 disass_r6 = disass_r6,
691 disass_r6_el = disass_r6_el,
441 regname = regname 692 regname = regname
442} 693}
443 694
diff --git a/src/jit/dis_mips64r6.lua b/src/jit/dis_mips64r6.lua
new file mode 100644
index 00000000..023c05ab
--- /dev/null
+++ b/src/jit/dis_mips64r6.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64R6 disassembler wrapper module.
3--
4-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the r6 big-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create_r6,
14 disass = dis_mips.disass_r6,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64r6el.lua b/src/jit/dis_mips64r6el.lua
new file mode 100644
index 00000000..f2988339
--- /dev/null
+++ b/src/jit/dis_mips64r6el.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64R6EL disassembler wrapper module.
3--
4-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the r6 little-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create_r6_el,
14 disass = dis_mips.disass_r6_el,
15 regname = dis_mips.regname
16}
17
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 903d6c64..cd1a0568 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -330,18 +330,38 @@
330#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 330#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
331 331
332#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) 332#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
333#if __mips_isa_rev >= 6
334#define LJ_TARGET_MIPSR6 1
335#define LJ_TARGET_UNALIGNED 1
336#endif
333#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 337#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
338#if LJ_TARGET_MIPSR6
339#define LJ_ARCH_NAME "mips32r6el"
340#else
334#define LJ_ARCH_NAME "mipsel" 341#define LJ_ARCH_NAME "mipsel"
342#endif
343#else
344#if LJ_TARGET_MIPSR6
345#define LJ_ARCH_NAME "mips64r6el"
335#else 346#else
336#define LJ_ARCH_NAME "mips64el" 347#define LJ_ARCH_NAME "mips64el"
337#endif 348#endif
349#endif
338#define LJ_ARCH_ENDIAN LUAJIT_LE 350#define LJ_ARCH_ENDIAN LUAJIT_LE
339#else 351#else
340#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 352#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
353#if LJ_TARGET_MIPSR6
354#define LJ_ARCH_NAME "mips32r6"
355#else
341#define LJ_ARCH_NAME "mips" 356#define LJ_ARCH_NAME "mips"
357#endif
358#else
359#if LJ_TARGET_MIPSR6
360#define LJ_ARCH_NAME "mips64r6"
342#else 361#else
343#define LJ_ARCH_NAME "mips64" 362#define LJ_ARCH_NAME "mips64"
344#endif 363#endif
364#endif
345#define LJ_ARCH_ENDIAN LUAJIT_BE 365#define LJ_ARCH_ENDIAN LUAJIT_BE
346#endif 366#endif
347 367
@@ -377,7 +397,9 @@
377#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 397#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
378#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL 398#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
379 399
380#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 400#if LJ_TARGET_MIPSR6
401#define LJ_ARCH_VERSION 60
402#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
381#define LJ_ARCH_VERSION 20 403#define LJ_ARCH_VERSION 20
382#else 404#else
383#define LJ_ARCH_VERSION 10 405#define LJ_ARCH_VERSION 10
@@ -453,8 +475,13 @@
453#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) 475#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
454#error "Only o32 ABI supported for MIPS32" 476#error "Only o32 ABI supported for MIPS32"
455#endif 477#endif
478#if LJ_TARGET_MIPSR6
479/* Not that useful, since most available r6 CPUs are 64 bit. */
480#error "No support for MIPS32R6"
481#endif
456#elif LJ_TARGET_MIPS64 482#elif LJ_TARGET_MIPS64
457#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) 483#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
484/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
458#error "Only n64 ABI supported for MIPS64" 485#error "Only n64 ABI supported for MIPS64"
459#endif 486#endif
460#endif 487#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
index c4c5dfdd..4f171edd 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2112,8 +2112,8 @@ static void asm_setup_regsp(ASMState *as)
2112 ir->prev = REGSP_HINT(RID_FPRET); 2112 ir->prev = REGSP_HINT(RID_FPRET);
2113 continue; 2113 continue;
2114 } 2114 }
2115 /* fallthrough */
2116#endif 2115#endif
2116 /* fallthrough */
2117 case IR_CALLN: case IR_CALLXS: 2117 case IR_CALLN: case IR_CALLXS:
2118#if LJ_SOFTFP 2118#if LJ_SOFTFP
2119 case IR_MIN: case IR_MAX: 2119 case IR_MIN: case IR_MAX:
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 3a4679b8..3dbe836d 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -101,7 +101,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
101 as->invmcp = NULL; 101 as->invmcp = NULL;
102 as->loopinv = 1; 102 as->loopinv = 1;
103 as->mcp = p+1; 103 as->mcp = p+1;
104#if !LJ_TARGET_MIPSR6
104 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ 105 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */
106#else
107 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u :
108 (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */
109#endif
105 target = p; /* Patch target later in asm_loop_fixup. */ 110 target = p; /* Patch target later in asm_loop_fixup. */
106 } 111 }
107 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); 112 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
@@ -410,7 +415,11 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
410{ 415{
411 /* The modified regs must match with the *.dasc implementation. */ 416 /* The modified regs must match with the *.dasc implementation. */
412 RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| 417 RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
413 RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); 418 RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)
419#if LJ_TARGET_MIPSR6
420 |RID2RSET(RID_F21)
421#endif
422 ;
414 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); 423 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
415 ra_evictset(as, drop); 424 ra_evictset(as, drop);
416 ra_destreg(as, ir, RID_FPRET); 425 ra_destreg(as, ir, RID_FPRET);
@@ -444,8 +453,13 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
444{ 453{
445 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 454 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
446 Reg dest = ra_dest(as, ir, RSET_GPR); 455 Reg dest = ra_dest(as, ir, RSET_GPR);
456#if !LJ_TARGET_MIPSR6
447 asm_guard(as, MIPSI_BC1F, 0, 0); 457 asm_guard(as, MIPSI_BC1F, 0, 0);
448 emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); 458 emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
459#else
460 asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31));
461 emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left);
462#endif
449 emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); 463 emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
450 emit_tg(as, MIPSI_MFC1, dest, tmp); 464 emit_tg(as, MIPSI_MFC1, dest, tmp);
451 emit_fg(as, MIPSI_CVT_W_D, tmp, left); 465 emit_fg(as, MIPSI_CVT_W_D, tmp, left);
@@ -599,8 +613,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
599 (void *)&as->J->k64[LJ_K64_M2P64], 613 (void *)&as->J->k64[LJ_K64_M2P64],
600 rset_exclude(RSET_GPR, dest)); 614 rset_exclude(RSET_GPR, dest));
601 emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ 615 emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */
602 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 616#if !LJ_TARGET_MIPSR6
603 emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); 617 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
618 emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
619#else
620 emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
621 emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp);
622#endif
604 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), 623 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
605 (void *)&as->J->k64[LJ_K64_2P63], 624 (void *)&as->J->k64[LJ_K64_2P63],
606 rset_exclude(RSET_GPR, dest)); 625 rset_exclude(RSET_GPR, dest));
@@ -611,8 +630,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
611 (void *)&as->J->k32[LJ_K32_M2P64], 630 (void *)&as->J->k32[LJ_K32_M2P64],
612 rset_exclude(RSET_GPR, dest)); 631 rset_exclude(RSET_GPR, dest));
613 emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ 632 emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */
614 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 633#if !LJ_TARGET_MIPSR6
615 emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); 634 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
635 emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
636#else
637 emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
638 emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp);
639#endif
616 emit_lsptr(as, MIPSI_LWC1, (tmp & 31), 640 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
617 (void *)&as->J->k32[LJ_K32_2P63], 641 (void *)&as->J->k32[LJ_K32_2P63],
618 rset_exclude(RSET_GPR, dest)); 642 rset_exclude(RSET_GPR, dest));
@@ -840,8 +864,12 @@ static void asm_aref(ASMState *as, IRIns *ir)
840 } 864 }
841 base = ra_alloc1(as, ir->op1, RSET_GPR); 865 base = ra_alloc1(as, ir->op1, RSET_GPR);
842 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); 866 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
867#if !LJ_TARGET_MIPSR6
843 emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); 868 emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base);
844 emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); 869 emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
870#else
871 emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base);
872#endif
845} 873}
846 874
847/* Inlined hash lookup. Specialized for key type and for const keys. 875/* Inlined hash lookup. Specialized for key type and for const keys.
@@ -944,8 +972,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
944 l_end = asm_exitstub_addr(as); 972 l_end = asm_exitstub_addr(as);
945 } 973 }
946 if (!LJ_SOFTFP && irt_isnum(kt)) { 974 if (!LJ_SOFTFP && irt_isnum(kt)) {
975#if !LJ_TARGET_MIPSR6
947 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 976 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
948 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); 977 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
978#else
979 emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end);
980 emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key);
981#endif
949 *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ 982 *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */
950 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); 983 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
951 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); 984 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
@@ -1196,7 +1229,9 @@ static MIPSIns asm_fxloadins(IRIns *ir)
1196 case IRT_I16: return MIPSI_LH; 1229 case IRT_I16: return MIPSI_LH;
1197 case IRT_U16: return MIPSI_LHU; 1230 case IRT_U16: return MIPSI_LHU;
1198 case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1; 1231 case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1;
1232 /* fallthrough */
1199 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; 1233 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
1234 /* fallthrough */
1200 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; 1235 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW;
1201 } 1236 }
1202} 1237}
@@ -1207,7 +1242,9 @@ static MIPSIns asm_fxstoreins(IRIns *ir)
1207 case IRT_I8: case IRT_U8: return MIPSI_SB; 1242 case IRT_I8: case IRT_U8: return MIPSI_SB;
1208 case IRT_I16: case IRT_U16: return MIPSI_SH; 1243 case IRT_I16: case IRT_U16: return MIPSI_SH;
1209 case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1; 1244 case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1;
1245 /* fallthrough */
1210 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; 1246 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
1247 /* fallthrough */
1211 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; 1248 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW;
1212 } 1249 }
1213} 1250}
@@ -1253,7 +1290,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
1253{ 1290{
1254 Reg dest = ra_dest(as, ir, 1291 Reg dest = ra_dest(as, ir,
1255 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); 1292 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
1256 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 1293 lua_assert(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED));
1257 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1294 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
1258} 1295}
1259 1296
@@ -1544,7 +1581,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1544 ofs -= 4; if (LJ_BE) ir++; else ir--; 1581 ofs -= 4; if (LJ_BE) ir++; else ir--;
1545 } 1582 }
1546#else 1583#else
1547 emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow), 1584 emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow),
1548 RID_RET, sizeof(GCcdata)); 1585 RID_RET, sizeof(GCcdata));
1549#endif 1586#endif
1550 lua_assert(sz == 4 || sz == 8); 1587 lua_assert(sz == 4 || sz == 8);
@@ -1672,6 +1709,7 @@ static void asm_add(ASMState *as, IRIns *ir)
1672 } else 1709 } else
1673#endif 1710#endif
1674 { 1711 {
1712 /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */
1675 Reg dest = ra_dest(as, ir, RSET_GPR); 1713 Reg dest = ra_dest(as, ir, RSET_GPR);
1676 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1714 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1677 if (irref_isk(ir->op2)) { 1715 if (irref_isk(ir->op2)) {
@@ -1716,8 +1754,12 @@ static void asm_mul(ASMState *as, IRIns *ir)
1716 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 1754 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1717 right = (left >> 8); left &= 255; 1755 right = (left >> 8); left &= 255;
1718 if (LJ_64 && irt_is64(ir->t)) { 1756 if (LJ_64 && irt_is64(ir->t)) {
1757#if !LJ_TARGET_MIPSR6
1719 emit_dst(as, MIPSI_MFLO, dest, 0, 0); 1758 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1720 emit_dst(as, MIPSI_DMULT, 0, left, right); 1759 emit_dst(as, MIPSI_DMULT, 0, left, right);
1760#else
1761 emit_dst(as, MIPSI_DMUL, dest, left, right);
1762#endif
1721 } else { 1763 } else {
1722 emit_dst(as, MIPSI_MUL, dest, left, right); 1764 emit_dst(as, MIPSI_MUL, dest, left, right);
1723 } 1765 }
@@ -1801,6 +1843,7 @@ static void asm_abs(ASMState *as, IRIns *ir)
1801 1843
1802static void asm_arithov(ASMState *as, IRIns *ir) 1844static void asm_arithov(ASMState *as, IRIns *ir)
1803{ 1845{
1846 /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */
1804 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); 1847 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
1805 lua_assert(!irt_is64(ir->t)); 1848 lua_assert(!irt_is64(ir->t));
1806 if (irref_isk(ir->op2)) { 1849 if (irref_isk(ir->op2)) {
@@ -1845,9 +1888,14 @@ static void asm_mulov(ASMState *as, IRIns *ir)
1845 right), dest)); 1888 right), dest));
1846 asm_guard(as, MIPSI_BNE, RID_TMP, tmp); 1889 asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
1847 emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); 1890 emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
1891#if !LJ_TARGET_MIPSR6
1848 emit_dst(as, MIPSI_MFHI, tmp, 0, 0); 1892 emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
1849 emit_dst(as, MIPSI_MFLO, dest, 0, 0); 1893 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1850 emit_dst(as, MIPSI_MULT, 0, left, right); 1894 emit_dst(as, MIPSI_MULT, 0, left, right);
1895#else
1896 emit_dst(as, MIPSI_MUL, dest, left, right);
1897 emit_dst(as, MIPSI_MUH, tmp, left, right);
1898#endif
1851} 1899}
1852 1900
1853#if LJ_32 && LJ_HASFFI 1901#if LJ_32 && LJ_HASFFI
@@ -2071,6 +2119,7 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
2071 Reg dest = ra_dest(as, ir, RSET_FPR); 2119 Reg dest = ra_dest(as, ir, RSET_FPR);
2072 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 2120 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
2073 right = (left >> 8); left &= 255; 2121 right = (left >> 8); left &= 255;
2122#if !LJ_TARGET_MIPSR6
2074 if (dest == left) { 2123 if (dest == left) {
2075 emit_fg(as, MIPSI_MOVT_D, dest, right); 2124 emit_fg(as, MIPSI_MOVT_D, dest, right);
2076 } else { 2125 } else {
@@ -2078,19 +2127,37 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
2078 if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); 2127 if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
2079 } 2128 }
2080 emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); 2129 emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
2130#else
2131 emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right);
2132#endif
2081#endif 2133#endif
2082 } else { 2134 } else {
2083 Reg dest = ra_dest(as, ir, RSET_GPR); 2135 Reg dest = ra_dest(as, ir, RSET_GPR);
2084 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 2136 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
2085 right = (left >> 8); left &= 255; 2137 right = (left >> 8); left &= 255;
2086 if (dest == left) { 2138 if (left == right) {
2087 emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); 2139 if (dest != left) emit_move(as, dest, left);
2088 } else { 2140 } else {
2089 emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); 2141#if !LJ_TARGET_MIPSR6
2090 if (dest != right) emit_move(as, dest, right); 2142 if (dest == left) {
2143 emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
2144 } else {
2145 emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
2146 if (dest != right) emit_move(as, dest, right);
2147 }
2148#else
2149 emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
2150 if (dest != right) {
2151 emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP);
2152 emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP);
2153 } else {
2154 emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP);
2155 emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP);
2156 }
2157#endif
2158 emit_dst(as, MIPSI_SLT, RID_TMP,
2159 ismax ? left : right, ismax ? right : left);
2091 } 2160 }
2092 emit_dst(as, MIPSI_SLT, RID_TMP,
2093 ismax ? left : right, ismax ? right : left);
2094 } 2161 }
2095} 2162}
2096 2163
@@ -2174,10 +2241,18 @@ static void asm_comp(ASMState *as, IRIns *ir)
2174#if LJ_SOFTFP 2241#if LJ_SOFTFP
2175 asm_sfpcomp(as, ir); 2242 asm_sfpcomp(as, ir);
2176#else 2243#else
2244#if !LJ_TARGET_MIPSR6
2177 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 2245 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
2178 right = (left >> 8); left &= 255; 2246 right = (left >> 8); left &= 255;
2179 asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); 2247 asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
2180 emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); 2248 emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
2249#else
2250 Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR);
2251 right = (left >> 8); left &= 255;
2252 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
2253 asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
2254 emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right);
2255#endif
2181#endif 2256#endif
2182 } else { 2257 } else {
2183 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); 2258 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -2213,9 +2288,13 @@ static void asm_equal(ASMState *as, IRIns *ir)
2213 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { 2288 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
2214#if LJ_SOFTFP 2289#if LJ_SOFTFP
2215 asm_sfpcomp(as, ir); 2290 asm_sfpcomp(as, ir);
2216#else 2291#elif !LJ_TARGET_MIPSR6
2217 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); 2292 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
2218 emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); 2293 emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
2294#else
2295 Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
2296 asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
2297 emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right);
2219#endif 2298#endif
2220 } else { 2299 } else {
2221 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); 2300 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
@@ -2618,7 +2697,12 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2618 if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && 2697 if (((p[-1] ^ (px-p)) & 0xffffu) == 0 &&
2619 ((p[-1] & 0xf0000000u) == MIPSI_BEQ || 2698 ((p[-1] & 0xf0000000u) == MIPSI_BEQ ||
2620 (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || 2699 (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ ||
2621 (p[-1] & 0xffe00000u) == MIPSI_BC1F)) { 2700#if !LJ_TARGET_MIPSR6
2701 (p[-1] & 0xffe00000u) == MIPSI_BC1F
2702#else
2703 (p[-1] & 0xff600000u) == MIPSI_BC1EQZ
2704#endif
2705 )) {
2622 ptrdiff_t delta = target - p; 2706 ptrdiff_t delta = target - p;
2623 if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ 2707 if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */
2624 patchbranch: 2708 patchbranch:
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index bb6593ae..313d030a 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -138,6 +138,7 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
138 } else if (emit_kdelta1(as, r, (intptr_t)u64)) { 138 } else if (emit_kdelta1(as, r, (intptr_t)u64)) {
139 return; 139 return;
140 } else { 140 } else {
141 /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */
141 if ((u64 & 0xffff)) { 142 if ((u64 & 0xffff)) {
142 emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); 143 emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff);
143 } 144 }
@@ -236,10 +237,22 @@ static void emit_jmp(ASMState *as, MCode *target)
236static void emit_call(ASMState *as, void *target, int needcfa) 237static void emit_call(ASMState *as, void *target, int needcfa)
237{ 238{
238 MCode *p = as->mcp; 239 MCode *p = as->mcp;
239 *--p = MIPSI_NOP; 240#if LJ_TARGET_MIPSR6
241 ptrdiff_t delta = (char *)target - (char *)p;
242 if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */
243 *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu);
244 as->mcp = p;
245 return;
246 }
247#endif
248 *--p = MIPSI_NOP; /* Delay slot. */
240 if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { 249 if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) {
250#if !LJ_TARGET_MIPSR6
241 *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | 251 *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) |
242 (((uintptr_t)target >>2) & 0x03ffffffu); 252 (((uintptr_t)target >>2) & 0x03ffffffu);
253#else
254 *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu);
255#endif
243 } else { /* Target out of range: need indirect call. */ 256 } else { /* Target out of range: need indirect call. */
244 *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); 257 *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR);
245 needcfa = 1; 258 needcfa = 1;
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 5d41ef4b..0d9a9afe 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -51,10 +51,18 @@
51/* Names for the CPU-specific flags. Must match the order above. */ 51/* Names for the CPU-specific flags. Must match the order above. */
52#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 52#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2
53#if LJ_TARGET_MIPS32 53#if LJ_TARGET_MIPS32
54#if LJ_TARGET_MIPSR6
55#define JIT_F_CPUSTRING "\010MIPS32R6"
56#else
54#define JIT_F_CPUSTRING "\010MIPS32R2" 57#define JIT_F_CPUSTRING "\010MIPS32R2"
58#endif
59#else
60#if LJ_TARGET_MIPSR6
61#define JIT_F_CPUSTRING "\010MIPS64R6"
55#else 62#else
56#define JIT_F_CPUSTRING "\010MIPS64R2" 63#define JIT_F_CPUSTRING "\010MIPS64R2"
57#endif 64#endif
65#endif
58#else 66#else
59#define JIT_F_CPU_FIRST 0 67#define JIT_F_CPU_FIRST 0
60#define JIT_F_CPUSTRING "" 68#define JIT_F_CPUSTRING ""
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index 740687b3..84db6012 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -223,6 +223,8 @@ typedef enum MIPSIns {
223 MIPSI_ADDIU = 0x24000000, 223 MIPSI_ADDIU = 0x24000000,
224 MIPSI_SUB = 0x00000022, 224 MIPSI_SUB = 0x00000022,
225 MIPSI_SUBU = 0x00000023, 225 MIPSI_SUBU = 0x00000023,
226
227#if !LJ_TARGET_MIPSR6
226 MIPSI_MUL = 0x70000002, 228 MIPSI_MUL = 0x70000002,
227 MIPSI_DIV = 0x0000001a, 229 MIPSI_DIV = 0x0000001a,
228 MIPSI_DIVU = 0x0000001b, 230 MIPSI_DIVU = 0x0000001b,
@@ -232,6 +234,15 @@ typedef enum MIPSIns {
232 MIPSI_MFHI = 0x00000010, 234 MIPSI_MFHI = 0x00000010,
233 MIPSI_MFLO = 0x00000012, 235 MIPSI_MFLO = 0x00000012,
234 MIPSI_MULT = 0x00000018, 236 MIPSI_MULT = 0x00000018,
237#else
238 MIPSI_MUL = 0x00000098,
239 MIPSI_MUH = 0x000000d8,
240 MIPSI_DIV = 0x0000009a,
241 MIPSI_DIVU = 0x0000009b,
242
243 MIPSI_SELEQZ = 0x00000035,
244 MIPSI_SELNEZ = 0x00000037,
245#endif
235 246
236 MIPSI_SLL = 0x00000000, 247 MIPSI_SLL = 0x00000000,
237 MIPSI_SRL = 0x00000002, 248 MIPSI_SRL = 0x00000002,
@@ -253,8 +264,13 @@ typedef enum MIPSIns {
253 MIPSI_B = 0x10000000, 264 MIPSI_B = 0x10000000,
254 MIPSI_J = 0x08000000, 265 MIPSI_J = 0x08000000,
255 MIPSI_JAL = 0x0c000000, 266 MIPSI_JAL = 0x0c000000,
267#if !LJ_TARGET_MIPSR6
256 MIPSI_JALX = 0x74000000, 268 MIPSI_JALX = 0x74000000,
257 MIPSI_JR = 0x00000008, 269 MIPSI_JR = 0x00000008,
270#else
271 MIPSI_JR = 0x00000009,
272 MIPSI_BALC = 0xe8000000,
273#endif
258 MIPSI_JALR = 0x0000f809, 274 MIPSI_JALR = 0x0000f809,
259 275
260 MIPSI_BEQ = 0x10000000, 276 MIPSI_BEQ = 0x10000000,
@@ -282,15 +298,23 @@ typedef enum MIPSIns {
282 298
283 /* MIPS64 instructions. */ 299 /* MIPS64 instructions. */
284 MIPSI_DADD = 0x0000002c, 300 MIPSI_DADD = 0x0000002c,
285 MIPSI_DADDI = 0x60000000,
286 MIPSI_DADDU = 0x0000002d, 301 MIPSI_DADDU = 0x0000002d,
287 MIPSI_DADDIU = 0x64000000, 302 MIPSI_DADDIU = 0x64000000,
288 MIPSI_DSUB = 0x0000002e, 303 MIPSI_DSUB = 0x0000002e,
289 MIPSI_DSUBU = 0x0000002f, 304 MIPSI_DSUBU = 0x0000002f,
305#if !LJ_TARGET_MIPSR6
290 MIPSI_DDIV = 0x0000001e, 306 MIPSI_DDIV = 0x0000001e,
291 MIPSI_DDIVU = 0x0000001f, 307 MIPSI_DDIVU = 0x0000001f,
292 MIPSI_DMULT = 0x0000001c, 308 MIPSI_DMULT = 0x0000001c,
293 MIPSI_DMULTU = 0x0000001d, 309 MIPSI_DMULTU = 0x0000001d,
310#else
311 MIPSI_DDIV = 0x0000009e,
312 MIPSI_DMOD = 0x000000de,
313 MIPSI_DDIVU = 0x0000009f,
314 MIPSI_DMODU = 0x000000df,
315 MIPSI_DMUL = 0x0000009c,
316 MIPSI_DMUH = 0x000000dc,
317#endif
294 318
295 MIPSI_DSLL = 0x00000038, 319 MIPSI_DSLL = 0x00000038,
296 MIPSI_DSRL = 0x0000003a, 320 MIPSI_DSRL = 0x0000003a,
@@ -308,6 +332,11 @@ typedef enum MIPSIns {
308 MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, 332 MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU,
309 MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, 333 MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD,
310 MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, 334 MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD,
335#if LJ_TARGET_MIPSR6
336 MIPSI_LSA = 0x00000005,
337 MIPSI_DLSA = 0x00000015,
338 MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA,
339#endif
311 340
312 /* Extract/insert instructions. */ 341 /* Extract/insert instructions. */
313 MIPSI_DEXTM = 0x7c000001, 342 MIPSI_DEXTM = 0x7c000001,
@@ -317,18 +346,19 @@ typedef enum MIPSIns {
317 MIPSI_DINSU = 0x7c000006, 346 MIPSI_DINSU = 0x7c000006,
318 MIPSI_DINS = 0x7c000007, 347 MIPSI_DINS = 0x7c000007,
319 348
320 MIPSI_RINT_D = 0x4620001a,
321 MIPSI_RINT_S = 0x4600001a,
322 MIPSI_RINT = 0x4400001a,
323 MIPSI_FLOOR_D = 0x4620000b, 349 MIPSI_FLOOR_D = 0x4620000b,
324 MIPSI_CEIL_D = 0x4620000a,
325 MIPSI_ROUND_D = 0x46200008,
326 350
327 /* FP instructions. */ 351 /* FP instructions. */
328 MIPSI_MOV_S = 0x46000006, 352 MIPSI_MOV_S = 0x46000006,
329 MIPSI_MOV_D = 0x46200006, 353 MIPSI_MOV_D = 0x46200006,
354#if !LJ_TARGET_MIPSR6
330 MIPSI_MOVT_D = 0x46210011, 355 MIPSI_MOVT_D = 0x46210011,
331 MIPSI_MOVF_D = 0x46200011, 356 MIPSI_MOVF_D = 0x46200011,
357#else
358 MIPSI_MIN_D = 0x4620001C,
359 MIPSI_MAX_D = 0x4620001E,
360 MIPSI_SEL_D = 0x46200010,
361#endif
332 362
333 MIPSI_ABS_D = 0x46200005, 363 MIPSI_ABS_D = 0x46200005,
334 MIPSI_NEG_D = 0x46200007, 364 MIPSI_NEG_D = 0x46200007,
@@ -363,15 +393,23 @@ typedef enum MIPSIns {
363 MIPSI_DMTC1 = 0x44a00000, 393 MIPSI_DMTC1 = 0x44a00000,
364 MIPSI_DMFC1 = 0x44200000, 394 MIPSI_DMFC1 = 0x44200000,
365 395
396#if !LJ_TARGET_MIPSR6
366 MIPSI_BC1F = 0x45000000, 397 MIPSI_BC1F = 0x45000000,
367 MIPSI_BC1T = 0x45010000, 398 MIPSI_BC1T = 0x45010000,
368
369 MIPSI_C_EQ_D = 0x46200032, 399 MIPSI_C_EQ_D = 0x46200032,
370 MIPSI_C_OLT_S = 0x46000034, 400 MIPSI_C_OLT_S = 0x46000034,
371 MIPSI_C_OLT_D = 0x46200034, 401 MIPSI_C_OLT_D = 0x46200034,
372 MIPSI_C_ULT_D = 0x46200035, 402 MIPSI_C_ULT_D = 0x46200035,
373 MIPSI_C_OLE_D = 0x46200036, 403 MIPSI_C_OLE_D = 0x46200036,
374 MIPSI_C_ULE_D = 0x46200037, 404 MIPSI_C_ULE_D = 0x46200037,
405#else
406 MIPSI_BC1EQZ = 0x45200000,
407 MIPSI_BC1NEZ = 0x45a00000,
408 MIPSI_CMP_EQ_D = 0x46a00002,
409 MIPSI_CMP_LT_S = 0x46800004,
410 MIPSI_CMP_LT_D = 0x46a00004,
411#endif
412
375} MIPSIns; 413} MIPSIns;
376 414
377#endif 415#endif
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index 1682c81e..91c12216 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -83,6 +83,10 @@
83| 83|
84|.define FRET1, f0 84|.define FRET1, f0
85|.define FRET2, f2 85|.define FRET2, f2
86|
87|.define FTMP0, f20
88|.define FTMP1, f21
89|.define FTMP2, f22
86|.endif 90|.endif
87| 91|
88|// Stack layout while in interpreter. Must match with lj_frame.h. 92|// Stack layout while in interpreter. Must match with lj_frame.h.
@@ -310,10 +314,10 @@
310|.endmacro 314|.endmacro
311| 315|
312|// Assumes DISPATCH is relative to GL. 316|// Assumes DISPATCH is relative to GL.
313#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) 317#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
314#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) 318#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
315#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) 319#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch))
316#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) 320#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name)
317| 321|
318#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) 322#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
319| 323|
@@ -492,8 +496,15 @@ static void build_subroutines(BuildCtx *ctx)
492 |7: // Less results wanted. 496 |7: // Less results wanted.
493 | subu TMP0, RD, TMP2 497 | subu TMP0, RD, TMP2
494 | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. 498 | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it.
499 |.if MIPSR6
500 | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case?
501 | seleqz BASE, BASE, TMP2
502 | b <3
503 |. or BASE, BASE, TMP0
504 |.else
495 | b <3 505 | b <3
496 |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? 506 |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case?
507 |.endif
497 | 508 |
498 |8: // Corner case: need to grow stack for filling up results. 509 |8: // Corner case: need to grow stack for filling up results.
499 | // This can happen if: 510 | // This can happen if:
@@ -1121,11 +1132,16 @@ static void build_subroutines(BuildCtx *ctx)
1121 |.endmacro 1132 |.endmacro
1122 | 1133 |
1123 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! 1134 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot!
1135 |// MIPSR6: no delay slot, but a forbidden slot.
1124 |.macro ffgccheck 1136 |.macro ffgccheck
1125 | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) 1137 | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
1126 | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) 1138 | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
1127 | dsubu AT, TMP0, TMP1 1139 | dsubu AT, TMP0, TMP1
1140 |.if MIPSR6
1141 | bgezalc AT, ->fff_gcstep
1142 |.else
1128 | bgezal AT, ->fff_gcstep 1143 | bgezal AT, ->fff_gcstep
1144 |.endif
1129 |.endmacro 1145 |.endmacro
1130 | 1146 |
1131 |//-- Base library: checks ----------------------------------------------- 1147 |//-- Base library: checks -----------------------------------------------
@@ -1153,7 +1169,13 @@ static void build_subroutines(BuildCtx *ctx)
1153 | sltu TMP1, TISNUM, TMP0 1169 | sltu TMP1, TISNUM, TMP0
1154 | not TMP2, TMP0 1170 | not TMP2, TMP0
1155 | li TMP3, ~LJ_TISNUM 1171 | li TMP3, ~LJ_TISNUM
1172 |.if MIPSR6
1173 | selnez TMP2, TMP2, TMP1
1174 | seleqz TMP3, TMP3, TMP1
1175 | or TMP2, TMP2, TMP3
1176 |.else
1156 | movz TMP2, TMP3, TMP1 1177 | movz TMP2, TMP3, TMP1
1178 |.endif
1157 | dsll TMP2, TMP2, 3 1179 | dsll TMP2, TMP2, 3
1158 | daddu TMP2, CFUNC:RB, TMP2 1180 | daddu TMP2, CFUNC:RB, TMP2
1159 | b ->fff_restv 1181 | b ->fff_restv
@@ -1165,7 +1187,11 @@ static void build_subroutines(BuildCtx *ctx)
1165 | gettp TMP2, CARG1 1187 | gettp TMP2, CARG1
1166 | daddiu TMP0, TMP2, -LJ_TTAB 1188 | daddiu TMP0, TMP2, -LJ_TTAB
1167 | daddiu TMP1, TMP2, -LJ_TUDATA 1189 | daddiu TMP1, TMP2, -LJ_TUDATA
1190 |.if MIPSR6
1191 | selnez TMP0, TMP1, TMP0
1192 |.else
1168 | movn TMP0, TMP1, TMP0 1193 | movn TMP0, TMP1, TMP0
1194 |.endif
1169 | bnez TMP0, >6 1195 | bnez TMP0, >6
1170 |. cleartp TAB:CARG1 1196 |. cleartp TAB:CARG1
1171 |1: // Field metatable must be at same offset for GCtab and GCudata! 1197 |1: // Field metatable must be at same offset for GCtab and GCudata!
@@ -1204,7 +1230,13 @@ static void build_subroutines(BuildCtx *ctx)
1204 | 1230 |
1205 |6: 1231 |6:
1206 | sltiu AT, TMP2, LJ_TISNUM 1232 | sltiu AT, TMP2, LJ_TISNUM
1233 |.if MIPSR6
1234 | selnez TMP0, TISNUM, AT
1235 | seleqz AT, TMP2, AT
1236 | or TMP2, TMP0, AT
1237 |.else
1207 | movn TMP2, TISNUM, AT 1238 | movn TMP2, TISNUM, AT
1239 |.endif
1208 | dsll TMP2, TMP2, 3 1240 | dsll TMP2, TMP2, 3
1209 | dsubu TMP0, DISPATCH, TMP2 1241 | dsubu TMP0, DISPATCH, TMP2
1210 | b <2 1242 | b <2
@@ -1266,8 +1298,13 @@ static void build_subroutines(BuildCtx *ctx)
1266 | or TMP0, TMP0, TMP1 1298 | or TMP0, TMP0, TMP1
1267 | bnez TMP0, ->fff_fallback 1299 | bnez TMP0, ->fff_fallback
1268 |. sd BASE, L->base // Add frame since C call can throw. 1300 |. sd BASE, L->base // Add frame since C call can throw.
1301 |.if MIPSR6
1302 | sd PC, SAVE_PC // Redundant (but a defined value).
1303 | ffgccheck
1304 |.else
1269 | ffgccheck 1305 | ffgccheck
1270 |. sd PC, SAVE_PC // Redundant (but a defined value). 1306 |. sd PC, SAVE_PC // Redundant (but a defined value).
1307 |.endif
1271 | load_got lj_strfmt_number 1308 | load_got lj_strfmt_number
1272 | move CARG1, L 1309 | move CARG1, L
1273 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) 1310 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
@@ -1438,8 +1475,15 @@ static void build_subroutines(BuildCtx *ctx)
1438 | addiu AT, TMP0, -LUA_YIELD 1475 | addiu AT, TMP0, -LUA_YIELD
1439 | daddu CARG3, CARG2, TMP0 1476 | daddu CARG3, CARG2, TMP0
1440 | daddiu TMP3, CARG2, 8 1477 | daddiu TMP3, CARG2, 8
1478 |.if MIPSR6
1479 | seleqz CARG2, CARG2, AT
1480 | selnez TMP3, TMP3, AT
1481 | bgtz AT, ->fff_fallback // st > LUA_YIELD?
1482 |. or CARG2, TMP3, CARG2
1483 |.else
1441 | bgtz AT, ->fff_fallback // st > LUA_YIELD? 1484 | bgtz AT, ->fff_fallback // st > LUA_YIELD?
1442 |. movn CARG2, TMP3, AT 1485 |. movn CARG2, TMP3, AT
1486 |.endif
1443 | xor TMP2, TMP2, CARG3 1487 | xor TMP2, TMP2, CARG3
1444 | bnez TMP1, ->fff_fallback // cframe != 0? 1488 | bnez TMP1, ->fff_fallback // cframe != 0?
1445 |. or AT, TMP2, TMP0 1489 |. or AT, TMP2, TMP0
@@ -1751,7 +1795,7 @@ static void build_subroutines(BuildCtx *ctx)
1751 | b ->fff_res 1795 | b ->fff_res
1752 |. li RD, (2+1)*8 1796 |. li RD, (2+1)*8
1753 | 1797 |
1754 |.macro math_minmax, name, intins, fpins 1798 |.macro math_minmax, name, intins, intinsc, fpins
1755 | .ffunc_1 name 1799 | .ffunc_1 name
1756 | daddu TMP3, BASE, NARGS8:RC 1800 | daddu TMP3, BASE, NARGS8:RC
1757 | checkint CARG1, >5 1801 | checkint CARG1, >5
@@ -1763,7 +1807,13 @@ static void build_subroutines(BuildCtx *ctx)
1763 |. sextw CARG1, CARG1 1807 |. sextw CARG1, CARG1
1764 | lw CARG2, LO(TMP2) 1808 | lw CARG2, LO(TMP2)
1765 |. slt AT, CARG1, CARG2 1809 |. slt AT, CARG1, CARG2
1810 |.if MIPSR6
1811 | intins TMP1, CARG2, AT
1812 | intinsc CARG1, CARG1, AT
1813 | or CARG1, CARG1, TMP1
1814 |.else
1766 | intins CARG1, CARG2, AT 1815 | intins CARG1, CARG2, AT
1816 |.endif
1767 | daddiu TMP2, TMP2, 8 1817 | daddiu TMP2, TMP2, 8
1768 | zextw CARG1, CARG1 1818 | zextw CARG1, CARG1
1769 | b <1 1819 | b <1
@@ -1799,13 +1849,23 @@ static void build_subroutines(BuildCtx *ctx)
1799 |. nop 1849 |. nop
1800 |7: 1850 |7:
1801 |.if FPU 1851 |.if FPU
1852 |.if MIPSR6
1853 | fpins FRET1, FRET1, FARG1
1854 |.else
1802 | c.olt.d FRET1, FARG1 1855 | c.olt.d FRET1, FARG1
1803 | fpins FRET1, FARG1 1856 | fpins FRET1, FARG1
1857 |.endif
1804 |.else 1858 |.else
1805 | bal ->vm_sfcmpolt 1859 | bal ->vm_sfcmpolt
1806 |. nop 1860 |. nop
1861 |.if MIPSR6
1862 | intins AT, CARG2, CRET1
1863 | intinsc CARG1, CARG1, CRET1
1864 | or CARG1, CARG1, AT
1865 |.else
1807 | intins CARG1, CARG2, CRET1 1866 | intins CARG1, CARG2, CRET1
1808 |.endif 1867 |.endif
1868 |.endif
1809 | b <6 1869 | b <6
1810 |. daddiu TMP2, TMP2, 8 1870 |. daddiu TMP2, TMP2, 8
1811 | 1871 |
@@ -1825,8 +1885,13 @@ static void build_subroutines(BuildCtx *ctx)
1825 | 1885 |
1826 |.endmacro 1886 |.endmacro
1827 | 1887 |
1828 | math_minmax math_min, movz, movf.d 1888 |.if MIPSR6
1829 | math_minmax math_max, movn, movt.d 1889 | math_minmax math_min, seleqz, selnez, min.d
1890 | math_minmax math_max, selnez, seleqz, max.d
1891 |.else
1892 | math_minmax math_min, movz, _, movf.d
1893 | math_minmax math_max, movn, _, movt.d
1894 |.endif
1830 | 1895 |
1831 |//-- String library ----------------------------------------------------- 1896 |//-- String library -----------------------------------------------------
1832 | 1897 |
@@ -1851,7 +1916,9 @@ static void build_subroutines(BuildCtx *ctx)
1851 | 1916 |
1852 |.ffunc string_char // Only handle the 1-arg case here. 1917 |.ffunc string_char // Only handle the 1-arg case here.
1853 | ffgccheck 1918 | ffgccheck
1919 |.if not MIPSR6
1854 |. nop 1920 |. nop
1921 |.endif
1855 | ld CARG1, 0(BASE) 1922 | ld CARG1, 0(BASE)
1856 | gettp TMP0, CARG1 1923 | gettp TMP0, CARG1
1857 | xori AT, NARGS8:RC, 8 // Exactly 1 argument. 1924 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
@@ -1881,7 +1948,9 @@ static void build_subroutines(BuildCtx *ctx)
1881 | 1948 |
1882 |.ffunc string_sub 1949 |.ffunc string_sub
1883 | ffgccheck 1950 | ffgccheck
1951 |.if not MIPSR6
1884 |. nop 1952 |. nop
1953 |.endif
1885 | addiu AT, NARGS8:RC, -16 1954 | addiu AT, NARGS8:RC, -16
1886 | ld TMP0, 0(BASE) 1955 | ld TMP0, 0(BASE)
1887 | bltz AT, ->fff_fallback 1956 | bltz AT, ->fff_fallback
@@ -1904,8 +1973,30 @@ static void build_subroutines(BuildCtx *ctx)
1904 | addiu TMP0, CARG2, 1 1973 | addiu TMP0, CARG2, 1
1905 | addu TMP1, CARG4, TMP0 1974 | addu TMP1, CARG4, TMP0
1906 | slt TMP3, CARG3, r0 1975 | slt TMP3, CARG3, r0
1976 |.if MIPSR6
1977 | seleqz CARG4, CARG4, AT
1978 | selnez TMP1, TMP1, AT
1979 | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1
1980 |.else
1907 | movn CARG4, TMP1, AT // if (end < 0) end += len+1 1981 | movn CARG4, TMP1, AT // if (end < 0) end += len+1
1982 |.endif
1908 | addu TMP1, CARG3, TMP0 1983 | addu TMP1, CARG3, TMP0
1984 |.if MIPSR6
1985 | selnez TMP1, TMP1, TMP3
1986 | seleqz CARG3, CARG3, TMP3
1987 | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1
1988 | li TMP2, 1
1989 | slt AT, CARG4, r0
1990 | slt TMP3, r0, CARG3
1991 | seleqz CARG4, CARG4, AT // if (end < 0) end = 0
1992 | selnez CARG3, CARG3, TMP3
1993 | seleqz TMP2, TMP2, TMP3
1994 | or CARG3, TMP2, CARG3 // if (start < 1) start = 1
1995 | slt AT, CARG2, CARG4
1996 | seleqz CARG4, CARG4, AT
1997 | selnez CARG2, CARG2, AT
1998 | or CARG4, CARG2, CARG4 // if (end > len) end = len
1999 |.else
1909 | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 2000 | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1
1910 | li TMP2, 1 2001 | li TMP2, 1
1911 | slt AT, CARG4, r0 2002 | slt AT, CARG4, r0
@@ -1914,6 +2005,7 @@ static void build_subroutines(BuildCtx *ctx)
1914 | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 2005 | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1
1915 | slt AT, CARG2, CARG4 2006 | slt AT, CARG2, CARG4
1916 | movn CARG4, CARG2, AT // if (end > len) end = len 2007 | movn CARG4, CARG2, AT // if (end > len) end = len
2008 |.endif
1917 | daddu CARG2, STR:CARG1, CARG3 2009 | daddu CARG2, STR:CARG1, CARG3
1918 | subu CARG3, CARG4, CARG3 // len = end - start 2010 | subu CARG3, CARG4, CARG3 // len = end - start
1919 | daddiu CARG2, CARG2, sizeof(GCstr)-1 2011 | daddiu CARG2, CARG2, sizeof(GCstr)-1
@@ -1975,7 +2067,13 @@ static void build_subroutines(BuildCtx *ctx)
1975 | slt AT, CARG1, r0 2067 | slt AT, CARG1, r0
1976 | dsrlv CRET1, TMP0, CARG3 2068 | dsrlv CRET1, TMP0, CARG3
1977 | dsubu TMP0, r0, CRET1 2069 | dsubu TMP0, r0, CRET1
2070 |.if MIPSR6
2071 | selnez TMP0, TMP0, AT
2072 | seleqz CRET1, CRET1, AT
2073 | or CRET1, CRET1, TMP0
2074 |.else
1978 | movn CRET1, TMP0, AT 2075 | movn CRET1, TMP0, AT
2076 |.endif
1979 | jr ra 2077 | jr ra
1980 |. zextw CRET1, CRET1 2078 |. zextw CRET1, CRET1
1981 |1: 2079 |1:
@@ -1998,14 +2096,28 @@ static void build_subroutines(BuildCtx *ctx)
1998 | slt AT, CARG1, r0 2096 | slt AT, CARG1, r0
1999 | dsrlv CRET1, CRET2, TMP0 2097 | dsrlv CRET1, CRET2, TMP0
2000 | dsubu CARG1, r0, CRET1 2098 | dsubu CARG1, r0, CRET1
2099 |.if MIPSR6
2100 | seleqz CRET1, CRET1, AT
2101 | selnez CARG1, CARG1, AT
2102 | or CRET1, CRET1, CARG1
2103 |.else
2001 | movn CRET1, CARG1, AT 2104 | movn CRET1, CARG1, AT
2105 |.endif
2002 | li CARG1, 64 2106 | li CARG1, 64
2003 | subu TMP0, CARG1, TMP0 2107 | subu TMP0, CARG1, TMP0
2004 | dsllv CRET2, CRET2, TMP0 // Integer check. 2108 | dsllv CRET2, CRET2, TMP0 // Integer check.
2005 | sextw AT, CRET1 2109 | sextw AT, CRET1
2006 | xor AT, CRET1, AT // Range check. 2110 | xor AT, CRET1, AT // Range check.
2007 | jr ra 2111 | jr ra
2112 |.if MIPSR6
2113 | seleqz AT, AT, CRET2
2114 | selnez CRET2, CRET2, CRET2
2115 | jr ra
2116 |. or CRET2, AT, CRET2
2117 |.else
2118 | jr ra
2008 |. movz CRET2, AT, CRET2 2119 |. movz CRET2, AT, CRET2
2120 |.endif
2009 |1: 2121 |1:
2010 | jr ra 2122 | jr ra
2011 |. li CRET2, 1 2123 |. li CRET2, 1
@@ -2515,15 +2627,22 @@ static void build_subroutines(BuildCtx *ctx)
2515 | 2627 |
2516 |// Hard-float round to integer. 2628 |// Hard-float round to integer.
2517 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. 2629 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2630 |// MIPSR6: Modifies FTMP1, too.
2518 |.macro vm_round_hf, func 2631 |.macro vm_round_hf, func
2519 | lui TMP0, 0x4330 // Hiword of 2^52 (double). 2632 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2520 | dsll TMP0, TMP0, 32 2633 | dsll TMP0, TMP0, 32
2521 | dmtc1 TMP0, f4 2634 | dmtc1 TMP0, f4
2522 | abs.d FRET2, FARG1 // |x| 2635 | abs.d FRET2, FARG1 // |x|
2523 | dmfc1 AT, FARG1 2636 | dmfc1 AT, FARG1
2637 |.if MIPSR6
2638 | cmp.lt.d FTMP1, FRET2, f4
2639 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2640 | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52.
2641 |.else
2524 | c.olt.d 0, FRET2, f4 2642 | c.olt.d 0, FRET2, f4
2525 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 2643 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2526 | bc1f 0, >1 // Truncate only if |x| < 2^52. 2644 | bc1f 0, >1 // Truncate only if |x| < 2^52.
2645 |.endif
2527 |. sub.d FRET1, FRET1, f4 2646 |. sub.d FRET1, FRET1, f4
2528 | slt AT, AT, r0 2647 | slt AT, AT, r0
2529 |.if "func" == "ceil" 2648 |.if "func" == "ceil"
@@ -2534,16 +2653,38 @@ static void build_subroutines(BuildCtx *ctx)
2534 |.if "func" == "trunc" 2653 |.if "func" == "trunc"
2535 | dsll TMP0, TMP0, 32 2654 | dsll TMP0, TMP0, 32
2536 | dmtc1 TMP0, f4 2655 | dmtc1 TMP0, f4
2656 |.if MIPSR6
2657 | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result?
2658 | sub.d FRET2, FRET1, f4
2659 | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1.
2660 | dmtc1 AT, FRET1
2661 | neg.d FRET2, FTMP1
2662 | jr ra
2663 |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in.
2664 |.else
2537 | c.olt.d 0, FRET2, FRET1 // |x| < result? 2665 | c.olt.d 0, FRET2, FRET1 // |x| < result?
2538 | sub.d FRET2, FRET1, f4 2666 | sub.d FRET2, FRET1, f4
2539 | movt.d FRET1, FRET2, 0 // If yes, subtract +1. 2667 | movt.d FRET1, FRET2, 0 // If yes, subtract +1.
2540 | neg.d FRET2, FRET1 2668 | neg.d FRET2, FRET1
2541 | jr ra 2669 | jr ra
2542 |. movn.d FRET1, FRET2, AT // Merge sign bit back in. 2670 |. movn.d FRET1, FRET2, AT // Merge sign bit back in.
2671 |.endif
2543 |.else 2672 |.else
2544 | neg.d FRET2, FRET1 2673 | neg.d FRET2, FRET1
2545 | dsll TMP0, TMP0, 32 2674 | dsll TMP0, TMP0, 32
2546 | dmtc1 TMP0, f4 2675 | dmtc1 TMP0, f4
2676 |.if MIPSR6
2677 | dmtc1 AT, FTMP1
2678 | sel.d FTMP1, FRET1, FRET2
2679 |.if "func" == "ceil"
2680 | cmp.lt.d FRET1, FTMP1, FARG1 // x > result?
2681 |.else
2682 | cmp.lt.d FRET1, FARG1, FTMP1 // x < result?
2683 |.endif
2684 | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1.
2685 | jr ra
2686 |. sel.d FRET1, FTMP1, FRET2
2687 |.else
2547 | movn.d FRET1, FRET2, AT // Merge sign bit back in. 2688 | movn.d FRET1, FRET2, AT // Merge sign bit back in.
2548 |.if "func" == "ceil" 2689 |.if "func" == "ceil"
2549 | c.olt.d 0, FRET1, FARG1 // x > result? 2690 | c.olt.d 0, FRET1, FARG1 // x > result?
@@ -2554,6 +2695,7 @@ static void build_subroutines(BuildCtx *ctx)
2554 | jr ra 2695 | jr ra
2555 |. movt.d FRET1, FRET2, 0 2696 |. movt.d FRET1, FRET2, 0
2556 |.endif 2697 |.endif
2698 |.endif
2557 |1: 2699 |1:
2558 | jr ra 2700 | jr ra
2559 |. mov.d FRET1, FARG1 2701 |. mov.d FRET1, FARG1
@@ -2698,7 +2840,7 @@ static void build_subroutines(BuildCtx *ctx)
2698 |. li CRET1, 0 2840 |. li CRET1, 0
2699 |.endif 2841 |.endif
2700 | 2842 |
2701 |.macro sfmin_max, name, intins 2843 |.macro sfmin_max, name, intins, intinsc
2702 |->vm_sf .. name: 2844 |->vm_sf .. name:
2703 |.if JIT and not FPU 2845 |.if JIT and not FPU
2704 | move TMP2, ra 2846 | move TMP2, ra
@@ -2707,13 +2849,25 @@ static void build_subroutines(BuildCtx *ctx)
2707 | move ra, TMP2 2849 | move ra, TMP2
2708 | move TMP0, CRET1 2850 | move TMP0, CRET1
2709 | move CRET1, CARG1 2851 | move CRET1, CARG1
2852 |.if MIPSR6
2853 | intins CRET1, CRET1, TMP0
2854 | intinsc TMP0, CARG2, TMP0
2855 | jr ra
2856 |. or CRET1, CRET1, TMP0
2857 |.else
2710 | jr ra 2858 | jr ra
2711 |. intins CRET1, CARG2, TMP0 2859 |. intins CRET1, CARG2, TMP0
2712 |.endif 2860 |.endif
2861 |.endif
2713 |.endmacro 2862 |.endmacro
2714 | 2863 |
2715 | sfmin_max min, movz 2864 |.if MIPSR6
2716 | sfmin_max max, movn 2865 | sfmin_max min, selnez, seleqz
2866 | sfmin_max max, seleqz, selnez
2867 |.else
2868 | sfmin_max min, movz, _
2869 | sfmin_max max, movn, _
2870 |.endif
2717 | 2871 |
2718 |//----------------------------------------------------------------------- 2872 |//-----------------------------------------------------------------------
2719 |//-- Miscellaneous functions -------------------------------------------- 2873 |//-- Miscellaneous functions --------------------------------------------
@@ -2882,7 +3036,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2882 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3036 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2883 | slt AT, CARG1, CARG2 3037 | slt AT, CARG1, CARG2
2884 | addu TMP2, TMP2, TMP3 3038 | addu TMP2, TMP2, TMP3
3039 |.if MIPSR6
3040 | movop TMP2, TMP2, AT
3041 |.else
2885 | movop TMP2, r0, AT 3042 | movop TMP2, r0, AT
3043 |.endif
2886 |1: 3044 |1:
2887 | daddu PC, PC, TMP2 3045 | daddu PC, PC, TMP2
2888 | ins_next 3046 | ins_next
@@ -2900,16 +3058,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2900 |.endif 3058 |.endif
2901 |3: // RA and RD are both numbers. 3059 |3: // RA and RD are both numbers.
2902 |.if FPU 3060 |.if FPU
2903 | fcomp f20, f22 3061 |.if MIPSR6
3062 | fcomp FTMP0, FTMP0, FTMP2
3063 | addu TMP2, TMP2, TMP3
3064 | mfc1 TMP3, FTMP0
3065 | b <1
3066 |. fmovop TMP2, TMP2, TMP3
3067 |.else
3068 | fcomp FTMP0, FTMP2
2904 | addu TMP2, TMP2, TMP3 3069 | addu TMP2, TMP2, TMP3
2905 | b <1 3070 | b <1
2906 |. fmovop TMP2, r0 3071 |. fmovop TMP2, r0
3072 |.endif
2907 |.else 3073 |.else
2908 | bal sfcomp 3074 | bal sfcomp
2909 |. addu TMP2, TMP2, TMP3 3075 |. addu TMP2, TMP2, TMP3
2910 | b <1 3076 | b <1
3077 |.if MIPSR6
3078 |. movop TMP2, TMP2, CRET1
3079 |.else
2911 |. movop TMP2, r0, CRET1 3080 |. movop TMP2, r0, CRET1
2912 |.endif 3081 |.endif
3082 |.endif
2913 | 3083 |
2914 |4: // RA is a number, RD is not a number. 3084 |4: // RA is a number, RD is not a number.
2915 | bne CARG4, TISNUM, ->vmeta_comp 3085 | bne CARG4, TISNUM, ->vmeta_comp
@@ -2956,15 +3126,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2956 |.endif 3126 |.endif
2957 |.endmacro 3127 |.endmacro
2958 | 3128 |
3129 |.if MIPSR6
3130 if (op == BC_ISLT) {
3131 | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt
3132 } else if (op == BC_ISGE) {
3133 | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt
3134 } else if (op == BC_ISLE) {
3135 | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult
3136 } else {
3137 | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult
3138 }
3139 |.else
2959 if (op == BC_ISLT) { 3140 if (op == BC_ISLT) {
2960 | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt 3141 | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt
2961 } else if (op == BC_ISGE) { 3142 } else if (op == BC_ISGE) {
2962 | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt 3143 | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt
2963 } else if (op == BC_ISLE) { 3144 } else if (op == BC_ISLE) {
2964 | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult 3145 | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult
2965 } else { 3146 } else {
2966 | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult 3147 | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult
2967 } 3148 }
3149 |.endif
2968 break; 3150 break;
2969 3151
2970 case BC_ISEQV: case BC_ISNEV: 3152 case BC_ISEQV: case BC_ISNEV:
@@ -3010,7 +3192,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3010 |2: // Check if the tags are the same and it's a table or userdata. 3192 |2: // Check if the tags are the same and it's a table or userdata.
3011 | xor AT, CARG3, CARG4 // Same type? 3193 | xor AT, CARG3, CARG4 // Same type?
3012 | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? 3194 | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata?
3195 |.if MIPSR6
3196 | seleqz TMP0, TMP0, AT
3197 |.else
3013 | movn TMP0, r0, AT 3198 | movn TMP0, r0, AT
3199 |.endif
3014 if (vk) { 3200 if (vk) {
3015 | beqz TMP0, <1 3201 | beqz TMP0, <1
3016 } else { 3202 } else {
@@ -3060,11 +3246,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3060 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3246 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3061 | xor TMP1, CARG1, CARG2 3247 | xor TMP1, CARG1, CARG2
3062 | addu TMP2, TMP2, TMP3 3248 | addu TMP2, TMP2, TMP3
3249 |.if MIPSR6
3250 if (vk) {
3251 | seleqz TMP2, TMP2, TMP1
3252 } else {
3253 | selnez TMP2, TMP2, TMP1
3254 }
3255 |.else
3063 if (vk) { 3256 if (vk) {
3064 | movn TMP2, r0, TMP1 3257 | movn TMP2, r0, TMP1
3065 } else { 3258 } else {
3066 | movz TMP2, r0, TMP1 3259 | movz TMP2, r0, TMP1
3067 } 3260 }
3261 |.endif
3068 | daddu PC, PC, TMP2 3262 | daddu PC, PC, TMP2
3069 | ins_next 3263 | ins_next
3070 break; 3264 break;
@@ -3091,6 +3285,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3091 | bne CARG4, TISNUM, >6 3285 | bne CARG4, TISNUM, >6
3092 |. addu TMP2, TMP2, TMP3 3286 |. addu TMP2, TMP2, TMP3
3093 | xor AT, CARG1, CARG2 3287 | xor AT, CARG1, CARG2
3288 |.if MIPSR6
3289 if (vk) {
3290 | seleqz TMP2, TMP2, AT
3291 |1:
3292 | daddu PC, PC, TMP2
3293 |2:
3294 } else {
3295 | selnez TMP2, TMP2, AT
3296 |1:
3297 |2:
3298 | daddu PC, PC, TMP2
3299 }
3300 |.else
3094 if (vk) { 3301 if (vk) {
3095 | movn TMP2, r0, AT 3302 | movn TMP2, r0, AT
3096 |1: 3303 |1:
@@ -3102,6 +3309,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3102 |2: 3309 |2:
3103 | daddu PC, PC, TMP2 3310 | daddu PC, PC, TMP2
3104 } 3311 }
3312 |.endif
3105 | ins_next 3313 | ins_next
3106 | 3314 |
3107 |3: // RA is not an integer. 3315 |3: // RA is not an integer.
@@ -3114,30 +3322,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3114 |. addu TMP2, TMP2, TMP3 3322 |. addu TMP2, TMP2, TMP3
3115 | sltu AT, CARG4, TISNUM 3323 | sltu AT, CARG4, TISNUM
3116 |.if FPU 3324 |.if FPU
3117 | ldc1 f20, 0(RA) 3325 | ldc1 FTMP0, 0(RA)
3118 | ldc1 f22, 0(RD) 3326 | ldc1 FTMP2, 0(RD)
3119 |.endif 3327 |.endif
3120 | beqz AT, >5 3328 | beqz AT, >5
3121 |. nop 3329 |. nop
3122 |4: // RA and RD are both numbers. 3330 |4: // RA and RD are both numbers.
3123 |.if FPU 3331 |.if FPU
3124 | c.eq.d f20, f22 3332 |.if MIPSR6
3333 | cmp.eq.d FTMP0, FTMP0, FTMP2
3334 | dmfc1 TMP1, FTMP0
3335 | b <1
3336 if (vk) {
3337 |. selnez TMP2, TMP2, TMP1
3338 } else {
3339 |. seleqz TMP2, TMP2, TMP1
3340 }
3341 |.else
3342 | c.eq.d FTMP0, FTMP2
3125 | b <1 3343 | b <1
3126 if (vk) { 3344 if (vk) {
3127 |. movf TMP2, r0 3345 |. movf TMP2, r0
3128 } else { 3346 } else {
3129 |. movt TMP2, r0 3347 |. movt TMP2, r0
3130 } 3348 }
3349 |.endif
3131 |.else 3350 |.else
3132 | bal ->vm_sfcmpeq 3351 | bal ->vm_sfcmpeq
3133 |. nop 3352 |. nop
3134 | b <1 3353 | b <1
3354 |.if MIPSR6
3355 if (vk) {
3356 |. selnez TMP2, TMP2, CRET1
3357 } else {
3358 |. seleqz TMP2, TMP2, CRET1
3359 }
3360 |.else
3135 if (vk) { 3361 if (vk) {
3136 |. movz TMP2, r0, CRET1 3362 |. movz TMP2, r0, CRET1
3137 } else { 3363 } else {
3138 |. movn TMP2, r0, CRET1 3364 |. movn TMP2, r0, CRET1
3139 } 3365 }
3140 |.endif 3366 |.endif
3367 |.endif
3141 | 3368 |
3142 |5: // RA is a number, RD is not a number. 3369 |5: // RA is a number, RD is not a number.
3143 |.if FFI 3370 |.if FFI
@@ -3147,9 +3374,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3147 |.endif 3374 |.endif
3148 | // RA is a number, RD is an integer. Convert RD to a number. 3375 | // RA is a number, RD is an integer. Convert RD to a number.
3149 |.if FPU 3376 |.if FPU
3150 |. lwc1 f22, LO(RD) 3377 |. lwc1 FTMP2, LO(RD)
3151 | b <4 3378 | b <4
3152 |. cvt.d.w f22, f22 3379 |. cvt.d.w FTMP2, FTMP2
3153 |.else 3380 |.else
3154 |. sextw CARG2, CARG2 3381 |. sextw CARG2, CARG2
3155 | bal ->vm_sfi2d_2 3382 | bal ->vm_sfi2d_2
@@ -3167,10 +3394,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3167 |.endif 3394 |.endif
3168 | // RA is an integer, RD is a number. Convert RA to a number. 3395 | // RA is an integer, RD is a number. Convert RA to a number.
3169 |.if FPU 3396 |.if FPU
3170 |. lwc1 f20, LO(RA) 3397 |. lwc1 FTMP0, LO(RA)
3171 | ldc1 f22, 0(RD) 3398 | ldc1 FTMP2, 0(RD)
3172 | b <4 3399 | b <4
3173 | cvt.d.w f20, f20 3400 | cvt.d.w FTMP0, FTMP0
3174 |.else 3401 |.else
3175 |. sextw CARG1, CARG1 3402 |. sextw CARG1, CARG1
3176 | bal ->vm_sfi2d_1 3403 | bal ->vm_sfi2d_1
@@ -3213,11 +3440,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3213 | decode_RD4b TMP2 3440 | decode_RD4b TMP2
3214 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3441 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3215 | addu TMP2, TMP2, TMP3 3442 | addu TMP2, TMP2, TMP3
3443 |.if MIPSR6
3444 if (vk) {
3445 | seleqz TMP2, TMP2, TMP0
3446 } else {
3447 | selnez TMP2, TMP2, TMP0
3448 }
3449 |.else
3216 if (vk) { 3450 if (vk) {
3217 | movn TMP2, r0, TMP0 3451 | movn TMP2, r0, TMP0
3218 } else { 3452 } else {
3219 | movz TMP2, r0, TMP0 3453 | movz TMP2, r0, TMP0
3220 } 3454 }
3455 |.endif
3221 | daddu PC, PC, TMP2 3456 | daddu PC, PC, TMP2
3222 | ins_next 3457 | ins_next
3223 break; 3458 break;
@@ -3236,11 +3471,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3236 | decode_RD4b TMP2 3471 | decode_RD4b TMP2
3237 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3472 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3238 | addu TMP2, TMP2, TMP3 3473 | addu TMP2, TMP2, TMP3
3474 |.if MIPSR6
3475 if (op == BC_IST) {
3476 | selnez TMP2, TMP2, TMP0;
3477 } else {
3478 | seleqz TMP2, TMP2, TMP0;
3479 }
3480 |.else
3239 if (op == BC_IST) { 3481 if (op == BC_IST) {
3240 | movz TMP2, r0, TMP0 3482 | movz TMP2, r0, TMP0
3241 } else { 3483 } else {
3242 | movn TMP2, r0, TMP0 3484 | movn TMP2, r0, TMP0
3243 } 3485 }
3486 |.endif
3244 | daddu PC, PC, TMP2 3487 | daddu PC, PC, TMP2
3245 } else { 3488 } else {
3246 | ld CRET1, 0(RD) 3489 | ld CRET1, 0(RD)
@@ -3483,9 +3726,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3483 | bltz TMP1, ->vmeta_arith 3726 | bltz TMP1, ->vmeta_arith
3484 |. daddu RA, BASE, RA 3727 |. daddu RA, BASE, RA
3485 |.elif "intins" == "mult" 3728 |.elif "intins" == "mult"
3729 |.if MIPSR6
3730 |. nop
3731 | mul CRET1, CARG3, CARG4
3732 | muh TMP2, CARG3, CARG4
3733 |.else
3486 |. intins CARG3, CARG4 3734 |. intins CARG3, CARG4
3487 | mflo CRET1 3735 | mflo CRET1
3488 | mfhi TMP2 3736 | mfhi TMP2
3737 |.endif
3489 | sra TMP1, CRET1, 31 3738 | sra TMP1, CRET1, 31
3490 | bne TMP1, TMP2, ->vmeta_arith 3739 | bne TMP1, TMP2, ->vmeta_arith
3491 |. daddu RA, BASE, RA 3740 |. daddu RA, BASE, RA
@@ -3508,16 +3757,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3508 |.endif 3757 |.endif
3509 | 3758 |
3510 |5: // Check for two numbers. 3759 |5: // Check for two numbers.
3511 | .FPU ldc1 f20, 0(RB) 3760 | .FPU ldc1 FTMP0, 0(RB)
3512 | sltu AT, TMP0, TISNUM 3761 | sltu AT, TMP0, TISNUM
3513 | sltu TMP0, TMP1, TISNUM 3762 | sltu TMP0, TMP1, TISNUM
3514 | .FPU ldc1 f22, 0(RC) 3763 | .FPU ldc1 FTMP2, 0(RC)
3515 | and AT, AT, TMP0 3764 | and AT, AT, TMP0
3516 | beqz AT, ->vmeta_arith 3765 | beqz AT, ->vmeta_arith
3517 |. daddu RA, BASE, RA 3766 |. daddu RA, BASE, RA
3518 | 3767 |
3519 |.if FPU 3768 |.if FPU
3520 | fpins FRET1, f20, f22 3769 | fpins FRET1, FTMP0, FTMP2
3521 |.elif "fpcall" == "sfpmod" 3770 |.elif "fpcall" == "sfpmod"
3522 | sfpmod 3771 | sfpmod
3523 |.else 3772 |.else
@@ -3847,7 +4096,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3847 | li TMP0, 0x801 4096 | li TMP0, 0x801
3848 | addiu AT, CARG2, -0x7ff 4097 | addiu AT, CARG2, -0x7ff
3849 | srl CARG3, RD, 14 4098 | srl CARG3, RD, 14
4099 |.if MIPSR6
4100 | seleqz TMP0, TMP0, AT
4101 | selnez CARG2, CARG2, AT
4102 | or CARG2, CARG2, TMP0
4103 |.else
3850 | movz CARG2, TMP0, AT 4104 | movz CARG2, TMP0, AT
4105 |.endif
3851 | // (lua_State *L, int32_t asize, uint32_t hbits) 4106 | // (lua_State *L, int32_t asize, uint32_t hbits)
3852 | call_intern lj_tab_new 4107 | call_intern lj_tab_new
3853 |. move CARG1, L 4108 |. move CARG1, L
@@ -4128,7 +4383,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4128 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4383 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4129 | settp STR:RC, TMP3 // Tagged key to look for. 4384 | settp STR:RC, TMP3 // Tagged key to look for.
4130 |.if FPU 4385 |.if FPU
4131 | ldc1 f20, 0(RA) 4386 | ldc1 FTMP0, 0(RA)
4132 |.else 4387 |.else
4133 | ld CRET1, 0(RA) 4388 | ld CRET1, 0(RA)
4134 |.endif 4389 |.endif
@@ -4144,7 +4399,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4144 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4399 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4145 | bnez AT, >7 4400 | bnez AT, >7
4146 |.if FPU 4401 |.if FPU
4147 |. sdc1 f20, NODE:TMP2->val 4402 |. sdc1 FTMP0, NODE:TMP2->val
4148 |.else 4403 |.else
4149 |. sd CRET1, NODE:TMP2->val 4404 |. sd CRET1, NODE:TMP2->val
4150 |.endif 4405 |.endif
@@ -4185,7 +4440,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4185 | ld BASE, L->base 4440 | ld BASE, L->base
4186 |.if FPU 4441 |.if FPU
4187 | b <3 // No 2nd write barrier needed. 4442 | b <3 // No 2nd write barrier needed.
4188 |. sdc1 f20, 0(CRET1) 4443 |. sdc1 FTMP0, 0(CRET1)
4189 |.else 4444 |.else
4190 | ld CARG1, 0(RA) 4445 | ld CARG1, 0(RA)
4191 | b <3 // No 2nd write barrier needed. 4446 | b <3 // No 2nd write barrier needed.
@@ -4528,7 +4783,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4528 | ld CARG1, 0(RC) 4783 | ld CARG1, 0(RC)
4529 | sltu AT, RC, TMP3 4784 | sltu AT, RC, TMP3
4530 | daddiu RC, RC, 8 4785 | daddiu RC, RC, 8
4786 |.if MIPSR6
4787 | selnez CARG1, CARG1, AT
4788 | seleqz AT, TISNIL, AT
4789 | or CARG1, CARG1, AT
4790 |.else
4531 | movz CARG1, TISNIL, AT 4791 | movz CARG1, TISNIL, AT
4792 |.endif
4532 | sd CARG1, 0(RA) 4793 | sd CARG1, 0(RA)
4533 | sltu AT, RA, TMP2 4794 | sltu AT, RA, TMP2
4534 | bnez AT, <1 4795 | bnez AT, <1
@@ -4717,7 +4978,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4717 | dext AT, CRET1, 31, 0 4978 | dext AT, CRET1, 31, 0
4718 | slt CRET1, CARG2, CARG3 4979 | slt CRET1, CARG2, CARG3
4719 | slt TMP1, CARG3, CARG2 4980 | slt TMP1, CARG3, CARG2
4981 |.if MIPSR6
4982 | selnez TMP1, TMP1, AT
4983 | seleqz CRET1, CRET1, AT
4984 | or CRET1, CRET1, TMP1
4985 |.else
4720 | movn CRET1, TMP1, AT 4986 | movn CRET1, TMP1, AT
4987 |.endif
4721 } else { 4988 } else {
4722 | bne CARG3, TISNUM, >5 4989 | bne CARG3, TISNUM, >5
4723 |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type 4990 |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type
@@ -4733,20 +5000,34 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4733 | slt CRET1, CRET1, CARG1 5000 | slt CRET1, CRET1, CARG1
4734 | slt AT, CARG2, r0 5001 | slt AT, CARG2, r0
4735 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. 5002 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
5003 |.if MIPSR6
5004 | selnez TMP1, TMP1, AT
5005 | seleqz CRET1, CRET1, AT
5006 | or CRET1, CRET1, TMP1
5007 |.else
4736 | movn CRET1, TMP1, AT 5008 | movn CRET1, TMP1, AT
5009 |.endif
4737 | or CRET1, CRET1, TMP0 5010 | or CRET1, CRET1, TMP0
4738 | zextw CARG1, CARG1 5011 | zextw CARG1, CARG1
4739 | settp CARG1, TISNUM 5012 | settp CARG1, TISNUM
4740 } 5013 }
4741 |1: 5014 |1:
4742 if (op == BC_FORI) { 5015 if (op == BC_FORI) {
5016 |.if MIPSR6
5017 | selnez TMP2, TMP2, CRET1
5018 |.else
4743 | movz TMP2, r0, CRET1 5019 | movz TMP2, r0, CRET1
5020 |.endif
4744 | daddu PC, PC, TMP2 5021 | daddu PC, PC, TMP2
4745 } else if (op == BC_JFORI) { 5022 } else if (op == BC_JFORI) {
4746 | daddu PC, PC, TMP2 5023 | daddu PC, PC, TMP2
4747 | lhu RD, -4+OFS_RD(PC) 5024 | lhu RD, -4+OFS_RD(PC)
4748 } else if (op == BC_IFORL) { 5025 } else if (op == BC_IFORL) {
5026 |.if MIPSR6
5027 | seleqz TMP2, TMP2, CRET1
5028 |.else
4749 | movn TMP2, r0, CRET1 5029 | movn TMP2, r0, CRET1
5030 |.endif
4750 | daddu PC, PC, TMP2 5031 | daddu PC, PC, TMP2
4751 } 5032 }
4752 if (vk) { 5033 if (vk) {
@@ -4776,6 +5057,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4776 | and AT, AT, TMP0 5057 | and AT, AT, TMP0
4777 | beqz AT, ->vmeta_for 5058 | beqz AT, ->vmeta_for
4778 |. slt TMP3, TMP3, r0 5059 |. slt TMP3, TMP3, r0
5060 |.if MIPSR6
5061 | dmtc1 TMP3, FTMP2
5062 | cmp.lt.d FTMP0, f0, f2
5063 | cmp.lt.d FTMP1, f2, f0
5064 | sel.d FTMP2, FTMP1, FTMP0
5065 | b <1
5066 |. dmfc1 CRET1, FTMP2
5067 |.else
4779 | c.ole.d 0, f0, f2 5068 | c.ole.d 0, f0, f2
4780 | c.ole.d 1, f2, f0 5069 | c.ole.d 1, f2, f0
4781 | li CRET1, 1 5070 | li CRET1, 1
@@ -4783,12 +5072,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4783 | movt AT, r0, 1 5072 | movt AT, r0, 1
4784 | b <1 5073 | b <1
4785 |. movn CRET1, AT, TMP3 5074 |. movn CRET1, AT, TMP3
5075 |.endif
4786 } else { 5076 } else {
4787 | ldc1 f0, FORL_IDX*8(RA) 5077 | ldc1 f0, FORL_IDX*8(RA)
4788 | ldc1 f4, FORL_STEP*8(RA) 5078 | ldc1 f4, FORL_STEP*8(RA)
4789 | ldc1 f2, FORL_STOP*8(RA) 5079 | ldc1 f2, FORL_STOP*8(RA)
4790 | ld TMP3, FORL_STEP*8(RA) 5080 | ld TMP3, FORL_STEP*8(RA)
4791 | add.d f0, f0, f4 5081 | add.d f0, f0, f4
5082 |.if MIPSR6
5083 | slt TMP3, TMP3, r0
5084 | dmtc1 TMP3, FTMP2
5085 | cmp.lt.d FTMP0, f0, f2
5086 | cmp.lt.d FTMP1, f2, f0
5087 | sel.d FTMP2, FTMP1, FTMP0
5088 | dmfc1 CRET1, FTMP2
5089 if (op == BC_IFORL) {
5090 | seleqz TMP2, TMP2, CRET1
5091 | daddu PC, PC, TMP2
5092 }
5093 |.else
4792 | c.ole.d 0, f0, f2 5094 | c.ole.d 0, f0, f2
4793 | c.ole.d 1, f2, f0 5095 | c.ole.d 1, f2, f0
4794 | slt TMP3, TMP3, r0 5096 | slt TMP3, TMP3, r0
@@ -4801,6 +5103,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4801 | movn TMP2, r0, CRET1 5103 | movn TMP2, r0, CRET1
4802 | daddu PC, PC, TMP2 5104 | daddu PC, PC, TMP2
4803 } 5105 }
5106 |.endif
4804 | sdc1 f0, FORL_IDX*8(RA) 5107 | sdc1 f0, FORL_IDX*8(RA)
4805 | ins_next1 5108 | ins_next1
4806 | b <2 5109 | b <2
@@ -4976,8 +5279,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4976 | ld TMP0, 0(RA) 5279 | ld TMP0, 0(RA)
4977 | sltu AT, RA, RC // Less args than parameters? 5280 | sltu AT, RA, RC // Less args than parameters?
4978 | move CARG1, TMP0 5281 | move CARG1, TMP0
5282 |.if MIPSR6
5283 | selnez TMP0, TMP0, AT
5284 | seleqz TMP3, TISNIL, AT
5285 | or TMP0, TMP0, TMP3
5286 | seleqz TMP3, CARG1, AT
5287 | selnez CARG1, TISNIL, AT
5288 | or CARG1, CARG1, TMP3
5289 |.else
4979 | movz TMP0, TISNIL, AT // Clear missing parameters. 5290 | movz TMP0, TISNIL, AT // Clear missing parameters.
4980 | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). 5291 | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC).
5292 |.endif
4981 | addiu TMP2, TMP2, -1 5293 | addiu TMP2, TMP2, -1
4982 | sd TMP0, 16(TMP1) 5294 | sd TMP0, 16(TMP1)
4983 | daddiu TMP1, TMP1, 8 5295 | daddiu TMP1, TMP1, 8