about summary refs log tree commit diff
path: root/src/lj_snap.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_snap.c')
-rw-r--r-- src/lj_snap.c 330
1 files changed, 317 insertions, 13 deletions
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 33edc8a6..1e6f10d0 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -11,6 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_tab.h"
14#include "lj_state.h" 15#include "lj_state.h"
15#include "lj_frame.h" 16#include "lj_frame.h"
16#include "lj_bc.h" 17#include "lj_bc.h"
@@ -20,10 +21,17 @@
20#include "lj_trace.h" 21#include "lj_trace.h"
21#include "lj_snap.h" 22#include "lj_snap.h"
22#include "lj_target.h" 23#include "lj_target.h"
24#if LJ_HASFFI
25#include "lj_ctype.h"
26#include "lj_cdata.h"
27#endif
23 28
24/* Some local macros to save typing. Undef'd at the end. */ 29/* Some local macros to save typing. Undef'd at the end. */
25#define IR(ref) (&J->cur.ir[(ref)]) 30#define IR(ref) (&J->cur.ir[(ref)])
26 31
32/* Pass IR on to next optimization in chain (FOLD). */
33#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
34
27/* Emit raw IR without passing through optimizations. */ 35/* Emit raw IR without passing through optimizations. */
28#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) 36#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
29 37
@@ -370,6 +378,31 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
370 } 378 }
371} 379}
372 380
381/* De-duplicate parent reference.
**
** Scans the first nmax entries of the snapshot map for one whose reference
** equals ref. Returns the current J->slot[] content of the first matching
** entry's slot, or 0 if none of the scanned entries refers to ref.
*/
382static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
383{
384 MSize j;
385 for (j = 0; j < nmax; j++)
386 if (snap_ref(map[j]) == ref)
387 return J->slot[snap_slot(map[j])]; /* Reuse what that slot already holds. */
388 return 0; /* No match; callers test the result against 0. */
389}
390
391/* Emit parent reference with de-duplication.
**
** Returns a TRef for instruction ref of the parent trace T:
** - Constants are replayed with snap_replay_const().
** - If regsp_used(ir->prev) is false, 0 is returned and nothing is emitted.
**   Callers in this file handle 0 by falling back to the operand of an
**   IR_CONV instruction.
** - Otherwise a reference found by snap_dedup() among the first nmax map
**   entries is reused. The bloom filter is tested first to skip that scan.
** - Failing that, a new IR_PVAL with the type of ir is emitted via FOLD.
*/
392static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
393 BloomFilter seen, IRRef ref)
394{
395 IRIns *ir = &T->ir[ref];
396 TRef tr;
397 if (irref_isk(ref))
398 tr = snap_replay_const(J, ir);
399 else if (!regsp_used(ir->prev))
400 tr = 0; /* Nothing emitted; the caller has to deal with this case. */
401 else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
402 tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0); /* Filter miss or not found in map. */
403 return tr; /* On a successful de-dup, tr was set inside the condition above. */
404}
405
373/* Replay snapshot state to setup side trace. */ 406/* Replay snapshot state to setup side trace. */
374void lj_snap_replay(jit_State *J, GCtrace *T) 407void lj_snap_replay(jit_State *J, GCtrace *T)
375{ 408{
@@ -377,6 +410,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
377 SnapEntry *map = &T->snapmap[snap->mapofs]; 410 SnapEntry *map = &T->snapmap[snap->mapofs];
378 MSize n, nent = snap->nent; 411 MSize n, nent = snap->nent;
379 BloomFilter seen = 0; 412 BloomFilter seen = 0;
413 int pass23 = 0;
380 J->framedepth = 0; 414 J->framedepth = 0;
381 /* Emit IR for slots inherited from parent snapshot. */ 415 /* Emit IR for slots inherited from parent snapshot. */
382 for (n = 0; n < nent; n++) { 416 for (n = 0; n < nent; n++) {
@@ -386,21 +420,18 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
386 IRIns *ir = &T->ir[ref]; 420 IRIns *ir = &T->ir[ref];
387 TRef tr; 421 TRef tr;
388 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ 422 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
389 if (bloomtest(seen, ref)) { 423 if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
390 MSize j; 424 goto setslot;
391 for (j = 0; j < n; j++)
392 if (snap_ref(map[j]) == ref) {
393 tr = J->slot[snap_slot(map[j])];
394 goto setslot;
395 }
396 }
397 bloomset(seen, ref); 425 bloomset(seen, ref);
398 if (irref_isk(ref)) { 426 if (irref_isk(ref)) {
399 tr = snap_replay_const(J, ir); 427 tr = snap_replay_const(J, ir);
428 } else if (!regsp_used(ir->prev)) {
429 pass23 = 1;
430 lua_assert(s != 0);
431 tr = s;
400 } else { 432 } else {
401 IRType t = irt_type(ir->t); 433 IRType t = irt_type(ir->t);
402 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; 434 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
403 lua_assert(regsp_used(ir->prev));
404 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; 435 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
405 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); 436 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
406 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); 437 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
@@ -411,13 +442,126 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
411 if ((sn & SNAP_FRAME)) 442 if ((sn & SNAP_FRAME))
412 J->baseslot = s+1; 443 J->baseslot = s+1;
413 } 444 }
445 if (pass23) {
446 IRIns *irlast = &T->ir[(snap+1)->ref];
447 lua_assert(J->exitno+1 < T->nsnap);
448 pass23 = 0;
449 /* Emit dependent PVALs. */
450 for (n = 0; n < nent; n++) {
451 SnapEntry sn = map[n];
452 IRRef refp = snap_ref(sn);
453 IRIns *ir = &T->ir[refp];
454 if (regsp_reg(ir->r) == RID_SUNK) {
455 if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
456 pass23 = 1;
457 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
458 ir->o == IR_CNEW || ir->o == IR_CNEWI);
459 if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
460 if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
461 if (LJ_HASFFI && ir->o == IR_CNEWI) {
462 if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
463 snap_pref(J, T, map, nent, seen, (ir+1)->op2);
464 } else {
465 IRIns *irs;
466 for (irs = ir+1; irs < irlast; irs++)
467 if (irs->r == RID_SINK && ir + irs->s == irs) {
468 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
469 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
470 else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
471 irs+1 < irlast && (irs+1)->o == IR_HIOP)
472 snap_pref(J, T, map, nent, seen, (irs+1)->op2);
473 }
474 }
475 } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
476 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
477 J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
478 }
479 }
480 /* Replay sunk instructions. */
481 for (n = 0; pass23 && n < nent; n++) {
482 SnapEntry sn = map[n];
483 IRRef refp = snap_ref(sn);
484 IRIns *ir = &T->ir[refp];
485 if (regsp_reg(ir->r) == RID_SUNK) {
486 TRef op1, op2;
487 if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */
488 J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
489 continue;
490 }
491 op1 = ir->op1;
492 if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
493 op2 = ir->op2;
494 if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
495 if (LJ_HASFFI && ir->o == IR_CNEWI) {
496 if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
497 lj_needsplit(J); /* Emit joining HIOP. */
498 op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
499 snap_pref(J, T, map, nent, seen, (ir+1)->op2));
500 }
501 J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
502 } else {
503 IRIns *irs;
504 TRef tr = emitir(ir->ot, op1, op2);
505 J->slot[snap_slot(sn)] = tr;
506 for (irs = ir+1; irs < irlast; irs++)
507 if (irs->r == RID_SINK && ir + irs->s == irs) {
508 IRIns *irr = &T->ir[irs->op1];
509 TRef val, key = irr->op2, tmp = tr;
510 if (irr->o != IR_FREF) {
511 IRIns *irk = &T->ir[key];
512 if (irr->o == IR_HREFK)
513 key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
514 irk->op2);
515 else
516 key = snap_replay_const(J, irk);
517 if (irr->o == IR_HREFK || irr->o == IR_AREF) {
518 IRIns *irf = &T->ir[irr->op1];
519 tmp = emitir(irf->ot, tmp, irf->op2);
520 }
521 }
522 tmp = emitir(irr->ot, tmp, key);
523 val = snap_pref(J, T, map, nent, seen, irs->op2);
524 if (val == 0) {
525 IRIns *irc = &T->ir[irs->op2];
526 lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
527 val = snap_pref(J, T, map, nent, seen, irc->op1);
528 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
529 } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
530 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
531 IRType t = IRT_I64;
532 if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
533 t = IRT_NUM;
534 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
535 uint64_t k = (uint32_t)T->ir[irs->op2].i +
536 ((uint64_t)T->ir[(irs+1)->op2].i << 32);
537 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
538 lj_ir_k64_find(J, k));
539 } else {
540 val = emitir_raw(IRT(IR_HIOP, t), val,
541 snap_pref(J, T, map, nent, seen, (irs+1)->op2));
542 }
543 tmp = emitir(IRT(irs->o, t), tmp, val);
544 continue;
545 }
546 tmp = emitir(irs->ot, tmp, val);
547 }
548 }
549 }
550 }
551 }
414 J->base = J->slot + J->baseslot; 552 J->base = J->slot + J->baseslot;
415 J->maxslot = snap->nslots - J->baseslot; 553 J->maxslot = snap->nslots - J->baseslot;
416 lj_snap_add(J); 554 lj_snap_add(J);
555 if (pass23) /* Need explicit GC step _after_ initial snapshot. */
556 emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
417} 557}
418 558
419/* -- Snapshot restore ---------------------------------------------------- */ 559/* -- Snapshot restore ---------------------------------------------------- */
420 560
561static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
562 SnapNo snapno, BloomFilter rfilt,
563 IRIns *ir, TValue *o);
564
421/* Restore a value from the trace exit state. */ 565/* Restore a value from the trace exit state. */
422static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, 566static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
423 SnapNo snapno, BloomFilter rfilt, 567 SnapNo snapno, BloomFilter rfilt,
@@ -450,8 +594,12 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
450 } 594 }
451 } else { /* Restore from register. */ 595 } else { /* Restore from register. */
452 Reg r = regsp_reg(rs); 596 Reg r = regsp_reg(rs);
453 lua_assert(ra_hasreg(r)); 597 if (ra_noreg(r)) {
454 if (irt_isinteger(t)) { 598 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
599 snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
600 if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
601 return;
602 } else if (irt_isinteger(t)) {
455 setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); 603 setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
456#if !LJ_SOFTFP 604#if !LJ_SOFTFP
457 } else if (irt_isnum(t)) { 605 } else if (irt_isnum(t)) {
@@ -468,6 +616,148 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
468 } 616 }
469} 617}
470 618
619#if LJ_HASFFI
620/* Restore raw data from the trace exit state.
**
** Copies the raw value of instruction ref of trace T into dst. sz is the
** number of bytes to write and is asserted to be 1, 2, 4 or 8. The value is
** read from the IR constant itself, or from the spill slot or register save
** area of the exit state ex. snapno and rfilt are only used to look up a
** replacement RegSP via snap_renameref() when the bloom filter test hits.
*/
621static void snap_restoredata(GCtrace *T, ExitState *ex,
622 SnapNo snapno, BloomFilter rfilt,
623 IRRef ref, void *dst, CTSize sz)
624{
625 IRIns *ir = &T->ir[ref];
626 RegSP rs = ir->prev;
627 int32_t *src;
628 union { uint64_t u64; float f; } tmp; /* Scratch for widened or converted values. */
629 if (irref_isk(ref)) { /* Constant: read from the IR. */
630 if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
631 src = mref(ir->ptr, int32_t); /* These constants are reached through ir->ptr. */
632 } else if (sz == 8) {
633 tmp.u64 = (uint64_t)(uint32_t)ir->i; /* Zero-extend the 32 bit constant. */
634 src = (int32_t *)&tmp.u64;
635 } else {
636 src = &ir->i;
637 }
638 } else {
639 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
640 rs = snap_renameref(T, snapno, ref, rs); /* Look up a possible rename for this snapshot. */
641 if (ra_hasspill(regsp_spill(rs))) {
642 src = &ex->spill[regsp_spill(rs)]; /* Value lives in a spill slot. */
643 } else {
644 Reg r = regsp_reg(rs);
645 if (ra_noreg(r)) { /* Neither spill slot nor register. */
646 /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
647 lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
648 snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); /* Restore the int32 operand first. */
649 *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; /* Then convert it to a number in place. */
650 return;
651 }
652 src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; /* Default: GPR save area. */
653#if !LJ_SOFTFP
654 if (r >= RID_MAX_GPR) { /* Not a GPR: read from the FPR save area. */
655 src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
656#if LJ_TARGET_PPC
657 if (sz == 4) { /* PPC FPRs are always doubles. */
658 tmp.f = (float)*(double *)src;
659 src = (int32_t *)&tmp.f;
660 }
661#else
662 if (LJ_BE && sz == 4) src++; /* Step to the second 32 bit word on big-endian targets. */
663#endif
664 }
665#endif
666 }
667 }
668 lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
669 if (sz == 4) *(int32_t *)dst = *src;
670 else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
671 else if (sz == 1) *(int8_t *)dst = (int8_t)*src; /* Narrow stores truncate the 32 bit word at src. */
672 else *(int16_t *)dst = (int16_t)*src;
673}
674#endif
675
676/* Unsink allocation from the trace exit state. Unsink sunk stores.
**
** ir is the allocation instruction of trace T (asserted to be IR_TNEW,
** IR_TDUP, IR_CNEW or IR_CNEWI). A fresh object is created and stored to *o.
** For tables and IR_CNEW, the instructions between ir and the ref of
** snapshot snapno are then scanned for stores marked with RID_SINK whose
** s field points back to ir, and each such store is applied to the new
** object. For IR_CNEWI the immediate value is restored instead.
** Values are fetched from the exit state ex with snap_restoreval() or
** snap_restoredata().
*/
677static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
678 SnapNo snapno, BloomFilter rfilt,
679 IRIns *ir, TValue *o)
680{
681 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
682 ir->o == IR_CNEW || ir->o == IR_CNEWI);
683#if LJ_HASFFI
684 if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
685 CTState *cts = ctype_ctsG(J2G(J));
686 CTypeID id = (CTypeID)T->ir[ir->op1].i; /* Type ID comes from the i field of op1. */
687 CTSize sz = lj_ctype_size(cts, id);
688 GCcdata *cd = lj_cdata_new(cts, id, sz);
689 setcdataV(J->L, o, cd); /* Store the new cdata object to *o. */
690 if (ir->o == IR_CNEWI) {
691 uint8_t *p = (uint8_t *)cdataptr(cd);
692 lua_assert(sz == 4 || sz == 8);
693 if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { /* 8 byte value held as two 4 byte parts. */
694 snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); /* Part from the HIOP operand. */
695 if (LJ_BE) p += 4;
696 sz = 4;
697 }
698 snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); /* Remaining part or the whole value. */
699 } else {
700 IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; /* Scan stops at the snapshot's ref. */
701 for (irs = ir+1; irs < irlast; irs++)
702 if (irs->r == RID_SINK && ir + irs->s == irs) { /* Sunk store whose s field points back to ir. */
703 IRIns *iro = &T->ir[T->ir[irs->op1].op2]; /* op2 of the address computation (asserted below). */
704 uint8_t *p = (uint8_t *)cd; /* Offsets are applied to the object pointer, not to cdataptr(). */
705 CTSize szs;
706 lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
707 lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
708 if (irt_is64(irs->t)) szs = 8; /* Store size follows from the type of the store. */
709 else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
710 else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
711 else szs = 4;
712 if (LJ_64 && iro->o == IR_KINT64)
713 p += (int64_t)ir_k64(iro)->u64;
714 else
715 p += iro->i;
716 lua_assert(p >= (uint8_t *)cdataptr(cd) &&
717 p + szs <= (uint8_t *)cdataptr(cd) + sz); /* Store must stay inside the payload. */
718 if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
719 lua_assert(szs == 4);
720 snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); /* Part from the HIOP operand. */
721 if (LJ_BE) p += 4;
722 }
723 snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
724 }
725 }
726 } else
727#endif
728 {
729 IRIns *irs, *irlast;
730 GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) : /* New table from op1/op2. */
731 lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1])); /* Otherwise duplicate the table constant in op1. */
732 settabV(J->L, o, t); /* Store the new table to *o. */
733 irlast = &T->ir[T->snap[snapno].ref]; /* Scan stops at the snapshot's ref. */
734 for (irs = ir+1; irs < irlast; irs++)
735 if (irs->r == RID_SINK && ir + irs->s == irs) { /* Sunk store whose s field points back to ir. */
736 IRIns *irk = &T->ir[irs->op1];
737 TValue tmp, *val;
738 lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
739 irs->o == IR_FSTORE);
740 if (irk->o == IR_FREF) { /* Field store: only the metatable field is expected. */
741 lua_assert(irk->op2 == IRFL_TAB_META);
742 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
743 /* NOBARRIER: The table is new (marked white). */
744 setgcref(t->metatable, obj2gco(tabV(&tmp)));
745 } else {
746 irk = &T->ir[irk->op2]; /* Key instruction is op2 of the reference. */
747 if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1]; /* For KSLOT the key constant is in op1. */
748 lj_ir_kvalue(J->L, &tmp, irk); /* Materialize the constant key in tmp. */
749 val = lj_tab_set(J->L, t, &tmp); /* Value slot for that key. */
750 /* NOBARRIER: The table is new (marked white). */
751 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
752 if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { /* Following HIOP supplies the high word. */
753 snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
754 val->u32.hi = tmp.u32.lo;
755 }
756 }
757 }
758 }
759}
760
471/* Restore interpreter state from exit state with the help of a snapshot. */ 761/* Restore interpreter state from exit state with the help of a snapshot. */
472const BCIns *lj_snap_restore(jit_State *J, void *exptr) 762const BCIns *lj_snap_restore(jit_State *J, void *exptr)
473{ 763{
@@ -500,10 +790,23 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
500 SnapEntry sn = map[n]; 790 SnapEntry sn = map[n];
501 if (!(sn & SNAP_NORESTORE)) { 791 if (!(sn & SNAP_NORESTORE)) {
502 TValue *o = &frame[snap_slot(sn)]; 792 TValue *o = &frame[snap_slot(sn)];
503 snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn), o); 793 IRRef ref = snap_ref(sn);
794 IRIns *ir = &T->ir[ref];
795 if (ir->r == RID_SUNK) {
796 MSize j;
797 for (j = 0; j < n; j++)
798 if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */
799 copyTV(L, o, &frame[snap_slot(map[j])]);
800 goto dupslot;
801 }
802 snap_unsink(J, T, ex, snapno, rfilt, ir, o);
803 dupslot:
804 continue;
805 }
806 snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
504 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { 807 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
505 TValue tmp; 808 TValue tmp;
506 snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn)+1, &tmp); 809 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
507 o->u32.hi = tmp.u32.lo; 810 o->u32.hi = tmp.u32.lo;
508 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { 811 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
509 /* Overwrite tag with frame link. */ 812 /* Overwrite tag with frame link. */
@@ -528,5 +831,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
528 831
529#undef IR 832#undef IR
530#undef emitir_raw 833#undef emitir_raw
834#undef emitir
531 835
532#endif 836#endif