diff options
Diffstat (limited to 'src/lib_jit.c')
-rw-r--r-- | src/lib_jit.c | 169 |
1 files changed, 141 insertions, 28 deletions
diff --git a/src/lib_jit.c b/src/lib_jit.c index 6e98229e..c97b0d53 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c | |||
@@ -10,13 +10,17 @@ | |||
10 | #include "lauxlib.h" | 10 | #include "lauxlib.h" |
11 | #include "lualib.h" | 11 | #include "lualib.h" |
12 | 12 | ||
13 | #include "lj_arch.h" | ||
14 | #include "lj_obj.h" | 13 | #include "lj_obj.h" |
14 | #include "lj_gc.h" | ||
15 | #include "lj_err.h" | 15 | #include "lj_err.h" |
16 | #include "lj_debug.h" | 16 | #include "lj_debug.h" |
17 | #include "lj_str.h" | 17 | #include "lj_str.h" |
18 | #include "lj_tab.h" | 18 | #include "lj_tab.h" |
19 | #include "lj_state.h" | ||
19 | #include "lj_bc.h" | 20 | #include "lj_bc.h" |
21 | #if LJ_HASFFI | ||
22 | #include "lj_ctype.h" | ||
23 | #endif | ||
20 | #if LJ_HASJIT | 24 | #if LJ_HASJIT |
21 | #include "lj_ir.h" | 25 | #include "lj_ir.h" |
22 | #include "lj_jit.h" | 26 | #include "lj_jit.h" |
@@ -24,6 +28,7 @@ | |||
24 | #include "lj_iropt.h" | 28 | #include "lj_iropt.h" |
25 | #include "lj_target.h" | 29 | #include "lj_target.h" |
26 | #endif | 30 | #endif |
31 | #include "lj_trace.h" | ||
27 | #include "lj_dispatch.h" | 32 | #include "lj_dispatch.h" |
28 | #include "lj_vm.h" | 33 | #include "lj_vm.h" |
29 | #include "lj_vmevent.h" | 34 | #include "lj_vmevent.h" |
@@ -280,7 +285,7 @@ static GCtrace *jit_checktrace(lua_State *L) | |||
280 | /* Names of link types. ORDER LJ_TRLINK */ | 285 | /* Names of link types. ORDER LJ_TRLINK */ |
/* Table of printable names for trace link types (see the ORDER LJ_TRLINK
** note on the row above). In the changed row below the left column is the
** old text and the right column is the new text, which appends a stitch
** entry after return.
** NOTE(review): presumably indexed by the link type of a trace at the use
** site, which is not visible in this hunk -- confirm the entry count matches
** the LJ_TRLINK enumeration. */
281 | static const char *const jit_trlinkname[] = { | 286 | static const char *const jit_trlinkname[] = { |
282 | "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", | 287 | "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", |
283 | "interpreter", "return" | 288 | "interpreter", "return", "stitch" |
284 | }; | 289 | }; |
285 | 290 | ||
286 | /* local info = jit.util.traceinfo(tr) */ | 291 | /* local info = jit.util.traceinfo(tr) */ |
@@ -333,6 +338,13 @@ LJLIB_CF(jit_util_tracek) | |||
333 | slot = ir->op2; | 338 | slot = ir->op2; |
334 | ir = &T->ir[ir->op1]; | 339 | ir = &T->ir[ir->op1]; |
335 | } | 340 | } |
341 | #if LJ_HASFFI | ||
342 | if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) { | ||
343 | ptrdiff_t oldtop = savestack(L, L->top); | ||
344 | luaopen_ffi(L); /* Load FFI library on-demand. */ | ||
345 | L->top = restorestack(L, oldtop); | ||
346 | } | ||
347 | #endif | ||
336 | lj_ir_kvalue(L, L->top-2, ir); | 348 | lj_ir_kvalue(L, L->top-2, ir); |
337 | setintV(L->top-1, (int32_t)irt_type(ir->t)); | 349 | setintV(L->top-1, (int32_t)irt_type(ir->t)); |
338 | if (slot == -1) | 350 | if (slot == -1) |
@@ -417,6 +429,12 @@ LJLIB_CF(jit_util_ircalladdr) | |||
417 | 429 | ||
418 | #include "lj_libdef.h" | 430 | #include "lj_libdef.h" |
419 | 431 | ||
/* Loader function for the jit.util module (added by this change).
** Registers the jit_util function set through LJ_LIB_REG with a NULL library
** name and returns 1, i.e. one result is reported to the caller.
** luaopen_jit passes this function to lj_lib_prereg under
** LUA_JITLIBNAME .util, where the old column registered jit.util directly
** with LJ_LIB_REG. */
432 | static int luaopen_jit_util(lua_State *L) | |
433 | { | |
434 | LJ_LIB_REG(L, NULL, jit_util); | |
435 | return 1; | |
436 | } | |
437 | |||
420 | /* -- jit.opt module ------------------------------------------------------ */ | 438 | /* -- jit.opt module ------------------------------------------------------ */ |
421 | 439 | ||
422 | #if LJ_HASJIT | 440 | #if LJ_HASJIT |
@@ -514,6 +532,104 @@ LJLIB_CF(jit_opt_start) | |||
514 | 532 | ||
515 | #endif | 533 | #endif |
516 | 534 | ||
535 | /* -- jit.profile module -------------------------------------------------- */ | ||
536 | |||
537 | #if LJ_HASPROFILE | ||
538 | |||
539 | #define LJLIB_MODULE_jit_profile | ||
540 | |||
541 | /* Not loaded by default, use: local profile = require("jit.profile") */ | ||
542 | |||
/* Registry keys for the profiler state. The code in this module only takes
** the addresses of these two objects and wraps them as light userdata keys:
** KEY_PROFILE_THREAD keys the registry slot holding the callback thread,
** KEY_PROFILE_FUNC keys the slot holding the callback function. */
543 | static const char KEY_PROFILE_THREAD = 't'; | |
544 | static const char KEY_PROFILE_FUNC = 'f'; | |
545 | |||
/* C-side profiler callback; jit_profile_start hands it to
** luaJIT_profile_start together with the callback thread.
**
** L2       Thread on which the Lua callback is pushed and called.
** L        Thread whose registry is searched for the callback function;
**          it is also pushed as the first argument of the Lua callback.
** samples  Pushed as the second argument of the Lua callback.
** vmstate  Narrowed to a single char and pushed as a one-character string
**          (third argument).
**
** Nothing happens when the registry slot keyed by KEY_PROFILE_FUNC does not
** hold a function. When the protected call reports a non-zero status, the
** panic handler is invoked if one is set and the process then exits with
** EXIT_FAILURE.
*/
546 | static void jit_profile_callback(lua_State *L2, lua_State *L, int samples, | |
547 | int vmstate) | |
548 | { | |
549 | TValue key; | |
550 | cTValue *tv; | |
/* Look up the callback function stored by jit_profile_start. */
551 | setlightudV(&key, (void *)&KEY_PROFILE_FUNC); | |
552 | tv = lj_tab_get(L, tabV(registry(L)), &key); | |
553 | if (tvisfunc(tv)) { | |
554 | char vmst = (char)vmstate; | |
555 | int status; | |
/* Build the call frame on L2: function, thread, sample count, state string. */
556 | setfuncV(L2, L2->top++, funcV(tv)); | |
557 | setthreadV(L2, L2->top++, L); | |
558 | setintV(L2->top++, samples); | |
559 | setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1)); | |
560 | status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */ | |
/* A failing callback is treated as fatal: optional panic hook, then exit. */
561 | if (status) { | |
562 | if (G(L2)->panic) G(L2)->panic(L2); | |
563 | exit(EXIT_FAILURE); | |
564 | } | |
/* NOTE(review): presumably discards any trace recording started while the
** callback ran -- confirm against lj_trace_abort. */
565 | lj_trace_abort(G(L2)); | |
566 | } | |
567 | } | |
568 | |||
569 | /* profile.start(mode, cb) */ | ||
/* Lua-callable start function of the jit.profile module.
**
** Argument 1 is read with lj_lib_optstr as the mode string; when it is
** absent an empty string is passed on instead. Argument 2 is read with
** lj_lib_checkfunc as the callback function. A fresh thread L2 is created
** with lua_newthread to run the callback on. Both the thread and the
** function are stored in the registry under the KEY_PROFILE_THREAD and
** KEY_PROFILE_FUNC light userdata keys, followed by a GC barrier on the
** registry table. Finally luaJIT_profile_start is called with the mode,
** jit_profile_callback and L2. Returns 0, i.e. no results for Lua.
*/
570 | LJLIB_CF(jit_profile_start) | |
571 | { | |
572 | GCtab *registry = tabV(registry(L)); | |
573 | GCstr *mode = lj_lib_optstr(L, 1); | |
574 | GCfunc *func = lj_lib_checkfunc(L, 2); | |
575 | lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */ | |
576 | TValue key; | |
577 | /* Anchor thread and function in registry. */ | |
578 | setlightudV(&key, (void *)&KEY_PROFILE_THREAD); | |
579 | setthreadV(L, lj_tab_set(L, registry, &key), L2); | |
580 | setlightudV(&key, (void *)&KEY_PROFILE_FUNC); | |
581 | setfuncV(L, lj_tab_set(L, registry, &key), func); | |
582 | lj_gc_anybarriert(L, registry); | |
/* L2 is handed over as the data pointer; it arrives as the first parameter
** of jit_profile_callback. */
583 | luaJIT_profile_start(L, mode ? strdata(mode) : "", | |
584 | (luaJIT_profile_callback)jit_profile_callback, L2); | |
585 | return 0; | |
586 | } | |
587 | |||
588 | /* profile.stop() */ | ||
/* Lua-callable stop function of the jit.profile module.
** Calls luaJIT_profile_stop first, then overwrites the two registry slots
** keyed by KEY_PROFILE_THREAD and KEY_PROFILE_FUNC with nil (the slots that
** jit_profile_start filled) and issues a GC barrier on the registry table.
** Returns 0, i.e. no results for Lua. */
589 | LJLIB_CF(jit_profile_stop) | |
590 | { | |
591 | GCtab *registry; | |
592 | TValue key; | |
593 | luaJIT_profile_stop(L); | |
594 | registry = tabV(registry(L)); | |
595 | setlightudV(&key, (void *)&KEY_PROFILE_THREAD); | |
596 | setnilV(lj_tab_set(L, registry, &key)); | |
597 | setlightudV(&key, (void *)&KEY_PROFILE_FUNC); | |
598 | setnilV(lj_tab_set(L, registry, &key)); | |
599 | lj_gc_anybarriert(L, registry); | |
600 | return 0; | |
601 | } | |
602 | |||
603 | /* dump = profile.dumpstack([thread,] fmt, depth) */ | ||
/* Lua-callable dumpstack function of the jit.profile module.
**
** When at least one argument is present and the first one is a thread, that
** thread is passed to luaJIT_profile_dumpstack and the remaining argument
** positions shift by one; otherwise the calling thread L is used. The format
** is read with lj_lib_checkstr and the depth with lj_lib_checkint. The
** pointer and length produced by luaJIT_profile_dumpstack are pushed onto L
** as one string. Returns 1, i.e. a single result for Lua.
*/
604 | LJLIB_CF(jit_profile_dumpstack) | |
605 | { | |
606 | lua_State *L2 = L; | |
607 | int arg = 0; | |
608 | size_t len; | |
609 | int depth; | |
610 | GCstr *fmt; | |
611 | const char *p; | |
/* Optional leading thread argument. */
612 | if (L->top > L->base && tvisthread(L->base)) { | |
613 | L2 = threadV(L->base); | |
614 | arg = 1; | |
615 | } | |
616 | fmt = lj_lib_checkstr(L, arg+1); | |
617 | depth = lj_lib_checkint(L, arg+2); | |
618 | p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len); | |
/* The result is pushed on the calling thread L, not on L2. */
619 | lua_pushlstring(L, p, len); | |
620 | return 1; | |
621 | } | |
622 | |||
623 | #include "lj_libdef.h" | ||
624 | |||
/* Loader function for the jit.profile module.
** Registers the jit_profile function set through LJ_LIB_REG with a NULL
** library name and returns 1, i.e. one result is reported to the caller.
** luaopen_jit passes this function to lj_lib_prereg under
** LUA_JITLIBNAME .profile when LJ_HASPROFILE is enabled. */
625 | static int luaopen_jit_profile(lua_State *L) | |
626 | { | |
627 | LJ_LIB_REG(L, NULL, jit_profile); | |
628 | return 1; | |
629 | } | |
630 | |||
631 | #endif | ||
632 | |||
517 | /* -- JIT compiler initialization ----------------------------------------- */ | 633 | /* -- JIT compiler initialization ----------------------------------------- */ |
518 | 634 | ||
519 | #if LJ_HASJIT | 635 | #if LJ_HASJIT |
@@ -539,38 +655,31 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
539 | uint32_t features[4]; | 655 | uint32_t features[4]; |
540 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { | 656 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { |
541 | #if !LJ_HASJIT | 657 | #if !LJ_HASJIT |
542 | #define JIT_F_CMOV 1 | ||
543 | #define JIT_F_SSE2 2 | 658 | #define JIT_F_SSE2 2 |
544 | #endif | 659 | #endif |
545 | flags |= ((features[3] >> 15)&1) * JIT_F_CMOV; | ||
546 | flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; | 660 | flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; |
547 | #if LJ_HASJIT | 661 | #if LJ_HASJIT |
548 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; | 662 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; |
549 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; | 663 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; |
550 | if (vendor[2] == 0x6c65746e) { /* Intel. */ | 664 | if (vendor[2] == 0x6c65746e) { /* Intel. */ |
551 | if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ | 665 | if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ |
552 | flags |= JIT_F_P4; /* Currently unused. */ | ||
553 | else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ | ||
554 | flags |= JIT_F_LEA_AGU; | 666 | flags |= JIT_F_LEA_AGU; |
555 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ | 667 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ |
556 | uint32_t fam = (features[0] & 0x0ff00f00); | 668 | uint32_t fam = (features[0] & 0x0ff00f00); |
557 | if (fam == 0x00000f00) /* K8. */ | ||
558 | flags |= JIT_F_SPLIT_XMM; | ||
559 | if (fam >= 0x00000f00) /* K8, K10. */ | 669 | if (fam >= 0x00000f00) /* K8, K10. */ |
560 | flags |= JIT_F_PREFER_IMUL; | 670 | flags |= JIT_F_PREFER_IMUL; |
561 | } | 671 | } |
672 | if (vendor[0] >= 7) { | ||
673 | uint32_t xfeatures[4]; | ||
674 | lj_vm_cpuid(7, xfeatures); | ||
675 | flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; | ||
676 | } | ||
562 | #endif | 677 | #endif |
563 | } | 678 | } |
564 | /* Check for required instruction set support on x86 (unnecessary on x64). */ | 679 | /* Check for required instruction set support on x86 (unnecessary on x64). */ |
565 | #if LJ_TARGET_X86 | 680 | #if LJ_TARGET_X86 |
566 | #if !defined(LUAJIT_CPU_NOCMOV) | ||
567 | if (!(flags & JIT_F_CMOV)) | ||
568 | luaL_error(L, "CPU not supported"); | ||
569 | #endif | ||
570 | #if defined(LUAJIT_CPU_SSE2) | ||
571 | if (!(flags & JIT_F_SSE2)) | 681 | if (!(flags & JIT_F_SSE2)) |
572 | luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); | 682 | luaL_error(L, "CPU with SSE2 required"); |
573 | #endif | ||
574 | #endif | 683 | #endif |
575 | #elif LJ_TARGET_ARM | 684 | #elif LJ_TARGET_ARM |
576 | #if LJ_HASJIT | 685 | #if LJ_HASJIT |
@@ -592,6 +701,8 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
592 | ver >= 60 ? JIT_F_ARMV6_ : 0; | 701 | ver >= 60 ? JIT_F_ARMV6_ : 0; |
593 | flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; | 702 | flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; |
594 | #endif | 703 | #endif |
704 | #elif LJ_TARGET_ARM64 | ||
705 | /* No optional CPU features to detect (for now). */ | ||
595 | #elif LJ_TARGET_PPC | 706 | #elif LJ_TARGET_PPC |
596 | #if LJ_HASJIT | 707 | #if LJ_HASJIT |
597 | #if LJ_ARCH_SQRT | 708 | #if LJ_ARCH_SQRT |
@@ -601,21 +712,23 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
601 | flags |= JIT_F_ROUND; | 712 | flags |= JIT_F_ROUND; |
602 | #endif | 713 | #endif |
603 | #endif | 714 | #endif |
604 | #elif LJ_TARGET_PPCSPE | ||
605 | /* Nothing to do. */ | ||
606 | #elif LJ_TARGET_MIPS | 715 | #elif LJ_TARGET_MIPS |
607 | #if LJ_HASJIT | 716 | #if LJ_HASJIT |
608 | /* Compile-time MIPS CPU detection. */ | 717 | /* Compile-time MIPS CPU detection. */ |
609 | #if LJ_ARCH_VERSION >= 20 | 718 | #if LJ_ARCH_VERSION >= 20 |
610 | flags |= JIT_F_MIPS32R2; | 719 | flags |= JIT_F_MIPSXXR2; |
611 | #endif | 720 | #endif |
612 | /* Runtime MIPS CPU detection. */ | 721 | /* Runtime MIPS CPU detection. */ |
613 | #if defined(__GNUC__) | 722 | #if defined(__GNUC__) |
614 | if (!(flags & JIT_F_MIPS32R2)) { | 723 | if (!(flags & JIT_F_MIPSXXR2)) { |
615 | int x; | 724 | int x; |
725 | #ifdef __mips16 | ||
726 | x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */ | ||
727 | #else | ||
616 | /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ | 728 | /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ |
617 | __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); | 729 | __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); |
618 | if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */ | 730 | #endif |
731 | if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ | ||
619 | } | 732 | } |
620 | #endif | 733 | #endif |
621 | #endif | 734 | #endif |
@@ -632,11 +745,7 @@ static void jit_init(lua_State *L) | |||
632 | uint32_t flags = jit_cpudetect(L); | 745 | uint32_t flags = jit_cpudetect(L); |
633 | #if LJ_HASJIT | 746 | #if LJ_HASJIT |
634 | jit_State *J = L2J(L); | 747 | jit_State *J = L2J(L); |
635 | #if LJ_TARGET_X86 | 748 | J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; |
636 | /* Silently turn off the JIT compiler on CPUs without SSE2. */ | ||
637 | if ((flags & JIT_F_SSE2)) | ||
638 | #endif | ||
639 | J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; | ||
640 | memcpy(J->param, jit_param_default, sizeof(J->param)); | 749 | memcpy(J->param, jit_param_default, sizeof(J->param)); |
641 | lj_dispatch_update(G(L)); | 750 | lj_dispatch_update(G(L)); |
642 | #else | 751 | #else |
@@ -646,19 +755,23 @@ static void jit_init(lua_State *L) | |||
646 | 755 | ||
/* Library entry point for the jit module.
**
** Reading the rows: a row with two numbered columns shows the old text on the
** left and the new text on the right. A row with a single number belongs to
** whichever numbering it continues (6xx = old/removed, 7xx = new/added).
**
** New-side behaviour, in order:
**  - jit_init(L) runs first; the single-column row numbered 661 near the end
**    is the removed old position of that call.
**  - Four values are pushed: OS name, architecture name, version number and
**    version string.
**  - The jit function set is registered under LUA_JITLIBNAME.
**  - With LJ_HASPROFILE, luaopen_jit_profile is pre-registered through
**    lj_lib_prereg under LUA_JITLIBNAME .profile.
**  - Unless LUAJIT_DISABLE_JITUTIL is defined, luaopen_jit_util is
**    pre-registered under LUA_JITLIBNAME .util; the old column registered
**    jit.util directly with LJ_LIB_REG.
**  - With LJ_HASJIT, the jit_opt function set is registered as jit.opt.
**  - The stack top is lowered by two slots and 1 is returned.
*/
647 | LUALIB_API int luaopen_jit(lua_State *L) | 756 | LUALIB_API int luaopen_jit(lua_State *L) |
648 | { | 757 | { |
758 | jit_init(L); | |
649 | lua_pushliteral(L, LJ_OS_NAME); | 759 | lua_pushliteral(L, LJ_OS_NAME); |
650 | lua_pushliteral(L, LJ_ARCH_NAME); | 760 | lua_pushliteral(L, LJ_ARCH_NAME); |
651 | lua_pushinteger(L, LUAJIT_VERSION_NUM); | 761 | lua_pushinteger(L, LUAJIT_VERSION_NUM); |
652 | lua_pushliteral(L, LUAJIT_VERSION); | 762 | lua_pushliteral(L, LUAJIT_VERSION); |
653 | LJ_LIB_REG(L, LUA_JITLIBNAME, jit); | 763 | LJ_LIB_REG(L, LUA_JITLIBNAME, jit); |
764 | #if LJ_HASPROFILE | |
765 | lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile, | |
766 | tabref(L->env)); | |
767 | #endif | |
654 | #ifndef LUAJIT_DISABLE_JITUTIL | 768 | #ifndef LUAJIT_DISABLE_JITUTIL |
655 | LJ_LIB_REG(L, "jit.util", jit_util); | 769 | lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env)); |
656 | #endif | 770 | #endif |
657 | #if LJ_HASJIT | 771 | #if LJ_HASJIT |
658 | LJ_LIB_REG(L, "jit.opt", jit_opt); | 772 | LJ_LIB_REG(L, "jit.opt", jit_opt); |
659 | #endif | 773 | #endif |
/* NOTE(review): presumably drops values left on the stack by the pushes and
** registrations above so that only the jit table remains -- confirm against
** LJ_LIB_REG. */
660 | L->top -= 2; | 774 | L->top -= 2; |
661 | jit_init(L); | |
662 | return 1; | 775 | return 1; |
663 | } | 776 | } |
664 | 777 | ||