diff options
author | Mike Pall <mike> | 2020-05-20 20:42:04 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2020-05-20 20:42:04 +0200 |
commit | 0eddcbead2d67c16dcd4039a6765b9d2fc8ea631 (patch) | |
tree | 614b63b87bb6ba476b616b10b95e278d4af0c452 | |
parent | 5bf0da3d7c02f9959fa3a9fb721e0565137b70c8 (diff) | |
download | luajit-0eddcbead2d67c16dcd4039a6765b9d2fc8ea631.tar.gz luajit-0eddcbead2d67c16dcd4039a6765b9d2fc8ea631.tar.bz2 luajit-0eddcbead2d67c16dcd4039a6765b9d2fc8ea631.zip |
Cleanup CPU detection and tuning for old CPUs.
Diffstat (limited to '')
-rw-r--r-- | src/Makefile | 1 | ||||
-rw-r--r-- | src/lib_jit.c | 65 | ||||
-rw-r--r-- | src/lj_arch.h | 6 | ||||
-rw-r--r-- | src/lj_asm_x86.h | 33 | ||||
-rw-r--r-- | src/lj_dispatch.c | 7 | ||||
-rw-r--r-- | src/lj_emit_x86.h | 5 | ||||
-rw-r--r-- | src/lj_errmsg.h | 4 | ||||
-rw-r--r-- | src/lj_jit.h | 94 | ||||
-rw-r--r-- | src/ljamalg.c | 10 |
9 files changed, 87 insertions, 138 deletions
diff --git a/src/Makefile b/src/Makefile index 07a94251..82a57032 100644 --- a/src/Makefile +++ b/src/Makefile | |||
@@ -603,7 +603,6 @@ E= @echo | |||
603 | default all: $(TARGET_T) | 603 | default all: $(TARGET_T) |
604 | 604 | ||
605 | amalg: | 605 | amalg: |
606 | @grep "^[+|]" ljamalg.c | ||
607 | $(MAKE) all "LJCORE_O=ljamalg.o" | 606 | $(MAKE) all "LJCORE_O=ljamalg.o" |
608 | 607 | ||
609 | clean: | 608 | clean: |
diff --git a/src/lib_jit.c b/src/lib_jit.c index c97b0d53..acd6c293 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c | |||
@@ -104,8 +104,8 @@ LJLIB_CF(jit_status) | |||
104 | jit_State *J = L2J(L); | 104 | jit_State *J = L2J(L); |
105 | L->top = L->base; | 105 | L->top = L->base; |
106 | setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); | 106 | setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); |
107 | flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); | 107 | flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING); |
108 | flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); | 108 | flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING); |
109 | return (int)(L->top - L->base); | 109 | return (int)(L->top - L->base); |
110 | #else | 110 | #else |
111 | setboolV(L->top++, 0); | 111 | setboolV(L->top++, 0); |
@@ -471,7 +471,7 @@ static int jitopt_flag(jit_State *J, const char *str) | |||
471 | str += str[2] == '-' ? 3 : 2; | 471 | str += str[2] == '-' ? 3 : 2; |
472 | set = 0; | 472 | set = 0; |
473 | } | 473 | } |
474 | for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { | 474 | for (opt = JIT_F_OPT; ; opt <<= 1) { |
475 | size_t len = *(const uint8_t *)lst; | 475 | size_t len = *(const uint8_t *)lst; |
476 | if (len == 0) | 476 | if (len == 0) |
477 | break; | 477 | break; |
@@ -640,59 +640,41 @@ JIT_PARAMDEF(JIT_PARAMINIT) | |||
640 | #undef JIT_PARAMINIT | 640 | #undef JIT_PARAMINIT |
641 | 0 | 641 | 0 |
642 | }; | 642 | }; |
643 | #endif | ||
644 | 643 | ||
645 | #if LJ_TARGET_ARM && LJ_TARGET_LINUX | 644 | #if LJ_TARGET_ARM && LJ_TARGET_LINUX |
646 | #include <sys/utsname.h> | 645 | #include <sys/utsname.h> |
647 | #endif | 646 | #endif |
648 | 647 | ||
649 | /* Arch-dependent CPU detection. */ | 648 | /* Arch-dependent CPU feature detection. */ |
650 | static uint32_t jit_cpudetect(lua_State *L) | 649 | static uint32_t jit_cpudetect(void) |
651 | { | 650 | { |
652 | uint32_t flags = 0; | 651 | uint32_t flags = 0; |
653 | #if LJ_TARGET_X86ORX64 | 652 | #if LJ_TARGET_X86ORX64 |
653 | |||
654 | uint32_t vendor[4]; | 654 | uint32_t vendor[4]; |
655 | uint32_t features[4]; | 655 | uint32_t features[4]; |
656 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { | 656 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { |
657 | #if !LJ_HASJIT | ||
658 | #define JIT_F_SSE2 2 | ||
659 | #endif | ||
660 | flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; | ||
661 | #if LJ_HASJIT | ||
662 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; | 657 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; |
663 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; | 658 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; |
664 | if (vendor[2] == 0x6c65746e) { /* Intel. */ | ||
665 | if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ | ||
666 | flags |= JIT_F_LEA_AGU; | ||
667 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ | ||
668 | uint32_t fam = (features[0] & 0x0ff00f00); | ||
669 | if (fam >= 0x00000f00) /* K8, K10. */ | ||
670 | flags |= JIT_F_PREFER_IMUL; | ||
671 | } | ||
672 | if (vendor[0] >= 7) { | 659 | if (vendor[0] >= 7) { |
673 | uint32_t xfeatures[4]; | 660 | uint32_t xfeatures[4]; |
674 | lj_vm_cpuid(7, xfeatures); | 661 | lj_vm_cpuid(7, xfeatures); |
675 | flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; | 662 | flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; |
676 | } | 663 | } |
677 | #endif | ||
678 | } | 664 | } |
679 | /* Check for required instruction set support on x86 (unnecessary on x64). */ | 665 | /* Don't bother checking for SSE2 -- the VM will crash before getting here. */ |
680 | #if LJ_TARGET_X86 | 666 | |
681 | if (!(flags & JIT_F_SSE2)) | ||
682 | luaL_error(L, "CPU with SSE2 required"); | ||
683 | #endif | ||
684 | #elif LJ_TARGET_ARM | 667 | #elif LJ_TARGET_ARM |
685 | #if LJ_HASJIT | 668 | |
686 | int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ | 669 | int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ |
687 | #if LJ_TARGET_LINUX | 670 | #if LJ_TARGET_LINUX |
688 | if (ver < 70) { /* Runtime ARM CPU detection. */ | 671 | if (ver < 70) { /* Runtime ARM CPU detection. */ |
689 | struct utsname ut; | 672 | struct utsname ut; |
690 | uname(&ut); | 673 | uname(&ut); |
691 | if (strncmp(ut.machine, "armv", 4) == 0) { | 674 | if (strncmp(ut.machine, "armv", 4) == 0) { |
692 | if (ut.machine[4] >= '7') | 675 | if (ut.machine[4] >= '8') ver = 80; |
693 | ver = 70; | 676 | else if (ut.machine[4] == '7') ver = 70; |
694 | else if (ut.machine[4] == '6') | 677 | else if (ut.machine[4] == '6') ver = 60; |
695 | ver = 60; | ||
696 | } | 678 | } |
697 | } | 679 | } |
698 | #endif | 680 | #endif |
@@ -700,20 +682,22 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
700 | ver >= 61 ? JIT_F_ARMV6T2_ : | 682 | ver >= 61 ? JIT_F_ARMV6T2_ : |
701 | ver >= 60 ? JIT_F_ARMV6_ : 0; | 683 | ver >= 60 ? JIT_F_ARMV6_ : 0; |
702 | flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; | 684 | flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; |
703 | #endif | 685 | |
704 | #elif LJ_TARGET_ARM64 | 686 | #elif LJ_TARGET_ARM64 |
687 | |||
705 | /* No optional CPU features to detect (for now). */ | 688 | /* No optional CPU features to detect (for now). */ |
689 | |||
706 | #elif LJ_TARGET_PPC | 690 | #elif LJ_TARGET_PPC |
707 | #if LJ_HASJIT | 691 | |
708 | #if LJ_ARCH_SQRT | 692 | #if LJ_ARCH_SQRT |
709 | flags |= JIT_F_SQRT; | 693 | flags |= JIT_F_SQRT; |
710 | #endif | 694 | #endif |
711 | #if LJ_ARCH_ROUND | 695 | #if LJ_ARCH_ROUND |
712 | flags |= JIT_F_ROUND; | 696 | flags |= JIT_F_ROUND; |
713 | #endif | 697 | #endif |
714 | #endif | 698 | |
715 | #elif LJ_TARGET_MIPS | 699 | #elif LJ_TARGET_MIPS |
716 | #if LJ_HASJIT | 700 | |
717 | /* Compile-time MIPS CPU detection. */ | 701 | /* Compile-time MIPS CPU detection. */ |
718 | #if LJ_ARCH_VERSION >= 20 | 702 | #if LJ_ARCH_VERSION >= 20 |
719 | flags |= JIT_F_MIPSXXR2; | 703 | flags |= JIT_F_MIPSXXR2; |
@@ -731,31 +715,28 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
731 | if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ | 715 | if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ |
732 | } | 716 | } |
733 | #endif | 717 | #endif |
734 | #endif | 718 | |
735 | #else | 719 | #else |
736 | #error "Missing CPU detection for this architecture" | 720 | #error "Missing CPU detection for this architecture" |
737 | #endif | 721 | #endif |
738 | UNUSED(L); | ||
739 | return flags; | 722 | return flags; |
740 | } | 723 | } |
741 | 724 | ||
742 | /* Initialize JIT compiler. */ | 725 | /* Initialize JIT compiler. */ |
743 | static void jit_init(lua_State *L) | 726 | static void jit_init(lua_State *L) |
744 | { | 727 | { |
745 | uint32_t flags = jit_cpudetect(L); | ||
746 | #if LJ_HASJIT | ||
747 | jit_State *J = L2J(L); | 728 | jit_State *J = L2J(L); |
748 | J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; | 729 | J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; |
749 | memcpy(J->param, jit_param_default, sizeof(J->param)); | 730 | memcpy(J->param, jit_param_default, sizeof(J->param)); |
750 | lj_dispatch_update(G(L)); | 731 | lj_dispatch_update(G(L)); |
751 | #else | ||
752 | UNUSED(flags); | ||
753 | #endif | ||
754 | } | 732 | } |
733 | #endif | ||
755 | 734 | ||
756 | LUALIB_API int luaopen_jit(lua_State *L) | 735 | LUALIB_API int luaopen_jit(lua_State *L) |
757 | { | 736 | { |
737 | #if LJ_HASJIT | ||
758 | jit_init(L); | 738 | jit_init(L); |
739 | #endif | ||
759 | lua_pushliteral(L, LJ_OS_NAME); | 740 | lua_pushliteral(L, LJ_OS_NAME); |
760 | lua_pushliteral(L, LJ_ARCH_NAME); | 741 | lua_pushliteral(L, LJ_ARCH_NAME); |
761 | lua_pushinteger(L, LUAJIT_VERSION_NUM); | 742 | lua_pushinteger(L, LUAJIT_VERSION_NUM); |
diff --git a/src/lj_arch.h b/src/lj_arch.h index 027b39ce..70426838 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
@@ -208,13 +208,13 @@ | |||
208 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | 208 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ |
209 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | 209 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL |
210 | 210 | ||
211 | #if __ARM_ARCH_8__ || __ARM_ARCH_8A__ | 211 | #if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ |
212 | #define LJ_ARCH_VERSION 80 | 212 | #define LJ_ARCH_VERSION 80 |
213 | #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ | 213 | #elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ |
214 | #define LJ_ARCH_VERSION 70 | 214 | #define LJ_ARCH_VERSION 70 |
215 | #elif __ARM_ARCH_6T2__ | 215 | #elif __ARM_ARCH_6T2__ |
216 | #define LJ_ARCH_VERSION 61 | 216 | #define LJ_ARCH_VERSION 61 |
217 | #elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ | 217 | #elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ |
218 | #define LJ_ARCH_VERSION 60 | 218 | #define LJ_ARCH_VERSION 60 |
219 | #else | 219 | #else |
220 | #define LJ_ARCH_VERSION 50 | 220 | #define LJ_ARCH_VERSION 50 |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index db3409b9..bf818f5a 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -1214,13 +1214,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
1214 | emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); | 1214 | emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); |
1215 | } else { | 1215 | } else { |
1216 | emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); | 1216 | emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); |
1217 | if ((as->flags & JIT_F_PREFER_IMUL)) { | 1217 | emit_shifti(as, XOg_SHL, dest, 3); |
1218 | emit_i8(as, sizeof(Node)); | 1218 | emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); |
1219 | emit_rr(as, XO_IMULi8, dest, dest); | ||
1220 | } else { | ||
1221 | emit_shifti(as, XOg_SHL, dest, 3); | ||
1222 | emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); | ||
1223 | } | ||
1224 | if (isk) { | 1219 | if (isk) { |
1225 | emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); | 1220 | emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); |
1226 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); | 1221 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); |
@@ -1279,7 +1274,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
1279 | lua_assert(ofs % sizeof(Node) == 0); | 1274 | lua_assert(ofs % sizeof(Node) == 0); |
1280 | if (ra_hasreg(dest)) { | 1275 | if (ra_hasreg(dest)) { |
1281 | if (ofs != 0) { | 1276 | if (ofs != 0) { |
1282 | if (dest == node && !(as->flags & JIT_F_LEA_AGU)) | 1277 | if (dest == node) |
1283 | emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); | 1278 | emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); |
1284 | else | 1279 | else |
1285 | emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); | 1280 | emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); |
@@ -2180,8 +2175,7 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
2180 | { | 2175 | { |
2181 | if (irt_isnum(ir->t)) | 2176 | if (irt_isnum(ir->t)) |
2182 | asm_fparith(as, ir, XO_ADDSD); | 2177 | asm_fparith(as, ir, XO_ADDSD); |
2183 | else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || | 2178 | else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir)) |
2184 | irt_is64(ir->t) || !asm_lea(as, ir)) | ||
2185 | asm_intarith(as, ir, XOg_ADD); | 2179 | asm_intarith(as, ir, XOg_ADD); |
2186 | } | 2180 | } |
2187 | 2181 | ||
@@ -2903,7 +2897,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
2903 | MCode *target, *q; | 2897 | MCode *target, *q; |
2904 | int32_t spadj = as->T->spadjust; | 2898 | int32_t spadj = as->T->spadjust; |
2905 | if (spadj == 0) { | 2899 | if (spadj == 0) { |
2906 | p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); | 2900 | p -= LJ_64 ? 7 : 6; |
2907 | } else { | 2901 | } else { |
2908 | MCode *p1; | 2902 | MCode *p1; |
2909 | /* Patch stack adjustment. */ | 2903 | /* Patch stack adjustment. */ |
@@ -2915,20 +2909,11 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
2915 | p1 = p-9; | 2909 | p1 = p-9; |
2916 | *(int32_t *)p1 = spadj; | 2910 | *(int32_t *)p1 = spadj; |
2917 | } | 2911 | } |
2918 | if ((as->flags & JIT_F_LEA_AGU)) { | ||
2919 | #if LJ_64 | ||
2920 | p1[-4] = 0x48; | ||
2921 | #endif | ||
2922 | p1[-3] = (MCode)XI_LEA; | ||
2923 | p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); | ||
2924 | p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); | ||
2925 | } else { | ||
2926 | #if LJ_64 | 2912 | #if LJ_64 |
2927 | p1[-3] = 0x48; | 2913 | p1[-3] = 0x48; |
2928 | #endif | 2914 | #endif |
2929 | p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); | 2915 | p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); |
2930 | p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); | 2916 | p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); |
2931 | } | ||
2932 | } | 2917 | } |
2933 | /* Patch exit branch. */ | 2918 | /* Patch exit branch. */ |
2934 | target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; | 2919 | target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; |
@@ -2959,7 +2944,7 @@ static void asm_tail_prep(ASMState *as) | |||
2959 | as->invmcp = as->mcp = p; | 2944 | as->invmcp = as->mcp = p; |
2960 | } else { | 2945 | } else { |
2961 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ | 2946 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ |
2962 | as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); | 2947 | as->mcp = p - (LJ_64 ? 7 : 6); |
2963 | as->invmcp = NULL; | 2948 | as->invmcp = NULL; |
2964 | } | 2949 | } |
2965 | } | 2950 | } |
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 8553438c..39416d00 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c | |||
@@ -252,15 +252,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) | |||
252 | } else { | 252 | } else { |
253 | if (!(mode & LUAJIT_MODE_ON)) | 253 | if (!(mode & LUAJIT_MODE_ON)) |
254 | G2J(g)->flags &= ~(uint32_t)JIT_F_ON; | 254 | G2J(g)->flags &= ~(uint32_t)JIT_F_ON; |
255 | #if LJ_TARGET_X86ORX64 | ||
256 | else if ((G2J(g)->flags & JIT_F_SSE2)) | ||
257 | G2J(g)->flags |= (uint32_t)JIT_F_ON; | ||
258 | else | ||
259 | return 0; /* Don't turn on JIT compiler without SSE2 support. */ | ||
260 | #else | ||
261 | else | 255 | else |
262 | G2J(g)->flags |= (uint32_t)JIT_F_ON; | 256 | G2J(g)->flags |= (uint32_t)JIT_F_ON; |
263 | #endif | ||
264 | lj_dispatch_update(g); | 257 | lj_dispatch_update(g); |
265 | } | 258 | } |
266 | break; | 259 | break; |
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index bc4391a0..b17e28a5 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h | |||
@@ -559,10 +559,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | |||
559 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | 559 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) |
560 | { | 560 | { |
561 | if (ofs) { | 561 | if (ofs) { |
562 | if ((as->flags & JIT_F_LEA_AGU)) | 562 | emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); |
563 | emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); | ||
564 | else | ||
565 | emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); | ||
566 | } | 563 | } |
567 | } | 564 | } |
568 | 565 | ||
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index efb7c3f3..9110dc7e 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h | |||
@@ -101,11 +101,7 @@ ERRDEF(STRGSRV, "invalid replacement value (a %s)") | |||
101 | ERRDEF(BADMODN, "name conflict for module " LUA_QS) | 101 | ERRDEF(BADMODN, "name conflict for module " LUA_QS) |
102 | #if LJ_HASJIT | 102 | #if LJ_HASJIT |
103 | ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") | 103 | ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") |
104 | #if LJ_TARGET_X86ORX64 | ||
105 | ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") | ||
106 | #else | ||
107 | ERRDEF(NOJIT, "JIT compiler disabled") | 104 | ERRDEF(NOJIT, "JIT compiler disabled") |
108 | #endif | ||
109 | #elif defined(LJ_ARCH_NOJIT) | 105 | #elif defined(LJ_ARCH_NOJIT) |
110 | ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") | 106 | ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") |
111 | #else | 107 | #else |
diff --git a/src/lj_jit.h b/src/lj_jit.h index f179f17f..a9c602f0 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -9,47 +9,49 @@ | |||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_ir.h" | 10 | #include "lj_ir.h" |
11 | 11 | ||
12 | /* JIT engine flags. */ | 12 | /* -- JIT engine flags ---------------------------------------------------- */ |
13 | |||
14 | /* General JIT engine flags. 4 bits. */ | ||
13 | #define JIT_F_ON 0x00000001 | 15 | #define JIT_F_ON 0x00000001 |
14 | 16 | ||
15 | /* CPU-specific JIT engine flags. */ | 17 | /* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */ |
18 | #define JIT_F_CPU 0x00000010 | ||
19 | |||
16 | #if LJ_TARGET_X86ORX64 | 20 | #if LJ_TARGET_X86ORX64 |
17 | #define JIT_F_SSE2 0x00000010 | 21 | |
18 | #define JIT_F_SSE3 0x00000020 | 22 | #define JIT_F_SSE3 (JIT_F_CPU << 0) |
19 | #define JIT_F_SSE4_1 0x00000040 | 23 | #define JIT_F_SSE4_1 (JIT_F_CPU << 1) |
20 | #define JIT_F_PREFER_IMUL 0x00000080 | 24 | #define JIT_F_BMI2 (JIT_F_CPU << 2) |
21 | #define JIT_F_LEA_AGU 0x00000100 | 25 | |
22 | #define JIT_F_BMI2 0x00000200 | 26 | |
23 | 27 | #define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2" | |
24 | /* Names for the CPU-specific flags. Must match the order above. */ | 28 | |
25 | #define JIT_F_CPU_FIRST JIT_F_SSE2 | ||
26 | #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" | ||
27 | #elif LJ_TARGET_ARM | 29 | #elif LJ_TARGET_ARM |
28 | #define JIT_F_ARMV6_ 0x00000010 | 30 | |
29 | #define JIT_F_ARMV6T2_ 0x00000020 | 31 | #define JIT_F_ARMV6_ (JIT_F_CPU << 0) |
30 | #define JIT_F_ARMV7 0x00000040 | 32 | #define JIT_F_ARMV6T2_ (JIT_F_CPU << 1) |
31 | #define JIT_F_VFPV2 0x00000080 | 33 | #define JIT_F_ARMV7 (JIT_F_CPU << 2) |
32 | #define JIT_F_VFPV3 0x00000100 | 34 | #define JIT_F_ARMV8 (JIT_F_CPU << 3) |
33 | 35 | #define JIT_F_VFPV2 (JIT_F_CPU << 4) | |
34 | #define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) | 36 | #define JIT_F_VFPV3 (JIT_F_CPU << 5) |
35 | #define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) | 37 | |
38 | #define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) | ||
39 | #define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) | ||
36 | #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) | 40 | #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) |
37 | 41 | ||
38 | /* Names for the CPU-specific flags. Must match the order above. */ | 42 | #define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3" |
39 | #define JIT_F_CPU_FIRST JIT_F_ARMV6_ | 43 | |
40 | #define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" | ||
41 | #elif LJ_TARGET_PPC | 44 | #elif LJ_TARGET_PPC |
42 | #define JIT_F_SQRT 0x00000010 | ||
43 | #define JIT_F_ROUND 0x00000020 | ||
44 | 45 | ||
45 | /* Names for the CPU-specific flags. Must match the order above. */ | 46 | #define JIT_F_SQRT (JIT_F_CPU << 0) |
46 | #define JIT_F_CPU_FIRST JIT_F_SQRT | 47 | #define JIT_F_ROUND (JIT_F_CPU << 1) |
48 | |||
47 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" | 49 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" |
50 | |||
48 | #elif LJ_TARGET_MIPS | 51 | #elif LJ_TARGET_MIPS |
49 | #define JIT_F_MIPSXXR2 0x00000010 | ||
50 | 52 | ||
51 | /* Names for the CPU-specific flags. Must match the order above. */ | 53 | #define JIT_F_MIPSXXR2 (JIT_F_CPU << 0) |
52 | #define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 | 54 | |
53 | #if LJ_TARGET_MIPS32 | 55 | #if LJ_TARGET_MIPS32 |
54 | #if LJ_TARGET_MIPSR6 | 56 | #if LJ_TARGET_MIPSR6 |
55 | #define JIT_F_CPUSTRING "\010MIPS32R6" | 57 | #define JIT_F_CPUSTRING "\010MIPS32R6" |
@@ -63,27 +65,29 @@ | |||
63 | #define JIT_F_CPUSTRING "\010MIPS64R2" | 65 | #define JIT_F_CPUSTRING "\010MIPS64R2" |
64 | #endif | 66 | #endif |
65 | #endif | 67 | #endif |
68 | |||
66 | #else | 69 | #else |
67 | #define JIT_F_CPU_FIRST 0 | 70 | |
68 | #define JIT_F_CPUSTRING "" | 71 | #define JIT_F_CPUSTRING "" |
72 | |||
69 | #endif | 73 | #endif |
70 | 74 | ||
71 | /* Optimization flags. */ | 75 | /* Optimization flags. 12 bits. */ |
76 | #define JIT_F_OPT 0x00010000 | ||
72 | #define JIT_F_OPT_MASK 0x0fff0000 | 77 | #define JIT_F_OPT_MASK 0x0fff0000 |
73 | 78 | ||
74 | #define JIT_F_OPT_FOLD 0x00010000 | 79 | #define JIT_F_OPT_FOLD (JIT_F_OPT << 0) |
75 | #define JIT_F_OPT_CSE 0x00020000 | 80 | #define JIT_F_OPT_CSE (JIT_F_OPT << 1) |
76 | #define JIT_F_OPT_DCE 0x00040000 | 81 | #define JIT_F_OPT_DCE (JIT_F_OPT << 2) |
77 | #define JIT_F_OPT_FWD 0x00080000 | 82 | #define JIT_F_OPT_FWD (JIT_F_OPT << 3) |
78 | #define JIT_F_OPT_DSE 0x00100000 | 83 | #define JIT_F_OPT_DSE (JIT_F_OPT << 4) |
79 | #define JIT_F_OPT_NARROW 0x00200000 | 84 | #define JIT_F_OPT_NARROW (JIT_F_OPT << 5) |
80 | #define JIT_F_OPT_LOOP 0x00400000 | 85 | #define JIT_F_OPT_LOOP (JIT_F_OPT << 6) |
81 | #define JIT_F_OPT_ABC 0x00800000 | 86 | #define JIT_F_OPT_ABC (JIT_F_OPT << 7) |
82 | #define JIT_F_OPT_SINK 0x01000000 | 87 | #define JIT_F_OPT_SINK (JIT_F_OPT << 8) |
83 | #define JIT_F_OPT_FUSE 0x02000000 | 88 | #define JIT_F_OPT_FUSE (JIT_F_OPT << 9) |
84 | 89 | ||
85 | /* Optimizations names for -O. Must match the order above. */ | 90 | /* Optimizations names for -O. Must match the order above. */ |
86 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD | ||
87 | #define JIT_F_OPTSTRING \ | 91 | #define JIT_F_OPTSTRING \ |
88 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" | 92 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" |
89 | 93 | ||
@@ -95,6 +99,8 @@ | |||
95 | JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) | 99 | JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) |
96 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 | 100 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 |
97 | 101 | ||
102 | /* -- JIT engine parameters ----------------------------------------------- */ | ||
103 | |||
98 | #if LJ_TARGET_WINDOWS || LJ_64 | 104 | #if LJ_TARGET_WINDOWS || LJ_64 |
99 | /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ | 105 | /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ |
100 | #define JIT_P_sizemcode_DEFAULT 64 | 106 | #define JIT_P_sizemcode_DEFAULT 64 |
@@ -137,6 +143,8 @@ JIT_PARAMDEF(JIT_PARAMENUM) | |||
137 | #define JIT_PARAMSTR(len, name, value) #len #name | 143 | #define JIT_PARAMSTR(len, name, value) #len #name |
138 | #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) | 144 | #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) |
139 | 145 | ||
146 | /* -- JIT engine data structures ------------------------------------------ */ | ||
147 | |||
140 | /* Trace compiler state. */ | 148 | /* Trace compiler state. */ |
141 | typedef enum { | 149 | typedef enum { |
142 | LJ_TRACE_IDLE, /* Trace compiler idle. */ | 150 | LJ_TRACE_IDLE, /* Trace compiler idle. */ |
diff --git a/src/ljamalg.c b/src/ljamalg.c index 39542981..6712d435 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c | |||
@@ -3,16 +3,6 @@ | |||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | 3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h |
4 | */ | 4 | */ |
5 | 5 | ||
6 | /* | ||
7 | +--------------------------------------------------------------------------+ | ||
8 | | WARNING: Compiling the amalgamation needs a lot of virtual memory | | ||
9 | | (around 300 MB with GCC 4.x)! If you don't have enough physical memory | | ||
10 | | your machine will start swapping to disk and the compile will not finish | | ||
11 | | within a reasonable amount of time. | | ||
12 | | So either compile on a bigger machine or use the non-amalgamated build. | | ||
13 | +--------------------------------------------------------------------------+ | ||
14 | */ | ||
15 | |||
16 | #define ljamalg_c | 6 | #define ljamalg_c |
17 | #define LUA_CORE | 7 | #define LUA_CORE |
18 | 8 | ||