diff options
| author | Mike Pall <mike> | 2020-05-20 20:42:04 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2020-05-20 20:42:04 +0200 |
| commit | 0eddcbead2d67c16dcd4039a6765b9d2fc8ea631 (patch) | |
| tree | 614b63b87bb6ba476b616b10b95e278d4af0c452 /src | |
| parent | 5bf0da3d7c02f9959fa3a9fb721e0565137b70c8 (diff) | |
| download | luajit-0eddcbead2d67c16dcd4039a6765b9d2fc8ea631.tar.gz luajit-0eddcbead2d67c16dcd4039a6765b9d2fc8ea631.tar.bz2 luajit-0eddcbead2d67c16dcd4039a6765b9d2fc8ea631.zip | |
Cleanup CPU detection and tuning for old CPUs.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Makefile | 1 | ||||
| -rw-r--r-- | src/lib_jit.c | 65 | ||||
| -rw-r--r-- | src/lj_arch.h | 6 | ||||
| -rw-r--r-- | src/lj_asm_x86.h | 33 | ||||
| -rw-r--r-- | src/lj_dispatch.c | 7 | ||||
| -rw-r--r-- | src/lj_emit_x86.h | 5 | ||||
| -rw-r--r-- | src/lj_errmsg.h | 4 | ||||
| -rw-r--r-- | src/lj_jit.h | 94 | ||||
| -rw-r--r-- | src/ljamalg.c | 10 |
9 files changed, 87 insertions, 138 deletions
diff --git a/src/Makefile b/src/Makefile index 07a94251..82a57032 100644 --- a/src/Makefile +++ b/src/Makefile | |||
| @@ -603,7 +603,6 @@ E= @echo | |||
| 603 | default all: $(TARGET_T) | 603 | default all: $(TARGET_T) |
| 604 | 604 | ||
| 605 | amalg: | 605 | amalg: |
| 606 | @grep "^[+|]" ljamalg.c | ||
| 607 | $(MAKE) all "LJCORE_O=ljamalg.o" | 606 | $(MAKE) all "LJCORE_O=ljamalg.o" |
| 608 | 607 | ||
| 609 | clean: | 608 | clean: |
diff --git a/src/lib_jit.c b/src/lib_jit.c index c97b0d53..acd6c293 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c | |||
| @@ -104,8 +104,8 @@ LJLIB_CF(jit_status) | |||
| 104 | jit_State *J = L2J(L); | 104 | jit_State *J = L2J(L); |
| 105 | L->top = L->base; | 105 | L->top = L->base; |
| 106 | setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); | 106 | setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); |
| 107 | flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); | 107 | flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING); |
| 108 | flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); | 108 | flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING); |
| 109 | return (int)(L->top - L->base); | 109 | return (int)(L->top - L->base); |
| 110 | #else | 110 | #else |
| 111 | setboolV(L->top++, 0); | 111 | setboolV(L->top++, 0); |
| @@ -471,7 +471,7 @@ static int jitopt_flag(jit_State *J, const char *str) | |||
| 471 | str += str[2] == '-' ? 3 : 2; | 471 | str += str[2] == '-' ? 3 : 2; |
| 472 | set = 0; | 472 | set = 0; |
| 473 | } | 473 | } |
| 474 | for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { | 474 | for (opt = JIT_F_OPT; ; opt <<= 1) { |
| 475 | size_t len = *(const uint8_t *)lst; | 475 | size_t len = *(const uint8_t *)lst; |
| 476 | if (len == 0) | 476 | if (len == 0) |
| 477 | break; | 477 | break; |
| @@ -640,59 +640,41 @@ JIT_PARAMDEF(JIT_PARAMINIT) | |||
| 640 | #undef JIT_PARAMINIT | 640 | #undef JIT_PARAMINIT |
| 641 | 0 | 641 | 0 |
| 642 | }; | 642 | }; |
| 643 | #endif | ||
| 644 | 643 | ||
| 645 | #if LJ_TARGET_ARM && LJ_TARGET_LINUX | 644 | #if LJ_TARGET_ARM && LJ_TARGET_LINUX |
| 646 | #include <sys/utsname.h> | 645 | #include <sys/utsname.h> |
| 647 | #endif | 646 | #endif |
| 648 | 647 | ||
| 649 | /* Arch-dependent CPU detection. */ | 648 | /* Arch-dependent CPU feature detection. */ |
| 650 | static uint32_t jit_cpudetect(lua_State *L) | 649 | static uint32_t jit_cpudetect(void) |
| 651 | { | 650 | { |
| 652 | uint32_t flags = 0; | 651 | uint32_t flags = 0; |
| 653 | #if LJ_TARGET_X86ORX64 | 652 | #if LJ_TARGET_X86ORX64 |
| 653 | |||
| 654 | uint32_t vendor[4]; | 654 | uint32_t vendor[4]; |
| 655 | uint32_t features[4]; | 655 | uint32_t features[4]; |
| 656 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { | 656 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { |
| 657 | #if !LJ_HASJIT | ||
| 658 | #define JIT_F_SSE2 2 | ||
| 659 | #endif | ||
| 660 | flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; | ||
| 661 | #if LJ_HASJIT | ||
| 662 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; | 657 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; |
| 663 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; | 658 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; |
| 664 | if (vendor[2] == 0x6c65746e) { /* Intel. */ | ||
| 665 | if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ | ||
| 666 | flags |= JIT_F_LEA_AGU; | ||
| 667 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ | ||
| 668 | uint32_t fam = (features[0] & 0x0ff00f00); | ||
| 669 | if (fam >= 0x00000f00) /* K8, K10. */ | ||
| 670 | flags |= JIT_F_PREFER_IMUL; | ||
| 671 | } | ||
| 672 | if (vendor[0] >= 7) { | 659 | if (vendor[0] >= 7) { |
| 673 | uint32_t xfeatures[4]; | 660 | uint32_t xfeatures[4]; |
| 674 | lj_vm_cpuid(7, xfeatures); | 661 | lj_vm_cpuid(7, xfeatures); |
| 675 | flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; | 662 | flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; |
| 676 | } | 663 | } |
| 677 | #endif | ||
| 678 | } | 664 | } |
| 679 | /* Check for required instruction set support on x86 (unnecessary on x64). */ | 665 | /* Don't bother checking for SSE2 -- the VM will crash before getting here. */ |
| 680 | #if LJ_TARGET_X86 | 666 | |
| 681 | if (!(flags & JIT_F_SSE2)) | ||
| 682 | luaL_error(L, "CPU with SSE2 required"); | ||
| 683 | #endif | ||
| 684 | #elif LJ_TARGET_ARM | 667 | #elif LJ_TARGET_ARM |
| 685 | #if LJ_HASJIT | 668 | |
| 686 | int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ | 669 | int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ |
| 687 | #if LJ_TARGET_LINUX | 670 | #if LJ_TARGET_LINUX |
| 688 | if (ver < 70) { /* Runtime ARM CPU detection. */ | 671 | if (ver < 70) { /* Runtime ARM CPU detection. */ |
| 689 | struct utsname ut; | 672 | struct utsname ut; |
| 690 | uname(&ut); | 673 | uname(&ut); |
| 691 | if (strncmp(ut.machine, "armv", 4) == 0) { | 674 | if (strncmp(ut.machine, "armv", 4) == 0) { |
| 692 | if (ut.machine[4] >= '7') | 675 | if (ut.machine[4] >= '8') ver = 80; |
| 693 | ver = 70; | 676 | else if (ut.machine[4] == '7') ver = 70; |
| 694 | else if (ut.machine[4] == '6') | 677 | else if (ut.machine[4] == '6') ver = 60; |
| 695 | ver = 60; | ||
| 696 | } | 678 | } |
| 697 | } | 679 | } |
| 698 | #endif | 680 | #endif |
| @@ -700,20 +682,22 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
| 700 | ver >= 61 ? JIT_F_ARMV6T2_ : | 682 | ver >= 61 ? JIT_F_ARMV6T2_ : |
| 701 | ver >= 60 ? JIT_F_ARMV6_ : 0; | 683 | ver >= 60 ? JIT_F_ARMV6_ : 0; |
| 702 | flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; | 684 | flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; |
| 703 | #endif | 685 | |
| 704 | #elif LJ_TARGET_ARM64 | 686 | #elif LJ_TARGET_ARM64 |
| 687 | |||
| 705 | /* No optional CPU features to detect (for now). */ | 688 | /* No optional CPU features to detect (for now). */ |
| 689 | |||
| 706 | #elif LJ_TARGET_PPC | 690 | #elif LJ_TARGET_PPC |
| 707 | #if LJ_HASJIT | 691 | |
| 708 | #if LJ_ARCH_SQRT | 692 | #if LJ_ARCH_SQRT |
| 709 | flags |= JIT_F_SQRT; | 693 | flags |= JIT_F_SQRT; |
| 710 | #endif | 694 | #endif |
| 711 | #if LJ_ARCH_ROUND | 695 | #if LJ_ARCH_ROUND |
| 712 | flags |= JIT_F_ROUND; | 696 | flags |= JIT_F_ROUND; |
| 713 | #endif | 697 | #endif |
| 714 | #endif | 698 | |
| 715 | #elif LJ_TARGET_MIPS | 699 | #elif LJ_TARGET_MIPS |
| 716 | #if LJ_HASJIT | 700 | |
| 717 | /* Compile-time MIPS CPU detection. */ | 701 | /* Compile-time MIPS CPU detection. */ |
| 718 | #if LJ_ARCH_VERSION >= 20 | 702 | #if LJ_ARCH_VERSION >= 20 |
| 719 | flags |= JIT_F_MIPSXXR2; | 703 | flags |= JIT_F_MIPSXXR2; |
| @@ -731,31 +715,28 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
| 731 | if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ | 715 | if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ |
| 732 | } | 716 | } |
| 733 | #endif | 717 | #endif |
| 734 | #endif | 718 | |
| 735 | #else | 719 | #else |
| 736 | #error "Missing CPU detection for this architecture" | 720 | #error "Missing CPU detection for this architecture" |
| 737 | #endif | 721 | #endif |
| 738 | UNUSED(L); | ||
| 739 | return flags; | 722 | return flags; |
| 740 | } | 723 | } |
| 741 | 724 | ||
| 742 | /* Initialize JIT compiler. */ | 725 | /* Initialize JIT compiler. */ |
| 743 | static void jit_init(lua_State *L) | 726 | static void jit_init(lua_State *L) |
| 744 | { | 727 | { |
| 745 | uint32_t flags = jit_cpudetect(L); | ||
| 746 | #if LJ_HASJIT | ||
| 747 | jit_State *J = L2J(L); | 728 | jit_State *J = L2J(L); |
| 748 | J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; | 729 | J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; |
| 749 | memcpy(J->param, jit_param_default, sizeof(J->param)); | 730 | memcpy(J->param, jit_param_default, sizeof(J->param)); |
| 750 | lj_dispatch_update(G(L)); | 731 | lj_dispatch_update(G(L)); |
| 751 | #else | ||
| 752 | UNUSED(flags); | ||
| 753 | #endif | ||
| 754 | } | 732 | } |
| 733 | #endif | ||
| 755 | 734 | ||
| 756 | LUALIB_API int luaopen_jit(lua_State *L) | 735 | LUALIB_API int luaopen_jit(lua_State *L) |
| 757 | { | 736 | { |
| 737 | #if LJ_HASJIT | ||
| 758 | jit_init(L); | 738 | jit_init(L); |
| 739 | #endif | ||
| 759 | lua_pushliteral(L, LJ_OS_NAME); | 740 | lua_pushliteral(L, LJ_OS_NAME); |
| 760 | lua_pushliteral(L, LJ_ARCH_NAME); | 741 | lua_pushliteral(L, LJ_ARCH_NAME); |
| 761 | lua_pushinteger(L, LUAJIT_VERSION_NUM); | 742 | lua_pushinteger(L, LUAJIT_VERSION_NUM); |
diff --git a/src/lj_arch.h b/src/lj_arch.h index 027b39ce..70426838 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
| @@ -208,13 +208,13 @@ | |||
| 208 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | 208 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ |
| 209 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | 209 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL |
| 210 | 210 | ||
| 211 | #if __ARM_ARCH_8__ || __ARM_ARCH_8A__ | 211 | #if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ |
| 212 | #define LJ_ARCH_VERSION 80 | 212 | #define LJ_ARCH_VERSION 80 |
| 213 | #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ | 213 | #elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ |
| 214 | #define LJ_ARCH_VERSION 70 | 214 | #define LJ_ARCH_VERSION 70 |
| 215 | #elif __ARM_ARCH_6T2__ | 215 | #elif __ARM_ARCH_6T2__ |
| 216 | #define LJ_ARCH_VERSION 61 | 216 | #define LJ_ARCH_VERSION 61 |
| 217 | #elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ | 217 | #elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ |
| 218 | #define LJ_ARCH_VERSION 60 | 218 | #define LJ_ARCH_VERSION 60 |
| 219 | #else | 219 | #else |
| 220 | #define LJ_ARCH_VERSION 50 | 220 | #define LJ_ARCH_VERSION 50 |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index db3409b9..bf818f5a 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
| @@ -1214,13 +1214,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
| 1214 | emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); | 1214 | emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); |
| 1215 | } else { | 1215 | } else { |
| 1216 | emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); | 1216 | emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); |
| 1217 | if ((as->flags & JIT_F_PREFER_IMUL)) { | 1217 | emit_shifti(as, XOg_SHL, dest, 3); |
| 1218 | emit_i8(as, sizeof(Node)); | 1218 | emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); |
| 1219 | emit_rr(as, XO_IMULi8, dest, dest); | ||
| 1220 | } else { | ||
| 1221 | emit_shifti(as, XOg_SHL, dest, 3); | ||
| 1222 | emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); | ||
| 1223 | } | ||
| 1224 | if (isk) { | 1219 | if (isk) { |
| 1225 | emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); | 1220 | emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); |
| 1226 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); | 1221 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); |
| @@ -1279,7 +1274,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
| 1279 | lua_assert(ofs % sizeof(Node) == 0); | 1274 | lua_assert(ofs % sizeof(Node) == 0); |
| 1280 | if (ra_hasreg(dest)) { | 1275 | if (ra_hasreg(dest)) { |
| 1281 | if (ofs != 0) { | 1276 | if (ofs != 0) { |
| 1282 | if (dest == node && !(as->flags & JIT_F_LEA_AGU)) | 1277 | if (dest == node) |
| 1283 | emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); | 1278 | emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); |
| 1284 | else | 1279 | else |
| 1285 | emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); | 1280 | emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); |
| @@ -2180,8 +2175,7 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
| 2180 | { | 2175 | { |
| 2181 | if (irt_isnum(ir->t)) | 2176 | if (irt_isnum(ir->t)) |
| 2182 | asm_fparith(as, ir, XO_ADDSD); | 2177 | asm_fparith(as, ir, XO_ADDSD); |
| 2183 | else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || | 2178 | else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir)) |
| 2184 | irt_is64(ir->t) || !asm_lea(as, ir)) | ||
| 2185 | asm_intarith(as, ir, XOg_ADD); | 2179 | asm_intarith(as, ir, XOg_ADD); |
| 2186 | } | 2180 | } |
| 2187 | 2181 | ||
| @@ -2903,7 +2897,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
| 2903 | MCode *target, *q; | 2897 | MCode *target, *q; |
| 2904 | int32_t spadj = as->T->spadjust; | 2898 | int32_t spadj = as->T->spadjust; |
| 2905 | if (spadj == 0) { | 2899 | if (spadj == 0) { |
| 2906 | p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); | 2900 | p -= LJ_64 ? 7 : 6; |
| 2907 | } else { | 2901 | } else { |
| 2908 | MCode *p1; | 2902 | MCode *p1; |
| 2909 | /* Patch stack adjustment. */ | 2903 | /* Patch stack adjustment. */ |
| @@ -2915,20 +2909,11 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
| 2915 | p1 = p-9; | 2909 | p1 = p-9; |
| 2916 | *(int32_t *)p1 = spadj; | 2910 | *(int32_t *)p1 = spadj; |
| 2917 | } | 2911 | } |
| 2918 | if ((as->flags & JIT_F_LEA_AGU)) { | ||
| 2919 | #if LJ_64 | ||
| 2920 | p1[-4] = 0x48; | ||
| 2921 | #endif | ||
| 2922 | p1[-3] = (MCode)XI_LEA; | ||
| 2923 | p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); | ||
| 2924 | p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); | ||
| 2925 | } else { | ||
| 2926 | #if LJ_64 | 2912 | #if LJ_64 |
| 2927 | p1[-3] = 0x48; | 2913 | p1[-3] = 0x48; |
| 2928 | #endif | 2914 | #endif |
| 2929 | p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); | 2915 | p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); |
| 2930 | p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); | 2916 | p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); |
| 2931 | } | ||
| 2932 | } | 2917 | } |
| 2933 | /* Patch exit branch. */ | 2918 | /* Patch exit branch. */ |
| 2934 | target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; | 2919 | target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; |
| @@ -2959,7 +2944,7 @@ static void asm_tail_prep(ASMState *as) | |||
| 2959 | as->invmcp = as->mcp = p; | 2944 | as->invmcp = as->mcp = p; |
| 2960 | } else { | 2945 | } else { |
| 2961 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ | 2946 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ |
| 2962 | as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); | 2947 | as->mcp = p - (LJ_64 ? 7 : 6); |
| 2963 | as->invmcp = NULL; | 2948 | as->invmcp = NULL; |
| 2964 | } | 2949 | } |
| 2965 | } | 2950 | } |
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 8553438c..39416d00 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c | |||
| @@ -252,15 +252,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) | |||
| 252 | } else { | 252 | } else { |
| 253 | if (!(mode & LUAJIT_MODE_ON)) | 253 | if (!(mode & LUAJIT_MODE_ON)) |
| 254 | G2J(g)->flags &= ~(uint32_t)JIT_F_ON; | 254 | G2J(g)->flags &= ~(uint32_t)JIT_F_ON; |
| 255 | #if LJ_TARGET_X86ORX64 | ||
| 256 | else if ((G2J(g)->flags & JIT_F_SSE2)) | ||
| 257 | G2J(g)->flags |= (uint32_t)JIT_F_ON; | ||
| 258 | else | ||
| 259 | return 0; /* Don't turn on JIT compiler without SSE2 support. */ | ||
| 260 | #else | ||
| 261 | else | 255 | else |
| 262 | G2J(g)->flags |= (uint32_t)JIT_F_ON; | 256 | G2J(g)->flags |= (uint32_t)JIT_F_ON; |
| 263 | #endif | ||
| 264 | lj_dispatch_update(g); | 257 | lj_dispatch_update(g); |
| 265 | } | 258 | } |
| 266 | break; | 259 | break; |
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index bc4391a0..b17e28a5 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h | |||
| @@ -559,10 +559,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | |||
| 559 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | 559 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) |
| 560 | { | 560 | { |
| 561 | if (ofs) { | 561 | if (ofs) { |
| 562 | if ((as->flags & JIT_F_LEA_AGU)) | 562 | emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); |
| 563 | emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); | ||
| 564 | else | ||
| 565 | emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); | ||
| 566 | } | 563 | } |
| 567 | } | 564 | } |
| 568 | 565 | ||
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index efb7c3f3..9110dc7e 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h | |||
| @@ -101,11 +101,7 @@ ERRDEF(STRGSRV, "invalid replacement value (a %s)") | |||
| 101 | ERRDEF(BADMODN, "name conflict for module " LUA_QS) | 101 | ERRDEF(BADMODN, "name conflict for module " LUA_QS) |
| 102 | #if LJ_HASJIT | 102 | #if LJ_HASJIT |
| 103 | ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") | 103 | ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") |
| 104 | #if LJ_TARGET_X86ORX64 | ||
| 105 | ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") | ||
| 106 | #else | ||
| 107 | ERRDEF(NOJIT, "JIT compiler disabled") | 104 | ERRDEF(NOJIT, "JIT compiler disabled") |
| 108 | #endif | ||
| 109 | #elif defined(LJ_ARCH_NOJIT) | 105 | #elif defined(LJ_ARCH_NOJIT) |
| 110 | ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") | 106 | ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") |
| 111 | #else | 107 | #else |
diff --git a/src/lj_jit.h b/src/lj_jit.h index f179f17f..a9c602f0 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
| @@ -9,47 +9,49 @@ | |||
| 9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
| 10 | #include "lj_ir.h" | 10 | #include "lj_ir.h" |
| 11 | 11 | ||
| 12 | /* JIT engine flags. */ | 12 | /* -- JIT engine flags ---------------------------------------------------- */ |
| 13 | |||
| 14 | /* General JIT engine flags. 4 bits. */ | ||
| 13 | #define JIT_F_ON 0x00000001 | 15 | #define JIT_F_ON 0x00000001 |
| 14 | 16 | ||
| 15 | /* CPU-specific JIT engine flags. */ | 17 | /* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */ |
| 18 | #define JIT_F_CPU 0x00000010 | ||
| 19 | |||
| 16 | #if LJ_TARGET_X86ORX64 | 20 | #if LJ_TARGET_X86ORX64 |
| 17 | #define JIT_F_SSE2 0x00000010 | 21 | |
| 18 | #define JIT_F_SSE3 0x00000020 | 22 | #define JIT_F_SSE3 (JIT_F_CPU << 0) |
| 19 | #define JIT_F_SSE4_1 0x00000040 | 23 | #define JIT_F_SSE4_1 (JIT_F_CPU << 1) |
| 20 | #define JIT_F_PREFER_IMUL 0x00000080 | 24 | #define JIT_F_BMI2 (JIT_F_CPU << 2) |
| 21 | #define JIT_F_LEA_AGU 0x00000100 | 25 | |
| 22 | #define JIT_F_BMI2 0x00000200 | 26 | |
| 23 | 27 | #define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2" | |
| 24 | /* Names for the CPU-specific flags. Must match the order above. */ | 28 | |
| 25 | #define JIT_F_CPU_FIRST JIT_F_SSE2 | ||
| 26 | #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" | ||
| 27 | #elif LJ_TARGET_ARM | 29 | #elif LJ_TARGET_ARM |
| 28 | #define JIT_F_ARMV6_ 0x00000010 | 30 | |
| 29 | #define JIT_F_ARMV6T2_ 0x00000020 | 31 | #define JIT_F_ARMV6_ (JIT_F_CPU << 0) |
| 30 | #define JIT_F_ARMV7 0x00000040 | 32 | #define JIT_F_ARMV6T2_ (JIT_F_CPU << 1) |
| 31 | #define JIT_F_VFPV2 0x00000080 | 33 | #define JIT_F_ARMV7 (JIT_F_CPU << 2) |
| 32 | #define JIT_F_VFPV3 0x00000100 | 34 | #define JIT_F_ARMV8 (JIT_F_CPU << 3) |
| 33 | 35 | #define JIT_F_VFPV2 (JIT_F_CPU << 4) | |
| 34 | #define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) | 36 | #define JIT_F_VFPV3 (JIT_F_CPU << 5) |
| 35 | #define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) | 37 | |
| 38 | #define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) | ||
| 39 | #define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) | ||
| 36 | #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) | 40 | #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) |
| 37 | 41 | ||
| 38 | /* Names for the CPU-specific flags. Must match the order above. */ | 42 | #define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3" |
| 39 | #define JIT_F_CPU_FIRST JIT_F_ARMV6_ | 43 | |
| 40 | #define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" | ||
| 41 | #elif LJ_TARGET_PPC | 44 | #elif LJ_TARGET_PPC |
| 42 | #define JIT_F_SQRT 0x00000010 | ||
| 43 | #define JIT_F_ROUND 0x00000020 | ||
| 44 | 45 | ||
| 45 | /* Names for the CPU-specific flags. Must match the order above. */ | 46 | #define JIT_F_SQRT (JIT_F_CPU << 0) |
| 46 | #define JIT_F_CPU_FIRST JIT_F_SQRT | 47 | #define JIT_F_ROUND (JIT_F_CPU << 1) |
| 48 | |||
| 47 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" | 49 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" |
| 50 | |||
| 48 | #elif LJ_TARGET_MIPS | 51 | #elif LJ_TARGET_MIPS |
| 49 | #define JIT_F_MIPSXXR2 0x00000010 | ||
| 50 | 52 | ||
| 51 | /* Names for the CPU-specific flags. Must match the order above. */ | 53 | #define JIT_F_MIPSXXR2 (JIT_F_CPU << 0) |
| 52 | #define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 | 54 | |
| 53 | #if LJ_TARGET_MIPS32 | 55 | #if LJ_TARGET_MIPS32 |
| 54 | #if LJ_TARGET_MIPSR6 | 56 | #if LJ_TARGET_MIPSR6 |
| 55 | #define JIT_F_CPUSTRING "\010MIPS32R6" | 57 | #define JIT_F_CPUSTRING "\010MIPS32R6" |
| @@ -63,27 +65,29 @@ | |||
| 63 | #define JIT_F_CPUSTRING "\010MIPS64R2" | 65 | #define JIT_F_CPUSTRING "\010MIPS64R2" |
| 64 | #endif | 66 | #endif |
| 65 | #endif | 67 | #endif |
| 68 | |||
| 66 | #else | 69 | #else |
| 67 | #define JIT_F_CPU_FIRST 0 | 70 | |
| 68 | #define JIT_F_CPUSTRING "" | 71 | #define JIT_F_CPUSTRING "" |
| 72 | |||
| 69 | #endif | 73 | #endif |
| 70 | 74 | ||
| 71 | /* Optimization flags. */ | 75 | /* Optimization flags. 12 bits. */ |
| 76 | #define JIT_F_OPT 0x00010000 | ||
| 72 | #define JIT_F_OPT_MASK 0x0fff0000 | 77 | #define JIT_F_OPT_MASK 0x0fff0000 |
| 73 | 78 | ||
| 74 | #define JIT_F_OPT_FOLD 0x00010000 | 79 | #define JIT_F_OPT_FOLD (JIT_F_OPT << 0) |
| 75 | #define JIT_F_OPT_CSE 0x00020000 | 80 | #define JIT_F_OPT_CSE (JIT_F_OPT << 1) |
| 76 | #define JIT_F_OPT_DCE 0x00040000 | 81 | #define JIT_F_OPT_DCE (JIT_F_OPT << 2) |
| 77 | #define JIT_F_OPT_FWD 0x00080000 | 82 | #define JIT_F_OPT_FWD (JIT_F_OPT << 3) |
| 78 | #define JIT_F_OPT_DSE 0x00100000 | 83 | #define JIT_F_OPT_DSE (JIT_F_OPT << 4) |
| 79 | #define JIT_F_OPT_NARROW 0x00200000 | 84 | #define JIT_F_OPT_NARROW (JIT_F_OPT << 5) |
| 80 | #define JIT_F_OPT_LOOP 0x00400000 | 85 | #define JIT_F_OPT_LOOP (JIT_F_OPT << 6) |
| 81 | #define JIT_F_OPT_ABC 0x00800000 | 86 | #define JIT_F_OPT_ABC (JIT_F_OPT << 7) |
| 82 | #define JIT_F_OPT_SINK 0x01000000 | 87 | #define JIT_F_OPT_SINK (JIT_F_OPT << 8) |
| 83 | #define JIT_F_OPT_FUSE 0x02000000 | 88 | #define JIT_F_OPT_FUSE (JIT_F_OPT << 9) |
| 84 | 89 | ||
| 85 | /* Optimizations names for -O. Must match the order above. */ | 90 | /* Optimizations names for -O. Must match the order above. */ |
| 86 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD | ||
| 87 | #define JIT_F_OPTSTRING \ | 91 | #define JIT_F_OPTSTRING \ |
| 88 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" | 92 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" |
| 89 | 93 | ||
| @@ -95,6 +99,8 @@ | |||
| 95 | JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) | 99 | JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) |
| 96 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 | 100 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 |
| 97 | 101 | ||
| 102 | /* -- JIT engine parameters ----------------------------------------------- */ | ||
| 103 | |||
| 98 | #if LJ_TARGET_WINDOWS || LJ_64 | 104 | #if LJ_TARGET_WINDOWS || LJ_64 |
| 99 | /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ | 105 | /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ |
| 100 | #define JIT_P_sizemcode_DEFAULT 64 | 106 | #define JIT_P_sizemcode_DEFAULT 64 |
| @@ -137,6 +143,8 @@ JIT_PARAMDEF(JIT_PARAMENUM) | |||
| 137 | #define JIT_PARAMSTR(len, name, value) #len #name | 143 | #define JIT_PARAMSTR(len, name, value) #len #name |
| 138 | #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) | 144 | #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) |
| 139 | 145 | ||
| 146 | /* -- JIT engine data structures ------------------------------------------ */ | ||
| 147 | |||
| 140 | /* Trace compiler state. */ | 148 | /* Trace compiler state. */ |
| 141 | typedef enum { | 149 | typedef enum { |
| 142 | LJ_TRACE_IDLE, /* Trace compiler idle. */ | 150 | LJ_TRACE_IDLE, /* Trace compiler idle. */ |
diff --git a/src/ljamalg.c b/src/ljamalg.c index 39542981..6712d435 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c | |||
| @@ -3,16 +3,6 @@ | |||
| 3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | 3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| 6 | /* | ||
| 7 | +--------------------------------------------------------------------------+ | ||
| 8 | | WARNING: Compiling the amalgamation needs a lot of virtual memory | | ||
| 9 | | (around 300 MB with GCC 4.x)! If you don't have enough physical memory | | ||
| 10 | | your machine will start swapping to disk and the compile will not finish | | ||
| 11 | | within a reasonable amount of time. | | ||
| 12 | | So either compile on a bigger machine or use the non-amalgamated build. | | ||
| 13 | +--------------------------------------------------------------------------+ | ||
| 14 | */ | ||
| 15 | |||
| 16 | #define ljamalg_c | 6 | #define ljamalg_c |
| 17 | #define LUA_CORE | 7 | #define LUA_CORE |
| 18 | 8 | ||
