about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mike Pall <mike> 2020-05-20 20:42:04 +0200
committer Mike Pall <mike> 2020-05-20 20:42:04 +0200
commit 0eddcbead2d67c16dcd4039a6765b9d2fc8ea631 (patch)
tree 614b63b87bb6ba476b616b10b95e278d4af0c452
parent 5bf0da3d7c02f9959fa3a9fb721e0565137b70c8 (diff)
download luajit-0eddcbead2d67c16dcd4039a6765b9d2fc8ea631.tar.gz
luajit-0eddcbead2d67c16dcd4039a6765b9d2fc8ea631.tar.bz2
luajit-0eddcbead2d67c16dcd4039a6765b9d2fc8ea631.zip
Cleanup CPU detection and tuning for old CPUs.
Diffstat (limited to '')
-rw-r--r-- src/Makefile 1
-rw-r--r-- src/lib_jit.c 65
-rw-r--r-- src/lj_arch.h 6
-rw-r--r-- src/lj_asm_x86.h 33
-rw-r--r-- src/lj_dispatch.c 7
-rw-r--r-- src/lj_emit_x86.h 5
-rw-r--r-- src/lj_errmsg.h 4
-rw-r--r-- src/lj_jit.h 94
-rw-r--r-- src/ljamalg.c 10
9 files changed, 87 insertions, 138 deletions
diff --git a/src/Makefile b/src/Makefile
index 07a94251..82a57032 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -603,7 +603,6 @@ E= @echo
603default all: $(TARGET_T) 603default all: $(TARGET_T)
604 604
605amalg: 605amalg:
606 @grep "^[+|]" ljamalg.c
607 $(MAKE) all "LJCORE_O=ljamalg.o" 606 $(MAKE) all "LJCORE_O=ljamalg.o"
608 607
609clean: 608clean:
diff --git a/src/lib_jit.c b/src/lib_jit.c
index c97b0d53..acd6c293 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -104,8 +104,8 @@ LJLIB_CF(jit_status)
104 jit_State *J = L2J(L); 104 jit_State *J = L2J(L);
105 L->top = L->base; 105 L->top = L->base;
106 setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); 106 setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
107 flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); 107 flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING);
108 flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); 108 flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING);
109 return (int)(L->top - L->base); 109 return (int)(L->top - L->base);
110#else 110#else
111 setboolV(L->top++, 0); 111 setboolV(L->top++, 0);
@@ -471,7 +471,7 @@ static int jitopt_flag(jit_State *J, const char *str)
471 str += str[2] == '-' ? 3 : 2; 471 str += str[2] == '-' ? 3 : 2;
472 set = 0; 472 set = 0;
473 } 473 }
474 for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { 474 for (opt = JIT_F_OPT; ; opt <<= 1) {
475 size_t len = *(const uint8_t *)lst; 475 size_t len = *(const uint8_t *)lst;
476 if (len == 0) 476 if (len == 0)
477 break; 477 break;
@@ -640,59 +640,41 @@ JIT_PARAMDEF(JIT_PARAMINIT)
640#undef JIT_PARAMINIT 640#undef JIT_PARAMINIT
641 0 641 0
642}; 642};
643#endif
644 643
645#if LJ_TARGET_ARM && LJ_TARGET_LINUX 644#if LJ_TARGET_ARM && LJ_TARGET_LINUX
646#include <sys/utsname.h> 645#include <sys/utsname.h>
647#endif 646#endif
648 647
649/* Arch-dependent CPU detection. */ 648/* Arch-dependent CPU feature detection. */
650static uint32_t jit_cpudetect(lua_State *L) 649static uint32_t jit_cpudetect(void)
651{ 650{
652 uint32_t flags = 0; 651 uint32_t flags = 0;
653#if LJ_TARGET_X86ORX64 652#if LJ_TARGET_X86ORX64
653
654 uint32_t vendor[4]; 654 uint32_t vendor[4];
655 uint32_t features[4]; 655 uint32_t features[4];
656 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { 656 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
657#if !LJ_HASJIT
658#define JIT_F_SSE2 2
659#endif
660 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
661#if LJ_HASJIT
662 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; 657 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
663 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; 658 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
664 if (vendor[2] == 0x6c65746e) { /* Intel. */
665 if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
666 flags |= JIT_F_LEA_AGU;
667 } else if (vendor[2] == 0x444d4163) { /* AMD. */
668 uint32_t fam = (features[0] & 0x0ff00f00);
669 if (fam >= 0x00000f00) /* K8, K10. */
670 flags |= JIT_F_PREFER_IMUL;
671 }
672 if (vendor[0] >= 7) { 659 if (vendor[0] >= 7) {
673 uint32_t xfeatures[4]; 660 uint32_t xfeatures[4];
674 lj_vm_cpuid(7, xfeatures); 661 lj_vm_cpuid(7, xfeatures);
675 flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; 662 flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
676 } 663 }
677#endif
678 } 664 }
679 /* Check for required instruction set support on x86 (unnecessary on x64). */ 665 /* Don't bother checking for SSE2 -- the VM will crash before getting here. */
680#if LJ_TARGET_X86 666
681 if (!(flags & JIT_F_SSE2))
682 luaL_error(L, "CPU with SSE2 required");
683#endif
684#elif LJ_TARGET_ARM 667#elif LJ_TARGET_ARM
685#if LJ_HASJIT 668
686 int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ 669 int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
687#if LJ_TARGET_LINUX 670#if LJ_TARGET_LINUX
688 if (ver < 70) { /* Runtime ARM CPU detection. */ 671 if (ver < 70) { /* Runtime ARM CPU detection. */
689 struct utsname ut; 672 struct utsname ut;
690 uname(&ut); 673 uname(&ut);
691 if (strncmp(ut.machine, "armv", 4) == 0) { 674 if (strncmp(ut.machine, "armv", 4) == 0) {
692 if (ut.machine[4] >= '7') 675 if (ut.machine[4] >= '8') ver = 80;
693 ver = 70; 676 else if (ut.machine[4] == '7') ver = 70;
694 else if (ut.machine[4] == '6') 677 else if (ut.machine[4] == '6') ver = 60;
695 ver = 60;
696 } 678 }
697 } 679 }
698#endif 680#endif
@@ -700,20 +682,22 @@ static uint32_t jit_cpudetect(lua_State *L)
700 ver >= 61 ? JIT_F_ARMV6T2_ : 682 ver >= 61 ? JIT_F_ARMV6T2_ :
701 ver >= 60 ? JIT_F_ARMV6_ : 0; 683 ver >= 60 ? JIT_F_ARMV6_ : 0;
702 flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; 684 flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
703#endif 685
704#elif LJ_TARGET_ARM64 686#elif LJ_TARGET_ARM64
687
705 /* No optional CPU features to detect (for now). */ 688 /* No optional CPU features to detect (for now). */
689
706#elif LJ_TARGET_PPC 690#elif LJ_TARGET_PPC
707#if LJ_HASJIT 691
708#if LJ_ARCH_SQRT 692#if LJ_ARCH_SQRT
709 flags |= JIT_F_SQRT; 693 flags |= JIT_F_SQRT;
710#endif 694#endif
711#if LJ_ARCH_ROUND 695#if LJ_ARCH_ROUND
712 flags |= JIT_F_ROUND; 696 flags |= JIT_F_ROUND;
713#endif 697#endif
714#endif 698
715#elif LJ_TARGET_MIPS 699#elif LJ_TARGET_MIPS
716#if LJ_HASJIT 700
717 /* Compile-time MIPS CPU detection. */ 701 /* Compile-time MIPS CPU detection. */
718#if LJ_ARCH_VERSION >= 20 702#if LJ_ARCH_VERSION >= 20
719 flags |= JIT_F_MIPSXXR2; 703 flags |= JIT_F_MIPSXXR2;
@@ -731,31 +715,28 @@ static uint32_t jit_cpudetect(lua_State *L)
731 if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ 715 if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
732 } 716 }
733#endif 717#endif
734#endif 718
735#else 719#else
736#error "Missing CPU detection for this architecture" 720#error "Missing CPU detection for this architecture"
737#endif 721#endif
738 UNUSED(L);
739 return flags; 722 return flags;
740} 723}
741 724
742/* Initialize JIT compiler. */ 725/* Initialize JIT compiler. */
743static void jit_init(lua_State *L) 726static void jit_init(lua_State *L)
744{ 727{
745 uint32_t flags = jit_cpudetect(L);
746#if LJ_HASJIT
747 jit_State *J = L2J(L); 728 jit_State *J = L2J(L);
748 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; 729 J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
749 memcpy(J->param, jit_param_default, sizeof(J->param)); 730 memcpy(J->param, jit_param_default, sizeof(J->param));
750 lj_dispatch_update(G(L)); 731 lj_dispatch_update(G(L));
751#else
752 UNUSED(flags);
753#endif
754} 732}
733#endif
755 734
756LUALIB_API int luaopen_jit(lua_State *L) 735LUALIB_API int luaopen_jit(lua_State *L)
757{ 736{
737#if LJ_HASJIT
758 jit_init(L); 738 jit_init(L);
739#endif
759 lua_pushliteral(L, LJ_OS_NAME); 740 lua_pushliteral(L, LJ_OS_NAME);
760 lua_pushliteral(L, LJ_ARCH_NAME); 741 lua_pushliteral(L, LJ_ARCH_NAME);
761 lua_pushinteger(L, LUAJIT_VERSION_NUM); 742 lua_pushinteger(L, LUAJIT_VERSION_NUM);
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 027b39ce..70426838 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -208,13 +208,13 @@
208#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 208#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
209#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL 209#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
210 210
211#if __ARM_ARCH_8__ || __ARM_ARCH_8A__ 211#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
212#define LJ_ARCH_VERSION 80 212#define LJ_ARCH_VERSION 80
213#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ 213#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
214#define LJ_ARCH_VERSION 70 214#define LJ_ARCH_VERSION 70
215#elif __ARM_ARCH_6T2__ 215#elif __ARM_ARCH_6T2__
216#define LJ_ARCH_VERSION 61 216#define LJ_ARCH_VERSION 61
217#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ 217#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
218#define LJ_ARCH_VERSION 60 218#define LJ_ARCH_VERSION 60
219#else 219#else
220#define LJ_ARCH_VERSION 50 220#define LJ_ARCH_VERSION 50
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index db3409b9..bf818f5a 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1214,13 +1214,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
1214 emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); 1214 emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
1215 } else { 1215 } else {
1216 emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); 1216 emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
1217 if ((as->flags & JIT_F_PREFER_IMUL)) { 1217 emit_shifti(as, XOg_SHL, dest, 3);
1218 emit_i8(as, sizeof(Node)); 1218 emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
1219 emit_rr(as, XO_IMULi8, dest, dest);
1220 } else {
1221 emit_shifti(as, XOg_SHL, dest, 3);
1222 emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
1223 }
1224 if (isk) { 1219 if (isk) {
1225 emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); 1220 emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
1226 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); 1221 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
@@ -1279,7 +1274,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1279 lua_assert(ofs % sizeof(Node) == 0); 1274 lua_assert(ofs % sizeof(Node) == 0);
1280 if (ra_hasreg(dest)) { 1275 if (ra_hasreg(dest)) {
1281 if (ofs != 0) { 1276 if (ofs != 0) {
1282 if (dest == node && !(as->flags & JIT_F_LEA_AGU)) 1277 if (dest == node)
1283 emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); 1278 emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
1284 else 1279 else
1285 emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); 1280 emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
@@ -2180,8 +2175,7 @@ static void asm_add(ASMState *as, IRIns *ir)
2180{ 2175{
2181 if (irt_isnum(ir->t)) 2176 if (irt_isnum(ir->t))
2182 asm_fparith(as, ir, XO_ADDSD); 2177 asm_fparith(as, ir, XO_ADDSD);
2183 else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || 2178 else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir))
2184 irt_is64(ir->t) || !asm_lea(as, ir))
2185 asm_intarith(as, ir, XOg_ADD); 2179 asm_intarith(as, ir, XOg_ADD);
2186} 2180}
2187 2181
@@ -2903,7 +2897,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2903 MCode *target, *q; 2897 MCode *target, *q;
2904 int32_t spadj = as->T->spadjust; 2898 int32_t spadj = as->T->spadjust;
2905 if (spadj == 0) { 2899 if (spadj == 0) {
2906 p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); 2900 p -= LJ_64 ? 7 : 6;
2907 } else { 2901 } else {
2908 MCode *p1; 2902 MCode *p1;
2909 /* Patch stack adjustment. */ 2903 /* Patch stack adjustment. */
@@ -2915,20 +2909,11 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2915 p1 = p-9; 2909 p1 = p-9;
2916 *(int32_t *)p1 = spadj; 2910 *(int32_t *)p1 = spadj;
2917 } 2911 }
2918 if ((as->flags & JIT_F_LEA_AGU)) {
2919#if LJ_64
2920 p1[-4] = 0x48;
2921#endif
2922 p1[-3] = (MCode)XI_LEA;
2923 p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
2924 p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
2925 } else {
2926#if LJ_64 2912#if LJ_64
2927 p1[-3] = 0x48; 2913 p1[-3] = 0x48;
2928#endif 2914#endif
2929 p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); 2915 p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
2930 p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); 2916 p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
2931 }
2932 } 2917 }
2933 /* Patch exit branch. */ 2918 /* Patch exit branch. */
2934 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; 2919 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
@@ -2959,7 +2944,7 @@ static void asm_tail_prep(ASMState *as)
2959 as->invmcp = as->mcp = p; 2944 as->invmcp = as->mcp = p;
2960 } else { 2945 } else {
2961 /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ 2946 /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
2962 as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); 2947 as->mcp = p - (LJ_64 ? 7 : 6);
2963 as->invmcp = NULL; 2948 as->invmcp = NULL;
2964 } 2949 }
2965} 2950}
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index 8553438c..39416d00 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -252,15 +252,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
252 } else { 252 } else {
253 if (!(mode & LUAJIT_MODE_ON)) 253 if (!(mode & LUAJIT_MODE_ON))
254 G2J(g)->flags &= ~(uint32_t)JIT_F_ON; 254 G2J(g)->flags &= ~(uint32_t)JIT_F_ON;
255#if LJ_TARGET_X86ORX64
256 else if ((G2J(g)->flags & JIT_F_SSE2))
257 G2J(g)->flags |= (uint32_t)JIT_F_ON;
258 else
259 return 0; /* Don't turn on JIT compiler without SSE2 support. */
260#else
261 else 255 else
262 G2J(g)->flags |= (uint32_t)JIT_F_ON; 256 G2J(g)->flags |= (uint32_t)JIT_F_ON;
263#endif
264 lj_dispatch_update(g); 257 lj_dispatch_update(g);
265 } 258 }
266 break; 259 break;
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index bc4391a0..b17e28a5 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -559,10 +559,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
559static void emit_addptr(ASMState *as, Reg r, int32_t ofs) 559static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
560{ 560{
561 if (ofs) { 561 if (ofs) {
562 if ((as->flags & JIT_F_LEA_AGU)) 562 emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
563 emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
564 else
565 emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
566 } 563 }
567} 564}
568 565
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index efb7c3f3..9110dc7e 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -101,11 +101,7 @@ ERRDEF(STRGSRV, "invalid replacement value (a %s)")
101ERRDEF(BADMODN, "name conflict for module " LUA_QS) 101ERRDEF(BADMODN, "name conflict for module " LUA_QS)
102#if LJ_HASJIT 102#if LJ_HASJIT
103ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") 103ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?")
104#if LJ_TARGET_X86ORX64
105ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2")
106#else
107ERRDEF(NOJIT, "JIT compiler disabled") 104ERRDEF(NOJIT, "JIT compiler disabled")
108#endif
109#elif defined(LJ_ARCH_NOJIT) 105#elif defined(LJ_ARCH_NOJIT)
110ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") 106ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)")
111#else 107#else
diff --git a/src/lj_jit.h b/src/lj_jit.h
index f179f17f..a9c602f0 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -9,47 +9,49 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_ir.h" 10#include "lj_ir.h"
11 11
12/* JIT engine flags. */ 12/* -- JIT engine flags ---------------------------------------------------- */
13
14/* General JIT engine flags. 4 bits. */
13#define JIT_F_ON 0x00000001 15#define JIT_F_ON 0x00000001
14 16
15/* CPU-specific JIT engine flags. */ 17/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */
18#define JIT_F_CPU 0x00000010
19
16#if LJ_TARGET_X86ORX64 20#if LJ_TARGET_X86ORX64
17#define JIT_F_SSE2 0x00000010 21
18#define JIT_F_SSE3 0x00000020 22#define JIT_F_SSE3 (JIT_F_CPU << 0)
19#define JIT_F_SSE4_1 0x00000040 23#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
20#define JIT_F_PREFER_IMUL 0x00000080 24#define JIT_F_BMI2 (JIT_F_CPU << 2)
21#define JIT_F_LEA_AGU 0x00000100 25
22#define JIT_F_BMI2 0x00000200 26
23 27#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
24/* Names for the CPU-specific flags. Must match the order above. */ 28
25#define JIT_F_CPU_FIRST JIT_F_SSE2
26#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2"
27#elif LJ_TARGET_ARM 29#elif LJ_TARGET_ARM
28#define JIT_F_ARMV6_ 0x00000010 30
29#define JIT_F_ARMV6T2_ 0x00000020 31#define JIT_F_ARMV6_ (JIT_F_CPU << 0)
30#define JIT_F_ARMV7 0x00000040 32#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1)
31#define JIT_F_VFPV2 0x00000080 33#define JIT_F_ARMV7 (JIT_F_CPU << 2)
32#define JIT_F_VFPV3 0x00000100 34#define JIT_F_ARMV8 (JIT_F_CPU << 3)
33 35#define JIT_F_VFPV2 (JIT_F_CPU << 4)
34#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) 36#define JIT_F_VFPV3 (JIT_F_CPU << 5)
35#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) 37
38#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
39#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
36#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) 40#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3)
37 41
38/* Names for the CPU-specific flags. Must match the order above. */ 42#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3"
39#define JIT_F_CPU_FIRST JIT_F_ARMV6_ 43
40#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3"
41#elif LJ_TARGET_PPC 44#elif LJ_TARGET_PPC
42#define JIT_F_SQRT 0x00000010
43#define JIT_F_ROUND 0x00000020
44 45
45/* Names for the CPU-specific flags. Must match the order above. */ 46#define JIT_F_SQRT (JIT_F_CPU << 0)
46#define JIT_F_CPU_FIRST JIT_F_SQRT 47#define JIT_F_ROUND (JIT_F_CPU << 1)
48
47#define JIT_F_CPUSTRING "\4SQRT\5ROUND" 49#define JIT_F_CPUSTRING "\4SQRT\5ROUND"
50
48#elif LJ_TARGET_MIPS 51#elif LJ_TARGET_MIPS
49#define JIT_F_MIPSXXR2 0x00000010
50 52
51/* Names for the CPU-specific flags. Must match the order above. */ 53#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0)
52#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 54
53#if LJ_TARGET_MIPS32 55#if LJ_TARGET_MIPS32
54#if LJ_TARGET_MIPSR6 56#if LJ_TARGET_MIPSR6
55#define JIT_F_CPUSTRING "\010MIPS32R6" 57#define JIT_F_CPUSTRING "\010MIPS32R6"
@@ -63,27 +65,29 @@
63#define JIT_F_CPUSTRING "\010MIPS64R2" 65#define JIT_F_CPUSTRING "\010MIPS64R2"
64#endif 66#endif
65#endif 67#endif
68
66#else 69#else
67#define JIT_F_CPU_FIRST 0 70
68#define JIT_F_CPUSTRING "" 71#define JIT_F_CPUSTRING ""
72
69#endif 73#endif
70 74
71/* Optimization flags. */ 75/* Optimization flags. 12 bits. */
76#define JIT_F_OPT 0x00010000
72#define JIT_F_OPT_MASK 0x0fff0000 77#define JIT_F_OPT_MASK 0x0fff0000
73 78
74#define JIT_F_OPT_FOLD 0x00010000 79#define JIT_F_OPT_FOLD (JIT_F_OPT << 0)
75#define JIT_F_OPT_CSE 0x00020000 80#define JIT_F_OPT_CSE (JIT_F_OPT << 1)
76#define JIT_F_OPT_DCE 0x00040000 81#define JIT_F_OPT_DCE (JIT_F_OPT << 2)
77#define JIT_F_OPT_FWD 0x00080000 82#define JIT_F_OPT_FWD (JIT_F_OPT << 3)
78#define JIT_F_OPT_DSE 0x00100000 83#define JIT_F_OPT_DSE (JIT_F_OPT << 4)
79#define JIT_F_OPT_NARROW 0x00200000 84#define JIT_F_OPT_NARROW (JIT_F_OPT << 5)
80#define JIT_F_OPT_LOOP 0x00400000 85#define JIT_F_OPT_LOOP (JIT_F_OPT << 6)
81#define JIT_F_OPT_ABC 0x00800000 86#define JIT_F_OPT_ABC (JIT_F_OPT << 7)
82#define JIT_F_OPT_SINK 0x01000000 87#define JIT_F_OPT_SINK (JIT_F_OPT << 8)
83#define JIT_F_OPT_FUSE 0x02000000 88#define JIT_F_OPT_FUSE (JIT_F_OPT << 9)
84 89
85/* Optimizations names for -O. Must match the order above. */ 90/* Optimizations names for -O. Must match the order above. */
86#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
87#define JIT_F_OPTSTRING \ 91#define JIT_F_OPTSTRING \
88 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" 92 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
89 93
@@ -95,6 +99,8 @@
95 JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) 99 JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
96#define JIT_F_OPT_DEFAULT JIT_F_OPT_3 100#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
97 101
102/* -- JIT engine parameters ----------------------------------------------- */
103
98#if LJ_TARGET_WINDOWS || LJ_64 104#if LJ_TARGET_WINDOWS || LJ_64
99/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ 105/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
100#define JIT_P_sizemcode_DEFAULT 64 106#define JIT_P_sizemcode_DEFAULT 64
@@ -137,6 +143,8 @@ JIT_PARAMDEF(JIT_PARAMENUM)
137#define JIT_PARAMSTR(len, name, value) #len #name 143#define JIT_PARAMSTR(len, name, value) #len #name
138#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) 144#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR)
139 145
146/* -- JIT engine data structures ------------------------------------------ */
147
140/* Trace compiler state. */ 148/* Trace compiler state. */
141typedef enum { 149typedef enum {
142 LJ_TRACE_IDLE, /* Trace compiler idle. */ 150 LJ_TRACE_IDLE, /* Trace compiler idle. */
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 39542981..6712d435 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -3,16 +3,6 @@
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4*/ 4*/
5 5
6/*
7+--------------------------------------------------------------------------+
8| WARNING: Compiling the amalgamation needs a lot of virtual memory |
9| (around 300 MB with GCC 4.x)! If you don't have enough physical memory |
10| your machine will start swapping to disk and the compile will not finish |
11| within a reasonable amount of time. |
12| So either compile on a bigger machine or use the non-amalgamated build. |
13+--------------------------------------------------------------------------+
14*/
15
16#define ljamalg_c 6#define ljamalg_c
17#define LUA_CORE 7#define LUA_CORE
18 8