aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/host/buildvm_peobj.c113
-rw-r--r--src/lj_arch.h2
-rw-r--r--src/lj_def.h24
-rw-r--r--src/lj_emit_arm64.h8
-rw-r--r--src/lj_mcode.c10
-rw-r--r--src/lj_target.h11
-rw-r--r--src/msvcbuild.bat16
7 files changed, 147 insertions, 37 deletions
diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c
index 5bca6df8..e3e1026e 100644
--- a/src/host/buildvm_peobj.c
+++ b/src/host/buildvm_peobj.c
@@ -9,7 +9,7 @@
9#include "buildvm.h" 9#include "buildvm.h"
10#include "lj_bc.h" 10#include "lj_bc.h"
11 11
12#if LJ_TARGET_X86ORX64 12#if LJ_TARGET_WINDOWS
13 13
14/* Context for PE object emitter. */ 14/* Context for PE object emitter. */
15static char *strtab; 15static char *strtab;
@@ -93,6 +93,17 @@ typedef struct PEsymaux {
93#define PEOBJ_RELOC_ADDR32NB 0x03 93#define PEOBJ_RELOC_ADDR32NB 0x03
94#define PEOBJ_RELOC_OFS 0 94#define PEOBJ_RELOC_OFS 0
95#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ 95#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
96#define PEOBJ_PDATA_NRELOC 6
97#define PEOBJ_XDATA_SIZE (8*2+4+6*2)
98#elif LJ_TARGET_ARM64
99#define PEOBJ_ARCH_TARGET 0xaa64
100#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */
101#define PEOBJ_RELOC_DIR32 0x01
102#define PEOBJ_RELOC_ADDR32NB 0x02
103#define PEOBJ_RELOC_OFS (-4)
104#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
105#define PEOBJ_PDATA_NRELOC 4
106#define PEOBJ_XDATA_SIZE (4+24+4 +4+8)
96#endif 107#endif
97 108
98/* Section numbers (0-based). */ 109/* Section numbers (0-based). */
@@ -100,7 +111,7 @@ enum {
100 PEOBJ_SECT_ABS = -2, 111 PEOBJ_SECT_ABS = -2,
101 PEOBJ_SECT_UNDEF = -1, 112 PEOBJ_SECT_UNDEF = -1,
102 PEOBJ_SECT_TEXT, 113 PEOBJ_SECT_TEXT,
103#if LJ_TARGET_X64 114#ifdef PEOBJ_PDATA_NRELOC
104 PEOBJ_SECT_PDATA, 115 PEOBJ_SECT_PDATA,
105 PEOBJ_SECT_XDATA, 116 PEOBJ_SECT_XDATA,
106#elif LJ_TARGET_X86 117#elif LJ_TARGET_X86
@@ -175,6 +186,9 @@ void emit_peobj(BuildCtx *ctx)
175 uint32_t sofs; 186 uint32_t sofs;
176 int i, nrsym; 187 int i, nrsym;
177 union { uint8_t b; uint32_t u; } host_endian; 188 union { uint8_t b; uint32_t u; } host_endian;
189#ifdef PEOBJ_PDATA_NRELOC
190 uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
191#endif
178 192
179 sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); 193 sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
180 194
@@ -188,18 +202,18 @@ void emit_peobj(BuildCtx *ctx)
188 /* Flags: 60 = read+execute, 50 = align16, 20 = code. */ 202 /* Flags: 60 = read+execute, 50 = align16, 20 = code. */
189 pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS; 203 pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS;
190 204
191#if LJ_TARGET_X64 205#ifdef PEOBJ_PDATA_NRELOC
192 memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1); 206 memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
193 pesect[PEOBJ_SECT_PDATA].ofs = sofs; 207 pesect[PEOBJ_SECT_PDATA].ofs = sofs;
194 sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4); 208 sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4);
195 pesect[PEOBJ_SECT_PDATA].relocofs = sofs; 209 pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
196 sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE; 210 sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE;
197 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ 211 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
198 pesect[PEOBJ_SECT_PDATA].flags = 0x40300040; 212 pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
199 213
200 memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1); 214 memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
201 pesect[PEOBJ_SECT_XDATA].ofs = sofs; 215 pesect[PEOBJ_SECT_XDATA].ofs = sofs;
202 sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */ 216 sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */
203 pesect[PEOBJ_SECT_XDATA].relocofs = sofs; 217 pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
204 sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; 218 sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
205 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ 219 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
@@ -234,7 +248,7 @@ void emit_peobj(BuildCtx *ctx)
234 */ 248 */
235 nrsym = ctx->nrelocsym; 249 nrsym = ctx->nrelocsym;
236 pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; 250 pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
237#if LJ_TARGET_X64 251#ifdef PEOBJ_PDATA_NRELOC
238 pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ 252 pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
239#endif 253#endif
240 254
@@ -259,7 +273,6 @@ void emit_peobj(BuildCtx *ctx)
259 273
260#if LJ_TARGET_X64 274#if LJ_TARGET_X64
261 { /* Write .pdata section. */ 275 { /* Write .pdata section. */
262 uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
263 uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */ 276 uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */
264 PEreloc reloc; 277 PEreloc reloc;
265 pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0; 278 pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
@@ -308,6 +321,88 @@ void emit_peobj(BuildCtx *ctx)
308 reloc.type = PEOBJ_RELOC_ADDR32NB; 321 reloc.type = PEOBJ_RELOC_ADDR32NB;
309 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); 322 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
310 } 323 }
324#elif LJ_TARGET_ARM64
325 /* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */
326 { /* Write .pdata section. */
327 uint32_t pdata[4];
328 PEreloc reloc;
329 pdata[0] = 0;
330 pdata[1] = 0;
331 pdata[2] = fcofs;
332 pdata[3] = 4+24+4;
333 owrite(ctx, &pdata, sizeof(pdata));
334 /* Start of .text and start of .xdata. */
335 reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
336 reloc.type = PEOBJ_RELOC_ADDR32NB;
337 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
338 reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2;
339 reloc.type = PEOBJ_RELOC_ADDR32NB;
340 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
341 /* Start of vm_ffi_call and start of second part of .xdata. */
342 reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1;
343 reloc.type = PEOBJ_RELOC_ADDR32NB;
344 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
345 reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2;
346 reloc.type = PEOBJ_RELOC_ADDR32NB;
347 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
348 }
349 { /* Write .xdata section. */
350 uint32_t u32;
351 uint8_t *p, uwc[24];
352 PEreloc reloc;
353
354#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2)
355#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */
356#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */
357#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r)-19)<< 6) | ((o) >> 3))
358#define CSAVE_REGS(r1,r2,o1) do { \
359 int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \
360} while (0)
361#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3))
362#define CSAVE_FREGS(r1,r2,o1) do { \
363 int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \
364} while (0)
365#define CSAVE_REGX(r,o) CBE16(0xd400 | (((r) - 19) << 5) | (~(o) >> 3))
366#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */
367#define CODE_NOP 0xe3
368#define CODE_END 0xe4
369#define CEND_ALIGN do { \
370 *p++ = CODE_END; \
371 while ((p - uwc) & 3) *p++ = CODE_NOP; \
372} while (0)
373
374 /* Unwind codes for .text section with handler. */
375 p = uwc;
376 CALLOC_S(208); /* +1 */
377 CSAVE_FPLR(192); /* +1 */
378 CADD_FP(192); /* +2 */
379 CSAVE_REGS(19, 28, 184); /* +5*2 */
380 CSAVE_FREGS(8, 15, 104); /* +4*2 */
381 CEND_ALIGN; /* +1 +1 -> 24 */
382
383 u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);
384 owrite(ctx, &u32, 4);
385 owrite(ctx, &uwc, 24);
386
387 u32 = 0; /* Handler RVA to be relocated at 4 + 24. */
388 owrite(ctx, &u32, 4);
389
390 /* Unwind codes for vm_ffi_call without handler. */
391 p = uwc;
392 CSAVE_FPLR(16); /* +1 */
393 CADD_FP(16); /* +2 */
394 CSAVE_REGX(19, -24); /* +2 */
395 CSAVE_REGX(20, -32); /* +2 */
396 CEND_ALIGN; /* +1 +0 -> 8 */
397
398 u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2);
399 owrite(ctx, &u32, 4);
400 owrite(ctx, &uwc, 8);
401
402 reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2;
403 reloc.type = PEOBJ_RELOC_ADDR32NB;
404 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
405 }
311#elif LJ_TARGET_X86 406#elif LJ_TARGET_X86
312 /* Write .sxdata section. */ 407 /* Write .sxdata section. */
313 for (i = 0; i < nrsym; i++) { 408 for (i = 0; i < nrsym; i++) {
@@ -339,7 +434,7 @@ void emit_peobj(BuildCtx *ctx)
339 emit_peobj_sym(ctx, ctx->relocsym[i], 0, 434 emit_peobj_sym(ctx, ctx->relocsym[i], 0,
340 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); 435 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
341 436
342#if LJ_TARGET_X64 437#ifdef PEOBJ_PDATA_NRELOC
343 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); 438 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
344 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); 439 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
345 emit_peobj_sym(ctx, "lj_err_unwind_win", 0, 440 emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 3e920f2a..026e741f 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -57,7 +57,7 @@
57#define LUAJIT_TARGET LUAJIT_ARCH_X64 57#define LUAJIT_TARGET LUAJIT_ARCH_X64
58#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) 58#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
59#define LUAJIT_TARGET LUAJIT_ARCH_ARM 59#define LUAJIT_TARGET LUAJIT_ARCH_ARM
60#elif defined(__aarch64__) 60#elif defined(__aarch64__) || defined(_M_ARM64)
61#define LUAJIT_TARGET LUAJIT_ARCH_ARM64 61#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
62#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) 62#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
63#define LUAJIT_TARGET LUAJIT_ARCH_PPC 63#define LUAJIT_TARGET LUAJIT_ARCH_PPC
diff --git a/src/lj_def.h b/src/lj_def.h
index 88bc6336..1461d3d7 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -146,15 +146,9 @@ typedef uintptr_t BloomFilter;
146#define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) 146#define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0)
147 147
148#define lj_ffs(x) ((uint32_t)__builtin_ctz(x)) 148#define lj_ffs(x) ((uint32_t)__builtin_ctz(x))
149/* Don't ask ... */
150#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__))
151static LJ_AINLINE uint32_t lj_fls(uint32_t x)
152{
153 uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r;
154}
155#else
156#define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) 149#define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31))
157#endif 150#define lj_ffs64(x) ((uint32_t)__builtin_ctzll(x))
151#define lj_fls64(x) ((uint32_t)(__builtin_clzll(x)^63))
158 152
159#if defined(__arm__) 153#if defined(__arm__)
160static LJ_AINLINE uint32_t lj_bswap(uint32_t x) 154static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
@@ -265,8 +259,12 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
265#else 259#else
266unsigned char _BitScanForward(unsigned long *, unsigned long); 260unsigned char _BitScanForward(unsigned long *, unsigned long);
267unsigned char _BitScanReverse(unsigned long *, unsigned long); 261unsigned char _BitScanReverse(unsigned long *, unsigned long);
262unsigned char _BitScanForward64(unsigned long *, uint64_t);
263unsigned char _BitScanReverse64(unsigned long *, uint64_t);
268#pragma intrinsic(_BitScanForward) 264#pragma intrinsic(_BitScanForward)
269#pragma intrinsic(_BitScanReverse) 265#pragma intrinsic(_BitScanReverse)
266#pragma intrinsic(_BitScanForward64)
267#pragma intrinsic(_BitScanReverse64)
270 268
271static LJ_AINLINE uint32_t lj_ffs(uint32_t x) 269static LJ_AINLINE uint32_t lj_ffs(uint32_t x)
272{ 270{
@@ -277,6 +275,16 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
277{ 275{
278 unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r; 276 unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r;
279} 277}
278
279static LJ_AINLINE uint32_t lj_ffs64(uint64_t x)
280{
281 unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r;
282}
283
284static LJ_AINLINE uint32_t lj_fls64(uint64_t x)
285{
286 unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r;
287}
280#endif 288#endif
281 289
282unsigned long _byteswap_ulong(unsigned long); 290unsigned long _byteswap_ulong(unsigned long);
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index 9161c958..fef5d973 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -30,15 +30,15 @@ static uint32_t emit_isk12(int64_t n)
30 uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n; 30 uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n;
31 uint32_t m = n < 0 ? 0x40000000 : 0; 31 uint32_t m = n < 0 ? 0x40000000 : 0;
32 if (k < 0x1000) { 32 if (k < 0x1000) {
33 return A64I_K12|m|A64F_U12(k); 33 return (uint32_t)(A64I_K12|m|A64F_U12(k));
34 } else if ((k & 0xfff000) == k) { 34 } else if ((k & 0xfff000) == k) {
35 return A64I_K12|m|0x400000|A64F_U12(k>>12); 35 return (uint32_t)(A64I_K12|m|0x400000|A64F_U12(k>>12));
36 } 36 }
37 return 0; 37 return 0;
38} 38}
39 39
40#define emit_clz64(n) __builtin_clzll(n) 40#define emit_clz64(n) (lj_fls64(n)^63)
41#define emit_ctz64(n) __builtin_ctzll(n) 41#define emit_ctz64(n) lj_ffs64(n)
42 42
43/* Encode constant in K13 format for logical data processing instructions. */ 43/* Encode constant in K13 format for logical data processing instructions. */
44static uint32_t emit_isk13(uint64_t n, int is64) 44static uint32_t emit_isk13(uint64_t n, int is64)
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index c8ed95e1..8a4851dd 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -29,6 +29,11 @@
29#include <valgrind/valgrind.h> 29#include <valgrind/valgrind.h>
30#endif 30#endif
31 31
32#if LJ_TARGET_WINDOWS
33#define WIN32_LEAN_AND_MEAN
34#include <windows.h>
35#endif
36
32#if LJ_TARGET_IOS 37#if LJ_TARGET_IOS
33void sys_icache_invalidate(void *start, size_t len); 38void sys_icache_invalidate(void *start, size_t len);
34#endif 39#endif
@@ -41,6 +46,8 @@ void lj_mcode_sync(void *start, void *end)
41#endif 46#endif
42#if LJ_TARGET_X86ORX64 47#if LJ_TARGET_X86ORX64
43 UNUSED(start); UNUSED(end); 48 UNUSED(start); UNUSED(end);
49#elif LJ_TARGET_WINDOWS
50 FlushInstructionCache(GetCurrentProcess(), start, (char *)end-(char *)start);
44#elif LJ_TARGET_IOS 51#elif LJ_TARGET_IOS
45 sys_icache_invalidate(start, (char *)end-(char *)start); 52 sys_icache_invalidate(start, (char *)end-(char *)start);
46#elif LJ_TARGET_PPC 53#elif LJ_TARGET_PPC
@@ -58,9 +65,6 @@ void lj_mcode_sync(void *start, void *end)
58 65
59#if LJ_TARGET_WINDOWS 66#if LJ_TARGET_WINDOWS
60 67
61#define WIN32_LEAN_AND_MEAN
62#include <windows.h>
63
64#define MCPROT_RW PAGE_READWRITE 68#define MCPROT_RW PAGE_READWRITE
65#define MCPROT_RX PAGE_EXECUTE_READ 69#define MCPROT_RX PAGE_EXECUTE_READ
66#define MCPROT_RWX PAGE_EXECUTE_READWRITE 70#define MCPROT_RWX PAGE_EXECUTE_READWRITE
diff --git a/src/lj_target.h b/src/lj_target.h
index 09d19bd9..e7322c07 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -58,9 +58,13 @@ typedef uint32_t RegSP;
58#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 58#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
59typedef uint64_t RegSet; 59typedef uint64_t RegSet;
60#define RSET_BITS 6 60#define RSET_BITS 6
61#define rset_picktop_(rs) ((Reg)lj_fls64(rs))
62#define rset_pickbot_(rs) ((Reg)lj_ffs64(rs))
61#else 63#else
62typedef uint32_t RegSet; 64typedef uint32_t RegSet;
63#define RSET_BITS 5 65#define RSET_BITS 5
66#define rset_picktop_(rs) ((Reg)lj_fls(rs))
67#define rset_pickbot_(rs) ((Reg)lj_ffs(rs))
64#endif 68#endif
65 69
66#define RID2RSET(r) (((RegSet)1) << (r)) 70#define RID2RSET(r) (((RegSet)1) << (r))
@@ -71,13 +75,6 @@ typedef uint32_t RegSet;
71#define rset_set(rs, r) (rs |= RID2RSET(r)) 75#define rset_set(rs, r) (rs |= RID2RSET(r))
72#define rset_clear(rs, r) (rs &= ~RID2RSET(r)) 76#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
73#define rset_exclude(rs, r) (rs & ~RID2RSET(r)) 77#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
74#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
75#define rset_picktop_(rs) ((Reg)(__builtin_clzll(rs)^63))
76#define rset_pickbot_(rs) ((Reg)__builtin_ctzll(rs))
77#else
78#define rset_picktop_(rs) ((Reg)lj_fls(rs))
79#define rset_pickbot_(rs) ((Reg)lj_ffs(rs))
80#endif
81 78
82/* -- Register allocation cost -------------------------------------------- */ 79/* -- Register allocation cost -------------------------------------------- */
83 80
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index f9bf2528..2cfcf26e 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -34,20 +34,26 @@
34if exist minilua.exe.manifest^ 34if exist minilua.exe.manifest^
35 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe 35 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
36 36
37@set DASMFLAGS=-D WIN -D JIT -D FFI -D P64 37@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64
38@set LJARCH=x64 38@set LJARCH=x64
39@minilua 39@minilua
40@if errorlevel 8 goto :X64 40@if errorlevel 8 goto :NO32
41@set DASC=vm_x86.dasc 41@set DASC=vm_x86.dasc
42@set DASMFLAGS=-D WIN -D JIT -D FFI 42@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU
43@set LJARCH=x86 43@set LJARCH=x86
44@set LJCOMPILE=%LJCOMPILE% /arch:SSE2 44@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
45@goto :DA
46:NO32
47@if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64
48@set DASC=vm_arm64.dasc
49@set LJARCH=arm64
50@goto :DA
45:X64 51:X64
46@if "%1" neq "nogc64" goto :GC64 52@if "%1" neq "nogc64" goto :DA
47@shift 53@shift
48@set DASC=vm_x86.dasc 54@set DASC=vm_x86.dasc
49@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 55@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
50:GC64 56:DA
51minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% 57minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
52@if errorlevel 1 goto :BAD 58@if errorlevel 1 goto :BAD
53 59