diff options
-rw-r--r-- | src/host/buildvm_peobj.c | 113 | ||||
-rw-r--r-- | src/lj_arch.h | 2 | ||||
-rw-r--r-- | src/lj_def.h | 24 | ||||
-rw-r--r-- | src/lj_emit_arm64.h | 8 | ||||
-rw-r--r-- | src/lj_mcode.c | 10 | ||||
-rw-r--r-- | src/lj_target.h | 11 | ||||
-rw-r--r-- | src/msvcbuild.bat | 16 |
7 files changed, 147 insertions, 37 deletions
diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c index 5bca6df8..e3e1026e 100644 --- a/src/host/buildvm_peobj.c +++ b/src/host/buildvm_peobj.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include "buildvm.h" | 9 | #include "buildvm.h" |
10 | #include "lj_bc.h" | 10 | #include "lj_bc.h" |
11 | 11 | ||
12 | #if LJ_TARGET_X86ORX64 | 12 | #if LJ_TARGET_WINDOWS |
13 | 13 | ||
14 | /* Context for PE object emitter. */ | 14 | /* Context for PE object emitter. */ |
15 | static char *strtab; | 15 | static char *strtab; |
@@ -93,6 +93,17 @@ typedef struct PEsymaux { | |||
93 | #define PEOBJ_RELOC_ADDR32NB 0x03 | 93 | #define PEOBJ_RELOC_ADDR32NB 0x03 |
94 | #define PEOBJ_RELOC_OFS 0 | 94 | #define PEOBJ_RELOC_OFS 0 |
95 | #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ | 95 | #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ |
96 | #define PEOBJ_PDATA_NRELOC 6 | ||
97 | #define PEOBJ_XDATA_SIZE (8*2+4+6*2) | ||
98 | #elif LJ_TARGET_ARM64 | ||
99 | #define PEOBJ_ARCH_TARGET 0xaa64 | ||
100 | #define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */ | ||
101 | #define PEOBJ_RELOC_DIR32 0x01 | ||
102 | #define PEOBJ_RELOC_ADDR32NB 0x02 | ||
103 | #define PEOBJ_RELOC_OFS (-4) | ||
104 | #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ | ||
105 | #define PEOBJ_PDATA_NRELOC 4 | ||
106 | #define PEOBJ_XDATA_SIZE (4+24+4 +4+8) | ||
96 | #endif | 107 | #endif |
97 | 108 | ||
98 | /* Section numbers (0-based). */ | 109 | /* Section numbers (0-based). */ |
@@ -100,7 +111,7 @@ enum { | |||
100 | PEOBJ_SECT_ABS = -2, | 111 | PEOBJ_SECT_ABS = -2, |
101 | PEOBJ_SECT_UNDEF = -1, | 112 | PEOBJ_SECT_UNDEF = -1, |
102 | PEOBJ_SECT_TEXT, | 113 | PEOBJ_SECT_TEXT, |
103 | #if LJ_TARGET_X64 | 114 | #ifdef PEOBJ_PDATA_NRELOC |
104 | PEOBJ_SECT_PDATA, | 115 | PEOBJ_SECT_PDATA, |
105 | PEOBJ_SECT_XDATA, | 116 | PEOBJ_SECT_XDATA, |
106 | #elif LJ_TARGET_X86 | 117 | #elif LJ_TARGET_X86 |
@@ -175,6 +186,9 @@ void emit_peobj(BuildCtx *ctx) | |||
175 | uint32_t sofs; | 186 | uint32_t sofs; |
176 | int i, nrsym; | 187 | int i, nrsym; |
177 | union { uint8_t b; uint32_t u; } host_endian; | 188 | union { uint8_t b; uint32_t u; } host_endian; |
189 | #ifdef PEOBJ_PDATA_NRELOC | ||
190 | uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs; | ||
191 | #endif | ||
178 | 192 | ||
179 | sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); | 193 | sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); |
180 | 194 | ||
@@ -188,18 +202,18 @@ void emit_peobj(BuildCtx *ctx) | |||
188 | /* Flags: 60 = read+execute, 50 = align16, 20 = code. */ | 202 | /* Flags: 60 = read+execute, 50 = align16, 20 = code. */ |
189 | pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS; | 203 | pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS; |
190 | 204 | ||
191 | #if LJ_TARGET_X64 | 205 | #ifdef PEOBJ_PDATA_NRELOC |
192 | memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1); | 206 | memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1); |
193 | pesect[PEOBJ_SECT_PDATA].ofs = sofs; | 207 | pesect[PEOBJ_SECT_PDATA].ofs = sofs; |
194 | sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4); | 208 | sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4); |
195 | pesect[PEOBJ_SECT_PDATA].relocofs = sofs; | 209 | pesect[PEOBJ_SECT_PDATA].relocofs = sofs; |
196 | sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE; | 210 | sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE; |
197 | /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ | 211 | /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ |
198 | pesect[PEOBJ_SECT_PDATA].flags = 0x40300040; | 212 | pesect[PEOBJ_SECT_PDATA].flags = 0x40300040; |
199 | 213 | ||
200 | memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1); | 214 | memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1); |
201 | pesect[PEOBJ_SECT_XDATA].ofs = sofs; | 215 | pesect[PEOBJ_SECT_XDATA].ofs = sofs; |
202 | sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */ | 216 | sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */ |
203 | pesect[PEOBJ_SECT_XDATA].relocofs = sofs; | 217 | pesect[PEOBJ_SECT_XDATA].relocofs = sofs; |
204 | sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; | 218 | sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; |
205 | /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ | 219 | /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ |
@@ -234,7 +248,7 @@ void emit_peobj(BuildCtx *ctx) | |||
234 | */ | 248 | */ |
235 | nrsym = ctx->nrelocsym; | 249 | nrsym = ctx->nrelocsym; |
236 | pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; | 250 | pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; |
237 | #if LJ_TARGET_X64 | 251 | #ifdef PEOBJ_PDATA_NRELOC |
238 | pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ | 252 | pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ |
239 | #endif | 253 | #endif |
240 | 254 | ||
@@ -259,7 +273,6 @@ void emit_peobj(BuildCtx *ctx) | |||
259 | 273 | ||
260 | #if LJ_TARGET_X64 | 274 | #if LJ_TARGET_X64 |
261 | { /* Write .pdata section. */ | 275 | { /* Write .pdata section. */ |
262 | uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs; | ||
263 | uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */ | 276 | uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */ |
264 | PEreloc reloc; | 277 | PEreloc reloc; |
265 | pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0; | 278 | pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0; |
@@ -308,6 +321,88 @@ void emit_peobj(BuildCtx *ctx) | |||
308 | reloc.type = PEOBJ_RELOC_ADDR32NB; | 321 | reloc.type = PEOBJ_RELOC_ADDR32NB; |
309 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); | 322 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); |
310 | } | 323 | } |
324 | #elif LJ_TARGET_ARM64 | ||
325 | /* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */ | ||
326 | { /* Write .pdata section. */ | ||
327 | uint32_t pdata[4]; | ||
328 | PEreloc reloc; | ||
329 | pdata[0] = 0; | ||
330 | pdata[1] = 0; | ||
331 | pdata[2] = fcofs; | ||
332 | pdata[3] = 4+24+4; | ||
333 | owrite(ctx, &pdata, sizeof(pdata)); | ||
334 | /* Start of .text and start of .xdata. */ | ||
335 | reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1; | ||
336 | reloc.type = PEOBJ_RELOC_ADDR32NB; | ||
337 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); | ||
338 | reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2; | ||
339 | reloc.type = PEOBJ_RELOC_ADDR32NB; | ||
340 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); | ||
341 | /* Start of vm_ffi_call and start of second part of .xdata. */ | ||
342 | reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1; | ||
343 | reloc.type = PEOBJ_RELOC_ADDR32NB; | ||
344 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); | ||
345 | reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2; | ||
346 | reloc.type = PEOBJ_RELOC_ADDR32NB; | ||
347 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); | ||
348 | } | ||
349 | { /* Write .xdata section. */ | ||
350 | uint32_t u32; | ||
351 | uint8_t *p, uwc[24]; | ||
352 | PEreloc reloc; | ||
353 | |||
354 | #define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2) | ||
355 | #define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */ | ||
356 | #define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */ | ||
357 | #define CSAVE_REGP(r,o) CBE16(0xc800 | (((r)-19)<< 6) | ((o) >> 3)) | ||
358 | #define CSAVE_REGS(r1,r2,o1) do { \ | ||
359 | int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \ | ||
360 | } while (0) | ||
361 | #define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3)) | ||
362 | #define CSAVE_FREGS(r1,r2,o1) do { \ | ||
363 | int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \ | ||
364 | } while (0) | ||
365 | #define CSAVE_REGX(r,o) CBE16(0xd400 | (((r) - 19) << 5) | (~(o) >> 3)) | ||
366 | #define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */ | ||
367 | #define CODE_NOP 0xe3 | ||
368 | #define CODE_END 0xe4 | ||
369 | #define CEND_ALIGN do { \ | ||
370 | *p++ = CODE_END; \ | ||
371 | while ((p - uwc) & 3) *p++ = CODE_NOP; \ | ||
372 | } while (0) | ||
373 | |||
374 | /* Unwind codes for .text section with handler. */ | ||
375 | p = uwc; | ||
376 | CALLOC_S(208); /* +1 */ | ||
377 | CSAVE_FPLR(192); /* +1 */ | ||
378 | CADD_FP(192); /* +2 */ | ||
379 | CSAVE_REGS(19, 28, 184); /* +5*2 */ | ||
380 | CSAVE_FREGS(8, 15, 104); /* +4*2 */ | ||
381 | CEND_ALIGN; /* +1 +1 -> 24 */ | ||
382 | |||
383 | u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2); | ||
384 | owrite(ctx, &u32, 4); | ||
385 | owrite(ctx, &uwc, 24); | ||
386 | |||
387 | u32 = 0; /* Handler RVA to be relocated at 4 + 24. */ | ||
388 | owrite(ctx, &u32, 4); | ||
389 | |||
390 | /* Unwind codes for vm_ffi_call without handler. */ | ||
391 | p = uwc; | ||
392 | CSAVE_FPLR(16); /* +1 */ | ||
393 | CADD_FP(16); /* +2 */ | ||
394 | CSAVE_REGX(19, -24); /* +2 */ | ||
395 | CSAVE_REGX(20, -32); /* +2 */ | ||
396 | CEND_ALIGN; /* +1 +0 -> 8 */ | ||
397 | |||
398 | u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2); | ||
399 | owrite(ctx, &u32, 4); | ||
400 | owrite(ctx, &uwc, 8); | ||
401 | |||
402 | reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2; | ||
403 | reloc.type = PEOBJ_RELOC_ADDR32NB; | ||
404 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); | ||
405 | } | ||
311 | #elif LJ_TARGET_X86 | 406 | #elif LJ_TARGET_X86 |
312 | /* Write .sxdata section. */ | 407 | /* Write .sxdata section. */ |
313 | for (i = 0; i < nrsym; i++) { | 408 | for (i = 0; i < nrsym; i++) { |
@@ -339,7 +434,7 @@ void emit_peobj(BuildCtx *ctx) | |||
339 | emit_peobj_sym(ctx, ctx->relocsym[i], 0, | 434 | emit_peobj_sym(ctx, ctx->relocsym[i], 0, |
340 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); | 435 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); |
341 | 436 | ||
342 | #if LJ_TARGET_X64 | 437 | #ifdef PEOBJ_PDATA_NRELOC |
343 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); | 438 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); |
344 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); | 439 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); |
345 | emit_peobj_sym(ctx, "lj_err_unwind_win", 0, | 440 | emit_peobj_sym(ctx, "lj_err_unwind_win", 0, |
diff --git a/src/lj_arch.h b/src/lj_arch.h index 3e920f2a..026e741f 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
@@ -57,7 +57,7 @@ | |||
57 | #define LUAJIT_TARGET LUAJIT_ARCH_X64 | 57 | #define LUAJIT_TARGET LUAJIT_ARCH_X64 |
58 | #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) | 58 | #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) |
59 | #define LUAJIT_TARGET LUAJIT_ARCH_ARM | 59 | #define LUAJIT_TARGET LUAJIT_ARCH_ARM |
60 | #elif defined(__aarch64__) | 60 | #elif defined(__aarch64__) || defined(_M_ARM64) |
61 | #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 | 61 | #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 |
62 | #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) | 62 | #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) |
63 | #define LUAJIT_TARGET LUAJIT_ARCH_PPC | 63 | #define LUAJIT_TARGET LUAJIT_ARCH_PPC |
diff --git a/src/lj_def.h b/src/lj_def.h index 88bc6336..1461d3d7 100644 --- a/src/lj_def.h +++ b/src/lj_def.h | |||
@@ -146,15 +146,9 @@ typedef uintptr_t BloomFilter; | |||
146 | #define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) | 146 | #define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) |
147 | 147 | ||
148 | #define lj_ffs(x) ((uint32_t)__builtin_ctz(x)) | 148 | #define lj_ffs(x) ((uint32_t)__builtin_ctz(x)) |
149 | /* Don't ask ... */ | ||
150 | #if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__)) | ||
151 | static LJ_AINLINE uint32_t lj_fls(uint32_t x) | ||
152 | { | ||
153 | uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r; | ||
154 | } | ||
155 | #else | ||
156 | #define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) | 149 | #define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) |
157 | #endif | 150 | #define lj_ffs64(x) ((uint32_t)__builtin_ctzll(x)) |
151 | #define lj_fls64(x) ((uint32_t)(__builtin_clzll(x)^63)) | ||
158 | 152 | ||
159 | #if defined(__arm__) | 153 | #if defined(__arm__) |
160 | static LJ_AINLINE uint32_t lj_bswap(uint32_t x) | 154 | static LJ_AINLINE uint32_t lj_bswap(uint32_t x) |
@@ -265,8 +259,12 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) | |||
265 | #else | 259 | #else |
266 | unsigned char _BitScanForward(unsigned long *, unsigned long); | 260 | unsigned char _BitScanForward(unsigned long *, unsigned long); |
267 | unsigned char _BitScanReverse(unsigned long *, unsigned long); | 261 | unsigned char _BitScanReverse(unsigned long *, unsigned long); |
262 | unsigned char _BitScanForward64(unsigned long *, uint64_t); | ||
263 | unsigned char _BitScanReverse64(unsigned long *, uint64_t); | ||
268 | #pragma intrinsic(_BitScanForward) | 264 | #pragma intrinsic(_BitScanForward) |
269 | #pragma intrinsic(_BitScanReverse) | 265 | #pragma intrinsic(_BitScanReverse) |
266 | #pragma intrinsic(_BitScanForward64) | ||
267 | #pragma intrinsic(_BitScanReverse64) | ||
270 | 268 | ||
271 | static LJ_AINLINE uint32_t lj_ffs(uint32_t x) | 269 | static LJ_AINLINE uint32_t lj_ffs(uint32_t x) |
272 | { | 270 | { |
@@ -277,6 +275,16 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) | |||
277 | { | 275 | { |
278 | unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r; | 276 | unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r; |
279 | } | 277 | } |
278 | |||
279 | static LJ_AINLINE uint32_t lj_ffs64(uint64_t x) | ||
280 | { | ||
281 | unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r; | ||
282 | } | ||
283 | |||
284 | static LJ_AINLINE uint32_t lj_fls64(uint64_t x) | ||
285 | { | ||
286 | unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r; | ||
287 | } | ||
280 | #endif | 288 | #endif |
281 | 289 | ||
282 | unsigned long _byteswap_ulong(unsigned long); | 290 | unsigned long _byteswap_ulong(unsigned long); |
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index 9161c958..fef5d973 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h | |||
@@ -30,15 +30,15 @@ static uint32_t emit_isk12(int64_t n) | |||
30 | uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n; | 30 | uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n; |
31 | uint32_t m = n < 0 ? 0x40000000 : 0; | 31 | uint32_t m = n < 0 ? 0x40000000 : 0; |
32 | if (k < 0x1000) { | 32 | if (k < 0x1000) { |
33 | return A64I_K12|m|A64F_U12(k); | 33 | return (uint32_t)(A64I_K12|m|A64F_U12(k)); |
34 | } else if ((k & 0xfff000) == k) { | 34 | } else if ((k & 0xfff000) == k) { |
35 | return A64I_K12|m|0x400000|A64F_U12(k>>12); | 35 | return (uint32_t)(A64I_K12|m|0x400000|A64F_U12(k>>12)); |
36 | } | 36 | } |
37 | return 0; | 37 | return 0; |
38 | } | 38 | } |
39 | 39 | ||
40 | #define emit_clz64(n) __builtin_clzll(n) | 40 | #define emit_clz64(n) (lj_fls64(n)^63) |
41 | #define emit_ctz64(n) __builtin_ctzll(n) | 41 | #define emit_ctz64(n) lj_ffs64(n) |
42 | 42 | ||
43 | /* Encode constant in K13 format for logical data processing instructions. */ | 43 | /* Encode constant in K13 format for logical data processing instructions. */ |
44 | static uint32_t emit_isk13(uint64_t n, int is64) | 44 | static uint32_t emit_isk13(uint64_t n, int is64) |
diff --git a/src/lj_mcode.c b/src/lj_mcode.c index c8ed95e1..8a4851dd 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c | |||
@@ -29,6 +29,11 @@ | |||
29 | #include <valgrind/valgrind.h> | 29 | #include <valgrind/valgrind.h> |
30 | #endif | 30 | #endif |
31 | 31 | ||
32 | #if LJ_TARGET_WINDOWS | ||
33 | #define WIN32_LEAN_AND_MEAN | ||
34 | #include <windows.h> | ||
35 | #endif | ||
36 | |||
32 | #if LJ_TARGET_IOS | 37 | #if LJ_TARGET_IOS |
33 | void sys_icache_invalidate(void *start, size_t len); | 38 | void sys_icache_invalidate(void *start, size_t len); |
34 | #endif | 39 | #endif |
@@ -41,6 +46,8 @@ void lj_mcode_sync(void *start, void *end) | |||
41 | #endif | 46 | #endif |
42 | #if LJ_TARGET_X86ORX64 | 47 | #if LJ_TARGET_X86ORX64 |
43 | UNUSED(start); UNUSED(end); | 48 | UNUSED(start); UNUSED(end); |
49 | #elif LJ_TARGET_WINDOWS | ||
50 | FlushInstructionCache(GetCurrentProcess(), start, (char *)end-(char *)start); | ||
44 | #elif LJ_TARGET_IOS | 51 | #elif LJ_TARGET_IOS |
45 | sys_icache_invalidate(start, (char *)end-(char *)start); | 52 | sys_icache_invalidate(start, (char *)end-(char *)start); |
46 | #elif LJ_TARGET_PPC | 53 | #elif LJ_TARGET_PPC |
@@ -58,9 +65,6 @@ void lj_mcode_sync(void *start, void *end) | |||
58 | 65 | ||
59 | #if LJ_TARGET_WINDOWS | 66 | #if LJ_TARGET_WINDOWS |
60 | 67 | ||
61 | #define WIN32_LEAN_AND_MEAN | ||
62 | #include <windows.h> | ||
63 | |||
64 | #define MCPROT_RW PAGE_READWRITE | 68 | #define MCPROT_RW PAGE_READWRITE |
65 | #define MCPROT_RX PAGE_EXECUTE_READ | 69 | #define MCPROT_RX PAGE_EXECUTE_READ |
66 | #define MCPROT_RWX PAGE_EXECUTE_READWRITE | 70 | #define MCPROT_RWX PAGE_EXECUTE_READWRITE |
diff --git a/src/lj_target.h b/src/lj_target.h index 09d19bd9..e7322c07 100644 --- a/src/lj_target.h +++ b/src/lj_target.h | |||
@@ -58,9 +58,13 @@ typedef uint32_t RegSP; | |||
58 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 | 58 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 |
59 | typedef uint64_t RegSet; | 59 | typedef uint64_t RegSet; |
60 | #define RSET_BITS 6 | 60 | #define RSET_BITS 6 |
61 | #define rset_picktop_(rs) ((Reg)lj_fls64(rs)) | ||
62 | #define rset_pickbot_(rs) ((Reg)lj_ffs64(rs)) | ||
61 | #else | 63 | #else |
62 | typedef uint32_t RegSet; | 64 | typedef uint32_t RegSet; |
63 | #define RSET_BITS 5 | 65 | #define RSET_BITS 5 |
66 | #define rset_picktop_(rs) ((Reg)lj_fls(rs)) | ||
67 | #define rset_pickbot_(rs) ((Reg)lj_ffs(rs)) | ||
64 | #endif | 68 | #endif |
65 | 69 | ||
66 | #define RID2RSET(r) (((RegSet)1) << (r)) | 70 | #define RID2RSET(r) (((RegSet)1) << (r)) |
@@ -71,13 +75,6 @@ typedef uint32_t RegSet; | |||
71 | #define rset_set(rs, r) (rs |= RID2RSET(r)) | 75 | #define rset_set(rs, r) (rs |= RID2RSET(r)) |
72 | #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) | 76 | #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) |
73 | #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) | 77 | #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) |
74 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 | ||
75 | #define rset_picktop_(rs) ((Reg)(__builtin_clzll(rs)^63)) | ||
76 | #define rset_pickbot_(rs) ((Reg)__builtin_ctzll(rs)) | ||
77 | #else | ||
78 | #define rset_picktop_(rs) ((Reg)lj_fls(rs)) | ||
79 | #define rset_pickbot_(rs) ((Reg)lj_ffs(rs)) | ||
80 | #endif | ||
81 | 78 | ||
82 | /* -- Register allocation cost -------------------------------------------- */ | 79 | /* -- Register allocation cost -------------------------------------------- */ |
83 | 80 | ||
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index f9bf2528..2cfcf26e 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat | |||
@@ -34,20 +34,26 @@ | |||
34 | if exist minilua.exe.manifest^ | 34 | if exist minilua.exe.manifest^ |
35 | %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe | 35 | %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe |
36 | 36 | ||
37 | @set DASMFLAGS=-D WIN -D JIT -D FFI -D P64 | 37 | @set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64 |
38 | @set LJARCH=x64 | 38 | @set LJARCH=x64 |
39 | @minilua | 39 | @minilua |
40 | @if errorlevel 8 goto :X64 | 40 | @if errorlevel 8 goto :NO32 |
41 | @set DASC=vm_x86.dasc | 41 | @set DASC=vm_x86.dasc |
42 | @set DASMFLAGS=-D WIN -D JIT -D FFI | 42 | @set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU |
43 | @set LJARCH=x86 | 43 | @set LJARCH=x86 |
44 | @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 | 44 | @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 |
45 | @goto :DA | ||
46 | :NO32 | ||
47 | @if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64 | ||
48 | @set DASC=vm_arm64.dasc | ||
49 | @set LJARCH=arm64 | ||
50 | @goto :DA | ||
45 | :X64 | 51 | :X64 |
46 | @if "%1" neq "nogc64" goto :GC64 | 52 | @if "%1" neq "nogc64" goto :DA |
47 | @shift | 53 | @shift |
48 | @set DASC=vm_x86.dasc | 54 | @set DASC=vm_x86.dasc |
49 | @set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 | 55 | @set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 |
50 | :GC64 | 56 | :DA |
51 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% | 57 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% |
52 | @if errorlevel 1 goto :BAD | 58 | @if errorlevel 1 goto :BAD |
53 | 59 | ||