diff options
Diffstat (limited to 'src/lj_jit.h')
-rw-r--r-- | src/lj_jit.h | 130 |
1 files changed, 110 insertions, 20 deletions
diff --git a/src/lj_jit.h b/src/lj_jit.h index 0e1c4827..f179f17f 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -14,18 +14,16 @@ | |||
14 | 14 | ||
15 | /* CPU-specific JIT engine flags. */ | 15 | /* CPU-specific JIT engine flags. */ |
16 | #if LJ_TARGET_X86ORX64 | 16 | #if LJ_TARGET_X86ORX64 |
17 | #define JIT_F_CMOV 0x00000010 | 17 | #define JIT_F_SSE2 0x00000010 |
18 | #define JIT_F_SSE2 0x00000020 | 18 | #define JIT_F_SSE3 0x00000020 |
19 | #define JIT_F_SSE3 0x00000040 | 19 | #define JIT_F_SSE4_1 0x00000040 |
20 | #define JIT_F_SSE4_1 0x00000080 | 20 | #define JIT_F_PREFER_IMUL 0x00000080 |
21 | #define JIT_F_P4 0x00000100 | 21 | #define JIT_F_LEA_AGU 0x00000100 |
22 | #define JIT_F_PREFER_IMUL 0x00000200 | 22 | #define JIT_F_BMI2 0x00000200 |
23 | #define JIT_F_SPLIT_XMM 0x00000400 | ||
24 | #define JIT_F_LEA_AGU 0x00000800 | ||
25 | 23 | ||
26 | /* Names for the CPU-specific flags. Must match the order above. */ | 24 | /* Names for the CPU-specific flags. Must match the order above. */ |
27 | #define JIT_F_CPU_FIRST JIT_F_CMOV | 25 | #define JIT_F_CPU_FIRST JIT_F_SSE2 |
28 | #define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" | 26 | #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" |
29 | #elif LJ_TARGET_ARM | 27 | #elif LJ_TARGET_ARM |
30 | #define JIT_F_ARMV6_ 0x00000010 | 28 | #define JIT_F_ARMV6_ 0x00000010 |
31 | #define JIT_F_ARMV6T2_ 0x00000020 | 29 | #define JIT_F_ARMV6T2_ 0x00000020 |
@@ -48,11 +46,23 @@ | |||
48 | #define JIT_F_CPU_FIRST JIT_F_SQRT | 46 | #define JIT_F_CPU_FIRST JIT_F_SQRT |
49 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" | 47 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" |
50 | #elif LJ_TARGET_MIPS | 48 | #elif LJ_TARGET_MIPS |
51 | #define JIT_F_MIPS32R2 0x00000010 | 49 | #define JIT_F_MIPSXXR2 0x00000010 |
52 | 50 | ||
53 | /* Names for the CPU-specific flags. Must match the order above. */ | 51 | /* Names for the CPU-specific flags. Must match the order above. */ |
54 | #define JIT_F_CPU_FIRST JIT_F_MIPS32R2 | 52 | #define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 |
53 | #if LJ_TARGET_MIPS32 | ||
54 | #if LJ_TARGET_MIPSR6 | ||
55 | #define JIT_F_CPUSTRING "\010MIPS32R6" | ||
56 | #else | ||
55 | #define JIT_F_CPUSTRING "\010MIPS32R2" | 57 | #define JIT_F_CPUSTRING "\010MIPS32R2" |
58 | #endif | ||
59 | #else | ||
60 | #if LJ_TARGET_MIPSR6 | ||
61 | #define JIT_F_CPUSTRING "\010MIPS64R6" | ||
62 | #else | ||
63 | #define JIT_F_CPUSTRING "\010MIPS64R2" | ||
64 | #endif | ||
65 | #endif | ||
56 | #else | 66 | #else |
57 | #define JIT_F_CPU_FIRST 0 | 67 | #define JIT_F_CPU_FIRST 0 |
58 | #define JIT_F_CPUSTRING "" | 68 | #define JIT_F_CPUSTRING "" |
@@ -100,6 +110,7 @@ | |||
100 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ | 110 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ |
101 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ | 111 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ |
102 | _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ | 112 | _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ |
113 | _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \ | ||
103 | \ | 114 | \ |
104 | _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ | 115 | _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ |
105 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ | 116 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ |
@@ -186,14 +197,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); | |||
186 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) | 197 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) |
187 | #define SNAP_TR(slot, tr) \ | 198 | #define SNAP_TR(slot, tr) \ |
188 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) | 199 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) |
200 | #if !LJ_FR2 | ||
189 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) | 201 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) |
202 | #endif | ||
190 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) | 203 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) |
191 | #define snap_ref(sn) ((sn) & 0xffff) | 204 | #define snap_ref(sn) ((sn) & 0xffff) |
192 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) | 205 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) |
193 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) | 206 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) |
194 | #define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) | ||
195 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) | 207 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) |
196 | 208 | ||
209 | static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) | ||
210 | { | ||
211 | #if LJ_FR2 | ||
212 | uint64_t pcbase; | ||
213 | memcpy(&pcbase, sn, sizeof(uint64_t)); | ||
214 | return (const BCIns *)(pcbase >> 8); | ||
215 | #else | ||
216 | return (const BCIns *)(uintptr_t)*sn; | ||
217 | #endif | ||
218 | } | ||
219 | |||
197 | /* Snapshot and exit numbers. */ | 220 | /* Snapshot and exit numbers. */ |
198 | typedef uint32_t SnapNo; | 221 | typedef uint32_t SnapNo; |
199 | typedef uint32_t ExitNo; | 222 | typedef uint32_t ExitNo; |
@@ -211,7 +234,8 @@ typedef enum { | |||
211 | LJ_TRLINK_UPREC, /* Up-recursion. */ | 234 | LJ_TRLINK_UPREC, /* Up-recursion. */ |
212 | LJ_TRLINK_DOWNREC, /* Down-recursion. */ | 235 | LJ_TRLINK_DOWNREC, /* Down-recursion. */ |
213 | LJ_TRLINK_INTERP, /* Fallback to interpreter. */ | 236 | LJ_TRLINK_INTERP, /* Fallback to interpreter. */ |
214 | LJ_TRLINK_RETURN /* Return to interpreter. */ | 237 | LJ_TRLINK_RETURN, /* Return to interpreter. */ |
238 | LJ_TRLINK_STITCH /* Trace stitching. */ | ||
215 | } TraceLink; | 239 | } TraceLink; |
216 | 240 | ||
217 | /* Trace object. */ | 241 | /* Trace object. */ |
@@ -219,6 +243,9 @@ typedef struct GCtrace { | |||
219 | GCHeader; | 243 | GCHeader; |
220 | uint16_t nsnap; /* Number of snapshots. */ | 244 | uint16_t nsnap; /* Number of snapshots. */ |
221 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ | 245 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ |
246 | #if LJ_GC64 | ||
247 | uint32_t unused_gc64; | ||
248 | #endif | ||
222 | GCRef gclist; | 249 | GCRef gclist; |
223 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ | 250 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ |
224 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ | 251 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ |
@@ -294,6 +321,16 @@ typedef struct ScEvEntry { | |||
294 | uint8_t dir; /* Direction. 1: +, 0: -. */ | 321 | uint8_t dir; /* Direction. 1: +, 0: -. */ |
295 | } ScEvEntry; | 322 | } ScEvEntry; |
296 | 323 | ||
324 | /* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */ | ||
325 | typedef struct RBCHashEntry { | ||
326 | MRef pc; /* Bytecode PC. */ | ||
327 | GCRef pt; /* Prototype. */ | ||
328 | IRRef ref; /* IR reference. */ | ||
329 | } RBCHashEntry; | ||
330 | |||
331 | /* Number of slots in the reverse bytecode hash table. Must be a power of 2. */ | ||
332 | #define RBCHASH_SLOTS 8 | ||
333 | |||
297 | /* 128 bit SIMD constants. */ | 334 | /* 128 bit SIMD constants. */ |
298 | enum { | 335 | enum { |
299 | LJ_KSIMD_ABS, | 336 | LJ_KSIMD_ABS, |
@@ -301,12 +338,51 @@ enum { | |||
301 | LJ_KSIMD__MAX | 338 | LJ_KSIMD__MAX |
302 | }; | 339 | }; |
303 | 340 | ||
341 | enum { | ||
342 | #if LJ_TARGET_X86ORX64 | ||
343 | LJ_K64_TOBIT, /* 2^52 + 2^51 */ | ||
344 | LJ_K64_2P64, /* 2^64 */ | ||
345 | LJ_K64_M2P64, /* -2^64 */ | ||
346 | #if LJ_32 | ||
347 | LJ_K64_M2P64_31, /* -2^64 or -2^31 */ | ||
348 | #else | ||
349 | LJ_K64_M2P64_31 = LJ_K64_M2P64, | ||
350 | #endif | ||
351 | #endif | ||
352 | #if LJ_TARGET_MIPS | ||
353 | LJ_K64_2P31, /* 2^31 */ | ||
354 | #if LJ_64 | ||
355 | LJ_K64_2P63, /* 2^63 */ | ||
356 | LJ_K64_M2P64, /* -2^64 */ | ||
357 | #endif | ||
358 | #endif | ||
359 | LJ_K64__MAX, | ||
360 | }; | ||
361 | |||
362 | enum { | ||
363 | #if LJ_TARGET_X86ORX64 | ||
364 | LJ_K32_M2P64_31, /* -2^64 or -2^31 */ | ||
365 | #endif | ||
366 | #if LJ_TARGET_PPC | ||
367 | LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ | ||
368 | LJ_K32_2P52, /* 2^52 */ | ||
369 | #endif | ||
370 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | ||
371 | LJ_K32_2P31, /* 2^31 */ | ||
372 | #endif | ||
373 | #if LJ_TARGET_MIPS64 | ||
374 | LJ_K32_2P63, /* 2^63 */ | ||
375 | LJ_K32_M2P64, /* -2^64 */ | ||
376 | #endif | ||
377 | LJ_K32__MAX | ||
378 | }; | ||
379 | |||
304 | /* Get 16 byte aligned pointer to SIMD constant. */ | 380 | /* Get 16 byte aligned pointer to SIMD constant. */ |
305 | #define LJ_KSIMD(J, n) \ | 381 | #define LJ_KSIMD(J, n) \ |
306 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) | 382 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) |
307 | 383 | ||
308 | /* Set/reset flag to activate the SPLIT pass for the current trace. */ | 384 | /* Set/reset flag to activate the SPLIT pass for the current trace. */ |
309 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 385 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) |
310 | #define lj_needsplit(J) (J->needsplit = 1) | 386 | #define lj_needsplit(J) (J->needsplit = 1) |
311 | #define lj_resetsplit(J) (J->needsplit = 0) | 387 | #define lj_resetsplit(J) (J->needsplit = 0) |
312 | #else | 388 | #else |
@@ -317,13 +393,14 @@ enum { | |||
317 | /* Fold state is used to fold instructions on-the-fly. */ | 393 | /* Fold state is used to fold instructions on-the-fly. */ |
318 | typedef struct FoldState { | 394 | typedef struct FoldState { |
319 | IRIns ins; /* Currently emitted instruction. */ | 395 | IRIns ins; /* Currently emitted instruction. */ |
320 | IRIns left; /* Instruction referenced by left operand. */ | 396 | IRIns left[2]; /* Instruction referenced by left operand. */ |
321 | IRIns right; /* Instruction referenced by right operand. */ | 397 | IRIns right[2]; /* Instruction referenced by right operand. */ |
322 | } FoldState; | 398 | } FoldState; |
323 | 399 | ||
324 | /* JIT compiler state. */ | 400 | /* JIT compiler state. */ |
325 | typedef struct jit_State { | 401 | typedef struct jit_State { |
326 | GCtrace cur; /* Current trace. */ | 402 | GCtrace cur; /* Current trace. */ |
403 | GCtrace *curfinal; /* Final address of current trace (set during asm). */ | ||
327 | 404 | ||
328 | lua_State *L; /* Current Lua state. */ | 405 | lua_State *L; /* Current Lua state. */ |
329 | const BCIns *pc; /* Current PC. */ | 406 | const BCIns *pc; /* Current PC. */ |
@@ -353,8 +430,9 @@ typedef struct jit_State { | |||
353 | int32_t framedepth; /* Current frame depth. */ | 430 | int32_t framedepth; /* Current frame depth. */ |
354 | int32_t retdepth; /* Return frame depth (count of RETF). */ | 431 | int32_t retdepth; /* Return frame depth (count of RETF). */ |
355 | 432 | ||
356 | MRef k64; /* Pointer to chained array of 64 bit constants. */ | ||
357 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ | 433 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ |
434 | TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */ | ||
435 | uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ | ||
358 | 436 | ||
359 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ | 437 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ |
360 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ | 438 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ |
@@ -367,13 +445,15 @@ typedef struct jit_State { | |||
367 | MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ | 445 | MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ |
368 | 446 | ||
369 | PostProc postproc; /* Required post-processing after execution. */ | 447 | PostProc postproc; /* Required post-processing after execution. */ |
370 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 448 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) |
371 | int needsplit; /* Need SPLIT pass. */ | 449 | uint8_t needsplit; /* Need SPLIT pass. */ |
372 | #endif | 450 | #endif |
451 | uint8_t retryrec; /* Retry recording. */ | ||
373 | 452 | ||
374 | GCRef *trace; /* Array of traces. */ | 453 | GCRef *trace; /* Array of traces. */ |
375 | TraceNo freetrace; /* Start of scan for next free trace. */ | 454 | TraceNo freetrace; /* Start of scan for next free trace. */ |
376 | MSize sizetrace; /* Size of trace array. */ | 455 | MSize sizetrace; /* Size of trace array. */ |
456 | IRRef1 ktrace; /* Reference to KGC with GCtrace. */ | ||
377 | 457 | ||
378 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ | 458 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ |
379 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ | 459 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ |
@@ -386,6 +466,10 @@ typedef struct jit_State { | |||
386 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ | 466 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ |
387 | uint32_t prngstate; /* PRNG state. */ | 467 | uint32_t prngstate; /* PRNG state. */ |
388 | 468 | ||
469 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
470 | RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */ | ||
471 | #endif | ||
472 | |||
389 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ | 473 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ |
390 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ | 474 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ |
391 | 475 | ||
@@ -406,6 +490,12 @@ typedef struct jit_State { | |||
406 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ | 490 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ |
407 | 491 | ||
408 | TValue errinfo; /* Additional info element for trace errors. */ | 492 | TValue errinfo; /* Additional info element for trace errors. */ |
493 | |||
494 | #if LJ_HASPROFILE | ||
495 | GCproto *prev_pt; /* Previous prototype. */ | ||
496 | BCLine prev_line; /* Previous line. */ | ||
497 | int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ | ||
498 | #endif | ||
409 | } | 499 | } |
410 | #if LJ_TARGET_ARM | 500 | #if LJ_TARGET_ARM |
411 | LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ | 501 | LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ |