diff options
Diffstat (limited to 'src/lj_jit.h')
-rw-r--r-- | src/lj_jit.h | 122 |
1 files changed, 102 insertions, 20 deletions
diff --git a/src/lj_jit.h b/src/lj_jit.h index 0bc62583..5d41ef4b 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -14,18 +14,16 @@ | |||
14 | 14 | ||
15 | /* CPU-specific JIT engine flags. */ | 15 | /* CPU-specific JIT engine flags. */ |
16 | #if LJ_TARGET_X86ORX64 | 16 | #if LJ_TARGET_X86ORX64 |
17 | #define JIT_F_CMOV 0x00000010 | 17 | #define JIT_F_SSE2 0x00000010 |
18 | #define JIT_F_SSE2 0x00000020 | 18 | #define JIT_F_SSE3 0x00000020 |
19 | #define JIT_F_SSE3 0x00000040 | 19 | #define JIT_F_SSE4_1 0x00000040 |
20 | #define JIT_F_SSE4_1 0x00000080 | 20 | #define JIT_F_PREFER_IMUL 0x00000080 |
21 | #define JIT_F_P4 0x00000100 | 21 | #define JIT_F_LEA_AGU 0x00000100 |
22 | #define JIT_F_PREFER_IMUL 0x00000200 | 22 | #define JIT_F_BMI2 0x00000200 |
23 | #define JIT_F_SPLIT_XMM 0x00000400 | ||
24 | #define JIT_F_LEA_AGU 0x00000800 | ||
25 | 23 | ||
26 | /* Names for the CPU-specific flags. Must match the order above. */ | 24 | /* Names for the CPU-specific flags. Must match the order above. */ |
27 | #define JIT_F_CPU_FIRST JIT_F_CMOV | 25 | #define JIT_F_CPU_FIRST JIT_F_SSE2 |
28 | #define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" | 26 | #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" |
29 | #elif LJ_TARGET_ARM | 27 | #elif LJ_TARGET_ARM |
30 | #define JIT_F_ARMV6_ 0x00000010 | 28 | #define JIT_F_ARMV6_ 0x00000010 |
31 | #define JIT_F_ARMV6T2_ 0x00000020 | 29 | #define JIT_F_ARMV6T2_ 0x00000020 |
@@ -48,12 +46,16 @@ | |||
48 | #define JIT_F_CPU_FIRST JIT_F_SQRT | 46 | #define JIT_F_CPU_FIRST JIT_F_SQRT |
49 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" | 47 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" |
50 | #elif LJ_TARGET_MIPS | 48 | #elif LJ_TARGET_MIPS |
51 | #define JIT_F_MIPS32R2 0x00000010 | 49 | #define JIT_F_MIPSXXR2 0x00000010 |
52 | 50 | ||
53 | /* Names for the CPU-specific flags. Must match the order above. */ | 51 | /* Names for the CPU-specific flags. Must match the order above. */ |
54 | #define JIT_F_CPU_FIRST JIT_F_MIPS32R2 | 52 | #define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 |
53 | #if LJ_TARGET_MIPS32 | ||
55 | #define JIT_F_CPUSTRING "\010MIPS32R2" | 54 | #define JIT_F_CPUSTRING "\010MIPS32R2" |
56 | #else | 55 | #else |
56 | #define JIT_F_CPUSTRING "\010MIPS64R2" | ||
57 | #endif | ||
58 | #else | ||
57 | #define JIT_F_CPU_FIRST 0 | 59 | #define JIT_F_CPU_FIRST 0 |
58 | #define JIT_F_CPUSTRING "" | 60 | #define JIT_F_CPUSTRING "" |
59 | #endif | 61 | #endif |
@@ -100,6 +102,7 @@ | |||
100 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ | 102 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ |
101 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ | 103 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ |
102 | _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ | 104 | _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ |
105 | _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \ | ||
103 | \ | 106 | \ |
104 | _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ | 107 | _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ |
105 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ | 108 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ |
@@ -186,14 +189,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); | |||
186 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) | 189 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) |
187 | #define SNAP_TR(slot, tr) \ | 190 | #define SNAP_TR(slot, tr) \ |
188 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) | 191 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) |
192 | #if !LJ_FR2 | ||
189 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) | 193 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) |
194 | #endif | ||
190 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) | 195 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) |
191 | #define snap_ref(sn) ((sn) & 0xffff) | 196 | #define snap_ref(sn) ((sn) & 0xffff) |
192 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) | 197 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) |
193 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) | 198 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) |
194 | #define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) | ||
195 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) | 199 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) |
196 | 200 | ||
201 | static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) | ||
202 | { | ||
203 | #if LJ_FR2 | ||
204 | uint64_t pcbase; | ||
205 | memcpy(&pcbase, sn, sizeof(uint64_t)); | ||
206 | return (const BCIns *)(pcbase >> 8); | ||
207 | #else | ||
208 | return (const BCIns *)(uintptr_t)*sn; | ||
209 | #endif | ||
210 | } | ||
211 | |||
197 | /* Snapshot and exit numbers. */ | 212 | /* Snapshot and exit numbers. */ |
198 | typedef uint32_t SnapNo; | 213 | typedef uint32_t SnapNo; |
199 | typedef uint32_t ExitNo; | 214 | typedef uint32_t ExitNo; |
@@ -211,7 +226,8 @@ typedef enum { | |||
211 | LJ_TRLINK_UPREC, /* Up-recursion. */ | 226 | LJ_TRLINK_UPREC, /* Up-recursion. */ |
212 | LJ_TRLINK_DOWNREC, /* Down-recursion. */ | 227 | LJ_TRLINK_DOWNREC, /* Down-recursion. */ |
213 | LJ_TRLINK_INTERP, /* Fallback to interpreter. */ | 228 | LJ_TRLINK_INTERP, /* Fallback to interpreter. */ |
214 | LJ_TRLINK_RETURN /* Return to interpreter. */ | 229 | LJ_TRLINK_RETURN, /* Return to interpreter. */ |
230 | LJ_TRLINK_STITCH /* Trace stitching. */ | ||
215 | } TraceLink; | 231 | } TraceLink; |
216 | 232 | ||
217 | /* Trace object. */ | 233 | /* Trace object. */ |
@@ -219,6 +235,9 @@ typedef struct GCtrace { | |||
219 | GCHeader; | 235 | GCHeader; |
220 | uint16_t nsnap; /* Number of snapshots. */ | 236 | uint16_t nsnap; /* Number of snapshots. */ |
221 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ | 237 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ |
238 | #if LJ_GC64 | ||
239 | uint32_t unused_gc64; | ||
240 | #endif | ||
222 | GCRef gclist; | 241 | GCRef gclist; |
223 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ | 242 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ |
224 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ | 243 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ |
@@ -294,6 +313,16 @@ typedef struct ScEvEntry { | |||
294 | uint8_t dir; /* Direction. 1: +, 0: -. */ | 313 | uint8_t dir; /* Direction. 1: +, 0: -. */ |
295 | } ScEvEntry; | 314 | } ScEvEntry; |
296 | 315 | ||
316 | /* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */ | ||
317 | typedef struct RBCHashEntry { | ||
318 | MRef pc; /* Bytecode PC. */ | ||
319 | GCRef pt; /* Prototype. */ | ||
320 | IRRef ref; /* IR reference. */ | ||
321 | } RBCHashEntry; | ||
322 | |||
323 | /* Number of slots in the reverse bytecode hash table. Must be a power of 2. */ | ||
324 | #define RBCHASH_SLOTS 8 | ||
325 | |||
297 | /* 128 bit SIMD constants. */ | 326 | /* 128 bit SIMD constants. */ |
298 | enum { | 327 | enum { |
299 | LJ_KSIMD_ABS, | 328 | LJ_KSIMD_ABS, |
@@ -301,12 +330,51 @@ enum { | |||
301 | LJ_KSIMD__MAX | 330 | LJ_KSIMD__MAX |
302 | }; | 331 | }; |
303 | 332 | ||
333 | enum { | ||
334 | #if LJ_TARGET_X86ORX64 | ||
335 | LJ_K64_TOBIT, /* 2^52 + 2^51 */ | ||
336 | LJ_K64_2P64, /* 2^64 */ | ||
337 | LJ_K64_M2P64, /* -2^64 */ | ||
338 | #if LJ_32 | ||
339 | LJ_K64_M2P64_31, /* -2^64 or -2^31 */ | ||
340 | #else | ||
341 | LJ_K64_M2P64_31 = LJ_K64_M2P64, | ||
342 | #endif | ||
343 | #endif | ||
344 | #if LJ_TARGET_MIPS | ||
345 | LJ_K64_2P31, /* 2^31 */ | ||
346 | #if LJ_64 | ||
347 | LJ_K64_2P63, /* 2^63 */ | ||
348 | LJ_K64_M2P64, /* -2^64 */ | ||
349 | #endif | ||
350 | #endif | ||
351 | LJ_K64__MAX, | ||
352 | }; | ||
353 | |||
354 | enum { | ||
355 | #if LJ_TARGET_X86ORX64 | ||
356 | LJ_K32_M2P64_31, /* -2^64 or -2^31 */ | ||
357 | #endif | ||
358 | #if LJ_TARGET_PPC | ||
359 | LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ | ||
360 | LJ_K32_2P52, /* 2^52 */ | ||
361 | #endif | ||
362 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | ||
363 | LJ_K32_2P31, /* 2^31 */ | ||
364 | #endif | ||
365 | #if LJ_TARGET_MIPS64 | ||
366 | LJ_K32_2P63, /* 2^63 */ | ||
367 | LJ_K32_M2P64, /* -2^64 */ | ||
368 | #endif | ||
369 | LJ_K32__MAX | ||
370 | }; | ||
371 | |||
304 | /* Get 16 byte aligned pointer to SIMD constant. */ | 372 | /* Get 16 byte aligned pointer to SIMD constant. */ |
305 | #define LJ_KSIMD(J, n) \ | 373 | #define LJ_KSIMD(J, n) \ |
306 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) | 374 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) |
307 | 375 | ||
308 | /* Set/reset flag to activate the SPLIT pass for the current trace. */ | 376 | /* Set/reset flag to activate the SPLIT pass for the current trace. */ |
309 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 377 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) |
310 | #define lj_needsplit(J) (J->needsplit = 1) | 378 | #define lj_needsplit(J) (J->needsplit = 1) |
311 | #define lj_resetsplit(J) (J->needsplit = 0) | 379 | #define lj_resetsplit(J) (J->needsplit = 0) |
312 | #else | 380 | #else |
@@ -317,13 +385,14 @@ enum { | |||
317 | /* Fold state is used to fold instructions on-the-fly. */ | 385 | /* Fold state is used to fold instructions on-the-fly. */ |
318 | typedef struct FoldState { | 386 | typedef struct FoldState { |
319 | IRIns ins; /* Currently emitted instruction. */ | 387 | IRIns ins; /* Currently emitted instruction. */ |
320 | IRIns left; /* Instruction referenced by left operand. */ | 388 | IRIns left[2]; /* Instruction referenced by left operand. */ |
321 | IRIns right; /* Instruction referenced by right operand. */ | 389 | IRIns right[2]; /* Instruction referenced by right operand. */ |
322 | } FoldState; | 390 | } FoldState; |
323 | 391 | ||
324 | /* JIT compiler state. */ | 392 | /* JIT compiler state. */ |
325 | typedef struct jit_State { | 393 | typedef struct jit_State { |
326 | GCtrace cur; /* Current trace. */ | 394 | GCtrace cur; /* Current trace. */ |
395 | GCtrace *curfinal; /* Final address of current trace (set during asm). */ | ||
327 | 396 | ||
328 | lua_State *L; /* Current Lua state. */ | 397 | lua_State *L; /* Current Lua state. */ |
329 | const BCIns *pc; /* Current PC. */ | 398 | const BCIns *pc; /* Current PC. */ |
@@ -353,8 +422,9 @@ typedef struct jit_State { | |||
353 | int32_t framedepth; /* Current frame depth. */ | 422 | int32_t framedepth; /* Current frame depth. */ |
354 | int32_t retdepth; /* Return frame depth (count of RETF). */ | 423 | int32_t retdepth; /* Return frame depth (count of RETF). */ |
355 | 424 | ||
356 | MRef k64; /* Pointer to chained array of 64 bit constants. */ | ||
357 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ | 425 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ |
426 | TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */ | ||
427 | uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ | ||
358 | 428 | ||
359 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ | 429 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ |
360 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ | 430 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ |
@@ -367,13 +437,15 @@ typedef struct jit_State { | |||
367 | MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ | 437 | MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ |
368 | 438 | ||
369 | PostProc postproc; /* Required post-processing after execution. */ | 439 | PostProc postproc; /* Required post-processing after execution. */ |
370 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 440 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) |
371 | int needsplit; /* Need SPLIT pass. */ | 441 | uint8_t needsplit; /* Need SPLIT pass. */ |
372 | #endif | 442 | #endif |
443 | uint8_t retryrec; /* Retry recording. */ | ||
373 | 444 | ||
374 | GCRef *trace; /* Array of traces. */ | 445 | GCRef *trace; /* Array of traces. */ |
375 | TraceNo freetrace; /* Start of scan for next free trace. */ | 446 | TraceNo freetrace; /* Start of scan for next free trace. */ |
376 | MSize sizetrace; /* Size of trace array. */ | 447 | MSize sizetrace; /* Size of trace array. */ |
448 | IRRef1 ktrace; /* Reference to KGC with GCtrace. */ | ||
377 | 449 | ||
378 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ | 450 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ |
379 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ | 451 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ |
@@ -386,6 +458,10 @@ typedef struct jit_State { | |||
386 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ | 458 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ |
387 | uint32_t prngstate; /* PRNG state. */ | 459 | uint32_t prngstate; /* PRNG state. */ |
388 | 460 | ||
461 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
462 | RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */ | ||
463 | #endif | ||
464 | |||
389 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ | 465 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ |
390 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ | 466 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ |
391 | 467 | ||
@@ -406,6 +482,12 @@ typedef struct jit_State { | |||
406 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ | 482 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ |
407 | 483 | ||
408 | TValue errinfo; /* Additional info element for trace errors. */ | 484 | TValue errinfo; /* Additional info element for trace errors. */ |
485 | |||
486 | #if LJ_HASPROFILE | ||
487 | GCproto *prev_pt; /* Previous prototype. */ | ||
488 | BCLine prev_line; /* Previous line. */ | ||
489 | int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ | ||
490 | #endif | ||
409 | } | 491 | } |
410 | #if LJ_TARGET_ARM | 492 | #if LJ_TARGET_ARM |
411 | LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ | 493 | LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ |