diff options
Diffstat (limited to 'src/lj_jit.h')
-rw-r--r-- | src/lj_jit.h | 118 |
1 file changed, 100 insertions, 18 deletions
diff --git a/src/lj_jit.h b/src/lj_jit.h
index a2e8fd92..92054e3d 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -14,18 +14,16 @@ | |||
14 | 14 | ||
15 | /* CPU-specific JIT engine flags. */ | 15 | /* CPU-specific JIT engine flags. */ |
16 | #if LJ_TARGET_X86ORX64 | 16 | #if LJ_TARGET_X86ORX64 |
17 | #define JIT_F_CMOV 0x00000010 | 17 | #define JIT_F_SSE2 0x00000010 |
18 | #define JIT_F_SSE2 0x00000020 | 18 | #define JIT_F_SSE3 0x00000020 |
19 | #define JIT_F_SSE3 0x00000040 | 19 | #define JIT_F_SSE4_1 0x00000040 |
20 | #define JIT_F_SSE4_1 0x00000080 | 20 | #define JIT_F_PREFER_IMUL 0x00000080 |
21 | #define JIT_F_P4 0x00000100 | 21 | #define JIT_F_LEA_AGU 0x00000100 |
22 | #define JIT_F_PREFER_IMUL 0x00000200 | 22 | #define JIT_F_BMI2 0x00000200 |
23 | #define JIT_F_SPLIT_XMM 0x00000400 | ||
24 | #define JIT_F_LEA_AGU 0x00000800 | ||
25 | 23 | ||
26 | /* Names for the CPU-specific flags. Must match the order above. */ | 24 | /* Names for the CPU-specific flags. Must match the order above. */ |
27 | #define JIT_F_CPU_FIRST JIT_F_CMOV | 25 | #define JIT_F_CPU_FIRST JIT_F_SSE2 |
28 | #define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" | 26 | #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" |
29 | #elif LJ_TARGET_ARM | 27 | #elif LJ_TARGET_ARM |
30 | #define JIT_F_ARMV6_ 0x00000010 | 28 | #define JIT_F_ARMV6_ 0x00000010 |
31 | #define JIT_F_ARMV6T2_ 0x00000020 | 29 | #define JIT_F_ARMV6T2_ 0x00000020 |
@@ -48,12 +46,16 @@ | |||
48 | #define JIT_F_CPU_FIRST JIT_F_SQRT | 46 | #define JIT_F_CPU_FIRST JIT_F_SQRT |
49 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" | 47 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" |
50 | #elif LJ_TARGET_MIPS | 48 | #elif LJ_TARGET_MIPS |
51 | #define JIT_F_MIPS32R2 0x00000010 | 49 | #define JIT_F_MIPSXXR2 0x00000010 |
52 | 50 | ||
53 | /* Names for the CPU-specific flags. Must match the order above. */ | 51 | /* Names for the CPU-specific flags. Must match the order above. */ |
54 | #define JIT_F_CPU_FIRST JIT_F_MIPS32R2 | 52 | #define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 |
53 | #if LJ_TARGET_MIPS32 | ||
55 | #define JIT_F_CPUSTRING "\010MIPS32R2" | 54 | #define JIT_F_CPUSTRING "\010MIPS32R2" |
56 | #else | 55 | #else |
56 | #define JIT_F_CPUSTRING "\010MIPS64R2" | ||
57 | #endif | ||
58 | #else | ||
57 | #define JIT_F_CPU_FIRST 0 | 59 | #define JIT_F_CPU_FIRST 0 |
58 | #define JIT_F_CPUSTRING "" | 60 | #define JIT_F_CPUSTRING "" |
59 | #endif | 61 | #endif |
@@ -100,6 +102,7 @@ | |||
100 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ | 102 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ |
101 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ | 103 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ |
102 | _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ | 104 | _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ |
105 | _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \ | ||
103 | \ | 106 | \ |
104 | _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ | 107 | _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ |
105 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ | 108 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ |
@@ -180,14 +183,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); | |||
180 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) | 183 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) |
181 | #define SNAP_TR(slot, tr) \ | 184 | #define SNAP_TR(slot, tr) \ |
182 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) | 185 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) |
186 | #if !LJ_FR2 | ||
183 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) | 187 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) |
188 | #endif | ||
184 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) | 189 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) |
185 | #define snap_ref(sn) ((sn) & 0xffff) | 190 | #define snap_ref(sn) ((sn) & 0xffff) |
186 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) | 191 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) |
187 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) | 192 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) |
188 | #define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) | ||
189 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) | 193 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) |
190 | 194 | ||
195 | static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) | ||
196 | { | ||
197 | #if LJ_FR2 | ||
198 | uint64_t pcbase; | ||
199 | memcpy(&pcbase, sn, sizeof(uint64_t)); | ||
200 | return (const BCIns *)(pcbase >> 8); | ||
201 | #else | ||
202 | return (const BCIns *)(uintptr_t)*sn; | ||
203 | #endif | ||
204 | } | ||
205 | |||
191 | /* Snapshot and exit numbers. */ | 206 | /* Snapshot and exit numbers. */ |
192 | typedef uint32_t SnapNo; | 207 | typedef uint32_t SnapNo; |
193 | typedef uint32_t ExitNo; | 208 | typedef uint32_t ExitNo; |
@@ -205,7 +220,8 @@ typedef enum { | |||
205 | LJ_TRLINK_UPREC, /* Up-recursion. */ | 220 | LJ_TRLINK_UPREC, /* Up-recursion. */ |
206 | LJ_TRLINK_DOWNREC, /* Down-recursion. */ | 221 | LJ_TRLINK_DOWNREC, /* Down-recursion. */ |
207 | LJ_TRLINK_INTERP, /* Fallback to interpreter. */ | 222 | LJ_TRLINK_INTERP, /* Fallback to interpreter. */ |
208 | LJ_TRLINK_RETURN /* Return to interpreter. */ | 223 | LJ_TRLINK_RETURN, /* Return to interpreter. */ |
224 | LJ_TRLINK_STITCH /* Trace stitching. */ | ||
209 | } TraceLink; | 225 | } TraceLink; |
210 | 226 | ||
211 | /* Trace object. */ | 227 | /* Trace object. */ |
@@ -214,6 +230,9 @@ typedef struct GCtrace { | |||
214 | uint8_t topslot; /* Top stack slot already checked to be allocated. */ | 230 | uint8_t topslot; /* Top stack slot already checked to be allocated. */ |
215 | uint8_t linktype; /* Type of link. */ | 231 | uint8_t linktype; /* Type of link. */ |
216 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ | 232 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ |
233 | #if LJ_GC64 | ||
234 | uint32_t unused_gc64; | ||
235 | #endif | ||
217 | GCRef gclist; | 236 | GCRef gclist; |
218 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ | 237 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ |
219 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ | 238 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ |
@@ -288,6 +307,16 @@ typedef struct ScEvEntry { | |||
288 | uint8_t dir; /* Direction. 1: +, 0: -. */ | 307 | uint8_t dir; /* Direction. 1: +, 0: -. */ |
289 | } ScEvEntry; | 308 | } ScEvEntry; |
290 | 309 | ||
310 | /* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */ | ||
311 | typedef struct RBCHashEntry { | ||
312 | MRef pc; /* Bytecode PC. */ | ||
313 | GCRef pt; /* Prototype. */ | ||
314 | IRRef ref; /* IR reference. */ | ||
315 | } RBCHashEntry; | ||
316 | |||
317 | /* Number of slots in the reverse bytecode hash table. Must be a power of 2. */ | ||
318 | #define RBCHASH_SLOTS 8 | ||
319 | |||
291 | /* 128 bit SIMD constants. */ | 320 | /* 128 bit SIMD constants. */ |
292 | enum { | 321 | enum { |
293 | LJ_KSIMD_ABS, | 322 | LJ_KSIMD_ABS, |
@@ -295,6 +324,45 @@ enum { | |||
295 | LJ_KSIMD__MAX | 324 | LJ_KSIMD__MAX |
296 | }; | 325 | }; |
297 | 326 | ||
327 | enum { | ||
328 | #if LJ_TARGET_X86ORX64 | ||
329 | LJ_K64_TOBIT, /* 2^52 + 2^51 */ | ||
330 | LJ_K64_2P64, /* 2^64 */ | ||
331 | LJ_K64_M2P64, /* -2^64 */ | ||
332 | #if LJ_32 | ||
333 | LJ_K64_M2P64_31, /* -2^64 or -2^31 */ | ||
334 | #else | ||
335 | LJ_K64_M2P64_31 = LJ_K64_M2P64, | ||
336 | #endif | ||
337 | #endif | ||
338 | #if LJ_TARGET_MIPS | ||
339 | LJ_K64_2P31, /* 2^31 */ | ||
340 | #if LJ_64 | ||
341 | LJ_K64_2P63, /* 2^63 */ | ||
342 | LJ_K64_M2P64, /* -2^64 */ | ||
343 | #endif | ||
344 | #endif | ||
345 | LJ_K64__MAX, | ||
346 | }; | ||
347 | |||
348 | enum { | ||
349 | #if LJ_TARGET_X86ORX64 | ||
350 | LJ_K32_M2P64_31, /* -2^64 or -2^31 */ | ||
351 | #endif | ||
352 | #if LJ_TARGET_PPC | ||
353 | LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ | ||
354 | LJ_K32_2P52, /* 2^52 */ | ||
355 | #endif | ||
356 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | ||
357 | LJ_K32_2P31, /* 2^31 */ | ||
358 | #endif | ||
359 | #if LJ_TARGET_MIPS64 | ||
360 | LJ_K32_2P63, /* 2^63 */ | ||
361 | LJ_K32_M2P64, /* -2^64 */ | ||
362 | #endif | ||
363 | LJ_K32__MAX | ||
364 | }; | ||
365 | |||
298 | /* Get 16 byte aligned pointer to SIMD constant. */ | 366 | /* Get 16 byte aligned pointer to SIMD constant. */ |
299 | #define LJ_KSIMD(J, n) \ | 367 | #define LJ_KSIMD(J, n) \ |
300 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) | 368 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) |
@@ -311,13 +379,14 @@ enum { | |||
311 | /* Fold state is used to fold instructions on-the-fly. */ | 379 | /* Fold state is used to fold instructions on-the-fly. */ |
312 | typedef struct FoldState { | 380 | typedef struct FoldState { |
313 | IRIns ins; /* Currently emitted instruction. */ | 381 | IRIns ins; /* Currently emitted instruction. */ |
314 | IRIns left; /* Instruction referenced by left operand. */ | 382 | IRIns left[2]; /* Instruction referenced by left operand. */ |
315 | IRIns right; /* Instruction referenced by right operand. */ | 383 | IRIns right[2]; /* Instruction referenced by right operand. */ |
316 | } FoldState; | 384 | } FoldState; |
317 | 385 | ||
318 | /* JIT compiler state. */ | 386 | /* JIT compiler state. */ |
319 | typedef struct jit_State { | 387 | typedef struct jit_State { |
320 | GCtrace cur; /* Current trace. */ | 388 | GCtrace cur; /* Current trace. */ |
389 | GCtrace *curfinal; /* Final address of current trace (set during asm). */ | ||
321 | 390 | ||
322 | lua_State *L; /* Current Lua state. */ | 391 | lua_State *L; /* Current Lua state. */ |
323 | const BCIns *pc; /* Current PC. */ | 392 | const BCIns *pc; /* Current PC. */ |
@@ -347,8 +416,9 @@ typedef struct jit_State { | |||
347 | int32_t framedepth; /* Current frame depth. */ | 416 | int32_t framedepth; /* Current frame depth. */ |
348 | int32_t retdepth; /* Return frame depth (count of RETF). */ | 417 | int32_t retdepth; /* Return frame depth (count of RETF). */ |
349 | 418 | ||
350 | MRef k64; /* Pointer to chained array of 64 bit constants. */ | ||
351 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ | 419 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ |
420 | TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */ | ||
421 | uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ | ||
352 | 422 | ||
353 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ | 423 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ |
354 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ | 424 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ |
@@ -362,12 +432,14 @@ typedef struct jit_State { | |||
362 | 432 | ||
363 | PostProc postproc; /* Required post-processing after execution. */ | 433 | PostProc postproc; /* Required post-processing after execution. */ |
364 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 434 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) |
365 | int needsplit; /* Need SPLIT pass. */ | 435 | uint8_t needsplit; /* Need SPLIT pass. */ |
366 | #endif | 436 | #endif |
437 | uint8_t retryrec; /* Retry recording. */ | ||
367 | 438 | ||
368 | GCRef *trace; /* Array of traces. */ | 439 | GCRef *trace; /* Array of traces. */ |
369 | TraceNo freetrace; /* Start of scan for next free trace. */ | 440 | TraceNo freetrace; /* Start of scan for next free trace. */ |
370 | MSize sizetrace; /* Size of trace array. */ | 441 | MSize sizetrace; /* Size of trace array. */ |
442 | IRRef1 ktrace; /* Reference to KGC with GCtrace. */ | ||
371 | 443 | ||
372 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ | 444 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ |
373 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ | 445 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ |
@@ -380,6 +452,10 @@ typedef struct jit_State { | |||
380 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ | 452 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ |
381 | uint32_t prngstate; /* PRNG state. */ | 453 | uint32_t prngstate; /* PRNG state. */ |
382 | 454 | ||
455 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
456 | RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */ | ||
457 | #endif | ||
458 | |||
383 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ | 459 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ |
384 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ | 460 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ |
385 | 461 | ||
@@ -400,6 +476,12 @@ typedef struct jit_State { | |||
400 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ | 476 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ |
401 | 477 | ||
402 | TValue errinfo; /* Additional info element for trace errors. */ | 478 | TValue errinfo; /* Additional info element for trace errors. */ |
479 | |||
480 | #if LJ_HASPROFILE | ||
481 | GCproto *prev_pt; /* Previous prototype. */ | ||
482 | BCLine prev_line; /* Previous line. */ | ||
483 | int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ | ||
484 | #endif | ||
403 | } | 485 | } |
404 | #if LJ_TARGET_ARM | 486 | #if LJ_TARGET_ARM |
405 | LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ | 487 | LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ |