about summary refs log tree commit diff
path: root/src/lj_jit.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_jit.h')
-rw-r--r-- src/lj_jit.h 130
1 files changed, 110 insertions, 20 deletions
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 0e1c4827..f179f17f 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -14,18 +14,16 @@
14 14
15/* CPU-specific JIT engine flags. */ 15/* CPU-specific JIT engine flags. */
16#if LJ_TARGET_X86ORX64 16#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000010 17#define JIT_F_SSE2 0x00000010
18#define JIT_F_SSE2 0x00000020 18#define JIT_F_SSE3 0x00000020
19#define JIT_F_SSE3 0x00000040 19#define JIT_F_SSE4_1 0x00000040
20#define JIT_F_SSE4_1 0x00000080 20#define JIT_F_PREFER_IMUL 0x00000080
21#define JIT_F_P4 0x00000100 21#define JIT_F_LEA_AGU 0x00000100
22#define JIT_F_PREFER_IMUL 0x00000200 22#define JIT_F_BMI2 0x00000200
23#define JIT_F_SPLIT_XMM 0x00000400
24#define JIT_F_LEA_AGU 0x00000800
25 23
26/* Names for the CPU-specific flags. Must match the order above. */ 24/* Names for the CPU-specific flags. Must match the order above. */
27#define JIT_F_CPU_FIRST JIT_F_CMOV 25#define JIT_F_CPU_FIRST JIT_F_SSE2
28#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" 26#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2"
29#elif LJ_TARGET_ARM 27#elif LJ_TARGET_ARM
30#define JIT_F_ARMV6_ 0x00000010 28#define JIT_F_ARMV6_ 0x00000010
31#define JIT_F_ARMV6T2_ 0x00000020 29#define JIT_F_ARMV6T2_ 0x00000020
@@ -48,11 +46,23 @@
48#define JIT_F_CPU_FIRST JIT_F_SQRT 46#define JIT_F_CPU_FIRST JIT_F_SQRT
49#define JIT_F_CPUSTRING "\4SQRT\5ROUND" 47#define JIT_F_CPUSTRING "\4SQRT\5ROUND"
50#elif LJ_TARGET_MIPS 48#elif LJ_TARGET_MIPS
51#define JIT_F_MIPS32R2 0x00000010 49#define JIT_F_MIPSXXR2 0x00000010
52 50
53/* Names for the CPU-specific flags. Must match the order above. */ 51/* Names for the CPU-specific flags. Must match the order above. */
54#define JIT_F_CPU_FIRST JIT_F_MIPS32R2 52#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2
53#if LJ_TARGET_MIPS32
54#if LJ_TARGET_MIPSR6
55#define JIT_F_CPUSTRING "\010MIPS32R6"
56#else
55#define JIT_F_CPUSTRING "\010MIPS32R2" 57#define JIT_F_CPUSTRING "\010MIPS32R2"
58#endif
59#else
60#if LJ_TARGET_MIPSR6
61#define JIT_F_CPUSTRING "\010MIPS64R6"
62#else
63#define JIT_F_CPUSTRING "\010MIPS64R2"
64#endif
65#endif
56#else 66#else
57#define JIT_F_CPU_FIRST 0 67#define JIT_F_CPU_FIRST 0
58#define JIT_F_CPUSTRING "" 68#define JIT_F_CPUSTRING ""
@@ -100,6 +110,7 @@
100 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ 110 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
101 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ 111 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
102 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ 112 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
113 _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
103 \ 114 \
104 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ 115 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
105 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ 116 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@@ -186,14 +197,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
186#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) 197#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
187#define SNAP_TR(slot, tr) \ 198#define SNAP_TR(slot, tr) \
188 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) 199 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
200#if !LJ_FR2
189#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) 201#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
202#endif
190#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) 203#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
191#define snap_ref(sn) ((sn) & 0xffff) 204#define snap_ref(sn) ((sn) & 0xffff)
192#define snap_slot(sn) ((BCReg)((sn) >> 24)) 205#define snap_slot(sn) ((BCReg)((sn) >> 24))
193#define snap_isframe(sn) ((sn) & SNAP_FRAME) 206#define snap_isframe(sn) ((sn) & SNAP_FRAME)
194#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
195#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) 207#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
196 208
209static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
210{
211#if LJ_FR2
212 uint64_t pcbase;
213 memcpy(&pcbase, sn, sizeof(uint64_t));
214 return (const BCIns *)(pcbase >> 8);
215#else
216 return (const BCIns *)(uintptr_t)*sn;
217#endif
218}
219
197/* Snapshot and exit numbers. */ 220/* Snapshot and exit numbers. */
198typedef uint32_t SnapNo; 221typedef uint32_t SnapNo;
199typedef uint32_t ExitNo; 222typedef uint32_t ExitNo;
@@ -211,7 +234,8 @@ typedef enum {
211 LJ_TRLINK_UPREC, /* Up-recursion. */ 234 LJ_TRLINK_UPREC, /* Up-recursion. */
212 LJ_TRLINK_DOWNREC, /* Down-recursion. */ 235 LJ_TRLINK_DOWNREC, /* Down-recursion. */
213 LJ_TRLINK_INTERP, /* Fallback to interpreter. */ 236 LJ_TRLINK_INTERP, /* Fallback to interpreter. */
214 LJ_TRLINK_RETURN /* Return to interpreter. */ 237 LJ_TRLINK_RETURN, /* Return to interpreter. */
238 LJ_TRLINK_STITCH /* Trace stitching. */
215} TraceLink; 239} TraceLink;
216 240
217/* Trace object. */ 241/* Trace object. */
@@ -219,6 +243,9 @@ typedef struct GCtrace {
219 GCHeader; 243 GCHeader;
220 uint16_t nsnap; /* Number of snapshots. */ 244 uint16_t nsnap; /* Number of snapshots. */
221 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ 245 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
246#if LJ_GC64
247 uint32_t unused_gc64;
248#endif
222 GCRef gclist; 249 GCRef gclist;
223 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ 250 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
224 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ 251 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
@@ -294,6 +321,16 @@ typedef struct ScEvEntry {
294 uint8_t dir; /* Direction. 1: +, 0: -. */ 321 uint8_t dir; /* Direction. 1: +, 0: -. */
295} ScEvEntry; 322} ScEvEntry;
296 323
324/* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */
325typedef struct RBCHashEntry {
326 MRef pc; /* Bytecode PC. */
327 GCRef pt; /* Prototype. */
328 IRRef ref; /* IR reference. */
329} RBCHashEntry;
330
331/* Number of slots in the reverse bytecode hash table. Must be a power of 2. */
332#define RBCHASH_SLOTS 8
333
297/* 128 bit SIMD constants. */ 334/* 128 bit SIMD constants. */
298enum { 335enum {
299 LJ_KSIMD_ABS, 336 LJ_KSIMD_ABS,
@@ -301,12 +338,51 @@ enum {
301 LJ_KSIMD__MAX 338 LJ_KSIMD__MAX
302}; 339};
303 340
341enum {
342#if LJ_TARGET_X86ORX64
343 LJ_K64_TOBIT, /* 2^52 + 2^51 */
344 LJ_K64_2P64, /* 2^64 */
345 LJ_K64_M2P64, /* -2^64 */
346#if LJ_32
347 LJ_K64_M2P64_31, /* -2^64 or -2^31 */
348#else
349 LJ_K64_M2P64_31 = LJ_K64_M2P64,
350#endif
351#endif
352#if LJ_TARGET_MIPS
353 LJ_K64_2P31, /* 2^31 */
354#if LJ_64
355 LJ_K64_2P63, /* 2^63 */
356 LJ_K64_M2P64, /* -2^64 */
357#endif
358#endif
359 LJ_K64__MAX,
360};
361
362enum {
363#if LJ_TARGET_X86ORX64
364 LJ_K32_M2P64_31, /* -2^64 or -2^31 */
365#endif
366#if LJ_TARGET_PPC
367 LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
368 LJ_K32_2P52, /* 2^52 */
369#endif
370#if LJ_TARGET_PPC || LJ_TARGET_MIPS
371 LJ_K32_2P31, /* 2^31 */
372#endif
373#if LJ_TARGET_MIPS64
374 LJ_K32_2P63, /* 2^63 */
375 LJ_K32_M2P64, /* -2^64 */
376#endif
377 LJ_K32__MAX
378};
379
304/* Get 16 byte aligned pointer to SIMD constant. */ 380/* Get 16 byte aligned pointer to SIMD constant. */
305#define LJ_KSIMD(J, n) \ 381#define LJ_KSIMD(J, n) \
306 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) 382 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
307 383
308/* Set/reset flag to activate the SPLIT pass for the current trace. */ 384/* Set/reset flag to activate the SPLIT pass for the current trace. */
309#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 385#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
310#define lj_needsplit(J) (J->needsplit = 1) 386#define lj_needsplit(J) (J->needsplit = 1)
311#define lj_resetsplit(J) (J->needsplit = 0) 387#define lj_resetsplit(J) (J->needsplit = 0)
312#else 388#else
@@ -317,13 +393,14 @@ enum {
317/* Fold state is used to fold instructions on-the-fly. */ 393/* Fold state is used to fold instructions on-the-fly. */
318typedef struct FoldState { 394typedef struct FoldState {
319 IRIns ins; /* Currently emitted instruction. */ 395 IRIns ins; /* Currently emitted instruction. */
320 IRIns left; /* Instruction referenced by left operand. */ 396 IRIns left[2]; /* Instruction referenced by left operand. */
321 IRIns right; /* Instruction referenced by right operand. */ 397 IRIns right[2]; /* Instruction referenced by right operand. */
322} FoldState; 398} FoldState;
323 399
324/* JIT compiler state. */ 400/* JIT compiler state. */
325typedef struct jit_State { 401typedef struct jit_State {
326 GCtrace cur; /* Current trace. */ 402 GCtrace cur; /* Current trace. */
403 GCtrace *curfinal; /* Final address of current trace (set during asm). */
327 404
328 lua_State *L; /* Current Lua state. */ 405 lua_State *L; /* Current Lua state. */
329 const BCIns *pc; /* Current PC. */ 406 const BCIns *pc; /* Current PC. */
@@ -353,8 +430,9 @@ typedef struct jit_State {
353 int32_t framedepth; /* Current frame depth. */ 430 int32_t framedepth; /* Current frame depth. */
354 int32_t retdepth; /* Return frame depth (count of RETF). */ 431 int32_t retdepth; /* Return frame depth (count of RETF). */
355 432
356 MRef k64; /* Pointer to chained array of 64 bit constants. */
357 TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ 433 TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
434 TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */
435 uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */
358 436
359 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ 437 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
360 IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ 438 IRRef irtoplim; /* Upper limit of instruction buffer (biased). */
@@ -367,13 +445,15 @@ typedef struct jit_State {
367 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ 445 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
368 446
369 PostProc postproc; /* Required post-processing after execution. */ 447 PostProc postproc; /* Required post-processing after execution. */
370#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 448#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
371 int needsplit; /* Need SPLIT pass. */ 449 uint8_t needsplit; /* Need SPLIT pass. */
372#endif 450#endif
451 uint8_t retryrec; /* Retry recording. */
373 452
374 GCRef *trace; /* Array of traces. */ 453 GCRef *trace; /* Array of traces. */
375 TraceNo freetrace; /* Start of scan for next free trace. */ 454 TraceNo freetrace; /* Start of scan for next free trace. */
376 MSize sizetrace; /* Size of trace array. */ 455 MSize sizetrace; /* Size of trace array. */
456 IRRef1 ktrace; /* Reference to KGC with GCtrace. */
377 457
378 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ 458 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
379 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ 459 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
@@ -386,6 +466,10 @@ typedef struct jit_State {
386 uint32_t penaltyslot; /* Round-robin index into penalty slots. */ 466 uint32_t penaltyslot; /* Round-robin index into penalty slots. */
387 uint32_t prngstate; /* PRNG state. */ 467 uint32_t prngstate; /* PRNG state. */
388 468
469#ifdef LUAJIT_ENABLE_TABLE_BUMP
470 RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */
471#endif
472
389 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ 473 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */
390 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ 474 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */
391 475
@@ -406,6 +490,12 @@ typedef struct jit_State {
406 size_t szallmcarea; /* Total size of all allocated mcode areas. */ 490 size_t szallmcarea; /* Total size of all allocated mcode areas. */
407 491
408 TValue errinfo; /* Additional info element for trace errors. */ 492 TValue errinfo; /* Additional info element for trace errors. */
493
494#if LJ_HASPROFILE
495 GCproto *prev_pt; /* Previous prototype. */
496 BCLine prev_line; /* Previous line. */
497 int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
498#endif
409} 499}
410#if LJ_TARGET_ARM 500#if LJ_TARGET_ARM
411LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ 501LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */