about summary refs log tree commit diff
path: root/src/lj_jit.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_jit.h')
-rw-r--r-- src/lj_jit.h 217
1 files changed, 156 insertions, 61 deletions
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 4a4b0b1b..655b84c3 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -9,71 +9,85 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_ir.h" 10#include "lj_ir.h"
11 11
12/* JIT engine flags. */ 12/* -- JIT engine flags ---------------------------------------------------- */
13
14/* General JIT engine flags. 4 bits. */
13#define JIT_F_ON 0x00000001 15#define JIT_F_ON 0x00000001
14 16
15/* CPU-specific JIT engine flags. */ 17/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */
18#define JIT_F_CPU 0x00000010
19
16#if LJ_TARGET_X86ORX64 20#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000010 21
18#define JIT_F_SSE2 0x00000020 22#define JIT_F_SSE3 (JIT_F_CPU << 0)
19#define JIT_F_SSE3 0x00000040 23#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
20#define JIT_F_SSE4_1 0x00000080 24#define JIT_F_BMI2 (JIT_F_CPU << 2)
21#define JIT_F_P4 0x00000100 25
22#define JIT_F_PREFER_IMUL 0x00000200 26
23#define JIT_F_SPLIT_XMM 0x00000400 27#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
24#define JIT_F_LEA_AGU 0x00000800 28
25
26/* Names for the CPU-specific flags. Must match the order above. */
27#define JIT_F_CPU_FIRST JIT_F_CMOV
28#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM"
29#elif LJ_TARGET_ARM 29#elif LJ_TARGET_ARM
30#define JIT_F_ARMV6_ 0x00000010 30
31#define JIT_F_ARMV6T2_ 0x00000020 31#define JIT_F_ARMV6_ (JIT_F_CPU << 0)
32#define JIT_F_ARMV7 0x00000040 32#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1)
33#define JIT_F_VFPV2 0x00000080 33#define JIT_F_ARMV7 (JIT_F_CPU << 2)
34#define JIT_F_VFPV3 0x00000100 34#define JIT_F_ARMV8 (JIT_F_CPU << 3)
35 35#define JIT_F_VFPV2 (JIT_F_CPU << 4)
36#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) 36#define JIT_F_VFPV3 (JIT_F_CPU << 5)
37#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) 37
38#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
39#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
38#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) 40#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3)
39 41
40/* Names for the CPU-specific flags. Must match the order above. */ 42#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3"
41#define JIT_F_CPU_FIRST JIT_F_ARMV6_ 43
42#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3"
43#elif LJ_TARGET_PPC 44#elif LJ_TARGET_PPC
44#define JIT_F_SQRT 0x00000010
45#define JIT_F_ROUND 0x00000020
46 45
47/* Names for the CPU-specific flags. Must match the order above. */ 46#define JIT_F_SQRT (JIT_F_CPU << 0)
48#define JIT_F_CPU_FIRST JIT_F_SQRT 47#define JIT_F_ROUND (JIT_F_CPU << 1)
48
49#define JIT_F_CPUSTRING "\4SQRT\5ROUND" 49#define JIT_F_CPUSTRING "\4SQRT\5ROUND"
50
50#elif LJ_TARGET_MIPS 51#elif LJ_TARGET_MIPS
51#define JIT_F_MIPS32R2 0x00000010
52 52
53/* Names for the CPU-specific flags. Must match the order above. */ 53#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0)
54#define JIT_F_CPU_FIRST JIT_F_MIPS32R2 54
55#if LJ_TARGET_MIPS32
56#if LJ_TARGET_MIPSR6
57#define JIT_F_CPUSTRING "\010MIPS32R6"
58#else
55#define JIT_F_CPUSTRING "\010MIPS32R2" 59#define JIT_F_CPUSTRING "\010MIPS32R2"
60#endif
61#else
62#if LJ_TARGET_MIPSR6
63#define JIT_F_CPUSTRING "\010MIPS64R6"
56#else 64#else
57#define JIT_F_CPU_FIRST 0 65#define JIT_F_CPUSTRING "\010MIPS64R2"
66#endif
67#endif
68
69#else
70
58#define JIT_F_CPUSTRING "" 71#define JIT_F_CPUSTRING ""
72
59#endif 73#endif
60 74
61/* Optimization flags. */ 75/* Optimization flags. 12 bits. */
76#define JIT_F_OPT 0x00010000
62#define JIT_F_OPT_MASK 0x0fff0000 77#define JIT_F_OPT_MASK 0x0fff0000
63 78
64#define JIT_F_OPT_FOLD 0x00010000 79#define JIT_F_OPT_FOLD (JIT_F_OPT << 0)
65#define JIT_F_OPT_CSE 0x00020000 80#define JIT_F_OPT_CSE (JIT_F_OPT << 1)
66#define JIT_F_OPT_DCE 0x00040000 81#define JIT_F_OPT_DCE (JIT_F_OPT << 2)
67#define JIT_F_OPT_FWD 0x00080000 82#define JIT_F_OPT_FWD (JIT_F_OPT << 3)
68#define JIT_F_OPT_DSE 0x00100000 83#define JIT_F_OPT_DSE (JIT_F_OPT << 4)
69#define JIT_F_OPT_NARROW 0x00200000 84#define JIT_F_OPT_NARROW (JIT_F_OPT << 5)
70#define JIT_F_OPT_LOOP 0x00400000 85#define JIT_F_OPT_LOOP (JIT_F_OPT << 6)
71#define JIT_F_OPT_ABC 0x00800000 86#define JIT_F_OPT_ABC (JIT_F_OPT << 7)
72#define JIT_F_OPT_SINK 0x01000000 87#define JIT_F_OPT_SINK (JIT_F_OPT << 8)
73#define JIT_F_OPT_FUSE 0x02000000 88#define JIT_F_OPT_FUSE (JIT_F_OPT << 9)
74 89
75/* Optimization names for -O. Must match the order above. */ 90/* Optimization names for -O. Must match the order above. */
76#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
77#define JIT_F_OPTSTRING \ 91#define JIT_F_OPTSTRING \
78 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" 92 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
79 93
@@ -85,6 +99,8 @@
85 JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) 99 JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
86#define JIT_F_OPT_DEFAULT JIT_F_OPT_3 100#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
87 101
102/* -- JIT engine parameters ----------------------------------------------- */
103
88#if LJ_TARGET_WINDOWS || LJ_64 104#if LJ_TARGET_WINDOWS || LJ_64
89/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ 105/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
90#define JIT_P_sizemcode_DEFAULT 64 106#define JIT_P_sizemcode_DEFAULT 64
@@ -100,6 +116,7 @@
100 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ 116 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
101 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ 117 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
102 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ 118 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
119 _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
103 \ 120 \
104 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ 121 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
105 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ 122 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@@ -126,6 +143,8 @@ JIT_PARAMDEF(JIT_PARAMENUM)
126#define JIT_PARAMSTR(len, name, value) #len #name 143#define JIT_PARAMSTR(len, name, value) #len #name
127#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) 144#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR)
128 145
146/* -- JIT engine data structures ------------------------------------------ */
147
129/* Trace compiler state. */ 148/* Trace compiler state. */
130typedef enum { 149typedef enum {
131 LJ_TRACE_IDLE, /* Trace compiler idle. */ 150 LJ_TRACE_IDLE, /* Trace compiler idle. */
@@ -186,14 +205,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
186#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) 205#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
187#define SNAP_TR(slot, tr) \ 206#define SNAP_TR(slot, tr) \
188 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) 207 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
208#if !LJ_FR2
189#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) 209#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
210#endif
190#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) 211#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
191#define snap_ref(sn) ((sn) & 0xffff) 212#define snap_ref(sn) ((sn) & 0xffff)
192#define snap_slot(sn) ((BCReg)((sn) >> 24)) 213#define snap_slot(sn) ((BCReg)((sn) >> 24))
193#define snap_isframe(sn) ((sn) & SNAP_FRAME) 214#define snap_isframe(sn) ((sn) & SNAP_FRAME)
194#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
195#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) 215#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
196 216
217static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
218{
219#if LJ_FR2
220 uint64_t pcbase;
221 memcpy(&pcbase, sn, sizeof(uint64_t));
222 return (const BCIns *)(pcbase >> 8);
223#else
224 return (const BCIns *)(uintptr_t)*sn;
225#endif
226}
227
197/* Snapshot and exit numbers. */ 228/* Snapshot and exit numbers. */
198typedef uint32_t SnapNo; 229typedef uint32_t SnapNo;
199typedef uint32_t ExitNo; 230typedef uint32_t ExitNo;
@@ -211,7 +242,8 @@ typedef enum {
211 LJ_TRLINK_UPREC, /* Up-recursion. */ 242 LJ_TRLINK_UPREC, /* Up-recursion. */
212 LJ_TRLINK_DOWNREC, /* Down-recursion. */ 243 LJ_TRLINK_DOWNREC, /* Down-recursion. */
213 LJ_TRLINK_INTERP, /* Fallback to interpreter. */ 244 LJ_TRLINK_INTERP, /* Fallback to interpreter. */
214 LJ_TRLINK_RETURN /* Return to interpreter. */ 245 LJ_TRLINK_RETURN, /* Return to interpreter. */
246 LJ_TRLINK_STITCH /* Trace stitching. */
215} TraceLink; 247} TraceLink;
216 248
217/* Trace object. */ 249/* Trace object. */
@@ -219,6 +251,9 @@ typedef struct GCtrace {
219 GCHeader; 251 GCHeader;
220 uint16_t nsnap; /* Number of snapshots. */ 252 uint16_t nsnap; /* Number of snapshots. */
221 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ 253 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
254#if LJ_GC64
255 uint32_t unused_gc64;
256#endif
222 GCRef gclist; 257 GCRef gclist;
223 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ 258 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
224 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ 259 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
@@ -294,6 +329,16 @@ typedef struct ScEvEntry {
294 uint8_t dir; /* Direction. 1: +, 0: -. */ 329 uint8_t dir; /* Direction. 1: +, 0: -. */
295} ScEvEntry; 330} ScEvEntry;
296 331
332/* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */
333typedef struct RBCHashEntry {
334 MRef pc; /* Bytecode PC. */
335 GCRef pt; /* Prototype. */
336 IRRef ref; /* IR reference. */
337} RBCHashEntry;
338
339/* Number of slots in the reverse bytecode hash table. Must be a power of 2. */
340#define RBCHASH_SLOTS 8
341
297/* 128 bit SIMD constants. */ 342/* 128 bit SIMD constants. */
298enum { 343enum {
299 LJ_KSIMD_ABS, 344 LJ_KSIMD_ABS,
@@ -301,12 +346,51 @@ enum {
301 LJ_KSIMD__MAX 346 LJ_KSIMD__MAX
302}; 347};
303 348
349enum {
350#if LJ_TARGET_X86ORX64
351 LJ_K64_TOBIT, /* 2^52 + 2^51 */
352 LJ_K64_2P64, /* 2^64 */
353 LJ_K64_M2P64, /* -2^64 */
354#if LJ_32
355 LJ_K64_M2P64_31, /* -2^64 or -2^31 */
356#else
357 LJ_K64_M2P64_31 = LJ_K64_M2P64,
358#endif
359#endif
360#if LJ_TARGET_MIPS
361 LJ_K64_2P31, /* 2^31 */
362#if LJ_64
363 LJ_K64_2P63, /* 2^63 */
364 LJ_K64_M2P64, /* -2^64 */
365#endif
366#endif
367 LJ_K64__MAX,
368};
369
370enum {
371#if LJ_TARGET_X86ORX64
372 LJ_K32_M2P64_31, /* -2^64 or -2^31 */
373#endif
374#if LJ_TARGET_PPC
375 LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
376 LJ_K32_2P52, /* 2^52 */
377#endif
378#if LJ_TARGET_PPC || LJ_TARGET_MIPS
379 LJ_K32_2P31, /* 2^31 */
380#endif
381#if LJ_TARGET_MIPS64
382 LJ_K32_2P63, /* 2^63 */
383 LJ_K32_M2P64, /* -2^64 */
384#endif
385 LJ_K32__MAX
386};
387
304/* Get 16 byte aligned pointer to SIMD constant. */ 388/* Get 16 byte aligned pointer to SIMD constant. */
305#define LJ_KSIMD(J, n) \ 389#define LJ_KSIMD(J, n) \
306 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) 390 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
307 391
308/* Set/reset flag to activate the SPLIT pass for the current trace. */ 392/* Set/reset flag to activate the SPLIT pass for the current trace. */
309#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 393#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
310#define lj_needsplit(J) (J->needsplit = 1) 394#define lj_needsplit(J) (J->needsplit = 1)
311#define lj_resetsplit(J) (J->needsplit = 0) 395#define lj_resetsplit(J) (J->needsplit = 0)
312#else 396#else
@@ -317,13 +401,14 @@ enum {
317/* Fold state is used to fold instructions on-the-fly. */ 401/* Fold state is used to fold instructions on-the-fly. */
318typedef struct FoldState { 402typedef struct FoldState {
319 IRIns ins; /* Currently emitted instruction. */ 403 IRIns ins; /* Currently emitted instruction. */
320 IRIns left; /* Instruction referenced by left operand. */ 404 IRIns left[2]; /* Instruction referenced by left operand. */
321 IRIns right; /* Instruction referenced by right operand. */ 405 IRIns right[2]; /* Instruction referenced by right operand. */
322} FoldState; 406} FoldState;
323 407
324/* JIT compiler state. */ 408/* JIT compiler state. */
325typedef struct jit_State { 409typedef struct jit_State {
326 GCtrace cur; /* Current trace. */ 410 GCtrace cur; /* Current trace. */
411 GCtrace *curfinal; /* Final address of current trace (set during asm). */
327 412
328 lua_State *L; /* Current Lua state. */ 413 lua_State *L; /* Current Lua state. */
329 const BCIns *pc; /* Current PC. */ 414 const BCIns *pc; /* Current PC. */
@@ -353,8 +438,9 @@ typedef struct jit_State {
353 int32_t framedepth; /* Current frame depth. */ 438 int32_t framedepth; /* Current frame depth. */
354 int32_t retdepth; /* Return frame depth (count of RETF). */ 439 int32_t retdepth; /* Return frame depth (count of RETF). */
355 440
356 MRef k64; /* Pointer to chained array of 64 bit constants. */ 441 uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */
357 TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ 442 TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
443 TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */
358 444
359 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ 445 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
360 IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ 446 IRRef irtoplim; /* Upper limit of instruction buffer (biased). */
@@ -367,13 +453,15 @@ typedef struct jit_State {
367 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ 453 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
368 454
369 PostProc postproc; /* Required post-processing after execution. */ 455 PostProc postproc; /* Required post-processing after execution. */
370#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 456#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
371 int needsplit; /* Need SPLIT pass. */ 457 uint8_t needsplit; /* Need SPLIT pass. */
372#endif 458#endif
459 uint8_t retryrec; /* Retry recording. */
373 460
374 GCRef *trace; /* Array of traces. */ 461 GCRef *trace; /* Array of traces. */
375 TraceNo freetrace; /* Start of scan for next free trace. */ 462 TraceNo freetrace; /* Start of scan for next free trace. */
376 MSize sizetrace; /* Size of trace array. */ 463 MSize sizetrace; /* Size of trace array. */
464 IRRef1 ktrace; /* Reference to KGC with GCtrace. */
377 465
378 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ 466 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
379 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ 467 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
@@ -384,7 +472,10 @@ typedef struct jit_State {
384 472
385 HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ 473 HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */
386 uint32_t penaltyslot; /* Round-robin index into penalty slots. */ 474 uint32_t penaltyslot; /* Round-robin index into penalty slots. */
387 uint32_t prngstate; /* PRNG state. */ 475
476#ifdef LUAJIT_ENABLE_TABLE_BUMP
477 RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */
478#endif
388 479
389 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ 480 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */
390 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ 481 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */
@@ -406,14 +497,18 @@ typedef struct jit_State {
406 size_t szallmcarea; /* Total size of all allocated mcode areas. */ 497 size_t szallmcarea; /* Total size of all allocated mcode areas. */
407 498
408 TValue errinfo; /* Additional info element for trace errors. */ 499 TValue errinfo; /* Additional info element for trace errors. */
500
501#if LJ_HASPROFILE
502 GCproto *prev_pt; /* Previous prototype. */
503 BCLine prev_line; /* Previous line. */
504 int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
505#endif
409} jit_State; 506} jit_State;
410 507
411/* Trivial PRNG e.g. used for penalty randomization. */ 508#ifdef LUA_USE_ASSERT
412static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits) 509#define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__)
413{ 510#else
414 /* Yes, this LCG is very weak, but that doesn't matter for our use case. */ 511#define lj_assertJ(c, ...) ((void)J)
415 J->prngstate = J->prngstate * 1103515245 + 12345; 512#endif
416 return J->prngstate >> (32-bits);
417}
418 513
419#endif 514#endif