diff options
Diffstat (limited to 'src/lj_jit.h')
-rw-r--r-- | src/lj_jit.h | 279 |
1 files changed, 279 insertions, 0 deletions
diff --git a/src/lj_jit.h b/src/lj_jit.h new file mode 100644 index 00000000..280eff41 --- /dev/null +++ b/src/lj_jit.h | |||
@@ -0,0 +1,279 @@ | |||
1 | /* | ||
2 | ** Common definitions for the JIT compiler. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_JIT_H | ||
7 | #define _LJ_JIT_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_ir.h" | ||
11 | |||
/*
** JIT engine flags.
** JIT_F_ON occupies bit 0 of the flag word; the CPU-specific and
** optimization flags defined below use higher bits.
** NOTE(review): presumably set while the JIT compiler is enabled -- confirm.
*/
#define JIT_F_ON                0x00000001
14 | |||
15 | /* CPU-specific JIT engine flags. */ | ||
16 | #if LJ_TARGET_X86ORX64 | ||
17 | #define JIT_F_CMOV 0x00000100 | ||
18 | #define JIT_F_SSE2 0x00000200 | ||
19 | #define JIT_F_SSE4_1 0x00000400 | ||
20 | #define JIT_F_P4 0x00000800 | ||
21 | #define JIT_F_PREFER_IMUL 0x00001000 | ||
22 | #define JIT_F_SPLIT_XMM 0x00002000 | ||
23 | #define JIT_F_LEA_AGU 0x00004000 | ||
24 | |||
25 | /* Names for the CPU-specific flags. Must match the order above. */ | ||
26 | #define JIT_F_CPU_FIRST JIT_F_CMOV | ||
27 | #define JIT_F_CPUSTRING "\4CMOV\4SSE2\6SSE4.1\2P4\3AMD\2K8\4ATOM" | ||
28 | #else | ||
29 | #error "Missing CPU-specific JIT engine flags" | ||
30 | #endif | ||
31 | |||
/* Optimization flags. All of them live inside JIT_F_OPT_MASK. */
#define JIT_F_OPT_MASK          0x00ff0000

#define JIT_F_OPT_FOLD          0x00010000
#define JIT_F_OPT_CSE           0x00020000
#define JIT_F_OPT_DCE           0x00040000
#define JIT_F_OPT_FWD           0x00080000
#define JIT_F_OPT_DSE           0x00100000
#define JIT_F_OPT_NARROW        0x00200000
#define JIT_F_OPT_LOOP          0x00400000
#define JIT_F_OPT_FUSE          0x00800000

/*
** Optimization names for -O. Must match the order above.
** Each name is preceded by its length, written as an octal escape.
*/
#define JIT_F_OPT_FIRST         JIT_F_OPT_FOLD
#define JIT_F_OPTSTRING \
  "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\4fuse"

/*
** Optimization levels set a fixed combination of flags.
** Each level is a superset of the previous one; level 3 enables every flag.
*/
#define JIT_F_OPT_0     0
#define JIT_F_OPT_1     (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
#define JIT_F_OPT_2     (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
#define JIT_F_OPT_3     (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_FUSE)
#define JIT_F_OPT_DEFAULT       JIT_F_OPT_3
55 | |||
/*
** Default for the sizemcode parameter, i.e. the size of each machine code
** area in KBytes. Windows builds use a larger default than other platforms.
*/
#ifdef LUA_USE_WIN
/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
#define JIT_P_sizemcode_DEFAULT         64
#else
/* Could go as low as 4K, but the mmap() overhead would be rather high. */
#define JIT_P_sizemcode_DEFAULT         32
#endif
63 | |||
/*
** Optimization parameters and their defaults. Length is a char in octal!
**
** X-macro list: every entry is _(len, name, default), where len is the
** length of name written as an octal escape without quotes. The list is
** expanded below into the JIT_P_* enum and into JIT_P_STRING.
*/
#define JIT_PARAMDEF(_) \
  _(\010, maxtrace,     1000)   /* Max. # of traces in cache. */ \
  _(\011, maxrecord,    2000)   /* Max. # of recorded IR instructions. */ \
  _(\012, maxirconst,   500)    /* Max. # of IR constants of a trace. */ \
  _(\007, maxside,      100)    /* Max. # of side traces of a root trace. */ \
  _(\007, maxsnap,      100)    /* Max. # of snapshots for a trace. */ \
  \
  _(\007, hotloop,      57)     /* # of iterations to detect a hot loop. */ \
  _(\007, hotexit,      10)     /* # of taken exits to start a side trace. */ \
  _(\007, tryside,      4)      /* # of attempts to compile a side trace. */ \
  \
  _(\012, instunroll,   4)      /* Max. unroll for instable loops. */ \
  _(\012, loopunroll,   7)      /* Max. unroll for loop ops in side traces. */ \
  _(\012, callunroll,   3)      /* Max. unroll for recursive calls. */ \
  _(\011, recunroll,    0)      /* Max. unroll for true recursion. */ \
  \
  /* Size of each machine code area (in KBytes). */ \
  _(\011, sizemcode,    JIT_P_sizemcode_DEFAULT) \
  /* Max. total size of all machine code areas (in KBytes). */ \
  _(\010, maxmcode,     512) \
  /* End of list. */

/* Parameter indexes JIT_P_<name>, in list order. JIT_P__MAX is the count. */
enum {
#define JIT_PARAMENUM(len, name, value) JIT_P_##name,
JIT_PARAMDEF(JIT_PARAMENUM)
#undef JIT_PARAMENUM
  JIT_P__MAX
};

/*
** Parameter names as one string literal. Stringizing len yields the octal
** escape, so each name is preceded by a single byte holding its length.
*/
#define JIT_PARAMSTR(len, name, value)  #len #name
#define JIT_P_STRING    JIT_PARAMDEF(JIT_PARAMSTR)
96 | |||
/*
** Trace compiler state.
** LJ_TRACE_IDLE is 0. The remaining states are numbered consecutively from
** LJ_TRACE_ACTIVE (0x10), which itself carries no description here.
** NOTE(review): presumably a threshold so that "state >= LJ_TRACE_ACTIVE"
** means the compiler is busy -- confirm against the users of this enum.
*/
typedef enum {
  LJ_TRACE_IDLE,        /* Trace compiler idle. */
  LJ_TRACE_ACTIVE = 0x10,
  LJ_TRACE_RECORD,      /* Bytecode recording active. */
  LJ_TRACE_START,       /* New trace started. */
  LJ_TRACE_END,         /* End of trace. */
  LJ_TRACE_ASM,         /* Assemble trace. */
  LJ_TRACE_ERR          /* Trace aborted with error. */
} TraceState;
107 | |||
/* Machine code type. One byte, so MCode pointer arithmetic is in bytes. */
typedef uint8_t MCode;
110 | |||
111 | /* Stack snapshot header. */ | ||
112 | typedef struct SnapShot { | ||
113 | uint16_t mapofs; /* Offset into snapshot map. */ | ||
114 | IRRef1 ref; /* First IR ref for this snapshot. */ | ||
115 | uint8_t nslots; /* Number of stack slots. */ | ||
116 | uint8_t nframelinks; /* Number of frame links. */ | ||
117 | uint8_t count; /* Count of taken exits for this snapshot. */ | ||
118 | uint8_t unused1; | ||
119 | } SnapShot; | ||
120 | |||
/* Value of SnapShot.count once a side trace is attached to the exit. */
#define SNAPCOUNT_DONE  255     /* Already compiled and linked a side trace. */
/*
** Accessors for a snapshot map entry sn.
** snap_ref truncates sn to IRRef1 and widens the result to IRRef.
** snap_ridsp yields the bits above the low 16.
*/
#define snap_ref(sn)            ((IRRef)(IRRef1)(sn))
#define snap_ridsp(sn)          ((sn) >> 16)
124 | |||
/* Snapshot and exit numbers. */
typedef uint32_t SnapNo;
typedef uint32_t ExitNo;

/* Trace number. The stored form is narrower than the one passed around. */
typedef uint32_t TraceNo;       /* Used to pass around trace numbers. */
typedef uint16_t TraceNo1;      /* Stored trace number. */

#define TRACE_INTERP    0       /* Fallback to interpreter. */
134 | |||
135 | /* Trace anchor. */ | ||
136 | typedef struct Trace { | ||
137 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ | ||
138 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ | ||
139 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ | ||
140 | SnapShot *snap; /* Snapshot array. */ | ||
141 | IRRef2 *snapmap; /* Snapshot map. */ | ||
142 | uint16_t nsnap; /* Number of snapshots. */ | ||
143 | uint16_t nsnapmap; /* Number of snapshot map elements. */ | ||
144 | GCRef startpt; /* Starting prototype. */ | ||
145 | BCIns startins; /* Original bytecode of starting instruction. */ | ||
146 | MCode *mcode; /* Start of machine code. */ | ||
147 | MSize szmcode; /* Size of machine code. */ | ||
148 | MSize mcloop; /* Offset of loop start in machine code. */ | ||
149 | TraceNo1 link; /* Linked trace (or self for loops). */ | ||
150 | TraceNo1 root; /* Root trace of side trace (or 0 for root traces). */ | ||
151 | TraceNo1 nextroot; /* Next root trace for same prototype. */ | ||
152 | TraceNo1 nextside; /* Next side trace of same root trace. */ | ||
153 | uint16_t nchild; /* Number of child traces (root trace only). */ | ||
154 | uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */ | ||
155 | #ifdef LUAJIT_USE_GDBJIT | ||
156 | void *gdbjit_entry; /* GDB JIT entry. */ | ||
157 | #endif | ||
158 | } Trace; | ||
159 | |||
160 | /* Round-robin penalty cache for bytecodes leading to aborted traces. */ | ||
161 | typedef struct HotPenalty { | ||
162 | const BCIns *pc; /* Starting bytecode PC. */ | ||
163 | uint16_t val; /* Penalty value, i.e. hotcount start. */ | ||
164 | uint16_t reason; /* Abort reason (really TraceErr). */ | ||
165 | } HotPenalty; | ||
166 | |||
167 | /* Number of slots for the penalty cache. Must be a power of 2. */ | ||
168 | #define PENALTY_SLOTS 16 | ||
169 | |||
170 | /* Round-robin backpropagation cache for narrowing conversions. */ | ||
171 | typedef struct BPropEntry { | ||
172 | IRRef1 key; /* Key: original reference. */ | ||
173 | IRRef1 val; /* Value: reference after conversion. */ | ||
174 | IRRef mode; /* Mode for this entry (currently IRTOINT_*). */ | ||
175 | } BPropEntry; | ||
176 | |||
177 | /* Number of slots for the backpropagation cache. Must be a power of 2. */ | ||
178 | #define BPROP_SLOTS 16 | ||
179 | |||
180 | /* Fold state is used to fold instructions on-the-fly. */ | ||
181 | typedef struct FoldState { | ||
182 | IRIns ins; /* Currently emitted instruction. */ | ||
183 | IRIns left; /* Instruction referenced by left operand. */ | ||
184 | IRIns right; /* Instruction referenced by right operand. */ | ||
185 | } FoldState; | ||
186 | |||
187 | /* JIT compiler state. */ | ||
188 | typedef struct jit_State { | ||
189 | Trace cur; /* Current trace. */ | ||
190 | |||
191 | lua_State *L; /* Current Lua state. */ | ||
192 | const BCIns *pc; /* Current PC. */ | ||
193 | BCReg maxslot; /* Relative to baseslot. */ | ||
194 | |||
195 | uint32_t flags; /* JIT engine flags. */ | ||
196 | TRef *base; /* Current frame base, points into J->slots. */ | ||
197 | BCReg baseslot; /* Current frame base, offset into J->slots. */ | ||
198 | GCfunc *fn; /* Current function. */ | ||
199 | GCproto *pt; /* Current prototype. */ | ||
200 | |||
201 | FoldState fold; /* Fold state. */ | ||
202 | |||
203 | uint8_t mergesnap; /* Allowed to merge with next snapshot. */ | ||
204 | uint8_t needsnap; /* Need snapshot before recording next bytecode. */ | ||
205 | IRType1 guardemit; /* Accumulated IRT_GUARD for emitted instructions. */ | ||
206 | uint8_t unused1; | ||
207 | |||
208 | const BCIns *bc_min; /* Start of allowed bytecode range for root trace. */ | ||
209 | MSize bc_extent; /* Extent of the range. */ | ||
210 | |||
211 | TraceState state; /* Trace compiler state. */ | ||
212 | |||
213 | int32_t instunroll; /* Unroll counter for instable loops. */ | ||
214 | int32_t loopunroll; /* Unroll counter for loop ops in side traces. */ | ||
215 | int32_t tailcalled; /* Number of successive tailcalls. */ | ||
216 | int32_t framedepth; /* Current frame depth. */ | ||
217 | |||
218 | MRef knum; /* Pointer to chained array of KNUM constants. */ | ||
219 | |||
220 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ | ||
221 | IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ | ||
222 | IRRef irbotlim; /* Lower limit of instuction buffer (biased). */ | ||
223 | IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */ | ||
224 | |||
225 | SnapShot *snapbuf; /* Temp. snapshot buffer. */ | ||
226 | IRRef2 *snapmapbuf; /* Temp. snapshot map buffer. */ | ||
227 | MSize sizesnap; /* Size of temp. snapshot buffer. */ | ||
228 | MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ | ||
229 | |||
230 | Trace **trace; /* Array of traces. */ | ||
231 | TraceNo curtrace; /* Current trace number (if not 0). Kept in J->cur. */ | ||
232 | TraceNo freetrace; /* Start of scan for next free trace. */ | ||
233 | MSize sizetrace; /* Size of trace array. */ | ||
234 | |||
235 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ | ||
236 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ | ||
237 | |||
238 | int32_t param[JIT_P__MAX]; /* JIT engine parameters. */ | ||
239 | |||
240 | MCode *exitstubgroup[LJ_MAX_EXITSTUBGR]; /* Exit stub group addresses. */ | ||
241 | |||
242 | HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ | ||
243 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ | ||
244 | |||
245 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ | ||
246 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ | ||
247 | |||
248 | const BCIns *startpc; /* Bytecode PC of starting instruction. */ | ||
249 | TraceNo parent; /* Parent of current side trace (0 for root traces). */ | ||
250 | ExitNo exitno; /* Exit number in parent of current side trace. */ | ||
251 | |||
252 | TValue errinfo; /* Additional info element for trace errors. */ | ||
253 | |||
254 | MCode *mcarea; /* Base of current mcode area. */ | ||
255 | MCode *mctop; /* Top of current mcode area. */ | ||
256 | MCode *mcbot; /* Bottom of current mcode area. */ | ||
257 | size_t szmcarea; /* Size of current mcode area. */ | ||
258 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ | ||
259 | int mcprot; /* Protection of current mcode area. */ | ||
260 | } jit_State; | ||
261 | |||
262 | /* Exit stubs. */ | ||
263 | #if LJ_TARGET_X86ORX64 | ||
264 | /* Limited by the range of a short fwd jump (127): (2+2)*(32-1)-2 = 122. */ | ||
265 | #define EXITSTUB_SPACING (2+2) | ||
266 | #define EXITSTUBS_PER_GROUP 32 | ||
267 | #else | ||
268 | #error "Missing CPU-specific exit stub definitions" | ||
269 | #endif | ||
270 | |||
271 | /* Return the address of an exit stub. */ | ||
272 | static LJ_AINLINE MCode *exitstub_addr(jit_State *J, ExitNo exitno) | ||
273 | { | ||
274 | lua_assert(J->exitstubgroup[exitno / EXITSTUBS_PER_GROUP] != NULL); | ||
275 | return J->exitstubgroup[exitno / EXITSTUBS_PER_GROUP] + | ||
276 | EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); | ||
277 | } | ||
278 | |||
279 | #endif | ||