Diffstat (limited to 'src/lj_opt_split.c')
 -rw-r--r--  src/lj_opt_split.c  343
 1 file changed, 343 insertions(+), 0 deletions(-)
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
new file mode 100644
index 00000000..3cb30514
--- /dev/null
+++ b/src/lj_opt_split.c
@@ -0,0 +1,343 @@
/*
** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_split_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT && LJ_HASFFI && LJ_32

#include "lj_err.h"
#include "lj_str.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_vm.h"

/* SPLIT pass:
**
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
** instructions. It's only active for 32 bit CPUs which lack native 64 bit
** operations. The FFI is currently the only emitter for 64 bit
** instructions, so this pass is disabled if the FFI is disabled.
**
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be
** implemented. This is much easier than adding support for allocating
** register pairs to each backend (believe me, I tried). A few simple, but
** important optimizations can be performed by the SPLIT pass, which would
** be tedious to do in the backend.
**
** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
** inferred from the previous instruction.
**
** The operands of HIOP hold the hiword input references. The output of HIOP
** is the hiword output reference, which is also used to hold the hiword
** register or spill slot information. The register allocator treats this
** instruction independently of any other instruction, which improves code
** quality compared to using fixed register pairs.
**
** It's easier to split up some instructions into two regular 32 bit
** instructions. E.g. XLOAD is split up into two XLOADs with two different
** addresses. Obviously 64 bit constants need to be split up into two 32 bit
** constants, too. Some hiword instructions can be entirely omitted, e.g.
** when zero-extending a 32 bit value to 64 bits.
**
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
** two int64_t fields:
**
** 0100    p32 ADD    base  +8
** 0101    i64 XLOAD  0100
** 0102    i64 ADD    0101  +1
** 0103    p32 ADD    base  +16
** 0104    i64 XSTORE 0103  0102
**
** mov rax, [esi+0x8]
** add rax, +0x01
** mov [esi+0x10], rax
**
** Here's the transformed IR and the x86 machine code after the SPLIT pass:
**
** 0100    p32 ADD    base  +8
** 0101    int XLOAD  0100
** 0102    p32 ADD    base  +12
** 0103    int XLOAD  0102
** 0104    int ADD    0101  +1
** 0105    int HIOP   0103  +0
** 0106    p32 ADD    base  +16
** 0107    int XSTORE 0106  0104
** 0108    p32 ADD    base  +20
** 0109    int XSTORE 0108  0105
**
** mov eax, [esi+0x8]
** mov ecx, [esi+0xc]
** add eax, +0x01
** adc ecx, +0x00
** mov [esi+0x10], eax
** mov [esi+0x14], ecx
**
** You may notice the reassociated hiword address computation, which is
** later fused into the mov operands by the assembler.
*/

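/* What the ADD/HIOP pair above computes, as a minimal C sketch (the
** variable names here are hypothetical, not part of the pass):
**
**   uint32_t lo = alo + blo;
**   uint32_t carry = (lo < alo);      -- set if the loword add wrapped
**   uint32_t hi = ahi + bhi + carry;
**
** On x86 the backend maps this directly to the add/adc pair shown above,
** so the carry never materializes in a register.
*/
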
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)  (&J->cur.ir[(ref)])

/* Directly emit the transformed IR without updating chains etc. */
static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
{
  IRRef nref = lj_ir_nextins(J);
  IRIns *ir = IR(nref);
  ir->ot = ot;
  ir->op1 = op1;
  ir->op2 = op2;
  return nref;
}
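
/* Typical usage, as a sketch with hypothetical lo1/lo2/hi1/hi2 refs: the
** loword op is emitted first, then its HIOP twin right after:
**
**   lo = split_emit(J, IRTI(IR_ADD), lo1, lo2);
**   hi = split_emit(J, IRTI(IR_HIOP), hi1, hi2);
**
** Nothing may be emitted in between, since each HIOP must immediately
** follow its counterpart (see the pass description above).
*/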

/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                          IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRTI(IR_HIOP), tmp, tmp);
}
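
/* The chain this emits for e.g. a 64 bit MUL on a little-endian target
** (a sketch; the reference numbers are made up for illustration):
**
**   0010 nil CARG  lo1  hi1
**   0011 nil CARG  0010 lo2
**   0012 nil CARG  0011 hi2
**   0013 int CALLN 0012 lj_carith_mul64
**   0014 int HIOP  0013 0013
**
** The CALLN result is the loword of the 64 bit return value and the
** trailing HIOP stands for its hiword.
*/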

/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRRef ref)
{
  IRIns *ir = IR(ref);
  int32_t ofs = 4;
  if (ir->o == IR_ADD && irref_isk(ir->op2)) {  /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    ref = ir->op1;
    if (ofs == 0) return ref;
  }
  return split_emit(J, IRTI(IR_ADD), ref, lj_ir_kint(J, ofs));
}
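
/* Example (a sketch): for the XLOAD in the header comment, the loword
** address is 'ADD base +8', so split_ptr reassociates this to 'ADD base +12'
** (instruction 0102 in the listing above) instead of stacking a second ADD
** on top of the loword address.
*/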

/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need);
  IRRef1 *hisubst;
  IRRef ref;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if (ir->o == IR_KINT64) {  /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = (IRRef1)ref;  /* Identity substitution for loword. */
    }
  }

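  /* For instance (a sketch): a KINT64 with the value 0x123456789abcdef0 is
  ** replaced by the two KINT constants lo = 0x9abcdef0 and hi = 0x12345678.
  ** The u32.lo/u32.hi accessors of the TValue hide the host endianness, so
  ** this holds on both LE and BE targets.
  */
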
  /* Process old IR instructions. */
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);

    /* Split 64 bit instructions. */
    if (irt_isint64(ir->t)) {
      IRRef hi = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
        /* Use plain op for hiword if loword cannot produce a carry/borrow. */
        if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->op1 = hi; nir->op2 = hisubst[ir->op2];
          hi = nref;
          break;
        }
        /* fallthrough */
      case IR_NEG:
        hi = split_emit(J, IRTI(IR_HIOP), hi, hisubst[ir->op2]);
        break;
      case IR_MUL:
        hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
        break;
      case IR_POWI:
        hi = split_call64(J, hisubst, oir, ir,
                          irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                             IRCALL_lj_carith_powu64);
        break;
      case IR_XLOAD:
        hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XSTORE:
#if LJ_LE
        hi = hisubst[ir->op2];
#else
        hi = nir->op2; nir->op2 = hisubst[ir->op2];
#endif
        split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hi);
        continue;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
        if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
          hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
        } else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
          /* Drop cast, since assembler doesn't care. */
          hisubst[ref] = hi;
          goto fwdlo;
        } else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
          IRRef k31 = lj_ir_kint(J, 31);
          nir = IR(nref);  /* May have been reallocated. */
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
          nir->op2 = k31;
          hi = nref;
        } else {  /* Zero-extend to 64 bit. */
          hisubst[ref] = lj_ir_kint(J, 0);
          goto fwdlo;
        }
        break;
      }
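      /* A sketch of the two extension cases above: for lo = 0xfffffffb (-5),
      ** sign-extension computes hi = lo >> 31 (arithmetic) = 0xffffffff,
      ** whereas zero-extension emits no instruction at all and just
      ** substitutes the constant hi = 0.
      */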
      case IR_PHI: {
        IRRef hi2;
        if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
            nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        hi2 = hisubst[ir->op2];
        if (!((irref_isk(hi) && irref_isk(hi2)) || hi == hi2))
          split_emit(J, IRTI(IR_PHI), hi, hi2);
        continue;
      }
      default:
        lua_assert(ir->o <= IR_NE);
        split_emit(J, IRTGI(IR_HIOP), hi, hisubst[ir->op2]);  /* Comparisons. */
        continue;
      }
      hisubst[ref] = hi;  /* Store hiword substitution. */
    } else if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
        if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
          ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
                                hisubst[ir->op1], nref);
        } else {  /* Truncate to lower 32 bits. */
        fwdlo:
          ir->prev = nir->op1;  /* Forward loword. */
          /* Replace with NOP to avoid messing up the snapshot logic. */
          nir->ot = IRT(IR_NOP, IRT_NIL);
          nir->op1 = nir->op2 = 0;
        }
      }
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }

  /* Substitute snapshot maps. */
  oir[nins].prev = J->cur.nins;  /* Substitution for last snapshot. */
  {
    SnapNo i, nsnap = J->cur.nsnap;
    for (i = 0; i < nsnap; i++) {
      SnapShot *snap = &J->cur.snap[i];
      SnapEntry *map = &J->cur.snapmap[snap->mapofs];
      MSize n, nent = snap->nent;
      snap->ref = oir[snap->ref].prev;
      for (n = 0; n < nent; n++) {
        SnapEntry sn = map[n];
        map[n] = ((sn & 0xffff0000) | oir[snap_ref(sn)].prev);
      }
    }
  }
}
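
/* A sketch of the snapshot remap above: a SnapEntry keeps its high 16 bits
** (slot number and flags) while the low 16 bit IR reference is rewritten,
** so a hypothetical entry (slot 3, ref 0102) becomes (slot 3, oir[0102].prev)
** and now refers to the loword result.
*/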

/* Protected callback for split pass. */
static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  split_ir(J);
  UNUSED(L); UNUSED(dummy);
  return NULL;
}

#ifdef LUA_USE_ASSERT
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (irt_isint64(ir->t))
      return 1;
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev)
    if ((IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 ||
        (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64)
      return 1;
  return 0;  /* Nope. */
}
#endif

/* SPLIT pass. */
void lj_opt_split(jit_State *J)
{
  lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}

#undef IR

#endif