Diffstat (limited to 'src/lj_opt_narrow.c')
-rw-r--r--  src/lj_opt_narrow.c | 89
1 file changed, 55 insertions(+), 34 deletions(-)
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index b6615f32..fb6601e9 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -89,16 +89,17 @@
 /* -- Elimination of narrowing type conversions --------------------------- */
 
 /* Narrowing of index expressions and bit operations is demand-driven. The
-** trace recorder emits a narrowing type conversion (TOINT or TOBIT) in
-** all of these cases (e.g. array indexing or string indexing). FOLD
+** trace recorder emits a narrowing type conversion (CONV.int.num or TOBIT)
+** in all of these cases (e.g. array indexing or string indexing). FOLD
 ** already takes care of eliminating simple redundant conversions like
-** TOINT(TONUM(x)) ==> x.
+** CONV.int.num(CONV.num.int(x)) ==> x.
 **
 ** But the surrounding code is FP-heavy and all arithmetic operations are
 ** performed on FP numbers. Consider a common example such as 'x=t[i+1]',
 ** with 'i' already an integer (due to induction variable narrowing). The
-** index expression would be recorded as TOINT(ADD(TONUM(i), 1)), which is
-** clearly suboptimal.
+** index expression would be recorded as
+**   CONV.int.num(ADD(CONV.num.int(i), 1))
+** which is clearly suboptimal.
 **
 ** One can do better by recursively backpropagating the narrowing type
 ** conversion across FP arithmetic operations. This turns FP ops into
@@ -106,9 +107,10 @@
 ** the conversion they also need to check for overflow. Currently only ADD
 ** and SUB are supported.
 **
-** The above example can be rewritten as ADDOV(TOINT(TONUM(i)), 1) and
-** then into ADDOV(i, 1) after folding of the conversions. The original FP
-** ops remain in the IR and are eliminated by DCE since all references to
+** The above example can be rewritten as
+**   ADDOV(CONV.int.num(CONV.num.int(i)), 1)
+** and then into ADDOV(i, 1) after folding of the conversions. The original
+** FP ops remain in the IR and are eliminated by DCE since all references to
 ** them are gone.
 **
 ** Special care has to be taken to avoid narrowing across an operation
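
The comment above says that narrowed ADD/SUB must carry their own overflow check (ADDOV/SUBOV). As a quick illustration of the property such a guard enforces, here is a standalone C sketch (the helper name addov_sketch is made up; this is not LuaJIT code): the narrowed 32-bit result may only be used when it equals the exact result, otherwise the guard fails and the trace exits back to the FP path.

#include <stdint.h>

/* Sketch of overflow-checked narrowed addition (cf. ADDOV): returns 1 and
** stores the 32-bit sum when it is exact, returns 0 on overflow, which
** corresponds to the guard failing and the trace exiting. */
static int addov_sketch(int32_t a, int32_t b, int32_t *res)
{
  int64_t wide = (int64_t)a + (int64_t)b;  /* exact result in 64 bits */
  if (wide != (int64_t)(int32_t)wide)
    return 0;  /* overflow: narrowing would change the result */
  *res = (int32_t)wide;
  return 1;
}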
@@ -173,6 +175,7 @@
 enum {
   NARROW_REF,		/* Push ref. */
   NARROW_CONV,		/* Push conversion of ref. */
+  NARROW_SEXT,		/* Push sign-extension of ref. */
   NARROW_INT		/* Push KINT ref. The next code holds an int32_t. */
 };
 
@@ -188,7 +191,8 @@ typedef struct NarrowConv {
   NarrowIns *sp;	/* Current stack pointer. */
   NarrowIns *maxsp;	/* Maximum stack pointer minus redzone. */
   int lim;		/* Limit on the number of emitted conversions. */
-  IRRef mode;		/* Conversion mode (IRTOINT_*). */
+  IRRef mode;		/* Conversion mode (IRCONV_*). */
+  IRType t;		/* Destination type: IRT_INT or IRT_I64. */
   NarrowIns stack[NARROW_MAX_STACK];  /* Stack holding stack-machine code. */
 } NarrowConv;
 
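
The stack field holds the small stack-machine program that narrow_conv_backprop() records and narrow_conv_emit() later replays. The exact NarrowIns encoding is defined earlier in the file and is not part of this hunk; the sketch below is only an assumption based on how NARROWINS(op, ref) is used in the hunks that follow: one 32-bit word with the opcode or NARROW_* tag in the upper half and an IR reference in the lower half (all *_sketch names are invented).

#include <stdint.h>

/* Hypothetical packing for illustration only; the real macros live near
** the top of lj_opt_narrow.c. */
typedef uint32_t NarrowInsSketch;
#define NARROWINS_SKETCH(op, ref) ((NarrowInsSketch)(((uint32_t)(op) << 16) + (uint16_t)(ref)))
#define narrow_op_sketch(ins)     ((ins) >> 16)      /* opcode / NARROW_* tag */
#define narrow_ref_sketch(ins)    ((uint16_t)(ins))  /* IR reference */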
@@ -198,7 +202,9 @@ static BPropEntry *narrow_bpc_get(jit_State *J, IRRef1 key, IRRef mode)
   ptrdiff_t i;
   for (i = 0; i < BPROP_SLOTS; i++) {
     BPropEntry *bp = &J->bpropcache[i];
-    if (bp->key == key && bp->mode <= mode)  /* Stronger checks are ok, too. */
+    /* Stronger checks are ok, too. */
+    if (bp->key == key && bp->mode >= mode &&
+	((bp->mode ^ mode) & IRCONV_MODEMASK) == 0)
       return bp;
   }
   return NULL;
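
The new hit test has two parts: the cached conversion must be between the same source and destination types (the IRCONV_MODEMASK portion must match exactly), and its conversion mode must be at least as strong as the one requested, hence the comment that stronger checks are acceptable. A sketch spelling that out, with a placeholder mask value (MODEMASK_SKETCH and the helper name are invented for illustration):

#include <stdint.h>

#define MODEMASK_SKETCH 0x3ffu  /* placeholder, not the real IRCONV_MODEMASK */

/* A cached entry is reusable iff it converts between the same types and
** its check is at least as strong as the requested one. */
static int bpc_hit_sketch(uint32_t cached_mode, uint32_t wanted_mode)
{
  return cached_mode >= wanted_mode &&
	 ((cached_mode ^ wanted_mode) & MODEMASK_SKETCH) == 0;
}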
@@ -223,16 +229,16 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
   IRRef cref;
 
   /* Check the easy cases first. */
-  if (ir->o == IR_TONUM) {  /* Undo inverse conversion. */
-    *nc->sp++ = NARROWINS(NARROW_REF, ir->op1);
-    if (nc->mode == IRTOINT_TRUNCI64) {
-      *nc->sp++ = NARROWINS(NARROW_REF, IRTOINT_SEXT64);
-      *nc->sp++ = NARROWINS(IRT(IR_TOI64, IRT_I64), 0);
-    }
+  if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) {
+    if (nc->t == IRT_I64)
+      *nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1);  /* Reduce to sign-ext. */
+    else
+      *nc->sp++ = NARROWINS(NARROW_REF, ir->op1);  /* Undo conversion. */
     return 0;
   } else if (ir->o == IR_KNUM) {  /* Narrow FP constant. */
     lua_Number n = ir_knum(ir)->n;
-    if (nc->mode == IRTOINT_TOBIT) {  /* Allows a wider range of constants. */
+    if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
+      /* Allows a wider range of constants. */
       int64_t k64 = (int64_t)n;
       if (n == cast_num(k64)) {  /* Only if constant doesn't lose precision. */
 	*nc->sp++ = NARROWINS(NARROW_INT, 0);
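
The constant case at the end of this hunk narrows an FP constant for TOBIT only if the int64 round trip reproduces it exactly. Restated as a tiny standalone helper (knum_fits_int64 is an invented name; the logic mirrors the 'n == cast_num(k64)' test above):

#include <stdint.h>

/* Narrow an FP constant only when no precision is lost: cast to int64 and
** back, then require equality with the original number. */
static int knum_fits_int64(double n, int64_t *k64)
{
  *k64 = (int64_t)n;
  return n == (double)*k64;
}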
@@ -251,36 +257,46 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
   }
 
   /* Try to CSE the conversion. Stronger checks are ok, too. */
-  for (cref = J->chain[fins->o]; cref > ref; cref = IR(cref)->prev)
-    if (IR(cref)->op1 == ref &&
-	irt_isguard(IR(cref)->t) >= irt_isguard(fins->t)) {
+  cref = J->chain[fins->o];
+  while (cref > ref) {
+    IRIns *cr = IR(cref);
+    if (cr->op1 == ref &&
+	(fins->o == IR_TOBIT ||
+	 ((cr->op2 & IRCONV_MODEMASK) == (nc->mode & IRCONV_MODEMASK) &&
+	  irt_isguard(cr->t) >= irt_isguard(fins->t)))) {
       *nc->sp++ = NARROWINS(NARROW_REF, cref);
       return 0;  /* Already there, no additional conversion needed. */
     }
+    cref = cr->prev;
+  }
 
   /* Backpropagate across ADD/SUB. */
   if (ir->o == IR_ADD || ir->o == IR_SUB) {
     /* Try cache lookup first. */
     IRRef mode = nc->mode;
     BPropEntry *bp;
-    if (mode == IRTOINT_INDEX && depth > 0)
-      mode = IRTOINT_CHECK;  /* Inner conversions need a stronger check. */
+    /* Inner conversions need a stronger check. */
+    if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX && depth > 0)
+      mode += IRCONV_CHECK-IRCONV_INDEX;
     bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
     if (bp) {
       *nc->sp++ = NARROWINS(NARROW_REF, bp->val);
-      if (mode == IRTOINT_TRUNCI64 && mode != bp->mode) {
-	*nc->sp++ = NARROWINS(NARROW_REF, IRTOINT_SEXT64);
-	*nc->sp++ = NARROWINS(IRT(IR_TOI64, IRT_I64), 0);
-      }
       return 0;
+    } else if (nc->t == IRT_I64) {
+      /* Try sign-extending from an existing (checked) conversion to int. */
+      mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX;
+      bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
+      if (bp) {
+	*nc->sp++ = NARROWINS(NARROW_SEXT, bp->val);
+	return 0;
+      }
     }
     if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) {
       NarrowIns *savesp = nc->sp;
       int count = narrow_conv_backprop(nc, ir->op1, depth);
       count += narrow_conv_backprop(nc, ir->op2, depth);
       if (count <= nc->lim) {  /* Limit total number of conversions. */
-	IRType t = mode == IRTOINT_TRUNCI64 ? IRT_I64 : IRT_INT;
-	*nc->sp++ = NARROWINS(IRT(ir->o, t), ref);
+	*nc->sp++ = NARROWINS(IRT(ir->o, nc->t), ref);
 	return count;
       }
       nc->sp = savesp;  /* Too many conversions, need to backtrack. */
@@ -309,9 +325,12 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
       *sp++ = ref;
     } else if (op == NARROW_CONV) {
       *sp++ = emitir_raw(convot, ref, convop2);  /* Raw emit avoids a loop. */
+    } else if (op == NARROW_SEXT) {
+      *sp++ = emitir(IRT(IR_CONV, IRT_I64), ref,
+		     (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
     } else if (op == NARROW_INT) {
       lua_assert(next < last);
-      *sp++ = nc->mode == IRTOINT_TRUNCI64 ?
+      *sp++ = nc->t == IRT_I64 ?
 	      lj_ir_kint64(J, (int64_t)(int32_t)*next++) :
 	      lj_ir_kint(J, *next++);
     } else {  /* Regular IROpT. Pops two operands and pushes one result. */
@@ -319,12 +338,12 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
       lua_assert(sp >= nc->stack+2);
       sp--;
       /* Omit some overflow checks for array indexing. See comments above. */
-      if (mode == IRTOINT_INDEX) {
+      if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
 	if (next == last && irref_isk(narrow_ref(sp[0])) &&
 	    (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000 < 0x80000000)
 	  guardot = 0;
-	else
-	  mode = IRTOINT_CHECK;  /* Otherwise cache a stronger check. */
+	else  /* Otherwise cache a stronger check. */
+	  mode += IRCONV_CHECK-IRCONV_INDEX;
       }
       sp[-1] = emitir(op+guardot, sp[-1], sp[0]);
       /* Add to cache. */
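
The constant test in this hunk is a compact unsigned range check: adding 0x40000000 to the key constant and comparing against 0x80000000 is true exactly when the constant lies in [-2^30, 2^30), i.e. it is small enough that the overflow guard on the index add can be dropped. Restated as a standalone helper (small_index_const is an invented name):

#include <stdint.h>

/* True iff -0x40000000 <= k < 0x40000000, written as one unsigned compare,
** matching the '(uint32_t)... + 0x40000000 < 0x80000000' test above. */
static int small_index_const(int32_t k)
{
  return (uint32_t)k + 0x40000000u < 0x80000000u;
}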
@@ -344,8 +363,9 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J)
   nc.J = J;
   nc.sp = nc.stack;
   nc.maxsp = &nc.stack[NARROW_MAX_STACK-4];
+  nc.t = irt_type(fins->t);
   if (fins->o == IR_TOBIT) {
-    nc.mode = IRTOINT_TOBIT;  /* Used only in the backpropagation cache. */
+    nc.mode = IRCONV_TOBIT;  /* Used only in the backpropagation cache. */
     nc.lim = 2;  /* TOBIT can use a more optimistic rule. */
   } else {
     nc.mode = fins->op2;
@@ -401,7 +421,8 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
   if (!tref_isinteger(rc)) {
     if (tref_isstr(rc))
       rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
-    rc = emitir(IRTGI(IR_TOINT), rc, IRTOINT_CHECK);  /* Guarded TOINT! */
+    /* Guarded conversion to integer! */
+    rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
   }
   if (!tref_isk(rc)) {  /* Range guard: -65536 <= i <= 65536 */
     tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1));
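
The hunk ends before the comparison that pairs with this biased ADD, so the emitted guard itself is not shown here. Purely to illustrate the comment 'Range guard: -65536 <= i <= 65536', the same interval can be tested with a single unsigned comparison (pow_exp_in_range is an invented name, not LuaJIT code):

#include <stdint.h>

/* True iff -65536 <= i <= 65536: bias by 65536, then one unsigned compare. */
static int pow_exp_in_range(int32_t i)
{
  return (uint32_t)i + 65536u <= 131072u;
}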