Diffstat (limited to 'src/lj_opt_narrow.c')
-rw-r--r--	src/lj_opt_narrow.c	89
1 file changed, 55 insertions(+), 34 deletions(-)
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index b6615f32..fb6601e9 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -89,16 +89,17 @@
 /* -- Elimination of narrowing type conversions --------------------------- */
 
 /* Narrowing of index expressions and bit operations is demand-driven. The
-** trace recorder emits a narrowing type conversion (TOINT or TOBIT) in
-** all of these cases (e.g. array indexing or string indexing). FOLD
+** trace recorder emits a narrowing type conversion (CONV.int.num or TOBIT)
+** in all of these cases (e.g. array indexing or string indexing). FOLD
 ** already takes care of eliminating simple redundant conversions like
-** TOINT(TONUM(x)) ==> x.
+** CONV.int.num(CONV.num.int(x)) ==> x.
 **
 ** But the surrounding code is FP-heavy and all arithmetic operations are
 ** performed on FP numbers. Consider a common example such as 'x=t[i+1]',
 ** with 'i' already an integer (due to induction variable narrowing). The
-** index expression would be recorded as TOINT(ADD(TONUM(i), 1)), which is
-** clearly suboptimal.
+** index expression would be recorded as
+**   CONV.int.num(ADD(CONV.num.int(i), 1))
+** which is clearly suboptimal.
 **
 ** One can do better by recursively backpropagating the narrowing type
 ** conversion across FP arithmetic operations. This turns FP ops into
@@ -106,9 +107,10 @@
 ** the conversion they also need to check for overflow. Currently only ADD
 ** and SUB are supported.
 **
-** The above example can be rewritten as ADDOV(TOINT(TONUM(i)), 1) and
-** then into ADDOV(i, 1) after folding of the conversions. The original FP
-** ops remain in the IR and are eliminated by DCE since all references to
+** The above example can be rewritten as
+**   ADDOV(CONV.int.num(CONV.num.int(i)), 1)
+** and then into ADDOV(i, 1) after folding of the conversions. The original
+** FP ops remain in the IR and are eliminated by DCE since all references to
 ** them are gone.
 **
 ** Special care has to be taken to avoid narrowing across an operation
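
Because a narrowed ADD or SUB must preserve the semantics of the FP original, it is emitted as ADDOV/SUBOV, i.e. with an overflow check. A minimal standalone sketch of what that check means follows; on a trace, failure is a guard exit back to the interpreter, not a return code:

#include <stdint.h>

/* Sketch of the overflow check an ADDOV-style instruction performs before
** a narrowed integer add may stand in for the original FP add. */
static int addov32(int32_t a, int32_t b, int32_t *res)
{
  int64_t wide = (int64_t)a + (int64_t)b;  /* Exact in 64 bits. */
  if (wide < INT32_MIN || wide > INT32_MAX)
    return 0;  /* Overflow: a trace would take a guard exit here. */
  *res = (int32_t)wide;
  return 1;
}
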
@@ -173,6 +175,7 @@
 enum {
   NARROW_REF,    /* Push ref. */
   NARROW_CONV,   /* Push conversion of ref. */
+  NARROW_SEXT,   /* Push sign-extension of ref. */
   NARROW_INT     /* Push KINT ref. The next code holds an int32_t. */
 };
 
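
The new opcode slots into the stack-machine code format this file already uses. The sketch below mirrors the NARROWINS/narrow_op/narrow_ref definitions earlier in lj_opt_narrow.c (outside this diff): each 32 bit code carries a NARROW_* opcode or an IR opcode+type in the upper 16 bits and an IR reference (or 0) in the lower 16 bits.

#include <stdint.h>

/* Mirror of the stack-machine code format used by this file; the actual
** definitions live outside this diff. */
typedef uint16_t IRRef1;   /* As in lj_ir.h. */
typedef uint16_t IROpT;    /* IR opcode + type, as in lj_ir.h. */
typedef uint32_t NarrowIns;

#define NARROWINS(op, ref)  (((op) << 16) + (ref))
#define narrow_op(ins)      ((IROpT)((ins) >> 16))
#define narrow_ref(ins)     ((IRRef1)(ins))
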
@@ -188,7 +191,8 @@ typedef struct NarrowConv {
   NarrowIns *sp;     /* Current stack pointer. */
   NarrowIns *maxsp;  /* Maximum stack pointer minus redzone. */
   int lim;           /* Limit on the number of emitted conversions. */
-  IRRef mode;        /* Conversion mode (IRTOINT_*). */
+  IRRef mode;        /* Conversion mode (IRCONV_*). */
+  IRType t;          /* Destination type: IRT_INT or IRT_I64. */
   NarrowIns stack[NARROW_MAX_STACK];  /* Stack holding stack-machine code. */
 } NarrowConv;
 
@@ -198,7 +202,9 @@ static BPropEntry *narrow_bpc_get(jit_State *J, IRRef1 key, IRRef mode)
   ptrdiff_t i;
   for (i = 0; i < BPROP_SLOTS; i++) {
     BPropEntry *bp = &J->bpropcache[i];
-    if (bp->key == key && bp->mode <= mode)  /* Stronger checks are ok, too. */
+    /* Stronger checks are ok, too. */
+    if (bp->key == key && bp->mode >= mode &&
+        ((bp->mode ^ mode) & IRCONV_MODEMASK) == 0)
       return bp;
   }
   return NULL;
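
The new predicate only accepts a cached entry whose conversion agrees with the requested one in everything below the check-strength bits. A standalone illustration, assuming the IRCONV_* layout from lj_ir.h (source IRType in the low 5 bits, destination type above it, check strength in the top bits ordered weakest to strongest, IRCONV_MODEMASK covering everything but the strength):

#include <stdint.h>

/* Assumed to mirror lj_ir.h: low 12 bits hold source/dest types and flags,
** the bits above hold the check strength, larger meaning stronger. */
#define IRCONV_MODEMASK  0x0fff
#define IRCONV_CSH       12
#define IRCONV_INDEX     (2<<IRCONV_CSH)
#define IRCONV_CHECK     (3<<IRCONV_CSH)

/* A cached conversion may serve a request iff the types/flags match
** exactly and its check is at least as strong as the one requested. */
static int bpc_usable(uint32_t cached, uint32_t wanted)
{
  return cached >= wanted && ((cached ^ wanted) & IRCONV_MODEMASK) == 0;
}
/* E.g. a cached IRCONV_CHECK entry satisfies an IRCONV_INDEX request for
** the same types, but not vice versa. */
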
@@ -223,16 +229,16 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
   IRRef cref;
 
   /* Check the easy cases first. */
-  if (ir->o == IR_TONUM) {  /* Undo inverse conversion. */
-    *nc->sp++ = NARROWINS(NARROW_REF, ir->op1);
-    if (nc->mode == IRTOINT_TRUNCI64) {
-      *nc->sp++ = NARROWINS(NARROW_REF, IRTOINT_SEXT64);
-      *nc->sp++ = NARROWINS(IRT(IR_TOI64, IRT_I64), 0);
-    }
+  if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) {
+    if (nc->t == IRT_I64)
+      *nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1);  /* Reduce to sign-ext. */
+    else
+      *nc->sp++ = NARROWINS(NARROW_REF, ir->op1);  /* Undo conversion. */
     return 0;
   } else if (ir->o == IR_KNUM) {  /* Narrow FP constant. */
     lua_Number n = ir_knum(ir)->n;
-    if (nc->mode == IRTOINT_TOBIT) {  /* Allows a wider range of constants. */
+    if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
+      /* Allows a wider range of constants. */
       int64_t k64 = (int64_t)n;
       if (n == cast_num(k64)) {  /* Only if constant doesn't lose precision. */
         *nc->sp++ = NARROWINS(NARROW_INT, 0);
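
The "doesn't lose precision" test visible above is a round-trip through int64_t (cast_num() is LuaJIT's cast to lua_Number, i.e. double). A runnable sketch:

#include <stdio.h>
#include <stdint.h>

/* Round-trip test used for narrowing a KNUM constant: the double must
** survive conversion to int64_t and back unchanged. */
static int narrows_exactly(double n)
{
  int64_t k64 = (int64_t)n;
  return n == (double)k64;
}

int main(void)
{
  printf("%d\n", narrows_exactly(3.0));           /* 1 */
  printf("%d\n", narrows_exactly(0.5));           /* 0: fractional part. */
  printf("%d\n", narrows_exactly(4294967296.0));  /* 1: 2^32; TOBIT allows */
  return 0;                                       /* more than int32 range. */
}
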
@@ -251,36 +257,46 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
   }
 
   /* Try to CSE the conversion. Stronger checks are ok, too. */
-  for (cref = J->chain[fins->o]; cref > ref; cref = IR(cref)->prev)
-    if (IR(cref)->op1 == ref &&
-        irt_isguard(IR(cref)->t) >= irt_isguard(fins->t)) {
+  cref = J->chain[fins->o];
+  while (cref > ref) {
+    IRIns *cr = IR(cref);
+    if (cr->op1 == ref &&
+        (fins->o == IR_TOBIT ||
+         ((cr->op2 & IRCONV_MODEMASK) == (nc->mode & IRCONV_MODEMASK) &&
+          irt_isguard(cr->t) >= irt_isguard(fins->t)))) {
       *nc->sp++ = NARROWINS(NARROW_REF, cref);
       return 0;  /* Already there, no additional conversion needed. */
     }
+    cref = cr->prev;
+  }
 
   /* Backpropagate across ADD/SUB. */
   if (ir->o == IR_ADD || ir->o == IR_SUB) {
     /* Try cache lookup first. */
     IRRef mode = nc->mode;
     BPropEntry *bp;
-    if (mode == IRTOINT_INDEX && depth > 0)
-      mode = IRTOINT_CHECK;  /* Inner conversions need a stronger check. */
+    /* Inner conversions need a stronger check. */
+    if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX && depth > 0)
+      mode += IRCONV_CHECK-IRCONV_INDEX;
     bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
     if (bp) {
       *nc->sp++ = NARROWINS(NARROW_REF, bp->val);
-      if (mode == IRTOINT_TRUNCI64 && mode != bp->mode) {
-        *nc->sp++ = NARROWINS(NARROW_REF, IRTOINT_SEXT64);
-        *nc->sp++ = NARROWINS(IRT(IR_TOI64, IRT_I64), 0);
-      }
       return 0;
+    } else if (nc->t == IRT_I64) {
+      /* Try sign-extending from an existing (checked) conversion to int. */
+      mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX;
+      bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
+      if (bp) {
+        *nc->sp++ = NARROWINS(NARROW_SEXT, bp->val);
+        return 0;
+      }
     }
     if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) {
       NarrowIns *savesp = nc->sp;
       int count = narrow_conv_backprop(nc, ir->op1, depth);
       count += narrow_conv_backprop(nc, ir->op2, depth);
       if (count <= nc->lim) {  /* Limit total number of conversions. */
-        IRType t = mode == IRTOINT_TRUNCI64 ? IRT_I64 : IRT_INT;
-        *nc->sp++ = NARROWINS(IRT(ir->o, t), ref);
+        *nc->sp++ = NARROWINS(IRT(ir->o, nc->t), ref);
         return count;
       }
       nc->sp = savesp;  /* Too many conversions, need to backtrack. */
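
The recursion above follows a speculate-and-backtrack discipline: remember the stack-machine write position, expand both operands, and roll the stack back wholesale if the expansion would emit more conversions than the limit allows. The same pattern in isolation (all names here are illustrative, not LuaJIT API):

#include <stdint.h>

/* Illustrative speculate-and-backtrack skeleton. */
typedef struct {
  uint32_t code[40];
  uint32_t *sp;   /* Next free slot. */
  int lim;        /* Conversion budget. */
} Sketch;

/* Stand-in for narrow_conv_backprop() on one operand: pushes some code
** and reports how many new conversions it had to emit. */
static int expand_operand(Sketch *s, uint32_t op)
{
  *s->sp++ = op;
  return 1;
}

static int expand_add(Sketch *s, uint32_t lhs, uint32_t rhs)
{
  uint32_t *savesp = s->sp;            /* Rollback point. */
  int count = expand_operand(s, lhs);
  count += expand_operand(s, rhs);
  if (count <= s->lim)
    return count;                      /* Within budget: keep the code. */
  s->sp = savesp;                      /* Too costly: discard everything. */
  return -1;
}
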
@@ -309,9 +325,12 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
       *sp++ = ref;
     } else if (op == NARROW_CONV) {
       *sp++ = emitir_raw(convot, ref, convop2);  /* Raw emit avoids a loop. */
+    } else if (op == NARROW_SEXT) {
+      *sp++ = emitir(IRT(IR_CONV, IRT_I64), ref,
+                     (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
     } else if (op == NARROW_INT) {
       lua_assert(next < last);
-      *sp++ = nc->mode == IRTOINT_TRUNCI64 ?
+      *sp++ = nc->t == IRT_I64 ?
               lj_ir_kint64(J, (int64_t)(int32_t)*next++) :
               lj_ir_kint(J, *next++);
     } else {  /* Regular IROpT. Pops two operands and pushes one result. */
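
NARROW_SEXT emits CONV.i64.int with IRCONV_SEXT; the operand (IRT_I64<<5)|IRT_INT spells destination i64, source int. In C terms the emitted instruction is nothing more than a widening cast, and the NARROW_INT branch applies the same widening to constants via its (int64_t)(int32_t) double cast. A sketch of the semantics, not LuaJIT code:

#include <stdint.h>

/* Semantics of CONV.i64.int with IRCONV_SEXT: widen a 32 bit integer to
** 64 bits, replicating the sign bit. */
static int64_t sext32(int32_t x)
{
  return (int64_t)x;  /* -1 -> -1, not 0xffffffff. */
}
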
@@ -319,12 +338,12 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
       lua_assert(sp >= nc->stack+2);
       sp--;
       /* Omit some overflow checks for array indexing. See comments above. */
-      if (mode == IRTOINT_INDEX) {
+      if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
         if (next == last && irref_isk(narrow_ref(sp[0])) &&
             (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000 < 0x80000000)
           guardot = 0;
-        else
-          mode = IRTOINT_CHECK;  /* Otherwise cache a stronger check. */
+        else  /* Otherwise cache a stronger check. */
+          mode += IRCONV_CHECK-IRCONV_INDEX;
       }
       sp[-1] = emitir(op+guardot, sp[-1], sp[0]);
       /* Add to cache. */
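
The constant test in the retained context lines uses an unsigned bias: adding 0x40000000 maps the signed interval [-0x40000000, 0x3fffffff] onto [0, 0x7fffffff], so one unsigned comparison checks that the constant lies within +/-2^30, the case where the overflow guard may be dropped (see the comments the hunk refers to). Standalone:

#include <stdint.h>

/* Biased range test from above: true iff -0x40000000 <= k < 0x40000000. */
static int small_index_const(int32_t k)
{
  return (uint32_t)k + 0x40000000u < 0x80000000u;
}
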
@@ -344,8 +363,9 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J)
   nc.J = J;
   nc.sp = nc.stack;
   nc.maxsp = &nc.stack[NARROW_MAX_STACK-4];
+  nc.t = irt_type(fins->t);
   if (fins->o == IR_TOBIT) {
-    nc.mode = IRTOINT_TOBIT;  /* Used only in the backpropagation cache. */
+    nc.mode = IRCONV_TOBIT;  /* Used only in the backpropagation cache. */
     nc.lim = 2;  /* TOBIT can use a more optimistic rule. */
   } else {
     nc.mode = fins->op2;
@@ -401,7 +421,8 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
   if (!tref_isinteger(rc)) {
     if (tref_isstr(rc))
       rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
-    rc = emitir(IRTGI(IR_TOINT), rc, IRTOINT_CHECK);  /* Guarded TOINT! */
+    /* Guarded conversion to integer! */
+    rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
   }
   if (!tref_isk(rc)) {  /* Range guard: -65536 <= i <= 65536 */
     tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1));
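
The range guard begun in the last context line uses the same bias idea: 65536-2147483647-1 equals 65536+INT32_MIN, shifting the valid interval [-65536, 65536] down to start at INT32_MIN so that the single comparison emitted by the code following this hunk can test both bounds at once. An equivalent unsigned formulation, as a sketch:

#include <stdint.h>

/* Equivalent of the biased range guard: true iff -65536 <= i <= 65536.
** Biasing by +65536 folds the two-sided test into one unsigned compare. */
static int pow_exponent_in_range(int32_t i)
{
  return (uint32_t)i + 65536u <= 131072u;
}
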