Diffstat (limited to 'src/lj_opt_narrow.c')
-rw-r--r--  src/lj_opt_narrow.c  233
1 file changed, 195 insertions(+), 38 deletions(-)
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 0a2bb6cd..1727e9b5 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -1,5 +1,6 @@
1/* 1/*
2** NARROW: Narrowing of numbers to integers (double to int32_t). 2** NARROW: Narrowing of numbers to integers (double to int32_t).
3** STRIPOV: Stripping of overflow checks.
3** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h 4** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
4*/ 5*/
5 6
@@ -16,6 +17,7 @@
16#include "lj_jit.h" 17#include "lj_jit.h"
17#include "lj_iropt.h" 18#include "lj_iropt.h"
18#include "lj_trace.h" 19#include "lj_trace.h"
20#include "lj_vm.h"
19 21
20/* Rationale for narrowing optimizations: 22/* Rationale for narrowing optimizations:
21** 23**
@@ -57,24 +59,34 @@
57** 59**
58** A better solution is to keep all numbers as FP values and only narrow 60** A better solution is to keep all numbers as FP values and only narrow
59** when it's beneficial to do so. LuaJIT uses predictive narrowing for 61** when it's beneficial to do so. LuaJIT uses predictive narrowing for
60** induction variables and demand-driven narrowing for index expressions 62** induction variables and demand-driven narrowing for index expressions,
61** and bit operations. Additionally it can eliminate or hoists most of the 63** integer arguments and bit operations. Additionally it can eliminate or
62** resulting overflow checks. Regular arithmetic computations are never 64** hoist most of the resulting overflow checks. Regular arithmetic
63** narrowed to integers. 65** computations are never narrowed to integers.
64** 66**
65** The integer type in the IR has convenient wrap-around semantics and 67** The integer type in the IR has convenient wrap-around semantics and
66** ignores overflow. Extra operations have been added for 68** ignores overflow. Extra operations have been added for
67** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type. 69** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type.
68** Apart from reducing overall complexity of the compiler, this also 70** Apart from reducing overall complexity of the compiler, this also
69** nicely solves the problem where you want to apply algebraic 71** nicely solves the problem where you want to apply algebraic
70** simplifications to ADD, but not to ADDOV. And the assembler can use lea 72** simplifications to ADD, but not to ADDOV. And the x86/x64 assembler can
71** instead of an add for integer ADD, but not for ADDOV (lea does not 73** use lea instead of an add for integer ADD, but not for ADDOV (lea does
72** affect the flags, but it helps to avoid register moves). 74** not affect the flags, but it helps to avoid register moves).
73** 75**
74** Note that all of the above has to be reconsidered if LuaJIT is to be 76**
75** ported to architectures with slow FP operations or with no hardware FPU 77** All of the above has to be reconsidered for architectures with slow FP
76** at all. In the latter case an integer-only port may be the best overall 78** operations or without a hardware FPU. The dual-number mode of LuaJIT
77** solution (if this still meets user demands). 79** addresses this issue. Arithmetic operations are performed on integers
80** as far as possible and overflow checks are added as needed.
81**
82** This implies that narrowing for integer arguments and bit operations
83** should also strip overflow checks, e.g. replace ADDOV with ADD. The
84** original overflow guards are weak and can be eliminated by DCE, if
85** there's no other use.
86**
87** A slight twist is that it's usually beneficial to use overflow-checked
88** integer arithmetics if all inputs are already integers. This is the only
89** change that affects the single-number mode, too.
78*/ 90*/
79 91
80/* Some local macros to save typing. Undef'd at the end. */ 92/* Some local macros to save typing. Undef'd at the end. */
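
The wrap-around vs. overflow-checked distinction described in the rationale above can be summarised in a short standalone sketch (plain C99, not LuaJIT code; the helper names are illustrative). ADD on IRT_INT behaves like unsigned 32-bit addition reinterpreted as signed, while ADDOV performs the same addition but additionally guards against signed overflow, which on a trace would force a side exit.

#include <stdint.h>

/* IR ADD on IRT_INT: wrap-around semantics, overflow is ignored.
** (Assumes the usual two's-complement behaviour of the final cast.)
*/
static int32_t sketch_ir_add(int32_t a, int32_t b)
{
  return (int32_t)((uint32_t)a + (uint32_t)b);
}

/* IR ADDOV: same addition, but signed overflow raises a guard.
** Returns 0 on overflow, where a compiled trace would take a side exit.
*/
static int sketch_ir_addov(int32_t a, int32_t b, int32_t *res)
{
  uint32_t u = (uint32_t)a + (uint32_t)b;
  if ((~((uint32_t)a ^ (uint32_t)b) & (u ^ (uint32_t)a)) & 0x80000000u)
    return 0;  /* operands had equal signs, result sign differs */
  *res = (int32_t)u;
  return 1;
}
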
@@ -94,10 +106,10 @@
94** already takes care of eliminating simple redundant conversions like 106** already takes care of eliminating simple redundant conversions like
95** CONV.int.num(CONV.num.int(x)) ==> x. 107** CONV.int.num(CONV.num.int(x)) ==> x.
96** 108**
97** But the surrounding code is FP-heavy and all arithmetic operations are 109** But the surrounding code is FP-heavy and arithmetic operations are
98** performed on FP numbers. Consider a common example such as 'x=t[i+1]', 110** performed on FP numbers (for the single-number mode). Consider a common
99** with 'i' already an integer (due to induction variable narrowing). The 111** example such as 'x=t[i+1]', with 'i' already an integer (due to induction
100** index expression would be recorded as 112** variable narrowing). The index expression would be recorded as
101** CONV.int.num(ADD(CONV.num.int(i), 1)) 113** CONV.int.num(ADD(CONV.num.int(i), 1))
102** which is clearly suboptimal. 114** which is clearly suboptimal.
103** 115**
@@ -113,6 +125,9 @@
113** FP ops remain in the IR and are eliminated by DCE since all references to 125** FP ops remain in the IR and are eliminated by DCE since all references to
114** them are gone. 126** them are gone.
115** 127**
128** [In dual-number mode the trace recorder already emits ADDOV etc., but
129** this can be further reduced. See below.]
130**
116** Special care has to be taken to avoid narrowing across an operation 131** Special care has to be taken to avoid narrowing across an operation
117** which is potentially operating on non-integral operands. One obvious 132** which is potentially operating on non-integral operands. One obvious
118** case is when an expression contains a non-integral constant, but ends 133** case is when an expression contains a non-integral constant, but ends
@@ -221,6 +236,26 @@ static void narrow_bpc_set(jit_State *J, IRRef1 key, IRRef1 val, IRRef mode)
221 bp->mode = mode; 236 bp->mode = mode;
222} 237}
223 238
239/* Backpropagate overflow stripping. */
240static void narrow_stripov_backprop(NarrowConv *nc, IRRef ref, int depth)
241{
242 jit_State *J = nc->J;
243 IRIns *ir = IR(ref);
244 if (ir->o == IR_ADDOV || ir->o == IR_SUBOV ||
245 (ir->o == IR_MULOV && (nc->mode & IRCONV_CONVMASK) == IRCONV_ANY)) {
246 BPropEntry *bp = narrow_bpc_get(nc->J, ref, IRCONV_TOBIT);
247 if (bp) {
248 ref = bp->val;
249 } else if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) {
250 narrow_stripov_backprop(nc, ir->op1, depth);
251 narrow_stripov_backprop(nc, ir->op2, depth);
252 *nc->sp++ = NARROWINS(IRT(ir->o - IR_ADDOV + IR_ADD, IRT_INT), ref);
253 return;
254 }
255 }
256 *nc->sp++ = NARROWINS(NARROW_REF, ref);
257}
258
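
The ir->o - IR_ADDOV + IR_ADD rewrite above assumes that ADD/SUB/MUL and ADDOV/SUBOV/MULOV form two parallel runs in the IR opcode enumeration (the real definitions live in lj_ir.h). A minimal sketch of the same trick with hypothetical opcode names:

/* Hypothetical opcode layout mirroring the assumption made above: the
** overflow-checked ops form a run parallel to the wrapping ops, so a
** single constant offset converts ADDOV->ADD, SUBOV->SUB, MULOV->MUL.
*/
enum SketchOp { SK_ADD, SK_SUB, SK_MUL, SK_ADDOV, SK_SUBOV, SK_MULOV };

static enum SketchOp sketch_strip_ov(enum SketchOp op)
{
  if (op >= SK_ADDOV && op <= SK_MULOV)
    return (enum SketchOp)(op - SK_ADDOV + SK_ADD);
  return op;
}
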
224/* Backpropagate narrowing conversion. Return number of needed conversions. */ 259/* Backpropagate narrowing conversion. Return number of needed conversions. */
225static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) 260static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
226{ 261{
@@ -230,24 +265,26 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
230 265
231 /* Check the easy cases first. */ 266 /* Check the easy cases first. */
232 if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) { 267 if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) {
233 if (nc->t == IRT_I64) 268 if ((nc->mode & IRCONV_CONVMASK) <= IRCONV_ANY)
234 *nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1); /* Reduce to sign-ext. */ 269 narrow_stripov_backprop(nc, ir->op1, depth+1);
235 else 270 else
236 *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */ 271 *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */
272 if (nc->t == IRT_I64)
273 *nc->sp++ = NARROWINS(NARROW_SEXT, 0); /* Sign-extend integer. */
237 return 0; 274 return 0;
238 } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ 275 } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
239 lua_Number n = ir_knum(ir)->n; 276 lua_Number n = ir_knum(ir)->n;
240 if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { 277 if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
241 /* Allows a wider range of constants. */ 278 /* Allows a wider range of constants. */
242 int64_t k64 = (int64_t)n; 279 int64_t k64 = (int64_t)n;
243 if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */ 280 if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */
244 *nc->sp++ = NARROWINS(NARROW_INT, 0); 281 *nc->sp++ = NARROWINS(NARROW_INT, 0);
245 *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ 282 *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */
246 return 0; 283 return 0;
247 } 284 }
248 } else { 285 } else {
249 int32_t k = lj_num2int(n); 286 int32_t k = lj_num2int(n);
250 if (n == cast_num(k)) { /* Only if constant is really an integer. */ 287 if (n == (lua_Number)k) { /* Only if constant is really an integer. */
251 *nc->sp++ = NARROWINS(NARROW_INT, 0); 288 *nc->sp++ = NARROWINS(NARROW_INT, 0);
252 *nc->sp++ = (NarrowIns)k; 289 *nc->sp++ = (NarrowIns)k;
253 return 0; 290 return 0;
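
Both "doesn't lose precision" tests above come down to an exact round-trip through the target integer type; the TOBIT path merely does the round-trip through int64_t before truncating to 32 bits. A minimal standalone sketch of the int32_t case (lj_num2int itself uses a faster, target-specific conversion; the explicit range guard here only keeps the plain C cast well-defined):

#include <stdint.h>

static int sketch_num_is_int32(double n, int32_t *k)
{
  if (n >= -2147483648.0 && n < 2147483648.0) {  /* keep the cast defined */
    int32_t i = (int32_t)n;
    if (n == (double)i) {  /* exact round-trip; -0 passes as 0 */
      *k = i;
      return 1;
    }
  }
  return 0;  /* fractional, out of int32_t range, or NaN */
}
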
@@ -287,7 +324,8 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
287 mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX; 324 mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX;
288 bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode); 325 bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
289 if (bp) { 326 if (bp) {
290 *nc->sp++ = NARROWINS(NARROW_SEXT, bp->val); 327 *nc->sp++ = NARROWINS(NARROW_REF, bp->val);
328 *nc->sp++ = NARROWINS(NARROW_SEXT, 0);
291 return 0; 329 return 0;
292 } 330 }
293 } 331 }
@@ -326,8 +364,9 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
326 } else if (op == NARROW_CONV) { 364 } else if (op == NARROW_CONV) {
327 *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ 365 *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */
328 } else if (op == NARROW_SEXT) { 366 } else if (op == NARROW_SEXT) {
329 *sp++ = emitir(IRT(IR_CONV, IRT_I64), ref, 367 lua_assert(sp >= nc->stack+1);
330 (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); 368 sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1],
369 (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
331 } else if (op == NARROW_INT) { 370 } else if (op == NARROW_INT) {
332 lua_assert(next < last); 371 lua_assert(next < last);
333 *sp++ = nc->t == IRT_I64 ? 372 *sp++ = nc->t == IRT_I64 ?
@@ -340,7 +379,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
340 /* Omit some overflow checks for array indexing. See comments above. */ 379 /* Omit some overflow checks for array indexing. See comments above. */
341 if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { 380 if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
342 if (next == last && irref_isk(narrow_ref(sp[0])) && 381 if (next == last && irref_isk(narrow_ref(sp[0])) &&
343 (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000 < 0x80000000) 382 (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000u < 0x80000000u)
344 guardot = 0; 383 guardot = 0;
345 else /* Otherwise cache a stronger check. */ 384 else /* Otherwise cache a stronger check. */
346 mode += IRCONV_CHECK-IRCONV_INDEX; 385 mode += IRCONV_CHECK-IRCONV_INDEX;
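
The (uint32_t)k + 0x40000000u < 0x80000000u test used above (and again in lj_opt_narrow_index further down) checks with a single unsigned compare that a constant lies in [-2^30, 2^30-1], i.e. is small enough that adding it to any valid array index cannot overflow int32. A standalone sketch with an illustrative name:

#include <stdint.h>

/* True iff k is in [-0x40000000, 0x3fffffff]: biasing by 2^30 maps that
** interval onto [0, 2^31), so one unsigned comparison suffices.
*/
static int sketch_small_index_offset(int32_t k)
{
  return (uint32_t)k + 0x40000000u < 0x80000000u;
}
/* sketch_small_index_offset(-0x40000000) == 1,
** sketch_small_index_offset(0x3fffffff)  == 1,
** sketch_small_index_offset(0x40000000)  == 0.
*/
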
@@ -377,12 +416,123 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J)
377 return NEXTFOLD; 416 return NEXTFOLD;
378} 417}
379 418
419/* -- Narrowing of implicit conversions ----------------------------------- */
420
421/* Recursively strip overflow checks. */
422static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode)
423{
424 IRRef ref = tref_ref(tr);
425 IRIns *ir = IR(ref);
426 int op = ir->o;
427 if (op >= IR_ADDOV && op <= lastop) {
428 BPropEntry *bp = narrow_bpc_get(J, ref, mode);
429 if (bp) {
430 return TREF(bp->val, irt_t(IR(bp->val)->t));
431 } else {
432 IRRef op1 = ir->op1, op2 = ir->op2; /* The IR may be reallocated. */
433 op1 = narrow_stripov(J, op1, lastop, mode);
434 op2 = narrow_stripov(J, op2, lastop, mode);
435 tr = emitir(IRT(op - IR_ADDOV + IR_ADD,
436 ((mode & IRCONV_DSTMASK) >> IRCONV_DSH)), op1, op2);
437 narrow_bpc_set(J, ref, tref_ref(tr), mode);
438 }
439 } else if (LJ_64 && (mode & IRCONV_SEXT) && !irt_is64(ir->t)) {
440 tr = emitir(IRT(IR_CONV, IRT_INTP), tr, mode);
441 }
442 return tr;
443}
444
445/* Narrow array index. */
446TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr)
447{
448 IRIns *ir;
449 lua_assert(tref_isnumber(tr));
450 if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
451 return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX);
452 /* Omit some overflow checks for array indexing. See comments above. */
453 ir = IR(tref_ref(tr));
454 if ((ir->o == IR_ADDOV || ir->o == IR_SUBOV) && irref_isk(ir->op2) &&
455 (uint32_t)IR(ir->op2)->i + 0x40000000u < 0x80000000u)
456 return emitir(IRTI(ir->o - IR_ADDOV + IR_ADD), ir->op1, ir->op2);
457 return tr;
458}
459
460/* Narrow conversion to integer operand (overflow undefined). */
461TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr)
462{
463 if (tref_isstr(tr))
464 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
465 if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
466 return emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
467 if (!tref_isinteger(tr))
468 lj_trace_err(J, LJ_TRERR_BADTYPE);
469 /*
470 ** Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV.
471 ** Use IRCONV_TOBIT for the cache entries, since the semantics are the same.
472 */
473 return narrow_stripov(J, tr, IR_MULOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT);
474}
475
476/* Narrow conversion to bitop operand (overflow wrapped). */
477TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr)
478{
479 if (tref_isstr(tr))
480 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
481 if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
482 return emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J));
483 if (!tref_isinteger(tr))
484 lj_trace_err(J, LJ_TRERR_BADTYPE);
485 /*
486 ** Wrapped overflow semantics allow stripping of ADDOV and SUBOV.
487 ** MULOV cannot be stripped due to precision widening.
488 */
489 return narrow_stripov(J, tr, IR_SUBOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT);
490}
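
The asymmetry noted above (ADDOV/SUBOV may be stripped for bit-op operands, MULOV may not) comes from double precision: the sum of two int32 values always fits exactly in a double, so the FP result and the wrapped integer result agree modulo 2^32, whereas a product can exceed 2^53 and lose exactly the low bits a bit operation needs. A small standalone demonstration of that precision loss (not the exact TOBIT conversion LuaJIT performs; the values are chosen so the product needs 57 bits):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  int32_t a = 0x12345678, b = 0x10000003;
  uint32_t wrapped = (uint32_t)a * (uint32_t)b;    /* exact, modulo 2^32 */
  double fp = (double)a * (double)b;               /* > 2^53, so rounded */
  uint32_t lowfp = (uint32_t)((uint64_t)fp & 0xffffffffu);
  printf("int mul: %08x  fp mul low bits: %08x\n", /* the two values differ */
         (unsigned)wrapped, (unsigned)lowfp);
  return 0;
}
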
491
492#if LJ_HASFFI
493/* Narrow C array index (overflow undefined). */
494TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
495{
496 lua_assert(tref_isnumber(tr));
497 if (tref_isnum(tr))
498 return emitir(IRTI(IR_CONV), tr,
499 (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
500 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
501 return narrow_stripov(J, tr, IR_MULOV,
502 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
503 ((IRT_INTP<<5)|IRT_INT|IRCONV_TOBIT));
504}
505#endif
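
The LJ_64 variant of the mode word above requests a sign extension (IRCONV_SEXT) because a 32-bit C array index has to be widened to pointer width with its sign preserved before it is scaled and added to the base pointer; on 32-bit targets truncation to 32 bits is all that is needed. A minimal sketch of the addressing this feeds into (illustrative helper, not LuaJIT API):

#include <stddef.h>
#include <stdint.h>

static void *sketch_cindex(void *base, int32_t idx, size_t elemsize)
{
  /* The (intptr_t) cast sign-extends idx on 64-bit targets, so negative
  ** indices address below 'base' instead of almost 4GB above it.
  */
  return (char *)base + (intptr_t)idx * (intptr_t)elemsize;
}
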
506
380/* -- Narrowing of arithmetic operators ----------------------------------- */ 507/* -- Narrowing of arithmetic operators ----------------------------------- */
381 508
382/* Check whether a number fits into an int32_t (-0 is ok, too). */ 509/* Check whether a number fits into an int32_t (-0 is ok, too). */
383static int numisint(lua_Number n) 510static int numisint(lua_Number n)
384{ 511{
385 return (n == cast_num(lj_num2int(n))); 512 return (n == (lua_Number)lj_num2int(n));
513}
514
515/* Narrowing of arithmetic operations. */
516TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
517 TValue *vb, TValue *vc, IROp op)
518{
519 if (tref_isstr(rb)) {
520 rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0);
521 lj_str_tonum(strV(vb), vb);
522 }
523 if (tref_isstr(rc)) {
524 rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
525 lj_str_tonum(strV(vc), vc);
526 }
527 /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
528 if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
529 tref_isinteger(rb) && tref_isinteger(rc) &&
530 numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc),
531 (int)op - (int)IR_ADD)))
532 return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc);
533 if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT);
534 if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
535 return emitir(IRTN(op), rb, rc);
386} 536}
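
The reason IR_MUL is only included in the DUALNUM build above is the sign of zero: in the single-number (FP) build every arithmetic result is a Lua number, and an integer multiply cannot produce -0. In dual-number mode the interpreter itself yields an integer zero for such inputs, so the narrowed trace stays consistent. A tiny standalone demonstration of the value that would otherwise be lost:

#include <stdio.h>

int main(void)
{
  double a = 0.0, b = -1.0;
  double fpmul = a * b;             /* -0.0 */
  int    imul  = (int)a * (int)b;   /* 0: the sign of zero is gone */
  printf("%g vs %g\n", 1.0 / fpmul, 1.0 / (double)imul);  /* -inf vs inf */
  return 0;
}
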
387 537
388/* Narrowing of modulo operator. */ 538/* Narrowing of modulo operator. */
@@ -409,16 +559,15 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc)
409/* Narrowing of power operator or math.pow. */ 559/* Narrowing of power operator or math.pow. */
410TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) 560TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
411{ 561{
412 lua_Number n;
413 if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) 562 if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
414 lj_trace_err(J, LJ_TRERR_BADTYPE); 563 lj_trace_err(J, LJ_TRERR_BADTYPE);
415 n = numV(vc);
416 /* Narrowing must be unconditional to preserve (-x)^i semantics. */ 564 /* Narrowing must be unconditional to preserve (-x)^i semantics. */
417 if (numisint(n)) { 565 if (tvisint(vc) || numisint(numV(vc))) {
418 int checkrange = 0; 566 int checkrange = 0;
419 /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ 567 /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
420 if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { 568 if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
421 if (!(n >= -65536.0 && n <= 65536.0)) goto split_pow; 569 int32_t k = numberVint(vc);
570 if (!(k >= -65536 && k <= 65536)) goto split_pow;
422 checkrange = 1; 571 checkrange = 1;
423 } 572 }
424 if (!tref_isinteger(rc)) { 573 if (!tref_isinteger(rc)) {
@@ -448,20 +597,28 @@ split_pow:
448 597
449/* -- Predictive narrowing of induction variables ------------------------- */ 598/* -- Predictive narrowing of induction variables ------------------------- */
450 599
600/* Narrow a single runtime value. */
601static int narrow_forl(jit_State *J, cTValue *o)
602{
603 if (tvisint(o)) return 1;
604 if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o));
605 return 0;
606}
607
451/* Narrow the FORL index type by looking at the runtime values. */ 608/* Narrow the FORL index type by looking at the runtime values. */
452IRType lj_opt_narrow_forl(cTValue *forbase) 609IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv)
453{ 610{
454 lua_assert(tvisnum(&forbase[FORL_IDX]) && 611 lua_assert(tvisnumber(&tv[FORL_IDX]) &&
455 tvisnum(&forbase[FORL_STOP]) && 612 tvisnumber(&tv[FORL_STOP]) &&
456 tvisnum(&forbase[FORL_STEP])); 613 tvisnumber(&tv[FORL_STEP]));
457 /* Narrow only if the runtime values of start/stop/step are all integers. */ 614 /* Narrow only if the runtime values of start/stop/step are all integers. */
458 if (numisint(numV(&forbase[FORL_IDX])) && 615 if (narrow_forl(J, &tv[FORL_IDX]) &&
459 numisint(numV(&forbase[FORL_STOP])) && 616 narrow_forl(J, &tv[FORL_STOP]) &&
460 numisint(numV(&forbase[FORL_STEP]))) { 617 narrow_forl(J, &tv[FORL_STEP])) {
461 /* And if the loop index can't possibly overflow. */ 618 /* And if the loop index can't possibly overflow. */
462 lua_Number step = numV(&forbase[FORL_STEP]); 619 lua_Number step = numberVnum(&tv[FORL_STEP]);
463 lua_Number sum = numV(&forbase[FORL_STOP]) + step; 620 lua_Number sum = numberVnum(&tv[FORL_STOP]) + step;
464 if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0) 621 if (0 <= step ? (sum <= 2147483647.0) : (sum >= -2147483648.0))
465 return IRT_INT; 622 return IRT_INT;
466 } 623 }
467 return IRT_NUM; 624 return IRT_NUM;
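
The final range test above ensures that even the last increment of a narrowed FORL index stays inside int32 range: for a positive step the loop only re-enters while the index is still <= stop, so the largest value the index ever reaches is stop+step (symmetrically, stop+step is the smallest value for a negative step). A minimal sketch of that part of the predicate, assuming start/stop/step have already been checked to be integral:

/* Hypothetical helper mirroring the overflow part of the check above. */
static int sketch_forl_index_fits_int32(double stop, double step)
{
  double sum = stop + step;  /* extreme value the narrowed index may reach */
  return 0 <= step ? (sum <= 2147483647.0) : (sum >= -2147483648.0);
}
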