diff options
Diffstat (limited to 'src/lj_opt_narrow.c')
-rw-r--r-- | src/lj_opt_narrow.c | 48 |
1 file changed, 23 insertions, 25 deletions
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 34fe6c39..57b19613 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -372,17 +372,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | |||
372 | } else if (op == NARROW_CONV) { | 372 | } else if (op == NARROW_CONV) { |
373 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ | 373 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ |
374 | } else if (op == NARROW_SEXT) { | 374 | } else if (op == NARROW_SEXT) { |
375 | lua_assert(sp >= nc->stack+1); | 375 | lj_assertJ(sp >= nc->stack+1, "stack underflow"); |
376 | sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], | 376 | sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], |
377 | (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); | 377 | (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); |
378 | } else if (op == NARROW_INT) { | 378 | } else if (op == NARROW_INT) { |
379 | lua_assert(next < last); | 379 | lj_assertJ(next < last, "missing arg to NARROW_INT"); |
380 | *sp++ = nc->t == IRT_I64 ? | 380 | *sp++ = nc->t == IRT_I64 ? |
381 | lj_ir_kint64(J, (int64_t)(int32_t)*next++) : | 381 | lj_ir_kint64(J, (int64_t)(int32_t)*next++) : |
382 | lj_ir_kint(J, *next++); | 382 | lj_ir_kint(J, *next++); |
383 | } else { /* Regular IROpT. Pops two operands and pushes one result. */ | 383 | } else { /* Regular IROpT. Pops two operands and pushes one result. */ |
384 | IRRef mode = nc->mode; | 384 | IRRef mode = nc->mode; |
385 | lua_assert(sp >= nc->stack+2); | 385 | lj_assertJ(sp >= nc->stack+2, "stack underflow"); |
386 | sp--; | 386 | sp--; |
387 | /* Omit some overflow checks for array indexing. See comments above. */ | 387 | /* Omit some overflow checks for array indexing. See comments above. */ |
388 | if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { | 388 | if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { |
@@ -398,7 +398,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | |||
398 | narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); | 398 | narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); |
399 | } | 399 | } |
400 | } | 400 | } |
401 | lua_assert(sp == nc->stack+1); | 401 | lj_assertJ(sp == nc->stack+1, "stack misalignment"); |
402 | return nc->stack[0]; | 402 | return nc->stack[0]; |
403 | } | 403 | } |
404 | 404 | ||
@@ -452,7 +452,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) | |||
452 | TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) | 452 | TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) |
453 | { | 453 | { |
454 | IRIns *ir; | 454 | IRIns *ir; |
455 | lua_assert(tref_isnumber(tr)); | 455 | lj_assertJ(tref_isnumber(tr), "expected number type"); |
456 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ | 456 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ |
457 | return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); | 457 | return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); |
458 | /* Omit some overflow checks for array indexing. See comments above. */ | 458 | /* Omit some overflow checks for array indexing. See comments above. */ |
@@ -499,7 +499,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) | |||
499 | /* Narrow C array index (overflow undefined). */ | 499 | /* Narrow C array index (overflow undefined). */ |
500 | TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) | 500 | TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) |
501 | { | 501 | { |
502 | lua_assert(tref_isnumber(tr)); | 502 | lj_assertJ(tref_isnumber(tr), "expected number type"); |
503 | if (tref_isnum(tr)) | 503 | if (tref_isnum(tr)) |
504 | return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); | 504 | return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); |
505 | /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ | 505 | /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ |
@@ -551,11 +551,16 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) | |||
551 | { | 551 | { |
552 | rc = conv_str_tonum(J, rc, vc); | 552 | rc = conv_str_tonum(J, rc, vc); |
553 | if (tref_isinteger(rc)) { | 553 | if (tref_isinteger(rc)) { |
554 | if ((uint32_t)numberVint(vc) != 0x80000000u) | 554 | uint32_t k = (uint32_t)numberVint(vc); |
555 | return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); | 555 | if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) { |
556 | TRef zero = lj_ir_kint(J, 0); | ||
557 | if (!LJ_DUALNUM) | ||
558 | emitir(IRTGI(IR_NE), rc, zero); | ||
559 | return emitir(IRTGI(IR_SUBOV), zero, rc); | ||
560 | } | ||
556 | rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); | 561 | rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); |
557 | } | 562 | } |
558 | return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); | 563 | return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG)); |
559 | } | 564 | } |
560 | 565 | ||
561 | /* Narrowing of modulo operator. */ | 566 | /* Narrowing of modulo operator. */ |
@@ -588,10 +593,10 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) | |||
588 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ | 593 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ |
589 | if (tvisint(vc) || numisint(numV(vc))) { | 594 | if (tvisint(vc) || numisint(numV(vc))) { |
590 | int checkrange = 0; | 595 | int checkrange = 0; |
591 | /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ | 596 | /* pow() is faster for bigger exponents. But do this only for (+k)^i. */ |
592 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { | 597 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { |
593 | int32_t k = numberVint(vc); | 598 | int32_t k = numberVint(vc); |
594 | if (!(k >= -65536 && k <= 65536)) goto split_pow; | 599 | if (!(k >= -65536 && k <= 65536)) goto force_pow_num; |
595 | checkrange = 1; | 600 | checkrange = 1; |
596 | } | 601 | } |
597 | if (!tref_isinteger(rc)) { | 602 | if (!tref_isinteger(rc)) { |
@@ -602,19 +607,11 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) | |||
602 | TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); | 607 | TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); |
603 | emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); | 608 | emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); |
604 | } | 609 | } |
605 | return emitir(IRTN(IR_POW), rb, rc); | 610 | } else { |
611 | force_pow_num: | ||
612 | rc = lj_ir_tonum(J, rc); /* Want POW(num, num), not POW(num, int). */ | ||
606 | } | 613 | } |
607 | split_pow: | 614 | return emitir(IRTN(IR_POW), rb, rc); |
608 | /* FOLD covers most cases, but some are easier to do here. */ | ||
609 | if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb))))) | ||
610 | return rb; /* 1 ^ x ==> 1 */ | ||
611 | rc = lj_ir_tonum(J, rc); | ||
612 | if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5) | ||
613 | return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */ | ||
614 | /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */ | ||
615 | rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2); | ||
616 | rc = emitir(IRTN(IR_MUL), rb, rc); | ||
617 | return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2); | ||
618 | } | 615 | } |
619 | 616 | ||
620 | /* -- Predictive narrowing of induction variables ------------------------- */ | 617 | /* -- Predictive narrowing of induction variables ------------------------- */ |
@@ -630,9 +627,10 @@ static int narrow_forl(jit_State *J, cTValue *o) | |||
630 | /* Narrow the FORL index type by looking at the runtime values. */ | 627 | /* Narrow the FORL index type by looking at the runtime values. */ |
631 | IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) | 628 | IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) |
632 | { | 629 | { |
633 | lua_assert(tvisnumber(&tv[FORL_IDX]) && | 630 | lj_assertJ(tvisnumber(&tv[FORL_IDX]) && |
634 | tvisnumber(&tv[FORL_STOP]) && | 631 | tvisnumber(&tv[FORL_STOP]) && |
635 | tvisnumber(&tv[FORL_STEP])); | 632 | tvisnumber(&tv[FORL_STEP]), |
633 | "expected number types"); | ||
636 | /* Narrow only if the runtime values of start/stop/step are all integers. */ | 634 | /* Narrow only if the runtime values of start/stop/step are all integers. */ |
637 | if (narrow_forl(J, &tv[FORL_IDX]) && | 635 | if (narrow_forl(J, &tv[FORL_IDX]) && |
638 | narrow_forl(J, &tv[FORL_STOP]) && | 636 | narrow_forl(J, &tv[FORL_STOP]) && |