diff options
Diffstat (limited to 'src/lj_opt_narrow.c')
-rw-r--r-- | src/lj_opt_narrow.c | 66 |
1 files changed, 17 insertions, 49 deletions
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 700c23d4..1172df2b 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c | |||
@@ -372,17 +372,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | |||
372 | } else if (op == NARROW_CONV) { | 372 | } else if (op == NARROW_CONV) { |
373 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ | 373 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ |
374 | } else if (op == NARROW_SEXT) { | 374 | } else if (op == NARROW_SEXT) { |
375 | lua_assert(sp >= nc->stack+1); | 375 | lj_assertJ(sp >= nc->stack+1, "stack underflow"); |
376 | sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], | 376 | sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], |
377 | (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); | 377 | (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); |
378 | } else if (op == NARROW_INT) { | 378 | } else if (op == NARROW_INT) { |
379 | lua_assert(next < last); | 379 | lj_assertJ(next < last, "missing arg to NARROW_INT"); |
380 | *sp++ = nc->t == IRT_I64 ? | 380 | *sp++ = nc->t == IRT_I64 ? |
381 | lj_ir_kint64(J, (int64_t)(int32_t)*next++) : | 381 | lj_ir_kint64(J, (int64_t)(int32_t)*next++) : |
382 | lj_ir_kint(J, *next++); | 382 | lj_ir_kint(J, *next++); |
383 | } else { /* Regular IROpT. Pops two operands and pushes one result. */ | 383 | } else { /* Regular IROpT. Pops two operands and pushes one result. */ |
384 | IRRef mode = nc->mode; | 384 | IRRef mode = nc->mode; |
385 | lua_assert(sp >= nc->stack+2); | 385 | lj_assertJ(sp >= nc->stack+2, "stack underflow"); |
386 | sp--; | 386 | sp--; |
387 | /* Omit some overflow checks for array indexing. See comments above. */ | 387 | /* Omit some overflow checks for array indexing. See comments above. */ |
388 | if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { | 388 | if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { |
@@ -398,7 +398,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | |||
398 | narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); | 398 | narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); |
399 | } | 399 | } |
400 | } | 400 | } |
401 | lua_assert(sp == nc->stack+1); | 401 | lj_assertJ(sp == nc->stack+1, "stack misalignment"); |
402 | return nc->stack[0]; | 402 | return nc->stack[0]; |
403 | } | 403 | } |
404 | 404 | ||
@@ -452,7 +452,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) | |||
452 | TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) | 452 | TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) |
453 | { | 453 | { |
454 | IRIns *ir; | 454 | IRIns *ir; |
455 | lua_assert(tref_isnumber(tr)); | 455 | lj_assertJ(tref_isnumber(tr), "expected number type"); |
456 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ | 456 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ |
457 | return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); | 457 | return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); |
458 | /* Omit some overflow checks for array indexing. See comments above. */ | 458 | /* Omit some overflow checks for array indexing. See comments above. */ |
@@ -499,7 +499,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) | |||
499 | /* Narrow C array index (overflow undefined). */ | 499 | /* Narrow C array index (overflow undefined). */ |
500 | TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) | 500 | TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) |
501 | { | 501 | { |
502 | lua_assert(tref_isnumber(tr)); | 502 | lj_assertJ(tref_isnumber(tr), "expected number type"); |
503 | if (tref_isnum(tr)) | 503 | if (tref_isnum(tr)) |
504 | return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); | 504 | return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); |
505 | /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ | 505 | /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ |
@@ -551,11 +551,16 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) | |||
551 | { | 551 | { |
552 | rc = conv_str_tonum(J, rc, vc); | 552 | rc = conv_str_tonum(J, rc, vc); |
553 | if (tref_isinteger(rc)) { | 553 | if (tref_isinteger(rc)) { |
554 | if ((uint32_t)numberVint(vc) != 0x80000000u) | 554 | uint32_t k = (uint32_t)numberVint(vc); |
555 | return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); | 555 | if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) { |
556 | TRef zero = lj_ir_kint(J, 0); | ||
557 | if (!LJ_DUALNUM) | ||
558 | emitir(IRTGI(IR_NE), rc, zero); | ||
559 | return emitir(IRTGI(IR_SUBOV), zero, rc); | ||
560 | } | ||
556 | rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); | 561 | rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); |
557 | } | 562 | } |
558 | return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); | 563 | return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG)); |
559 | } | 564 | } |
560 | 565 | ||
561 | /* Narrowing of modulo operator. */ | 566 | /* Narrowing of modulo operator. */ |
@@ -579,44 +584,6 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) | |||
579 | return emitir(IRTN(IR_SUB), rb, tmp); | 584 | return emitir(IRTN(IR_SUB), rb, tmp); |
580 | } | 585 | } |
581 | 586 | ||
582 | /* Narrowing of power operator or math.pow. */ | ||
583 | TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) | ||
584 | { | ||
585 | rb = conv_str_tonum(J, rb, vb); | ||
586 | rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ | ||
587 | rc = conv_str_tonum(J, rc, vc); | ||
588 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ | ||
589 | if (tvisint(vc) || numisint(numV(vc))) { | ||
590 | int checkrange = 0; | ||
591 | /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ | ||
592 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { | ||
593 | int32_t k = numberVint(vc); | ||
594 | if (!(k >= -65536 && k <= 65536)) goto split_pow; | ||
595 | checkrange = 1; | ||
596 | } | ||
597 | if (!tref_isinteger(rc)) { | ||
598 | /* Guarded conversion to integer! */ | ||
599 | rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); | ||
600 | } | ||
601 | if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ | ||
602 | TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); | ||
603 | emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); | ||
604 | } | ||
605 | return emitir(IRTN(IR_POW), rb, rc); | ||
606 | } | ||
607 | split_pow: | ||
608 | /* FOLD covers most cases, but some are easier to do here. */ | ||
609 | if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb))))) | ||
610 | return rb; /* 1 ^ x ==> 1 */ | ||
611 | rc = lj_ir_tonum(J, rc); | ||
612 | if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5) | ||
613 | return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */ | ||
614 | /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */ | ||
615 | rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2); | ||
616 | rc = emitir(IRTN(IR_MUL), rb, rc); | ||
617 | return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2); | ||
618 | } | ||
619 | |||
620 | /* -- Predictive narrowing of induction variables ------------------------- */ | 587 | /* -- Predictive narrowing of induction variables ------------------------- */ |
621 | 588 | ||
622 | /* Narrow a single runtime value. */ | 589 | /* Narrow a single runtime value. */ |
@@ -630,9 +597,10 @@ static int narrow_forl(jit_State *J, cTValue *o) | |||
630 | /* Narrow the FORL index type by looking at the runtime values. */ | 597 | /* Narrow the FORL index type by looking at the runtime values. */ |
631 | IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) | 598 | IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) |
632 | { | 599 | { |
633 | lua_assert(tvisnumber(&tv[FORL_IDX]) && | 600 | lj_assertJ(tvisnumber(&tv[FORL_IDX]) && |
634 | tvisnumber(&tv[FORL_STOP]) && | 601 | tvisnumber(&tv[FORL_STOP]) && |
635 | tvisnumber(&tv[FORL_STEP])); | 602 | tvisnumber(&tv[FORL_STEP]), |
603 | "expected number types"); | ||
636 | /* Narrow only if the runtime values of start/stop/step are all integers. */ | 604 | /* Narrow only if the runtime values of start/stop/step are all integers. */ |
637 | if (narrow_forl(J, &tv[FORL_IDX]) && | 605 | if (narrow_forl(J, &tv[FORL_IDX]) && |
638 | narrow_forl(J, &tv[FORL_STOP]) && | 606 | narrow_forl(J, &tv[FORL_STOP]) && |