Diffstat (limited to 'src/lj_opt_narrow.c')
-rw-r--r--  src/lj_opt_narrow.c  48
1 file changed, 23 insertions(+), 25 deletions(-)
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 34fe6c39..57b19613 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -372,17 +372,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
     } else if (op == NARROW_CONV) {
       *sp++ = emitir_raw(convot, ref, convop2);  /* Raw emit avoids a loop. */
     } else if (op == NARROW_SEXT) {
-      lua_assert(sp >= nc->stack+1);
+      lj_assertJ(sp >= nc->stack+1, "stack underflow");
       sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1],
                       (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
     } else if (op == NARROW_INT) {
-      lua_assert(next < last);
+      lj_assertJ(next < last, "missing arg to NARROW_INT");
       *sp++ = nc->t == IRT_I64 ?
               lj_ir_kint64(J, (int64_t)(int32_t)*next++) :
               lj_ir_kint(J, *next++);
     } else {  /* Regular IROpT. Pops two operands and pushes one result. */
       IRRef mode = nc->mode;
-      lua_assert(sp >= nc->stack+2);
+      lj_assertJ(sp >= nc->stack+2, "stack underflow");
       sp--;
       /* Omit some overflow checks for array indexing. See comments above. */
       if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
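
Note: the recurring lua_assert(cond) -> lj_assertJ(cond, msg) substitution in this
and the following hunks attaches a diagnostic message to each JIT assertion. As a
minimal sketch of the general shape only (the real macro lives in LuaJIT's headers,
also routes the jit_State's global state to the failure handler, and is not this
definition):

#include <stdio.h>
#include <stdlib.h>

#ifdef LUA_USE_ASSERT
#define lj_assertJ(c, ...) \
  do { if (!(c)) { \
    /* First vararg is a string literal, so it concatenates with the prefix. */ \
    fprintf(stderr, "assertion failure: " __VA_ARGS__); \
    fputc('\n', stderr); \
    abort(); \
  } } while (0)
#else
#define lj_assertJ(c, ...)  ((void)0)  /* Compiled out in release builds. */
#endif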
@@ -398,7 +398,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
 	narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode);
     }
   }
-  lua_assert(sp == nc->stack+1);
+  lj_assertJ(sp == nc->stack+1, "stack misalignment");
   return nc->stack[0];
 }
 
@@ -452,7 +452,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode)
 TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr)
 {
   IRIns *ir;
-  lua_assert(tref_isnumber(tr));
+  lj_assertJ(tref_isnumber(tr), "expected number type");
   if (tref_isnum(tr))  /* Conversion may be narrowed, too. See above. */
     return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX);
   /* Omit some overflow checks for array indexing. See comments above. */
@@ -499,7 +499,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr)
 /* Narrow C array index (overflow undefined). */
 TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
 {
-  lua_assert(tref_isnumber(tr));
+  lj_assertJ(tref_isnumber(tr), "expected number type");
   if (tref_isnum(tr))
     return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
   /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
@@ -551,11 +551,16 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
 {
   rc = conv_str_tonum(J, rc, vc);
   if (tref_isinteger(rc)) {
-    if ((uint32_t)numberVint(vc) != 0x80000000u)
-      return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc);
+    uint32_t k = (uint32_t)numberVint(vc);
+    if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) {
+      TRef zero = lj_ir_kint(J, 0);
+      if (!LJ_DUALNUM)
+	emitir(IRTGI(IR_NE), rc, zero);
+      return emitir(IRTGI(IR_SUBOV), zero, rc);
+    }
     rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
   }
-  return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J));
+  return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG));
 }
 
 /* Narrowing of modulo operator. */
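
Note: the rewritten lj_opt_narrow_unm() hunk narrows unary minus to an integer
SUBOV (subtract with overflow guard) only when the result is provably unchanged.
The constant 0x80000000 (INT_MIN, whose negation overflows) is always excluded;
in non-DUALNUM builds a constant zero is excluded too, and non-constant operands
get an IR_NE guard, because integer 0 - 0 yields +0 while the numeric result of
-0 must be -0. (The number path's IR_NEG now apparently takes a SIMD sign-mask
constant via lj_ir_ksimd(J, LJ_KSIMD_NEG) rather than lj_ir_knum_neg(J); either
way it flips the sign bit.) A standalone C illustration, not LuaJIT code, of why
the sign of zero is observable:

#include <stdio.h>

int main(void)
{
  double x = 0.0;
  double neg_fp = -x;                /* FP negation: -0.0 */
  double neg_int = (double)(0 - 0);  /* Integer subtract, then convert: +0.0 */
  /* The sign of zero is observable through division: */
  printf("%g %g\n", 1.0/neg_fp, 1.0/neg_int);  /* -inf inf */
  return 0;
}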
@@ -588,10 +593,10 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
   /* Narrowing must be unconditional to preserve (-x)^i semantics. */
   if (tvisint(vc) || numisint(numV(vc))) {
     int checkrange = 0;
-    /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
+    /* pow() is faster for bigger exponents. But do this only for (+k)^i. */
     if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
       int32_t k = numberVint(vc);
-      if (!(k >= -65536 && k <= 65536)) goto split_pow;
+      if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
       checkrange = 1;
     }
     if (!tref_isinteger(rc)) {
@@ -602,19 +607,11 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
       TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
       emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
     }
-    return emitir(IRTN(IR_POW), rb, rc);
+  } else {
+force_pow_num:
+    rc = lj_ir_tonum(J, rc);  /* Want POW(num, num), not POW(num, int). */
   }
-split_pow:
-  /* FOLD covers most cases, but some are easier to do here. */
-  if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
-    return rb;  /* 1 ^ x ==> 1 */
-  rc = lj_ir_tonum(J, rc);
-  if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5)
-    return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT);  /* x ^ 0.5 ==> sqrt(x) */
-  /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */
-  rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2);
-  rc = emitir(IRTN(IR_MUL), rb, rc);
-  return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2);
+  return emitir(IRTN(IR_POW), rb, rc);
 }
 
 /* -- Predictive narrowing of induction variables ------------------------- */
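
Note: these two hunks drop the old split_pow fallback, which rewrote b^c as
exp2(c*log2(b)), and instead convert the exponent to a number and emit a plain
POW. The split was only ever attempted for a constant nonnegative base (the
u32.hi >= 0 test reads the sign bit of the number constant), since the logarithm
rewrite breaks down for negative bases that pow() handles fine; the removed 1^x
and x^0.5 => sqrt(x) simplifications are, per the old comment, mostly covered by
FOLD anyway. An illustrative standalone C comparison, not LuaJIT code:

#include <math.h>
#include <stdio.h>

int main(void)
{
  double b = -8.0, c = 3.0;
  printf("%g\n", pow(b, c));          /* -512: negative base, integer exponent */
  printf("%g\n", exp2(c * log2(b)));  /* nan: log2 of a negative number */
  return 0;
}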
@@ -630,9 +627,10 @@ static int narrow_forl(jit_State *J, cTValue *o)
 /* Narrow the FORL index type by looking at the runtime values. */
 IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv)
 {
-  lua_assert(tvisnumber(&tv[FORL_IDX]) &&
+  lj_assertJ(tvisnumber(&tv[FORL_IDX]) &&
 	     tvisnumber(&tv[FORL_STOP]) &&
-	     tvisnumber(&tv[FORL_STEP]));
+	     tvisnumber(&tv[FORL_STEP]),
+	     "expected number types");
   /* Narrow only if the runtime values of start/stop/step are all integers. */
   if (narrow_forl(J, &tv[FORL_IDX]) &&
       narrow_forl(J, &tv[FORL_STOP]) &&