aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mike Pall <mike> 2020-05-23 21:33:01 +0200
committer: Mike Pall <mike> 2020-05-23 21:33:01 +0200
commit: b2307c8ad817e350d65cc909a579ca2f77439682 (patch)
tree: 4984f3c3972d768220b7263eb5eb139d6049cfcb
parent: 5655be4546d9177890c69f0d0accac4773ff0887 (diff)
download: luajit-b2307c8ad817e350d65cc909a579ca2f77439682.tar.gz
luajit-b2307c8ad817e350d65cc909a579ca2f77439682.tar.bz2
luajit-b2307c8ad817e350d65cc909a579ca2f77439682.zip
Remove pow() splitting and cleanup backends.
-rw-r--r-- src/lj_arch.h | 3
-rw-r--r-- src/lj_asm.c | 106
-rw-r--r-- src/lj_asm_arm.h | 10
-rw-r--r-- src/lj_asm_arm64.h | 39
-rw-r--r-- src/lj_asm_mips.h | 38
-rw-r--r-- src/lj_asm_ppc.h | 9
-rw-r--r-- src/lj_asm_x86.h | 37
-rw-r--r-- src/lj_ir.h | 2
-rw-r--r-- src/lj_ircall.h | 1
-rw-r--r-- src/lj_opt_fold.c | 18
-rw-r--r-- src/lj_opt_narrow.c | 20
-rw-r--r-- src/lj_opt_split.c | 21
-rw-r--r-- src/lj_vm.h | 5
-rw-r--r-- src/lj_vmmath.c | 8
14 files changed, 95 insertions, 222 deletions
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 70426838..d65bc551 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -586,9 +586,6 @@
586#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS 586#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
587#define LUAJIT_NO_LOG2 587#define LUAJIT_NO_LOG2
588#endif 588#endif
589#if defined(__symbian__) || LJ_TARGET_WINDOWS
590#define LUAJIT_NO_EXP2
591#endif
592#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) 589#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
593#define LJ_NO_SYSTEM 1 590#define LJ_NO_SYSTEM 1
594#endif 591#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 20d63731..dd84a4f2 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1308,32 +1308,6 @@ static void asm_call(ASMState *as, IRIns *ir)
1308 asm_gencall(as, ci, args); 1308 asm_gencall(as, ci, args);
1309} 1309}
1310 1310
1311#if !LJ_SOFTFP32
1312static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
1313{
1314 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1315 IRRef args[2];
1316 args[0] = lref;
1317 args[1] = rref;
1318 asm_setupresult(as, ir, ci);
1319 asm_gencall(as, ci, args);
1320}
1321
1322static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
1323{
1324 IRIns *irp = IR(ir->op1);
1325 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1326 IRIns *irpp = IR(irp->op1);
1327 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1328 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1329 asm_fppow(as, ir, irpp->op1, irp->op2);
1330 return 1;
1331 }
1332 }
1333 return 0;
1334}
1335#endif
1336
1337/* -- PHI and loop handling ----------------------------------------------- */ 1311/* -- PHI and loop handling ----------------------------------------------- */
1338 1312
1339/* Break a PHI cycle by renaming to a free register (evict if needed). */ 1313/* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1604,6 +1578,62 @@ static void asm_loop(ASMState *as)
1604#error "Missing assembler for target CPU" 1578#error "Missing assembler for target CPU"
1605#endif 1579#endif
1606 1580
1581/* -- Common instruction helpers ------------------------------------------ */
1582
1583#if !LJ_SOFTFP32
1584#if !LJ_TARGET_X86ORX64
1585#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1586#define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1587#endif
1588
1589static void asm_pow(ASMState *as, IRIns *ir)
1590{
1591#if LJ_64 && LJ_HASFFI
1592 if (!irt_isnum(ir->t))
1593 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1594 IRCALL_lj_carith_powu64);
1595 else
1596#endif
1597 if (irt_isnum(IR(ir->op2)->t))
1598 asm_callid(as, ir, IRCALL_pow);
1599 else
1600 asm_fppowi(as, ir);
1601}
1602
1603static void asm_div(ASMState *as, IRIns *ir)
1604{
1605#if LJ_64 && LJ_HASFFI
1606 if (!irt_isnum(ir->t))
1607 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1608 IRCALL_lj_carith_divu64);
1609 else
1610#endif
1611 asm_fpdiv(as, ir);
1612}
1613#endif
1614
1615static void asm_mod(ASMState *as, IRIns *ir)
1616{
1617#if LJ_64 && LJ_HASFFI
1618 if (!irt_isint(ir->t))
1619 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1620 IRCALL_lj_carith_modu64);
1621 else
1622#endif
1623 asm_callid(as, ir, IRCALL_lj_vm_modi);
1624}
1625
1626static void asm_fuseequal(ASMState *as, IRIns *ir)
1627{
1628 /* Fuse HREF + EQ/NE. */
1629 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1630 as->curins--;
1631 asm_href(as, ir-1, (IROp)ir->o);
1632 } else {
1633 asm_equal(as, ir);
1634 }
1635}
1636
1607/* -- Instruction dispatch ------------------------------------------------ */ 1637/* -- Instruction dispatch ------------------------------------------------ */
1608 1638
1609/* Assemble a single instruction. */ 1639/* Assemble a single instruction. */
@@ -1626,14 +1656,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
1626 case IR_ABC: 1656 case IR_ABC:
1627 asm_comp(as, ir); 1657 asm_comp(as, ir);
1628 break; 1658 break;
1629 case IR_EQ: case IR_NE: 1659 case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
1630 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1631 as->curins--;
1632 asm_href(as, ir-1, (IROp)ir->o);
1633 } else {
1634 asm_equal(as, ir);
1635 }
1636 break;
1637 1660
1638 case IR_RETF: asm_retf(as, ir); break; 1661 case IR_RETF: asm_retf(as, ir); break;
1639 1662
@@ -1702,7 +1725,13 @@ static void asm_ir(ASMState *as, IRIns *ir)
1702 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; 1725 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1703 case IR_TNEW: asm_tnew(as, ir); break; 1726 case IR_TNEW: asm_tnew(as, ir); break;
1704 case IR_TDUP: asm_tdup(as, ir); break; 1727 case IR_TDUP: asm_tdup(as, ir); break;
1705 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; 1728 case IR_CNEW: case IR_CNEWI:
1729#if LJ_HASFFI
1730 asm_cnew(as, ir);
1731#else
1732 lua_assert(0);
1733#endif
1734 break;
1706 1735
1707 /* Buffer operations. */ 1736 /* Buffer operations. */
1708 case IR_BUFHDR: asm_bufhdr(as, ir); break; 1737 case IR_BUFHDR: asm_bufhdr(as, ir); break;
@@ -2167,6 +2196,10 @@ static void asm_setup_regsp(ASMState *as)
2167 if (inloop) 2196 if (inloop)
2168 as->modset |= RSET_SCRATCH; 2197 as->modset |= RSET_SCRATCH;
2169#if LJ_TARGET_X86 2198#if LJ_TARGET_X86
2199 if (irt_isnum(IR(ir->op2)->t)) {
2200 if (as->evenspill < 4) /* Leave room to call pow(). */
2201 as->evenspill = 4;
2202 }
2170 break; 2203 break;
2171#else 2204#else
2172 ir->prev = REGSP_HINT(RID_FPRET); 2205 ir->prev = REGSP_HINT(RID_FPRET);
@@ -2192,9 +2225,6 @@ static void asm_setup_regsp(ASMState *as)
2192 continue; 2225 continue;
2193 } 2226 }
2194 break; 2227 break;
2195 } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
2196 if (as->evenspill < 4) /* Leave room to call pow(). */
2197 as->evenspill = 4;
2198 } 2228 }
2199#endif 2229#endif
2200 if (inloop) 2230 if (inloop)
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index ccb8ccb6..f922ed0f 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -1268,8 +1268,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1268 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1268 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1269 ra_releasetmp(as, ASMREF_TMP1)); 1269 ra_releasetmp(as, ASMREF_TMP1));
1270} 1270}
1271#else
1272#define asm_cnew(as, ir) ((void)0)
1273#endif 1271#endif
1274 1272
1275/* -- Write barriers ------------------------------------------------------ */ 1273/* -- Write barriers ------------------------------------------------------ */
@@ -1364,8 +1362,6 @@ static void asm_callround(ASMState *as, IRIns *ir, int id)
1364 1362
1365static void asm_fpmath(ASMState *as, IRIns *ir) 1363static void asm_fpmath(ASMState *as, IRIns *ir)
1366{ 1364{
1367 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1368 return;
1369 if (ir->op2 <= IRFPM_TRUNC) 1365 if (ir->op2 <= IRFPM_TRUNC)
1370 asm_callround(as, ir, ir->op2); 1366 asm_callround(as, ir, ir->op2);
1371 else if (ir->op2 == IRFPM_SQRT) 1367 else if (ir->op2 == IRFPM_SQRT)
@@ -1507,14 +1503,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
1507#define asm_mulov(as, ir) asm_mul(as, ir) 1503#define asm_mulov(as, ir) asm_mul(as, ir)
1508 1504
1509#if !LJ_SOFTFP 1505#if !LJ_SOFTFP
1510#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) 1506#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
1511#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1512#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) 1507#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
1513#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1514#endif 1508#endif
1515 1509
1516#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1517
1518static void asm_neg(ASMState *as, IRIns *ir) 1510static void asm_neg(ASMState *as, IRIns *ir)
1519{ 1511{
1520#if !LJ_SOFTFP 1512#if !LJ_SOFTFP
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index da857355..a3502223 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -1242,8 +1242,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1242 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1242 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1243 ra_releasetmp(as, ASMREF_TMP1)); 1243 ra_releasetmp(as, ASMREF_TMP1));
1244} 1244}
1245#else
1246#define asm_cnew(as, ir) ((void)0)
1247#endif 1245#endif
1248 1246
1249/* -- Write barriers ------------------------------------------------------ */ 1247/* -- Write barriers ------------------------------------------------------ */
@@ -1320,8 +1318,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1320 } else if (fpm <= IRFPM_TRUNC) { 1318 } else if (fpm <= IRFPM_TRUNC) {
1321 asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : 1319 asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
1322 fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); 1320 fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
1323 } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
1324 return;
1325 } else { 1321 } else {
1326 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); 1322 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1327 } 1323 }
@@ -1428,45 +1424,12 @@ static void asm_mul(ASMState *as, IRIns *ir)
1428 asm_intmul(as, ir); 1424 asm_intmul(as, ir);
1429} 1425}
1430 1426
1431static void asm_div(ASMState *as, IRIns *ir)
1432{
1433#if LJ_HASFFI
1434 if (!irt_isnum(ir->t))
1435 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1436 IRCALL_lj_carith_divu64);
1437 else
1438#endif
1439 asm_fparith(as, ir, A64I_FDIVd);
1440}
1441
1442static void asm_pow(ASMState *as, IRIns *ir)
1443{
1444#if LJ_HASFFI
1445 if (!irt_isnum(ir->t))
1446 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1447 IRCALL_lj_carith_powu64);
1448 else
1449#endif
1450 asm_callid(as, ir, IRCALL_lj_vm_powi);
1451}
1452
1453#define asm_addov(as, ir) asm_add(as, ir) 1427#define asm_addov(as, ir) asm_add(as, ir)
1454#define asm_subov(as, ir) asm_sub(as, ir) 1428#define asm_subov(as, ir) asm_sub(as, ir)
1455#define asm_mulov(as, ir) asm_mul(as, ir) 1429#define asm_mulov(as, ir) asm_mul(as, ir)
1456 1430
1431#define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd)
1457#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) 1432#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS)
1458#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1459
1460static void asm_mod(ASMState *as, IRIns *ir)
1461{
1462#if LJ_HASFFI
1463 if (!irt_isint(ir->t))
1464 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1465 IRCALL_lj_carith_modu64);
1466 else
1467#endif
1468 asm_callid(as, ir, IRCALL_lj_vm_modi);
1469}
1470 1433
1471static void asm_neg(ASMState *as, IRIns *ir) 1434static void asm_neg(ASMState *as, IRIns *ir)
1472{ 1435{
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 8b5efc35..6d898c5f 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -1607,8 +1607,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1607 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1607 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1608 ra_releasetmp(as, ASMREF_TMP1)); 1608 ra_releasetmp(as, ASMREF_TMP1));
1609} 1609}
1610#else
1611#define asm_cnew(as, ir) ((void)0)
1612#endif 1610#endif
1613 1611
1614/* -- Write barriers ------------------------------------------------------ */ 1612/* -- Write barriers ------------------------------------------------------ */
@@ -1677,8 +1675,6 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
1677#if !LJ_SOFTFP32 1675#if !LJ_SOFTFP32
1678static void asm_fpmath(ASMState *as, IRIns *ir) 1676static void asm_fpmath(ASMState *as, IRIns *ir)
1679{ 1677{
1680 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1681 return;
1682#if !LJ_SOFTFP 1678#if !LJ_SOFTFP
1683 if (ir->op2 <= IRFPM_TRUNC) 1679 if (ir->op2 <= IRFPM_TRUNC)
1684 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); 1680 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
@@ -1766,41 +1762,13 @@ static void asm_mul(ASMState *as, IRIns *ir)
1766 } 1762 }
1767} 1763}
1768 1764
1769static void asm_mod(ASMState *as, IRIns *ir)
1770{
1771#if LJ_64 && LJ_HASFFI
1772 if (!irt_isint(ir->t))
1773 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1774 IRCALL_lj_carith_modu64);
1775 else
1776#endif
1777 asm_callid(as, ir, IRCALL_lj_vm_modi);
1778}
1779
1780#if !LJ_SOFTFP32 1765#if !LJ_SOFTFP32
1781static void asm_pow(ASMState *as, IRIns *ir) 1766static void asm_fpdiv(ASMState *as, IRIns *ir)
1782{
1783#if LJ_64 && LJ_HASFFI
1784 if (!irt_isnum(ir->t))
1785 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1786 IRCALL_lj_carith_powu64);
1787 else
1788#endif
1789 asm_callid(as, ir, IRCALL_lj_vm_powi);
1790}
1791
1792static void asm_div(ASMState *as, IRIns *ir)
1793{ 1767{
1794#if LJ_64 && LJ_HASFFI
1795 if (!irt_isnum(ir->t))
1796 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1797 IRCALL_lj_carith_divu64);
1798 else
1799#endif
1800#if !LJ_SOFTFP 1768#if !LJ_SOFTFP
1801 asm_fparith(as, ir, MIPSI_DIV_D); 1769 asm_fparith(as, ir, MIPSI_DIV_D);
1802#else 1770#else
1803 asm_callid(as, ir, IRCALL_softfp_div); 1771 asm_callid(as, ir, IRCALL_softfp_div);
1804#endif 1772#endif
1805} 1773}
1806#endif 1774#endif
@@ -1838,8 +1806,6 @@ static void asm_abs(ASMState *as, IRIns *ir)
1838} 1806}
1839#endif 1807#endif
1840 1808
1841#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1842
1843static void asm_arithov(ASMState *as, IRIns *ir) 1809static void asm_arithov(ASMState *as, IRIns *ir)
1844{ 1810{
1845 /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */ 1811 /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index d9e4ad04..c15b89fe 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -1174,8 +1174,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1174 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1174 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1175 ra_releasetmp(as, ASMREF_TMP1)); 1175 ra_releasetmp(as, ASMREF_TMP1));
1176} 1176}
1177#else
1178#define asm_cnew(as, ir) ((void)0)
1179#endif 1177#endif
1180 1178
1181/* -- Write barriers ------------------------------------------------------ */ 1179/* -- Write barriers ------------------------------------------------------ */
@@ -1246,8 +1244,6 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
1246 1244
1247static void asm_fpmath(ASMState *as, IRIns *ir) 1245static void asm_fpmath(ASMState *as, IRIns *ir)
1248{ 1246{
1249 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1250 return;
1251 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) 1247 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1252 asm_fpunary(as, ir, PPCI_FSQRT); 1248 asm_fpunary(as, ir, PPCI_FSQRT);
1253 else 1249 else
@@ -1361,9 +1357,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
1361 } 1357 }
1362} 1358}
1363 1359
1364#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV) 1360#define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV)
1365#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1366#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1367 1361
1368static void asm_neg(ASMState *as, IRIns *ir) 1362static void asm_neg(ASMState *as, IRIns *ir)
1369{ 1363{
@@ -1387,7 +1381,6 @@ static void asm_neg(ASMState *as, IRIns *ir)
1387} 1381}
1388 1382
1389#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) 1383#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
1390#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1391 1384
1392static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) 1385static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1393{ 1386{
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index d5cd6326..7356a5f0 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1843,8 +1843,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1843 asm_gencall(as, ci, args); 1843 asm_gencall(as, ci, args);
1844 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); 1844 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
1845} 1845}
1846#else
1847#define asm_cnew(as, ir) ((void)0)
1848#endif 1846#endif
1849 1847
1850/* -- Write barriers ------------------------------------------------------ */ 1848/* -- Write barriers ------------------------------------------------------ */
@@ -1950,8 +1948,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1950 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); 1948 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
1951 ra_left(as, RID_XMM0, ir->op1); 1949 ra_left(as, RID_XMM0, ir->op1);
1952 } 1950 }
1953 } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
1954 /* Rejoined to pow(). */
1955 } else { 1951 } else {
1956 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); 1952 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1957 } 1953 }
@@ -1986,17 +1982,6 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
1986 ra_left(as, RID_EAX, ir->op2); 1982 ra_left(as, RID_EAX, ir->op2);
1987} 1983}
1988 1984
1989static void asm_pow(ASMState *as, IRIns *ir)
1990{
1991#if LJ_64 && LJ_HASFFI
1992 if (!irt_isnum(ir->t))
1993 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1994 IRCALL_lj_carith_powu64);
1995 else
1996#endif
1997 asm_fppowi(as, ir);
1998}
1999
2000static int asm_swapops(ASMState *as, IRIns *ir) 1985static int asm_swapops(ASMState *as, IRIns *ir)
2001{ 1986{
2002 IRIns *irl = IR(ir->op1); 1987 IRIns *irl = IR(ir->op1);
@@ -2193,27 +2178,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
2193 asm_intarith(as, ir, XOg_X_IMUL); 2178 asm_intarith(as, ir, XOg_X_IMUL);
2194} 2179}
2195 2180
2196static void asm_div(ASMState *as, IRIns *ir) 2181#define asm_fpdiv(as, ir) asm_fparith(as, ir, XO_DIVSD)
2197{
2198#if LJ_64 && LJ_HASFFI
2199 if (!irt_isnum(ir->t))
2200 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
2201 IRCALL_lj_carith_divu64);
2202 else
2203#endif
2204 asm_fparith(as, ir, XO_DIVSD);
2205}
2206
2207static void asm_mod(ASMState *as, IRIns *ir)
2208{
2209#if LJ_64 && LJ_HASFFI
2210 if (!irt_isint(ir->t))
2211 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
2212 IRCALL_lj_carith_modu64);
2213 else
2214#endif
2215 asm_callid(as, ir, IRCALL_lj_vm_modi);
2216}
2217 2182
2218static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) 2183static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
2219{ 2184{
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 60e335c2..1a9a89a3 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -177,7 +177,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE);
177/* FPMATH sub-functions. ORDER FPM. */ 177/* FPMATH sub-functions. ORDER FPM. */
178#define IRFPMDEF(_) \ 178#define IRFPMDEF(_) \
179 _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ 179 _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
180 _(SQRT) _(EXP2) _(LOG) _(LOG2) \ 180 _(SQRT) _(LOG) _(LOG2) \
181 _(OTHER) 181 _(OTHER)
182 182
183typedef enum { 183typedef enum {
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 35c02dc0..5c72478b 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -192,7 +192,6 @@ typedef struct CCallInfo {
192 _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ 192 _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
193 _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ 193 _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
194 _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ 194 _(FPMATH, sqrt, 1, N, NUM, XA_FP) \
195 _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \
196 _(ANY, log, 1, N, NUM, XA_FP) \ 195 _(ANY, log, 1, N, NUM, XA_FP) \
197 _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ 196 _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
198 _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ 197 _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index ae65e15a..7a02c6ff 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -237,10 +237,11 @@ LJFOLDF(kfold_fpcall2)
237} 237}
238 238
239LJFOLD(POW KNUM KINT) 239LJFOLD(POW KNUM KINT)
240LJFOLD(POW KNUM KNUM)
240LJFOLDF(kfold_numpow) 241LJFOLDF(kfold_numpow)
241{ 242{
242 lua_Number a = knumleft; 243 lua_Number a = knumleft;
243 lua_Number b = (lua_Number)fright->i; 244 lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
244 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); 245 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
245 return lj_ir_knum(J, y); 246 return lj_ir_knum(J, y);
246} 247}
@@ -1077,7 +1078,7 @@ LJFOLDF(simplify_nummuldiv_negneg)
1077} 1078}
1078 1079
1079LJFOLD(POW any KINT) 1080LJFOLD(POW any KINT)
1080LJFOLDF(simplify_numpow_xk) 1081LJFOLDF(simplify_numpow_xkint)
1081{ 1082{
1082 int32_t k = fright->i; 1083 int32_t k = fright->i;
1083 TRef ref = fins->op1; 1084 TRef ref = fins->op1;
@@ -1106,13 +1107,22 @@ LJFOLDF(simplify_numpow_xk)
1106 return ref; 1107 return ref;
1107} 1108}
1108 1109
1110LJFOLD(POW any KNUM)
1111LJFOLDF(simplify_numpow_xknum)
1112{
1113 if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */
1114 return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
1115 return NEXTFOLD;
1116}
1117
1109LJFOLD(POW KNUM any) 1118LJFOLD(POW KNUM any)
1110LJFOLDF(simplify_numpow_kx) 1119LJFOLDF(simplify_numpow_kx)
1111{ 1120{
1112 lua_Number n = knumleft; 1121 lua_Number n = knumleft;
1113 if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ 1122 if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */
1114 fins->o = IR_CONV;
1115#if LJ_TARGET_X86ORX64 1123#if LJ_TARGET_X86ORX64
1124 /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
1125 fins->o = IR_CONV;
1116 fins->op1 = fins->op2; 1126 fins->op1 = fins->op2;
1117 fins->op2 = IRCONV_NUM_INT; 1127 fins->op2 = IRCONV_NUM_INT;
1118 fins->op2 = (IRRef1)lj_opt_fold(J); 1128 fins->op2 = (IRRef1)lj_opt_fold(J);
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index ba425334..94cce582 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -593,10 +593,10 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
593 /* Narrowing must be unconditional to preserve (-x)^i semantics. */ 593 /* Narrowing must be unconditional to preserve (-x)^i semantics. */
594 if (tvisint(vc) || numisint(numV(vc))) { 594 if (tvisint(vc) || numisint(numV(vc))) {
595 int checkrange = 0; 595 int checkrange = 0;
596 /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ 596 /* pow() is faster for bigger exponents. But do this only for (+k)^i. */
597 if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { 597 if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
598 int32_t k = numberVint(vc); 598 int32_t k = numberVint(vc);
599 if (!(k >= -65536 && k <= 65536)) goto split_pow; 599 if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
600 checkrange = 1; 600 checkrange = 1;
601 } 601 }
602 if (!tref_isinteger(rc)) { 602 if (!tref_isinteger(rc)) {
@@ -607,19 +607,11 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
607 TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); 607 TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
608 emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); 608 emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
609 } 609 }
610 return emitir(IRTN(IR_POW), rb, rc); 610 } else {
611force_pow_num:
612 rc = lj_ir_tonum(J, rc); /* Want POW(num, num), not POW(num, int). */
611 } 613 }
612split_pow: 614 return emitir(IRTN(IR_POW), rb, rc);
613 /* FOLD covers most cases, but some are easier to do here. */
614 if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
615 return rb; /* 1 ^ x ==> 1 */
616 rc = lj_ir_tonum(J, rc);
617 if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5)
618 return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */
619 /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */
620 rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2);
621 rc = emitir(IRTN(IR_MUL), rb, rc);
622 return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2);
623} 615}
624 616
625/* -- Predictive narrowing of induction variables ------------------------- */ 617/* -- Predictive narrowing of induction variables ------------------------- */
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index e526b49d..7925cfa5 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -403,27 +403,6 @@ static void split_ir(jit_State *J)
403 hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); 403 hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
404 break; 404 break;
405 case IR_FPMATH: 405 case IR_FPMATH:
406 /* Try to rejoin pow from EXP2, MUL and LOG2. */
407 if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
408 IRIns *irp = IR(nir->op1);
409 if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
410 IRIns *irm4 = IR(irp->op1);
411 IRIns *irm3 = IR(irm4->op1);
412 IRIns *irm12 = IR(irm3->op1);
413 IRIns *irl1 = IR(irm12->op1);
414 if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
415 irl1->op2 == IRCALL_lj_vm_log2) {
416 IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */
417 IRRef arg3 = irm3->op2, arg4 = irm4->op2;
418 J->cur.nins--;
419 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
420 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
421 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
422 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
423 break;
424 }
425 }
426 }
427 hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); 406 hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
428 break; 407 break;
429 case IR_LDEXP: 408 case IR_LDEXP:
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 5a7bc392..d572e7d7 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -92,11 +92,6 @@ LJ_ASMF double lj_vm_trunc(double);
92LJ_ASMF double lj_vm_trunc_sf(double); 92LJ_ASMF double lj_vm_trunc_sf(double);
93#endif 93#endif
94#endif 94#endif
95#ifdef LUAJIT_NO_EXP2
96LJ_ASMF double lj_vm_exp2(double);
97#else
98#define lj_vm_exp2 exp2
99#endif
100#if LJ_HASFFI 95#if LJ_HASFFI
101LJ_ASMF int lj_vm_errno(void); 96LJ_ASMF int lj_vm_errno(void);
102#endif 97#endif
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 36178f29..623a686d 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -79,13 +79,6 @@ double lj_vm_log2(double a)
79} 79}
80#endif 80#endif
81 81
82#ifdef LUAJIT_NO_EXP2
83double lj_vm_exp2(double a)
84{
85 return exp(a * 0.6931471805599453);
86}
87#endif
88
89#if !LJ_TARGET_X86ORX64 82#if !LJ_TARGET_X86ORX64
90/* Unsigned x^k. */ 83/* Unsigned x^k. */
91static double lj_vm_powui(double x, uint32_t k) 84static double lj_vm_powui(double x, uint32_t k)
@@ -128,7 +121,6 @@ double lj_vm_foldfpm(double x, int fpm)
128 case IRFPM_CEIL: return lj_vm_ceil(x); 121 case IRFPM_CEIL: return lj_vm_ceil(x);
129 case IRFPM_TRUNC: return lj_vm_trunc(x); 122 case IRFPM_TRUNC: return lj_vm_trunc(x);
130 case IRFPM_SQRT: return sqrt(x); 123 case IRFPM_SQRT: return sqrt(x);
131 case IRFPM_EXP2: return lj_vm_exp2(x);
132 case IRFPM_LOG: return log(x); 124 case IRFPM_LOG: return log(x);
133 case IRFPM_LOG2: return lj_vm_log2(x); 125 case IRFPM_LOG2: return lj_vm_log2(x);
134 default: lua_assert(0); 126 default: lua_assert(0);