1 files changed, 592 insertions, 0 deletions
diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c
new file mode 100644
index 00000000..36b11dc0
--- /dev/null
+++ b/src/lj_strfmt_num.c
@@ -0,0 +1,592 @@
+/*
+** String formatting for floating-point numbers.
+** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
+** Contributed by Peter Cawley.
+*/
+#include <stdio.h>
+#define lj_strfmt_num_c
+#define LUA_CORE
+#include "lj_obj.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_strfmt.h"
+/* -- Precomputed tables -------------------------------------------------- */
+/*
+** Rescale factors to push the exponent of a number towards zero.
+**
+** RESCALE_EXPONENTS lists 32 decimal exponents. Both tables built from it
+** are indexed with (e >> 6) in lj_strfmt_wfnum, where e is the 11-bit biased
+** binary exponent of the double. P(x) entries stand for scaling up by 10^x,
+** N(x) entries for scaling down by 10^x. P(0) yields a zero in rescale_e,
+** which lj_strfmt_wfnum treats as "do not rescale".
+*/
+#define RESCALE_EXPONENTS(P, N) \
+  P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \
+  P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \
+  N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \
+  N(251), N(270), N(289)
+/* Token-paste the exponent onto a literal, e.g. ONE_E_P(308) -> 1e+0308. */
+#define ONE_E_P(X) 1e+0 ## X
+#define ONE_E_N(X) 1e-0 ## X
+/* Decimal exponent to add back after rescaling: -x for P(x), +x for N(x). */
+static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) };
+/* The multipliers themselves: 1e+x for P(x), 1e-x for N(x). */
+static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) };
+#undef ONE_E_N
+#undef ONE_E_P
+/*
+** For p in range -70 through 57, this table encodes pairs (m, e) such that
+** 4*2^p <= (uint8_t)m*10^e, and is the smallest value for which this holds.
+**
+** The pairs are stored flat as m, e, m, e, ... and the pair for a given p is
+** found at four_ulp_m_e + (p + 70) * 2. Since the element type is int8_t,
+** values of m above 127 are written as negative literals and must be read
+** back through a (uint8_t) cast, as lj_strfmt_wfnum does.
+*/
+static const int8_t four_ulp_m_e[] = {
+  34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19,
+  -82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16,
+  45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14,
+  114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3,
+  -11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7,
+  3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103,
+  -5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3,
+  16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2,
+  32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4,
+  33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34,
+  7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10,
+  69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13,
+  36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15,
+  19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17
+};
+/*
+** min(2^32-1, 10^e-1) for e in range 0 through 10. For e >= 1, entry e is
+** the largest uint32_t value with at most e decimal digits; entry e plus one
+** is 10^e for e in range 0 through 9. The table is only ever read, so it is
+** declared const.
+*/
+static const uint32_t ndigits_dec_threshold[] = {
+  0, 9U, 99U, 999U, 9999U, 99999U, 999999U,
+  9999999U, 99999999U, 999999999U, 0xffffffffU
+};
+/* -- Helper functions ---------------------------------------------------- */
+/* Compute the number of digits in the decimal representation of x. */
+static MSize ndigits_dec(uint32_t x)
+{
+  /* First guess from the position of the highest set bit (x | 1 keeps the
+  ** argument of lj_fls non-zero); 2^8/77 is roughly log2(10).
+  */
+  MSize guess = ((lj_fls(x | 1) * 77) >> 8) + 1;
+  /* One more digit is needed if x exceeds the threshold for the guess. */
+  if (x > ndigits_dec_threshold[guess])
+    return guess + 1;
+  return guess;
+}
+/*
+** Split off the leading decimal digit of x: the digit is stored at *p++ and
+** the remainder is left in x. The division by sc is done as a multiplication
+** by ceil(2^sh/sc) followed by a right shift by sh.
+*/
+#define WINT_R(x, sh, sc) \
+  do { \
+    uint32_t d_ = ((x) * (((1 << (sh)) + (sc) - 1) / (sc))) >> (sh); \
+    (x) -= d_ * (sc); \
+    *p++ = (char)('0' + d_); \
+  } while (0)
+/*
+** Write 9-digit unsigned integer to buffer.
+** Exactly nine characters are stored at p (zero-padded on the left) and the
+** position after the last one is returned.
+*/
+static char *lj_strfmt_wuint9(char *p, uint32_t u)
+{
+  uint32_t upper = u / 10000;            /* Everything above the low 4 digits. */
+  uint32_t low4 = u - upper * 10000;     /* The low 4 digits. */
+  uint32_t lead = upper / 10000;         /* The most significant digit. */
+  uint32_t mid4 = upper - lead * 10000;  /* The middle 4 digits. */
+  *p++ = (char)('0' + lead);
+  WINT_R(mid4, 23, 1000);
+  WINT_R(mid4, 12, 100);
+  WINT_R(mid4, 10, 10);
+  *p++ = (char)('0' + mid4);
+  WINT_R(low4, 23, 1000);
+  WINT_R(low4, 12, 100);
+  WINT_R(low4, 10, 10);
+  *p++ = (char)('0' + low4);
+  return p;
+}
+#undef WINT_R
+/* -- Extended precision arithmetic --------------------------------------- */
+/*
+** The "nd" format is a fixed-precision decimal representation for numbers. It
+** consists of up to 64 uint32_t values, with each uint32_t storing a value
+** in the range [0, 1e9). A number in "nd" format consists of three variables:
+**
+**  uint32_t nd[64];
+**  uint32_t ndlo;
+**  uint32_t ndhi;
+**
+** The integral part of the number is stored in nd[0 ... ndhi], the value of
+** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of
+** the number is zero, ndlo is zero. Otherwise, the fractional part is stored
+** in nd[ndlo ... 63], the value of which is taken to be
+** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}.
+**
+** If the array part had 128 elements rather than 64, then every double would
+** have an exact representation in "nd" format. With 64 elements, all integral
+** doubles have an exact representation, and all non-integral doubles have
+** enough digits to make both %.99e and %.99f do the right thing.
+*/
+/*
+** Tuning for nd_mul2k: each pass shifts every limb left by at most
+** ND_MUL2K_MAX_SHIFT bits and then splits the 64-bit intermediate into a
+** carry (quotient by 1e9) and a limb (remainder).
+** With LJ_64 the quotient is a plain 64-bit division. Otherwise the shift is
+** limited to 11 bits and the quotient is computed as (val >> 9) / 1953125,
+** where 1953125 == 1e9 / 2^9 and the shifted value is narrowed to uint32_t
+** before dividing (presumably to avoid a 64-bit division on 32-bit targets).
+*/
+#if LJ_64
+#define ND_MUL2K_MAX_SHIFT      29
+#define ND_MUL2K_DIV1E9(val)    ((uint32_t)((val) / 1000000000))
+#else
+#define ND_MUL2K_MAX_SHIFT      11
+#define ND_MUL2K_DIV1E9(val)    ((uint32_t)((val) >> 9) / 1953125)
+#endif
+/*
+** Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero).
+**
+** nd        Limb array; limbs 0..ndhi are updated in place.
+** ndhi      Index of the highest limb in use on entry.
+** k         Number of bits to shift left by.
+** carry_in  Value merged into the lowest limb on the first pass. It is
+**           combined with a bitwise OR, so it is expected to fit in the
+**           bits vacated by that pass's shift.
+** sf        Format descriptor; only consulted for the performance hack.
+**
+** Returns the new ndhi (it grows whenever a carry leaves the top limb).
+*/
+static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k,
+                         uint32_t carry_in, SFormat sf)
+{
+  uint32_t i, ndlo = 0, start = 1;
+  /* Performance hacks. */
+  /*
+  ** For large shifts in formats other than %f, start is set relative to ndhi
+  ** and the precision (the unsigned subtraction may wrap). start is bumped
+  ** every time a new top limb is appended; once it catches up with ndlo, each
+  ** further new top limb also advances ndlo, so the lowest limbs are no
+  ** longer updated. With the default start of 1 and ndlo of 0 this does not
+  ** trigger.
+  */
+  if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) {
+    start = ndhi - (STRFMT_PREC(sf) + 17) / 8;
+  }
+  /* Real logic. */
+  /* Full-sized passes of ND_MUL2K_MAX_SHIFT bits each. */
+  while (k >= ND_MUL2K_MAX_SHIFT) {
+    for (i = ndlo; i <= ndhi; i++) {
+      uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in;
+      carry_in = ND_MUL2K_DIV1E9(val);
+      /* val mod 1e9, computed in 32-bit arithmetic. */
+      nd[i] = (uint32_t)val - carry_in * 1000000000;
+    }
+    if (carry_in) {
+      /* Carry out of the top limb becomes a new top limb. */
+      nd[++ndhi] = carry_in; carry_in = 0;
+      if (start++ == ndlo) ++ndlo;
+    }
+    k -= ND_MUL2K_MAX_SHIFT;
+  }
+  /* Final partial pass for the remaining k < ND_MUL2K_MAX_SHIFT bits. */
+  if (k) {
+    for (i = ndlo; i <= ndhi; i++) {
+      uint64_t val = ((uint64_t)nd[i] << k) | carry_in;
+      carry_in = ND_MUL2K_DIV1E9(val);
+      nd[i] = (uint32_t)val - carry_in * 1000000000;
+    }
+    if (carry_in) nd[++ndhi] = carry_in;
+  }
+  return ndhi;
+}
+/*
+** Divide nd by 2^k (ndlo is assumed to be zero).
+**
+** nd is used as a ring of 64 limbs here: indices are wrapped with & 0x3f, so
+** limbs for the fractional part are added below index 0, i.e. from index 63
+** downwards. Returns the new ndlo (0 if no fractional limb was produced).
+** ndhi is passed by value, so the adjustments made to it here are not seen
+** by the caller.
+*/
+static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf)
+{
+  /* stop1/stop2 are limb indices at which ndlo stops growing; ~0 = no limit. */
+  uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0;
+  /* Performance hacks. */
+  if (!ndhi) {
+    if (!nd[0]) {
+      /* Zero stays zero. */
+      return 0;
+    } else {
+      /* Single limb: shift out trailing zero bits directly (no remainder). */
+      uint32_t s = lj_ffs(nd[0]);
+      if (s >= k) { nd[0] >>= k; return 0; }
+      nd[0] >>= s; k -= s;
+    }
+  }
+  if (k > 18) {
+    /* Bound the number of fractional limbs by what the precision needs. */
+    if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) {
+      stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9;
+    } else {
+      /* Estimate the decimal exponent of the result (log10(2) ~= 0.30103). */
+      int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k;
+      int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114);
+      stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9;
+      stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8;
+    }
+  }
+  /* Real logic. */
+  /* Shift right 9 bits at a time, walking from the top limb downwards. */
+  while (k >= 9) {
+    uint32_t i = ndhi, carry = 0;
+    for (;;) {
+      uint32_t val = nd[i];
+      nd[i] = (val >> 9) + carry;
+      /* The 9 bits shifted out, rescaled for the next lower limb:
+      ** 1953125 == 1e9 / 2^9.
+      */
+      carry = (val & 0x1ff) * 1953125;
+      if (i == ndlo) break;
+      i = (i - 1) & 0x3f;
+    }
+    if (ndlo != stop1 && ndlo != stop2) {
+      /* Not at a limit: a remainder opens a new lowest limb. */
+      if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
+      if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
+    } else if (!nd[ndhi]) {
+      /* At a limit: the remainder is dropped. Stop early once the top limb
+      ** is zero and it is the only limb left.
+      */
+      if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
+      else return ndlo;
+    }
+    k -= 9;
+  }
+  /* Remaining shift of fewer than 9 bits; 1e9 >> k is exact for k <= 9. */
+  if (k) {
+    uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0;
+    for (;;) {
+      uint32_t val = nd[i];
+      nd[i] = (val >> k) + carry;
+      carry = (val & mask) * mul;
+      if (i == ndlo) break;
+      i = (i - 1) & 0x3f;
+    }
+    if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
+  }
+  return ndlo;
+}
+/*
+** Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9).
+** A carry is propagated upwards through the limbs (indices wrap with & 0x3f).
+** If it runs out of the top limb, a new top limb holding 1 is appended.
+** Returns the possibly increased ndhi.
+*/
+static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e)
+{
+  /* limb = floor(e/9): C division truncates, hence the -8 for negative e. */
+  int32_t limb = (e >= 0) ? (int32_t)((uint32_t)e/9) : (e-8)/9;
+  /* Limbs for negative exponents sit at the top end of the array. */
+  uint32_t pos = (e >= 0) ? (uint32_t)limb : (uint32_t)(64 + limb);
+  /* Threshold entry + 1 is the power of ten for the digit within the limb. */
+  uint32_t sum = nd[pos] + m * (ndigits_dec_threshold[e - limb*9] + 1);
+  while (LJ_UNLIKELY(sum >= 1000000000)) {
+    /* Limb overflowed: keep the remainder and carry one into the next limb. */
+    nd[pos] = sum - 1000000000;
+    if (LJ_UNLIKELY(pos == ndhi)) {
+      ndhi = (ndhi + 1) & 0x3f;
+      nd[ndhi] = 1;
+      return ndhi;
+    }
+    pos = (pos + 1) & 0x3f;
+    sum = nd[pos] + 1;
+  }
+  nd[pos] = sum;
+  return ndhi;
+}
+/*
+** Test whether two "nd" values are equal in their most significant digits.
+** nd/ndhi give the first value and its top limb. ref points at the saved top
+** limb of the second value, with lower limbs stored at decreasing addresses.
+** hilen is the number of decimal digits used for the top limb. The first
+** prec digits must match, and the digit after them must fall on the same
+** side of '5' in both values. Returns non-zero if so.
+*/
+static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen,
+                      MSize prec)
+{
+  char nd9[9], ref9[9];
+  if (hilen > prec) {
+    /* Everything of interest is in the top limb: skip its leading zeroes. */
+    prec -= hilen - 9;
+  } else {
+    /* The whole top limb is significant, so it must match exactly. */
+    if (LJ_UNLIKELY(nd[ndhi] != *ref))
+      return 0;
+    prec -= hilen;
+    ref--;
+    ndhi = (ndhi - 1) & 0x3f;
+    if (prec >= 9) {
+      /* Same for a fully covered second limb. */
+      if (LJ_UNLIKELY(nd[ndhi] != *ref))
+        return 0;
+      prec -= 9;
+      ref--;
+      ndhi = (ndhi - 1) & 0x3f;
+    }
+  }
+  lua_assert(prec < 9);
+  /* Compare the partially covered limb digit by digit. */
+  lj_strfmt_wuint9(nd9, nd[ndhi]);
+  lj_strfmt_wuint9(ref9, *ref);
+  if (memcmp(nd9, ref9, prec))
+    return 0;
+  return (nd9[prec] < '5') == (ref9[prec] < '5');
+}
+/* -- Formatted conversions to buffer ------------------------------------- */
+/*
+** Write formatted floating-point number to either sb or p.
+**
+** sb  Destination buffer; only used (via lj_buf_more) when p is NULL.
+** sf  Format descriptor: conversion (%a, %e, %f or %g), flags, width and
+**     precision, as read through the STRFMT_* macros.
+** n   Number to format.
+** p   Output position, or NULL to reserve space in sb.
+**
+** Returns a pointer just past the last character written. No terminating
+** NUL is stored. The %e/%f/%g paths may write a few characters past the
+** returned position (see the +5/+8 slack requested from lj_buf_more), so a
+** caller-supplied p needs the same slack.
+** NOTE(review): confirm that callers passing p size their buffer for this.
+*/
+static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p)
+{
+  MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len;
+  TValue t;
+  t.n = n;
+  if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) {
+    /* Handle non-finite values uniformly for %a, %e, %f, %g. */
+    /* XOR with 0x202020 flips the case bit of all three letters. */
+    int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
+    if (((t.u32.hi & 0x000fffff) | t.u32.lo) != 0) {
+      ch ^= ('n' << 16) | ('a' << 8) | 'n';
+      if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+    } else {
+      ch ^= ('i' << 16) | ('n' << 8) | 'f';
+      if ((t.u32.hi & 0x80000000)) prefix = '-';
+      else if ((sf & STRFMT_F_PLUS)) prefix = '+';
+      else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+    }
+    len = 3 + (prefix != 0);
+    if (!p) p = lj_buf_more(sb, width > len ? width : len);
+    if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
+    if (prefix) *p++ = prefix;
+    *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
+  } else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) {
+    /* %a */
+    const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEFPX"
+                                               : "0123456789abcdefpx";
+    int32_t e = (t.u32.hi >> 20) & 0x7ff;
+    char prefix = 0, eprefix = '+';
+    if (t.u32.hi & 0x80000000) prefix = '-';
+    else if ((sf & STRFMT_F_PLUS)) prefix = '+';
+    else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+    t.u32.hi &= 0xfffff;
+    if (e) {
+      t.u32.hi |= 0x100000;
+      e -= 1023;
+    } else if (t.u32.lo | t.u32.hi) {
+      /* Non-zero denormal - normalise it. */
+      uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo);
+      e = -1022 - shift;
+      t.u64 <<= shift;
+    }
+    /* abs(n) == t.u64 * 2^(e - 52) */
+    /* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */
+    if ((int32_t)prec < 0) {
+      /* Default precision: use smallest precision giving exact result. */
+      prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi|0x100000)/4;
+    } else if (prec < 13) {
+      /* Precision is sufficiently low as to maybe require rounding. */
+      t.u64 += (((uint64_t)1) << (51 - prec*4));
+    }
+    if (e < 0) {
+      eprefix = '-';
+      e = -e;
+    }
+    len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0)
+            + ((prec | (sf & STRFMT_F_ALT)) != 0);
+    if (!p) p = lj_buf_more(sb, width > len ? width : len);
+    if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
+      while (width-- > len) *p++ = ' ';
+    }
+    if (prefix) *p++ = prefix;
+    *p++ = '0';
+    *p++ = hexdig[17]; /* x or X */
+    if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
+      while (width-- > len) *p++ = '0';
+    }
+    *p++ = '0' + (t.u32.hi >> 20); /* Usually '1', sometimes '0' or '2'. */
+    if ((prec | (sf & STRFMT_F_ALT))) {
+      /* Emit fractional part. */
+      char *q = p + 1 + prec;
+      *p = '.';
+      if (prec < 13) t.u64 >>= (52 - prec*4);
+      else while (prec > 13) p[prec--] = '0';
+      /* Hex digits are written back to front. */
+      while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; }
+      p = q;
+    }
+    *p++ = hexdig[16]; /* p or P */
+    *p++ = eprefix; /* + or - */
+    p = lj_strfmt_wint(p, e);
+  } else {
+    /* %e or %f or %g - begin by converting n to "nd" format. */
+    uint32_t nd[64];
+    uint32_t ndhi = 0, ndlo, i;
+    int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0;
+    char prefix = 0, *q;
+    if (t.u32.hi & 0x80000000) prefix = '-';
+    else if ((sf & STRFMT_F_PLUS)) prefix = '+';
+    else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+    prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */
+    if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) {
+      /* %g - decrement precision if non-zero (to make it like %e). */
+      prec--;
+      prec ^= (uint32_t)((int32_t)prec >> 31);
+    }
+    if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) {
+      /* Precision is sufficiently low that rescaling will probably work. */
+      if ((ndebias = rescale_e[e >> 6])) {
+        t.n = n * rescale_n[e >> 6];
+        if (LJ_UNLIKELY(!e)) t.n *= 1e10, ndebias -= 10;
+        t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */
+        nd[0] = 0x100000 | (t.u32.hi & 0xfffff);
+        e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29);
+        /*
+        ** Jump into the middle of the exact conversion below. The label
+        ** rescale_failed is the re-entry point used when the rescaled result
+        ** turns out to be unreliable: it restores n and falls through to the
+        ** exact conversion with ndebias == 0.
+        */
+        goto load_t_lo; rescale_failed:
+        t.n = n;
+        e = (t.u32.hi >> 20) & 0x7ff;
+        ndebias = ndhi = 0;
+      }
+    }
+    nd[0] = t.u32.hi & 0xfffff;
+    if (e == 0) e++; else nd[0] |= 0x100000;
+    e -= 1043;
+    if (t.u32.lo) {
+      e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo:
+#if ND_MUL2K_MAX_SHIFT >= 29
+      nd[0] = (nd[0] << 3) | (t.u32.lo >> 29);
+      ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf);
+#elif ND_MUL2K_MAX_SHIFT >= 11
+      ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf);
+      ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf);
+      ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo <<  1) & 0x7ff, sf);
+#else
+#error "ND_MUL2K_MAX_SHIFT too small"
+#endif
+    }
+    if (e >= 0) {
+      ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf);
+      ndlo = 0;
+    } else {
+      ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf);
+      if (ndhi && !nd[ndhi]) ndhi--;
+    }
+    /* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */
+    if ((sf & STRFMT_T_FP_E)) {
+      /* %e or %g - assume %e and start by calculating nd's exponent (nde). */
+      char eprefix = '+';
+      int32_t nde = -1;
+      MSize hilen;
+      if (ndlo && !nd[ndhi]) {
+        /* No integral part: the leading limb is in the fractional area. */
+        ndhi = 64; do {} while (!nd[--ndhi]);
+        nde -= 64 * 9;
+      }
+      hilen = ndigits_dec(nd[ndhi]);
+      nde += ndhi * 9 + hilen;
+      if (ndebias) {
+        /*
+        ** Rescaling was performed, but this introduced some error, and might
+        ** have pushed us across a rounding boundary. We check whether this
+        ** error affected the result by introducing even more error (2ulp in
+        ** either direction), and seeing whether a roundary boundary was
+        ** crossed. Having already converted the -2ulp case, we save off its
+        ** most significant digits, convert the +2ulp case, and compare them.
+        */
+        int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29)
+                         + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12));
+        const int8_t *m_e = four_ulp_m_e + eidx * 2;
+        lua_assert(0 <= eidx && eidx < 128);
+        /* Save the top three limbs of the -2ulp value in nd[31..33]. */
+        nd[33] = nd[ndhi];
+        nd[32] = nd[(ndhi - 1) & 0x3f];
+        nd[31] = nd[(ndhi - 2) & 0x3f];
+        nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]);
+        if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) {
+          goto rescale_failed;
+        }
+      }
+      if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) {
+        /* Precision is sufficiently low as to maybe require rounding. */
+        ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1);
+        nde += (hilen != ndigits_dec(nd[ndhi]));
+      }
+      nde += ndebias;
+      if ((sf & STRFMT_T_FP_F)) {
+        /* %g */
+        if ((int32_t)prec >= nde && nde >= -4) {
+          if (nde < 0) ndhi = 0;
+          prec -= nde;
+          goto g_format_like_f;
+        } else if (!(sf & STRFMT_F_ALT) && prec && width > 5) {
+          /* Decrease precision in order to strip trailing zeroes. */
+          char tail[9];
+          uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9;
+          if (prec >= maxprec) prec = maxprec;
+          else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f;
+          i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10;
+          lj_strfmt_wuint9(tail, nd[ndlo]);
+          while (prec && tail[--i] == '0') {
+            prec--;
+            if (!i) {
+              if (ndlo == ndhi) { prec = 0; break; }
+              /*
+              ** nd is a ring here: ndlo may sit in the fractional area
+              ** (e.g. 63) while ndhi is 0, so the step towards ndhi has to
+              ** wrap like every other nd index. A plain ++ndlo would read
+              ** nd[64], one past the end of the array.
+              */
+              ndlo = (ndlo + 1) & 0x3f;
+              lj_strfmt_wuint9(tail, nd[ndlo]);
+              i = 9;
+            }
+          }
+        }
+      }
+      if (nde < 0) {
+        /* Make nde non-negative. */
+        eprefix = '-';
+        nde = -nde;
+      }
+      len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10)
+              + ((prec | (sf & STRFMT_F_ALT)) != 0);
+      if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5);
+      if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
+        while (width-- > len) *p++ = ' ';
+      }
+      if (prefix) *p++ = prefix;
+      if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
+        while (width-- > len) *p++ = '0';
+      }
+      /* Write the top limb one position to the right to leave room for '.'. */
+      q = lj_strfmt_wint(p + 1, nd[ndhi]);
+      p[0] = p[1]; /* Put leading digit in the correct place. */
+      if ((prec | (sf & STRFMT_F_ALT))) {
+        /* Emit fractional part. */
+        p[1] = '.'; p += 2;
+        prec -= (MSize)(q - p); p = q; /* Account for digits already emitted. */
+        /* Then emit chunks of 9 digits (this may emit 8 digits too many). */
+        for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) {
+          i = (i - 1) & 0x3f;
+          p = lj_strfmt_wuint9(p, nd[i]);
+        }
+        if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) {
+          /* %g (and not %#g) - strip trailing zeroes. */
+          /* A negative prec is the number of excess digits: back up first. */
+          p += (int32_t)prec & ((int32_t)prec >> 31);
+          while (p[-1] == '0') p--;
+          if (p[-1] == '.') p--;
+        } else {
+          /* %e (or %#g) - emit trailing zeroes. */
+          while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
+          p += (int32_t)prec;
+        }
+      } else {
+        p++;
+      }
+      *p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e';
+      *p++ = eprefix; /* + or - */
+      if (nde < 10) *p++ = '0'; /* Always at least two digits of exponent. */
+      p = lj_strfmt_wint(p, nde);
+    } else {
+      /* %f (or, shortly, %g in %f style) */
+      if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) {
+        /* Precision is sufficiently low as to maybe require rounding. */
+        ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1);
+      }
+      g_format_like_f:
+      if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) {
+        /* Decrease precision in order to strip trailing zeroes. */
+        if (ndlo) {
+          /* nd has a fractional part; we need to look at its digits. */
+          char tail[9];
+          uint32_t maxprec = (64 - ndlo) * 9;
+          if (prec >= maxprec) prec = maxprec;
+          else ndlo = 64 - (prec + 8) / 9;
+          i = prec - ((63 - ndlo) * 9);
+          lj_strfmt_wuint9(tail, nd[ndlo]);
+          while (prec && tail[--i] == '0') {
+            prec--;
+            if (!i) {
+              /* ndlo only moves up to 63 here, so no wrap-around is needed. */
+              if (ndlo == 63) { prec = 0; break; }
+              lj_strfmt_wuint9(tail, nd[++ndlo]);
+              i = 9;
+            }
+          }
+        } else {
+          /* nd has no fractional part, so precision goes straight to zero. */
+          prec = 0;
+        }
+      }
+      len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0)
+                     + ((prec | (sf & STRFMT_F_ALT)) != 0);
+      if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8);
+      if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
+        while (width-- > len) *p++ = ' ';
+      }
+      if (prefix) *p++ = prefix;
+      if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
+        while (width-- > len) *p++ = '0';
+      }
+      /* Emit integer part. */
+      p = lj_strfmt_wint(p, nd[ndhi]);
+      i = ndhi;
+      while (i) p = lj_strfmt_wuint9(p, nd[--i]);
+      if ((prec | (sf & STRFMT_F_ALT))) {
+        /* Emit fractional part. */
+        *p++ = '.';
+        /* Emit chunks of 9 digits (this may emit 8 digits too many). */
+        while ((int32_t)prec > 0 && i != ndlo) {
+          i = (i - 1) & 0x3f;
+          p = lj_strfmt_wuint9(p, nd[i]);
+          prec -= 9;
+        }
+        if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) {
+          /* %g (and not %#g) - strip trailing zeroes. */
+          p += (int32_t)prec & ((int32_t)prec >> 31);
+          while (p[-1] == '0') p--;
+          if (p[-1] == '.') p--;
+        } else {
+          /* %f (or %#g) - emit trailing zeroes. */
+          while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
+          p += (int32_t)prec;
+        }
+      }
+    }
+  }
+  /* Left-justified output is padded on the right, always with spaces. */
+  if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
+  return p;
+}
+/*
+** Add formatted floating-point number to buffer.
+** The number is formatted straight into sb (space is reserved by
+** lj_strfmt_wfnum because no output pointer is passed), then the buffer's
+** write position is moved to the end of the new text. Returns sb.
+*/
+SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
+{
+  char *end = lj_strfmt_wfnum(sb, sf, n, NULL);
+  setsbufP(sb, end);
+  return sb;
+}
+/* -- Conversions to strings ---------------------------------------------- */
+/*
+** Convert number to string.
+** The number in o is formatted with STRFMT_G14 into a stack buffer, and a
+** string object is created from the characters written.
+*/
+GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o)
+{
+  char buf[STRFMT_MAXBUF_NUM];
+  char *end = lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf);
+  return lj_str_new(L, buf, (MSize)(end - buf));
+}

diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c new file mode 100644 index 00000000..36b11dc0 --- /dev/null +++ b/src/lj_strfmt_num.c
@@ -0,0 +1,592 @@
	1	/*
	2	** String formatting for floating-point numbers.
	3	** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
	4	** Contributed by Peter Cawley.
	5	*/
	6
	7	#include <stdio.h>
	8
	9	#define lj_strfmt_num_c
	10	#define LUA_CORE
	11
	12	#include "lj_obj.h"
	13	#include "lj_buf.h"
	14	#include "lj_str.h"
	15	#include "lj_strfmt.h"
	16
	17	/* -- Precomputed tables -------------------------------------------------- */
	18
	19	/* Rescale factors to push the exponent of a number towards zero. */
	20	#define RESCALE_EXPONENTS(P, N) \
	21	P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \
	22	P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \
	23	N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \
	24	N(251), N(270), N(289)
	25
	26	#define ONE_E_P(X) 1e+0 ## X
	27	#define ONE_E_N(X) 1e-0 ## X
	28	static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) };
	29	static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) };
	30	#undef ONE_E_N
	31	#undef ONE_E_P
	32
	33	/*
	34	** For p in range -70 through 57, this table encodes pairs (m, e) such that
	35	** 4*2^p <= (uint8_t)m*10^e, and is the smallest value for which this holds.
	36	*/
	37	static const int8_t four_ulp_m_e[] = {
	38	34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19,
	39	-82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16,
	40	45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14,
	41	114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3,
	42	-11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7,
	43	3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103,
	44	-5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3,
	45	16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2,
	46	32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4,
	47	33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34,
	48	7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10,
	49	69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13,
	50	36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15,
	51	19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17
	52	};
	53
	54	/* min(2^32-1, 10^e-1) for e in range 0 through 10 */
	55	static uint32_t ndigits_dec_threshold[] = {
	56	0, 9U, 99U, 999U, 9999U, 99999U, 999999U,
	57	9999999U, 99999999U, 999999999U, 0xffffffffU
	58	};
	59
	60	/* -- Helper functions ---------------------------------------------------- */
	61
	62	/* Compute the number of digits in the decimal representation of x. */
	63	static MSize ndigits_dec(uint32_t x)
	64	{
	65	MSize t = ((lj_fls(x | 1) * 77) >> 8) + 1; /* 2^8/77 is roughly log2(10) */
	66	return t + (x > ndigits_dec_threshold[t]);
	67	}
	68
	69	#define WINT_R(x, sh, sc) \
	70	{ uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
	71
	72	/* Write 9-digit unsigned integer to buffer. */
	73	static char *lj_strfmt_wuint9(char *p, uint32_t u)
	74	{
	75	uint32_t v = u / 10000, w;
	76	u -= v * 10000;
	77	w = v / 10000;
	78	v -= w * 10000;
	79	*p++ = (char)('0'+w);
	80	WINT_R(v, 23, 1000)
	81	WINT_R(v, 12, 100)
	82	WINT_R(v, 10, 10)
	83	*p++ = (char)('0'+v);
	84	WINT_R(u, 23, 1000)
	85	WINT_R(u, 12, 100)
	86	WINT_R(u, 10, 10)
	87	*p++ = (char)('0'+u);
	88	return p;
	89	}
	90	#undef WINT_R
	91
	92	/* -- Extended precision arithmetic --------------------------------------- */
	93
	94	/*
	95	** The "nd" format is a fixed-precision decimal representation for numbers. It
	96	** consists of up to 64 uint32_t values, with each uint32_t storing a value
	97	** in the range [0, 1e9). A number in "nd" format consists of three variables:
	98	**
	99	** uint32_t nd[64];
	100	** uint32_t ndlo;
	101	** uint32_t ndhi;
	102	**
	103	** The integral part of the number is stored in nd[0 ... ndhi], the value of
	104	** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of
	105	** the number is zero, ndlo is zero. Otherwise, the fractional part is stored
	106	** in nd[ndlo ... 63], the value of which is taken to be
	107	** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}.
	108	**
	109	** If the array part had 128 elements rather than 64, then every double would
	110	** have an exact representation in "nd" format. With 64 elements, all integral
	111	** doubles have an exact representation, and all non-integral doubles have
	112	** enough digits to make both %.99e and %.99f do the right thing.
	113	*/
	114
	115	#if LJ_64
	116	#define ND_MUL2K_MAX_SHIFT 29
	117	#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000))
	118	#else
	119	#define ND_MUL2K_MAX_SHIFT 11
	120	#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125)
	121	#endif
	122
	123	/* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). */
	124	static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k,
	125	uint32_t carry_in, SFormat sf)
	126	{
	127	uint32_t i, ndlo = 0, start = 1;
	128	/* Performance hacks. */
	129	if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) {
	130	start = ndhi - (STRFMT_PREC(sf) + 17) / 8;
	131	}
	132	/* Real logic. */
	133	while (k >= ND_MUL2K_MAX_SHIFT) {
	134	for (i = ndlo; i <= ndhi; i++) {
	135	uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in;
	136	carry_in = ND_MUL2K_DIV1E9(val);
	137	nd[i] = (uint32_t)val - carry_in * 1000000000;
	138	}
	139	if (carry_in) {
	140	nd[++ndhi] = carry_in; carry_in = 0;
	141	if (start++ == ndlo) ++ndlo;
	142	}
	143	k -= ND_MUL2K_MAX_SHIFT;
	144	}
	145	if (k) {
	146	for (i = ndlo; i <= ndhi; i++) {
	147	uint64_t val = ((uint64_t)nd[i] << k) | carry_in;
	148	carry_in = ND_MUL2K_DIV1E9(val);
	149	nd[i] = (uint32_t)val - carry_in * 1000000000;
	150	}
	151	if (carry_in) nd[++ndhi] = carry_in;
	152	}
	153	return ndhi;
	154	}
	155
	156	/* Divide nd by 2^k (ndlo is assumed to be zero). */
	157	static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf)
	158	{
	159	uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0;
	160	/* Performance hacks. */
	161	if (!ndhi) {
	162	if (!nd[0]) {
	163	return 0;
	164	} else {
	165	uint32_t s = lj_ffs(nd[0]);
	166	if (s >= k) { nd[0] >>= k; return 0; }
	167	nd[0] >>= s; k -= s;
	168	}
	169	}
	170	if (k > 18) {
	171	if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) {
	172	stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9;
	173	} else {
	174	int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k;
	175	int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114);
	176	stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9;
	177	stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8;
	178	}
	179	}
	180	/* Real logic. */
	181	while (k >= 9) {
	182	uint32_t i = ndhi, carry = 0;
	183	for (;;) {
	184	uint32_t val = nd[i];
	185	nd[i] = (val >> 9) + carry;
	186	carry = (val & 0x1ff) * 1953125;
	187	if (i == ndlo) break;
	188	i = (i - 1) & 0x3f;
	189	}
	190	if (ndlo != stop1 && ndlo != stop2) {
	191	if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
	192	if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
	193	} else if (!nd[ndhi]) {
	194	if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
	195	else return ndlo;
	196	}
	197	k -= 9;
	198	}
	199	if (k) {
	200	uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0;
	201	for (;;) {
	202	uint32_t val = nd[i];
	203	nd[i] = (val >> k) + carry;
	204	carry = (val & mask) * mul;
	205	if (i == ndlo) break;
	206	i = (i - 1) & 0x3f;
	207	}
	208	if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
	209	}
	210	return ndlo;
	211	}
	212
	213	/* Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9). */
	214	static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e)
	215	{
	216	uint32_t i, carry;
	217	if (e >= 0) {
	218	i = (uint32_t)e/9;
	219	carry = m * (ndigits_dec_threshold[e - (int32_t)i*9] + 1);
	220	} else {
	221	int32_t f = (e-8)/9;
	222	i = (uint32_t)(64 + f);
	223	carry = m * (ndigits_dec_threshold[e - f*9] + 1);
	224	}
	225	for (;;) {
	226	uint32_t val = nd[i] + carry;
	227	if (LJ_UNLIKELY(val >= 1000000000)) {
	228	val -= 1000000000;
	229	nd[i] = val;
	230	if (LJ_UNLIKELY(i == ndhi)) {
	231	ndhi = (ndhi + 1) & 0x3f;
	232	nd[ndhi] = 1;
	233	break;
	234	}
	235	carry = 1;
	236	i = (i + 1) & 0x3f;
	237	} else {
	238	nd[i] = val;
	239	break;
	240	}
	241	}
	242	return ndhi;
	243	}
	244
	245	/* Test whether two "nd" values are equal in their most significant digits. */
	246	static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen,
	247	MSize prec)
	248	{
	249	char nd9[9], ref9[9];
	250	if (hilen <= prec) {
	251	if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0;
	252	prec -= hilen; ref--; ndhi = (ndhi - 1) & 0x3f;
	253	if (prec >= 9) {
	254	if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0;
	255	prec -= 9; ref--; ndhi = (ndhi - 1) & 0x3f;
	256	}
	257	} else {
	258	prec -= hilen - 9;
	259	}
	260	lua_assert(prec < 9);
	261	lj_strfmt_wuint9(nd9, nd[ndhi]);
	262	lj_strfmt_wuint9(ref9, *ref);
	263	return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5');
	264	}
	265
	266	/* -- Formatted conversions to buffer ------------------------------------- */
	267
	268	/* Write formatted floating-point number to either sb or p. */
	269	static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p)
	270	{
	271	MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len;
	272	TValue t;
	273	t.n = n;
	274	if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) {
	275	/* Handle non-finite values uniformly for %a, %e, %f, %g. */
	276	int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
	277	if (((t.u32.hi & 0x000fffff) \| t.u32.lo) != 0) {
	278	ch ^= ('n' << 16) \| ('a' << 8) \| 'n';
	279	if ((sf & STRFMT_F_SPACE)) prefix = ' ';
	280	} else {
	281	ch ^= ('i' << 16) \| ('n' << 8) \| 'f';
	282	if ((t.u32.hi & 0x80000000)) prefix = '-';
	283	else if ((sf & STRFMT_F_PLUS)) prefix = '+';
	284	else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
	285	}
	286	len = 3 + (prefix != 0);
	287	if (!p) p = lj_buf_more(sb, width > len ? width : len);
	288	if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
	289	if (prefix) *p++ = prefix;
	290	p++ = (char)(ch >> 16); p++ = (char)(ch >> 8); *p++ = (char)ch;
	291	} else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) {
	292	/* %a */
	293	const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEFPX"
	294	: "0123456789abcdefpx";
	295	int32_t e = (t.u32.hi >> 20) & 0x7ff;
	296	char prefix = 0, eprefix = '+';
	297	if (t.u32.hi & 0x80000000) prefix = '-';
	298	else if ((sf & STRFMT_F_PLUS)) prefix = '+';
	299	else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
	300	t.u32.hi &= 0xfffff;
	301	if (e) {
	302	t.u32.hi \|= 0x100000;
	303	e -= 1023;
	304	} else if (t.u32.lo \| t.u32.hi) {
	305	/* Non-zero denormal - normalise it. */
	306	uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo);
	307	e = -1022 - shift;
	308	t.u64 <<= shift;
	309	}
	310	/* abs(n) == t.u64 * 2^(e - 52) */
	311	/* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */
	312	if ((int32_t)prec < 0) {
	313	/* Default precision: use smallest precision giving exact result. */
	314	prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi\|0x100000)/4;
	315	} else if (prec < 13) {
	316	/* Precision is sufficiently low as to maybe require rounding. */
	317	t.u64 += (((uint64_t)1) << (51 - prec*4));
	318	}
	319	if (e < 0) {
	320	eprefix = '-';
	321	e = -e;
	322	}
	323	len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0)
	324	+ ((prec \| (sf & STRFMT_F_ALT)) != 0);
	325	if (!p) p = lj_buf_more(sb, width > len ? width : len);
	326	if (!(sf & (STRFMT_F_LEFT \| STRFMT_F_ZERO))) {
	327	while (width-- > len) *p++ = ' ';
	328	}
	329	if (prefix) *p++ = prefix;
	330	*p++ = '0';
	331	p++ = hexdig[17]; / x or X */
	332	if ((sf & (STRFMT_F_LEFT \| STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
	333	while (width-- > len) *p++ = '0';
	334	}
	335	p++ = '0' + (t.u32.hi >> 20); / Usually '1', sometimes '0' or '2'. */
	336	if ((prec \| (sf & STRFMT_F_ALT))) {
	337	/* Emit fractional part. */
	338	char *q = p + 1 + prec;
	339	*p = '.';
	340	if (prec < 13) t.u64 >>= (52 - prec*4);
	341	else while (prec > 13) p[prec--] = '0';
	342	while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; }
	343	p = q;
	344	}
	345	p++ = hexdig[16]; / p or P */
	346	p++ = eprefix; / + or - */
	347	p = lj_strfmt_wint(p, e);
	348	} else {
	349	/* %e or %f or %g - begin by converting n to "nd" format. */
	350	uint32_t nd[64];
	351	uint32_t ndhi = 0, ndlo, i;
	352	int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0;
	353	char prefix = 0, *q;
	354	if (t.u32.hi & 0x80000000) prefix = '-';
	355	else if ((sf & STRFMT_F_PLUS)) prefix = '+';
	356	else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
	357	prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */
	358	if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) {
	359	/* %g - decrement precision if non-zero (to make it like %e). */
	360	prec--;
	361	prec ^= (uint32_t)((int32_t)prec >> 31);
	362	}
	363	if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) {
	364	/* Precision is sufficiently low that rescaling will probably work. */
	365	if ((ndebias = rescale_e[e >> 6])) {
	366	t.n = n * rescale_n[e >> 6];
	367	if (LJ_UNLIKELY(!e)) t.n *= 1e10, ndebias -= 10;
	368	t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */
	369	nd[0] = 0x100000 \| (t.u32.hi & 0xfffff);
	370	e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29);
	371	goto load_t_lo; rescale_failed:
	372	t.n = n;
	373	e = (t.u32.hi >> 20) & 0x7ff;
	374	ndebias = ndhi = 0;
	375	}
	376	}
	377	nd[0] = t.u32.hi & 0xfffff;
	378	if (e == 0) e++; else nd[0] \|= 0x100000;
	379	e -= 1043;
	380	if (t.u32.lo) {
	381	e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo:
	382	#if ND_MUL2K_MAX_SHIFT >= 29
	383	nd[0] = (nd[0] << 3) \| (t.u32.lo >> 29);
	384	ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf);
	385	#elif ND_MUL2K_MAX_SHIFT >= 11
	386	ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf);
	387	ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf);
	388	ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo << 1) & 0x7ff, sf);
	389	#else
	390	#error "ND_MUL2K_MAX_SHIFT too small"
	391	#endif
	392	}
	393	if (e >= 0) {
	394	ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf);
	395	ndlo = 0;
	396	} else {
	397	ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf);
	398	if (ndhi && !nd[ndhi]) ndhi--;
	399	}
	400	/* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */
	401	if ((sf & STRFMT_T_FP_E)) {
	402	/* %e or %g - assume %e and start by calculating nd's exponent (nde). */
	403	char eprefix = '+';
	404	int32_t nde = -1;
	405	MSize hilen;
	406	if (ndlo && !nd[ndhi]) {
	407	ndhi = 64; do {} while (!nd[--ndhi]);
	408	nde -= 64 * 9;
	409	}
	410	hilen = ndigits_dec(nd[ndhi]);
	411	nde += ndhi * 9 + hilen;
	412	if (ndebias) {
	413	/*
	414	** Rescaling was performed, but this introduced some error, and might
	415	** have pushed us across a rounding boundary. We check whether this
	416	** error affected the result by introducing even more error (2ulp in
	417	** either direction), and seeing whether a roundary boundary was
	418	** crossed. Having already converted the -2ulp case, we save off its
	419	** most significant digits, convert the +2ulp case, and compare them.
	420	*/
	421	int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29)
	422	+ (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12));
	423	const int8_t m_e = four_ulp_m_e + eidx 2;
	424	lua_assert(0 <= eidx && eidx < 128);
	425	nd[33] = nd[ndhi];
	426	nd[32] = nd[(ndhi - 1) & 0x3f];
	427	nd[31] = nd[(ndhi - 2) & 0x3f];
	428	nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]);
	429	if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) {
	430	goto rescale_failed;
	431	}
	432	}
	433	if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) {
	434	/* Precision is sufficiently low as to maybe require rounding. */
	435	ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1);
	436	nde += (hilen != ndigits_dec(nd[ndhi]));
	437	}
	438	nde += ndebias;
	439	if ((sf & STRFMT_T_FP_F)) {
	440	/* %g */
	441	if ((int32_t)prec >= nde && nde >= -4) {
	442	if (nde < 0) ndhi = 0;
	443	prec -= nde;
	444	goto g_format_like_f;
	445	} else if (!(sf & STRFMT_F_ALT) && prec && width > 5) {
	446	/* Decrease precision in order to strip trailing zeroes. */
	447	char tail[9];
	448	uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9;
	449	if (prec >= maxprec) prec = maxprec;
	450	else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f;
	451	i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10;
	452	lj_strfmt_wuint9(tail, nd[ndlo]);
	453	while (prec && tail[--i] == '0') {
	454	prec--;
	455	if (!i) {
	456	if (ndlo == ndhi) { prec = 0; break; }
	457	lj_strfmt_wuint9(tail, nd[++ndlo]);
	458	i = 9;
	459	}
	460	}
	461	}
	462	}
	463	if (nde < 0) {
	464	/* Make nde non-negative. */
	465	eprefix = '-';
	466	nde = -nde;
	467	}
	468	len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10)
	469	+ ((prec \| (sf & STRFMT_F_ALT)) != 0);
	470	if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5);
	471	if (!(sf & (STRFMT_F_LEFT \| STRFMT_F_ZERO))) {
	472	while (width-- > len) *p++ = ' ';
	473	}
	474	if (prefix) *p++ = prefix;
	475	if ((sf & (STRFMT_F_LEFT \| STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
	476	while (width-- > len) *p++ = '0';
	477	}
	478	q = lj_strfmt_wint(p + 1, nd[ndhi]);
	479	p[0] = p[1]; /* Put leading digit in the correct place. */
	480	if ((prec \| (sf & STRFMT_F_ALT))) {
	481	/* Emit fractional part. */
	482	p[1] = '.'; p += 2;
	483	prec -= (MSize)(q - p); p = q; /* Account for digits already emitted. */
	484	/* Then emit chunks of 9 digits (this may emit 8 digits too many). */
	485	for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) {
	486	i = (i - 1) & 0x3f;
	487	p = lj_strfmt_wuint9(p, nd[i]);
	488	}
	489	if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) {
	490	/* %g (and not %#g) - strip trailing zeroes. */
	491	p += (int32_t)prec & ((int32_t)prec >> 31);
	492	while (p[-1] == '0') p--;
	493	if (p[-1] == '.') p--;
	494	} else {
	495	/* %e (or %#g) - emit trailing zeroes. */
	496	while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
	497	p += (int32_t)prec;
	498	}
	499	} else {
	500	p++;
	501	}
	502	*p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e';
	503	p++ = eprefix; / + or - */
	504	if (nde < 10) p++ = '0'; / Always at least two digits of exponent. */
	505	p = lj_strfmt_wint(p, nde);
	506	} else {
	507	/* %f (or, shortly, %g in %f style) */
	508	if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) {
	509	/* Precision is sufficiently low as to maybe require rounding. */
	510	ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1);
	511	}
	512	g_format_like_f:
	513	if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) {
	514	/* Decrease precision in order to strip trailing zeroes. */
	515	if (ndlo) {
	516	/* nd has a fractional part; we need to look at its digits. */
	517	char tail[9];
	518	uint32_t maxprec = (64 - ndlo) * 9;
	519	if (prec >= maxprec) prec = maxprec;
	520	else ndlo = 64 - (prec + 8) / 9;
	521	i = prec - ((63 - ndlo) * 9);
	522	lj_strfmt_wuint9(tail, nd[ndlo]);
	523	while (prec && tail[--i] == '0') {
	524	prec--;
	525	if (!i) {
	526	if (ndlo == 63) { prec = 0; break; }
	527	lj_strfmt_wuint9(tail, nd[++ndlo]);
	528	i = 9;
	529	}
	530	}
	531	} else {
	532	/* nd has no fractional part, so precision goes straight to zero. */
	533	prec = 0;
	534	}
	535	}
	536	len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0)
	537	+ ((prec \| (sf & STRFMT_F_ALT)) != 0);
	538	if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8);
	539	if (!(sf & (STRFMT_F_LEFT \| STRFMT_F_ZERO))) {
	540	while (width-- > len) *p++ = ' ';
	541	}
	542	if (prefix) *p++ = prefix;
	543	if ((sf & (STRFMT_F_LEFT \| STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
	544	while (width-- > len) *p++ = '0';
	545	}
	546	/* Emit integer part. */
	547	p = lj_strfmt_wint(p, nd[ndhi]);
	548	i = ndhi;
	549	while (i) p = lj_strfmt_wuint9(p, nd[--i]);
	550	if ((prec \| (sf & STRFMT_F_ALT))) {
	551	/* Emit fractional part. */
	552	*p++ = '.';
	553	/* Emit chunks of 9 digits (this may emit 8 digits too many). */
	554	while ((int32_t)prec > 0 && i != ndlo) {
	555	i = (i - 1) & 0x3f;
	556	p = lj_strfmt_wuint9(p, nd[i]);
	557	prec -= 9;
	558	}
	559	if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) {
	560	/* %g (and not %#g) - strip trailing zeroes. */
	561	p += (int32_t)prec & ((int32_t)prec >> 31);
	562	while (p[-1] == '0') p--;
	563	if (p[-1] == '.') p--;
	564	} else {
	565	/* %f (or %#g) - emit trailing zeroes. */
	566	while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
	567	p += (int32_t)prec;
	568	}
	569	}
	570	}
	571	}
	572	if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
	573	return p;
	574	}
	575
	576	/* Add formatted floating-point number to buffer. */
	577	SBuf lj_strfmt_putfnum(SBuf sb, SFormat sf, lua_Number n)
	578	{
	579	setsbufP(sb, lj_strfmt_wfnum(sb, sf, n, NULL));
	580	return sb;
	581	}
	582
	583	/* -- Conversions to strings ---------------------------------------------- */
	584
	585	/* Convert number to string. */
	586	GCstr * LJ_FASTCALL lj_strfmt_num(lua_State L, cTValue o)
	587	{
	588	char buf[STRFMT_MAXBUF_NUM];
	589	MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf);
	590	return lj_str_new(L, buf, len);
	591	}
	592