re-implement mrb_float_to_str()

The new implementation is backwards incompatible, but I couldn't find any usage outside mruby and I also couldn't think of a different and good name. All ISO C99 printf conversion specifiers for floating point numbers and an optional precision are supported. It is largely based on code from the MIT licensed musl libc (http://www.musl-libc.org/) and its floating point printing is exact (unlike the current code behind Float#to_s).
author: cremno <[email protected]> 2015-02-13 09:33:03 +0100
committer: cremno <[email protected]> 2015-02-13 09:33:03 +0100
commit: 6f893b5183450384e89b3d8f03f9ef32a7522624 (patch)
tree: 481913e69c91974e18c4e2355ef7778f9f0ae93d
parent: 789177c8fe6c3bcb3833e76f95ecbd41b43b83fd (diff)
download: mruby-6f893b5183450384e89b3d8f03f9ef32a7522624.tar.gz
mruby-6f893b5183450384e89b3d8f03f9ef32a7522624.zip
5 files changed, 389 insertions, 153 deletions
diff --git a/include/mruby/numeric.h b/include/mruby/numeric.h
index 3020dbd93..94cdbccee 100644
--- a/include/mruby/numeric.h
+++ b/include/mruby/numeric.h
@@ -17,6 +17,8 @@ extern "C" {
 
 MRB_API mrb_value mrb_flo_to_fixnum(mrb_state *mrb, mrb_value val);
 MRB_API mrb_value mrb_fixnum_to_str(mrb_state *mrb, mrb_value x, int base);
+/* ArgumentError if format string doesn't match /%(\.[0-9]+)?[aAeEfFgG]/ */
+MRB_API mrb_value mrb_float_to_str(mrb_state *mrb, mrb_value x, const char *fmt);
 MRB_API mrb_float mrb_to_flo(mrb_state *mrb, mrb_value x);
 
 mrb_value mrb_fixnum_plus(mrb_state *mrb, mrb_value x, mrb_value y);
diff --git a/include/mruby/value.h b/include/mruby/value.h
index 9297f6b28..f27800366 100644
--- a/include/mruby/value.h
+++ b/include/mruby/value.h
@@ -34,11 +34,9 @@ struct mrb_state;
 
 #ifdef MRB_USE_FLOAT
   typedef float mrb_float;
-# define mrb_float_to_str(buf, i) sprintf(buf, "%.7e", i)
 # define str_to_mrb_float(buf) strtof(buf, NULL)
 #else
   typedef double mrb_float;
-# define mrb_float_to_str(buf, i) sprintf(buf, "%.16e", i)
 # define str_to_mrb_float(buf) strtod(buf, NULL)
 #endif
 
diff --git a/src/dump.c b/src/dump.c
index 24ddd90b8..3113a71d3 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -18,6 +18,12 @@
 
 #ifdef ENABLE_STDIO
 
+#ifdef MRB_USE_FLOAT
+#define MRB_FLOAT_FMT "%.9e"
+#else
+#define MRB_FLOAT_FMT "%.16e"
+#endif
+
 static size_t get_irep_record_size_1(mrb_state *mrb, mrb_irep *irep);
 
 #if UINT32_MAX > SIZE_MAX
@@ -111,7 +117,6 @@ get_pool_block_size(mrb_state *mrb, mrb_irep *irep)
   size_t size = 0;
   size_t pool_no;
   mrb_value str;
-  char buf[32];
 
   size += sizeof(uint32_t); /* plen */
   size += irep->plen * (sizeof(uint8_t) + sizeof(uint16_t)); /* len(n) */
@@ -130,9 +135,9 @@ get_pool_block_size(mrb_state *mrb, mrb_irep *irep)
       break;
 
     case MRB_TT_FLOAT:
+      str = mrb_float_to_str(mrb, irep->pool[pool_no], MRB_FLOAT_FMT);
       {
-        int len;
-        len = mrb_float_to_str(buf, mrb_float(irep->pool[pool_no]));
+        mrb_int len = RSTRING_LEN(str);
         mrb_assert_int_fit(mrb_int, len, size_t, SIZE_MAX);
         size += (size_t)len;
       }
@@ -163,7 +168,6 @@ write_pool_block(mrb_state *mrb, mrb_irep *irep, uint8_t *buf)
   uint16_t len;
   mrb_value str;
   const char *char_ptr;
-  char char_buf[30];
 
   cur += uint32_to_bin(irep->plen, cur); /* number of pool */
 
@@ -186,13 +190,15 @@ write_pool_block(mrb_state *mrb, mrb_irep *irep, uint8_t *buf)
 
     case MRB_TT_FLOAT:
       cur += uint8_to_bin(IREP_TT_FLOAT, cur); /* data type */
+      str = mrb_float_to_str(mrb, irep->pool[pool_no], MRB_FLOAT_FMT);
+      char_ptr = RSTRING_PTR(str);
       {
-        int tlen;
-        tlen = mrb_float_to_str(char_buf, mrb_float(irep->pool[pool_no]));
-        mrb_assert_int_fit(int, tlen, uint16_t, UINT16_MAX);
+        mrb_int tlen;
+
+        tlen = RSTRING_LEN(str);
+        mrb_assert_int_fit(mrb_int, tlen, uint16_t, UINT16_MAX);
         len = (uint16_t)tlen;
       }
-      char_ptr = &char_buf[0];
       break;
 
     case MRB_TT_STRING:
diff --git a/src/fmt_fp.c b/src/fmt_fp.c
new file mode 100644
index 000000000..9345c31d1
--- /dev/null
+++ b/src/fmt_fp.c
@@ -0,0 +1,367 @@
+/*
+
+Most code in this file originates from musl (src/stdio/vfprintf.c)
+which, just like mruby itself, is licensed under the MIT license.
+
+Copyright © 2005-2014 Rich Felker, et al.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+#include <limits.h>
+#include <string.h>
+#include <stdint.h>
+#include <math.h>
+#include <float.h>
+#include <ctype.h>
+
+#include "mruby.h"
+#include "mruby/string.h"
+
+struct fmt_args {
+  mrb_state *mrb;
+  mrb_value str;
+};
+
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+
+/* Convenient bit representation for modifier flags, which all fall
+ * within 31 codepoints of the space character. */
+
+#define ALT_FORM   (1U<<('#'-' '))
+#define ZERO_PAD   (1U<<('0'-' '))
+#define LEFT_ADJ   (1U<<('-'-' '))
+#define PAD_POS    (1U<<(' '-' '))
+#define MARK_POS   (1U<<('+'-' '))
+
+static void
+out(struct fmt_args *f, const char *s, size_t l)
+{
+  mrb_str_cat(f->mrb, f->str, s, l);
+}
+
+static void
+pad(struct fmt_args *f, char c, int w, int l, int fl)
+{
+  char pad[256];
+  if (fl & (LEFT_ADJ | ZERO_PAD) || l >= w) return;
+  l = w - l;
+  memset(pad, c, l>sizeof pad ? sizeof pad : l);
+  for (; l >= sizeof pad; l -= sizeof pad)
+    out(f, pad, sizeof pad);
+  out(f, pad, l);
+}
+
+static const char xdigits[16] = {
+  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
+};
+
+static char*
+fmt_u(uint32_t x, char *s)
+{
+  for (; x; x /= 10) *--s = '0' + x % 10;
+  return s;
+}
+
+/* Do not override this check. The floating point printing code below
+ * depends on the float.h constants being right. If they are wrong, it
+ * may overflow the stack. */
+#if LDBL_MANT_DIG == 53
+typedef char compiler_defines_long_double_incorrectly[9-(int)sizeof(long double)];
+#endif
+
+static int
+fmt_fp(struct fmt_args *f, long double y, int w, int p, int fl, int t)
+{
+  uint32_t big[(LDBL_MANT_DIG+28)/29 + 1          // mantissa expansion
+    + (LDBL_MAX_EXP+LDBL_MANT_DIG+28+8)/9]; // exponent expansion
+  uint32_t *a, *d, *r, *z;
+  uint32_t i;
+  int e2=0, e, j, l;
+  char buf[9+LDBL_MANT_DIG/4], *s;
+  const char *prefix="-0X+0X 0X-0x+0x 0x";
+  int pl;
+  char ebuf0[3*sizeof(int)], *ebuf=&ebuf0[3*sizeof(int)], *estr;
+
+  pl=1;
+  if (signbit(y)) {
+    y=-y;
+  } else if (fl & MARK_POS) {
+    prefix+=3;
+  } else if (fl & PAD_POS) {
+    prefix+=6;
+  } else prefix++, pl=0;
+
+  if (!isfinite(y)) {
+    char *ss = (t&32)?"inf":"INF";
+    if (y!=y) ss=(t&32)?"nan":"NAN";
+    pad(f, ' ', w, 3+pl, fl&~ZERO_PAD);
+    out(f, prefix, pl);
+    out(f, ss, 3);
+    pad(f, ' ', w, 3+pl, fl^LEFT_ADJ);
+    return MAX(w, 3+pl);
+  }
+
+  y = frexpl(y, &e2) * 2;
+  if (y) e2--;
+
+  if ((t|32)=='a') {
+    long double round = 8.0;
+    int re;
+
+    if (t&32) prefix += 9;
+    pl += 2;
+
+    if (p<0 || p>=LDBL_MANT_DIG/4-1) re=0;
+    else re=LDBL_MANT_DIG/4-1-p;
+
+    if (re) {
+      while (re--) round*=16;
+      if (*prefix=='-') {
+        y=-y;
+        y-=round;
+        y+=round;
+        y=-y;
+      }
+      else {
+        y+=round;
+        y-=round;
+      }
+    }
+
+    estr=fmt_u(e2<0 ? -e2 : e2, ebuf);
+    if (estr==ebuf) *--estr='0';
+    *--estr = (e2<0 ? '-' : '+');
+    *--estr = t+('p'-'a');
+
+    s=buf;
+    do {
+      int x=y;
+      *s++=xdigits[x]|(t&32);
+      y=16*(y-x);
+      if (s-buf==1 && (y||p>0||(fl&ALT_FORM))) *s++='.';
+    } while (y);
+
+    if (p && s-buf-2 < p)
+      l = (p+2) + (ebuf-estr);
+    else
+      l = (s-buf) + (ebuf-estr);
+
+    pad(f, ' ', w, pl+l, fl);
+    out(f, prefix, pl);
+    pad(f, '0', w, pl+l, fl^ZERO_PAD);
+    out(f, buf, s-buf);
+    pad(f, '0', l-(ebuf-estr)-(s-buf), 0, 0);
+    out(f, estr, ebuf-estr);
+    pad(f, ' ', w, pl+l, fl^LEFT_ADJ);
+    return MAX(w, pl+l);
+  }
+  if (p<0) p=6;
+
+  if (y) y *= 268435456.0, e2-=28;
+
+  if (e2<0) a=r=z=big;
+  else a=r=z=big+sizeof(big)/sizeof(*big) - LDBL_MANT_DIG - 1;
+
+  do {
+    *z = y;
+    y = 1000000000*(y-*z++);
+  } while (y);
+
+  while (e2>0) {
+    uint32_t carry=0;
+    int sh=MIN(29,e2);
+    for (d=z-1; d>=a; d--) {
+      uint64_t x = ((uint64_t)*d<<sh)+carry;
+      *d = x % 1000000000;
+      carry = x / 1000000000;
+    }
+    if (carry) *--a = carry;
+    while (z>a && !z[-1]) z--;
+    e2-=sh;
+  }
+  while (e2<0) {
+    uint32_t carry=0, *b;
+    int sh=MIN(9,-e2), need=1+(p+LDBL_MANT_DIG/3+8)/9;
+    for (d=a; d<z; d++) {
+      uint32_t rm = *d & (1<<sh)-1;
+      *d = (*d>>sh) + carry;
+      carry = (1000000000>>sh) * rm;
+    }
+    if (!*a) a++;
+    if (carry) *z++ = carry;
+    /* Avoid (slow!) computation past requested precision */
+    b = (t|32)=='f' ? r : a;
+    if (z-b > need) z = b+need;
+    e2+=sh;
+  }
+
+  if (a<z) for (i=10, e=9*(r-a); *a>=i; i*=10, e++);
+  else e=0;
+
+  /* Perform rounding: j is precision after the radix (possibly neg) */
+  j = p - ((t|32)!='f')*e - ((t|32)=='g' && p);
+  if (j < 9*(z-r-1)) {
+    uint32_t x;
+    /* We avoid C's broken division of negative numbers */
+    d = r + 1 + ((j+9*LDBL_MAX_EXP)/9 - LDBL_MAX_EXP);
+    j += 9*LDBL_MAX_EXP;
+    j %= 9;
+    for (i=10, j++; j<9; i*=10, j++);
+    x = *d % i;
+    /* Are there any significant digits past j? */
+    if (x || d+1!=z) {
+      long double round = 2/LDBL_EPSILON;
+      long double small;
+      if (*d/i & 1) round += 2;
+      if (x<i/2) small=0.5;
+      else if (x==i/2 && d+1==z) small=1.0;
+      else small=1.5;
+      if (pl && *prefix=='-') round*=-1, small*=-1;
+      *d -= x;
+      /* Decide whether to round by probing round+small */
+      if (round+small != round) {
+        *d = *d + i;
+        while (*d > 999999999) {
+          *d--=0;
+          if (d<a) *--a=0;
+          (*d)++;
+        }
+        for (i=10, e=9*(r-a); *a>=i; i*=10, e++);
+      }
+    }
+    if (z>d+1) z=d+1;
+  }
+  for (; z>a && !z[-1]; z--);
+  
+  if ((t|32)=='g') {
+    if (!p) p++;
+    if (p>e && e>=-4) {
+      t--;
+      p-=e+1;
+    }
+    else {
+      t-=2;
+      p--;
+    }
+    if (!(fl&ALT_FORM)) {
+      /* Count trailing zeros in last place */
+      if (z>a && z[-1]) for (i=10, j=0; z[-1]%i==0; i*=10, j++);
+      else j=9;
+      if ((t|32)=='f')
+        p = MIN(p,MAX(0,9*(z-r-1)-j));
+      else
+        p = MIN(p,MAX(0,9*(z-r-1)+e-j));
+    }
+  }
+  l = 1 + p + (p || (fl&ALT_FORM));
+  if ((t|32)=='f') {
+    if (e>0) l+=e;
+  }
+  else {
+    estr=fmt_u(e<0 ? -e : e, ebuf);
+    while(ebuf-estr<2) *--estr='0';
+    *--estr = (e<0 ? '-' : '+');
+    *--estr = t;
+    l += ebuf-estr;
+  }
+
+  pad(f, ' ', w, pl+l, fl);
+  out(f, prefix, pl);
+  pad(f, '0', w, pl+l, fl^ZERO_PAD);
+
+  if ((t|32)=='f') {
+    if (a>r) a=r;
+    for (d=a; d<=r; d++) {
+      char *ss = fmt_u(*d, buf+9);
+      if (d!=a) while (ss>buf) *--ss='0';
+      else if (ss==buf+9) *--ss='0';
+      out(f, ss, buf+9-ss);
+    }
+    if (p || (fl&ALT_FORM)) out(f, ".", 1);
+    for (; d<z && p>0; d++, p-=9) {
+      char *ss = fmt_u(*d, buf+9);
+      while (ss>buf) *--ss='0';
+      out(f, ss, MIN(9,p));
+    }
+    pad(f, '0', p+9, 9, 0);
+  }
+  else {
+    if (z<=a) z=a+1;
+    for (d=a; d<z && p>=0; d++) {
+      char *ss = fmt_u(*d, buf+9);
+      if (ss==buf+9) *--ss='0';
+      if (d!=a) while (ss>buf) *--ss='0';
+      else {
+        out(f, ss++, 1);
+        if (p>0||(fl&ALT_FORM)) out(f, ".", 1);
+      }
+      out(f, ss, MIN(buf+9-ss, p));
+      p -= buf+9-ss;
+    }
+    pad(f, '0', p+18, 18, 0);
+    out(f, estr, ebuf-estr);
+  }
+
+  pad(f, ' ', w, pl+l, fl^LEFT_ADJ);
+
+  return MAX(w, pl+l);
+}
+
+static int
+fmt_core(struct fmt_args *f, const char *fmt, mrb_float flo)
+{
+  int p;
+
+  if (*fmt != '%') {
+    return -1;
+  }
+  ++fmt;
+
+  if (*fmt == '.') {
+    ++fmt;
+    for (p = 0; ISDIGIT(*fmt); ++fmt) {
+      p = 10 * p + (*fmt - '0');
+    }
+  }
+  else {
+    p = -1;
+  }
+
+  switch (*fmt) {
+  case 'e': case 'f': case 'g': case 'a':
+  case 'E': case 'F': case 'G': case 'A':
+    return fmt_fp(f, flo, 0, p, 0, *fmt);
+  default:
+    return -1;
+  } 
+}
+
+mrb_value
+mrb_float_to_str(mrb_state *mrb, mrb_value flo, const char *fmt)
+{
+  struct fmt_args f = { mrb, mrb_str_buf_new(mrb, 24) };
+  if (fmt_core(&f, fmt, mrb_float(flo)) < 0) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format string");
+  }
+  return f.str;
+}
diff --git a/src/numeric.c b/src/numeric.c
index e01e661a5..8b6ec4c88 100644
--- a/src/numeric.c
+++ b/src/numeric.c
@@ -18,13 +18,9 @@
 #define floor(f) floorf(f)
 #define ceil(f) ceilf(f)
 #define fmod(x,y) fmodf(x,y)
-#define FLO_MAX_DIGITS 7
-#define FLO_MAX_SIGN_LENGTH 3
-#define FLO_EPSILON FLT_EPSILON
+#define MRB_FLO_TO_STR_FMT "%.7g"
 #else
-#define FLO_MAX_DIGITS 14
-#define FLO_MAX_SIGN_LENGTH 10
-#define FLO_EPSILON DBL_EPSILON
+#define MRB_FLO_TO_STR_FMT "%.14g"
 #endif
 
 MRB_API mrb_float
@@ -107,142 +103,6 @@ num_div(mrb_state *mrb, mrb_value x)
  *  representation.
  */
 
-static mrb_value
-mrb_flo_to_str(mrb_state *mrb, mrb_float flo)
-{
-  double n = (double)flo;
-  int max_digits = FLO_MAX_DIGITS;
-
-  if (isnan(n)) {
-    return mrb_str_new_lit(mrb, "NaN");
-  }
-  else if (isinf(n)) {
-    if (n < 0) {
-      return mrb_str_new_lit(mrb, "-inf");
-    }
-    else {
-      return mrb_str_new_lit(mrb, "inf");
-    }
-  }
-  else {
-    int digit;
-    int m = 0;
-    int exp;
-    mrb_bool e = FALSE;
-    char s[48];
-    char *c = &s[0];
-    int length = 0;
-
-    if (signbit(n)) {
-      n = -n;
-      *(c++) = '-';
-    }
-
-    if (n != 0.0) {
-      if (n > 1.0) {
-        exp = (int)floor(log10(n));
-      }
-      else {
-        exp = (int)-ceil(-log10(n));
-      }
-    }
-    else {
-      exp = 0;
-    }
-
-    /* preserve significands */
-    if (exp < 0) {
-      int i, beg = -1, end = 0;
-      double f = n;
-      double fd = 0;
-      for (i = 0; i < FLO_MAX_DIGITS; ++i) {
-        f = (f - fd) * 10.0;
-        fd = floor(f + FLO_EPSILON);
-        if (fd != 0) {
-          if (beg < 0) beg = i;
-          end = i + 1;
-        }
-      }
-      if (beg >= 0) length = end - beg;
-      if (length > FLO_MAX_SIGN_LENGTH) length = FLO_MAX_SIGN_LENGTH;
-    }
-
-    if (abs(exp) + length >= FLO_MAX_DIGITS) {
-      /* exponent representation */
-      e = TRUE;
-      n = n / pow(10.0, exp);
-      if (isinf(n)) {
-        if (s < c) {            /* s[0] == '-' */
-          return mrb_str_new_lit(mrb, "-0.0");
-        }
-        else {
-          return mrb_str_new_lit(mrb, "0.0");
-        }
-      }
-    }
-    else {
-      /* un-exponent (normal) representation */
-      if (exp > 0) {
-        m = exp;
-      }
-    }
-
-    /* puts digits */
-    while (max_digits >= 0) {
-      double weight = (m < 0) ? 0.0 : pow(10.0, m);
-      double fdigit = (m < 0) ? n * 10.0 : n / weight;
-
-      if (fdigit < 0) fdigit = n = 0;
-      if (m < -1 && fdigit < FLO_EPSILON) {
-        if (e || exp > 0 || m <= -abs(exp)) {
-          break;
-        }
-      }
-      digit = (int)floor(fdigit + FLO_EPSILON);
-      if (m == 0 && digit > 9) {
-        n /= 10.0;
-        exp++;
-        continue;
-      }
-      *(c++) = '0' + digit;
-      n = (m < 0) ? n * 10.0 - digit : n - (digit * weight);
-      max_digits--;
-      if (m-- == 0) {
-        *(c++) = '.';
-      }
-    }
-    if (c[-1] == '0') {
-      while (&s[0] < c && c[-1] == '0') {
-        c--;
-      }
-      c++;
-    }
-
-    if (e) {
-      *(c++) = 'e';
-      if (exp > 0) {
-        *(c++) = '+';
-      }
-      else {
-        *(c++) = '-';
-        exp = -exp;
-      }
-
-      if (exp >= 100) {
-        *(c++) = '0' + exp / 100;
-        exp -= exp / 100 * 100;
-      }
-
-      *(c++) = '0' + exp / 10;
-      *(c++) = '0' + exp % 10;
-    }
-
-    *c = '\0';
-
-    return mrb_str_new(mrb, &s[0], c - &s[0]);
-  }
-}
-
 /* 15.2.9.3.16(x) */
 /*
  *  call-seq:
@@ -257,7 +117,10 @@ mrb_flo_to_str(mrb_state *mrb, mrb_float flo)
 static mrb_value
 flo_to_s(mrb_state *mrb, mrb_value flt)
 {
-  return mrb_flo_to_str(mrb, mrb_float(flt));
+  if (isnan(mrb_float(flt))) {
+    return mrb_str_new_lit(mrb, "NaN");
+  }
+  return mrb_float_to_str(mrb, flt, MRB_FLO_TO_STR_FMT);
 }
 
 /* 15.2.9.3.2  */
author	cremno <[email protected]>	2015-02-13 09:33:03 +0100
committer	cremno <[email protected]>	2015-02-13 09:33:03 +0100
commit	6f893b5183450384e89b3d8f03f9ef32a7522624 (patch)
tree	481913e69c91974e18c4e2355ef7778f9f0ae93d
parent	789177c8fe6c3bcb3833e76f95ecbd41b43b83fd (diff)
download	mruby-6f893b5183450384e89b3d8f03f9ef32a7522624.tar.gz mruby-6f893b5183450384e89b3d8f03f9ef32a7522624.zip