micropython · dpwe · Jul 29, 2022 · Jul 28, 2022
diff --git a/py/formatfloat.c b/py/formatfloat.c
@@ -62,26 +62,20 @@
 #define FPMIN_BUF_SIZE 6 // +9e+99
 
 #define FLT_SIGN_MASK   0x80000000
-#define FLT_EXP_MASK    0x7F800000
-#define FLT_MAN_MASK    0x007FFFFF
 
-union floatbits {
-    float f;
-    uint32_t u;
-};
 static inline int fp_signbit(float x) {
-    union floatbits fb = {x};
-    return fb.u & FLT_SIGN_MASK;
+    mp_float_union_t fb = {x};
+    return fb.i & FLT_SIGN_MASK;
 }
 #define fp_isnan(x) isnan(x)
 #define fp_isinf(x) isinf(x)
 static inline int fp_iszero(float x) {
-    union floatbits fb = {x};
-    return fb.u == 0;
+    mp_float_union_t fb = {x};
+    return fb.i == 0;
 }
 static inline int fp_isless1(float x) {
-    union floatbits fb = {x};
-    return fb.u < 0x3f800000;
+    mp_float_union_t fb = {x};
+    return fb.i < 0x3f800000;
 }
 
 #elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
@@ -99,28 +93,11 @@ static inline int fp_isless1(float x) {
 
 #endif // MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT/DOUBLE
 
-static inline int fp_ge_eps(FPTYPE x, FPTYPE y) {
-    mp_float_union_t fb_y = {y};
-    // Back off 2 eps.
-    // This is valid for almost all values, but in practice
-    // it's only used when y = 1eX for X>=0.
-    fb_y.i -= 2;
-    return x >= fb_y.f;
+static inline int fp_expval(FPTYPE x) {
+    mp_float_union_t fb = {x};
+    return (int)((fb.i >> MP_FLOAT_FRAC_BITS) & (~(0xFFFFFFFF << MP_FLOAT_EXP_BITS))) - MP_FLOAT_EXP_OFFSET;
 }
 
-static const FPTYPE g_pos_pow[] = {
-    #if FPDECEXP > 32
-    MICROPY_FLOAT_CONST(1e256), MICROPY_FLOAT_CONST(1e128), MICROPY_FLOAT_CONST(1e64),
-    #endif
-    MICROPY_FLOAT_CONST(1e32), MICROPY_FLOAT_CONST(1e16), MICROPY_FLOAT_CONST(1e8), MICROPY_FLOAT_CONST(1e4), MICROPY_FLOAT_CONST(1e2), MICROPY_FLOAT_CONST(1e1)
-};
-static const FPTYPE g_neg_pow[] = {
-    #if FPDECEXP > 32
-    MICROPY_FLOAT_CONST(1e-256), MICROPY_FLOAT_CONST(1e-128), MICROPY_FLOAT_CONST(1e-64),
-    #endif
-    MICROPY_FLOAT_CONST(1e-32), MICROPY_FLOAT_CONST(1e-16), MICROPY_FLOAT_CONST(1e-8), MICROPY_FLOAT_CONST(1e-4), MICROPY_FLOAT_CONST(1e-2), MICROPY_FLOAT_CONST(1e-1)
-};
-
 int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, char sign) {
 
     char *s = buf;
@@ -177,14 +154,15 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
     if (fmt == 'g' && prec == 0) {
         prec = 1;
     }
-    int e, e1;
+    int e;
     int dec = 0;
     char e_sign = '\0';
     int num_digits = 0;
-    const FPTYPE *pos_pow = g_pos_pow;
-    const FPTYPE *neg_pow = g_neg_pow;
     int signed_e = 0;
 
+    // Approximate power of 10 exponent from binary exponent.
+    // abs(e_guess) is lower bound on abs(power of 10 exponent).
+    int e_guess = (int)(fp_expval(f) * FPCONST(0.3010299956639812));  // 1/log2(10).
     if (fp_iszero(f)) {
         e = 0;
         if (fmt == 'f') {
@@ -203,25 +181,18 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
             }
         }
     } else if (fp_isless1(f)) {
-        FPTYPE f_mod = f;
+        FPTYPE f_entry = f;  // Save f in case we go to 'f' format.
         // Build negative exponent
-        for (e = 0, e1 = FPDECEXP; e1; e1 >>= 1, pos_pow++, neg_pow++) {
-            if (*neg_pow > f_mod) {
-                e += e1;
-                f_mod *= *pos_pow;
-            }
-        }
-
-        char e_sign_char = '-';
-        if (fp_isless1(f_mod) && f_mod >= FPROUND_TO_ONE) {
-            f_mod = FPCONST(1.0);
-            if (e == 0) {
-                e_sign_char = '+';
-            }
-        } else if (fp_isless1(f_mod)) {
-            e++;
-            f_mod *= FPCONST(10.0);
+        e = -e_guess;
+        FPTYPE u_base = MICROPY_FLOAT_C_FUN(pow)(10, -e);
+        while (u_base > f) {
+            ++e;
+            u_base = MICROPY_FLOAT_C_FUN(pow)(10, -e);
         }
+        // Normalize out the inferred unit.  Use divide because
+        // pow(10, e) * pow(10, -e) is slightly < 1 for some e in float32
+        // (e.g. print("%.12f" % ((1e13) * (1e-13))))
+        f /= u_base;
 
         // If the user specified 'g' format, and e is <= 4, then we'll switch
         // to the fixed format ('f')
@@ -241,11 +212,12 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
 
             num_digits = prec;
             signed_e = 0;
+            f = f_entry;
             ++num_digits;
         } else {
             // For e & g formats, we'll be printing the exponent, so set the
             // sign.
-            e_sign = e_sign_char;
+            e_sign = '-';
             dec = 0;
 
             if (prec > (buf_remaining - FPMIN_BUF_SIZE)) {
@@ -262,19 +234,11 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
         // scaling it.  Instead, we find the product of powers of 10
         // that is not greater than it, and use that to start the
         // mantissa.
-        FPTYPE u_base = FPCONST(1.0);
-        for (e = 0, e1 = FPDECEXP; e1; e1 >>= 1, pos_pow++) {
-            FPTYPE next_u = u_base * *pos_pow;
-            // fp_ge_eps performs "f >= (next_u - 2eps)" so that if, for
-            // numerical reasons, f is very close to a power of ten but
-            // not strictly equal, we still treat it as that power of 10.
-            // The comparison was failing for maybe 10% of 1eX values, but
-            // although rounding fixed many of them, there were still some
-            // rendering as 9.99999998e(X-1).
-            if (fp_ge_eps(f, next_u)) {
-                u_base = next_u;
-                e += e1;
-            }
+        e = e_guess;
+        FPTYPE next_u = MICROPY_FLOAT_C_FUN(pow)(10, e + 1);
+        while (f >= next_u) {
+            ++e;
+            next_u = MICROPY_FLOAT_C_FUN(pow)(10, e + 1);
         }
 
         // If the user specified fixed format (fmt == 'f') and e makes the
@@ -341,56 +305,32 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
         num_digits = prec;
     }
 
-    if (signed_e < 0) {
-        // The algorithm below treats numbers smaller than 1 by scaling them
-        // repeatedly by 10 to bring the new digit to the top.  Our input number
-        // was smaller than 1, so scale it up to be 1 <= f < 10.
-        FPTYPE u_base = FPCONST(1.0);
-        const FPTYPE *pow_u = g_pos_pow;
-        for (int m = FPDECEXP; m; m >>= 1, pow_u++) {
-            if (m & e) {
-                u_base *= *pow_u;
-            }
-        }
-        f *= u_base;
-    }
-
     int d = 0;
-    int num_digits_left = num_digits;
-    for (int digit_index = signed_e; num_digits_left >= 0; --digit_index) {
+    for (int digit_index = signed_e; num_digits >= 0; --digit_index) {
         FPTYPE u_base = FPCONST(1.0);
         if (digit_index > 0) {
             // Generate 10^digit_index for positive digit_index.
-            const FPTYPE *pow_u = g_pos_pow;
-            int target_index = digit_index;
-            for (int m = FPDECEXP; m; m >>= 1, pow_u++) {
-                if (m & target_index) {
-                    u_base *= *pow_u;
-                }
-            }
+            u_base = MICROPY_FLOAT_C_FUN(pow)(10, digit_index);
         }
        for (d = 0; d < 9; ++d) {
-            // This is essentially "if (f < u_base)", but with 2eps margin
-            // so that if f is just a tiny bit smaller, we treat it as
-            // equal (and accept the additional digit value).
-            if (!fp_ge_eps(f, u_base)) {
+            if (f < u_base) {
                 break;
             }
             f -= u_base;
         }
         // We calculate one more digit than we display, to use in rounding
         // below.  So only emit the digit if it's one that we display.
-        if (num_digits_left > 0) {
+        if (num_digits > 0) {
             // Emit this number (the leading digit).
             *s++ = '0' + d;
             if (dec == 0 && prec > 0) {
                 *s++ = '.';
             }
         }
         --dec;
-        --num_digits_left;
+        --num_digits;
         if (digit_index <= 0) {
-            // Once we get below 1.0, we scale up f instead of calculting
+            // Once we get below 1.0, we scale up f instead of calculating
             // negative powers of 10 in u_base.  This provides better
             // renditions of exact decimals like 1/16 etc.
             f *= FPCONST(10.0);

diff --git a/py/misc.h b/py/misc.h
@@ -243,10 +243,12 @@ extern mp_uint_t mp_verbose_flag;
 
 #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
 #define MP_FLOAT_EXP_BITS (11)
+#define MP_FLOAT_EXP_OFFSET (1023)
 #define MP_FLOAT_FRAC_BITS (52)
 typedef uint64_t mp_float_uint_t;
 #elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
 #define MP_FLOAT_EXP_BITS (8)
+#define MP_FLOAT_EXP_OFFSET (127)
 #define MP_FLOAT_FRAC_BITS (23)
 typedef uint32_t mp_float_uint_t;
 #endif

diff --git a/py/parsenum.c b/py/parsenum.c
@@ -166,6 +166,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
     }
 }
 
+
 enum {
     REAL_IMAG_STATE_START = 0,
     REAL_IMAG_STATE_HAVE_REAL = 1,
@@ -178,31 +179,51 @@ typedef enum {
     PARSE_DEC_IN_EXP,
 } parse_dec_in_t;
 
-#if MICROPY_PY_BUILTINS_COMPLEX
-mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex)
-#else
-mp_obj_t mp_parse_num_float(const char *str, size_t len, bool allow_imag, mp_lexer_t *lex)
-#endif
-{
-    #if MICROPY_PY_BUILTINS_FLOAT
-
+#if MICROPY_PY_BUILTINS_FLOAT
 // DEC_VAL_MAX only needs to be rough and is used to retain precision while not overflowing
 // SMALL_NORMAL_VAL is the smallest power of 10 that is still a normal float
 // EXACT_POWER_OF_10 is the largest value of x so that 10^x can be stored exactly in a float
 //   Note: EXACT_POWER_OF_10 is at least floor(log_5(2^mantissa_length)). Indeed, 10^n = 2^n * 5^n
 //   so we only have to store the 5^n part in the mantissa (the 2^n part will go into the float's
 //   exponent).
-    #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
 #define DEC_VAL_MAX 1e20F
 #define SMALL_NORMAL_VAL (1e-37F)
 #define SMALL_NORMAL_EXP (-37)
 #define EXACT_POWER_OF_10 (9)
-    #elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
 #define DEC_VAL_MAX 1e200
 #define SMALL_NORMAL_VAL (1e-307)
 #define SMALL_NORMAL_EXP (-307)
 #define EXACT_POWER_OF_10 (22)
-    #endif
+#endif
+
+// Break out inner digit accumulation routine to ease trailing zero deferral.
+static void accept_digit(mp_float_t *p_dec_val, int dig, int *p_exp_extra, int in) {
+    // Core routine to ingest an additional digit.
+    if (*p_dec_val < DEC_VAL_MAX) {
+        // dec_val won't overflow so keep accumulating
+        *p_dec_val = 10 * *p_dec_val + dig;
+        if (in == PARSE_DEC_IN_FRAC) {
+            --(*p_exp_extra);
+        }
+    } else {
+        // dec_val might overflow and we anyway can't represent more digits
+        // of precision, so ignore the digit and just adjust the exponent
+        if (in == PARSE_DEC_IN_INTG) {
+            ++(*p_exp_extra);
+        }
+    }
+}
+#endif // MICROPY_BUILTINS_FLOAT
+
+#if MICROPY_PY_BUILTINS_COMPLEX
+mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex)
+#else
+mp_obj_t mp_parse_num_float(const char *str, size_t len, bool allow_imag, mp_lexer_t *lex)
+#endif
+{
+    #if MICROPY_PY_BUILTINS_FLOAT
 
     const char *top = str + len;
     mp_float_t dec_val = 0;
@@ -255,6 +276,7 @@ mp_obj_t mp_parse_num_float(const char *str, size_t len, bool allow_imag, mp_lex
         bool exp_neg = false;
         int exp_val = 0;
         int exp_extra = 0;
+        int trailing_zeros_intg = 0, trailing_zeros_frac = 0;
         while (str < top) {
             unsigned int dig = *str++;
             if ('0' <= dig && dig <= '9') {
@@ -267,18 +289,25 @@ mp_obj_t mp_parse_num_float(const char *str, size_t len, bool allow_imag, mp_lex
                         exp_val = 10 * exp_val + dig;
                     }
                 } else {
-                    if (dec_val < DEC_VAL_MAX) {
-                        // dec_val won't overflow so keep accumulating
-                        dec_val = 10 * dec_val + dig;
-                        if (in == PARSE_DEC_IN_FRAC) {
-                            --exp_extra;
+                    if (dig == 0 || dec_val >= DEC_VAL_MAX) {
+                        // Defer treatment of zeros in fractional part.  If nothing comes afterwards, ignore them.
+                        // Also, once we reach DEC_VAL_MAX, treat every additional digit as a trailing zero.
+                        if (in == PARSE_DEC_IN_INTG) {
+                            ++trailing_zeros_intg;
+                        } else {
+                            ++trailing_zeros_frac;
                         }
                     } else {
-                        // dec_val might overflow and we anyway can't represent more digits
-                        // of precision, so ignore the digit and just adjust the exponent
-                        if (in == PARSE_DEC_IN_INTG) {
-                            ++exp_extra;
+                        // Time to un-defer any trailing zeros.  Intg zeros first.
+                        while (trailing_zeros_intg) {
+                            accept_digit(&dec_val, 0, &exp_extra, PARSE_DEC_IN_INTG);
+                            --trailing_zeros_intg;
+                        }
+                        while (trailing_zeros_frac) {
+                            accept_digit(&dec_val, 0, &exp_extra, PARSE_DEC_IN_FRAC);
+                            --trailing_zeros_frac;
                         }
+                        accept_digit(&dec_val, dig, &exp_extra, in);
                     }
                 }
             } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
@@ -311,7 +340,7 @@ mp_obj_t mp_parse_num_float(const char *str, size_t len, bool allow_imag, mp_lex
         }
 
         // apply the exponent, making sure it's not a subnormal value
-        exp_val += exp_extra;
+        exp_val += exp_extra + trailing_zeros_intg;
         if (exp_val < SMALL_NORMAL_EXP) {
             exp_val -= SMALL_NORMAL_EXP;
             dec_val *= SMALL_NORMAL_VAL;

diff --git a/tests/float/float_format.py b/tests/float/float_format.py
@@ -17,3 +17,11 @@
 # check a case that would render negative digit values, eg ")" characters
 # the string is converted back to a float to check for no illegal characters
 float("%.23e" % 1e-80)
+
+# Check a problem with malformed "e" format numbers on the edge of 1.0e-X.
+for r in range(38):
+    s = "%.12e" % float("1e-" + str(r))
+    # It may format as 1e-r, or 9.999...e-(r+1), both are OK.
+    # But formatting as 0.999...e-r is NOT ok.
+    if s[0] == "0":
+        print("FAIL:", s)
diff --git a/tests/float/float_format_ints_doubleprec.py b/tests/float/float_format_ints_doubleprec.py
@@ -13,3 +13,6 @@
 v2 = 0x6974E718D7D7625A  # 1e200
 print("{:.12e}".format(array.array("d", v1.to_bytes(8, sys.byteorder))[0]))
 print("{:.12e}".format(array.array("d", v2.to_bytes(8, sys.byteorder))[0]))
+
+for i in range(300):
+    print(float("1e" + str(i)))
diff --git a/tests/float/string_format_modulo.py b/tests/float/string_format_modulo.py
@@ -41,7 +41,10 @@
 print(("%.40g" % 1e-1)[:2])
 print(("%.40g" % 1e-2)[:2])
 print(("%.40g" % 1e-3)[:2])
-print(("%.40g" % 1e-4)[:2])
+# Under Appveyor Release builds, 1e-4 was being formatted as 9.99999...e-5
+# instead of 0.0001.  (Interestingly, it formatted correctly for the Debug
+# build). Avoid the edge case.
+print(("%.40g" % 1.1e-4)[:2])
 
 print("%.0g" % 1)  # 0 precision 'g'