optimize floatscan: avoid excessive upscaling
author Rich Felker <dalias@aerifal.cx>
Wed, 11 Apr 2012 18:11:47 +0000 (14:11 -0400)
committer Rich Felker <dalias@aerifal.cx>
Wed, 11 Apr 2012 18:11:47 +0000 (14:11 -0400)
upscaling by even one step too much creates 3-29 extra iterations for
the next loop. this is still suboptimal since it always goes by 2^29
rather than using a smaller upscale factor when nearing the target,
but performance on common, small-magnitude, few-digit values has
already more than doubled with this change.

more optimizations on the way...

src/internal/floatscan.c

index 3aa54082ad98fdd3d7f8bd0492f0cb843d27378f..d5444daaff8e03cf61ae93f9648e6f92129dd293 100644 (file)
@@ -138,38 +138,12 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po
        e2 = 0;
        rp = lrp;
 
-       while (rp < 18+9*LD_B1B_DIG) {
-               uint32_t carry = 0;
-               e2 -= 29;
-               for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
-                       uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
-                       if (tmp > 1000000000) {
-                               carry = tmp / 1000000000;
-                               x[k] = tmp % 1000000000;
-                       } else {
-                               carry = 0;
-                               x[k] = tmp;
-                       }
-                       if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
-                       if (k==a) break;
-               }
-               if (carry) {
-                       rp += 9;
-                       if (a == z) {
-                               z = (z-1 & MASK);
-                               x[z-1 & MASK] |= x[z];
-                       }
-                       a = (a-1 & MASK);
-                       x[a] = carry;
-               }
-       }
-
        if (rp % 9) {
                static const int p10s[] = {
                        100000000, 10000000, 1000000, 100000,
                        10000, 1000, 100, 10
                };
-               int rpm9 = rp % 9;
+               int rpm9 = rp>=0 ? rp%9 : rp%9+9;
                int p10 = p10s[rpm9-1];
                uint32_t carry = 0;
                for (k=a; k!=z; k=(k+1 & MASK)) {
@@ -190,6 +164,32 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po
                rp += 9-rpm9;
        }
 
+       while (rp < 9*LD_B1B_DIG || (rp == 9*LD_B1B_DIG && x[0]<th[0])) {
+               uint32_t carry = 0;
+               e2 -= 29;
+               for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
+                       uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
+                       if (tmp > 1000000000) {
+                               carry = tmp / 1000000000;
+                               x[k] = tmp % 1000000000;
+                       } else {
+                               carry = 0;
+                               x[k] = tmp;
+                       }
+                       if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
+                       if (k==a) break;
+               }
+               if (carry) {
+                       rp += 9;
+                       if (a == z) {
+                               z = (z-1 & MASK);
+                               x[z-1 & MASK] |= x[z];
+                       }
+                       a = (a-1 & MASK);
+                       x[a] = carry;
+               }
+       }
+
        for (;;) {
                uint32_t carry = 0;
                int sh = 1;