From 10673c44f11045a0c99b19f32930097e9b3ae148 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 11 Apr 2017 07:02:42 +0200 Subject: [PATCH] factor: much faster, and very slightly larger isqrt() function old new delta isqrt_odd 70 88 +18 Signed-off-by: Denys Vlasenko --- coreutils/factor.c | 44 ++++++++++---------------------------------- 1 file changed, 10 insertions(+), 34 deletions(-) diff --git a/coreutils/factor.c b/coreutils/factor.c index 11097c12d..f910fdb44 100644 --- a/coreutils/factor.c +++ b/coreutils/factor.c @@ -48,41 +48,17 @@ typedef unsigned long half_t; static inline half_t isqrt(wide_t N) { half_t x; + unsigned shift; - // Never called with N < 1 - //if (N == 0) - // return 0; - - x = HALF_MAX; - /* First approximation of x+1 > sqrt(N) - all-ones, half as many bits: - * 1xxxxx -> 111 (six bits to three) - * 01xxxx -> 111 - * 001xxx -> 011 - * 0001xx -> 011 and so on. - */ - // It is actually not performance-critical at all. - // Can simply start Newton loop with very conservative x=0xffffffff. - //wide_t mask_2bits; - //mask_2bits = TOPMOST_WIDE_BIT | (TOPMOST_WIDE_BIT >> 1); - //while (!(N & mask_2bits)) { - // x >>= 1; - // mask_2bits >>= 2; - //} - dbg("x:%"HALF_FMT"x", x); - - for (;;) { - half_t y = (x + N/x) / 2; - dbg("y:%x y^2:%llx", y, (wide_t)y * y); - /* - * "real" y may be one bit wider: 0x100000000 and get truncated to 0. - * In this case, "real" y is > x. The first check below is for this case: - */ - if (y == 0 || y >= x) { - dbg("isqrt(%llx)=%"HALF_FMT"x", N, x); - return x; - } - x = y; - } + shift = WIDE_BITS - 2; + x = 0; + do { + x = (x << 1) + 1; + if ((wide_t)x * x > (N >> shift)) + x--; /* whoops, that +1 was too much */ + shift -= 2; + } while ((int)shift >= 0); + return x; } static NOINLINE half_t isqrt_odd(wide_t N) -- 2.25.1