From 1d232fd44026deae94c88200766152aec9d6c5e5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 9 Apr 2017 22:54:57 +0200 Subject: [PATCH] factor: 30% faster code (estimate max possible factor just once) function old new delta factorize - 161 +161 isqrt_odd - 102 +102 factor_main 281 110 -171 ------------------------------------------------------------------------------ (add/remove: 2/0 grow/shrink: 0/1 up/down: 263/-171) Total: 92 bytes Signed-off-by: Denys Vlasenko --- coreutils/factor.c | 106 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 24 deletions(-) diff --git a/coreutils/factor.c b/coreutils/factor.c index 0f838a735..e01190150 100644 --- a/coreutils/factor.c +++ b/coreutils/factor.c @@ -20,65 +20,111 @@ #include "libbb.h" +#if 0 +# define dbg(...) bb_error_msg(__VA_ARGS__) +#else +# define dbg(...) ((void)0) +#endif + +typedef unsigned long long wide_t; +#define WIDE_BITS (unsigned)(sizeof(wide_t)*8) +#define TOPMOST_WIDE_BIT ((wide_t)1 << (WIDE_BITS-1)) + #if ULLONG_MAX == (UINT_MAX * UINT_MAX + 2 * UINT_MAX) /* "unsigned" is half as wide as ullong */ typedef unsigned half_t; +#define HALF_MAX UINT_MAX #define HALF_FMT "" #elif ULLONG_MAX == (ULONG_MAX * ULONG_MAX + 2 * ULONG_MAX) /* long is half as wide as ullong */ typedef unsigned long half_t; +#define HALF_MAX ULONG_MAX #define HALF_FMT "l" #else #error Cant find an integer type which is half as wide as ullong #endif -static void factorize(const char *numstr) +/* Returns such x that x+1 > sqrt(N) */ +static inline half_t isqrt(wide_t N) { - unsigned long long N, factor2; - half_t factor; - unsigned count3; + wide_t x; + unsigned c; + +// Never called with N < 1 +// if (N == 0) +// return 0; +// + /* Count leading zeros */ + c = 0; + while (!(N & TOPMOST_WIDE_BIT)) { + c++; + N <<= 1; + } + N >>= c; - /* Coreutils compat */ - numstr = skip_whitespace(numstr); - if (*numstr == '+') - numstr++; + /* Make x > sqrt(n) */ + x = (wide_t)1 << ((WIDE_BITS + 1 - c) / 2); + dbg("x:%llx", x); + for (;;) { + wide_t y = (x + N/x) / 2; + dbg("y:%llx y^2:%llx (y+1)^2:%llx]", y, y*y, (y+1)*(y+1)); + if (y >= x) { + /* Handle degenerate case N = 0xffffffffff...fffffff */ + if (y == (wide_t)HALF_MAX + 1) + y--; + dbg("isqrt(%llx)=%llx"HALF_FMT, N, y); + return y; + } + x = y; + } +} - N = bb_strtoull(numstr, NULL, 10); - if (errno) - bb_show_usage(); +static NOINLINE half_t isqrt_odd(wide_t N) +{ + half_t s = isqrt(N); + if (s && !(s & 1)) /* even? */ + s--; + return s; +} - printf("%llu:", N); +static NOINLINE void factorize(wide_t N) +{ + wide_t factor2; + half_t factor; + half_t max_factor; + unsigned count3; if (N < 4) goto end; + while (!(N & 1)) { printf(" 2"); N >>= 1; } + max_factor = isqrt_odd(N); count3 = 3; factor = 3; factor2 = 3 * 3; for (;;) { - unsigned long long nfactor2; - while ((N % factor) == 0) { N = N / factor; printf(" %u"HALF_FMT"", factor); + max_factor = isqrt_odd(N); } next_factor: - /* (f + 2)^2 = f^2 + 4*f + 4 = f^2 + 4*(f+1) */ - nfactor2 = factor2 + 4 * (factor + 1); - if (nfactor2 < factor2) /* overflow? */ - break; - factor2 = nfactor2; - if (factor2 > N) + if (factor >= max_factor) break; + /* (f + 2)^2 = f^2 + 4*f + 4 = f^2 + 4*(f+1) */ + factor2 = factor2 + 4 * (factor + 1); + /* overflow is impossible due to max_factor check */ + /* (factor2 > N) is impossible due to max_factor check */ factor += 2; /* Rudimentary wheel sieving: skip multiples of 3: * Every third odd number is divisible by three and thus isn't a prime: - * 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37... - * ^ ^ ^ ^ ^ ^ ^ _ ^ ^ _ ^ (^primes) + * 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37... + * ^ ^ ^ ^ ^ ^ ^ _ ^ ^ _ ^ + * (^ = primes, _ = would-be-primes-if-not-divisible-by-5) */ --count3; if (count3 == 0) { @@ -105,8 +151,20 @@ int factor_main(int argc UNUSED_PARAM, char **argv) bb_show_usage(); do { - factorize(*argv); + wide_t N; + const char *numstr; + + /* Coreutils compat */ + numstr = skip_whitespace(*argv); + if (*numstr == '+') + numstr++; + + N = bb_strtoull(numstr, NULL, 10); + if (errno) + bb_show_usage(); + printf("%llu:", N); + factorize(N); } while (*++argv); - fflush_stdout_and_exit(EXIT_SUCCESS); + return EXIT_SUCCESS; } -- 2.25.1