ash,hush: improve randomness of $RANDOM, add easy-ish way to test it
author Denys Vlasenko <vda.linux@googlemail.com>
Thu, 13 Mar 2014 11:52:43 +0000 (12:52 +0100)
committer Denys Vlasenko <vda.linux@googlemail.com>
Thu, 13 Mar 2014 11:52:43 +0000 (12:52 +0100)
function                                             old     new   delta
next_random                                           68     113     +45
change_random                                        103     121     +18

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
shell/random.c
shell/random.h

index 853ab085a22c1ee5b3dfae79d88fc9ac9f56f225..9a64f54b0d54b3642eee496172930246a97f1670 100644 (file)
@@ -6,17 +6,51 @@
  *
  * Licensed under GPLv2, see file LICENSE in this source tree.
  */
-#include "libbb.h"
-#include "random.h"
+
+/* For testing against dieharder, you need only random.{c,h}
+ * Howto:
+ * gcc -O2 -Wall -DRANDTEST random.c -o random
+ * ./random | dieharder -g 200 -a
+ */
+
+#if !defined RANDTEST
+
+# include "libbb.h"
+# include "random.h"
+# define RAND_BASH_MASK 0x7fff
+
+#else
+# include <stdint.h>
+# include <unistd.h>
+# include <stdio.h>
+# include <time.h>
+# define RAND_BASH_MASK 0xffffffff /* off */
+# define FAST_FUNC /* nothing */
+# define PUSH_AND_SET_FUNCTION_VISIBILITY_TO_HIDDEN /* nothing */
+# define POP_SAVED_FUNCTION_VISIBILITY /* nothing */
+# define monotonic_us() time(NULL)
+# include "random.h"
+#endif
 
 uint32_t FAST_FUNC
 next_random(random_t *rnd)
 {
-       /* Galois LFSR parameter */
-       /* Taps at 32 31 29 1: */
+       /* Galois LFSR parameter:
+        * Taps at 32 31 29 1:
+        */
        enum { MASK = 0x8000000b };
        /* Another example - taps at 32 31 30 10: */
-       /* MASK = 0x00400007 */
+       /* enum { MASK = 0x00400007 }; */
+
+       /* Xorshift parameters:
+        * Choices for a,b,c: 10,13,10; 8,9,22; 2,7,3; 23,3,24
+        * (given by algorithm author)
+        */
+        enum {
+                a = 2,
+                b = 7,
+                c = 3,
+        };
 
        uint32_t t;
 
@@ -27,18 +61,94 @@ next_random(random_t *rnd)
                INIT_RANDOM_T(rnd, getpid(), monotonic_us());
        }
 
-       /* LCG has period of 2^32 and alternating lowest bit */
+       /* LCG: period of 2^32, but quite weak:
+        * bit 0 alternates between 0 and 1 (pattern of length 2)
+        * bit 1 has a repeating pattern of length 4
+        * bit 2 has a repeating pattern of length 8
+        * etc...
+        */
        rnd->LCG = 1664525 * rnd->LCG + 1013904223;
-       /* Galois LFSR has period of 2^32-1 = 3 * 5 * 17 * 257 * 65537 */
+
+       /* Galois LFSR:
+        * period of 2^32-1 = 3 * 5 * 17 * 257 * 65537.
+        * Successive values are left-shifted one bit
+        * and possibly xored with a sparse constant.
+        */
        t = (rnd->galois_LFSR << 1);
        if (rnd->galois_LFSR < 0) /* if we just shifted 1 out of msb... */
                t ^= MASK;
        rnd->galois_LFSR = t;
-       /* Both are weak, combining them gives better randomness
-        * and ~2^64 period. & 0x7fff is probably bash compat
-        * for $RANDOM range. Combining with subtraction is
-        * just for fun. + and ^ would work equally well. */
-       t = (t - rnd->LCG) & 0x7fff;
 
-       return t;
+       /* http://en.wikipedia.org/wiki/Xorshift
+        * Period 2^64-1 = 3 * 5 * 17 * 257 * 641 * 65537 * 6700417
+        * Moderately good statistical properties:
+        * fails the following "dieharder -g 200 -a" tests:
+        *       diehard_operm5|   0
+        *         diehard_oqso|   0
+        * diehard_count_1s_byt|   0
+        *     diehard_3dsphere|   3
+        *      diehard_squeeze|   0
+        *         diehard_runs|   0
+        *         diehard_runs|   0
+        *        diehard_craps|   0
+        *        diehard_craps|   0
+        * rgb_minimum_distance|   3
+        * rgb_minimum_distance|   4
+        * rgb_minimum_distance|   5
+        *     rgb_permutations|   3
+        *     rgb_permutations|   4
+        *     rgb_permutations|   5
+        *         dab_filltree|  32
+        *         dab_filltree|  32
+        *         dab_monobit2|  12
+        */
+       t = rnd->xs64_x ^ (rnd->xs64_x << a);
+       rnd->xs64_x = rnd->xs64_y;
+       rnd->xs64_y = rnd->xs64_y ^ (rnd->xs64_y >> c) ^ t ^ (t >> b);
+
+       /* Combined LCG + Galois LFSR have 2^32 * (2^32-1) period.
+        * Strength:
+        * individually, both are extremely weak cryptographically;
+        * when combined, they fail the following "dieharder -g 200 -a" tests:
+        *     diehard_rank_6x8|   0
+        *         diehard_oqso|   0
+        *          diehard_dna|   0
+        * diehard_count_1s_byt|   0
+        *          rgb_bitdist|   2
+        *         dab_monobit2|  12
+        *
+        * Combining them with xorshift-64 increases period to
+        * 2^32 * (2^64-1)
+        * (2^32-1 divides 2^64-1 = (2^32-1) * (2^32+1), so the LFSR period
+        * contributes no additional factor to the least common multiple),
+        * which is about 2^96, or in base 10 ~7.9*10^28.
+        * Strength of the combination:
+        * passes all "dieharder -g 200 -a" tests.
+        *
+        * Combining with subtraction and addition is just for fun.
+        * It does not add meaningful strength, could use xor operation instead.
+        */
+       t = rnd->galois_LFSR - rnd->LCG + rnd->xs64_y;
+
+       /* bash compat $RANDOM range: */
+       return t & RAND_BASH_MASK;
 }
+
+#ifdef RANDTEST
+static random_t rnd;
+
+int main(int argc, char **argv)
+{
+       int i;
+       int buf[4096];
+
+       for (;;) {
+               for (i = 0; i < sizeof(buf) / sizeof(buf[0]); i++) {
+                       buf[i] = next_random(&rnd);
+               }
+               write(1, buf, sizeof(buf));
+       }
+
+        return 0;
+}
+
+#endif
index 180c48abbf20abfef936953363c00913a5a279e9..c4eb44c133bb836e4b6d30ba497639444f49610b 100644 (file)
 PUSH_AND_SET_FUNCTION_VISIBILITY_TO_HIDDEN
 
 typedef struct random_t {
-       /* Random number generators */
-       int32_t galois_LFSR; /* Galois LFSR (fast but weak). signed! */
-       uint32_t LCG;        /* LCG (fast but weak) */
+       /* State of random number generators: */
+
+       /* Galois LFSR (fast but weak) */
+       int32_t galois_LFSR; /* must be signed! */
+
+       /* LCG (fast but weak) */
+       uint32_t LCG;
+
+       /* 64-bit xorshift (fast, moderate strength) */
+       uint32_t xs64_x;
+       uint32_t xs64_y;
 } random_t;
 
 #define UNINITED_RANDOM_T(rnd) \
        ((rnd)->galois_LFSR == 0)
 
 #define INIT_RANDOM_T(rnd, nonzero, v) \
-       ((rnd)->galois_LFSR = (nonzero), (rnd)->LCG = (v))
+       ((rnd)->galois_LFSR = (rnd)->xs64_x = (nonzero), (rnd)->LCG = (rnd)->xs64_y = (v))
 
 #define CLEAR_RANDOM_T(rnd) \
        ((rnd)->galois_LFSR = 0)