From: Denis Vlasenko Date: Sun, 15 Jun 2008 09:48:18 +0000 (-0000) Subject: further encrypt_des optimizations X-Git-Tag: 1_11_0~61 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=5cdc247ba4e813db8c2d0a05ed7ed9624ed422fa;p=oweals%2Fbusybox.git further encrypt_des optimizations function old new delta pw_encrypt 943 964 +21 des_crypt 1512 1509 -3 u_sbox 512 256 -256 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/2 up/down: 21/-259) Total: -238 bytes --- diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c index 956a3e679..a13e1a5df 100644 --- a/libbb/pw_encrypt_des.c +++ b/libbb/pw_encrypt_des.c @@ -56,6 +56,14 @@ * alignment). */ + +/* Parts busybox doesn't need or had optimized */ +#define USE_PRECOMPUTED_u_sbox 1 +#define USE_REPETITIVE_SPEEDUP 0 +#define USE_ip_mask 0 +#define USE_de_keys 0 + + /* A pile of data */ static const uint8_t IP[64] = { 58, 50, 42, 34, 26, 18, 10, 2, 60, 52, 44, 36, 28, 20, 12, 4, @@ -85,57 +93,93 @@ static const uint8_t comp_perm[48] = { /* * No E box is used, as it's replaced by some ANDs, shifts, and ORs. */ - +#if !USE_PRECOMPUTED_u_sbox static const uint8_t sbox[8][64] = { - { - 14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7, + { 14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7, 0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8, 4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0, 15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13 }, - { - 15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10, + { 15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10, 3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5, 0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15, 13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9 }, - { - 10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8, + { 10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8, 13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1, 13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7, 1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12 }, - { - 7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15, + { 7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15, 13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9, 10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4, 3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14 }, - { - 2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9, + { 2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9, 14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6, 4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14, 11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3 }, - { - 12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11, + { 12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11, 10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8, 9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6, 4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13 }, - { - 4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1, + { 4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1, 13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6, 1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2, 6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12 }, - { - 13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7, + { 13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7, 1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2, 7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8, 2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11 } }; +#else /* precomputed, with half-bytes packed into one byte */ +static const uint8_t u_sbox[8][32] = { + { 0x0e, 0xf4, 0x7d, 0x41, 0xe2, 0x2f, 0xdb, 0x18, + 0xa3, 0x6a, 0xc6, 0xbc, 0x95, 0x59, 0x30, 0x87, + 0xf4, 0xc1, 0x8e, 0x28, 0x4d, 0x96, 0x12, 0x7b, + 0x5f, 0xbc, 0x39, 0xe7, 0xa3, 0x0a, 0x65, 0xd0, + }, + { 0x3f, 0xd1, 0x48, 0x7e, 0xf6, 0x2b, 0x83, 0xe4, + 0xc9, 0x07, 0x12, 0xad, 0x6c, 0x90, 0xb5, 0x5a, + 0xd0, 0x8e, 0xa7, 0x1b, 0x3a, 0xf4, 0x4d, 0x21, + 0xb5, 0x68, 0x7c, 0xc6, 0x09, 0x53, 0xe2, 0x9f, + }, + { 0xda, 0x70, 0x09, 0x9e, 0x36, 0x43, 0x6f, 0xa5, + 0x21, 0x8d, 0x5c, 0xe7, 0xcb, 0xb4, 0xf2, 0x18, + 0x1d, 0xa6, 0xd4, 0x09, 0x68, 0x9f, 0x83, 0x70, + 0x4b, 0xf1, 0xe2, 0x3c, 0xb5, 0x5a, 0x2e, 0xc7, + }, + { 0xd7, 0x8d, 0xbe, 0x53, 0x60, 0xf6, 0x09, 0x3a, + 0x41, 0x72, 0x28, 0xc5, 0x1b, 0xac, 0xe4, 0x9f, + 0x3a, 0xf6, 0x09, 0x60, 0xac, 0x1b, 0xd7, 0x8d, + 0x9f, 0x41, 0x53, 0xbe, 0xc5, 0x72, 0x28, 0xe4, + }, + { 0xe2, 0xbc, 0x24, 0xc1, 0x47, 0x7a, 0xdb, 0x16, + 0x58, 0x05, 0xf3, 0xaf, 0x3d, 0x90, 0x8e, 0x69, + 0xb4, 0x82, 0xc1, 0x7b, 0x1a, 0xed, 0x27, 0xd8, + 0x6f, 0xf9, 0x0c, 0x95, 0xa6, 0x43, 0x50, 0x3e, + }, + { 0xac, 0xf1, 0x4a, 0x2f, 0x79, 0xc2, 0x96, 0x58, + 0x60, 0x1d, 0xd3, 0xe4, 0x0e, 0xb7, 0x35, 0x8b, + 0x49, 0x3e, 0x2f, 0xc5, 0x92, 0x58, 0xfc, 0xa3, + 0xb7, 0xe0, 0x14, 0x7a, 0x61, 0x0d, 0x8b, 0xd6, + }, + { 0xd4, 0x0b, 0xb2, 0x7e, 0x4f, 0x90, 0x18, 0xad, + 0xe3, 0x3c, 0x59, 0xc7, 0x25, 0xfa, 0x86, 0x61, + 0x61, 0xb4, 0xdb, 0x8d, 0x1c, 0x43, 0xa7, 0x7e, + 0x9a, 0x5f, 0x06, 0xf8, 0xe0, 0x25, 0x39, 0xc2, + }, + { 0x1d, 0xf2, 0xd8, 0x84, 0xa6, 0x3f, 0x7b, 0x41, + 0xca, 0x59, 0x63, 0xbe, 0x05, 0xe0, 0x9c, 0x27, + 0x27, 0x1b, 0xe4, 0x71, 0x49, 0xac, 0x8e, 0xd2, + 0xf0, 0xc6, 0x9a, 0x0d, 0x3f, 0x53, 0x65, 0xb8, + }, +}; +#endif static const uint8_t pbox[32] = { 16, 7, 20, 21, 29, 12, 28, 17, 1, 15, 23, 26, 5, 18, 31, 10, @@ -180,7 +224,10 @@ ascii_to_bin(char ch) * being initialized, and therefore doesn't need to be made * reentrant. */ struct const_des_ctx { - uint8_t init_perm[64], final_perm[64]; /* referenced 2 times each */ +#if USE_ip_mask + uint8_t init_perm[64]; /* referenced 2 times */ +#endif + uint8_t final_perm[64]; /* 2 times */ uint8_t m_sbox[4][4096]; /* 5 times */ }; #define C (*cctx) @@ -191,22 +238,27 @@ struct const_des_ctx { static struct const_des_ctx* const_des_init(void) { - int i, j, b; - uint8_t u_sbox[8][64]; + unsigned i, j, b; struct const_des_ctx *cctx; +#if !USE_PRECOMPUTED_u_sbox + uint8_t u_sbox[8][64]; + cctx = xmalloc(sizeof(*cctx)); - /* - * Invert the S-boxes, reordering the input bits. - */ + /* Invert the S-boxes, reordering the input bits. */ for (i = 0; i < 8; i++) { for (j = 0; j < 64; j++) { b = (j & 0x20) | ((j & 1) << 4) | ((j >> 1) & 0xf); u_sbox[i][j] = sbox[i][b]; } } - + for (i = 0; i < 8; i++) { + fprintf(stderr, "\t{\t"); + for (j = 0; j < 64; j+=2) + fprintf(stderr, " 0x%02x,", u_sbox[i][j] + u_sbox[i][j+1]*16); + fprintf(stderr, "\n\t},\n"); + } /* * Convert the inverted S-boxes into 4 arrays of 8 bits. * Each will handle 12 bits of the S-box input. @@ -217,24 +269,45 @@ const_des_init(void) m_sbox[b][(i << 6) | j] = (uint8_t)((u_sbox[(b << 1)][i] << 4) | u_sbox[(b << 1) + 1][j]); +#else + cctx = xmalloc(sizeof(*cctx)); + + /* + * Convert the inverted S-boxes into 4 arrays of 8 bits. + * Each will handle 12 bits of the S-box input. + */ + for (b = 0; b < 4; b++) + for (i = 0; i < 64; i++) + for (j = 0; j < 64; j++) { + uint8_t lo, hi; + hi = u_sbox[(b << 1)][i / 2]; + if (!(i & 1)) + hi <<= 4; + lo = u_sbox[(b << 1) + 1][j / 2]; + if (j & 1) + lo >>= 4; + m_sbox[b][(i << 6) | j] = (hi & 0xf0) | (lo & 0x0f); + } +#endif /* * Set up the initial & final permutations into a useful form. */ for (i = 0; i < 64; i++) { final_perm[i] = IP[i] - 1; +#if USE_ip_mask init_perm[final_perm[i]] = (uint8_t)i; +#endif } return cctx; } -#define WANT_REPETITIVE_SPEEDUP 0 struct des_ctx { const struct const_des_ctx *const_ctx; uint32_t saltbits; /* referenced 5 times */ -#if WANT_REPETITIVE_SPEEDUP +#if USE_REPETITIVE_SPEEDUP uint32_t old_salt; /* 3 times */ uint32_t old_rawkey0, old_rawkey1; /* 3 times each */ #endif @@ -242,8 +315,12 @@ struct des_ctx { uint8_t inv_comp_perm[56]; /* 3 times */ uint8_t inv_key_perm[64]; /* 3 times */ uint32_t en_keysl[16], en_keysr[16]; /* 2 times each */ -// uint32_t de_keysl[16], de_keysr[16]; /* 2 times each */ +#if USE_de_keys + uint32_t de_keysl[16], de_keysr[16]; /* 2 times each */ +#endif +#if USE_ip_mask uint32_t ip_maskl[8][256], ip_maskr[8][256]; /* 9 times each */ +#endif uint32_t fp_maskl[8][256], fp_maskr[8][256]; /* 9 times each */ uint32_t key_perm_maskl[8][128], key_perm_maskr[8][128]; /* 9 times */ uint32_t comp_maskl[8][128], comp_maskr[8][128]; /* 9 times each */ @@ -260,8 +337,8 @@ struct des_ctx { #define inv_key_perm (D.inv_key_perm ) #define en_keysl (D.en_keysl ) #define en_keysr (D.en_keysr ) -//#define de_keysl (D.de_keysl ) -//#define de_keysr (D.de_keysr ) +#define de_keysl (D.de_keysl ) +#define de_keysr (D.de_keysr ) #define ip_maskl (D.ip_maskl ) #define ip_maskr (D.ip_maskr ) #define fp_maskl (D.fp_maskl ) @@ -277,14 +354,13 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx) { int i, j, b, k, inbit, obit; uint32_t p; - uint32_t il, ir, fl, fr; const uint32_t *bits28, *bits24; if (!ctx) ctx = xmalloc(sizeof(*ctx)); const_ctx = cctx; -#if WANT_REPETITIVE_SPEEDUP +#if USE_REPETITIVE_SPEEDUP old_rawkey0 = old_rawkey1 = 0; old_salt = 0; #endif @@ -292,9 +368,7 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx) bits28 = bits32 + 4; bits24 = bits28 + 4; - /* - * Initialise the inverted key permutation. - */ + /* Initialise the inverted key permutation. */ for (i = 0; i < 64; i++) { inv_key_perm[i] = 255; } @@ -308,9 +382,7 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx) inv_comp_perm[i] = 255; } - /* - * Invert the key compression permutation. - */ + /* Invert the key compression permutation. */ for (i = 0; i < 48; i++) { inv_comp_perm[comp_perm[i] - 1] = (uint8_t)i; } @@ -320,19 +392,25 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx) * and for the key initial and compression permutations. */ for (k = 0; k < 8; k++) { + uint32_t il, ir; + uint32_t fl, fr; for (i = 0; i < 256; i++) { +#if USE_ip_mask il = 0; ir = 0; +#endif fl = 0; fr = 0; for (j = 0; j < 8; j++) { inbit = 8 * k + j; if (i & bits8[j]) { +#if USE_ip_mask obit = init_perm[inbit]; if (obit < 32) il |= bits32[obit]; else ir |= bits32[obit - 32]; +#endif obit = final_perm[inbit]; if (obit < 32) fl |= bits32[obit]; @@ -340,8 +418,10 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx) fr |= bits32[obit - 32]; } } +#if USE_ip_mask ip_maskl[k][i] = il; ip_maskr[k][i] = ir; +#endif fp_maskl[k][i] = fl; fp_maskr[k][i] = fr; } @@ -409,7 +489,7 @@ setup_salt(struct des_ctx *ctx, uint32_t salt) uint32_t obit, saltbit; int i; -#if WANT_REPETITIVE_SPEEDUP +#if USE_REPETITIVE_SPEEDUP if (salt == old_salt) return; old_salt = salt; @@ -435,7 +515,7 @@ des_setkey(struct des_ctx *ctx, const char *key) rawkey0 = ntohl(*(const uint32_t *) key); rawkey1 = ntohl(*(const uint32_t *) (key + 4)); -#if WANT_REPETITIVE_SPEEDUP +#if USE_REPETITIVE_SPEEDUP if ((rawkey0 | rawkey1) && rawkey0 == old_rawkey0 && rawkey1 == old_rawkey1 @@ -453,7 +533,7 @@ des_setkey(struct des_ctx *ctx, const char *key) #endif /* - * Do key permutation and split into two 28-bit subkeys. + * Do key permutation and split into two 28-bit subkeys. */ k0 = key_perm_maskl[0][rawkey0 >> 25] | key_perm_maskl[1][(rawkey0 >> 17) & 0x7f] @@ -472,7 +552,7 @@ des_setkey(struct des_ctx *ctx, const char *key) | key_perm_maskr[6][(rawkey1 >> 9) & 0x7f] | key_perm_maskr[7][(rawkey1 >> 1) & 0x7f]; /* - * Rotate subkeys and do compression permutation. + * Rotate subkeys and do compression permutation. */ shifts = 0; for (round = 0; round < 16; round++) { @@ -483,7 +563,9 @@ des_setkey(struct des_ctx *ctx, const char *key) t0 = (k0 << shifts) | (k0 >> (28 - shifts)); t1 = (k1 << shifts) | (k1 >> (28 - shifts)); -// de_keysl[15 - round] = +#if USE_de_keys + de_keysl[15 - round] = +#endif en_keysl[round] = comp_maskl[0][(t0 >> 21) & 0x7f] | comp_maskl[1][(t0 >> 14) & 0x7f] | comp_maskl[2][(t0 >> 7) & 0x7f] @@ -493,7 +575,9 @@ des_setkey(struct des_ctx *ctx, const char *key) | comp_maskl[6][(t1 >> 7) & 0x7f] | comp_maskl[7][t1 & 0x7f]; -// de_keysr[15 - round] = +#if USE_de_keys + de_keysr[15 - round] = +#endif en_keysr[round] = comp_maskr[0][(t0 >> 21) & 0x7f] | comp_maskr[1][(t0 >> 14) & 0x7f] | comp_maskr[2][(t0 >> 7) & 0x7f] @@ -519,7 +603,9 @@ do_des(struct des_ctx *ctx, /*uint32_t l_in, uint32_t r_in,*/ uint32_t *l_out, u int round; /* Do initial permutation (IP). */ -#if 0 +#if USE_ip_mask + uint32_t l_in = 0; + uint32_t r_in = 0; l = ip_maskl[0][l_in >> 24] | ip_maskl[1][(l_in >> 16) & 0xff] | ip_maskl[2][(l_in >> 8) & 0xff] @@ -588,9 +674,8 @@ do_des(struct des_ctx *ctx, /*uint32_t l_in, uint32_t r_in,*/ uint32_t *l_out, u r = l; l = f; } while (--count); - /* - * Do final permutation (inverse of IP). - */ + + /* Do final permutation (inverse of IP). */ *l_out = fp_maskl[0][l >> 24] | fp_maskl[1][(l >> 16) & 0xff] | fp_maskl[2][(l >> 8) & 0xff] @@ -613,7 +698,8 @@ do_des(struct des_ctx *ctx, /*uint32_t l_in, uint32_t r_in,*/ uint32_t *l_out, u static char * NOINLINE -des_crypt(struct des_ctx *ctx, char output[21], const unsigned char *key, const unsigned char *setting) +des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], + const unsigned char *key, const unsigned char *setting) { uint32_t salt, l, r0, r1, keybuf[2]; uint8_t *p, *q; @@ -679,7 +765,11 @@ des_crypt(struct des_ctx *ctx, char output[21], const unsigned char *key, const return output; } -#undef WANT_REPETITIVE_SPEEDUP +#undef USE_PRECOMPUTED_u_sbox +#undef USE_REPETITIVE_SPEEDUP +#undef USE_ip_mask +#undef USE_de_keys + #undef C #undef init_perm #undef final_perm @@ -695,8 +785,8 @@ des_crypt(struct des_ctx *ctx, char output[21], const unsigned char *key, const #undef inv_key_perm #undef en_keysl #undef en_keysr -//#undef de_keysl -//#undef de_keysr +#undef de_keysl +#undef de_keysr #undef ip_maskl #undef ip_maskr #undef fp_maskl