libarchive: open_zipped() does not need to check extensions for e.g. gzip
[oweals/busybox.git] / libbb / hash_md5_sha.c
index 4cd2244a182f4158ef20d8d7a8e9e7a15cdb90b1..3f743ac75029cebf47da0ed8b460a07ab04fa8d1 100644 (file)
@@ -56,7 +56,7 @@ static void FAST_FUNC common64_hash(md5_ctx_t *ctx, const void *buffer, size_t l
                len -= remaining;
                buffer = (const char *)buffer + remaining;
                bufpos += remaining;
-               /* clever way to do "if (bufpos != 64) break; ... ; bufpos = 0;" */
+               /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
                bufpos -= 64;
                if (bufpos != 0)
                        break;
@@ -84,7 +84,7 @@ static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
                        if (swap_needed)
                                t = bb_bswap_64(t);
                        /* wbuffer is suitably aligned for this */
-                       *(uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
+                       *(bb__aliased_uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
                }
                ctx->process_block(ctx);
                if (remaining >= 8)
@@ -190,10 +190,9 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
        int i;
        uint32_t temp;
 
-# if BB_BIG_ENDIAN
-       for (i = 0; i < 16; i++)
-               words[i] = SWAP_LE32(words[i]);
-# endif
+       if (BB_BIG_ENDIAN)
+               for (i = 0; i < 16; i++)
+                       words[i] = SWAP_LE32(words[i]);
 
 # if MD5_SMALL == 3
        pc = C_array;
@@ -467,12 +466,13 @@ void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
        common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
 
        /* The MD5 result is in little endian byte order */
-#if BB_BIG_ENDIAN
-       ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
-       ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
-       ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
-       ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
-#endif
+       if (BB_BIG_ENDIAN) {
+               ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
+               ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
+               ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
+               ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
+       }
+
        memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
 }
 
@@ -839,7 +839,7 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
                len -= remaining;
                buffer = (const char *)buffer + remaining;
                bufpos += remaining;
-               /* clever way to do "if (bufpos != 128) break; ... ; bufpos = 0;" */
+               /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
                bufpos -= 128;
                if (bufpos != 0)
                        break;
@@ -883,10 +883,10 @@ void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
                        uint64_t t;
                        t = ctx->total64[0] << 3;
                        t = SWAP_BE64(t);
-                       *(uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
+                       *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
                        t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
                        t = SWAP_BE64(t);
-                       *(uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
+                       *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
                }
                sha512_process_block128(ctx);
                if (remaining >= 16)
@@ -926,66 +926,65 @@ void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
 # define SHA3_SMALL CONFIG_SHA3_SMALL
 #endif
 
-#define ARCH_IS_64BIT (sizeof(long) >= sizeof(uint64_t))
-
 enum {
-       cKeccakR_SizeInBytes = 576 / 8,
-       cKeccakNumberOfRounds = 24,
+       SHA3_IBLK_BYTES = 72, /* 576 bits / 8 */
 };
 
-/* Elements should be 64-bit, but top half is always zero or 0x80000000.
- * It is encoded as a separate word below.
- * Same is true for 31th bits.
+/*
+ * In the crypto literature this function is usually called Keccak-f().
  */
-static const uint16_t KeccakF_RoundConstants[cKeccakNumberOfRounds] = {
-       0x0001UL,
-       0x8082UL,
-       0x808aUL,
-       0x8000UL,
-       0x808bUL,
-       0x0001UL,
-       0x8081UL,
-       0x8009UL,
-       0x008aUL,
-       0x0088UL,
-       0x8009UL,
-       0x000aUL,
-       0x808bUL,
-       0x008bUL,
-       0x8089UL,
-       0x8003UL,
-       0x8002UL,
-       0x0080UL,
-       0x800aUL,
-       0x000aUL,
-       0x8081UL,
-       0x8080UL,
-       0x0001UL,
-       0x8008UL
-};
-/* 0th first - 0011 0011 0000 0111 1101 1101: */
-#define KeccakF_RoundConstantBit63 ((uint32_t)(0x3307dd00))
-/* 0th first - 0001 0110 0011 1000 0001 1011: */
-#define KeccakF_RoundConstantBit31 ((uint32_t)(0x16381b00))
-
-static const uint8_t KeccakF_RotationConstants[25] = {
-       1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25, 43, 62,
-       18, 39, 61, 20, 44
-};
-
-static const uint8_t KeccakF_PiLane[25] = {
-       10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20,
-       14, 22, 9, 6, 1
-};
+static void sha3_process_block72(uint64_t *state)
+{
+       enum { NROUNDS = 24 };
 
-static const uint8_t KeccakF_Mod5[10] = {
-       0, 1, 2, 3, 4, 0, 1, 2, 3, 4
-};
+       /* Elements should be 64-bit, but top half is always zero or 0x80000000.
+        * We encode 63rd bits in a separate word below.
+        * Same is true for 31st bits, which lets us use 16-bit table instead of 64-bit.
+        * The speed penalty is lost in the noise.
+        */
+       static const uint16_t IOTA_CONST[NROUNDS] = {
+               0x0001,
+               0x8082,
+               0x808a,
+               0x8000,
+               0x808b,
+               0x0001,
+               0x8081,
+               0x8009,
+               0x008a,
+               0x0088,
+               0x8009,
+               0x000a,
+               0x808b,
+               0x008b,
+               0x8089,
+               0x8003,
+               0x8002,
+               0x0080,
+               0x800a,
+               0x000a,
+               0x8081,
+               0x8080,
+               0x0001,
+               0x8008,
+       };
+       /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
+       const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
+       /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
+       const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
+
+       static const uint8_t ROT_CONST[24] = {
+               1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
+               27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
+       };
+       static const uint8_t PI_LANE[24] = {
+               10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
+               15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
+       };
+       /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/
 
-static void KeccakF(uint64_t *state)
-{
-       uint8_t x, y;
-       int round;
+       unsigned x, y;
+       unsigned round;
 
        if (BB_BIG_ENDIAN) {
                for (x = 0; x < 25; x++) {
@@ -993,7 +992,7 @@ static void KeccakF(uint64_t *state)
                }
        }
 
-       for (round = 0; round < cKeccakNumberOfRounds; ++round) {
+       for (round = 0; round < NROUNDS; ++round) {
                /* Theta */
                {
                        uint64_t BC[10];
@@ -1003,23 +1002,16 @@ static void KeccakF(uint64_t *state)
                                        ^ state[x + 15] ^ state[x + 20];
                        }
                        /* Using 2x5 vector above eliminates the need to use
-                        * [Mod5[x+N]] index trick below to calculate (x+N) % 5,
+                        * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
                         * and the code is a bit _smaller_.
                         */
                        for (x = 0; x < 5; ++x) {
                                uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
-                               if (SHA3_SMALL && !ARCH_IS_64BIT) {
-                                       for (y = 0; y <= 20; y += 5)
-                                               state[x + y] ^= temp;
-                               } else {
-                                       /* On 64-bit, this is also smaller,
-                                        * not only faster, than loop */
-                                       state[x] ^= temp;
-                                       state[x + 5] ^= temp;
-                                       state[x + 10] ^= temp;
-                                       state[x + 15] ^= temp;
-                                       state[x + 20] ^= temp;
-                               }
+                               state[x] ^= temp;
+                               state[x + 5] ^= temp;
+                               state[x + 10] ^= temp;
+                               state[x + 15] ^= temp;
+                               state[x + 20] ^= temp;
                        }
                }
 
@@ -1027,22 +1019,24 @@ static void KeccakF(uint64_t *state)
                if (SHA3_SMALL) {
                        uint64_t t1 = state[1];
                        for (x = 0; x < 24; ++x) {
-                               uint64_t t0 = state[KeccakF_PiLane[x]];
-                               state[KeccakF_PiLane[x]] = rotl64(t1, KeccakF_RotationConstants[x]);
+                               uint64_t t0 = state[PI_LANE[x]];
+                               state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
                                t1 = t0;
                        }
                } else {
                        /* Especially large benefit for 32-bit arch (75% faster):
                         * 64-bit rotations by non-constant usually are SLOW on those.
                         * We resort to unrolling here.
-                        * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[],
+                        * This optimizes out PI_LANE[] and ROT_CONST[],
                         * but generates 300-500 more bytes of code.
                         */
                        uint64_t t0;
                        uint64_t t1 = state[1];
 #define RhoPi_twice(x) \
-                       t0 = state[KeccakF_PiLane[x  ]]; state[KeccakF_PiLane[x  ]] = rotl64(t1, KeccakF_RotationConstants[x  ]); \
-                       t1 = state[KeccakF_PiLane[x+1]]; state[KeccakF_PiLane[x+1]] = rotl64(t0, KeccakF_RotationConstants[x+1]);
+       t0 = state[PI_LANE[x  ]]; \
+       state[PI_LANE[x  ]] = rotl64(t1, ROT_CONST[x  ]); \
+       t1 = state[PI_LANE[x+1]]; \
+       state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
                        RhoPi_twice(0); RhoPi_twice(2);
                        RhoPi_twice(4); RhoPi_twice(6);
                        RhoPi_twice(8); RhoPi_twice(10);
@@ -1054,38 +1048,23 @@ static void KeccakF(uint64_t *state)
 
                /* Chi */
                for (y = 0; y <= 20; y += 5) {
-                       if (SHA3_SMALL) {
-                               uint64_t BC[5];
-                               BC[0] = state[y + 0];
-                               BC[1] = state[y + 1];
-                               BC[2] = state[y + 2];
-                               BC[3] = state[y + 3];
-                               BC[4] = state[y + 4];
-                               for (x = 0; x < 5; ++x) {
-                                       state[y + x] =
-                                           BC[x] ^ ((~BC[KeccakF_Mod5[x + 1]]) &
-                                                    BC[KeccakF_Mod5[x + 2]]);
-                               }
-                       } else {
-                               /* 32-bit x86: +50 bytes code, 10% faster */
-                               uint64_t BC0, BC1, BC2, BC3, BC4;
-                               BC0 = state[y + 0];
-                               BC1 = state[y + 1];
-                               BC2 = state[y + 2];
-                               state[y + 0] = BC0 ^ ((~BC1) & BC2);
-                               BC3 = state[y + 3];
-                               state[y + 1] = BC1 ^ ((~BC2) & BC3);
-                               BC4 = state[y + 4];
-                               state[y + 2] = BC2 ^ ((~BC3) & BC4);
-                               state[y + 3] = BC3 ^ ((~BC4) & BC0);
-                               state[y + 4] = BC4 ^ ((~BC0) & BC1);
-                       }
+                       uint64_t BC0, BC1, BC2, BC3, BC4;
+                       BC0 = state[y + 0];
+                       BC1 = state[y + 1];
+                       BC2 = state[y + 2];
+                       state[y + 0] = BC0 ^ ((~BC1) & BC2);
+                       BC3 = state[y + 3];
+                       state[y + 1] = BC1 ^ ((~BC2) & BC3);
+                       BC4 = state[y + 4];
+                       state[y + 2] = BC2 ^ ((~BC3) & BC4);
+                       state[y + 3] = BC3 ^ ((~BC4) & BC0);
+                       state[y + 4] = BC4 ^ ((~BC0) & BC1);
                }
 
                /* Iota */
-               state[0] ^= KeccakF_RoundConstants[round]
-                       | (uint32_t)((KeccakF_RoundConstantBit31 << round) & 0x80000000)
-                       | (uint64_t)((KeccakF_RoundConstantBit63 << round) & 0x80000000) << 32;
+               state[0] ^= IOTA_CONST[round]
+                       | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
+                       | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
        }
 
        if (BB_BIG_ENDIAN) {
@@ -1100,62 +1079,90 @@ void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
        memset(ctx, 0, sizeof(*ctx));
 }
 
-void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buf, size_t bytes)
+void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len)
 {
-       const uint8_t *data = buf;
+#if SHA3_SMALL
+       const uint8_t *data = buffer;
+       unsigned bufpos = ctx->bytes_queued;
+
+       while (1) {
+               unsigned remaining = SHA3_IBLK_BYTES - bufpos;
+               if (remaining > len)
+                       remaining = len;
+               len -= remaining;
+               /* XOR data into buffer */
+               while (remaining != 0) {
+                       uint8_t *buf = (uint8_t*)ctx->state;
+                       buf[bufpos] ^= *data++;
+                       bufpos++;
+                       remaining--;
+               }
+               /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
+               bufpos -= SHA3_IBLK_BYTES;
+               if (bufpos != 0)
+                       break;
+               /* Buffer is filled up, process it */
+               sha3_process_block72(ctx->state);
+               /*bufpos = 0; - already is */
+       }
+       ctx->bytes_queued = bufpos + SHA3_IBLK_BYTES;
+#else
+       /* +50 bytes code size, but a bit faster because of long-sized XORs */
+       const uint8_t *data = buffer;
+       unsigned bufpos = ctx->bytes_queued;
 
        /* If already data in queue, continue queuing first */
-       while (bytes != 0 && ctx->bytes_queued != 0) {
-               uint8_t *buffer = (uint8_t*)ctx->state;
-               buffer[ctx->bytes_queued] ^= *data++;
-               bytes--;
-               ctx->bytes_queued++;
-               if (ctx->bytes_queued == cKeccakR_SizeInBytes) {
-                       KeccakF(ctx->state);
-                       ctx->bytes_queued = 0;
+       while (len != 0 && bufpos != 0) {
+               uint8_t *buf = (uint8_t*)ctx->state;
+               buf[bufpos] ^= *data++;
+               len--;
+               bufpos++;
+               if (bufpos == SHA3_IBLK_BYTES) {
+                       bufpos = 0;
+                       goto do_block;
                }
        }
 
        /* Absorb complete blocks */
-       while (bytes >= cKeccakR_SizeInBytes) {
+       while (len >= SHA3_IBLK_BYTES) {
                /* XOR data onto beginning of state[].
-                * We try to be efficient - operate on word at a time, not byte.
-                * Yet safe wrt unaligned access: can't just use "*(long*)data"...
+                * We try to be efficient - operate one word at a time, not byte.
+                * Careful wrt unaligned access: can't just use "*(long*)data"!
                 */
-               unsigned count = cKeccakR_SizeInBytes / sizeof(long);
-               long *buffer = (long*)ctx->state;
+               unsigned count = SHA3_IBLK_BYTES / sizeof(long);
+               long *buf = (long*)ctx->state;
                do {
                        long v;
                        move_from_unaligned_long(v, (long*)data);
-                       *buffer++ ^= v;
+                       *buf++ ^= v;
                        data += sizeof(long);
                } while (--count);
-
-               KeccakF(ctx->state);
-               bytes -= cKeccakR_SizeInBytes;
+               len -= SHA3_IBLK_BYTES;
+ do_block:
+               sha3_process_block72(ctx->state);
        }
 
        /* Queue remaining data bytes */
-       while (bytes != 0) {
-               uint8_t *buffer = (uint8_t*)ctx->state;
-               buffer[ctx->bytes_queued] ^= *data++;
-               ctx->bytes_queued++;
-               bytes--;
+       while (len != 0) {
+               uint8_t *buf = (uint8_t*)ctx->state;
+               buf[bufpos] ^= *data++;
+               bufpos++;
+               len--;
        }
+
+       ctx->bytes_queued = bufpos;
+#endif
 }
 
-void FAST_FUNC sha3_end(sha3_ctx_t *ctx, uint8_t *hashval)
+void FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf)
 {
        /* Padding */
-       uint8_t *buffer = (uint8_t*)ctx->state;
-       /* 0 is the number of bits in last, incomplete byte
-        * (that is, zero: we never have incomplete bytes):
-        */
-       buffer[ctx->bytes_queued] ^= 1 << 0;
-       buffer[cKeccakR_SizeInBytes - 1] ^= 0x80;
+       uint8_t *buf = (uint8_t*)ctx->state;
+       buf[ctx->bytes_queued]   ^= 1;
+       buf[SHA3_IBLK_BYTES - 1] ^= 0x80;
 
-       KeccakF(ctx->state);
+       sha3_process_block72(ctx->state);
 
        /* Output */
-       memcpy(hashval, ctx->state, 64);
+       memcpy(resbuf, ctx->state, 64);
 }