libbb/hash_md5_sha.c

   1 /* vi: set sw=4 ts=4: */
   2 /*
   3  * Utility routines.
   4  *
   5  * Copyright (C) 2010 Denys Vlasenko
   6  *
   7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   8  */
   9
  10 #include "libbb.h"
  11
  12 /* gcc 4.2.1 optimizes rotr64 better with inline than with macro
  13  * (for rotX32, there is no difference). Why? My guess is that
  14  * macro requires clever common subexpression elimination heuristics
  15  * in gcc, while inline basically forces it to happen.
  16  */
  17 //#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
  18 static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
  19 {
  20         return (x << n) | (x >> (32 - n));
  21 }
  22 //#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
  23 static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
  24 {
  25         return (x >> n) | (x << (32 - n));
  26 }
  27 /* rotr64 in needed for sha512 only: */
  28 //#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
  29 static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
  30 {
  31         return (x >> n) | (x << (64 - n));
  32 }
  33
  34
  35 /* Feed data through a temporary buffer.
  36  * The internal buffer remembers previous data until it has 64
  37  * bytes worth to pass on.
  38  */
  39 static void FAST_FUNC common64_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
  40 {
  41         unsigned bufpos = ctx->total64 & 63;
  42
  43         ctx->total64 += len;
  44
  45         while (1) {
  46                 unsigned remaining = 64 - bufpos;
  47                 if (remaining > len)
  48                         remaining = len;
  49                 /* Copy data into aligned buffer */
  50                 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
  51                 len -= remaining;
  52                 buffer = (const char *)buffer + remaining;
  53                 bufpos += remaining;
  54                 /* clever way to do "if (bufpos != 64) break; ... ; bufpos = 0;" */
  55                 bufpos -= 64;
  56                 if (bufpos != 0)
  57                         break;
  58                 /* Buffer is filled up, process it */
  59                 ctx->process_block(ctx);
  60                 /*bufpos = 0; - already is */
  61         }
  62 }
  63
  64 /* Process the remaining bytes in the buffer */
  65 static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
  66 {
  67         unsigned bufpos = ctx->total64 & 63;
  68         /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
  69         ctx->wbuffer[bufpos++] = 0x80;
  70
  71         /* This loop iterates either once or twice, no more, no less */
  72         while (1) {
  73                 unsigned remaining = 64 - bufpos;
  74                 memset(ctx->wbuffer + bufpos, 0, remaining);
  75                 /* Do we have enough space for the length count? */
  76                 if (remaining >= 8) {
  77                         /* Store the 64-bit counter of bits in the buffer */
  78                         uint64_t t = ctx->total64 << 3;
  79                         if (swap_needed)
  80                                 t = bb_bswap_64(t);
  81                         /* wbuffer is suitably aligned for this */
  82                         *(uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
  83                 }
  84                 ctx->process_block(ctx);
  85                 if (remaining >= 8)
  86                         break;
  87                 bufpos = 0;
  88         }
  89 }
  90
  91
  92 /*
  93  * Compute MD5 checksum of strings according to the
  94  * definition of MD5 in RFC 1321 from April 1992.
  95  *
  96  * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
  97  *
  98  * Copyright (C) 1995-1999 Free Software Foundation, Inc.
  99  * Copyright (C) 2001 Manuel Novoa III
 100  * Copyright (C) 2003 Glenn L. McGrath
 101  * Copyright (C) 2003 Erik Andersen
 102  *
 103  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 104  */
 105
 106 /* 0: fastest, 3: smallest */
 107 #if CONFIG_MD5_SIZE_VS_SPEED < 0
 108 # define MD5_SIZE_VS_SPEED 0
 109 #elif CONFIG_MD5_SIZE_VS_SPEED > 3
 110 # define MD5_SIZE_VS_SPEED 3
 111 #else
 112 # define MD5_SIZE_VS_SPEED CONFIG_MD5_SIZE_VS_SPEED
 113 #endif
 114
 115 /* These are the four functions used in the four steps of the MD5 algorithm
 116  * and defined in the RFC 1321.  The first function is a little bit optimized
 117  * (as found in Colin Plumbs public domain implementation).
 118  * #define FF(b, c, d) ((b & c) | (~b & d))
 119  */
 120 #undef FF
 121 #undef FG
 122 #undef FH
 123 #undef FI
 124 #define FF(b, c, d) (d ^ (b & (c ^ d)))
 125 #define FG(b, c, d) FF(d, b, c)
 126 #define FH(b, c, d) (b ^ c ^ d)
 127 #define FI(b, c, d) (c ^ (b | ~d))
 128
 129 /* Hash a single block, 64 bytes long and 4-byte aligned */
 130 static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
 131 {
 132 #if MD5_SIZE_VS_SPEED > 0
 133         /* Before we start, one word to the strange constants.
 134            They are defined in RFC 1321 as
 135            T[i] = (int)(4294967296.0 * fabs(sin(i))), i=1..64
 136          */
 137         static const uint32_t C_array[] = {
 138                 /* round 1 */
 139                 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
 140                 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
 141                 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
 142                 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
 143                 /* round 2 */
 144                 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
 145                 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
 146                 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
 147                 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
 148                 /* round 3 */
 149                 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
 150                 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
 151                 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
 152                 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
 153                 /* round 4 */
 154                 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
 155                 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
 156                 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
 157                 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
 158         };
 159         static const char P_array[] ALIGN1 = {
 160 # if MD5_SIZE_VS_SPEED > 1
 161                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,   /* 1 */
 162 # endif
 163                 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12,   /* 2 */
 164                 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2,   /* 3 */
 165                 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9    /* 4 */
 166         };
 167 #endif
 168         uint32_t *words = (void*) ctx->wbuffer;
 169         uint32_t A = ctx->hash[0];
 170         uint32_t B = ctx->hash[1];
 171         uint32_t C = ctx->hash[2];
 172         uint32_t D = ctx->hash[3];
 173
 174 #if MD5_SIZE_VS_SPEED >= 2  /* 2 or 3 */
 175
 176         static const char S_array[] ALIGN1 = {
 177                 7, 12, 17, 22,
 178                 5, 9, 14, 20,
 179                 4, 11, 16, 23,
 180                 6, 10, 15, 21
 181         };
 182         const uint32_t *pc;
 183         const char *pp;
 184         const char *ps;
 185         int i;
 186         uint32_t temp;
 187
 188 # if BB_BIG_ENDIAN
 189         for (i = 0; i < 16; i++)
 190                 words[i] = SWAP_LE32(words[i]);
 191 # endif
 192
 193 # if MD5_SIZE_VS_SPEED == 3
 194         pc = C_array;
 195         pp = P_array;
 196         ps = S_array - 4;
 197
 198         for (i = 0; i < 64; i++) {
 199                 if ((i & 0x0f) == 0)
 200                         ps += 4;
 201                 temp = A;
 202                 switch (i >> 4) {
 203                 case 0:
 204                         temp += FF(B, C, D);
 205                         break;
 206                 case 1:
 207                         temp += FG(B, C, D);
 208                         break;
 209                 case 2:
 210                         temp += FH(B, C, D);
 211                         break;
 212                 case 3:
 213                         temp += FI(B, C, D);
 214                 }
 215                 temp += words[(int) (*pp++)] + *pc++;
 216                 temp = rotl32(temp, ps[i & 3]);
 217                 temp += B;
 218                 A = D;
 219                 D = C;
 220                 C = B;
 221                 B = temp;
 222         }
 223 # else  /* MD5_SIZE_VS_SPEED == 2 */
 224         pc = C_array;
 225         pp = P_array;
 226         ps = S_array;
 227
 228         for (i = 0; i < 16; i++) {
 229                 temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
 230                 temp = rotl32(temp, ps[i & 3]);
 231                 temp += B;
 232                 A = D;
 233                 D = C;
 234                 C = B;
 235                 B = temp;
 236         }
 237         ps += 4;
 238         for (i = 0; i < 16; i++) {
 239                 temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
 240                 temp = rotl32(temp, ps[i & 3]);
 241                 temp += B;
 242                 A = D;
 243                 D = C;
 244                 C = B;
 245                 B = temp;
 246         }
 247         ps += 4;
 248         for (i = 0; i < 16; i++) {
 249                 temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
 250                 temp = rotl32(temp, ps[i & 3]);
 251                 temp += B;
 252                 A = D;
 253                 D = C;
 254                 C = B;
 255                 B = temp;
 256         }
 257         ps += 4;
 258         for (i = 0; i < 16; i++) {
 259                 temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
 260                 temp = rotl32(temp, ps[i & 3]);
 261                 temp += B;
 262                 A = D;
 263                 D = C;
 264                 C = B;
 265                 B = temp;
 266         }
 267 # endif
 268         /* Add checksum to the starting values */
 269         ctx->hash[0] += A;
 270         ctx->hash[1] += B;
 271         ctx->hash[2] += C;
 272         ctx->hash[3] += D;
 273
 274 #else  /* MD5_SIZE_VS_SPEED == 0 or 1 */
 275
 276         uint32_t A_save = A;
 277         uint32_t B_save = B;
 278         uint32_t C_save = C;
 279         uint32_t D_save = D;
 280 # if MD5_SIZE_VS_SPEED == 1
 281         const uint32_t *pc;
 282         const char *pp;
 283         int i;
 284 # endif
 285
 286         /* First round: using the given function, the context and a constant
 287            the next context is computed.  Because the algorithm's processing
 288            unit is a 32-bit word and it is determined to work on words in
 289            little endian byte order we perhaps have to change the byte order
 290            before the computation.  To reduce the work for the next steps
 291            we save swapped words in WORDS array.  */
 292 # undef OP
 293 # define OP(a, b, c, d, s, T) \
 294         do { \
 295                 a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
 296                 words++; \
 297                 a = rotl32(a, s); \
 298                 a += b; \
 299         } while (0)
 300
 301         /* Round 1 */
 302 # if MD5_SIZE_VS_SPEED == 1
 303         pc = C_array;
 304         for (i = 0; i < 4; i++) {
 305                 OP(A, B, C, D, 7, *pc++);
 306                 OP(D, A, B, C, 12, *pc++);
 307                 OP(C, D, A, B, 17, *pc++);
 308                 OP(B, C, D, A, 22, *pc++);
 309         }
 310 # else
 311         OP(A, B, C, D, 7, 0xd76aa478);
 312         OP(D, A, B, C, 12, 0xe8c7b756);
 313         OP(C, D, A, B, 17, 0x242070db);
 314         OP(B, C, D, A, 22, 0xc1bdceee);
 315         OP(A, B, C, D, 7, 0xf57c0faf);
 316         OP(D, A, B, C, 12, 0x4787c62a);
 317         OP(C, D, A, B, 17, 0xa8304613);
 318         OP(B, C, D, A, 22, 0xfd469501);
 319         OP(A, B, C, D, 7, 0x698098d8);
 320         OP(D, A, B, C, 12, 0x8b44f7af);
 321         OP(C, D, A, B, 17, 0xffff5bb1);
 322         OP(B, C, D, A, 22, 0x895cd7be);
 323         OP(A, B, C, D, 7, 0x6b901122);
 324         OP(D, A, B, C, 12, 0xfd987193);
 325         OP(C, D, A, B, 17, 0xa679438e);
 326         OP(B, C, D, A, 22, 0x49b40821);
 327 # endif
 328         words -= 16;
 329
 330         /* For the second to fourth round we have the possibly swapped words
 331            in WORDS.  Redefine the macro to take an additional first
 332            argument specifying the function to use.  */
 333 # undef OP
 334 # define OP(f, a, b, c, d, k, s, T) \
 335         do { \
 336                 a += f(b, c, d) + words[k] + T; \
 337                 a = rotl32(a, s); \
 338                 a += b; \
 339         } while (0)
 340
 341         /* Round 2 */
 342 # if MD5_SIZE_VS_SPEED == 1
 343         pp = P_array;
 344         for (i = 0; i < 4; i++) {
 345                 OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
 346                 OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
 347                 OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
 348                 OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
 349         }
 350 # else
 351         OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
 352         OP(FG, D, A, B, C, 6, 9, 0xc040b340);
 353         OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
 354         OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
 355         OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
 356         OP(FG, D, A, B, C, 10, 9, 0x02441453);
 357         OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
 358         OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
 359         OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
 360         OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
 361         OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
 362         OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
 363         OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
 364         OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
 365         OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
 366         OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
 367 # endif
 368
 369         /* Round 3 */
 370 # if MD5_SIZE_VS_SPEED == 1
 371         for (i = 0; i < 4; i++) {
 372                 OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
 373                 OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
 374                 OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
 375                 OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
 376         }
 377 # else
 378         OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
 379         OP(FH, D, A, B, C, 8, 11, 0x8771f681);
 380         OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
 381         OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
 382         OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
 383         OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
 384         OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
 385         OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
 386         OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
 387         OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
 388         OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
 389         OP(FH, B, C, D, A, 6, 23, 0x04881d05);
 390         OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
 391         OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
 392         OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
 393         OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
 394 # endif
 395
 396         /* Round 4 */
 397 # if MD5_SIZE_VS_SPEED == 1
 398         for (i = 0; i < 4; i++) {
 399                 OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
 400                 OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
 401                 OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
 402                 OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
 403         }
 404 # else
 405         OP(FI, A, B, C, D, 0, 6, 0xf4292244);
 406         OP(FI, D, A, B, C, 7, 10, 0x432aff97);
 407         OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
 408         OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
 409         OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
 410         OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
 411         OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
 412         OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
 413         OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
 414         OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
 415         OP(FI, C, D, A, B, 6, 15, 0xa3014314);
 416         OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
 417         OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
 418         OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
 419         OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
 420         OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
 421 # undef OP
 422 # endif
 423         /* Add checksum to the starting values */
 424         ctx->hash[0] = A_save + A;
 425         ctx->hash[1] = B_save + B;
 426         ctx->hash[2] = C_save + C;
 427         ctx->hash[3] = D_save + D;
 428 #endif
 429 }
 430 #undef FF
 431 #undef FG
 432 #undef FH
 433 #undef FI
 434
 435 /* Initialize structure containing state of computation.
 436  * (RFC 1321, 3.3: Step 3)
 437  */
 438 void FAST_FUNC md5_begin(md5_ctx_t *ctx)
 439 {
 440         ctx->hash[0] = 0x67452301;
 441         ctx->hash[1] = 0xefcdab89;
 442         ctx->hash[2] = 0x98badcfe;
 443         ctx->hash[3] = 0x10325476;
 444         ctx->total64 = 0;
 445         ctx->process_block = md5_process_block64;
 446 }
 447
 448 /* Used also for sha1 and sha256 */
 449 void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
 450 {
 451         common64_hash(ctx, buffer, len);
 452 }
 453
 454 /* Process the remaining bytes in the buffer and put result from CTX
 455  * in first 16 bytes following RESBUF.  The result is always in little
 456  * endian byte order, so that a byte-wise output yields to the wanted
 457  * ASCII representation of the message digest.
 458  */
 459 void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
 460 {
 461         /* MD5 stores total in LE, need to swap on BE arches: */
 462         common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
 463
 464         /* The MD5 result is in little endian byte order */
 465 #if BB_BIG_ENDIAN
 466         ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
 467         ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
 468         ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
 469         ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
 470 #endif
 471         memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
 472 }
 473
 474
 475 /*
 476  * Based on shasum from http://www.netsw.org/crypto/hash/
 477  * Majorly hacked up to use Dr Brian Gladman's sha1 code
 478  *
 479  * Copyright (C) 2002 Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
 480  * Copyright (C) 2003 Glenn L. McGrath
 481  * Copyright (C) 2003 Erik Andersen
 482  *
 483  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 484  *
 485  * ---------------------------------------------------------------------------
 486  * Issue Date: 10/11/2002
 487  *
 488  * This is a byte oriented version of SHA1 that operates on arrays of bytes
 489  * stored in memory. It runs at 22 cycles per byte on a Pentium P4 processor
 490  *
 491  * ---------------------------------------------------------------------------
 492  *
 493  * SHA256 and SHA512 parts are:
 494  * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
 495  * Shrank by Denys Vlasenko.
 496  *
 497  * ---------------------------------------------------------------------------
 498  *
 499  * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
 500  * and replace "4096" with something like "2000 + time(NULL) % 2097",
 501  * then rebuild and compare "shaNNNsum bigfile" results.
 502  */
 503
 504 static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 505 {
 506         unsigned t;
 507         uint32_t W[80], a, b, c, d, e;
 508         const uint32_t *words = (uint32_t*) ctx->wbuffer;
 509
 510         for (t = 0; t < 16; ++t)
 511                 W[t] = SWAP_BE32(words[t]);
 512         for (/*t = 16*/; t < 80; ++t) {
 513                 uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16];
 514                 W[t] = rotl32(T, 1);
 515         }
 516
 517         a = ctx->hash[0];
 518         b = ctx->hash[1];
 519         c = ctx->hash[2];
 520         d = ctx->hash[3];
 521         e = ctx->hash[4];
 522
 523 #undef ch
 524 #undef parity
 525 #undef maj
 526 #undef rnd
 527 #define ch(x,y,z)        ((z) ^ ((x) & ((y) ^ (z))))
 528 #define parity(x,y,z)    ((x) ^ (y) ^ (z))
 529 #define maj(x,y,z)       (((x) & (y)) | ((z) & ((x) | (y))))
 530 /* A normal version as set out in the FIPS.  */
 531 #define rnd(f,k) \
 532         do { \
 533                 uint32_t T = a; \
 534                 a = rotl32(a, 5) + f(b, c, d) + e + k + W[t]; \
 535                 e = d; \
 536                 d = c; \
 537                 c = rotl32(b, 30); \
 538                 b = T; \
 539         } while (0)
 540
 541         for (t = 0; t < 20; ++t)
 542                 rnd(ch, 0x5a827999);
 543
 544         for (/*t = 20*/; t < 40; ++t)
 545                 rnd(parity, 0x6ed9eba1);
 546
 547         for (/*t = 40*/; t < 60; ++t)
 548                 rnd(maj, 0x8f1bbcdc);
 549
 550         for (/*t = 60*/; t < 80; ++t)
 551                 rnd(parity, 0xca62c1d6);
 552 #undef ch
 553 #undef parity
 554 #undef maj
 555 #undef rnd
 556
 557         ctx->hash[0] += a;
 558         ctx->hash[1] += b;
 559         ctx->hash[2] += c;
 560         ctx->hash[3] += d;
 561         ctx->hash[4] += e;
 562 }
 563
 564 /* Constants for SHA512 from FIPS 180-2:4.2.3.
 565  * SHA256 constants from FIPS 180-2:4.2.2
 566  * are the most significant half of first 64 elements
 567  * of the same array.
 568  */
 569 static const uint64_t sha_K[80] = {
 570         0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
 571         0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
 572         0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
 573         0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
 574         0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
 575         0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
 576         0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
 577         0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
 578         0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
 579         0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
 580         0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
 581         0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
 582         0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
 583         0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
 584         0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
 585         0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
 586         0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
 587         0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
 588         0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
 589         0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
 590         0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
 591         0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
 592         0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
 593         0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
 594         0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
 595         0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
 596         0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
 597         0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
 598         0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
 599         0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
 600         0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
 601         0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
 602         0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, /* [64]+ are used for sha512 only */
 603         0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
 604         0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
 605         0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
 606         0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
 607         0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
 608         0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
 609         0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
 610 };
 611
 612 #undef Ch
 613 #undef Maj
 614 #undef S0
 615 #undef S1
 616 #undef R0
 617 #undef R1
 618
 619 static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
 620 {
 621         unsigned t;
 622         uint32_t W[64], a, b, c, d, e, f, g, h;
 623         const uint32_t *words = (uint32_t*) ctx->wbuffer;
 624
 625         /* Operators defined in FIPS 180-2:4.1.2.  */
 626 #define Ch(x, y, z) ((x & y) ^ (~x & z))
 627 #define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
 628 #define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
 629 #define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
 630 #define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
 631 #define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
 632
 633         /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2.  */
 634         for (t = 0; t < 16; ++t)
 635                 W[t] = SWAP_BE32(words[t]);
 636         for (/*t = 16*/; t < 64; ++t)
 637                 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 638
 639         a = ctx->hash[0];
 640         b = ctx->hash[1];
 641         c = ctx->hash[2];
 642         d = ctx->hash[3];
 643         e = ctx->hash[4];
 644         f = ctx->hash[5];
 645         g = ctx->hash[6];
 646         h = ctx->hash[7];
 647
 648         /* The actual computation according to FIPS 180-2:6.2.2 step 3.  */
 649         for (t = 0; t < 64; ++t) {
 650                 /* Need to fetch upper half of sha_K[t]
 651                  * (I hope compiler is clever enough to just fetch
 652                  * upper half)
 653                  */
 654                 uint32_t K_t = sha_K[t] >> 32;
 655                 uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
 656                 uint32_t T2 = S0(a) + Maj(a, b, c);
 657                 h = g;
 658                 g = f;
 659                 f = e;
 660                 e = d + T1;
 661                 d = c;
 662                 c = b;
 663                 b = a;
 664                 a = T1 + T2;
 665         }
 666 #undef Ch
 667 #undef Maj
 668 #undef S0
 669 #undef S1
 670 #undef R0
 671 #undef R1
 672         /* Add the starting values of the context according to FIPS 180-2:6.2.2
 673            step 4.  */
 674         ctx->hash[0] += a;
 675         ctx->hash[1] += b;
 676         ctx->hash[2] += c;
 677         ctx->hash[3] += d;
 678         ctx->hash[4] += e;
 679         ctx->hash[5] += f;
 680         ctx->hash[6] += g;
 681         ctx->hash[7] += h;
 682 }
 683
 684 static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
 685 {
 686         unsigned t;
 687         uint64_t W[80];
 688         /* On i386, having assignments here (not later as sha256 does)
 689          * produces 99 bytes smaller code with gcc 4.3.1
 690          */
 691         uint64_t a = ctx->hash[0];
 692         uint64_t b = ctx->hash[1];
 693         uint64_t c = ctx->hash[2];
 694         uint64_t d = ctx->hash[3];
 695         uint64_t e = ctx->hash[4];
 696         uint64_t f = ctx->hash[5];
 697         uint64_t g = ctx->hash[6];
 698         uint64_t h = ctx->hash[7];
 699         const uint64_t *words = (uint64_t*) ctx->wbuffer;
 700
 701         /* Operators defined in FIPS 180-2:4.1.2.  */
 702 #define Ch(x, y, z) ((x & y) ^ (~x & z))
 703 #define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
 704 #define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
 705 #define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
 706 #define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
 707 #define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
 708
 709         /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2.  */
 710         for (t = 0; t < 16; ++t)
 711                 W[t] = SWAP_BE64(words[t]);
 712         for (/*t = 16*/; t < 80; ++t)
 713                 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 714
 715         /* The actual computation according to FIPS 180-2:6.3.2 step 3.  */
 716         for (t = 0; t < 80; ++t) {
 717                 uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
 718                 uint64_t T2 = S0(a) + Maj(a, b, c);
 719                 h = g;
 720                 g = f;
 721                 f = e;
 722                 e = d + T1;
 723                 d = c;
 724                 c = b;
 725                 b = a;
 726                 a = T1 + T2;
 727         }
 728 #undef Ch
 729 #undef Maj
 730 #undef S0
 731 #undef S1
 732 #undef R0
 733 #undef R1
 734         /* Add the starting values of the context according to FIPS 180-2:6.3.2
 735            step 4.  */
 736         ctx->hash[0] += a;
 737         ctx->hash[1] += b;
 738         ctx->hash[2] += c;
 739         ctx->hash[3] += d;
 740         ctx->hash[4] += e;
 741         ctx->hash[5] += f;
 742         ctx->hash[6] += g;
 743         ctx->hash[7] += h;
 744 }
 745
 746
 747 void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
 748 {
 749         ctx->hash[0] = 0x67452301;
 750         ctx->hash[1] = 0xefcdab89;
 751         ctx->hash[2] = 0x98badcfe;
 752         ctx->hash[3] = 0x10325476;
 753         ctx->hash[4] = 0xc3d2e1f0;
 754         ctx->total64 = 0;
 755         ctx->process_block = sha1_process_block64;
 756 }
 757
 758 static const uint32_t init256[] = {
 759         0,
 760         0,
 761         0x6a09e667,
 762         0xbb67ae85,
 763         0x3c6ef372,
 764         0xa54ff53a,
 765         0x510e527f,
 766         0x9b05688c,
 767         0x1f83d9ab,
 768         0x5be0cd19,
 769 };
 770 static const uint32_t init512_lo[] = {
 771         0,
 772         0,
 773         0xf3bcc908,
 774         0x84caa73b,
 775         0xfe94f82b,
 776         0x5f1d36f1,
 777         0xade682d1,
 778         0x2b3e6c1f,
 779         0xfb41bd6b,
 780         0x137e2179,
 781 };
 782
 783 /* Initialize structure containing state of computation.
 784    (FIPS 180-2:5.3.2)  */
 785 void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
 786 {
 787         memcpy(&ctx->total64, init256, sizeof(init256));
 788         /*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
 789         ctx->process_block = sha256_process_block64;
 790 }
 791
 792 /* Initialize structure containing state of computation.
 793    (FIPS 180-2:5.3.3)  */
 794 void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
 795 {
 796         int i;
 797         /* Two extra iterations zero out ctx->total64[2] */
 798         uint64_t *tp = ctx->total64;
 799         for (i = 0; i < 2+8; i++)
 800                 tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
 801         /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
 802 }
 803
 804 void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 805 {
 806         unsigned bufpos = ctx->total64[0] & 127;
 807         unsigned remaining;
 808
 809         /* First increment the byte count.  FIPS 180-2 specifies the possible
 810            length of the file up to 2^128 _bits_.
 811            We compute the number of _bytes_ and convert to bits later.  */
 812         ctx->total64[0] += len;
 813         if (ctx->total64[0] < len)
 814                 ctx->total64[1]++;
 815 #if 0
 816         remaining = 128 - bufpos;
 817
 818         /* Hash whole blocks */
 819         while (len >= remaining) {
 820                 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
 821                 buffer = (const char *)buffer + remaining;
 822                 len -= remaining;
 823                 remaining = 128;
 824                 bufpos = 0;
 825                 sha512_process_block128(ctx);
 826         }
 827
 828         /* Save last, partial blosk */
 829         memcpy(ctx->wbuffer + bufpos, buffer, len);
 830 #else
 831         while (1) {
 832                 remaining = 128 - bufpos;
 833                 if (remaining > len)
 834                         remaining = len;
 835                 /* Copy data into aligned buffer */
 836                 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
 837                 len -= remaining;
 838                 buffer = (const char *)buffer + remaining;
 839                 bufpos += remaining;
 840                 /* clever way to do "if (bufpos != 128) break; ... ; bufpos = 0;" */
 841                 bufpos -= 128;
 842                 if (bufpos != 0)
 843                         break;
 844                 /* Buffer is filled up, process it */
 845                 sha512_process_block128(ctx);
 846                 /*bufpos = 0; - already is */
 847         }
 848 #endif
 849 }
 850
 851 /* Used also for sha256 */
 852 void FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
 853 {
 854         unsigned hash_size;
 855
 856         /* SHA stores total in BE, need to swap on LE arches: */
 857         common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
 858
 859         hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
 860         /* This way we do not impose alignment constraints on resbuf: */
 861         if (BB_LITTLE_ENDIAN) {
 862                 unsigned i;
 863                 for (i = 0; i < hash_size; ++i)
 864                         ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
 865         }
 866         memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * hash_size);
 867 }
 868
 869 void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
 870 {
 871         unsigned bufpos = ctx->total64[0] & 127;
 872
 873         /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
 874         ctx->wbuffer[bufpos++] = 0x80;
 875
 876         while (1) {
 877                 unsigned remaining = 128 - bufpos;
 878                 memset(ctx->wbuffer + bufpos, 0, remaining);
 879                 if (remaining >= 16) {
 880                         /* Store the 128-bit counter of bits in the buffer in BE format */
 881                         uint64_t t;
 882                         t = ctx->total64[0] << 3;
 883                         t = SWAP_BE64(t);
 884                         *(uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
 885                         t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
 886                         t = SWAP_BE64(t);
 887                         *(uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
 888                 }
 889                 sha512_process_block128(ctx);
 890                 if (remaining >= 16)
 891                         break;
 892                 bufpos = 0;
 893         }
 894
 895         if (BB_LITTLE_ENDIAN) {
 896                 unsigned i;
 897                 for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
 898                         ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
 899         }
 900         memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
 901 }