2 * Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
10 #include <openssl/opensslconf.h>
12 * IMPLEMENTATION NOTES.
14 * As you might have noticed 32-bit hash algorithms:
16 * - permit SHA_LONG to be wider than 32-bit
17 * - optimized versions implement two transform functions: one operating
18 * on [aligned] data in host byte order and one on data in input stream byte order;
20 * - share common byte-order neutral collector and padding function
21 * implementations, ../md32_common.h;
23 * Neither of the above applies to this SHA-512 implementation. Reasons
24 * [in reverse order] are:
26 * - it's the only 64-bit hash algorithm at the time of this writing,
27 * there is no need for common collector/padding implementation [yet];
28 * - by supporting only one transform function [which operates on
29 * *aligned* data in input stream byte order, big-endian in this case]
30 * we minimize burden of maintenance in two ways: a) collector/padding
31 * function is simpler; b) only one transform function to stare at;
32 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
33 * apply a number of optimizations to mitigate potential performance
34 * penalties caused by previous design decision;
38 * Implementation relies on the fact that "long long" is 64-bit on
39 * both 32- and 64-bit platforms. If some compiler vendor comes up
40 * with 128-bit long long, adjustment to sha.h would be required.
41 * As this implementation relies on 64-bit integer type, it's totally
42 * inappropriate for platforms which don't support it, most notably 16-bit platforms.
44 * <appro@fy.chalmers.se>
49 #include <openssl/crypto.h>
50 #include <openssl/sha.h>
51 #include <openssl/opensslv.h>
53 #include "internal/cryptlib.h"
/*
 * Targets (or an assembler back end, SHA512_ASM) whose block transform can
 * read input that is not aligned to a 64-bit word.  When this macro is
 * defined, SHA512_Update and SHA512_Transform hand the caller's pointer to
 * sha512_block_data_order directly instead of first copying each block into
 * the aligned buffer inside the context.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
63 int SHA384_Init(SHA512_CTX *c)
65 c->h[0] = U64(0xcbbb9d5dc1059ed8);
66 c->h[1] = U64(0x629a292a367cd507);
67 c->h[2] = U64(0x9159015a3070dd17);
68 c->h[3] = U64(0x152fecd8f70e5939);
69 c->h[4] = U64(0x67332667ffc00b31);
70 c->h[5] = U64(0x8eb44a8768581511);
71 c->h[6] = U64(0xdb0c2e0d64f98fa7);
72 c->h[7] = U64(0x47b5481dbefa4fa4);
77 c->md_len = SHA384_DIGEST_LENGTH;
81 int SHA512_Init(SHA512_CTX *c)
83 c->h[0] = U64(0x6a09e667f3bcc908);
84 c->h[1] = U64(0xbb67ae8584caa73b);
85 c->h[2] = U64(0x3c6ef372fe94f82b);
86 c->h[3] = U64(0xa54ff53a5f1d36f1);
87 c->h[4] = U64(0x510e527fade682d1);
88 c->h[5] = U64(0x9b05688c2b3e6c1f);
89 c->h[6] = U64(0x1f83d9abfb41bd6b);
90 c->h[7] = U64(0x5be0cd19137e2179);
95 c->md_len = SHA512_DIGEST_LENGTH;
102 void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
104 int SHA512_Final(unsigned char *md, SHA512_CTX *c)
106 unsigned char *p = (unsigned char *)c->u.p;
109 p[n] = 0x80; /* There always is a room for one */
111 if (n > (sizeof(c->u) - 16)) {
112 memset(p + n, 0, sizeof(c->u) - n);
114 sha512_block_data_order(c, p, 1);
117 memset(p + n, 0, sizeof(c->u) - 16 - n);
119 c->u.d[SHA_LBLOCK - 2] = c->Nh;
120 c->u.d[SHA_LBLOCK - 1] = c->Nl;
122 p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
123 p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
124 p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
125 p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
126 p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
127 p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
128 p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
129 p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
130 p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
131 p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
132 p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
133 p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
134 p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
135 p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
136 p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
137 p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
140 sha512_block_data_order(c, p, 1);
146 /* Let compiler decide if it's appropriate to unroll... */
147 case SHA384_DIGEST_LENGTH:
148 for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
149 SHA_LONG64 t = c->h[n];
151 *(md++) = (unsigned char)(t >> 56);
152 *(md++) = (unsigned char)(t >> 48);
153 *(md++) = (unsigned char)(t >> 40);
154 *(md++) = (unsigned char)(t >> 32);
155 *(md++) = (unsigned char)(t >> 24);
156 *(md++) = (unsigned char)(t >> 16);
157 *(md++) = (unsigned char)(t >> 8);
158 *(md++) = (unsigned char)(t);
161 case SHA512_DIGEST_LENGTH:
162 for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
163 SHA_LONG64 t = c->h[n];
165 *(md++) = (unsigned char)(t >> 56);
166 *(md++) = (unsigned char)(t >> 48);
167 *(md++) = (unsigned char)(t >> 40);
168 *(md++) = (unsigned char)(t >> 32);
169 *(md++) = (unsigned char)(t >> 24);
170 *(md++) = (unsigned char)(t >> 16);
171 *(md++) = (unsigned char)(t >> 8);
172 *(md++) = (unsigned char)(t);
175 /* ... as well as make sure md_len is not abused. */
183 int SHA384_Final(unsigned char *md, SHA512_CTX *c)
185 return SHA512_Final(md, c);
188 int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
191 unsigned char *p = c->u.p;
192 const unsigned char *data = (const unsigned char *)_data;
197 l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
200 if (sizeof(len) >= 8)
201 c->Nh += (((SHA_LONG64) len) >> 61);
205 size_t n = sizeof(c->u) - c->num;
208 memcpy(p + c->num, data, len), c->num += (unsigned int)len;
211 memcpy(p + c->num, data, n), c->num = 0;
213 sha512_block_data_order(c, p, 1);
217 if (len >= sizeof(c->u)) {
218 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
219 if ((size_t)data % sizeof(c->u.d[0]) != 0)
220 while (len >= sizeof(c->u))
221 memcpy(p, data, sizeof(c->u)),
222 sha512_block_data_order(c, p, 1),
223 len -= sizeof(c->u), data += sizeof(c->u);
226 sha512_block_data_order(c, data, len / sizeof(c->u)),
227 data += len, len %= sizeof(c->u), data -= len;
231 memcpy(p, data, len), c->num = (int)len;
236 int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
238 return SHA512_Update(c, data, len);
241 void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
243 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
244 if ((size_t)data % sizeof(c->u.d[0]) != 0)
245 memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
247 sha512_block_data_order(c, data, 1);
250 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
253 static unsigned char m[SHA384_DIGEST_LENGTH];
258 SHA512_Update(&c, d, n);
259 SHA512_Final(md, &c);
260 OPENSSL_cleanse(&c, sizeof(c));
264 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
267 static unsigned char m[SHA512_DIGEST_LENGTH];
272 SHA512_Update(&c, d, n);
273 SHA512_Final(md, &c);
274 OPENSSL_cleanse(&c, sizeof(c));
279 static const SHA_LONG64 K512[80] = {
280 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
281 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
282 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
283 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
284 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
285 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
286 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
287 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
288 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
289 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
290 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
291 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
292 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
293 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
294 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
295 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
296 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
297 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
298 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
299 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
300 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
301 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
302 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
303 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
304 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
305 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
306 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
307 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
308 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
309 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
310 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
311 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
312 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
313 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
314 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
315 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
316 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
317 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
318 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
319 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
/*
 * Compiler- and CPU-specific fast paths for two primitives used by the
 * block transform:
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits
 *                (rotrdi / ror / _rotr64 below);
 *   PULL64(x)  - fetch the 64-bit word at x in big-endian order, i.e. a
 *                byte-swapped load on the little-endian branches below.
 * The GCC-style inline-asm forms are skipped when OPENSSL_NO_ASM or
 * OPENSSL_NO_INLINE_ASM is defined; MSVC uses the _rotr64 intrinsic on
 * _WIN64 and the __pull64be helper on 32-bit x86.
 * NOTE(review): several asm continuation lines, the bodies of __pull64be
 * and the closing #endif directives are absent from this listing -
 * confirm against the pristine file before editing any branch.
 */
323 # if defined(__GNUC__) && __GNUC__>=2 && \
324 !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
325 # if defined(__x86_64) || defined(__x86_64__)
326 # define ROTR(a,n) ({ SHA_LONG64 ret; \
331 # if !defined(B_ENDIAN)
332 # define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
337 # elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
338 # if defined(I386_ONLY)
339 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
340 unsigned int hi=p[0],lo=p[1]; \
341 asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
342 "roll $16,%%eax; roll $16,%%edx; "\
343 "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
344 : "=a"(lo),"=d"(hi) \
345 : "0"(lo),"1"(hi) : "cc"); \
346 ((SHA_LONG64)hi)<<32|lo; })
348 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
349 unsigned int hi=p[0],lo=p[1]; \
350 asm ("bswapl %0; bswapl %1;" \
351 : "=r"(lo),"=r"(hi) \
352 : "0"(lo),"1"(hi)); \
353 ((SHA_LONG64)hi)<<32|lo; })
355 # elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
356 # define ROTR(a,n) ({ SHA_LONG64 ret; \
357 asm ("rotrdi %0,%1,%2" \
359 : "r"(a),"K"(n)); ret; })
360 # elif defined(__aarch64__)
361 # define ROTR(a,n) ({ SHA_LONG64 ret; \
362 asm ("ror %0,%1,%2" \
364 : "r"(a),"I"(n)); ret; })
365 # if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
366 __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
367 # define PULL64(x) ({ SHA_LONG64 ret; \
370 : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
373 # elif defined(_MSC_VER)
374 # if defined(_WIN64) /* applies to both IA-64 and AMD64 */
375 # pragma intrinsic(_rotr64)
376 # define ROTR(a,n) _rotr64((a),n)
378 # if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
379 !defined(OPENSSL_NO_INLINE_ASM)
380 # if defined(I386_ONLY)
381 static SHA_LONG64 __fastcall __pull64be(const void *x)
383 _asm mov edx,[ecx + 0]
384 _asm mov eax,[ecx + 4]
393 static SHA_LONG64 __fastcall __pull64be(const void *x)
395 _asm mov edx,[ecx + 0]
396 _asm mov eax,[ecx + 4]
401 # define PULL64(x) __pull64be(&(x))
/*
 * Portable fallbacks, used only when no compiler-specific definition of
 * the same macro was selected earlier (hence the #ifndef guards).
 *   B(x,j)     - byte j of the object x, shifted to its big-endian
 *                position within a 64-bit word;
 *   PULL64(x)  - the 64-bit big-endian value stored at x, assembled
 *                byte by byte so that alignment and host byte order do
 *                not matter;
 *   ROTR(x,s)  - x rotated right by s bits; x must be a 64-bit unsigned
 *                value and s must satisfy 0 < s < 64.
 * Every macro argument is parenthesized so expressions may be passed.
 */
# ifndef PULL64
#  define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s) (((x)>>(s)) | ((x)<<(64-(s))))
# endif
/*
 * Logical functions applied by the block transform to 64-bit words.
 * Sigma0/Sigma1 mix the working variables a and e in every round;
 * sigma0/sigma1 expand the message schedule; Ch selects bits of y or z
 * according to x; Maj yields the bitwise majority of x, y and z.
 * ROTR must already be defined when any of the four sigma macros is
 * expanded.
 */
# define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
# define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
/*
 * sha512_block_data_order, variant selected for 32-bit x86 by the
 * __i386/__i386__/_M_IX86 test below.  Only A, E and the scratch word T
 * are named variables; everything else is addressed through the pointer
 * F into X[9 + 80].  F moves down one slot per round (F-- in both loops),
 * so F[1], F[2], F[5], F[6], F[7] refer to values from earlier rounds and
 * F[8 + 16 - k] to earlier message-schedule words.  Rounds 0..15 and
 * 16..79 share the same T/A update and differ in how T is seeded.
 * NOTE(review): the lines between the ones shown (declarations, state
 * load/store, the stores into F[] and the E update) are absent from this
 * listing - confirm against the pristine file.
 */
419 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
421 * This code should give better results on 32-bit CPU with less than
422 * ~24 registers, both size and performance wise...
425 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
428 const SHA_LONG64 *W = in;
430 SHA_LONG64 X[9 + 80], *F;
445 for (i = 0; i < 16; i++, F--) {
454 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
456 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
459 for (; i < 80; i++, F--) {
460 T = sigma0(F[8 + 16 - 1]);
461 T += sigma1(F[8 + 16 - 14]);
462 T += F[8 + 16] + F[8 + 16 - 9];
467 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
469 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
/*
 * sha512_block_data_order, compact variant selected by
 * OPENSSL_SMALL_FOOTPRINT.  Both round loops are kept rolled.  Rounds
 * 0..15 load each input word with PULL64 into X[i]; rounds 16..79 update
 * the schedule in place, addressing X[] modulo 16 (the & 0x0f masks).
 * Each round forms T1 (schedule word + h + Sigma1(e) + Ch + K512[i]) and
 * T2 (Sigma0(a) + Maj).
 * NOTE(review): the declaration of X[], the state load/store, the sigma
 * applications to s0/s1 and the a..h rotation after T1/T2 are absent
 * from this listing - confirm against the pristine file.
 */
485 # elif defined(OPENSSL_SMALL_FOOTPRINT)
487 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
490 const SHA_LONG64 *W = in;
491 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
506 for (i = 0; i < 16; i++) {
510 T1 = X[i] = PULL64(W[i]);
512 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
513 T2 = Sigma0(a) + Maj(a, b, c);
524 for (; i < 80; i++) {
525 s0 = X[(i + 1) & 0x0f];
527 s1 = X[(i + 14) & 0x0f];
530 T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
531 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
532 T2 = Sigma0(a) + Maj(a, b, c);
/*
 * ROUND_00_15 - one round of the compression function.
 * On entry the caller's T1 holds the message-schedule word for round i;
 * T1 and K512[] must be in scope at the expansion site.  Only d and h
 * are written, so callers rotate the argument order from one invocation
 * to the next instead of shuffling eight variables.
 */
# define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
        h = Sigma0(a) + Maj(a,b,c); \
        d += T1; h += T1; } while (0)

/*
 * ROUND_16_80 - message-schedule step followed by one round.
 * Updates slot j of the 16-word circular schedule X[] from slots j+1,
 * j+9 and j+14 (indices taken modulo 16), leaves the new word in T1 and
 * then performs ROUND_00_15 with round number i+j.  The caller's s0, s1
 * and T1 must be in scope at the expansion site.
 */
# define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
        s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
        s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
/*
 * sha512_block_data_order, unrolled variant built on the ROUND_00_15 and
 * ROUND_16_80 macros.  Rounds 0..15 are written out one by one and rounds
 * 16..79 are processed sixteen at a time by the for loop; in both, the
 * eight working variables are passed in rotated order from one macro
 * invocation to the next.
 * Two groups of rounds 0..15 are visible: the second loads each word via
 * PULL64 before its round, the first shows only the ROUND_00_15 calls.
 * NOTE(review): presumably these are the big-endian and byte-swapping
 * alternatives of one conditional block; the selecting directives, the
 * first group's loads, the declaration of X[] and the state load/store
 * are absent from this listing - confirm against the pristine file.
 */
568 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
571 const SHA_LONG64 *W = in;
572 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
589 ROUND_00_15(0, a, b, c, d, e, f, g, h);
591 ROUND_00_15(1, h, a, b, c, d, e, f, g);
593 ROUND_00_15(2, g, h, a, b, c, d, e, f);
595 ROUND_00_15(3, f, g, h, a, b, c, d, e);
597 ROUND_00_15(4, e, f, g, h, a, b, c, d);
599 ROUND_00_15(5, d, e, f, g, h, a, b, c);
601 ROUND_00_15(6, c, d, e, f, g, h, a, b);
603 ROUND_00_15(7, b, c, d, e, f, g, h, a);
605 ROUND_00_15(8, a, b, c, d, e, f, g, h);
607 ROUND_00_15(9, h, a, b, c, d, e, f, g);
609 ROUND_00_15(10, g, h, a, b, c, d, e, f);
611 ROUND_00_15(11, f, g, h, a, b, c, d, e);
613 ROUND_00_15(12, e, f, g, h, a, b, c, d);
615 ROUND_00_15(13, d, e, f, g, h, a, b, c);
617 ROUND_00_15(14, c, d, e, f, g, h, a, b);
619 ROUND_00_15(15, b, c, d, e, f, g, h, a);
621 T1 = X[0] = PULL64(W[0]);
622 ROUND_00_15(0, a, b, c, d, e, f, g, h);
623 T1 = X[1] = PULL64(W[1]);
624 ROUND_00_15(1, h, a, b, c, d, e, f, g);
625 T1 = X[2] = PULL64(W[2]);
626 ROUND_00_15(2, g, h, a, b, c, d, e, f);
627 T1 = X[3] = PULL64(W[3]);
628 ROUND_00_15(3, f, g, h, a, b, c, d, e);
629 T1 = X[4] = PULL64(W[4]);
630 ROUND_00_15(4, e, f, g, h, a, b, c, d);
631 T1 = X[5] = PULL64(W[5]);
632 ROUND_00_15(5, d, e, f, g, h, a, b, c);
633 T1 = X[6] = PULL64(W[6]);
634 ROUND_00_15(6, c, d, e, f, g, h, a, b);
635 T1 = X[7] = PULL64(W[7]);
636 ROUND_00_15(7, b, c, d, e, f, g, h, a);
637 T1 = X[8] = PULL64(W[8]);
638 ROUND_00_15(8, a, b, c, d, e, f, g, h);
639 T1 = X[9] = PULL64(W[9]);
640 ROUND_00_15(9, h, a, b, c, d, e, f, g);
641 T1 = X[10] = PULL64(W[10]);
642 ROUND_00_15(10, g, h, a, b, c, d, e, f);
643 T1 = X[11] = PULL64(W[11]);
644 ROUND_00_15(11, f, g, h, a, b, c, d, e);
645 T1 = X[12] = PULL64(W[12]);
646 ROUND_00_15(12, e, f, g, h, a, b, c, d);
647 T1 = X[13] = PULL64(W[13]);
648 ROUND_00_15(13, d, e, f, g, h, a, b, c);
649 T1 = X[14] = PULL64(W[14]);
650 ROUND_00_15(14, c, d, e, f, g, h, a, b);
651 T1 = X[15] = PULL64(W[15]);
652 ROUND_00_15(15, b, c, d, e, f, g, h, a);
655 for (i = 16; i < 80; i += 16) {
656 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
657 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
658 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
659 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
660 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
661 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
662 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
663 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
664 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
665 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
666 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
667 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
668 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
669 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
670 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
671 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
689 #endif /* SHA512_ASM */