1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3 * Copyright (c) 2004 The OpenSSL Project. All rights reserved
4 * according to the OpenSSL license [found in ../../LICENSE].
5 * ====================================================================
7 #include <openssl/opensslconf.h>
9 * IMPLEMENTATION NOTES.
11 * As you might have noticed 32-bit hash algorithms:
13 * - permit SHA_LONG to be wider than 32-bit
14 * - optimized versions implement two transform functions: one operating
15 * on [aligned] data in host byte order and one - on data in input
17 * - share common byte-order neutral collector and padding function
18 * implementations, ../md32_common.h;
20 * Neither of the above applies to this SHA-512 implementation. Reasons
21 * [in reverse order] are:
23 * - it's the only 64-bit hash algorithm at the time of this writing,
24 * there is no need for common collector/padding implementation [yet];
25 * - by supporting only one transform function [which operates on
26 * *aligned* data in input stream byte order, big-endian in this case]
27 * we minimize burden of maintenance in two ways: a) collector/padding
28 * function is simpler; b) only one transform function to stare at;
29 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
30 * apply a number of optimizations to mitigate potential performance
31 * penalties caused by previous design decision;
35 * Implementation relies on the fact that "long long" is 64-bit on
36 * both 32- and 64-bit platforms. If some compiler vendor comes up
37 * with 128-bit long long, adjustment to sha.h would be required.
38 * As this implementation relies on 64-bit integer type, it's totally
39 * inappropriate for platforms which don't support it, most notably
41 * <appro@fy.chalmers.se>
46 #include <openssl/crypto.h>
47 #include <openssl/sha.h>
48 #include <openssl/opensslv.h>
50 #include "internal/cryptlib.h"
/*
 * Version identification string: the literal "SHA-512" concatenated at
 * compile time with the OPENSSL_VERSION_PTEXT string macro.
 */
52 const char SHA512_version[] = "SHA-512" OPENSSL_VERSION_PTEXT;
/*
 * Targets for which SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA gets defined.
 * When the macro is defined, SHA512_Update() and SHA512_Transform() in this
 * file compile out their "copy misaligned input into c->u.p first" path
 * (both are wrapped in #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA).
 *
 * NOTE(review): the embedded listing numbers jump 57 -> 59 -> 62, so the
 * last term of the #if condition and the closing #endif are not visible in
 * this extract.
 */
54 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
55 defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
56 defined(__s390__) || defined(__s390x__) || \
57 defined(__aarch64__) || \
59 # define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
/*
 * SHA384_Init: prepare a SHA512_CTX for a SHA-384 computation.
 *
 * Loads eight 64-bit constants into c->h[0..7] (the SHA-384 initial hash
 * value of FIPS 180-4) and records SHA384_DIGEST_LENGTH in c->md_len, which
 * SHA512_Final() uses to decide how many state words to emit.
 *
 * NOTE(review): the embedded listing numbers jump 62 -> 64, 71 -> 76 and
 * 76 -> 80, so the braces, the statements between the h[] stores and the
 * md_len store, and the return statement are not visible in this extract.
 * Presumably the missing statements reset c->Nl, c->Nh and c->num, which
 * SHA512_Update()/SHA512_Final() read - confirm against the full file.
 */
62 int SHA384_Init(SHA512_CTX *c)
64 c->h[0] = U64(0xcbbb9d5dc1059ed8);
65 c->h[1] = U64(0x629a292a367cd507);
66 c->h[2] = U64(0x9159015a3070dd17);
67 c->h[3] = U64(0x152fecd8f70e5939);
68 c->h[4] = U64(0x67332667ffc00b31);
69 c->h[5] = U64(0x8eb44a8768581511);
70 c->h[6] = U64(0xdb0c2e0d64f98fa7);
71 c->h[7] = U64(0x47b5481dbefa4fa4);
/* Digest length selector consumed by SHA512_Final(). */
76 c->md_len = SHA384_DIGEST_LENGTH;
/*
 * SHA512_Init: prepare a SHA512_CTX for a SHA-512 computation.
 *
 * Loads eight 64-bit constants into c->h[0..7] (the SHA-512 initial hash
 * value of FIPS 180-4) and records SHA512_DIGEST_LENGTH in c->md_len, which
 * SHA512_Final() uses to decide how many state words to emit.
 *
 * NOTE(review): the embedded listing numbers jump 80 -> 82, 89 -> 94 and
 * 94 -> 101, so the braces, the statements between the h[] stores and the
 * md_len store, and the return statement are not visible in this extract.
 * Presumably the missing statements reset c->Nl, c->Nh and c->num, which
 * SHA512_Update()/SHA512_Final() read - confirm against the full file.
 */
80 int SHA512_Init(SHA512_CTX *c)
82 c->h[0] = U64(0x6a09e667f3bcc908);
83 c->h[1] = U64(0xbb67ae8584caa73b);
84 c->h[2] = U64(0x3c6ef372fe94f82b);
85 c->h[3] = U64(0xa54ff53a5f1d36f1);
86 c->h[4] = U64(0x510e527fade682d1);
87 c->h[5] = U64(0x9b05688c2b3e6c1f);
88 c->h[6] = U64(0x1f83d9abfb41bd6b);
89 c->h[7] = U64(0x5be0cd19137e2179);
/* Digest length selector consumed by SHA512_Final(). */
94 c->md_len = SHA512_DIGEST_LENGTH;
/*
 * Forward declaration of the block transform called by SHA512_Final(),
 * SHA512_Update() and SHA512_Transform() below: processes num consecutive
 * blocks starting at in, updating ctx.
 *
 * NOTE(review): listing lines 95-100 preceding this prototype are not
 * visible in this extract; any storage-class or preprocessor wrapping that
 * applies to it cannot be confirmed from here.
 */
101 void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
/*
 * SHA512_Final: pad the buffered input, run the final block transform(s)
 * and write the digest to md.
 *
 * Visible steps:
 *  - a 0x80 marker byte is stored at p[n] in the block buffer c->u;
 *  - the remainder of the block is zero-filled; if n exceeds
 *    sizeof(c->u) - 16 the block has no room for the length field, so it
 *    is zero-filled to the end and hashed before a fresh one is prepared;
 *  - the 128-bit count c->Nh:c->Nl is stored in the last 16 bytes;
 *  - the block is hashed;
 *  - SHA384_DIGEST_LENGTH / 8 or SHA512_DIGEST_LENGTH / 8 words of c->h[]
 *    are written to md, most significant byte first.
 *
 * NOTE(review): the embedded listing numbers have gaps throughout this
 * function, so the declaration and updates of n, the preprocessor
 * conditionals, the switch header, the break/default handling and the
 * return statements are not visible in this extract.
 */
103 int SHA512_Final(unsigned char *md, SHA512_CTX *c)
105 unsigned char *p = (unsigned char *)c->u.p;
108 p[n] = 0x80; /* There always is a room for one */
/* Not enough room left for the 16-byte length: flush an all-padding block. */
110 if (n > (sizeof(c->u) - 16)) {
111 memset(p + n, 0, sizeof(c->u) - n);
113 sha512_block_data_order(c, p, 1);
/* Zero everything from p + n up to, but excluding, the 16-byte length field. */
116 memset(p + n, 0, sizeof(c->u) - 16 - n);
/*
 * Length field, word form: Nh in the second-to-last 64-bit word of the
 * block, Nl in the last.
 * NOTE(review): this word store and the byte-wise store that follows write
 * the same 16 bytes; presumably a preprocessor conditional (not visible
 * here) selects one of them by host byte order - confirm.
 */
118 c->u.d[SHA_LBLOCK - 2] = c->Nh;
119 c->u.d[SHA_LBLOCK - 1] = c->Nl;
/*
 * Length field, byte form: Nh:Nl written big-endian, least significant
 * byte of Nl in the very last byte of the block.
 */
121 p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
122 p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
123 p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
124 p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
125 p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
126 p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
127 p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
128 p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
129 p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
130 p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
131 p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
132 p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
133 p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
134 p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
135 p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
136 p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
/* Hash the final, fully padded block. */
139 sha512_block_data_order(c, p, 1);
/*
 * Output stage. The two case labels correspond to the values stored in
 * c->md_len by SHA384_Init() and SHA512_Init().
 * NOTE(review): the switch header itself is not visible; presumably it
 * switches on c->md_len.
 */
145 /* Let compiler decide if it's appropriate to unroll... */
146 case SHA384_DIGEST_LENGTH:
147 for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
148 SHA_LONG64 t = c->h[n];
/* Serialize one state word big-endian. */
150 *(md++) = (unsigned char)(t >> 56);
151 *(md++) = (unsigned char)(t >> 48);
152 *(md++) = (unsigned char)(t >> 40);
153 *(md++) = (unsigned char)(t >> 32);
154 *(md++) = (unsigned char)(t >> 24);
155 *(md++) = (unsigned char)(t >> 16);
156 *(md++) = (unsigned char)(t >> 8);
157 *(md++) = (unsigned char)(t);
160 case SHA512_DIGEST_LENGTH:
161 for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
162 SHA_LONG64 t = c->h[n];
/* Serialize one state word big-endian. */
164 *(md++) = (unsigned char)(t >> 56);
165 *(md++) = (unsigned char)(t >> 48);
166 *(md++) = (unsigned char)(t >> 40);
167 *(md++) = (unsigned char)(t >> 32);
168 *(md++) = (unsigned char)(t >> 24);
169 *(md++) = (unsigned char)(t >> 16);
170 *(md++) = (unsigned char)(t >> 8);
171 *(md++) = (unsigned char)(t);
174 /* ... as well as make sure md_len is not abused. */
/*
 * SHA384_Final: thin wrapper that forwards to SHA512_Final() with the same
 * arguments and returns its result. The SHA-384 output length comes from
 * the SHA384_DIGEST_LENGTH case handled inside SHA512_Final().
 *
 * NOTE(review): the function's braces are not visible in this extract.
 */
182 int SHA384_Final(unsigned char *md, SHA512_CTX *c)
184 return SHA512_Final(md, c);
/*
 * SHA512_Update: absorb len bytes from _data into the hash state.
 *
 * Visible behaviour: the 128-bit bit counter (c->Nh:c->Nl) is advanced by
 * len * 8; pending bytes in the block buffer c->u are completed and hashed;
 * whole blocks are then hashed directly from the caller's memory; any tail
 * shorter than a block is copied into the buffer and its size stored in
 * c->num.
 *
 * NOTE(review): the embedded listing numbers have gaps throughout this
 * function, so local declarations, the conditions guarding several of the
 * statements below, the carry from Nl into Nh, the final store to c->Nl
 * and the return statements are not visible in this extract.
 */
187 int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
190 unsigned char *p = c->u.p;
191 const unsigned char *data = (const unsigned char *)_data;
/* New low 64 bits of the bit count: Nl + (len << 3), reduced modulo 2^64. */
196 l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
/*
 * When size_t is at least 64 bits wide, the three top bits of len that the
 * << 3 above shifts out are added to the high word.
 */
199 if (sizeof(len) >= 8)
200 c->Nh += (((SHA_LONG64) len) >> 61);
/* Free space remaining in the block buffer. */
204 size_t n = sizeof(c->u) - c->num;
/*
 * Append all of the input to the buffer and advance c->num.
 * NOTE(review): the guarding condition is not visible; presumably this
 * runs only when len < n.
 */
207 memcpy(p + c->num, data, len), c->num += (unsigned int)len;
/* Top the buffer up to exactly one block, mark it empty, and hash it. */
210 memcpy(p + c->num, data, n), c->num = 0;
212 sha512_block_data_order(c, p, 1);
/* Bulk path: at least one whole block is available in the input. */
216 if (len >= sizeof(c->u)) {
217 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
/*
 * Input not aligned to the size of a 64-bit word: copy each block into the
 * buffer p and hash it from there, one block per iteration.
 */
218 if ((size_t)data % sizeof(c->u.d[0]) != 0)
219 while (len >= sizeof(c->u))
220 memcpy(p, data, sizeof(c->u)),
221 sha512_block_data_order(c, p, 1),
222 len -= sizeof(c->u), data += sizeof(c->u);
/*
 * Hash len / sizeof(c->u) blocks in one call, then leave data pointing at
 * the unprocessed tail and len holding the tail's size.
 */
225 sha512_block_data_order(c, data, len / sizeof(c->u)),
226 data += len, len %= sizeof(c->u), data -= len;
/* Keep the remaining tail in the buffer for the next call. */
230 memcpy(p, data, len), c->num = (int)len;
/*
 * SHA384_Update: thin wrapper that forwards to SHA512_Update() with the
 * same arguments and returns its result.
 *
 * NOTE(review): the function's braces are not visible in this extract.
 */
235 int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
237 return SHA512_Update(c, data, len);
/*
 * SHA512_Transform: run the block transform on the single block at data.
 *
 * When SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is not defined and data is
 * not aligned to sizeof(c->u.d[0]), the block is first copied into the
 * context buffer c->u.p and hashed from there; note that this overwrites
 * whatever c->u.p held.
 *
 * NOTE(review): the braces and the #endif that closes the #ifndef are not
 * visible in this extract.
 */
240 void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
242 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
243 if ((size_t)data % sizeof(c->u.d[0]) != 0)
244 memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
246 sha512_block_data_order(c, data, 1);
/*
 * SHA384: one-shot helper that hashes the n bytes at d.
 *
 * The visible statements feed the input through SHA512_Update(), finish
 * with SHA512_Final() writing to md, and then wipe the local context with
 * OPENSSL_cleanse() so no hash state is left on the stack.
 *
 * NOTE(review): the context declaration, its initialization, any use of
 * the static buffer m and the return statement are not visible in this
 * extract. Presumably m serves as the output buffer when md is NULL; if
 * so, that path would share one static buffer across callers - confirm
 * against the full file.
 */
249 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
252 static unsigned char m[SHA384_DIGEST_LENGTH];
257 SHA512_Update(&c, d, n);
258 SHA512_Final(md, &c);
259 OPENSSL_cleanse(&c, sizeof(c));
/*
 * SHA512: one-shot helper that hashes the n bytes at d.
 *
 * The visible statements feed the input through SHA512_Update(), finish
 * with SHA512_Final() writing to md, and then wipe the local context with
 * OPENSSL_cleanse() so no hash state is left on the stack.
 *
 * NOTE(review): the context declaration, its initialization, any use of
 * the static buffer m and the return statement are not visible in this
 * extract. Presumably m serves as the output buffer when md is NULL; if
 * so, that path would share one static buffer across callers - confirm
 * against the full file.
 */
263 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
266 static unsigned char m[SHA512_DIGEST_LENGTH];
271 SHA512_Update(&c, d, n);
272 SHA512_Final(md, &c);
273 OPENSSL_cleanse(&c, sizeof(c));
/*
 * K512: the 80 64-bit round constants of SHA-384/SHA-512 (FIPS 180-4).
 * Every sha512_block_data_order() variant in this file adds K512[i] in
 * round i.
 *
 * NOTE(review): the closing "};" of the initializer is not visible in this
 * extract.
 */
278 static const SHA_LONG64 K512[80] = {
279 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
280 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
281 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
282 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
283 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
284 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
285 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
286 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
287 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
288 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
289 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
290 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
291 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
292 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
293 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
294 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
295 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
296 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
297 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
298 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
299 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
300 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
301 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
302 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
303 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
304 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
305 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
306 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
307 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
308 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
309 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
310 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
311 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
312 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
313 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
314 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
315 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
316 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
317 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
318 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
/*
 * Platform-specific helper macros for the C block transform:
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
 *   PULL64(x)  - fetch the 64-bit word stored at x in big-endian (input
 *                stream) byte order and return it in host order.
 * GCC-style inline assembly (x86_64, i386, PowerPC64, AArch64) and MSVC
 * intrinsics / inline assembly are tried first; portable C fallbacks and
 * the SHA-512 logical functions follow.
 *
 * NOTE(review): the embedded listing numbers have many gaps in this region,
 * so several macro continuation lines, asm templates, function braces and
 * the #else/#endif/#ifndef lines pairing these conditionals are not visible
 * in this extract. The macros using "({ ... })" rely on the GNU statement
 * expression extension, which matches the __GNUC__ guard on the first line.
 */
322 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
323 # if defined(__x86_64) || defined(__x86_64__)
324 # define ROTR(a,n) ({ SHA_LONG64 ret; \
329 # if !defined(B_ENDIAN)
330 # define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
335 # elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
336 # if defined(I386_ONLY)
337 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
338 unsigned int hi=p[0],lo=p[1]; \
339 asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
340 "roll $16,%%eax; roll $16,%%edx; "\
341 "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
342 : "=a"(lo),"=d"(hi) \
343 : "0"(lo),"1"(hi) : "cc"); \
344 ((SHA_LONG64)hi)<<32|lo; })
346 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
347 unsigned int hi=p[0],lo=p[1]; \
348 asm ("bswapl %0; bswapl %1;" \
349 : "=r"(lo),"=r"(hi) \
350 : "0"(lo),"1"(hi)); \
351 ((SHA_LONG64)hi)<<32|lo; })
353 # elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
354 # define ROTR(a,n) ({ SHA_LONG64 ret; \
355 asm ("rotrdi %0,%1,%2" \
357 : "r"(a),"K"(n)); ret; })
358 # elif defined(__aarch64__)
359 # define ROTR(a,n) ({ SHA_LONG64 ret; \
360 asm ("ror %0,%1,%2" \
362 : "r"(a),"I"(n)); ret; })
363 # if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
364 __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
365 # define PULL64(x) ({ SHA_LONG64 ret; \
368 : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
371 # elif defined(_MSC_VER)
372 # if defined(_WIN64) /* applies to both IA-64 and AMD64 */
373 # pragma intrinsic(_rotr64)
374 # define ROTR(a,n) _rotr64((a),n)
376 # if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
377 # if defined(I386_ONLY)
378 static SHA_LONG64 __fastcall __pull64be(const void *x)
380 _asm mov edx,[ecx + 0]
381 _asm mov eax,[ecx + 4]
384 _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al}
386 static SHA_LONG64 __fastcall __pull64be(const void *x)
388 _asm mov edx,[ecx + 0]
389 _asm mov eax,[ecx + 4]
390 _asm bswap edx _asm bswap eax}
392 # define PULL64(x) __pull64be(&(x))
394 # pragma inline_depth(0)
/*
 * Portable PULL64: B(x,j) reads byte j of x and shifts it left by
 * (7-j)*8 bits, so byte 0 becomes the most significant byte; OR-ing the
 * eight terms assembles a big-endian 64-bit load that works on any host
 * byte order and alignment. Note that x is evaluated eight times.
 */
400 # define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
401 # define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
/*
 * Portable ROTR. The shift count s is used unparenthesized and 64-s must
 * stay below 64, so s has to be a plain constant in 1..63, which is how
 * the Sigma/sigma macros below use it.
 */
404 # define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
/*
 * SHA-384/SHA-512 logical functions (FIPS 180-4): the "big" Sigma functions
 * applied to the working variables, the "small" sigma functions used in the
 * message schedule, and the Ch (choose) and Maj (majority) functions.
 */
406 # define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
407 # define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
408 # define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
409 # define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
410 # define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
411 # define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
/*
 * sha512_block_data_order(), variant compiled for 32-bit x86 targets
 * (__i386, __i386__ or _M_IX86).
 *
 * W aliases the input as 64-bit words. X[9 + 80] is local scratch reached
 * through the pointer F, which both round loops move down by one element
 * per round (F-- in the loop headers); the working variables other than A
 * and E are read from F[] at fixed offsets (F[1], F[2], F[5], F[6], F[7]).
 *
 * NOTE(review): the embedded listing numbers have gaps throughout this
 * function (including the opening of the comment that precedes the
 * signature, the rest of the parameter list, the initialization of F and
 * the state, the stores into F[], and the final state update), so those
 * parts are not visible in this extract.
 */
412 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
414 * This code should give better results on 32-bit CPU with less than
415 * ~24 registers, both size and performance wise...
416 */ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
419 const SHA_LONG64 *W = in;
421 SHA_LONG64 X[9 + 80], *F;
/* Rounds 0..15. */
436 for (i = 0; i < 16; i++, F--) {
445 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
447 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
/*
 * Rounds 16..79: T first accumulates the message-schedule word from
 * sigma0/sigma1 of earlier entries and two further entries, all addressed
 * relative to the moving pointer F, and then the round function terms.
 */
450 for (; i < 80; i++, F--) {
451 T = sigma0(F[8 + 16 - 1]);
452 T += sigma1(F[8 + 16 - 14]);
453 T += F[8 + 16] + F[8 + 16 - 9];
458 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
460 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
/*
 * sha512_block_data_order(), compact variant selected when
 * OPENSSL_SMALL_FOOTPRINT is defined (and the 32-bit x86 variant is not).
 *
 * Uses eight named working variables a..h and two loops: rounds 0..15 load
 * each input word with PULL64() into X[i]; rounds 16..79 update X in place
 * as a circular schedule indexed modulo 16. Each round forms
 * T1 = h + Sigma1(e) + Ch(e,f,g) + K512[i] + message word and
 * T2 = Sigma0(a) + Maj(a,b,c).
 *
 * NOTE(review): the embedded listing numbers have gaps throughout this
 * function, so the rest of the parameter list, the declaration of X and i,
 * the loading of a..h from the context, the rotation of the working
 * variables after each round and the final state update are not visible in
 * this extract. The sigma0()/sigma1() applications to s0 and s1 are also
 * not visible; presumably they sit on the missing lines after each load
 * (compare ROUND_16_80 later in this file) - confirm.
 */
476 # elif defined(OPENSSL_SMALL_FOOTPRINT)
477 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
480 const SHA_LONG64 *W = in;
481 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
/* Rounds 0..15: message word comes straight from the input block. */
496 for (i = 0; i < 16; i++) {
500 T1 = X[i] = PULL64(W[i]);
502 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
503 T2 = Sigma0(a) + Maj(a, b, c);
/* Rounds 16..79: message word is derived from the 16-entry window X. */
514 for (; i < 80; i++) {
515 s0 = X[(i + 1) & 0x0f];
517 s1 = X[(i + 14) & 0x0f];
520 T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
521 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
522 T2 = Sigma0(a) + Maj(a, b, c);
/*
 * ROUND_00_15: one compression round for the unrolled block function.
 * On entry T1 must already hold the round's message word. The macro adds
 * h + Sigma1(e) + Ch(e,f,g) + K512[i] to T1, then sets
 * h = Sigma0(a) + Maj(a,b,c) + T1 and d += T1. Callers rotate the order of
 * the a..h arguments from one round to the next instead of moving values
 * between variables. T1 is taken from the enclosing scope.
 */
547 # define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
548 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
549 h = Sigma0(a) + Maj(a,b,c); \
550 d += T1; h += T1; } while (0)
/*
 * ROUND_16_80: one round with message-schedule expansion. Updates the
 * circular schedule entry X[j & 0x0f] in place by adding
 * sigma0(X[j+1]) + sigma1(X[j+14]) + X[j+9] (indices modulo 16), leaves
 * the new word in T1, and then runs ROUND_00_15 with constant index i+j.
 * s0, s1 and T1 are taken from the enclosing scope.
 */
551 # define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
552 s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
553 s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
554 T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
555 ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
/*
 * sha512_block_data_order(), fully unrolled default variant built on the
 * ROUND_00_15 / ROUND_16_80 macros.
 *
 * Rounds 0..15 are written out one by one; each successive call rotates
 * the a..h argument list by one position, so after 8 rounds the variables
 * are back in their original roles. Rounds 16..79 run as a loop of 16
 * unrolled ROUND_16_80 calls per iteration.
 *
 * NOTE(review): the embedded listing numbers have gaps throughout this
 * function, so the rest of the parameter list, the declaration of X and i,
 * the loading of a..h from the context, the per-block loop, the
 * preprocessor conditionals and the final state update are not visible in
 * this extract.
 */
556 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
559 const SHA_LONG64 *W = in;
560 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
/*
 * First copy of rounds 0..15.
 * NOTE(review): the statement that loads T1/X[] before each of these
 * rounds sits on listing lines that are not visible (odd numbers 576-606).
 * Presumably this copy is the host-is-big-endian path, where input words
 * are used without PULL64, selected by a preprocessor conditional that is
 * also not visible - confirm against the full file.
 */
577 ROUND_00_15(0, a, b, c, d, e, f, g, h);
579 ROUND_00_15(1, h, a, b, c, d, e, f, g);
581 ROUND_00_15(2, g, h, a, b, c, d, e, f);
583 ROUND_00_15(3, f, g, h, a, b, c, d, e);
585 ROUND_00_15(4, e, f, g, h, a, b, c, d);
587 ROUND_00_15(5, d, e, f, g, h, a, b, c);
589 ROUND_00_15(6, c, d, e, f, g, h, a, b);
591 ROUND_00_15(7, b, c, d, e, f, g, h, a);
593 ROUND_00_15(8, a, b, c, d, e, f, g, h);
595 ROUND_00_15(9, h, a, b, c, d, e, f, g);
597 ROUND_00_15(10, g, h, a, b, c, d, e, f);
599 ROUND_00_15(11, f, g, h, a, b, c, d, e);
601 ROUND_00_15(12, e, f, g, h, a, b, c, d);
603 ROUND_00_15(13, d, e, f, g, h, a, b, c);
605 ROUND_00_15(14, c, d, e, f, g, h, a, b);
607 ROUND_00_15(15, b, c, d, e, f, g, h, a);
/*
 * Second copy of rounds 0..15: each input word is converted from stream
 * (big-endian) order with PULL64, stored in the schedule X[] and left in
 * T1 for ROUND_00_15.
 */
609 T1 = X[0] = PULL64(W[0]);
610 ROUND_00_15(0, a, b, c, d, e, f, g, h);
611 T1 = X[1] = PULL64(W[1]);
612 ROUND_00_15(1, h, a, b, c, d, e, f, g);
613 T1 = X[2] = PULL64(W[2]);
614 ROUND_00_15(2, g, h, a, b, c, d, e, f);
615 T1 = X[3] = PULL64(W[3]);
616 ROUND_00_15(3, f, g, h, a, b, c, d, e);
617 T1 = X[4] = PULL64(W[4]);
618 ROUND_00_15(4, e, f, g, h, a, b, c, d);
619 T1 = X[5] = PULL64(W[5]);
620 ROUND_00_15(5, d, e, f, g, h, a, b, c);
621 T1 = X[6] = PULL64(W[6]);
622 ROUND_00_15(6, c, d, e, f, g, h, a, b);
623 T1 = X[7] = PULL64(W[7]);
624 ROUND_00_15(7, b, c, d, e, f, g, h, a);
625 T1 = X[8] = PULL64(W[8]);
626 ROUND_00_15(8, a, b, c, d, e, f, g, h);
627 T1 = X[9] = PULL64(W[9]);
628 ROUND_00_15(9, h, a, b, c, d, e, f, g);
629 T1 = X[10] = PULL64(W[10]);
630 ROUND_00_15(10, g, h, a, b, c, d, e, f);
631 T1 = X[11] = PULL64(W[11]);
632 ROUND_00_15(11, f, g, h, a, b, c, d, e);
633 T1 = X[12] = PULL64(W[12]);
634 ROUND_00_15(12, e, f, g, h, a, b, c, d);
635 T1 = X[13] = PULL64(W[13]);
636 ROUND_00_15(13, d, e, f, g, h, a, b, c);
637 T1 = X[14] = PULL64(W[14]);
638 ROUND_00_15(14, c, d, e, f, g, h, a, b);
639 T1 = X[15] = PULL64(W[15]);
640 ROUND_00_15(15, b, c, d, e, f, g, h, a);
/*
 * Rounds 16..79, sixteen per iteration; i is the base round number and the
 * second macro argument the offset within the group, so K512[i + j] is
 * used and X[] is indexed modulo 16.
 */
643 for (i = 16; i < 80; i += 16) {
644 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
645 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
646 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
647 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
648 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
649 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
650 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
651 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
652 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
653 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
654 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
655 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
656 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
657 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
658 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
659 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
677 #endif /* SHA512_ASM */