1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3 * Copyright (c) 2004 The OpenSSL Project. All rights reserved
4 * according to the OpenSSL license [found in ../../LICENSE].
5 * ====================================================================
8 * IMPLEMENTATION NOTES.
10 * As you might have noticed 32-bit hash algorithms:
12 * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
13 * - optimized versions implement two transform functions: one operating
14 * on [aligned] data in host byte order and one - on data in input
16 * - share common byte-order neutral collector and padding function
17 * implementations, ../md32_common.h;
19 * Neither of the above applies to this SHA-512 implementation. Reasons
20 * [in reverse order] are:
22 * - it's the only 64-bit hash algorithm at the time of this writing,
23 * there is no need for common collector/padding implementation [yet];
24 * - by supporting only one transform function [which operates on
25 * *aligned* data in input stream byte order, big-endian in this case]
26 * we minimize burden of maintenance in two ways: a) collector/padding
27 * function is simpler; b) only one transform function to stare at;
28 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
29 * apply a number of optimizations to mitigate potential performance
30 * penalties caused by the previous design decision;
34 * Implementation relies on the fact that "long long" is 64-bit on
35 * both 32- and 64-bit platforms. If some compiler vendor comes up
36 * with 128-bit long long, adjustment to sha.h would be required.
37 * As this implementation relies on 64-bit integer type, it's totally
38 * inappropriate for platforms which don't support it, most notably
40 * <appro@fy.chalmers.se>
45 #include <openssl/opensslconf.h>
46 #include <openssl/crypto.h>
47 #include <openssl/sha.h>
48 #include <openssl/opensslv.h>
/* Version identification string: "SHA-512" followed by OPENSSL_VERSION_PTEXT. */
50 const char *SHA512_version="SHA-512" OPENSSL_VERSION_PTEXT;
/*
 * On x86 and x86-64 targets (detected through MSVC- or GCC-style predefined
 * macros) announce that the block routine may be handed unaligned input.
 * SHA512_Update checks this macro before deciding whether misaligned data
 * has to be copied into the context buffer first.
 * NOTE(review): the matching #endif is not visible in this listing.
 */
52 #if defined(_M_IX86) || defined(_M_AMD64) || defined(__i386) || defined(__x86_64)
53 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
/*
 * SHA384_Init: prepare context c for a SHA-384 computation.
 *
 * Loads the eight 64-bit chaining words c->h[0..7] with the SHA-384
 * initial hash values (FIPS 180-4), marks the data buffer as empty
 * (c->num = 0) and records SHA384_DIGEST_LENGTH in c->md_len.
 *
 * NOTE(review): the braces and at least one statement line of this
 * function are not visible in this listing (the embedded numbering has
 * gaps), so the return value and any further field resets cannot be
 * confirmed from here.
 */
56 int SHA384_Init (SHA512_CTX *c)
58 c->h[0]=U64(0xcbbb9d5dc1059ed8);
59 c->h[1]=U64(0x629a292a367cd507);
60 c->h[2]=U64(0x9159015a3070dd17);
61 c->h[3]=U64(0x152fecd8f70e5939);
62 c->h[4]=U64(0x67332667ffc00b31);
63 c->h[5]=U64(0x8eb44a8768581511);
64 c->h[6]=U64(0xdb0c2e0d64f98fa7);
65 c->h[7]=U64(0x47b5481dbefa4fa4);
/* No bytes buffered yet; md_len remembers which digest size this context produces. */
67 c->num=0; c->md_len=SHA384_DIGEST_LENGTH;
/*
 * SHA512_Init: prepare context c for a SHA-512 computation.
 *
 * Loads the eight 64-bit chaining words c->h[0..7] with the SHA-512
 * initial hash values (FIPS 180-4), marks the data buffer as empty
 * (c->num = 0) and records SHA512_DIGEST_LENGTH in c->md_len.
 *
 * NOTE(review): the braces and at least one statement line of this
 * function are not visible in this listing (the embedded numbering has
 * gaps), so the return value and any further field resets cannot be
 * confirmed from here.
 */
71 int SHA512_Init (SHA512_CTX *c)
73 c->h[0]=U64(0x6a09e667f3bcc908);
74 c->h[1]=U64(0xbb67ae8584caa73b);
75 c->h[2]=U64(0x3c6ef372fe94f82b);
76 c->h[3]=U64(0xa54ff53a5f1d36f1);
77 c->h[4]=U64(0x510e527fade682d1);
78 c->h[5]=U64(0x9b05688c2b3e6c1f);
79 c->h[6]=U64(0x1f83d9abfb41bd6b);
80 c->h[7]=U64(0x5be0cd19137e2179);
/* No bytes buffered yet; md_len remembers which digest size this context produces. */
82 c->num=0; c->md_len=SHA512_DIGEST_LENGTH;
/*
 * Forward declaration of the compression routine defined further down.
 * Callers in this file pass num as a count of whole sizeof(c->u)-byte
 * blocks (either 1 or len/sizeof(c->u)), not as a byte count.
 */
86 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num);
/*
 * SHA512_Final: finish the hash and write the digest to md.
 *
 * Pads the data still buffered in c->u with 0x80 followed by zeros,
 * places the 128-bit message length held in c->Nh:c->Nl (a bit count,
 * see SHA512_Update) in the last 16 bytes of the block, runs
 * sha512_block() on it and then serializes the chaining words c->h[]
 * into md, most significant byte first: SHA384_DIGEST_LENGTH/8 words
 * for SHA-384, SHA512_DIGEST_LENGTH/8 words for SHA-512.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing: braces, the declaration and increment of n, preprocessor
 * guards, the switch header (presumably on c->md_len), break statements
 * and the return statements. The comments below describe only the
 * statements that can be seen.
 */
88 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
90 unsigned char *p=(unsigned char *)c->u.p;
/* Terminate the message with the mandatory 0x80 marker byte. */
93 p[n]=0x80; /* There is always room for one byte */
/*
 * When the data reaches into the last 16 bytes (reserved for the
 * length), zero the remainder of the buffer and restart at offset 0.
 * The expression ends in a comma, so it continues on a line that is not
 * visible here (presumably the call that hashes this extra block).
 */
95 if (n > (sizeof(c->u)-16))
96 memset (p+n,0,sizeof(c->u)-n), n=0,
/* Zero-fill from offset n up to the 16-byte length field. */
99 memset (p+n,0,sizeof(c->u)-16-n);
/*
 * Two ways of storing the length follow: as two whole 64-bit words, or
 * byte by byte with the most significant byte first. Presumably these
 * are alternatives selected by a byte-order #ifdef that is not visible
 * in this listing -- TODO confirm against the full file.
 */
101 c->u.d[SHA_LBLOCK-2] = c->Nh;
102 c->u.d[SHA_LBLOCK-1] = c->Nl;
104 p[sizeof(c->u)-1] = (c->Nl)&0xFF;
105 p[sizeof(c->u)-2] = (c->Nl>>8)&0xFF;
106 p[sizeof(c->u)-3] = (c->Nl>>16)&0xFF;
107 p[sizeof(c->u)-4] = (c->Nl>>24)&0xFF;
108 p[sizeof(c->u)-5] = (c->Nl>>32)&0xFF;
109 p[sizeof(c->u)-6] = (c->Nl>>40)&0xFF;
110 p[sizeof(c->u)-7] = (c->Nl>>48)&0xFF;
111 p[sizeof(c->u)-8] = (c->Nl>>56)&0xFF;
112 p[sizeof(c->u)-9] = (c->Nh)&0xFF;
113 p[sizeof(c->u)-10] = (c->Nh>>8)&0xFF;
114 p[sizeof(c->u)-11] = (c->Nh>>16)&0xFF;
115 p[sizeof(c->u)-12] = (c->Nh>>24)&0xFF;
116 p[sizeof(c->u)-13] = (c->Nh>>32)&0xFF;
117 p[sizeof(c->u)-14] = (c->Nh>>40)&0xFF;
118 p[sizeof(c->u)-15] = (c->Nh>>48)&0xFF;
119 p[sizeof(c->u)-16] = (c->Nh>>56)&0xFF;
/* Compress the final, fully padded block. */
122 sha512_block (c,p,1);
128 /* Let compiler decide if it's appropriate to unroll... */
/* SHA-384 output: the first SHA384_DIGEST_LENGTH/8 chaining words, big-endian. */
129 case SHA384_DIGEST_LENGTH:
130 for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
132 SHA_LONG64 t = c->h[n];
134 *(md++) = (t>>56)&0xFF; *(md++) = (t>>48)&0xFF;
135 *(md++) = (t>>40)&0xFF; *(md++) = (t>>32)&0xFF;
136 *(md++) = (t>>24)&0xFF; *(md++) = (t>>16)&0xFF;
137 *(md++) = (t>>8)&0xFF; *(md++) = (t)&0xFF;
/* SHA-512 output: SHA512_DIGEST_LENGTH/8 chaining words, big-endian. */
140 case SHA512_DIGEST_LENGTH:
141 for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
143 SHA_LONG64 t = c->h[n];
145 *(md++) = (t>>56)&0xFF; *(md++) = (t>>48)&0xFF;
146 *(md++) = (t>>40)&0xFF; *(md++) = (t>>32)&0xFF;
147 *(md++) = (t>>24)&0xFF; *(md++) = (t>>16)&0xFF;
148 *(md++) = (t>>8)&0xFF; *(md++) = (t)&0xFF;
151 /* ... as well as make sure md_len is not abused. */
/*
 * SHA384_Final: SHA-384 entry point for finalization. Forwards to
 * SHA512_Final with the same arguments and returns its result.
 */
158 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
159 { return SHA512_Final (md,c); }
/*
 * SHA512_Update: feed len bytes starting at _data into the hash.
 *
 * Adds len*8 to the 128-bit bit counter c->Nh:c->Nl, completes and
 * compresses a partially filled buffer c->u if there is one, compresses
 * whole blocks from the caller's data and keeps any remaining tail bytes
 * in c->u (c->num holds their count) for a later call.
 * Returns 1 immediately when len is 0.
 *
 * NOTE(review): braces, the declaration of l, several branch conditions
 * and the final return statement are not visible in this listing; the
 * comments below describe only the statements that can be seen.
 */
161 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
164 unsigned char *p=c->u.p;
165 const unsigned char *data=(const unsigned char *)_data;
167 if (len==0) return 1;
/*
 * Nl counts bits: add len<<3 and carry into Nh when the low word wraps.
 * The bits of len lost by the <<3 (len>>61) are added to Nh as well, but
 * only when size_t is at least 8 bytes wide.
 */
169 l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
170 if (l < c->Nl) c->Nh++;
171 if (sizeof(len)>=8) c->Nh+=(((SHA_LONG64)len)>>61);
/* n = free space left in the context buffer. */
176 size_t n = sizeof(c->u) - c->num;
/*
 * Two visible outcomes for a partially filled buffer (the conditions
 * selecting between them are not visible): either all of the new data is
 * appended to the buffer, or the buffer is topped up with n bytes, its
 * fill count reset, and the completed block compressed.
 */
180 memcpy (p+c->num,data,len), c->num += len;
184 memcpy (p+c->num,data,n), c->num = 0;
186 sha512_block (c,p,1);
/* At least one whole block remains in the caller's data. */
190 if (len >= sizeof(c->u))
/*
 * If the block routine was not declared able to read unaligned input and
 * data is not aligned to the size of a c->u.d[] element, route each block
 * through the context buffer p before compressing it.
 */
192 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
193 if ((size_t)data%sizeof(c->u.d[0]) != 0)
194 while (len >= sizeof(c->u))
195 memcpy (p,data,sizeof(c->u)),
196 sha512_block (c,p,1),
198 data += sizeof(c->u);
/* Otherwise compress all whole blocks directly from the caller's memory. */
201 sha512_block (c,data,len/sizeof(c->u)),
/* Keep the leftover tail bytes for the next call. */
207 if (len != 0) memcpy (p,data,len), c->num = (int)len;
/*
 * SHA384_Update: SHA-384 entry point for absorbing data. Forwards to
 * SHA512_Update with the same arguments and returns its result.
 */
212 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
213 { return SHA512_Update (c,data,len); }
/*
 * SHA512_Transform: run the compression routine on exactly one block at
 * data, updating the chaining state in c. The pointer is handed to
 * sha512_block() as is; no padding, length accounting or buffering is
 * performed here.
 */
215 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
216 { sha512_block (c,data,1); }
/*
 * SHA384: one-shot helper that hashes the n bytes at d.
 *
 * When md is NULL the result is directed to the function-static buffer
 * m; that buffer is shared by every such call, so a later call
 * overwrites an earlier result. The local context c is wiped with
 * OPENSSL_cleanse() once the data has been processed.
 *
 * NOTE(review): the declaration of c, the init/final calls and the
 * return statement are not visible in this listing (presumably
 * SHA384_Init, SHA512_Final and "return md" -- confirm in the full file).
 */
218 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
221 static unsigned char m[SHA384_DIGEST_LENGTH];
223 if (md == NULL) md=m;
225 SHA512_Update(&c,d,n);
/* Do not leave hash state behind on the stack. */
227 OPENSSL_cleanse(&c,sizeof(c));
/*
 * SHA512: one-shot helper that hashes the n bytes at d.
 *
 * When md is NULL the result is directed to the function-static buffer
 * m; that buffer is shared by every such call, so a later call
 * overwrites an earlier result. The local context c is wiped with
 * OPENSSL_cleanse() once the data has been processed.
 *
 * NOTE(review): the declaration of c, the init/final calls and the
 * return statement are not visible in this listing (presumably
 * SHA512_Init, SHA512_Final and "return md" -- confirm in the full file).
 */
231 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
234 static unsigned char m[SHA512_DIGEST_LENGTH];
236 if (md == NULL) md=m;
238 SHA512_Update(&c,d,n);
/* Do not leave hash state behind on the stack. */
240 OPENSSL_cleanse(&c,sizeof(c));
/*
 * K512: the eighty 64-bit round constants of the SHA-384/SHA-512
 * compression function as listed in FIPS 180-4. sha512_block() adds
 * K512[i] in round i.
 */
244 static const SHA_LONG64 K512[80] = {
245 U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
246 U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
247 U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
248 U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
249 U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
250 U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
251 U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
252 U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
253 U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
254 U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
255 U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
256 U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
257 U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
258 U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
259 U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
260 U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
261 U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
262 U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
263 U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
264 U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
265 U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
266 U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
267 U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
268 U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
269 U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
270 U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
271 U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
272 U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
273 U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
274 U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
275 U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
276 U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
277 U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
278 U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
279 U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
280 U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
281 U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
282 U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
283 U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
284 U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
/*
 * PULL64(x): fetch the 64-bit word stored at x in big-endian byte order.
 *
 * With GCC inline assembly available on x86-64, a statement-expression
 * variant starts by loading the word natively; the rest of that macro is
 * not visible in this listing (presumably a byte swap -- TODO confirm).
 * The portable definition builds the value from single bytes: B(x,j)
 * reads byte j of x and shifts it left by (7-j)*8 bits, so byte 0 ends
 * up most significant.
 * NOTE(review): the #else/#endif lines pairing these definitions are
 * missing from this listing.
 */
287 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
288 # if defined(__x86_64) || defined(__x86_64__)
289 # define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
298 #define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
299 #define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
/*
 * ROTR(a,n): rotate the 64-bit value a right by n bits.
 *
 * Compiler-specific variants: the _rotr64 intrinsic for MSVC on Win64,
 * and inline assembly for GCC on x86-64 and on 64-bit PowerPC (rotrdi).
 * Parts of the assembly bodies and the closing #endif lines are not
 * visible in this listing.
 */
303 # if defined(_MSC_VER)
304 # if defined(_WIN64) /* applies to both IA-64 and AMD64 */
305 # define ROTR(a,n) _rotr64((a),n)
307 # elif defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
308 # if defined(__x86_64) || defined(__x86_64__)
309 # define ROTR(a,n) ({ unsigned long ret; \
314 # elif defined(_ARCH_PPC) && defined(__64BIT__)
315 # define ROTR(a,n) ({ unsigned long ret; \
316 asm ("rotrdi %0,%1,%2" \
318 : "r"(a),"K"(n)); ret; })
/*
 * Portable rotate built from two shifts; presumably only used when none
 * of the variants above defined ROTR (the guard is not visible here).
 */
324 #define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
/*
 * SHA-512 logical functions (FIPS 180-4). Sigma0/Sigma1 mix the working
 * variables a and e in every round; sigma0/sigma1 are used when the
 * message schedule is extended and combine two rotations with a plain
 * right shift.
 */
327 #define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
328 #define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
329 #define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
330 #define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
/*
 * Ch: for each bit, pick y where x is 1 and z where x is 0.
 * Maj: for each bit, the majority value among x, y and z.
 */
332 #define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
333 #define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
/*
 * GO_FOR_SSE2(ctx,in,num): optional dispatch to an assembler routine,
 * available when OPENSSL_IA32_SSE2 is defined and assembler is enabled.
 *
 * If bit 26 of OPENSSL_ia32cap is clear (going by the macro name, this is
 * the SSE2 capability flag) the macro breaks out of its do-block and the
 * caller continues with the C code. Otherwise sha512_block_sse2() is
 * given the chaining words ctx->h together with the input and block
 * count, and the enclosing function returns.
 * NOTE(review): the end of the macro and the #else/#endif that follow are
 * not visible in this listing.
 */
335 #if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM)
336 #define GO_FOR_SSE2(ctx,in,num) do { \
337 extern int OPENSSL_ia32cap; \
338 void sha512_block_sse2(void *,const void *,size_t); \
339 if (!(OPENSSL_ia32cap & (1<<26))) break; \
340 sha512_block_sse2(ctx->h,in,num); return; \
/*
 * sha512_block, compact variant (built when OPENSSL_SMALL_FOOTPRINT is
 * defined): the compression function written with loops instead of
 * unrolled rounds. It updates the chaining words ctx->h[0..7] from the
 * input blocks at in.
 *
 * NOTE(review): braces, the declarations of X and i, and every loop
 * header are not visible in this listing. The round ranges mentioned
 * below are therefore assumptions to be confirmed against the full file.
 */
344 #ifdef OPENSSL_SMALL_FOOTPRINT
346 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
/* View the input as a sequence of 64-bit words. */
348 const SHA_LONG64 *W=in;
349 SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1,T2;
/* May hand the whole job to the SSE2 routine and return. */
354 GO_FOR_SSE2(ctx,in,num);
/* Load the working variables from the current chaining state. */
359 a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
360 e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
/*
 * Round body using the message word itself (presumably rounds 0..15):
 * the word is read with PULL64 and also saved in X[i] for the schedule.
 */
367 T1 = X[i] = PULL64(W[i]);
369 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
370 T2 = Sigma0(a) + Maj(a,b,c);
371 h = g; g = f; f = e; e = d + T1;
372 d = c; c = b; b = a; a = T1 + T2;
/*
 * Round body with message expansion (presumably rounds 16..79): X is
 * used as a 16-entry ring, indices are reduced with &0x0f and the new
 * schedule word replaces X[i&0xf] in place.
 */
377 s0 = X[(i+1)&0x0f]; s0 = sigma0(s0);
378 s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
380 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
381 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
382 T2 = Sigma0(a) + Maj(a,b,c);
383 h = g; g = f; f = e; e = d + T1;
384 d = c; c = b; b = a; a = T1 + T2;
/* Fold the working variables back into the chaining state. */
387 ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
388 ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
/*
 * ROUND_00_15(i,a,..,h): one compression round without moving data
 * between variables. T1 must already hold the message word for round i.
 * The macro adds h, Sigma1(e), Ch(e,f,g) and K512[i] to T1, overwrites h
 * with Sigma0(a)+Maj(a,b,c), then adds T1 to both d and h. Afterwards
 * the variables passed as d and h carry the updated values; the callers
 * rotate the argument list by one position each round instead of
 * shuffling eight variables.
 */
396 #define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
397 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
398 h = Sigma0(a) + Maj(a,b,c); \
399 d += T1; h += T1; } while (0)
/*
 * ROUND_16_80(i,a,..,h,X): round with message expansion. Computes the
 * next schedule word in the 16-entry ring X (indices reduced with &0x0f),
 * stores it over X[i&0x0f] and in T1, then performs ROUND_00_15.
 */
401 #define ROUND_16_80(i,a,b,c,d,e,f,g,h,X) do { \
402 s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); \
403 s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); \
404 T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \
405 ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0)
/*
 * sha512_block, unrolled variant: the compression function with the
 * first sixteen rounds written out and the remaining rounds issued
 * eight at a time through ROUND_16_80. It updates the chaining words
 * ctx->h[0..7] from the input blocks at in.
 *
 * NOTE(review): braces, the declarations of X and i, preprocessor guards
 * and the loop headers are not visible in this listing; statements about
 * them below are assumptions to be confirmed against the full file.
 */
407 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
/* View the input as a sequence of 64-bit words. */
409 const SHA_LONG64 *W=in;
410 SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1;
/* May hand the whole job to the SSE2 routine and return. */
415 GO_FOR_SSE2(ctx,in,num);
/* Load the working variables from the current chaining state. */
420 a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
421 e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
/*
 * Rounds 0..15, form 1: message words are taken from W[] as they are.
 * Presumably this is the variant for hosts whose byte order already
 * matches the input stream; the selecting #ifdef is not visible here.
 * The argument list rotates by one position per round.
 */
424 T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h);
425 T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g);
426 T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f);
427 T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e);
428 T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d);
429 T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c);
430 T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b);
431 T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a);
432 T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h);
433 T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g);
434 T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f);
435 T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e);
436 T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d);
437 T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c);
438 T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b);
439 T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a);
/*
 * Rounds 0..15, form 2: each message word is read through PULL64, which
 * yields the big-endian value whatever the host byte order is.
 */
441 T1 = X[0] = PULL64(W[0]); ROUND_00_15(0,a,b,c,d,e,f,g,h);
442 T1 = X[1] = PULL64(W[1]); ROUND_00_15(1,h,a,b,c,d,e,f,g);
443 T1 = X[2] = PULL64(W[2]); ROUND_00_15(2,g,h,a,b,c,d,e,f);
444 T1 = X[3] = PULL64(W[3]); ROUND_00_15(3,f,g,h,a,b,c,d,e);
445 T1 = X[4] = PULL64(W[4]); ROUND_00_15(4,e,f,g,h,a,b,c,d);
446 T1 = X[5] = PULL64(W[5]); ROUND_00_15(5,d,e,f,g,h,a,b,c);
447 T1 = X[6] = PULL64(W[6]); ROUND_00_15(6,c,d,e,f,g,h,a,b);
448 T1 = X[7] = PULL64(W[7]); ROUND_00_15(7,b,c,d,e,f,g,h,a);
449 T1 = X[8] = PULL64(W[8]); ROUND_00_15(8,a,b,c,d,e,f,g,h);
450 T1 = X[9] = PULL64(W[9]); ROUND_00_15(9,h,a,b,c,d,e,f,g);
451 T1 = X[10] = PULL64(W[10]); ROUND_00_15(10,g,h,a,b,c,d,e,f);
452 T1 = X[11] = PULL64(W[11]); ROUND_00_15(11,f,g,h,a,b,c,d,e);
453 T1 = X[12] = PULL64(W[12]); ROUND_00_15(12,e,f,g,h,a,b,c,d);
454 T1 = X[13] = PULL64(W[13]); ROUND_00_15(13,d,e,f,g,h,a,b,c);
455 T1 = X[14] = PULL64(W[14]); ROUND_00_15(14,c,d,e,f,g,h,a,b);
456 T1 = X[15] = PULL64(W[15]); ROUND_00_15(15,b,c,d,e,f,g,h,a);
/*
 * Remaining rounds, eight per pass of a loop over i whose header is not
 * visible here; after eight rotations the argument order is back where
 * it started.
 */
461 ROUND_16_80(i+0,a,b,c,d,e,f,g,h,X);
462 ROUND_16_80(i+1,h,a,b,c,d,e,f,g,X);
463 ROUND_16_80(i+2,g,h,a,b,c,d,e,f,X);
464 ROUND_16_80(i+3,f,g,h,a,b,c,d,e,X);
465 ROUND_16_80(i+4,e,f,g,h,a,b,c,d,X);
466 ROUND_16_80(i+5,d,e,f,g,h,a,b,c,X);
467 ROUND_16_80(i+6,c,d,e,f,g,h,a,b,X);
468 ROUND_16_80(i+7,b,c,d,e,f,g,h,a,X);
/* Fold the working variables back into the chaining state. */
471 ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
472 ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;