1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3 * Copyright (c) 2004 The OpenSSL Project. All rights reserved
4 * according to the OpenSSL license [found in ../../LICENSE].
5 * ====================================================================
8 * IMPLEMENTATION NOTES.
10 * As you might have noticed 32-bit hash algorithms:
12 * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
13 * - optimized versions implement two transform functions: one operating
14 * on [aligned] data in host byte order and one - on data in input
16 * - share common byte-order neutral collector and padding function
17 * implementations, ../md32_common.h;
19 * None of the above applies to this SHA-512 implementation. Reasons
20 * [in reverse order] are:
22 * - it's the only 64-bit hash algorithm at the time of this writing,
23 * there is no need for common collector/padding implementation [yet];
24 * - by supporting only one transform function [which operates on
25 * *aligned* data in input stream byte order, big-endian in this case]
26 * we minimize burden of maintenance in two ways: a) collector/padding
27 * function is simpler; b) only one transform function to stare at;
28 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
29 * apply a number of optimizations to mitigate potential performance
30 * penalties caused by previous design decision;
34 * Implementation relies on the fact that "long long" is 64-bit on
35 * both 32- and 64-bit platforms. If some compiler vendor comes up
36 * with 128-bit long long, adjustment to sha.h would be required.
37 * As this implementation relies on 64-bit integer type, it's totally
38 * inappropriate for platforms which don't support it, most notably
40 * <appro@fy.chalmers.se>
45 #include <openssl/opensslconf.h>
46 #include <openssl/crypto.h>
47 #include <openssl/sha.h>
48 #include <openssl/opensslv.h>
/* Version tag: the literal "SHA-512" followed by OPENSSL_VERSION_PTEXT. */
50 const char *SHA512_version="SHA-512" OPENSSL_VERSION_PTEXT;
/*
 * On x86 and x86-64 builds, advertise that sha512_block may be handed input
 * at any alignment. SHA512_Update tests this macro: when it is absent,
 * misaligned input is first copied into the context buffer.
 */
52 #if defined(_M_IX86) || defined(_M_AMD64) || defined(__i386) || defined(__x86_64)
53 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
/*
 * SHA384_Init - load the eight 64-bit chaining words c->h[0..7] with the
 * SHA-384 initial hash values.
 */
56 int SHA384_Init (SHA512_CTX *c)
58 c->h[0]=U64(0xcbbb9d5dc1059ed8);
59 c->h[1]=U64(0x629a292a367cd507);
60 c->h[2]=U64(0x9159015a3070dd17);
61 c->h[3]=U64(0x152fecd8f70e5939);
62 c->h[4]=U64(0x67332667ffc00b31);
63 c->h[5]=U64(0x8eb44a8768581511);
64 c->h[6]=U64(0xdb0c2e0d64f98fa7);
65 c->h[7]=U64(0x47b5481dbefa4fa4);
/*
 * SHA512_Init - load the eight 64-bit chaining words c->h[0..7] with the
 * SHA-512 initial hash values.
 */
71 int SHA512_Init (SHA512_CTX *c)
73 c->h[0]=U64(0x6a09e667f3bcc908);
74 c->h[1]=U64(0xbb67ae8584caa73b);
75 c->h[2]=U64(0x3c6ef372fe94f82b);
76 c->h[3]=U64(0xa54ff53a5f1d36f1);
77 c->h[4]=U64(0x510e527fade682d1);
78 c->h[5]=U64(0x9b05688c2b3e6c1f);
79 c->h[6]=U64(0x1f83d9abfb41bd6b);
80 c->h[7]=U64(0x5be0cd19137e2179);
/* Forward declaration: the finalization and update routines below call the
 * block transform before its definition appears. */
86 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num);
/*
 * sha512_final - shared finalization for SHA-384 and SHA-512.
 *
 * md:  destination for the digest bytes
 * c:   hashing context; its buffer c->u is used as scratch for the last block
 * msz: number of digest bytes to emit. The output loop subtracts 8 from this
 *      unsigned value until it reaches zero, so it must be a multiple of 8.
 */
88 static int sha512_final (unsigned char *md, SHA512_CTX *c, size_t msz)
/* Byte view of the context's block buffer. */
90 unsigned char *p=(unsigned char *)c->u.p;
/* Not enough room left for the 16-byte length field: zero the remainder
 * of the buffer and restart at offset 0. */
95 if (n > (sizeof(c->u)-16))
96 memset (p+n,0,sizeof(c->u)-n), n=0,
/* Zero everything between the current offset and the 16-byte length field. */
99 memset (p+n,0,sizeof(c->u)-16-n);
/* Length field stored as two 64-bit words: high word first, then low word. */
101 c->u.d[SHA_LBLOCK-2] = c->Nh;
102 c->u.d[SHA_LBLOCK-1] = c->Nl;
/* Length field written one byte at a time, most significant byte first:
 * Nh fills the first eight bytes of the field, Nl the last eight.
 * NOTE(review): this and the word stores above look like alternatives
 * chosen by a byte-order conditional - confirm. */
104 p[sizeof(c->u)-1] = (c->Nl)&0xFF;
105 p[sizeof(c->u)-2] = (c->Nl>>8)&0xFF;
106 p[sizeof(c->u)-3] = (c->Nl>>16)&0xFF;
107 p[sizeof(c->u)-4] = (c->Nl>>24)&0xFF;
108 p[sizeof(c->u)-5] = (c->Nl>>32)&0xFF;
109 p[sizeof(c->u)-6] = (c->Nl>>40)&0xFF;
110 p[sizeof(c->u)-7] = (c->Nl>>48)&0xFF;
111 p[sizeof(c->u)-8] = (c->Nl>>56)&0xFF;
112 p[sizeof(c->u)-9] = (c->Nh)&0xFF;
113 p[sizeof(c->u)-10] = (c->Nh>>8)&0xFF;
114 p[sizeof(c->u)-11] = (c->Nh>>16)&0xFF;
115 p[sizeof(c->u)-12] = (c->Nh>>24)&0xFF;
116 p[sizeof(c->u)-13] = (c->Nh>>32)&0xFF;
117 p[sizeof(c->u)-14] = (c->Nh>>40)&0xFF;
118 p[sizeof(c->u)-15] = (c->Nh>>48)&0xFF;
119 p[sizeof(c->u)-16] = (c->Nh>>56)&0xFF;
/* Run the transform over the final block held in the buffer. */
122 sha512_block (c,p,1);
/* Serialize the state words into md, most significant byte first,
 * eight bytes per word, until msz bytes have been produced. */
126 for (n=0;msz>0;n++,msz-=8)
128 SHA_LONG64 t = c->h[n];
130 *(md++) = (t>>56)&0xFF; *(md++) = (t>>48)&0xFF;
131 *(md++) = (t>>40)&0xFF; *(md++) = (t>>32)&0xFF;
132 *(md++) = (t>>24)&0xFF; *(md++) = (t>>16)&0xFF;
133 *(md++) = (t>>8)&0xFF; *(md++) = (t)&0xFF;
/* SHA384_Final - finalize c and write SHA384_DIGEST_LENGTH digest bytes to md;
 * returns whatever sha512_final returns. */
139 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
140 { return sha512_final (md,c,SHA384_DIGEST_LENGTH); }
/* SHA512_Final - finalize c and write SHA512_DIGEST_LENGTH digest bytes to md;
 * returns whatever sha512_final returns. */
141 int SHA512_Final (unsigned char *md,SHA512_CTX *c)
142 { return sha512_final (md,c,SHA512_DIGEST_LENGTH); }
/*
 * SHA512_Update - feed len bytes starting at _data into the hash context c.
 * Updates the message bit count held in c->Nh:c->Nl, tops up and processes
 * the context buffer, hashes whole blocks, and stashes leftover bytes.
 * Returns 1 immediately when len is 0.
 */
144 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
147 unsigned char *p=c->u.p;
148 const unsigned char *data=(const unsigned char *)_data;
/* Nothing to absorb. */
150 if (len==0) return 1;
/* Add len*8 bits to the low counter word; a wrapped sum carries into Nh. */
152 l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
153 if (l < c->Nl) c->Nh++;
/* With a size_t of 64 bits or more, the three bits shifted out by <<3
 * belong in the high counter word. */
154 if (sizeof(len)>=8) c->Nh+=(((SHA_LONG64)len)>>61);
/* n = bytes still free in the context buffer. */
159 size_t n = sizeof(c->u) - c->num;
/* Append the entire input to the buffer and advance the fill count. */
163 memcpy (p+c->num,data,len), c->num += len;
/* Top the buffer up with the first n input bytes, mark it empty again
 * and hash the completed block. */
167 memcpy (p+c->num,data,n), c->num = 0;
169 sha512_block (c,p,1);
/* At least one whole block of input is available. */
173 if (len >= sizeof(c->u))
/* Without unaligned-access support, input that is not aligned to the size
 * of one c->u.d element is copied into the buffer one block at a time. */
175 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
176 if ((size_t)data%sizeof(c->u.d[0]) != 0)
177 while (len >= sizeof(c->u))
178 memcpy (p,data,sizeof(c->u)),
179 sha512_block (c,p,1),
181 data += sizeof(c->u);
/* Hash all whole blocks straight from the caller's memory in one call. */
184 sha512_block (c,data,len/sizeof(c->u)),
/* Keep the remaining partial block for the next call. */
190 if (len != 0) memcpy (p,data,len), c->num = (int)len;
/* SHA384_Update - identical to SHA512_Update; simply forwards to it. */
195 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
196 { return SHA512_Update (c,data,len); }
/* SHA512_Transform - run the block transform on exactly one block read from
 * data, updating the state in c. */
198 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
199 { sha512_block (c,data,1); }
/*
 * SHA384 - one-shot digest of the n bytes at d.
 * The result goes to md; when md is NULL it goes to the function's static
 * buffer m instead, which every such call shares and overwrites.
 */
201 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
204 static unsigned char m[SHA384_DIGEST_LENGTH];
206 if (md == NULL) md=m;
208 SHA512_Update(&c,d,n);
/* sizeof(m) is SHA384_DIGEST_LENGTH, the number of digest bytes to emit. */
209 sha512_final(md,&c,sizeof(m));
/* Wipe the context so intermediate hash state is not left behind. */
210 OPENSSL_cleanse(&c,sizeof(c));
/*
 * SHA512 - one-shot digest of the n bytes at d.
 * The result goes to md; when md is NULL it goes to the function's static
 * buffer m instead, which every such call shares and overwrites.
 */
214 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
217 static unsigned char m[SHA512_DIGEST_LENGTH];
219 if (md == NULL) md=m;
221 SHA512_Update(&c,d,n);
/* sizeof(m) is SHA512_DIGEST_LENGTH, the number of digest bytes to emit. */
222 sha512_final(md,&c,sizeof(m));
/* Wipe the context so intermediate hash state is not left behind. */
223 OPENSSL_cleanse(&c,sizeof(c));
/* Round constants for the SHA-384/SHA-512 compression function: the round
 * code adds K512[i] into T1 in round i, for 80 entries in total. */
227 static const SHA_LONG64 K512[80] = {
228 U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
229 U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
230 U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
231 U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
232 U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
233 U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
234 U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
235 U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
236 U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
237 U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
238 U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
239 U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
240 U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
241 U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
242 U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
243 U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
244 U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
245 U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
246 U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
247 U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
248 U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
249 U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
250 U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
251 U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
252 U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
253 U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
254 U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
255 U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
256 U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
257 U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
258 U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
259 U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
260 U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
261 U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
262 U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
263 U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
264 U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
265 U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
266 U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
267 U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
/*
 * PULL64(x): fetch the 64-bit message word stored at x.
 *
 * The first definition is limited to GCC (version 2 or later) with inline
 * assembly enabled on x86-64, and begins with a direct 64-bit load of x.
 *
 * The second definition is plain C: B(x,j) takes byte j of x and shifts it
 * to bit position (7-j)*8, and PULL64 ORs the eight results together, so
 * byte 0 ends up most significant whatever the host byte order is.
 */
270 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
271 # if defined(__x86_64) || defined(__x86_64__)
272 # define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
281 #define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
282 #define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
/*
 * ROTR(a,n): rotate the 64-bit value a right by n bits.
 * Platform-specific forms come first: the _rotr64 intrinsic under MSVC on
 * Win64, and GCC inline assembly on x86-64 and on 64-bit PowerPC (the
 * latter using the rotrdi instruction).
 */
286 # if defined(_MSC_VER)
287 # if defined(_WIN64) /* applies to both IA-64 and AMD64 */
288 # define ROTR(a,n) _rotr64((a),n)
290 # elif defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
291 # if defined(__x86_64) || defined(__x86_64__)
292 # define ROTR(a,n) ({ unsigned long ret; \
297 # elif defined(_ARCH_PPC) && defined(__64BIT__)
298 # define ROTR(a,n) ({ unsigned long ret; \
299 asm ("rotrdi %0,%1,%2" \
301 : "r"(a),"K"(n)); ret; })
/* Plain-C rotate built from two shifts.
 * NOTE(review): s is used unparenthesised, so this form is only safe for
 * simple arguments such as the literal counts passed by the Sigma macros. */
307 #define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
/* Round functions: Sigma0 is applied to working variable a and Sigma1 to e
 * inside each round (XOR of three right-rotations each). */
310 #define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
311 #define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
/* Message-schedule functions: two right-rotations XORed with a plain
 * right shift. */
312 #define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
313 #define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
/* Ch: for each bit, take y where x is 1 and z where x is 0.
 * Maj: for each bit, the majority value among x, y and z. */
315 #define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
316 #define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
/*
 * GO_FOR_SSE2(ctx,in,num): optional hand-off to the assembly routine
 * sha512_block_sse2, compiled in when OPENSSL_IA32_SSE2 is defined and
 * assembly is not disabled. If bit 26 of OPENSSL_ia32cap is clear, the macro
 * breaks out and the caller continues with the C code; otherwise it calls
 * sha512_block_sse2 on ctx->h and returns from the enclosing function.
 * NOTE(review): bit 26 is presumably the SSE2 capability flag, going by the
 * macro name - confirm.
 */
318 #if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM)
319 #define GO_FOR_SSE2(ctx,in,num) do { \
320 extern int OPENSSL_ia32cap; \
321 void sha512_block_sse2(void *,const void *,size_t); \
322 if (!(OPENSSL_ia32cap & (1<<26))) break; \
323 sha512_block_sse2(ctx->h,in,num); return; \
/* Compact variant of the block transform, selected when
 * OPENSSL_SMALL_FOOTPRINT is defined. */
327 #ifdef OPENSSL_SMALL_FOOTPRINT
/*
 * sha512_block - compression function.
 * ctx: context whose chaining state ctx->h[0..7] is updated
 * in:  input data, read as 64-bit message words
 * num: number of blocks to process
 */
329 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
/* View the input as an array of 64-bit words. */
331 const SHA_LONG64 *W=in;
332 SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1,T2;
/* May return early after delegating the work to the SSE2 routine. */
337 GO_FOR_SSE2(ctx,in,num);
/* Load the working variables from the chaining state. */
342 a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
343 e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
/* Rounds fed directly from the input: fetch word i with PULL64 and keep a
 * copy in X[i] for the schedule expansion later on. */
350 T1 = X[i] = PULL64(W[i]);
352 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
353 T2 = Sigma0(a) + Maj(a,b,c);
/* Shift the eight working variables down by one position. */
354 h = g; g = f; f = e; e = d + T1;
355 d = c; c = b; b = a; a = T1 + T2;
/* Rounds with schedule expansion: X is used as a ring indexed modulo 16,
 * and the new word overwrites slot i&0xf in place. */
360 s0 = X[(i+1)&0x0f]; s0 = sigma0(s0);
361 s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
363 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
364 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
365 T2 = Sigma0(a) + Maj(a,b,c);
366 h = g; g = f; f = e; e = d + T1;
367 d = c; c = b; b = a; a = T1 + T2;
/* Add the working variables back into the chaining state. */
370 ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
371 ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
/*
 * ROUND_00_15: one round, entered with T1 already holding the message word.
 * Only two of the eight variables are written: d gains T1, and h becomes
 * Sigma0(a)+Maj(a,b,c)+T1. Callers rotate the argument order from round to
 * round instead of moving values between variables.
 */
379 #define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
380 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
381 h = Sigma0(a) + Maj(a,b,c); \
382 d += T1; h += T1; } while (0)
/*
 * ROUND_16_80: extend the message schedule in the ring X (indices taken
 * modulo 16), leaving the new word in both X[i&0x0f] and T1, then perform
 * the round via ROUND_00_15.
 */
384 #define ROUND_16_80(i,a,b,c,d,e,f,g,h,X) do { \
385 s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); \
386 s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); \
387 T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \
388 ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0)
/*
 * sha512_block - compression function, unrolled variant built on the
 * ROUND_00_15 / ROUND_16_80 macros.
 * ctx: context whose chaining state ctx->h[0..7] is updated
 * in:  input data, read as 64-bit message words
 * num: number of blocks to process
 */
390 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
/* View the input as an array of 64-bit words. */
392 const SHA_LONG64 *W=in;
393 SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1;
/* May return early after delegating the work to the SSE2 routine. */
398 GO_FOR_SSE2(ctx,in,num);
/* Load the working variables from the chaining state. */
403 a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
404 e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
/* First sixteen rounds, taking the message words from W unchanged.
 * NOTE(review): this group and the PULL64 group after it look like
 * alternatives chosen by a byte-order conditional - confirm. */
407 T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h);
408 T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g);
409 T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f);
410 T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e);
411 T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d);
412 T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c);
413 T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b);
414 T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a);
415 T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h);
416 T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g);
417 T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f);
418 T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e);
419 T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d);
420 T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c);
421 T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b);
422 T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a);
/* First sixteen rounds, fetching each message word through PULL64. */
424 T1 = X[0] = PULL64(W[0]); ROUND_00_15(0,a,b,c,d,e,f,g,h);
425 T1 = X[1] = PULL64(W[1]); ROUND_00_15(1,h,a,b,c,d,e,f,g);
426 T1 = X[2] = PULL64(W[2]); ROUND_00_15(2,g,h,a,b,c,d,e,f);
427 T1 = X[3] = PULL64(W[3]); ROUND_00_15(3,f,g,h,a,b,c,d,e);
428 T1 = X[4] = PULL64(W[4]); ROUND_00_15(4,e,f,g,h,a,b,c,d);
429 T1 = X[5] = PULL64(W[5]); ROUND_00_15(5,d,e,f,g,h,a,b,c);
430 T1 = X[6] = PULL64(W[6]); ROUND_00_15(6,c,d,e,f,g,h,a,b);
431 T1 = X[7] = PULL64(W[7]); ROUND_00_15(7,b,c,d,e,f,g,h,a);
432 T1 = X[8] = PULL64(W[8]); ROUND_00_15(8,a,b,c,d,e,f,g,h);
433 T1 = X[9] = PULL64(W[9]); ROUND_00_15(9,h,a,b,c,d,e,f,g);
434 T1 = X[10] = PULL64(W[10]); ROUND_00_15(10,g,h,a,b,c,d,e,f);
435 T1 = X[11] = PULL64(W[11]); ROUND_00_15(11,f,g,h,a,b,c,d,e);
436 T1 = X[12] = PULL64(W[12]); ROUND_00_15(12,e,f,g,h,a,b,c,d);
437 T1 = X[13] = PULL64(W[13]); ROUND_00_15(13,d,e,f,g,h,a,b,c);
438 T1 = X[14] = PULL64(W[14]); ROUND_00_15(14,c,d,e,f,g,h,a,b);
439 T1 = X[15] = PULL64(W[15]); ROUND_00_15(15,b,c,d,e,f,g,h,a);
/* Rounds with schedule expansion, unrolled eight at a time: after eight
 * argument rotations the variables are back in their a..h order. */
444 ROUND_16_80(i+0,a,b,c,d,e,f,g,h,X);
445 ROUND_16_80(i+1,h,a,b,c,d,e,f,g,X);
446 ROUND_16_80(i+2,g,h,a,b,c,d,e,f,X);
447 ROUND_16_80(i+3,f,g,h,a,b,c,d,e,X);
448 ROUND_16_80(i+4,e,f,g,h,a,b,c,d,X);
449 ROUND_16_80(i+5,d,e,f,g,h,a,b,c,X);
450 ROUND_16_80(i+6,c,d,e,f,g,h,a,b,X);
451 ROUND_16_80(i+7,b,c,d,e,f,g,h,a,X);
/* Add the working variables back into the chaining state. */
454 ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
455 ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;