5f7a8dee6d43b9a017a8d2923e28552ba3058fc1
[oweals/openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 /*
8  * IMPLEMENTATION NOTES.
9  *
10  * As you might have noticed 32-bit hash algorithms:
11  *
12  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
13  * - optimized versions implement two transform functions: one operating
14  *   on [aligned] data in host byte order and one - on data in input
15  *   stream byte order;
16  * - share common byte-order neutral collector and padding function
17  *   implementations, ../md32_common.h;
18  *
19  * Neither of the above applies to this SHA-512 implementation. Reasons
20  * [in reverse order] are:
21  *
22  * - it's the only 64-bit hash algorithm at the time of this writing,
23  *   there is no need for common collector/padding implementation [yet];
24  * - by supporting only one transform function [which operates on
25  *   *aligned* data in input stream byte order, big-endian in this case]
26  *   we minimize burden of maintenance in two ways: a) collector/padding
27  *   function is simpler; b) only one transform function to stare at;
28  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
29  *   apply a number of optimizations to mitigate potential performance
30  *   penalties caused by previous design decision;
31  *
32  * Caveat lector.
33  *
34  * Implementation relies on the fact that "long long" is 64-bit on
35  * both 32- and 64-bit platforms. If some compiler vendor comes up
36  * with 128-bit long long, adjustment to sha.h would be required.
37  * As this implementation relies on 64-bit integer type, it's totally
38  * inappropriate for platforms which don't support it, most notably
39  * 16-bit platforms.
40  *                                      <appro@fy.chalmers.se>
41  */
42 #include <stdlib.h>
43 #include <string.h>
44
45 #include <openssl/opensslconf.h>
46 #include <openssl/crypto.h>
47 #include <openssl/sha.h>
48 #include <openssl/opensslv.h>
49
/*
 * Version string exported by this module: the literal "SHA-512"
 * immediately followed by OPENSSL_VERSION_PTEXT.
 */
50 const char *SHA512_version="SHA-512" OPENSSL_VERSION_PTEXT;
51
/*
 * On x86 / x86-64 compilers (MSVC or GCC-style predefines) mark the block
 * function as able to consume unaligned input.  SHA512_Update tests this
 * macro and, when it is defined, skips the copy-through-the-context-buffer
 * path it otherwise uses for misaligned data.
 */
52 #if defined(_M_IX86) || defined(_M_AMD64) || defined(__i386) || defined(__x86_64)
53 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
54 #endif
55
56 int SHA384_Init (SHA512_CTX *c)
57         {
58         c->h[0]=U64(0xcbbb9d5dc1059ed8);
59         c->h[1]=U64(0x629a292a367cd507);
60         c->h[2]=U64(0x9159015a3070dd17);
61         c->h[3]=U64(0x152fecd8f70e5939);
62         c->h[4]=U64(0x67332667ffc00b31);
63         c->h[5]=U64(0x8eb44a8768581511);
64         c->h[6]=U64(0xdb0c2e0d64f98fa7);
65         c->h[7]=U64(0x47b5481dbefa4fa4);
66         c->Nl=0;        c->Nh=0;
67         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
68         return 1;
69         }
70
71 int SHA512_Init (SHA512_CTX *c)
72         {
73         c->h[0]=U64(0x6a09e667f3bcc908);
74         c->h[1]=U64(0xbb67ae8584caa73b);
75         c->h[2]=U64(0x3c6ef372fe94f82b);
76         c->h[3]=U64(0xa54ff53a5f1d36f1);
77         c->h[4]=U64(0x510e527fade682d1);
78         c->h[5]=U64(0x9b05688c2b3e6c1f);
79         c->h[6]=U64(0x1f83d9abfb41bd6b);
80         c->h[7]=U64(0x5be0cd19137e2179);
81         c->Nl=0;        c->Nh=0;
82         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
83         return 1;
84         }
85
/*
 * Forward declaration of the block-processing routine used by the
 * functions below: it folds `num` consecutive input blocks starting at
 * `in` into the hash state held in `ctx`.
 *
 * The routine has internal linkage unless SHA512_ASM is defined.  In that
 * configuration the C implementation later in this file is compiled out,
 * so the symbol is presumably supplied by a separate assembler module
 * (NOTE(review): confirm against the build files).
 */
86 #ifndef SHA512_ASM
87 static
88 #endif
89 void sha512_block (SHA512_CTX *ctx, const void *in, size_t num);
90
91 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
92         {
93         unsigned char *p=(unsigned char *)c->u.p;
94         size_t n=c->num;
95
96         p[n]=0x80;      /* There always is a room for one */
97         n++;
98         if (n > (sizeof(c->u)-16))
99                 memset (p+n,0,sizeof(c->u)-n), n=0,
100                 sha512_block (c,p,1);
101
102         memset (p+n,0,sizeof(c->u)-16-n);
103 #ifdef  B_ENDIAN
104         c->u.d[SHA_LBLOCK-2] = c->Nh;
105         c->u.d[SHA_LBLOCK-1] = c->Nl;
106 #else
107         p[sizeof(c->u)-1]  = (c->Nl)&0xFF;
108         p[sizeof(c->u)-2]  = (c->Nl>>8)&0xFF;
109         p[sizeof(c->u)-3]  = (c->Nl>>16)&0xFF;
110         p[sizeof(c->u)-4]  = (c->Nl>>24)&0xFF;
111         p[sizeof(c->u)-5]  = (c->Nl>>32)&0xFF;
112         p[sizeof(c->u)-6]  = (c->Nl>>40)&0xFF;
113         p[sizeof(c->u)-7]  = (c->Nl>>48)&0xFF;
114         p[sizeof(c->u)-8]  = (c->Nl>>56)&0xFF;
115         p[sizeof(c->u)-9]  = (c->Nh)&0xFF;
116         p[sizeof(c->u)-10] = (c->Nh>>8)&0xFF;
117         p[sizeof(c->u)-11] = (c->Nh>>16)&0xFF;
118         p[sizeof(c->u)-12] = (c->Nh>>24)&0xFF;
119         p[sizeof(c->u)-13] = (c->Nh>>32)&0xFF;
120         p[sizeof(c->u)-14] = (c->Nh>>40)&0xFF;
121         p[sizeof(c->u)-15] = (c->Nh>>48)&0xFF;
122         p[sizeof(c->u)-16] = (c->Nh>>56)&0xFF;
123 #endif
124
125         sha512_block (c,p,1);
126
127         if (md==0) return 0;
128
129         switch (c->md_len)
130                 {
131                 /* Let compiler decide if it's appropriate to unroll... */
132                 case SHA384_DIGEST_LENGTH:
133                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
134                                 {
135                                 SHA_LONG64 t = c->h[n];
136
137                                 *(md++) = (t>>56)&0xFF; *(md++) = (t>>48)&0xFF;
138                                 *(md++) = (t>>40)&0xFF; *(md++) = (t>>32)&0xFF;
139                                 *(md++) = (t>>24)&0xFF; *(md++) = (t>>16)&0xFF;
140                                 *(md++) = (t>>8)&0xFF;  *(md++) = (t)&0xFF;
141                                 }
142                         break;
143                 case SHA512_DIGEST_LENGTH:
144                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
145                                 {
146                                 SHA_LONG64 t = c->h[n];
147
148                                 *(md++) = (t>>56)&0xFF; *(md++) = (t>>48)&0xFF;
149                                 *(md++) = (t>>40)&0xFF; *(md++) = (t>>32)&0xFF;
150                                 *(md++) = (t>>24)&0xFF; *(md++) = (t>>16)&0xFF;
151                                 *(md++) = (t>>8)&0xFF;  *(md++) = (t)&0xFF;
152                                 }
153                         break;
154                 /* ... as well as make sure md_len is not abused. */
155                 default:        return 0;
156                 }
157
158         return 1;
159         }
160
161 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
162 {   return SHA512_Final (md,c);   }
163
164 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
165         {
166         SHA_LONG64      l;
167         unsigned char  *p=c->u.p;
168         const unsigned char *data=(const unsigned char *)_data;
169
170         if (len==0) return  1;
171
172         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
173         if (l < c->Nl)          c->Nh++;
174         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
175         c->Nl=l;
176
177         if (c->num != 0)
178                 {
179                 size_t n = sizeof(c->u) - c->num;
180
181                 if (len < n)
182                         {
183                         memcpy (p+c->num,data,len), c->num += len;
184                         return 1;
185                         }
186                 else    {
187                         memcpy (p+c->num,data,n), c->num = 0;
188                         len-=n, data+=n;
189                         sha512_block (c,p,1);
190                         }
191                 }
192
193         if (len >= sizeof(c->u))
194                 {
195 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
196                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
197                         while (len >= sizeof(c->u))
198                                 memcpy (p,data,sizeof(c->u)),
199                                 sha512_block (c,p,1),
200                                 len  -= sizeof(c->u),
201                                 data += sizeof(c->u);
202                 else
203 #endif
204                         sha512_block (c,data,len/sizeof(c->u)),
205                         data += len,
206                         len  %= sizeof(c->u),
207                         data -= len;
208                 }
209
210         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
211
212         return 1;
213         }
214
215 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
216 {   return SHA512_Update (c,data,len);   }
217
218 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
219 {   sha512_block (c,data,1);  }
220
221 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
222         {
223         SHA512_CTX c;
224         static unsigned char m[SHA384_DIGEST_LENGTH];
225
226         if (md == NULL) md=m;
227         SHA384_Init(&c);
228         SHA512_Update(&c,d,n);
229         SHA512_Final(md,&c);
230         OPENSSL_cleanse(&c,sizeof(c));
231         return(md);
232         }
233
234 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
235         {
236         SHA512_CTX c;
237         static unsigned char m[SHA512_DIGEST_LENGTH];
238
239         if (md == NULL) md=m;
240         SHA512_Init(&c);
241         SHA512_Update(&c,d,n);
242         SHA512_Final(md,&c);
243         OPENSSL_cleanse(&c,sizeof(c));
244         return(md);
245         }
246
247 #ifndef SHA512_ASM
/*
 * Per-round additive constants.  Both variants of sha512_block below add
 * K512[i] into the working value of round i, for i = 0..79, so the order
 * of the entries matters.
 */
248 static const SHA_LONG64 K512[80] = {
249         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
250         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
251         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
252         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
253         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
254         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
255         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
256         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
257         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
258         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
259         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
260         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
261         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
262         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
263         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
264         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
265         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
266         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
267         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
268         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
269         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
270         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
271         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
272         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
273         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
274         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
275         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
276         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
277         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
278         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
279         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
280         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
281         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
282         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
283         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
284         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
285         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
286         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
287         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
288         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
289
/*
 * PULL64(x): yield the 64-bit word stored at lvalue x, interpreting its
 * bytes as big-endian.  sha512_block only uses it when B_ENDIAN is not
 * defined; big-endian builds read the word directly.
 *
 * Fast path (skipped when PEDANTIC is defined): with GCC on x86-64 and
 * inline assembler enabled, load the word as is and reverse its bytes
 * with a single bswapq instruction.
 */
290 #ifndef PEDANTIC
291 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
292 #  if defined(__x86_64) || defined(__x86_64__)
293 #   define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));   \
294                                 asm ("bswapq    %0"             \
295                                 : "=r"(ret)                     \
296                                 : "0"(ret)); ret;               })
297 #  endif
298 # endif
299 #endif
300
/*
 * Portable fallback: B(x,j) fetches byte j at the address of x and shifts
 * it to bit position (7-j)*8, so byte 0 becomes the most significant
 * byte; PULL64 ORs the eight pieces together.  Both macros take the
 * address of x, so x has to be an lvalue.
 */
301 #ifndef PULL64
302 #define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
303 #define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
304 #endif
305
/*
 * ROTR(a,n): rotate the 64-bit value a right by n bits, using a compiler
 * intrinsic or a single machine instruction where one is known.  The
 * whole section is skipped when PEDANTIC is defined, and any
 * configuration not matched here is left without a definition so that
 * the generic "#ifndef ROTR" fallback applies.
 *
 *  - MSVC targeting 64-bit Windows: the _rotr64 intrinsic;
 *  - GCC on x86-64 with inline asm enabled: rorq (the "J" constraint
 *    requires n to be a compile-time constant);
 *  - GCC on 64-bit PowerPC with inline asm enabled: rotrdi (likewise a
 *    constant n via the "K" constraint).
 */
306 #ifndef PEDANTIC
307 # if defined(_MSC_VER)
308 #  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
309 #   define ROTR(a,n)    _rotr64((a),n)
310 #  endif
311 # elif defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
312 #  if defined(__x86_64) || defined(__x86_64__)
313 #   define ROTR(a,n)    ({ unsigned long ret;           \
314                                 asm ("rorq %1,%0"       \
315                                 : "=r"(ret)             \
316                                 : "J"(n),"0"(a)         \
317                                 : "cc"); ret;           })
318 #  elif defined(_ARCH_PPC) && defined(__64BIT__)
319 #   define ROTR(a,n)    ({ unsigned long ret;           \
320                                 asm ("rotrdi %0,%1,%2"  \
321                                 : "=r"(ret)             \
322                                 : "r"(a),"K"(n)); ret;  })
323 #  endif
324 # endif
325 #endif
326
#ifndef ROTR
/*
 * Generic rotate-right of a 64-bit unsigned value x by s bits.
 *
 * Every use of a parameter is parenthesised so that an expression passed
 * as the shift count (e.g. ROTR(v, a+b)) is applied as a unit to both
 * shifts; without that, "64-s" would expand to "64-a+b" and rotate by the
 * wrong amount.  s must lie in 1..63: a count of 0 would make the left
 * shift 64 bits wide, which C leaves undefined.  x is evaluated twice.
 */
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif
330
/*
 * Mixing functions used by the round code in sha512_block.  The
 * upper-case Sigma pair XORs three rotations of x; the lower-case sigma
 * pair XORs two rotations with a plain right shift.
 */
331 #define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
332 #define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
333 #define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
334 #define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
335
/*
 * Ch  ("choose"):   each result bit is taken from y where the matching
 *                   bit of x is 1, and from z where it is 0.
 * Maj ("majority"): each result bit is 1 when at least two of the
 *                   matching bits of x, y and z are 1.
 */
336 #define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
337 #define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
338
/*
 * Optional runtime dispatch, compiled in only when OPENSSL_IA32_SSE2 is
 * defined and assembler has not been disabled.
 *
 * GO_FOR_SSE2 is meant to be the first statement of sha512_block.  It
 * tests bit 26 of OPENSSL_ia32cap (presumably the SSE2 capability flag --
 * NOTE(review): confirm against the definition of OPENSSL_ia32cap).  If
 * the bit is clear the macro does nothing and the C code that follows
 * runs.  If it is set, sha512_block_sse2 is called with the hash state
 * array ctx->h, the input pointer and the block count, and the macro then
 * *returns from the enclosing function*.
 */
339 #if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM)
340 #define GO_FOR_SSE2(ctx,in,num)         do {            \
341         extern int      OPENSSL_ia32cap;                \
342         void            sha512_block_sse2(void *,const void *,size_t);  \
343         if (!(OPENSSL_ia32cap & (1<<26))) break;        \
344         sha512_block_sse2(ctx->h,in,num); return;       \
345                                         } while (0)
346 #endif
347
348 #ifdef OPENSSL_SMALL_FOOTPRINT
349
350 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
351         {
352         const SHA_LONG64 *W=in;
353         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
354         SHA_LONG64      X[16];
355         int i;
356
357 #ifdef GO_FOR_SSE2
358         GO_FOR_SSE2(ctx,in,num);
359 #endif
360
361                         while (num--) {
362
363         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
364         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
365
366         for (i=0;i<16;i++)
367                 {
368 #ifdef B_ENDIAN
369                 T1 = X[i] = W[i];
370 #else
371                 T1 = X[i] = PULL64(W[i]);
372 #endif
373                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
374                 T2 = Sigma0(a) + Maj(a,b,c);
375                 h = g;  g = f;  f = e;  e = d + T1;
376                 d = c;  c = b;  b = a;  a = T1 + T2;
377                 }
378
379         for (;i<80;i++)
380                 {
381                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
382                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
383
384                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
385                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
386                 T2 = Sigma0(a) + Maj(a,b,c);
387                 h = g;  g = f;  f = e;  e = d + T1;
388                 d = c;  c = b;  b = a;  a = T1 + T2;
389                 }
390
391         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
392         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
393
394                         W+=SHA_LBLOCK;
395                         }
396         }
397
398 #else
399
/*
 * ROUND_00_15: one round of the unrolled block function.
 *
 * On entry the caller's local T1 must already hold the message word for
 * round i.  The macro adds h, Sigma1(e), Ch(e,f,g) and K512[i] to T1,
 * overwrites h with Sigma0(a)+Maj(a,b,c)+T1 and adds T1 to d.  No values
 * are moved between the eight working variables; the caller passes them
 * in a different order on each successive round instead.
 */
400 #define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
401         T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
402         h = Sigma0(a) + Maj(a,b,c);                     \
403         d += T1;        h += T1;                } while (0)
404
/*
 * ROUND_16_80: one round that first extends the message schedule.
 *
 * X is used as a 16-entry circular buffer: entry i mod 16 has
 * sigma0(X[i+1]), sigma1(X[i+14]) and X[i+9] (indices mod 16) added to
 * it, the new value is copied to T1, and ROUND_00_15 then performs the
 * round proper.  Uses the caller's locals s0, s1 and T1 as scratch.
 */
405 #define ROUND_16_80(i,a,b,c,d,e,f,g,h,X)        do {    \
406         s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);        \
407         s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);        \
408         T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];    \
409         ROUND_00_15(i,a,b,c,d,e,f,g,h);         } while (0)
410
/*
 * sha512_block - default (unrolled) block function.
 *
 * ctx  hash context; ctx->h[0..7] is read at the start of every block
 *      and has the block's result added to it at the end.
 * in   start of the input, `num` consecutive blocks of SHA_LBLOCK 64-bit
 *      words each, in stream (big-endian) byte order.
 * num  number of blocks to process.
 *
 * When GO_FOR_SSE2 is available it may hand the whole job to the SSE2
 * routine and return before any of the C code below runs.
 */
411 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
412         {
413         const SHA_LONG64 *W=in;
414         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
415         SHA_LONG64      X[16];
416         int i;
417
418 #ifdef GO_FOR_SSE2
419         GO_FOR_SSE2(ctx,in,num);
420 #endif
421
        /* One iteration per input block */
422                         while (num--) {
423
        /* Load the working variables from the current hash state */
424         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
425         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
426
        /*
         * Rounds 0..15: each input word is saved in X[] and placed in T1
         * for ROUND_00_15.  The macro's argument list is shifted by one
         * position per round, which stands in for moving the values.
         * Big-endian hosts read the words directly; others use PULL64.
         */
427 #ifdef B_ENDIAN
428         T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
429         T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
430         T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
431         T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
432         T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
433         T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
434         T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
435         T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
436         T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
437         T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
438         T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
439         T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
440         T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
441         T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
442         T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
443         T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
444 #else
445         T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
446         T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
447         T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
448         T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
449         T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
450         T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
451         T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
452         T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
453         T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
454         T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
455         T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
456         T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
457         T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
458         T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
459         T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
460         T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
461 #endif
462
        /*
         * Rounds 16..79, eight per loop iteration so that the argument
         * rotation is back at (a,...,h) each time around.
         */
463         for (i=16;i<80;i+=8)
464                 {
465                 ROUND_16_80(i+0,a,b,c,d,e,f,g,h,X);
466                 ROUND_16_80(i+1,h,a,b,c,d,e,f,g,X);
467                 ROUND_16_80(i+2,g,h,a,b,c,d,e,f,X);
468                 ROUND_16_80(i+3,f,g,h,a,b,c,d,e,X);
469                 ROUND_16_80(i+4,e,f,g,h,a,b,c,d,X);
470                 ROUND_16_80(i+5,d,e,f,g,h,a,b,c,X);
471                 ROUND_16_80(i+6,c,d,e,f,g,h,a,b,X);
472                 ROUND_16_80(i+7,b,c,d,e,f,g,h,a,X);
473                 }
474
        /* Fold this block's result into the hash state */
475         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
476         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
477
        /* Step to the next input block */
478                         W+=SHA_LBLOCK;
479                         }
480         }
481
482 #endif
483
484 #endif /* SHA512_ASM */