SHA clean-up and (LP64) tune-up.
authorAndy Polyakov <appro@openssl.org>
Sun, 5 Sep 1999 12:42:04 +0000 (12:42 +0000)
committerAndy Polyakov <appro@openssl.org>
Sun, 5 Sep 1999 12:42:04 +0000 (12:42 +0000)
"Clean-up" stands for the fact that it's using common message digest
template ../md32_common.h and sha[1_]dgst.c are reduced down to
'#define SHA_[01]' and then '#include "sha_locl.h"'. The "(LP64)" is
there because it is the 64-bit platforms that benefit most from the tune-up.
The updated code exhibits a 40% performance improvement on IRIX64
(sounds too good, huh? I probably should double-check that it's not
some cache thrashing that was holding it back before), 28% on
Alpha Linux and 12% on Solaris 7/64.

crypto/sha/sha.h
crypto/sha/sha1dgst.c
crypto/sha/sha1s.cpp
crypto/sha/sha_dgst.c
crypto/sha/sha_locl.h

index cd6960ee1a3d0a8df475eaaf4a178b53afc58c00..96daa968fecc39f7b08cd21745903734a022c77c 100644 (file)
@@ -63,7 +63,7 @@
 extern "C" {
 #endif
 
-#ifdef NO_SHA
+#if defined(NO_SHA) || defined(NO_SHA0) || defined(NO_SHA1)
 #error SHA is disabled.
 #endif
 
@@ -103,14 +103,14 @@ void SHA_Init(SHA_CTX *c);
 void SHA_Update(SHA_CTX *c, const unsigned char *data, unsigned long len);
 void SHA_Final(unsigned char *md, SHA_CTX *c);
 unsigned char *SHA(const unsigned char *d, unsigned long n,unsigned char *md);
-void SHA_Transform(SHA_CTX *c, unsigned char *data);
+void SHA_Transform(SHA_CTX *c, const unsigned char *data);
 #endif
 #ifndef NO_SHA1
 void SHA1_Init(SHA_CTX *c);
 void SHA1_Update(SHA_CTX *c, const unsigned char *data, unsigned long len);
 void SHA1_Final(unsigned char *md, SHA_CTX *c);
 unsigned char *SHA1(const unsigned char *d, unsigned long n,unsigned char *md);
-void SHA1_Transform(SHA_CTX *c, unsigned char *data);
+void SHA1_Transform(SHA_CTX *c, const unsigned char *data);
 #endif
 #ifdef  __cplusplus
 }
index 66e885dd76d2d243172cb31e1705f76a67b6f009..141daf08463cf72bdd9fce0a1889099bc0036ab9 100644 (file)
  * [including the GNU Public Licence.]
  */
 
-#include <stdio.h>
-#include <string.h>
+#if !defined(NO_SHA1) && !defined(NO_SHA)
+
 #undef  SHA_0
 #define SHA_1
-#include <openssl/sha.h>
-#include "sha_locl.h"
+
 #include <openssl/opensslv.h>
 
-#ifndef NO_SHA1
 char *SHA1_version="SHA1" OPENSSL_VERSION_PTEXT;
 
-/* Implemented from SHA-1 document - The Secure Hash Algorithm
- */
-
-#define INIT_DATA_h0 0x67452301UL
-#define INIT_DATA_h1 0xefcdab89UL
-#define INIT_DATA_h2 0x98badcfeUL
-#define INIT_DATA_h3 0x10325476UL
-#define INIT_DATA_h4 0xc3d2e1f0UL
-
-#define K_00_19        0x5a827999UL
-#define K_20_39 0x6ed9eba1UL
-#define K_40_59 0x8f1bbcdcUL
-#define K_60_79 0xca62c1d6UL
-
-#ifdef SHA1_ASM
-   void sha1_block_x86(SHA_CTX *c, register SHA_LONG *p, int num);
-#  define sha1_block(c,p,n) sha1_block_x86((c),(p),(n)*SHA_CBLOCK)
-#else
-   static void sha1_block(SHA_CTX *c, register SHA_LONG *p, int num);
-#endif
-
-#if !defined(B_ENDIAN) && defined(SHA1_ASM)
-#  define      M_c2nl          c2l
-#  define      M_p_c2nl        p_c2l
-#  define      M_c2nl_p        c2l_p
-#  define      M_p_c2nl_p      p_c2l_p
-#  define      M_nl2c          l2c
-#else
-#  define      M_c2nl          c2nl
-#  define      M_p_c2nl        p_c2nl
-#  define      M_c2nl_p        c2nl_p
-#  define      M_p_c2nl_p      p_c2nl_p
-#  define      M_nl2c          nl2c
-#endif
-
-void SHA1_Init(SHA_CTX *c)
-       {
-       c->h0=INIT_DATA_h0;
-       c->h1=INIT_DATA_h1;
-       c->h2=INIT_DATA_h2;
-       c->h3=INIT_DATA_h3;
-       c->h4=INIT_DATA_h4;
-       c->Nl=0;
-       c->Nh=0;
-       c->num=0;
-       }
-
-void SHA1_Update(SHA_CTX *c, register const unsigned char *data,
-            unsigned long len)
-       {
-       register SHA_LONG *p;
-       int ew,ec,sw,sc;
-       SHA_LONG l;
-
-       if (len == 0) return;
-
-       l=(c->Nl+(len<<3))&0xffffffffL;
-       if (l < c->Nl) /* overflow */
-               c->Nh++;
-       c->Nh+=(len>>29);
-       c->Nl=l;
-
-       if (c->num != 0)
-               {
-               p=c->data;
-               sw=c->num>>2;
-               sc=c->num&0x03;
-
-               if ((c->num+len) >= SHA_CBLOCK)
-                       {
-                       l= p[sw];
-                       M_p_c2nl(data,l,sc);
-                       p[sw++]=l;
-                       for (; sw<SHA_LBLOCK; sw++)
-                               {
-                               M_c2nl(data,l);
-                               p[sw]=l;
-                               }
-                       len-=(SHA_CBLOCK-c->num);
-
-                       sha1_block(c,p,1);
-                       c->num=0;
-                       /* drop through and do the rest */
-                       }
-               else
-                       {
-                       c->num+=(int)len;
-                       if ((sc+len) < 4) /* ugly, add char's to a word */
-                               {
-                               l= p[sw];
-                               M_p_c2nl_p(data,l,sc,len);
-                               p[sw]=l;
-                               }
-                       else
-                               {
-                               ew=(c->num>>2);
-                               ec=(c->num&0x03);
-                               l= p[sw];
-                               M_p_c2nl(data,l,sc);
-                               p[sw++]=l;
-                               for (; sw < ew; sw++)
-                                       { M_c2nl(data,l); p[sw]=l; }
-                               if (ec)
-                                       {
-                                       M_c2nl_p(data,l,ec);
-                                       p[sw]=l;
-                                       }
-                               }
-                       return;
-                       }
-               }
-       /* We can only do the following code for assember, the reason
-        * being that the sha1_block 'C' version changes the values
-        * in the 'data' array.  The assember code avoids this and
-        * copies it to a local array.  I should be able to do this for
-        * the C version as well....
-        */
-#if SHA_LONG_LOG2==2
-#if defined(B_ENDIAN) || defined(SHA1_ASM)
-       if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
-               {
-               sw=len/SHA_CBLOCK;
-               if (sw)
-                       {
-                       sha1_block(c,(SHA_LONG *)data,sw);
-                       sw*=SHA_CBLOCK;
-                       data+=sw;
-                       len-=sw;
-                       }
-               }
-#endif
-#endif
-       /* we now can process the input data in blocks of SHA_CBLOCK
-        * chars and save the leftovers to c->data. */
-       p=c->data;
-       while (len >= SHA_CBLOCK)
-               {
-#if SHA_LONG_LOG2==2
-#if defined(B_ENDIAN) || defined(SHA1_ASM)
-#define SHA_NO_TAIL_CODE
-               /*
-                * Basically we get here only when data happens
-                * to be unaligned.
-                */
-               if (p != (SHA_LONG *)data)
-                       memcpy(p,data,SHA_CBLOCK);
-               data+=SHA_CBLOCK;
-               sha1_block(c,p=c->data,1);
-               len-=SHA_CBLOCK;
-#elif defined(L_ENDIAN)
-#define BE_COPY(dst,src,i)     {                               \
-                               l = ((SHA_LONG *)src)[i];       \
-                               Endian_Reverse32(l);            \
-                               dst[i] = l;                     \
-                               }
-               if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
-                       {
-                       for (sw=(SHA_LBLOCK/4); sw; sw--)
-                               {
-                               BE_COPY(p,data,0);
-                               BE_COPY(p,data,1);
-                               BE_COPY(p,data,2);
-                               BE_COPY(p,data,3);
-                               p+=4;
-                               data += 4*sizeof(SHA_LONG);
-                               }
-                       sha1_block(c,p=c->data,1);
-                       len-=SHA_CBLOCK;
-                       continue;
-                       }
-#endif
-#endif
-#ifndef SHA_NO_TAIL_CODE
-               /*
-                * In addition to "sizeof(SHA_LONG)!= 4" case the
-                * following code covers unaligned access cases on
-                * little-endian machines.
-                *                      <appro@fy.chalmers.se>
-                */
-               p=c->data;
-               for (sw=(SHA_LBLOCK/4); sw; sw--)
-                       {
-                       M_c2nl(data,l); p[0]=l;
-                       M_c2nl(data,l); p[1]=l;
-                       M_c2nl(data,l); p[2]=l;
-                       M_c2nl(data,l); p[3]=l;
-                       p+=4;
-                       }
-               p=c->data;
-               sha1_block(c,p,1);
-               len-=SHA_CBLOCK;
-#endif
-               }
-       ec=(int)len;
-       c->num=ec;
-       ew=(ec>>2);
-       ec&=0x03;
-
-       for (sw=0; sw < ew; sw++)
-               { M_c2nl(data,l); p[sw]=l; }
-       M_c2nl_p(data,l,ec);
-       p[sw]=l;
-       }
-
-void SHA1_Transform(SHA_CTX *c, unsigned char *b)
-       {
-       SHA_LONG p[SHA_LBLOCK];
-
-#if SHA_LONG_LOG2==2
-#if defined(B_ENDIAN) || defined(SHA1_ASM)
-       memcpy(p,b,SHA_CBLOCK);
-       sha1_block(c,p,1);
-       return;
-#elif defined(L_ENDIAN)
-       if (((unsigned long)b%sizeof(SHA_LONG)) == 0)
-               {
-               SHA_LONG *q;
-               int i;
-
-               q=p;
-               for (i=(SHA_LBLOCK/4); i; i--)
-                       {
-                       unsigned long l;
-                       BE_COPY(q,b,0); /* BE_COPY was defined above */
-                       BE_COPY(q,b,1);
-                       BE_COPY(q,b,2);
-                       BE_COPY(q,b,3);
-                       q+=4;
-                       b+=4*sizeof(SHA_LONG);
-                       }
-               sha1_block(c,p,1);
-               return;
-               }
-#endif
-#endif
-#ifndef SHA_NO_TAIL_CODE /* defined above, see comment */
-               {
-               SHA_LONG *q;
-               int i;
-       
-               q=p;
-               for (i=(SHA_LBLOCK/4); i; i--)
-                       {
-                       SHA_LONG l;
-                       c2nl(b,l); *(q++)=l;
-                       c2nl(b,l); *(q++)=l;
-                       c2nl(b,l); *(q++)=l;
-                       c2nl(b,l); *(q++)=l; 
-                       } 
-               sha1_block(c,p,1);
-               }
-#endif
-       }
-
-#ifndef SHA1_ASM
-static void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
-       {
-       register SHA_LONG A,B,C,D,E,T;
-       SHA_LONG X[SHA_LBLOCK];
-
-       A=c->h0;
-       B=c->h1;
-       C=c->h2;
-       D=c->h3;
-       E=c->h4;
-
-       for (;;)
-               {
-       BODY_00_15( 0,A,B,C,D,E,T,W);
-       BODY_00_15( 1,T,A,B,C,D,E,W);
-       BODY_00_15( 2,E,T,A,B,C,D,W);
-       BODY_00_15( 3,D,E,T,A,B,C,W);
-       BODY_00_15( 4,C,D,E,T,A,B,W);
-       BODY_00_15( 5,B,C,D,E,T,A,W);
-       BODY_00_15( 6,A,B,C,D,E,T,W);
-       BODY_00_15( 7,T,A,B,C,D,E,W);
-       BODY_00_15( 8,E,T,A,B,C,D,W);
-       BODY_00_15( 9,D,E,T,A,B,C,W);
-       BODY_00_15(10,C,D,E,T,A,B,W);
-       BODY_00_15(11,B,C,D,E,T,A,W);
-       BODY_00_15(12,A,B,C,D,E,T,W);
-       BODY_00_15(13,T,A,B,C,D,E,W);
-       BODY_00_15(14,E,T,A,B,C,D,W);
-       BODY_00_15(15,D,E,T,A,B,C,W);
-       BODY_16_19(16,C,D,E,T,A,B,W,W,W,W);
-       BODY_16_19(17,B,C,D,E,T,A,W,W,W,W);
-       BODY_16_19(18,A,B,C,D,E,T,W,W,W,W);
-       BODY_16_19(19,T,A,B,C,D,E,W,W,W,X);
-
-       BODY_20_31(20,E,T,A,B,C,D,W,W,W,X);
-       BODY_20_31(21,D,E,T,A,B,C,W,W,W,X);
-       BODY_20_31(22,C,D,E,T,A,B,W,W,W,X);
-       BODY_20_31(23,B,C,D,E,T,A,W,W,W,X);
-       BODY_20_31(24,A,B,C,D,E,T,W,W,X,X);
-       BODY_20_31(25,T,A,B,C,D,E,W,W,X,X);
-       BODY_20_31(26,E,T,A,B,C,D,W,W,X,X);
-       BODY_20_31(27,D,E,T,A,B,C,W,W,X,X);
-       BODY_20_31(28,C,D,E,T,A,B,W,W,X,X);
-       BODY_20_31(29,B,C,D,E,T,A,W,W,X,X);
-       BODY_20_31(30,A,B,C,D,E,T,W,X,X,X);
-       BODY_20_31(31,T,A,B,C,D,E,W,X,X,X);
-       BODY_32_39(32,E,T,A,B,C,D,X);
-       BODY_32_39(33,D,E,T,A,B,C,X);
-       BODY_32_39(34,C,D,E,T,A,B,X);
-       BODY_32_39(35,B,C,D,E,T,A,X);
-       BODY_32_39(36,A,B,C,D,E,T,X);
-       BODY_32_39(37,T,A,B,C,D,E,X);
-       BODY_32_39(38,E,T,A,B,C,D,X);
-       BODY_32_39(39,D,E,T,A,B,C,X);
-
-       BODY_40_59(40,C,D,E,T,A,B,X);
-       BODY_40_59(41,B,C,D,E,T,A,X);
-       BODY_40_59(42,A,B,C,D,E,T,X);
-       BODY_40_59(43,T,A,B,C,D,E,X);
-       BODY_40_59(44,E,T,A,B,C,D,X);
-       BODY_40_59(45,D,E,T,A,B,C,X);
-       BODY_40_59(46,C,D,E,T,A,B,X);
-       BODY_40_59(47,B,C,D,E,T,A,X);
-       BODY_40_59(48,A,B,C,D,E,T,X);
-       BODY_40_59(49,T,A,B,C,D,E,X);
-       BODY_40_59(50,E,T,A,B,C,D,X);
-       BODY_40_59(51,D,E,T,A,B,C,X);
-       BODY_40_59(52,C,D,E,T,A,B,X);
-       BODY_40_59(53,B,C,D,E,T,A,X);
-       BODY_40_59(54,A,B,C,D,E,T,X);
-       BODY_40_59(55,T,A,B,C,D,E,X);
-       BODY_40_59(56,E,T,A,B,C,D,X);
-       BODY_40_59(57,D,E,T,A,B,C,X);
-       BODY_40_59(58,C,D,E,T,A,B,X);
-       BODY_40_59(59,B,C,D,E,T,A,X);
-
-       BODY_60_79(60,A,B,C,D,E,T,X);
-       BODY_60_79(61,T,A,B,C,D,E,X);
-       BODY_60_79(62,E,T,A,B,C,D,X);
-       BODY_60_79(63,D,E,T,A,B,C,X);
-       BODY_60_79(64,C,D,E,T,A,B,X);
-       BODY_60_79(65,B,C,D,E,T,A,X);
-       BODY_60_79(66,A,B,C,D,E,T,X);
-       BODY_60_79(67,T,A,B,C,D,E,X);
-       BODY_60_79(68,E,T,A,B,C,D,X);
-       BODY_60_79(69,D,E,T,A,B,C,X);
-       BODY_60_79(70,C,D,E,T,A,B,X);
-       BODY_60_79(71,B,C,D,E,T,A,X);
-       BODY_60_79(72,A,B,C,D,E,T,X);
-       BODY_60_79(73,T,A,B,C,D,E,X);
-       BODY_60_79(74,E,T,A,B,C,D,X);
-       BODY_60_79(75,D,E,T,A,B,C,X);
-       BODY_60_79(76,C,D,E,T,A,B,X);
-       BODY_60_79(77,B,C,D,E,T,A,X);
-       BODY_60_79(78,A,B,C,D,E,T,X);
-       BODY_60_79(79,T,A,B,C,D,E,X);
-       
-       c->h0=(c->h0+E)&0xffffffffL; 
-       c->h1=(c->h1+T)&0xffffffffL;
-       c->h2=(c->h2+A)&0xffffffffL;
-       c->h3=(c->h3+B)&0xffffffffL;
-       c->h4=(c->h4+C)&0xffffffffL;
-
-       if (--num <= 0) break;
-
-       A=c->h0;
-       B=c->h1;
-       C=c->h2;
-       D=c->h3;
-       E=c->h4;
-
-       W+=SHA_LBLOCK;  /* Note! This can happen only when sizeof(SHA_LONG)
-                        * is 4. Whenever it's not the actual case this
-                        * function is never called with num larger than 1
-                        * and we never advance down here.
-                        *                      <appro@fy.chalmers.se>
-                        */
-               }
-       }
-#endif
-
-void SHA1_Final(unsigned char *md, SHA_CTX *c)
-       {
-       register int i,j;
-       register SHA_LONG l;
-       register SHA_LONG *p;
-       static unsigned char end[4]={0x80,0x00,0x00,0x00};
-       unsigned char *cp=end;
-
-       /* c->num should definitly have room for at least one more byte. */
-       p=c->data;
-       j=c->num;
-       i=j>>2;
-#ifdef PURIFY
-       if ((j&0x03) == 0) p[i]=0;
-#endif
-       l=p[i];
-       M_p_c2nl(cp,l,j&0x03);
-       p[i]=l;
-       i++;
-       /* i is the next 'undefined word' */
-       if (c->num >= SHA_LAST_BLOCK)
-               {
-               for (; i<SHA_LBLOCK; i++)
-                       p[i]=0;
-               sha1_block(c,p,1);
-               i=0;
-               }
-       for (; i<(SHA_LBLOCK-2); i++)
-               p[i]=0;
-       p[SHA_LBLOCK-2]=c->Nh;
-       p[SHA_LBLOCK-1]=c->Nl;
-#if SHA_LONG_LOG2==2
-#if !defined(B_ENDIAN) && defined(SHA1_ASM)
-       Endian_Reverse32(p[SHA_LBLOCK-2]);
-       Endian_Reverse32(p[SHA_LBLOCK-1]);
-#endif
-#endif
-       sha1_block(c,p,1);
-       cp=md;
-       l=c->h0; nl2c(l,cp);
-       l=c->h1; nl2c(l,cp);
-       l=c->h2; nl2c(l,cp);
-       l=c->h3; nl2c(l,cp);
-       l=c->h4; nl2c(l,cp);
+#include "sha_locl.h"
 
-       c->num=0;
-       /* sha_block may be leaving some stuff on the stack
-        * but I'm not worried :-)
-       memset((void *)c,0,sizeof(SHA_CTX));
-        */
-       }
 #endif
 
index 3103e1871bbed91ad2e9f04099d236ef321c6a50..af23d1e0f21857f1f9d1cf83cb50438d4f69cf8d 100644 (file)
@@ -34,6 +34,7 @@ void GetTSC(unsigned long& tsc)
 #include <stdlib.h>
 #include <openssl/sha.h>
 
+#define sha1_block_x86 sha1_block_asm_data_order
 extern "C" {
 void sha1_block_x86(SHA_CTX *ctx, unsigned char *buffer,int num);
 }
@@ -55,8 +56,10 @@ void main(int argc,char *argv[])
        if (num == 0) num=16;
        if (num > 250) num=16;
        numm=num+2;
+#if 0
        num*=64;
        numm*=64;
+#endif
 
        for (j=0; j<6; j++)
                {
@@ -72,7 +75,7 @@ void main(int argc,char *argv[])
                        sha1_block_x86(&ctx,buffer,num);
                        }
 
-               printf("sha1 (%d bytes) %d %d (%.2f)\n",num,
+               printf("sha1 (%d bytes) %d %d (%.2f)\n",num*64,
                        e1-s1,e2-s2,(double)((e1-s1)-(e2-s2))/2);
                }
        }
index 4df535360f35e254d8af90fb5449430456e632c7..81bce0c150146f62ad55a0b395d23b675fdfa7de 100644 (file)
@@ -1,4 +1,4 @@
-/* crypto/sha/sha_dgst.c */
+/* crypto/sha/sha1dgst.c */
 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  * All rights reserved.
  *
  * [including the GNU Public Licence.]
  */
 
-#include <stdio.h>
-#include <string.h>
-#define  SHA_0
-#undef SHA_1
-#include <openssl/sha.h>
-#include "sha_locl.h"
-#include <openssl/opensslv.h>
-
-#ifndef NO_SHA0
-char *SHA_version="SHA" OPENSSL_VERSION_PTEXT;
-
-/* Implemented from SHA-0 document - The Secure Hash Algorithm
- */
-
-#define INIT_DATA_h0 0x67452301UL
-#define INIT_DATA_h1 0xefcdab89UL
-#define INIT_DATA_h2 0x98badcfeUL
-#define INIT_DATA_h3 0x10325476UL
-#define INIT_DATA_h4 0xc3d2e1f0UL
-
-#define K_00_19        0x5a827999UL
-#define K_20_39 0x6ed9eba1UL
-#define K_40_59 0x8f1bbcdcUL
-#define K_60_79 0xca62c1d6UL
-
-static void sha_block(SHA_CTX *c, register SHA_LONG *p, int num);
-
-#if !defined(B_ENDIAN) && defined(SHA_ASM)
-#  define      M_c2nl          c2l
-#  define      M_p_c2nl        p_c2l
-#  define      M_c2nl_p        c2l_p
-#  define      M_p_c2nl_p      p_c2l_p
-#  define      M_nl2c          l2c
-#else
-#  define      M_c2nl          c2nl
-#  define      M_p_c2nl        p_c2nl
-#  define      M_c2nl_p        c2nl_p
-#  define      M_p_c2nl_p      p_c2nl_p
-#  define      M_nl2c          nl2c
-#endif
-
-void SHA_Init(SHA_CTX *c)
-       {
-       c->h0=INIT_DATA_h0;
-       c->h1=INIT_DATA_h1;
-       c->h2=INIT_DATA_h2;
-       c->h3=INIT_DATA_h3;
-       c->h4=INIT_DATA_h4;
-       c->Nl=0;
-       c->Nh=0;
-       c->num=0;
-       }
-
-void SHA_Update(SHA_CTX *c, register const unsigned char *data,
-               unsigned long len)
-       {
-       register SHA_LONG *p;
-       int ew,ec,sw,sc;
-       SHA_LONG l;
-
-       if (len == 0) return;
-
-       l=(c->Nl+(len<<3))&0xffffffffL;
-       if (l < c->Nl) /* overflow */
-               c->Nh++;
-       c->Nh+=(len>>29);
-       c->Nl=l;
-
-       if (c->num != 0)
-               {
-               p=c->data;
-               sw=c->num>>2;
-               sc=c->num&0x03;
-
-               if ((c->num+len) >= SHA_CBLOCK)
-                       {
-                       l= p[sw];
-                       M_p_c2nl(data,l,sc);
-                       p[sw++]=l;
-                       for (; sw<SHA_LBLOCK; sw++)
-                               {
-                               M_c2nl(data,l);
-                               p[sw]=l;
-                               }
-                       len-=(SHA_CBLOCK-c->num);
-
-                       sha_block(c,p,1);
-                       c->num=0;
-                       /* drop through and do the rest */
-                       }
-               else
-                       {
-                       c->num+=(int)len;
-                       if ((sc+len) < 4) /* ugly, add char's to a word */
-                               {
-                               l= p[sw];
-                               M_p_c2nl_p(data,l,sc,len);
-                               p[sw]=l;
-                               }
-                       else
-                               {
-                               ew=(c->num>>2);
-                               ec=(c->num&0x03);
-                               l= p[sw];
-                               M_p_c2nl(data,l,sc);
-                               p[sw++]=l;
-                               for (; sw < ew; sw++)
-                                       { M_c2nl(data,l); p[sw]=l; }
-                               if (ec)
-                                       {
-                                       M_c2nl_p(data,l,ec);
-                                       p[sw]=l;
-                                       }
-                               }
-                       return;
-                       }
-               }
-       /* We can only do the following code for assember, the reason
-        * being that the sha_block 'C' version changes the values
-        * in the 'data' array.  The assember code avoids this and
-        * copies it to a local array.  I should be able to do this for
-        * the C version as well....
-        */
-#if SHA_LONG_LOG2==2
-#if defined(B_ENDIAN) || defined(SHA_ASM)
-       if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
-               {
-               sw=len/SHA_CBLOCK;
-               if (sw)
-                       {
-                       sha_block(c,(SHA_LONG *)data,sw);
-                       sw*=SHA_CBLOCK;
-                       data+=sw;
-                       len-=sw;
-                       }
-               }
-#endif
-#endif
-       /* we now can process the input data in blocks of SHA_CBLOCK
-        * chars and save the leftovers to c->data. */
-       p=c->data;
-       while (len >= SHA_CBLOCK)
-               {
-#if SHA_LONG_LOG2==2
-#if defined(B_ENDIAN) || defined(SHA_ASM)
-#define SHA_NO_TAIL_CODE
-               /*
-                * Basically we get here only when data happens
-                * to be unaligned.
-                */
-               if (p != (SHA_LONG *)data)
-                       memcpy(p,data,SHA_CBLOCK);
-               data+=SHA_CBLOCK;
-               sha_block(c,p=c->data,1);
-               len-=SHA_CBLOCK;
-#elif defined(L_ENDIAN)
-#define BE_COPY(dst,src,i)     {                               \
-                               l = ((SHA_LONG *)src)[i];       \
-                               Endian_Reverse32(l);            \
-                               dst[i] = l;                     \
-                               }
-               if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
-                       {
-                       for (sw=(SHA_LBLOCK/4); sw; sw--)
-                               {
-                               BE_COPY(p,data,0);
-                               BE_COPY(p,data,1);
-                               BE_COPY(p,data,2);
-                               BE_COPY(p,data,3);
-                               p+=4;
-                               data += 4*sizeof(SHA_LONG);
-                               }
-                       sha_block(c,p=c->data,1);
-                       len-=SHA_CBLOCK;
-                       continue;
-                       }
-#endif
-#endif
-#ifndef SHA_NO_TAIL_CODE
-               /*
-                * In addition to "sizeof(SHA_LONG)!= 4" case the
-                * following code covers unaligned access cases on
-                * little-endian machines.
-                *                      <appro@fy.chalmers.se>
-                */
-               p=c->data;
-               for (sw=(SHA_LBLOCK/4); sw; sw--)
-                       {
-                       M_c2nl(data,l); p[0]=l;
-                       M_c2nl(data,l); p[1]=l;
-                       M_c2nl(data,l); p[2]=l;
-                       M_c2nl(data,l); p[3]=l;
-                       p+=4;
-                       }
-               p=c->data;
-               sha_block(c,p,1);
-               len-=SHA_CBLOCK;
-#endif
-               }
-       ec=(int)len;
-       c->num=ec;
-       ew=(ec>>2);
-       ec&=0x03;
-
-       for (sw=0; sw < ew; sw++)
-               { M_c2nl(data,l); p[sw]=l; }
-       M_c2nl_p(data,l,ec);
-       p[sw]=l;
-       }
-
-void SHA_Transform(SHA_CTX *c, unsigned char *b)
-       {
-       SHA_LONG p[SHA_LBLOCK];
-
-#if SHA_LONG_LOG2==2
-#if defined(B_ENDIAN) || defined(SHA_ASM)
-       memcpy(p,b,SHA_CBLOCK);
-       sha_block(c,p,1);
-       return;
-#elif defined(L_ENDIAN)
-       if (((unsigned long)b%sizeof(SHA_LONG)) == 0)
-               {
-               SHA_LONG *q;
-               int i;
-
-               q=p;
-               for (i=(SHA_LBLOCK/4); i; i--)
-                       {
-                       unsigned long l;
-                       BE_COPY(q,b,0); /* BE_COPY was defined above */
-                       BE_COPY(q,b,1);
-                       BE_COPY(q,b,2);
-                       BE_COPY(q,b,3);
-                       q+=4;
-                       b+=4*sizeof(SHA_LONG);
-                       }
-               sha_block(c,p,1);
-               return;
-               }
-#endif
-#endif
-#ifndef SHA_NO_TAIL_CODE /* defined above, see comment */
-               {
-               SHA_LONG *q;
-               int i;
-
-               q=p;
-               for (i=(SHA_LBLOCK/4); i; i--)
-                       {
-                       SHA_LONG l;
-                       c2nl(b,l); *(q++)=l;
-                       c2nl(b,l); *(q++)=l;
-                       c2nl(b,l); *(q++)=l;
-                       c2nl(b,l); *(q++)=l; 
-                       } 
-               sha_block(c,p,1);
-               }
-#endif
-       }
-
-#ifndef SHA_ASM
-static void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
-       {
-       register SHA_LONG A,B,C,D,E,T;
-       SHA_LONG X[SHA_LBLOCK];
-
-       A=c->h0;
-       B=c->h1;
-       C=c->h2;
-       D=c->h3;
-       E=c->h4;
+#if !defined(NO_SHA0) && !defined(NO_SHA)
 
-       for (;;)
-               {
-       BODY_00_15( 0,A,B,C,D,E,T,W);
-       BODY_00_15( 1,T,A,B,C,D,E,W);
-       BODY_00_15( 2,E,T,A,B,C,D,W);
-       BODY_00_15( 3,D,E,T,A,B,C,W);
-       BODY_00_15( 4,C,D,E,T,A,B,W);
-       BODY_00_15( 5,B,C,D,E,T,A,W);
-       BODY_00_15( 6,A,B,C,D,E,T,W);
-       BODY_00_15( 7,T,A,B,C,D,E,W);
-       BODY_00_15( 8,E,T,A,B,C,D,W);
-       BODY_00_15( 9,D,E,T,A,B,C,W);
-       BODY_00_15(10,C,D,E,T,A,B,W);
-       BODY_00_15(11,B,C,D,E,T,A,W);
-       BODY_00_15(12,A,B,C,D,E,T,W);
-       BODY_00_15(13,T,A,B,C,D,E,W);
-       BODY_00_15(14,E,T,A,B,C,D,W);
-       BODY_00_15(15,D,E,T,A,B,C,W);
-       BODY_16_19(16,C,D,E,T,A,B,W,W,W,W);
-       BODY_16_19(17,B,C,D,E,T,A,W,W,W,W);
-       BODY_16_19(18,A,B,C,D,E,T,W,W,W,W);
-       BODY_16_19(19,T,A,B,C,D,E,W,W,W,X);
+#undef  SHA_1
+#define SHA_0
 
-       BODY_20_31(20,E,T,A,B,C,D,W,W,W,X);
-       BODY_20_31(21,D,E,T,A,B,C,W,W,W,X);
-       BODY_20_31(22,C,D,E,T,A,B,W,W,W,X);
-       BODY_20_31(23,B,C,D,E,T,A,W,W,W,X);
-       BODY_20_31(24,A,B,C,D,E,T,W,W,X,X);
-       BODY_20_31(25,T,A,B,C,D,E,W,W,X,X);
-       BODY_20_31(26,E,T,A,B,C,D,W,W,X,X);
-       BODY_20_31(27,D,E,T,A,B,C,W,W,X,X);
-       BODY_20_31(28,C,D,E,T,A,B,W,W,X,X);
-       BODY_20_31(29,B,C,D,E,T,A,W,W,X,X);
-       BODY_20_31(30,A,B,C,D,E,T,W,X,X,X);
-       BODY_20_31(31,T,A,B,C,D,E,W,X,X,X);
-       BODY_32_39(32,E,T,A,B,C,D,X);
-       BODY_32_39(33,D,E,T,A,B,C,X);
-       BODY_32_39(34,C,D,E,T,A,B,X);
-       BODY_32_39(35,B,C,D,E,T,A,X);
-       BODY_32_39(36,A,B,C,D,E,T,X);
-       BODY_32_39(37,T,A,B,C,D,E,X);
-       BODY_32_39(38,E,T,A,B,C,D,X);
-       BODY_32_39(39,D,E,T,A,B,C,X);
-
-       BODY_40_59(40,C,D,E,T,A,B,X);
-       BODY_40_59(41,B,C,D,E,T,A,X);
-       BODY_40_59(42,A,B,C,D,E,T,X);
-       BODY_40_59(43,T,A,B,C,D,E,X);
-       BODY_40_59(44,E,T,A,B,C,D,X);
-       BODY_40_59(45,D,E,T,A,B,C,X);
-       BODY_40_59(46,C,D,E,T,A,B,X);
-       BODY_40_59(47,B,C,D,E,T,A,X);
-       BODY_40_59(48,A,B,C,D,E,T,X);
-       BODY_40_59(49,T,A,B,C,D,E,X);
-       BODY_40_59(50,E,T,A,B,C,D,X);
-       BODY_40_59(51,D,E,T,A,B,C,X);
-       BODY_40_59(52,C,D,E,T,A,B,X);
-       BODY_40_59(53,B,C,D,E,T,A,X);
-       BODY_40_59(54,A,B,C,D,E,T,X);
-       BODY_40_59(55,T,A,B,C,D,E,X);
-       BODY_40_59(56,E,T,A,B,C,D,X);
-       BODY_40_59(57,D,E,T,A,B,C,X);
-       BODY_40_59(58,C,D,E,T,A,B,X);
-       BODY_40_59(59,B,C,D,E,T,A,X);
-
-       BODY_60_79(60,A,B,C,D,E,T,X);
-       BODY_60_79(61,T,A,B,C,D,E,X);
-       BODY_60_79(62,E,T,A,B,C,D,X);
-       BODY_60_79(63,D,E,T,A,B,C,X);
-       BODY_60_79(64,C,D,E,T,A,B,X);
-       BODY_60_79(65,B,C,D,E,T,A,X);
-       BODY_60_79(66,A,B,C,D,E,T,X);
-       BODY_60_79(67,T,A,B,C,D,E,X);
-       BODY_60_79(68,E,T,A,B,C,D,X);
-       BODY_60_79(69,D,E,T,A,B,C,X);
-       BODY_60_79(70,C,D,E,T,A,B,X);
-       BODY_60_79(71,B,C,D,E,T,A,X);
-       BODY_60_79(72,A,B,C,D,E,T,X);
-       BODY_60_79(73,T,A,B,C,D,E,X);
-       BODY_60_79(74,E,T,A,B,C,D,X);
-       BODY_60_79(75,D,E,T,A,B,C,X);
-       BODY_60_79(76,C,D,E,T,A,B,X);
-       BODY_60_79(77,B,C,D,E,T,A,X);
-       BODY_60_79(78,A,B,C,D,E,T,X);
-       BODY_60_79(79,T,A,B,C,D,E,X);
-       
-       c->h0=(c->h0+E)&0xffffffffL; 
-       c->h1=(c->h1+T)&0xffffffffL;
-       c->h2=(c->h2+A)&0xffffffffL;
-       c->h3=(c->h3+B)&0xffffffffL;
-       c->h4=(c->h4+C)&0xffffffffL;
-
-       if (--num <= 0) break;
-
-       A=c->h0;
-       B=c->h1;
-       C=c->h2;
-       D=c->h3;
-       E=c->h4;
+#include <openssl/opensslv.h>
 
-       W+=SHA_LBLOCK;  /* Note! This can happen only when sizeof(SHA_LONG)
-                        * is 4. Whenever it's not the actual case this
-                        * function is never called with num larger than 1
-                        * and we never advance down here.
-                        *                      <appro@fy.chalmers.se>
-                        */
-               }
-       }
-#endif
+char *SHA_version="SHA" OPENSSL_VERSION_PTEXT;
 
-void SHA_Final(unsigned char *md, SHA_CTX *c)
-       {
-       register int i,j;
-       register SHA_LONG l;
-       register SHA_LONG *p;
-       static unsigned char end[4]={0x80,0x00,0x00,0x00};
-       unsigned char *cp=end;
+#include "sha_locl.h"
 
-       /* c->num should definitly have room for at least one more byte. */
-       p=c->data;
-       j=c->num;
-       i=j>>2;
-#ifdef PURIFY
-       if ((j&0x03) == 0) p[i]=0;
-#endif
-       l=p[i];
-       M_p_c2nl(cp,l,j&0x03);
-       p[i]=l;
-       i++;
-       /* i is the next 'undefined word' */
-       if (c->num >= SHA_LAST_BLOCK)
-               {
-               for (; i<SHA_LBLOCK; i++)
-                       p[i]=0;
-               sha_block(c,p,1);
-               i=0;
-               }
-       for (; i<(SHA_LBLOCK-2); i++)
-               p[i]=0;
-       p[SHA_LBLOCK-2]=c->Nh;
-       p[SHA_LBLOCK-1]=c->Nl;
-#if SHA_LONG_LOG2==2
-#if !defined(B_ENDIAN) && defined(SHA_ASM)
-       Endian_Reverse32(p[SHA_LBLOCK-2]);
-       Endian_Reverse32(p[SHA_LBLOCK-1]);
 #endif
-#endif
-       sha_block(c,p,1);
-       cp=md;
-       l=c->h0; nl2c(l,cp);
-       l=c->h1; nl2c(l,cp);
-       l=c->h2; nl2c(l,cp);
-       l=c->h3; nl2c(l,cp);
-       l=c->h4; nl2c(l,cp);
 
-       c->num=0;
-       /* sha_block may be leaving some stuff on the stack
-        * but I'm not worried :-)
-       memset((void *)c,0,sizeof(SHA_CTX));
-        */
-       }
-#endif
index 6646a8915b757291e8b840329761018381e75aa7..6f64e57627570083938dfd1d81a4213ddfe7d90e 100644 (file)
 #include <string.h>
 
 #include <openssl/opensslconf.h>
-
-#ifdef undef
-/* one or the other needs to be defined */
-#ifndef SHA_1 /* FIPE 180-1 */
-#define SHA_0 /* FIPS 180   */
-#endif
-#endif
-
-#undef c2nl
-#define c2nl(c,l)      (l =(((unsigned long)(*((c)++)))<<24), \
-                        l|=(((unsigned long)(*((c)++)))<<16), \
-                        l|=(((unsigned long)(*((c)++)))<< 8), \
-                        l|=(((unsigned long)(*((c)++)))    ))
-
-#undef p_c2nl
-#define p_c2nl(c,l,n)  { \
-                       switch (n) { \
-                       case 0: l =((unsigned long)(*((c)++)))<<24; \
-                       case 1: l|=((unsigned long)(*((c)++)))<<16; \
-                       case 2: l|=((unsigned long)(*((c)++)))<< 8; \
-                       case 3: l|=((unsigned long)(*((c)++))); \
-                               } \
-                       }
-
-#undef c2nl_p
-/* NOTE the pointer is not incremented at the end of this */
-#define c2nl_p(c,l,n)  { \
-                       l=0; \
-                       (c)+=n; \
-                       switch (n) { \
-                       case 3: l =((unsigned long)(*(--(c))))<< 8; \
-                       case 2: l|=((unsigned long)(*(--(c))))<<16; \
-                       case 1: l|=((unsigned long)(*(--(c))))<<24; \
-                               } \
-                       }
-
-#undef p_c2nl_p
-#define p_c2nl_p(c,l,sc,len) { \
-                       switch (sc) \
-                               { \
-                       case 0: l =((unsigned long)(*((c)++)))<<24; \
-                               if (--len == 0) break; \
-                       case 1: l|=((unsigned long)(*((c)++)))<<16; \
-                               if (--len == 0) break; \
-                       case 2: l|=((unsigned long)(*((c)++)))<< 8; \
-                               } \
-                       }
-
-#undef nl2c
-#define nl2c(l,c)      (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
-                        *((c)++)=(unsigned char)(((l)>>16)&0xff), \
-                        *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
-                        *((c)++)=(unsigned char)(((l)    )&0xff))
-
-#undef c2l
-#define c2l(c,l)       (l =(((unsigned long)(*((c)++)))    ), \
-                        l|=(((unsigned long)(*((c)++)))<< 8), \
-                        l|=(((unsigned long)(*((c)++)))<<16), \
-                        l|=(((unsigned long)(*((c)++)))<<24))
-
-#undef p_c2l
-#define p_c2l(c,l,n)   { \
-                       switch (n) { \
-                       case 0: l =((unsigned long)(*((c)++))); \
-                       case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-                       case 2: l|=((unsigned long)(*((c)++)))<<16; \
-                       case 3: l|=((unsigned long)(*((c)++)))<<24; \
-                               } \
-                       }
-
-#undef c2l_p
-/* NOTE the pointer is not incremented at the end of this */
-#define c2l_p(c,l,n)   { \
-                       l=0; \
-                       (c)+=n; \
-                       switch (n) { \
-                       case 3: l =((unsigned long)(*(--(c))))<<16; \
-                       case 2: l|=((unsigned long)(*(--(c))))<< 8; \
-                       case 1: l|=((unsigned long)(*(--(c)))); \
-                               } \
-                       }
-
-#undef p_c2l_p
-#define p_c2l_p(c,l,sc,len) { \
-                       switch (sc) \
-                               { \
-                       case 0: l =((unsigned long)(*((c)++))); \
-                               if (--len == 0) break; \
-                       case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-                               if (--len == 0) break; \
-                       case 2: l|=((unsigned long)(*((c)++)))<<16; \
-                               } \
-                       }
-
-#undef l2c
-#define l2c(l,c)       (*((c)++)=(unsigned char)(((l)    )&0xff), \
-                        *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
-                        *((c)++)=(unsigned char)(((l)>>16)&0xff), \
-                        *((c)++)=(unsigned char)(((l)>>24)&0xff))
+#include <openssl/sha.h>
 
 #ifndef SHA_LONG_LOG2
 #define SHA_LONG_LOG2  2       /* default to 32 bits */
 #endif
 
-#undef ROTATE
-#undef Endian_Reverse32
-#if defined(WIN32)
-#define ROTATE(a,n)     _lrotl(a,n)
-#elif defined(__GNUC__) && !defined(PEDANTIC)
-/* some inline assembler templates by <appro@fy.chalmers.se> */
-#if defined(__i386) && !defined(NO_ASM)
-#define ROTATE(a,n)    ({ register unsigned int ret;   \
-                               asm ("roll %1,%0"       \
-                               : "=r"(ret)             \
-                               : "I"(n), "0"(a)        \
-                               : "cc");                \
-                          ret;                         \
-                       })
-#ifndef I386_ONLY
-#define Endian_Reverse32(a) \
-                       { register unsigned int ltmp=(a);       \
-                               asm ("bswapl %0"        \
-                               : "=r"(ltmp) : "0"(ltmp));      \
-                         (a)=ltmp;                     \
-                       }
-#endif
-#elif defined(__powerpc)
-#define ROTATE(a,n)    ({ register unsigned int ret;           \
-                               asm ("rlwinm %0,%1,%2,0,31"     \
-                               : "=r"(ret)                     \
-                               : "r"(a), "I"(n));              \
-                          ret;                                 \
-                       })
-/* Endian_Reverse32 is not needed for PowerPC */
-#endif
-#endif
+/*
+ * Generic HASH_* names bound to their SHA counterparts.  They are defined
+ * ahead of the md32_common.h include further down; presumably that template
+ * is what consumes them (its contents are not visible here).
+ */
+#define DATA_ORDER_IS_BIG_ENDIAN
+
+#define HASH_LONG               SHA_LONG
+#define HASH_LONG_LOG2          SHA_LONG_LOG2
+#define HASH_CTX                SHA_CTX
+#define HASH_CBLOCK             SHA_CBLOCK
+#define HASH_LBLOCK             SHA_LBLOCK
+/*
+ * HASH_MAKE_STRING(c,s): hand the five state words h0..h4 of context c,
+ * in that order, to HOST_l2c() together with s.  HOST_l2c is not defined
+ * in this file; presumably it stores the word as bytes at s and advances
+ * s -- confirm against md32_common.h.  The do { } while (0) wrapper makes
+ * the expansion a single statement and keeps the temporary l local.
+ */
+#define HASH_MAKE_STRING(c,s)   do {   \
+       unsigned long l;                \
+       l=(c)->h0; HOST_l2c(l,(s));     \
+       l=(c)->h1; HOST_l2c(l,(s));     \
+       l=(c)->h2; HOST_l2c(l,(s));     \
+       l=(c)->h3; HOST_l2c(l,(s));     \
+       l=(c)->h4; HOST_l2c(l,(s));     \
+       } while (0)
+
+/*
+ * Variant selection.  SHA_0 or SHA_1 must already be defined when this
+ * point is reached (SHA_0 takes precedence if both are); the choice fixes
+ * the public function names, the block-function names and the
+ * message-schedule step Xupdate().
+ */
+#if defined(SHA_0)
+
+# define HASH_UPDATE                   SHA_Update
+# define HASH_TRANSFORM                SHA_Transform
+# define HASH_FINAL                    SHA_Final
+# define HASH_INIT                     SHA_Init
+# define HASH_BLOCK_HOST_ORDER         sha_block_host_order
+# define HASH_BLOCK_DATA_ORDER         sha_block_data_order
+/* SHA_0 schedule step: plain XOR of the four inputs, stored in both ix and a. */
+# define Xupdate(a,ix,ia,ib,ic,id)     (ix=(a)=(ia^ib^ic^id))
+
+  void sha_block_host_order (SHA_CTX *c, const void *p,int num);
+  void sha_block_data_order (SHA_CTX *c, const void *p,int num);
+
+#elif defined(SHA_1)
+
+# define HASH_UPDATE                   SHA1_Update
+# define HASH_TRANSFORM                SHA1_Transform
+# define HASH_FINAL                    SHA1_Final
+# define HASH_INIT                     SHA1_Init
+# define HASH_BLOCK_HOST_ORDER         sha1_block_host_order
+# define HASH_BLOCK_DATA_ORDER         sha1_block_data_order
+/* SHA_1 schedule step: the same XOR, then ROTATE(.,1) before storing in ix and a. */
+# define Xupdate(a,ix,ia,ib,ic,id)     ( (a)=(ia^ib^ic^id),    \
+                                         ix=(a)=ROTATE((a),1)  \
+                                       )
+
+/*
+ * With SHA1_ASM on the listed x86 compilers, both block-function names are
+ * redirected to the sha1_block_asm_* symbols and the DONT_IMPLEMENT_*
+ * switches suppress the C bodies later in this file.  Because of the
+ * renaming, the two prototypes below then declare the asm symbols.
+ * HASH_BLOCK_DATA_ORDER_ALIGNED is not used in this file; presumably it is
+ * read by md32_common.h -- confirm there.
+ */
+# ifdef SHA1_ASM
+#  if defined(__i386) || defined(_M_IX86) || defined(__INTEL__)
+#   define sha1_block_host_order               sha1_block_asm_host_order
+#   define DONT_IMPLEMENT_BLOCK_HOST_ORDER
+#   define sha1_block_data_order               sha1_block_asm_data_order
+#   define DONT_IMPLEMENT_BLOCK_DATA_ORDER
+#   define HASH_BLOCK_DATA_ORDER_ALIGNED       sha1_block_asm_data_order
+#  endif
+# endif
+  void sha1_block_host_order (SHA_CTX *c, const void *p,int num);
+  void sha1_block_data_order (SHA_CTX *c, const void *p,int num);
 
-/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
-#ifdef ROTATE
-#ifndef Endian_Reverse32
-/* 5 instructions with rotate instruction, else 9 */
-#define Endian_Reverse32(a) \
-       { \
-       unsigned long t=(a); \
-       (a)=((ROTATE(t,8)&0x00FF00FF)|(ROTATE((t&0x00FF00FF),24))); \
-       }
-#endif
 #else
-#define ROTATE(a,n)     (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
-#ifndef Endian_Reverse32
-/* 6 instructions with rotate instruction, else 8 */
-#define Endian_Reverse32(a) \
-       { \
-       unsigned long t=(a); \
-       t=(((t>>8)&0x00FF00FF)|((t&0x00FF00FF)<<8)); \
-       (a)=ROTATE(t,16); \
-       }
+# error "Either SHA_0 or SHA_1 must be defined."
 #endif
-/*
- * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
- * It's rewritten as above for two reasons:
- *     - RISCs aren't good at long constants and have to explicitely
- *       compose 'em with several (well, usually 2) instructions in a
- *       register before performing the actual operation and (as you
- *       already realized:-) having same constant should inspire the
- *       compiler to permanently allocate the only register for it;
- *     - most modern CPUs have two ALUs, but usually only one has
- *       circuitry for shifts:-( this minor tweak inspires compiler
- *       to schedule shift instructions in a better way...
- *
- *                             <appro@fy.chalmers.se>
- */
+
+#ifndef FLAT_INC
+#include "../md32_common.h"
+#else
+#include "md32_common.h"
 #endif
 
+/* Starting values for the five state words h0..h4. */
+#define INIT_DATA_h0 0x67452301UL
+#define INIT_DATA_h1 0xefcdab89UL
+#define INIT_DATA_h2 0x98badcfeUL
+#define INIT_DATA_h3 0x10325476UL
+#define INIT_DATA_h4 0xc3d2e1f0UL
+
+/*
+ * Put context c into its starting state: num and the Nl/Nh pair are
+ * zeroed and h0..h4 are loaded from INIT_DATA_h0..INIT_DATA_h4.
+ * HASH_INIT expands to the public SHA_Init or SHA1_Init name.
+ */
+void HASH_INIT (SHA_CTX *c)
+       {
+       /* Counters first; none of these stores depends on another. */
+       c->num=0;
+       c->Nh=0;
+       c->Nl=0;
+
+       /* Seed the state words. */
+       c->h4=INIT_DATA_h4;
+       c->h3=INIT_DATA_h3;
+       c->h2=INIT_DATA_h2;
+       c->h1=INIT_DATA_h1;
+       c->h0=INIT_DATA_h0;
+       }
+
+/*
+ * Additive round constants, one per group of rounds.  The BODY_* macros
+ * below add the K_xx_yy whose range covers their round numbers.
+ */
+#define K_00_19        0x5a827999UL
+#define K_20_39 0x6ed9eba1UL
+#define K_40_59 0x8f1bbcdcUL
+#define K_60_79 0xca62c1d6UL
+
 /* As  pointed out by Wei Dai <weidai@eskimo.com>, F() below can be
  * simplified to the code in F_00_19.  Wei attributes these optimisations
  * to Peter Gutmann's SHS code, and he attributes it to Rich Schroeppel.
 #define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d))) 
 #define        F_60_79(b,c,d)  F_20_39(b,c,d)
 
-#undef Xupdate
-#ifdef SHA_0
-#define Xupdate(a,i,ia,ib,ic,id) X[(i)&0x0f]=(a)=\
-       (ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);
-#endif
-#ifdef SHA_1
-#define Xupdate(a,i,ia,ib,ic,id) (a)=\
-       (ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);\
-       X[(i)&0x0f]=(a)=ROTATE((a),1);
-#endif
-
-#define BODY_00_15(i,a,b,c,d,e,f,xa) \
-       (f)=xa[i]+(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \
+/*
+ * One round per macro.  In every BODY_*: a..e are the five working
+ * variables in their current roles, f receives the new value, and b is
+ * replaced by ROTATE(b,30).  The round index i is accepted but is not
+ * referenced by any of the expansions.
+ *   BODY_00_15             - the schedule word is the argument xi as given.
+ *   BODY_16_19, BODY_20_31 - Xupdate() forms the word from xa,xb,xc,xd and
+ *                            stores it in xi.
+ *   BODY_32_39 .. _60_79   - same, but the new word is written back over xa.
+ * Each macro expands to several ';'-terminated statements with no
+ * do { } while (0) wrapper, so it must not be used as the sole body of an
+ * unbraced if/else/loop.
+ */
+#define BODY_00_15(i,a,b,c,d,e,f,xi) \
+       (f)=xi+(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \
        (b)=ROTATE((b),30);
 
-#define BODY_16_19(i,a,b,c,d,e,f,xa,xb,xc,xd) \
-       Xupdate(f,i,xa,xb,xc,xd); \
+#define BODY_16_19(i,a,b,c,d,e,f,xi,xa,xb,xc,xd) \
+       Xupdate(f,xi,xa,xb,xc,xd); \
        (f)+=(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \
        (b)=ROTATE((b),30);
 
-#define BODY_20_31(i,a,b,c,d,e,f,xa,xb,xc,xd) \
-       Xupdate(f,i,xa,xb,xc,xd); \
+#define BODY_20_31(i,a,b,c,d,e,f,xi,xa,xb,xc,xd) \
+       Xupdate(f,xi,xa,xb,xc,xd); \
        (f)+=(e)+K_20_39+ROTATE((a),5)+F_20_39((b),(c),(d)); \
        (b)=ROTATE((b),30);
 
-#define BODY_32_39(i,a,b,c,d,e,f,xa) \
-       Xupdate(f,i,xa,xa,xa,xa); \
+#define BODY_32_39(i,a,b,c,d,e,f,xa,xb,xc,xd) \
+       Xupdate(f,xa,xa,xb,xc,xd); \
        (f)+=(e)+K_20_39+ROTATE((a),5)+F_20_39((b),(c),(d)); \
        (b)=ROTATE((b),30);
 
-#define BODY_40_59(i,a,b,c,d,e,f,xa) \
-       Xupdate(f,i,xa,xa,xa,xa); \
+#define BODY_40_59(i,a,b,c,d,e,f,xa,xb,xc,xd) \
+       Xupdate(f,xa,xa,xb,xc,xd); \
        (f)+=(e)+K_40_59+ROTATE((a),5)+F_40_59((b),(c),(d)); \
        (b)=ROTATE((b),30);
 
-#define BODY_60_79(i,a,b,c,d,e,f,xa) \
-       Xupdate(f,i,xa,xa,xa,xa); \
-       (f)=X[(i)&0x0f]+(e)+K_60_79+ROTATE((a),5)+F_60_79((b),(c),(d)); \
+#define BODY_60_79(i,a,b,c,d,e,f,xa,xb,xc,xd) \
+       Xupdate(f,xa,xa,xb,xc,xd); \
+       (f)=xa+(e)+K_60_79+ROTATE((a),5)+F_60_79((b),(c),(d)); \
        (b)=ROTATE((b),30);
 
+#ifndef DONT_IMPLEMENT_BLOCK_HOST_ORDER
+/*
+ * Block function for input that is already an array of SHA_LONG words:
+ * d is read through a const SHA_LONG pointer with no byte conversion.
+ *
+ *   c   - context; h0..h4 are read on entry and updated after every block.
+ *   d   - start of the input, SHA_LBLOCK words per block.
+ *   num - number of blocks.  The count is only tested after a block has
+ *         been processed, so a value <= 0 still consumes one block.
+ *
+ * The whole definition is compiled out when DONT_IMPLEMENT_BLOCK_HOST_ORDER
+ * is defined.
+ */
+void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, int num)
+       {
+       const SHA_LONG *W=d;
+       register unsigned long A,B,C,D,E,T;
+       /*
+        * Schedule storage: a 16-entry array when SHA_XARRAY is defined,
+        * otherwise sixteen scalars X0..X15 selected by token pasting.
+        */
+#ifdef SHA_XARRAY
+       SHA_LONG        X[16];
+# define X(i) X[(i)]
+#else
+       unsigned long    X0, X1, X2, X3, X4, X5, X6, X7,
+                        X8, X9,X10,X11,X12,X13,X14,X15;
+# define X(i) X##i
+#endif
+
+       A=c->h0;
+       B=c->h1;
+       C=c->h2;
+       D=c->h3;
+       E=c->h4;
+
+       for (;;)
+               {
+       /*
+        * The six names A,B,C,D,E,T change roles from one round to the next
+        * (see the argument order), so no values are copied between them.
+        * Rounds 0-15 take their schedule word straight from W[].
+        */
+       BODY_00_15( 0,A,B,C,D,E,T,W[ 0]);
+       BODY_00_15( 1,T,A,B,C,D,E,W[ 1]);
+       BODY_00_15( 2,E,T,A,B,C,D,W[ 2]);
+       BODY_00_15( 3,D,E,T,A,B,C,W[ 3]);
+       BODY_00_15( 4,C,D,E,T,A,B,W[ 4]);
+       BODY_00_15( 5,B,C,D,E,T,A,W[ 5]);
+       BODY_00_15( 6,A,B,C,D,E,T,W[ 6]);
+       BODY_00_15( 7,T,A,B,C,D,E,W[ 7]);
+       BODY_00_15( 8,E,T,A,B,C,D,W[ 8]);
+       BODY_00_15( 9,D,E,T,A,B,C,W[ 9]);
+       BODY_00_15(10,C,D,E,T,A,B,W[10]);
+       BODY_00_15(11,B,C,D,E,T,A,W[11]);
+       BODY_00_15(12,A,B,C,D,E,T,W[12]);
+       BODY_00_15(13,T,A,B,C,D,E,W[13]);
+       BODY_00_15(14,E,T,A,B,C,D,W[14]);
+       BODY_00_15(15,D,E,T,A,B,C,W[15]);
+
+       /*
+        * Rounds 16-31 write new schedule words into X() while still
+        * reading the input words from W[] that have no X() replacement yet.
+        */
+       BODY_16_19(16,C,D,E,T,A,B,X( 0),W[ 0],W[ 2],W[ 8],W[13]);
+       BODY_16_19(17,B,C,D,E,T,A,X( 1),W[ 1],W[ 3],W[ 9],W[14]);
+       BODY_16_19(18,A,B,C,D,E,T,X( 2),W[ 2],W[ 4],W[10],W[15]);
+       BODY_16_19(19,T,A,B,C,D,E,X( 3),W[ 3],W[ 5],W[11],X( 0));
+
+       BODY_20_31(20,E,T,A,B,C,D,X( 4),W[ 4],W[ 6],W[12],X( 1));
+       BODY_20_31(21,D,E,T,A,B,C,X( 5),W[ 5],W[ 7],W[13],X( 2));
+       BODY_20_31(22,C,D,E,T,A,B,X( 6),W[ 6],W[ 8],W[14],X( 3));
+       BODY_20_31(23,B,C,D,E,T,A,X( 7),W[ 7],W[ 9],W[15],X( 4));
+       BODY_20_31(24,A,B,C,D,E,T,X( 8),W[ 8],W[10],X( 0),X( 5));
+       BODY_20_31(25,T,A,B,C,D,E,X( 9),W[ 9],W[11],X( 1),X( 6));
+       BODY_20_31(26,E,T,A,B,C,D,X(10),W[10],W[12],X( 2),X( 7));
+       BODY_20_31(27,D,E,T,A,B,C,X(11),W[11],W[13],X( 3),X( 8));
+       BODY_20_31(28,C,D,E,T,A,B,X(12),W[12],W[14],X( 4),X( 9));
+       BODY_20_31(29,B,C,D,E,T,A,X(13),W[13],W[15],X( 5),X(10));
+       BODY_20_31(30,A,B,C,D,E,T,X(14),W[14],X( 0),X( 6),X(11));
+       BODY_20_31(31,T,A,B,C,D,E,X(15),W[15],X( 1),X( 7),X(12));
+
+       /* From round 32 on, only X() is read and each word is updated in place. */
+       BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13));
+       BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14));
+       BODY_32_39(34,C,D,E,T,A,B,X( 2),X( 4),X(10),X(15));
+       BODY_32_39(35,B,C,D,E,T,A,X( 3),X( 5),X(11),X( 0));
+       BODY_32_39(36,A,B,C,D,E,T,X( 4),X( 6),X(12),X( 1));
+       BODY_32_39(37,T,A,B,C,D,E,X( 5),X( 7),X(13),X( 2));
+       BODY_32_39(38,E,T,A,B,C,D,X( 6),X( 8),X(14),X( 3));
+       BODY_32_39(39,D,E,T,A,B,C,X( 7),X( 9),X(15),X( 4));
+
+       BODY_40_59(40,C,D,E,T,A,B,X( 8),X(10),X( 0),X( 5));
+       BODY_40_59(41,B,C,D,E,T,A,X( 9),X(11),X( 1),X( 6));
+       BODY_40_59(42,A,B,C,D,E,T,X(10),X(12),X( 2),X( 7));
+       BODY_40_59(43,T,A,B,C,D,E,X(11),X(13),X( 3),X( 8));
+       BODY_40_59(44,E,T,A,B,C,D,X(12),X(14),X( 4),X( 9));
+       BODY_40_59(45,D,E,T,A,B,C,X(13),X(15),X( 5),X(10));
+       BODY_40_59(46,C,D,E,T,A,B,X(14),X( 0),X( 6),X(11));
+       BODY_40_59(47,B,C,D,E,T,A,X(15),X( 1),X( 7),X(12));
+       BODY_40_59(48,A,B,C,D,E,T,X( 0),X( 2),X( 8),X(13));
+       BODY_40_59(49,T,A,B,C,D,E,X( 1),X( 3),X( 9),X(14));
+       BODY_40_59(50,E,T,A,B,C,D,X( 2),X( 4),X(10),X(15));
+       BODY_40_59(51,D,E,T,A,B,C,X( 3),X( 5),X(11),X( 0));
+       BODY_40_59(52,C,D,E,T,A,B,X( 4),X( 6),X(12),X( 1));
+       BODY_40_59(53,B,C,D,E,T,A,X( 5),X( 7),X(13),X( 2));
+       BODY_40_59(54,A,B,C,D,E,T,X( 6),X( 8),X(14),X( 3));
+       BODY_40_59(55,T,A,B,C,D,E,X( 7),X( 9),X(15),X( 4));
+       BODY_40_59(56,E,T,A,B,C,D,X( 8),X(10),X( 0),X( 5));
+       BODY_40_59(57,D,E,T,A,B,C,X( 9),X(11),X( 1),X( 6));
+       BODY_40_59(58,C,D,E,T,A,B,X(10),X(12),X( 2),X( 7));
+       BODY_40_59(59,B,C,D,E,T,A,X(11),X(13),X( 3),X( 8));
+
+       BODY_60_79(60,A,B,C,D,E,T,X(12),X(14),X( 4),X( 9));
+       BODY_60_79(61,T,A,B,C,D,E,X(13),X(15),X( 5),X(10));
+       BODY_60_79(62,E,T,A,B,C,D,X(14),X( 0),X( 6),X(11));
+       BODY_60_79(63,D,E,T,A,B,C,X(15),X( 1),X( 7),X(12));
+       BODY_60_79(64,C,D,E,T,A,B,X( 0),X( 2),X( 8),X(13));
+       BODY_60_79(65,B,C,D,E,T,A,X( 1),X( 3),X( 9),X(14));
+       BODY_60_79(66,A,B,C,D,E,T,X( 2),X( 4),X(10),X(15));
+       BODY_60_79(67,T,A,B,C,D,E,X( 3),X( 5),X(11),X( 0));
+       BODY_60_79(68,E,T,A,B,C,D,X( 4),X( 6),X(12),X( 1));
+       BODY_60_79(69,D,E,T,A,B,C,X( 5),X( 7),X(13),X( 2));
+       BODY_60_79(70,C,D,E,T,A,B,X( 6),X( 8),X(14),X( 3));
+       BODY_60_79(71,B,C,D,E,T,A,X( 7),X( 9),X(15),X( 4));
+       BODY_60_79(72,A,B,C,D,E,T,X( 8),X(10),X( 0),X( 5));
+       BODY_60_79(73,T,A,B,C,D,E,X( 9),X(11),X( 1),X( 6));
+       BODY_60_79(74,E,T,A,B,C,D,X(10),X(12),X( 2),X( 7));
+       BODY_60_79(75,D,E,T,A,B,C,X(11),X(13),X( 3),X( 8));
+       BODY_60_79(76,C,D,E,T,A,B,X(12),X(14),X( 4),X( 9));
+       BODY_60_79(77,B,C,D,E,T,A,X(13),X(15),X( 5),X(10));
+       BODY_60_79(78,A,B,C,D,E,T,X(14),X( 0),X( 6),X(11));
+       BODY_60_79(79,T,A,B,C,D,E,X(15),X( 1),X( 7),X(12));
+       
+       /*
+        * After round 79 the five results sit in E,T,A,B,C.  Add them into
+        * the state and keep only the low 32 bits; the locals are unsigned
+        * long, which can be wider than 32 bits.
+        */
+       c->h0=(c->h0+E)&0xffffffffL; 
+       c->h1=(c->h1+T)&0xffffffffL;
+       c->h2=(c->h2+A)&0xffffffffL;
+       c->h3=(c->h3+B)&0xffffffffL;
+       c->h4=(c->h4+C)&0xffffffffL;
+
+       if (--num <= 0) break;
+
+       /* Reload the working variables from the updated state. */
+       A=c->h0;
+       B=c->h1;
+       C=c->h2;
+       D=c->h3;
+       E=c->h4;
+
+       /* Step to the next block of input words. */
+       W+=SHA_LBLOCK;
+               }
+       }
+#endif
+
+#ifndef DONT_IMPLEMENT_BLOCK_DATA_ORDER
+/*
+ * Block function for input supplied as bytes: every schedule word is
+ * fetched from the byte pointer with HOST_c2l() into X() before use.
+ *
+ *   c   - context; h0..h4 are read on entry and updated after every block.
+ *   p   - start of the input bytes.
+ *   num - number of blocks.  The count is only tested after a block has
+ *         been processed, so a value <= 0 still consumes one block.
+ *
+ * HOST_c2l is not defined in this file.  The loop never advances `data`
+ * itself, so it relies on HOST_c2l(data,l) to move the pointer past the
+ * bytes it reads -- confirm against md32_common.h.
+ *
+ * The whole definition is compiled out when DONT_IMPLEMENT_BLOCK_DATA_ORDER
+ * is defined.
+ */
+void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, int num)
+       {
+       const unsigned char *data=p;
+       register unsigned long A,B,C,D,E,T,l;
+       /*
+        * Schedule storage: a 16-entry array when SHA_XARRAY is defined,
+        * otherwise sixteen scalars X0..X15 selected by token pasting.
+        */
+#ifdef SHA_XARRAY
+       SHA_LONG        X[16];
+# define X(i) X[(i)]
+#else
+       unsigned long    X0, X1, X2, X3, X4, X5, X6, X7,
+                        X8, X9,X10,X11,X12,X13,X14,X15;
+# define X(i) X##i
+#endif
+
+       A=c->h0;
+       B=c->h1;
+       C=c->h2;
+       D=c->h3;
+       E=c->h4;
+
+       for (;;)
+               {
+
+       /*
+        * Rounds 0-15, with the input loads interleaved: X(0) and X(1) are
+        * fetched up front, then each round is followed by the load of the
+        * word needed two rounds later.  l is the temporary for each load.
+        */
+       HOST_c2l(data,l); X( 0)=l;              HOST_c2l(data,l); X( 1)=l;
+       BODY_00_15( 0,A,B,C,D,E,T,X( 0));       HOST_c2l(data,l); X( 2)=l;
+       BODY_00_15( 1,T,A,B,C,D,E,X( 1));       HOST_c2l(data,l); X( 3)=l;
+       BODY_00_15( 2,E,T,A,B,C,D,X( 2));       HOST_c2l(data,l); X( 4)=l;
+       BODY_00_15( 3,D,E,T,A,B,C,X( 3));       HOST_c2l(data,l); X( 5)=l;
+       BODY_00_15( 4,C,D,E,T,A,B,X( 4));       HOST_c2l(data,l); X( 6)=l;
+       BODY_00_15( 5,B,C,D,E,T,A,X( 5));       HOST_c2l(data,l); X( 7)=l;
+       BODY_00_15( 6,A,B,C,D,E,T,X( 6));       HOST_c2l(data,l); X( 8)=l;
+       BODY_00_15( 7,T,A,B,C,D,E,X( 7));       HOST_c2l(data,l); X( 9)=l;
+       BODY_00_15( 8,E,T,A,B,C,D,X( 8));       HOST_c2l(data,l); X(10)=l;
+       BODY_00_15( 9,D,E,T,A,B,C,X( 9));       HOST_c2l(data,l); X(11)=l;
+       BODY_00_15(10,C,D,E,T,A,B,X(10));       HOST_c2l(data,l); X(12)=l;
+       BODY_00_15(11,B,C,D,E,T,A,X(11));       HOST_c2l(data,l); X(13)=l;
+       BODY_00_15(12,A,B,C,D,E,T,X(12));       HOST_c2l(data,l); X(14)=l;
+       BODY_00_15(13,T,A,B,C,D,E,X(13));       HOST_c2l(data,l); X(15)=l;
+       BODY_00_15(14,E,T,A,B,C,D,X(14));
+       BODY_00_15(15,D,E,T,A,B,C,X(15));
+
+       /*
+        * Rounds 16-31: the destination and the first source are the same
+        * X() slot, so each schedule word is updated in place.
+        */
+       BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13));
+       BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14));
+       BODY_16_19(18,A,B,C,D,E,T,X( 2),X( 2),X( 4),X(10),X(15));
+       BODY_16_19(19,T,A,B,C,D,E,X( 3),X( 3),X( 5),X(11),X( 0));
+
+       BODY_20_31(20,E,T,A,B,C,D,X( 4),X( 4),X( 6),X(12),X( 1));
+       BODY_20_31(21,D,E,T,A,B,C,X( 5),X( 5),X( 7),X(13),X( 2));
+       BODY_20_31(22,C,D,E,T,A,B,X( 6),X( 6),X( 8),X(14),X( 3));
+       BODY_20_31(23,B,C,D,E,T,A,X( 7),X( 7),X( 9),X(15),X( 4));
+       BODY_20_31(24,A,B,C,D,E,T,X( 8),X( 8),X(10),X( 0),X( 5));
+       BODY_20_31(25,T,A,B,C,D,E,X( 9),X( 9),X(11),X( 1),X( 6));
+       BODY_20_31(26,E,T,A,B,C,D,X(10),X(10),X(12),X( 2),X( 7));
+       BODY_20_31(27,D,E,T,A,B,C,X(11),X(11),X(13),X( 3),X( 8));
+       BODY_20_31(28,C,D,E,T,A,B,X(12),X(12),X(14),X( 4),X( 9));
+       BODY_20_31(29,B,C,D,E,T,A,X(13),X(13),X(15),X( 5),X(10));
+       BODY_20_31(30,A,B,C,D,E,T,X(14),X(14),X( 0),X( 6),X(11));
+       BODY_20_31(31,T,A,B,C,D,E,X(15),X(15),X( 1),X( 7),X(12));
+
+       BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13));
+       BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14));
+       BODY_32_39(34,C,D,E,T,A,B,X( 2),X( 4),X(10),X(15));
+       BODY_32_39(35,B,C,D,E,T,A,X( 3),X( 5),X(11),X( 0));
+       BODY_32_39(36,A,B,C,D,E,T,X( 4),X( 6),X(12),X( 1));
+       BODY_32_39(37,T,A,B,C,D,E,X( 5),X( 7),X(13),X( 2));
+       BODY_32_39(38,E,T,A,B,C,D,X( 6),X( 8),X(14),X( 3));
+       BODY_32_39(39,D,E,T,A,B,C,X( 7),X( 9),X(15),X( 4));
+
+       BODY_40_59(40,C,D,E,T,A,B,X( 8),X(10),X( 0),X( 5));
+       BODY_40_59(41,B,C,D,E,T,A,X( 9),X(11),X( 1),X( 6));
+       BODY_40_59(42,A,B,C,D,E,T,X(10),X(12),X( 2),X( 7));
+       BODY_40_59(43,T,A,B,C,D,E,X(11),X(13),X( 3),X( 8));
+       BODY_40_59(44,E,T,A,B,C,D,X(12),X(14),X( 4),X( 9));
+       BODY_40_59(45,D,E,T,A,B,C,X(13),X(15),X( 5),X(10));
+       BODY_40_59(46,C,D,E,T,A,B,X(14),X( 0),X( 6),X(11));
+       BODY_40_59(47,B,C,D,E,T,A,X(15),X( 1),X( 7),X(12));
+       BODY_40_59(48,A,B,C,D,E,T,X( 0),X( 2),X( 8),X(13));
+       BODY_40_59(49,T,A,B,C,D,E,X( 1),X( 3),X( 9),X(14));
+       BODY_40_59(50,E,T,A,B,C,D,X( 2),X( 4),X(10),X(15));
+       BODY_40_59(51,D,E,T,A,B,C,X( 3),X( 5),X(11),X( 0));
+       BODY_40_59(52,C,D,E,T,A,B,X( 4),X( 6),X(12),X( 1));
+       BODY_40_59(53,B,C,D,E,T,A,X( 5),X( 7),X(13),X( 2));
+       BODY_40_59(54,A,B,C,D,E,T,X( 6),X( 8),X(14),X( 3));
+       BODY_40_59(55,T,A,B,C,D,E,X( 7),X( 9),X(15),X( 4));
+       BODY_40_59(56,E,T,A,B,C,D,X( 8),X(10),X( 0),X( 5));
+       BODY_40_59(57,D,E,T,A,B,C,X( 9),X(11),X( 1),X( 6));
+       BODY_40_59(58,C,D,E,T,A,B,X(10),X(12),X( 2),X( 7));
+       BODY_40_59(59,B,C,D,E,T,A,X(11),X(13),X( 3),X( 8));
+
+       BODY_60_79(60,A,B,C,D,E,T,X(12),X(14),X( 4),X( 9));
+       BODY_60_79(61,T,A,B,C,D,E,X(13),X(15),X( 5),X(10));
+       BODY_60_79(62,E,T,A,B,C,D,X(14),X( 0),X( 6),X(11));
+       BODY_60_79(63,D,E,T,A,B,C,X(15),X( 1),X( 7),X(12));
+       BODY_60_79(64,C,D,E,T,A,B,X( 0),X( 2),X( 8),X(13));
+       BODY_60_79(65,B,C,D,E,T,A,X( 1),X( 3),X( 9),X(14));
+       BODY_60_79(66,A,B,C,D,E,T,X( 2),X( 4),X(10),X(15));
+       BODY_60_79(67,T,A,B,C,D,E,X( 3),X( 5),X(11),X( 0));
+       BODY_60_79(68,E,T,A,B,C,D,X( 4),X( 6),X(12),X( 1));
+       BODY_60_79(69,D,E,T,A,B,C,X( 5),X( 7),X(13),X( 2));
+       BODY_60_79(70,C,D,E,T,A,B,X( 6),X( 8),X(14),X( 3));
+       BODY_60_79(71,B,C,D,E,T,A,X( 7),X( 9),X(15),X( 4));
+       BODY_60_79(72,A,B,C,D,E,T,X( 8),X(10),X( 0),X( 5));
+       BODY_60_79(73,T,A,B,C,D,E,X( 9),X(11),X( 1),X( 6));
+       BODY_60_79(74,E,T,A,B,C,D,X(10),X(12),X( 2),X( 7));
+       BODY_60_79(75,D,E,T,A,B,C,X(11),X(13),X( 3),X( 8));
+       BODY_60_79(76,C,D,E,T,A,B,X(12),X(14),X( 4),X( 9));
+       BODY_60_79(77,B,C,D,E,T,A,X(13),X(15),X( 5),X(10));
+       BODY_60_79(78,A,B,C,D,E,T,X(14),X( 0),X( 6),X(11));
+       BODY_60_79(79,T,A,B,C,D,E,X(15),X( 1),X( 7),X(12));
+       
+       /*
+        * After round 79 the five results sit in E,T,A,B,C.  Add them into
+        * the state and keep only the low 32 bits; the locals are unsigned
+        * long, which can be wider than 32 bits.
+        */
+       c->h0=(c->h0+E)&0xffffffffL; 
+       c->h1=(c->h1+T)&0xffffffffL;
+       c->h2=(c->h2+A)&0xffffffffL;
+       c->h3=(c->h3+B)&0xffffffffL;
+       c->h4=(c->h4+C)&0xffffffffL;
+
+       if (--num <= 0) break;
+
+       /* Reload the working variables from the updated state. */
+       A=c->h0;
+       B=c->h1;
+       C=c->h2;
+       D=c->h3;
+       E=c->h4;
+
+               }
+       }
+#endif