SHA-1 cleanups and performance enhancements.

author Ulf Möller <ulf@openssl.org>

Wed, 5 May 1999 00:23:53 +0000 (00:23 +0000)

committer Ulf Möller <ulf@openssl.org>

Wed, 5 May 1999 00:23:53 +0000 (00:23 +0000)
author Ulf Möller <ulf@openssl.org>
Wed, 5 May 1999 00:23:53 +0000 (00:23 +0000)
committer Ulf Möller <ulf@openssl.org>
Wed, 5 May 1999 00:23:53 +0000 (00:23 +0000)
diff --git a/CHANGES b/CHANGES

index 8f18223c7154c43b6a4e934308552832dbc01cca..519dca970ce258011107c558e6f286550be8992d 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -5,8 +5,11 @@
  
   Changes between 0.9.2b and 0.9.3
  
+  *) SHA-1 cleanups and performance enhancements.
+     [Andy Polyakov <appro@fy.chalmers.se>]
+
    *) Sparc v8plus assembler for the bignum library.
-    [Andy Polyakov <appro@fy.chalmers.se>]
+     [Andy Polyakov <appro@fy.chalmers.se>]
  
    *) Accept any -xxx and +xxx compiler options in Configure.
       [Ulf Möller]
diff --git a/Configure b/Configure

index eff6b1e42ba17ccbaff858852be8cbd3654d6da8..f4e97b4b9abcc844673a65415f82ce1fc061d53a 100755 (executable)
--- a/Configure
+++ b/Configure
@@ -587,6 +587,9 @@ while (<IN>)
                 { printf OUT "#define RC4_INT unsigned %s\n",$type[$rc4_int]; }
         elsif   (/^#((define)|(undef))\s+RC4_INDEX/)
                 { printf OUT "#%s RC4_INDEX\n",($rc4_idx)?"define":"undef"; }
+       elsif (/^#(define|undef)\s+I386_ONLY/)
+               { printf OUT "#%s I386_ONLY\n", ($processor == 386)?
+                       "define":"undef"; }
         elsif   (/^#define\s+MD2_INT\s/)
                 { printf OUT "#define MD2_INT unsigned %s\n",$type[$md2_int]; }
         elsif   (/^#define\s+IDEA_INT\s/)
diff --git a/crypto/opensslconf.h.in b/crypto/opensslconf.h.in

index cd05361eb82c6f89bb1aa3354862dbe66f2b8355..4e28f3e666d5c6c461ad82c50c28227b7d8bc828 100644 (file)
--- a/crypto/opensslconf.h.in
+++ b/crypto/opensslconf.h.in
@@ -1,6 +1,9 @@
  /* crypto/opensslconf.h */
  /* WARNING: This file is autogenerated by Configure */
  
+/* Generate 80386 code? */
+#undef I386_ONLY
+
  #if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
  #define OPENSSLDIR "/usr/local/ssl"
  #endif
@@ -34,7 +37,7 @@
  
  #if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
  #define CONFIG_HEADER_BN_H
-#define BN_LLONG
+#undef BN_LLONG
  
  /* Should we define BN_DIV2W here? */
  
@@ -53,7 +56,7 @@
  #define CONFIG_HEADER_RC4_LOCL_H
  /* if this is defined data[i] is used instead of *data, this is a %20
   * speedup on x86 */
-#define RC4_INDEX
+#undef RC4_INDEX
  #endif
  
  #if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
@@ -67,14 +70,14 @@
  /* the following is tweaked from a config script, that is why it is a
   * protected undef/define */
  #ifndef DES_PTR
-#define DES_PTR
+#undef DES_PTR
  #endif
  
  /* This helps C compiler generate the correct code for multiple functional
   * units.  It reduces register dependancies at the expense of 2 more
   * registers */
  #ifndef DES_RISC1
-#define DES_RISC1
+#undef DES_RISC1
  #endif
  
  #ifndef DES_RISC2
@@ -88,7 +91,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
  /* Unroll the inner loop, this sometimes helps, sometimes hinders.
   * Very mucy CPU dependant */
  #ifndef DES_UNROLL
-#define DES_UNROLL
+#undef DES_UNROLL
  #endif
  
  /* These default values were supplied by
diff --git a/crypto/sha/sha.h b/crypto/sha/sha.h

index ba40aafc135b58368913c88a05b13fff17430c5b..cd6960ee1a3d0a8df475eaaf4a178b53afc58c00 100644 (file)
--- a/crypto/sha/sha.h
+++ b/crypto/sha/sha.h
@@ -67,18 +67,28 @@ extern "C" {
  #error SHA is disabled.
  #endif
  
-#define SHA_CBLOCK     64
-#define SHA_LBLOCK     16
-#define SHA_BLOCK      16
-#define SHA_LAST_BLOCK  56
-#define SHA_LENGTH_BLOCK 8
-#define SHA_DIGEST_LENGTH 20
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * ! SHA_LONG has to be at least 32 bits wide. If it's wider, then !
+ * ! SHA_LONG_LOG2 has to be defined as well.                      !
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
  
-#ifdef WIN16
+#if defined(WIN16) || defined(__LP32__)
+#define SHA_LONG unsigned long
+#elif defined(_CRAY) || defined(__ILP64__)
  #define SHA_LONG unsigned long
+#define SHA_LONG_LOG2 3
  #else
  #define SHA_LONG unsigned int
-#endif 
+#endif
+
+#define SHA_LBLOCK     16
+#define SHA_CBLOCK     (SHA_LBLOCK*4)  /* SHA treats input data as a
+                                        * contiguous array of 32 bit
+                                        * wide big-endian values. */
+#define SHA_LAST_BLOCK  (SHA_CBLOCK-8)
+#define SHA_DIGEST_LENGTH 20
  
  typedef struct SHAstate_st
         {
diff --git a/crypto/sha/sha1dgst.c b/crypto/sha/sha1dgst.c

index f4a47f3768b860d54c78b260a357148a63ee5024..e867f6972b4d28125e5ffbfeacb4bba690f5a00e 100644 (file)
--- a/crypto/sha/sha1dgst.c
+++ b/crypto/sha/sha1dgst.c
@@ -81,14 +81,14 @@ char *SHA1_version="SHA1" OPENSSL_VERSION_PTEXT;
  #define K_40_59 0x8f1bbcdcUL
  #define K_60_79 0xca62c1d6UL
  
-#  ifdef SHA1_ASM
-     void sha1_block_x86(SHA_CTX *c, register SHA_LONG *p, int num);
-#    define sha1_block sha1_block_x86
-#  else
-     void sha1_block(SHA_CTX *c, register SHA_LONG *p, int num);
-#  endif
+#ifdef SHA1_ASM
+   void sha1_block_x86(SHA_CTX *c, register SHA_LONG *p, int num);
+#  define sha1_block(c,p,n) sha1_block_x86((c),(p),(n)*SHA_CBLOCK)
+#else
+   static void sha1_block(SHA_CTX *c, register SHA_LONG *p, int num);
+#endif
  
-#if defined(L_ENDIAN) && defined(SHA1_ASM)
+#if !defined(B_ENDIAN) && defined(SHA1_ASM)
  #  define      M_c2nl          c2l
  #  define      M_p_c2nl        p_c2l
  #  define      M_c2nl_p        c2l_p
@@ -147,7 +147,7 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
                                 }
                         len-=(SHA_CBLOCK-c->num);
  
-                       sha1_block(c,p,64);
+                       sha1_block(c,p,1);
                         c->num=0;
                         /* drop through and do the rest */
                         }
@@ -184,15 +184,15 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
          * copies it to a local array.  I should be able to do this for
          * the C version as well....
          */
-#if 1
+#if SHA_LONG_LOG2==2
  #if defined(B_ENDIAN) || defined(SHA1_ASM)
         if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
                 {
                 sw=len/SHA_CBLOCK;
                 if (sw)
                         {
-                       sw*=SHA_CBLOCK;
                         sha1_block(c,(SHA_LONG *)data,sw);
+                       sw*=SHA_CBLOCK;
                         data+=sw;
                         len-=sw;
                         }
@@ -204,35 +204,61 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
         p=c->data;
         while (len >= SHA_CBLOCK)
                 {
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
+#if SHA_LONG_LOG2==2
+#if defined(B_ENDIAN) || defined(SHA1_ASM)
+#define SHA_NO_TAIL_CODE
+               /*
+                * Basically we get here only when data happens
+                * to be unaligned.
+                */
                 if (p != (SHA_LONG *)data)
                         memcpy(p,data,SHA_CBLOCK);
                 data+=SHA_CBLOCK;
-#  ifdef L_ENDIAN
-#    ifndef SHA1_ASM /* Will not happen */
-               for (sw=(SHA_LBLOCK/4); sw; sw--)
+               sha1_block(c,p=c->data,1);
+               len-=SHA_CBLOCK;
+#else  /* little-endian */
+#define BE_COPY(dst,src,i)     {                               \
+                               l = ((SHA_LONG *)src)[i];       \
+                               Endian_Reverse32(l);            \
+                               dst[i] = l;                     \
+                               }
+               if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
                         {
-                       Endian_Reverse32(p[0]);
-                       Endian_Reverse32(p[1]);
-                       Endian_Reverse32(p[2]);
-                       Endian_Reverse32(p[3]);
-                       p+=4;
+                       for (sw=(SHA_LBLOCK/4); sw; sw--)
+                               {
+                               BE_COPY(p,data,0);
+                               BE_COPY(p,data,1);
+                               BE_COPY(p,data,2);
+                               BE_COPY(p,data,3);
+                               p+=4;
+                               data += 4*sizeof(SHA_LONG);
+                               }
+                       sha1_block(c,p=c->data,1);
+                       len-=SHA_CBLOCK;
+                       continue;
                         }
+#endif
+#endif
+#ifndef SHA_NO_TAIL_CODE
+               /*
+                * In addition to "sizeof(SHA_LONG)!= 4" case the
+                * following code covers unaligned access cases on
+                * little-endian machines.
+                *                      <appro@fy.chalmers.se>
+                */
                 p=c->data;
-#    endif
-#  endif
-#else
-               for (sw=(SHA_BLOCK/4); sw; sw--)
+               for (sw=(SHA_LBLOCK/4); sw; sw--)
                         {
-                       M_c2nl(data,l); *(p++)=l;
-                       M_c2nl(data,l); *(p++)=l;
-                       M_c2nl(data,l); *(p++)=l;
-                       M_c2nl(data,l); *(p++)=l;
+                       M_c2nl(data,l); p[0]=l;
+                       M_c2nl(data,l); p[1]=l;
+                       M_c2nl(data,l); p[2]=l;
+                       M_c2nl(data,l); p[3]=l;
+                       p+=4;
                         }
                 p=c->data;
-#endif
-               sha1_block(c,p,64);
+               sha1_block(c,p,1);
                 len-=SHA_CBLOCK;
+#endif
                 }
         ec=(int)len;
         c->num=ec;
@@ -247,26 +273,35 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
  
  void SHA1_Transform(SHA_CTX *c, unsigned char *b)
         {
-       SHA_LONG p[16];
-#ifndef B_ENDIAN
+       SHA_LONG p[SHA_LBLOCK];
         SHA_LONG *q;
         int i;
-#endif
  
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
-       memcpy(p,b,64);
-#ifdef L_ENDIAN
-       q=p;
-       for (i=(SHA_LBLOCK/4); i; i--)
+#if SHA_LONG_LOG2==2
+#if defined(B_ENDIAN) || defined(SHA1_ASM)
+       memcpy(p,b,SHA_CBLOCK);
+       sha1_block(c,p,1);
+       return;
+#else
+       if (((unsigned long)b%sizeof(SHA_LONG)) == 0)
                 {
-               Endian_Reverse32(q[0]);
-               Endian_Reverse32(q[1]);
-               Endian_Reverse32(q[2]);
-               Endian_Reverse32(q[3]);
-               q+=4;
+               q=p;
+               for (i=(SHA_LBLOCK/4); i; i--)
+                       {
+                       unsigned long l;
+                       BE_COPY(q,b,0); /* BE_COPY was defined above */
+                       BE_COPY(q,b,1);
+                       BE_COPY(q,b,2);
+                       BE_COPY(q,b,3);
+                       q+=4;
+                       b+=4*sizeof(SHA_LONG);
+                       }
+               sha1_block(c,p,1);
+               return;
                 }
  #endif
-#else
+#endif
+#ifndef SHA_NO_TAIL_CODE /* defined above, see comment */
         q=p;
         for (i=(SHA_LBLOCK/4); i; i--)
                 {
@@ -276,16 +311,15 @@ void SHA1_Transform(SHA_CTX *c, unsigned char *b)
                 c2nl(b,l); *(q++)=l;
                 c2nl(b,l); *(q++)=l; 
                 } 
+       sha1_block(c,p,1);
  #endif
-       sha1_block(c,p,64);
         }
  
  #ifndef SHA1_ASM
-
-void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
+static void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
         {
         register SHA_LONG A,B,C,D,E,T;
-       SHA_LONG X[16];
+       SHA_LONG X[SHA_LBLOCK];
  
         A=c->h0;
         B=c->h1;
@@ -385,8 +419,7 @@ void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
         c->h3=(c->h3+B)&0xffffffffL;
         c->h4=(c->h4+C)&0xffffffffL;
  
-       num-=64;
-       if (num <= 0) break;
+       if (--num <= 0) break;
  
         A=c->h0;
         B=c->h1;
@@ -394,7 +427,12 @@ void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
         D=c->h3;
         E=c->h4;
  
-       W+=16;
+       W+=SHA_LBLOCK;  /* Note! This can happen only when sizeof(SHA_LONG)
+                        * is 4. Whenever it's not the actual case this
+                        * function is never called with num larger than 1
+                        * and we never advance down here.
+                        *                      <appro@fy.chalmers.se>
+                        */
                 }
         }
  #endif
@@ -423,18 +461,20 @@ void SHA1_Final(unsigned char *md, SHA_CTX *c)
                 {
                 for (; i<SHA_LBLOCK; i++)
                         p[i]=0;
-               sha1_block(c,p,64);
+               sha1_block(c,p,1);
                 i=0;
                 }
         for (; i<(SHA_LBLOCK-2); i++)
                 p[i]=0;
         p[SHA_LBLOCK-2]=c->Nh;
         p[SHA_LBLOCK-1]=c->Nl;
-#if defined(L_ENDIAN) && defined(SHA1_ASM)
+#if SHA_LONG_LOG2==2
+#if !defined(B_ENDIAN) && defined(SHA1_ASM)
         Endian_Reverse32(p[SHA_LBLOCK-2]);
         Endian_Reverse32(p[SHA_LBLOCK-1]);
  #endif
-       sha1_block(c,p,64);
+#endif
+       sha1_block(c,p,1);
         cp=md;
         l=c->h0; nl2c(l,cp);
         l=c->h1; nl2c(l,cp);
@@ -442,10 +482,11 @@ void SHA1_Final(unsigned char *md, SHA_CTX *c)
         l=c->h3; nl2c(l,cp);
         l=c->h4; nl2c(l,cp);
  
-       /* clear stuff, sha1_block may be leaving some stuff on the stack
-        * but I'm not worried :-) */
         c->num=0;
-/*     memset((char *)&c,0,sizeof(c));*/
+       /* sha1_block may be leaving some stuff on the stack
+        * but I'm not worried :-)
+       memset((void *)c,0,sizeof(SHA_CTX));
+        */
         }
  #endif
  
diff --git a/crypto/sha/sha_dgst.c b/crypto/sha/sha_dgst.c

index 5827c73ceab6ba53f156c01a8f530dd6ecb50c01..d90f497763be9a00cdfb7b46aaece74ad97aeb38 100644 (file)
--- a/crypto/sha/sha_dgst.c
+++ b/crypto/sha/sha_dgst.c
@@ -81,12 +81,21 @@ char *SHA_version="SHA" OPENSSL_VERSION_PTEXT;
  #define K_40_59 0x8f1bbcdcUL
  #define K_60_79 0xca62c1d6UL
  
-   void sha_block(SHA_CTX *c, register SHA_LONG *p, int num);
-#define        M_c2nl          c2nl
-#define        M_p_c2nl        p_c2nl
-#define        M_c2nl_p        c2nl_p
-#define        M_p_c2nl_p      p_c2nl_p
-#define        M_nl2c          nl2c
+static void sha_block(SHA_CTX *c, register SHA_LONG *p, int num);
+
+#if !defined(B_ENDIAN) && defined(SHA_ASM)
+#  define      M_c2nl          c2l
+#  define      M_p_c2nl        p_c2l
+#  define      M_c2nl_p        c2l_p
+#  define      M_p_c2nl_p      p_c2l_p
+#  define      M_nl2c          l2c
+#else
+#  define      M_c2nl          c2nl
+#  define      M_p_c2nl        p_c2nl
+#  define      M_c2nl_p        c2nl_p
+#  define      M_p_c2nl_p      p_c2nl_p
+#  define      M_nl2c          nl2c
+#endif
  
  void SHA_Init(SHA_CTX *c)
         {
@@ -133,7 +142,7 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
                                 }
                         len-=(SHA_CBLOCK-c->num);
  
-                       sha_block(c,p,64);
+                       sha_block(c,p,1);
                         c->num=0;
                         /* drop through and do the rest */
                         }
@@ -170,15 +179,15 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
          * copies it to a local array.  I should be able to do this for
          * the C version as well....
          */
-#if 1
+#if SHA_LONG_LOG2==2
  #if defined(B_ENDIAN) || defined(SHA_ASM)
         if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
                 {
                 sw=len/SHA_CBLOCK;
                 if (sw)
                         {
-                       sw*=SHA_CBLOCK;
                         sha_block(c,(SHA_LONG *)data,sw);
+                       sw*=SHA_CBLOCK;
                         data+=sw;
                         len-=sw;
                         }
@@ -190,35 +199,61 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
         p=c->data;
         while (len >= SHA_CBLOCK)
                 {
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
+#if SHA_LONG_LOG2==2
+#if defined(B_ENDIAN) || defined(SHA_ASM)
+#define SHA_NO_TAIL_CODE
+               /*
+                * Basically we get here only when data happens
+                * to be unaligned.
+                */
                 if (p != (SHA_LONG *)data)
                         memcpy(p,data,SHA_CBLOCK);
                 data+=SHA_CBLOCK;
-#  ifdef L_ENDIAN
-#    ifndef SHA_ASM /* Will not happen */
-               for (sw=(SHA_LBLOCK/4); sw; sw--)
+               sha_block(c,p=c->data,1);
+               len-=SHA_CBLOCK;
+#else  /* little-endian */
+#define BE_COPY(dst,src,i)     {                               \
+                               l = ((SHA_LONG *)src)[i];       \
+                               Endian_Reverse32(l);            \
+                               dst[i] = l;                     \
+                               }
+               if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
                         {
-                       Endian_Reverse32(p[0]);
-                       Endian_Reverse32(p[1]);
-                       Endian_Reverse32(p[2]);
-                       Endian_Reverse32(p[3]);
-                       p+=4;
+                       for (sw=(SHA_LBLOCK/4); sw; sw--)
+                               {
+                               BE_COPY(p,data,0);
+                               BE_COPY(p,data,1);
+                               BE_COPY(p,data,2);
+                               BE_COPY(p,data,3);
+                               p+=4;
+                               data += 4*sizeof(SHA_LONG);
+                               }
+                       sha_block(c,p=c->data,1);
+                       len-=SHA_CBLOCK;
+                       continue;
                         }
+#endif
+#endif
+#ifndef SHA_NO_TAIL_CODE
+               /*
+                * In addition to "sizeof(SHA_LONG)!= 4" case the
+                * following code covers unaligned access cases on
+                * little-endian machines.
+                *                      <appro@fy.chalmers.se>
+                */
                 p=c->data;
-#    endif
-#  endif
-#else
-               for (sw=(SHA_BLOCK/4); sw; sw--)
+               for (sw=(SHA_LBLOCK/4); sw; sw--)
                         {
-                       M_c2nl(data,l); *(p++)=l;
-                       M_c2nl(data,l); *(p++)=l;
-                       M_c2nl(data,l); *(p++)=l;
-                       M_c2nl(data,l); *(p++)=l;
+                       M_c2nl(data,l); p[0]=l;
+                       M_c2nl(data,l); p[1]=l;
+                       M_c2nl(data,l); p[2]=l;
+                       M_c2nl(data,l); p[3]=l;
+                       p+=4;
                         }
                 p=c->data;
-#endif
-               sha_block(c,p,64);
+               sha_block(c,p,1);
                 len-=SHA_CBLOCK;
+#endif
                 }
         ec=(int)len;
         c->num=ec;
@@ -233,26 +268,35 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
  
  void SHA_Transform(SHA_CTX *c, unsigned char *b)
         {
-       SHA_LONG p[16];
-#if !defined(B_ENDIAN)
+       SHA_LONG p[SHA_LBLOCK];
         SHA_LONG *q;
         int i;
-#endif
  
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
-       memcpy(p,b,64);
-#ifdef L_ENDIAN
-       q=p;
-       for (i=(SHA_LBLOCK/4); i; i--)
+#if SHA_LONG_LOG2==2
+#if defined(B_ENDIAN) || defined(SHA_ASM)
+       memcpy(p,b,SHA_CBLOCK);
+       sha_block(c,p,1);
+       return;
+#else
+       if (((unsigned long)b%sizeof(SHA_LONG)) == 0)
                 {
-               Endian_Reverse32(q[0]);
-               Endian_Reverse32(q[1]);
-               Endian_Reverse32(q[2]);
-               Endian_Reverse32(q[3]);
-               q+=4;
+               q=p;
+               for (i=(SHA_LBLOCK/4); i; i--)
+                       {
+                       unsigned long l;
+                       BE_COPY(q,b,0); /* BE_COPY was defined above */
+                       BE_COPY(q,b,1);
+                       BE_COPY(q,b,2);
+                       BE_COPY(q,b,3);
+                       q+=4;
+                       b+=4*sizeof(SHA_LONG);
+                       }
+               sha_block(c,p,1);
+               return;
                 }
  #endif
-#else
+#endif
+#ifndef SHA_NO_TAIL_CODE /* defined above, see comment */
         q=p;
         for (i=(SHA_LBLOCK/4); i; i--)
                 {
@@ -262,14 +306,15 @@ void SHA_Transform(SHA_CTX *c, unsigned char *b)
                 c2nl(b,l); *(q++)=l;
                 c2nl(b,l); *(q++)=l; 
                 } 
+       sha_block(c,p,1);
  #endif
-       sha_block(c,p,64);
         }
  
-void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
+#ifndef SHA_ASM
+static void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
         {
         register SHA_LONG A,B,C,D,E,T;
-       SHA_LONG X[16];
+       SHA_LONG X[SHA_LBLOCK];
  
         A=c->h0;
         B=c->h1;
@@ -369,8 +414,7 @@ void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
         c->h3=(c->h3+B)&0xffffffffL;
         c->h4=(c->h4+C)&0xffffffffL;
  
-       num-=64;
-       if (num <= 0) break;
+       if (--num <= 0) break;
  
         A=c->h0;
         B=c->h1;
@@ -378,9 +422,15 @@ void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
         D=c->h3;
         E=c->h4;
  
-       W+=16;
+       W+=SHA_LBLOCK;  /* Note! This can happen only when sizeof(SHA_LONG)
+                        * is 4. Whenever it's not the actual case this
+                        * function is never called with num larger than 1
+                        * and we never advance down here.
+                        *                      <appro@fy.chalmers.se>
+                        */
                 }
         }
+#endif
  
  void SHA_Final(unsigned char *md, SHA_CTX *c)
         {
@@ -406,14 +456,20 @@ void SHA_Final(unsigned char *md, SHA_CTX *c)
                 {
                 for (; i<SHA_LBLOCK; i++)
                         p[i]=0;
-               sha_block(c,p,64);
+               sha_block(c,p,1);
                 i=0;
                 }
         for (; i<(SHA_LBLOCK-2); i++)
                 p[i]=0;
         p[SHA_LBLOCK-2]=c->Nh;
         p[SHA_LBLOCK-1]=c->Nl;
-       sha_block(c,p,64);
+#if SHA_LONG_LOG2==2
+#if !defined(B_ENDIAN) && defined(SHA_ASM)
+       Endian_Reverse32(p[SHA_LBLOCK-2]);
+       Endian_Reverse32(p[SHA_LBLOCK-1]);
+#endif
+#endif
+       sha_block(c,p,1);
         cp=md;
         l=c->h0; nl2c(l,cp);
         l=c->h1; nl2c(l,cp);
@@ -421,9 +477,10 @@ void SHA_Final(unsigned char *md, SHA_CTX *c)
         l=c->h3; nl2c(l,cp);
         l=c->h4; nl2c(l,cp);
  
-       /* clear stuff, sha_block may be leaving some stuff on the stack
-        * but I'm not worried :-) */
         c->num=0;
-/*     memset((char *)&c,0,sizeof(c));*/
+       /* sha_block may be leaving some stuff on the stack
+        * but I'm not worried :-)
+       memset((void *)c,0,sizeof(SHA_CTX));
+        */
         }
  #endif
diff --git a/crypto/sha/sha_locl.h b/crypto/sha/sha_locl.h

index 9f1251e7877cf872819ab5783ebb013ccb5b7e6c..32bbe30afd40f01b006945ed6905d015086c9d86 100644 (file)
--- a/crypto/sha/sha_locl.h
+++ b/crypto/sha/sha_locl.h
@@ -158,30 +158,79 @@
                          *((c)++)=(unsigned char)(((l)>>16)&0xff), \
                          *((c)++)=(unsigned char)(((l)>>24)&0xff))
  
+#ifndef SHA_LONG_LOG2
+#define SHA_LONG_LOG2  2       /* default to 32 bits */
+#endif
+
  #undef ROTATE
+#undef Endian_Reverse32
  #if defined(WIN32)
  #define ROTATE(a,n)     _lrotl(a,n)
-#else
-#define ROTATE(a,n)     (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
+#elif defined(__GNUC__)
+/* some inline assembler templates by <appro@fy.chalmers.se> */
+#if defined(__i386)
+#define ROTATE(a,n)    ({ register unsigned int ret;   \
+                               asm ("roll %1,%0"       \
+                               : "=r"(ret)             \
+                               : "I"(n), "0"(a)        \
+                               : "cc");                \
+                          ret;                         \
+                       })
+#ifndef I386_ONLY
+#define Endian_Reverse32(a) /* byte-swap (a) in place using bswapl */ \
+                       { register unsigned int t=(a);  /* not "l": BE_COPY passes l */ \
+                               asm ("bswapl %0"        \
+                               : "=r"(t) : "0"(t));    \
+                         (a)=t;                        \
+                       }
+#endif
+#elif defined(__powerpc)
+#define ROTATE(a,n)    ({ register unsigned int ret;           \
+                               asm ("rlwinm %0,%1,%2,0,31"     \
+                               : "=r"(ret)                     \
+                               : "r"(a), "I"(n));              \
+                          ret;                                 \
+                       })
+/* Endian_Reverse32 is not needed for PowerPC */
+#endif
  #endif
  
  /* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
-#if defined(WIN32)
+#ifdef ROTATE
+#ifndef Endian_Reverse32
  /* 5 instructions with rotate instruction, else 9 */
  #define Endian_Reverse32(a) \
         { \
-       unsigned long l=(a); \
-       (a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
+       unsigned long t=(a); \
+       (a)=((ROTATE(t,8)&0x00FF00FF)|(ROTATE((t&0x00FF00FF),24))); \
         }
+#endif
  #else
+#define ROTATE(a,n)     (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
+#ifndef Endian_Reverse32
  /* 6 instructions with rotate instruction, else 8 */
  #define Endian_Reverse32(a) \
         { \
-       unsigned long l=(a); \
-       l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
-       (a)=ROTATE(l,16L); \
+       unsigned long t=(a); \
+       t=(((t>>8)&0x00FF00FF)|((t&0x00FF00FF)<<8)); \
+       (a)=ROTATE(t,16); \
         }
  #endif
+/*
+ * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
+ * It's rewritten as above for two reasons:
+ *     - RISCs aren't good at long constants and have to explicitly
+ *       compose 'em with several (well, usually 2) instructions in a
+ *       register before performing the actual operation and (as you
+ *       already realized:-) having same constant should inspire the
+ *       compiler to permanently allocate the only register for it;
+ *     - most modern CPUs have two ALUs, but usually only one has
+ *       circuitry for shifts:-( this minor tweak inspires compiler
+ *       to schedule shift instructions in a better way...
+ *
+ *                             <appro@fy.chalmers.se>
+ */
+#endif
  
  /* As  pointed out by Wei Dai <weidai@eskimo.com>, F() below can be
   * simplified to the code in F_00_19.  Wei attributes these optimisations
@@ -195,13 +244,12 @@
  #define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d))) 
  #define        F_60_79(b,c,d)  F_20_39(b,c,d)
  
-#ifdef SHA_0
  #undef Xupdate
+#ifdef SHA_0
  #define Xupdate(a,i,ia,ib,ic,id) X[(i)&0x0f]=(a)=\
         (ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);
  #endif
  #ifdef SHA_1
-#undef Xupdate
  #define Xupdate(a,i,ia,ib,ic,id) (a)=\
         (ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);\
         X[(i)&0x0f]=(a)=ROTATE((a),1);
author	Ulf Möller <ulf@openssl.org>
	Wed, 5 May 1999 00:23:53 +0000 (00:23 +0000)
committer	Ulf Möller <ulf@openssl.org>
	Wed, 5 May 1999 00:23:53 +0000 (00:23 +0000)
CHANGES		patch \| blob \| history
Configure		patch \| blob \| history
crypto/opensslconf.h.in		patch \| blob \| history
crypto/sha/sha.h		patch \| blob \| history
crypto/sha/sha1dgst.c		patch \| blob \| history
crypto/sha/sha_dgst.c		patch \| blob \| history
crypto/sha/sha_locl.h		patch \| blob \| history