From bcba6cc60f8b44bec1232a32cd703dd032255a15 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sat, 12 Feb 2000 23:33:01 +0000 Subject: [PATCH] HP-UX tune-up: new unified configs, HP C compiler bug workaround. --- CHANGES | 3 + Configure | 41 +++++++++-- TABLE | 68 +++++++++++++----- config | 4 +- crypto/md5/md5_dgst.c | 152 ++++++++++++++++++++------------------- crypto/ripemd/rmd_dgst.c | 25 ++++--- crypto/sha/sha_locl.h | 43 +++++++---- 7 files changed, 214 insertions(+), 122 deletions(-) diff --git a/CHANGES b/CHANGES index 0bb813b7d0..6457df55ca 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,9 @@ Changes between 0.9.4 and 0.9.5 [xx XXX 2000] + *) HP-UX tune-up: new unified configs, HP C compiler bug workaround. + [Andy Polyakov] + *) Add -rand argument to smime and pkcs12 applications and read/write of seed file. [Steve Henson] diff --git a/Configure b/Configure index 6ce16f5fde..c2c0798aa9 100755 --- a/Configure +++ b/Configure @@ -169,11 +169,43 @@ my %table=( "irix64-mips4-gcc","gcc:-mabi=64 -mips4 -mmips-as -O3 -DTERMIOS -DB_ENDIAN -DBN_DIV3W::(unknown)::RC4_CHAR RC4_CHUNK DES_RISC2 DES_UNROLL SIXTY_FOUR_BIT_LONG:asm/mips3.o::", "irix64-mips4-cc", "cc:-64 -mips4 -O2 -use_readonly_const -DTERMIOS -DB_ENDIAN -DBN_DIV3W::(unknown)::RC4_CHAR RC4_CHUNK DES_RISC2 DES_UNROLL SIXTY_FOUR_BIT_LONG:asm/mips3.o::", +#### Unified HP-UX ANSI C configs. +# Special notes: +# - Originally we were optimizing at +O4 level. It should be noted +# that the only difference between +O3 and +O4 is global inter- +# procedural analysis. As it has to be performed during the link +# stage the compiler leaves behind certain pseudo-code in lib*.a +# which might be release or even patch level specific. Generating +# the machine code and analyzing the *whole* program appears to be +# *extremely* memory demanding while the performance gain is +# actually questionable. 
The situation is intensified by the default +# HP-UX data set size limit (infamous 'maxdsiz' tunable) of 64MB +# which is way too low for +O4. In other words, doesn't +O3 make +# more sense? +# - Keep in mind that the compiler by default generates code suitable +# for execution on the host you're currently compiling at. If you +# intend to use it across various PA-RISC processors consider adding +# +Dportable. +# - +DD64 is chosen in favour of +DA2.0W because it's meant to be +# compatible with *future* releases. +# - If you run ./Configure hpux-parisc-[g]cc manually don't forget to +# pass -D_REENTRANT on HP-UX 10 and later. +# - -DMD32_XARRAY triggers a workaround for a compiler bug we ran into in +# 32-bit message digests. (For the moment of this writing) HP C +# doesn't seem to "digest" too many local variables (they make "him" +# chew forever:-). For more details look up the MD32_XARRAY comment in +# crypto/sha/sha_locl.h. +# +# +"hpux-parisc-cc","cc:-Ae +O3 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY::::BN_LLONG DES_PTR DES_UNROLL DES_RISC1:::", +"hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::::BN_LLONG DES_PTR DES_UNROLL DES_RISC1:::", +"hpux64-parisc-cc","cc:-Ae +DD64 +O3 +ESlit -z -DB_ENDIAN -DMD32_XARRAY::-D_REENTRANT::SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT:::", + # HPUX 9.X config. # Don't use the bundled cc. It is broken. Use HP ANSI C if possible, or # egcs. gcc 2.8.1 is also broken. -"hpux-cc", "cc:-DB_ENDIAN -DBN_DIV2W -Ae +ESlit +O4 -z::(unknown)::BN_LLONG DES_PTR DES_UNROLL DES_RISC1:::", +"hpux-cc", "cc:-DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY -Ae +ESlit +O3 -z::(unknown)::BN_LLONG DES_PTR DES_UNROLL DES_RISC1:::", # If hpux-cc fails (e.g. during "make test"), try the next one; otherwise, # please report your OS and compiler version to the openssl-bugs@openssl.org # mailing list. 
@@ -184,7 +216,7 @@ my %table=( "hpux-brokengcc", "gcc:-DB_ENDIAN -DBN_DIV2W -O3::(unknown)::DES_PTR DES_UNROLL DES_RISC1:::", # HPUX 10.X config. Supports threads. -"hpux10-cc", "cc:-DB_ENDIAN -DBN_DIV2W -Ae +ESlit +O4 -z::-D_REENTRANT::BN_LLONG DES_PTR DES_UNROLL DES_RISC1:::", +"hpux10-cc", "cc:-DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY -Ae +ESlit +O3 -z::-D_REENTRANT::BN_LLONG DES_PTR DES_UNROLL DES_RISC1:::", # If hpux10-cc fails, try this one (if still fails, try deleting BN_LLONG): "hpux10-brokencc", "cc:-DB_ENDIAN -DBN_DIV2W -Ae +ESlit +O2 -z::-D_REENTRANT::BN_LLONG DES_PTR DES_UNROLL DES_RISC1:::", @@ -194,8 +226,9 @@ my %table=( # HPUX 11.X from www.globus.org. # Only works on PA-RISC 2.0 cpus, and not optimized. Why? -"hpux11-32bit-cc","cc:+DA2.0 -DB_ENDIAN -D_HPUX_SOURCE -Aa -Ae +ESlit::-D_REENTRANT::DES_PTR DES_UNROLL DES_RISC1:::", -"hpux11-64bit-cc","cc:+DA2.0W -g -D_HPUX_SOURCE -Aa -Ae +ESlit::-D_REENTRANT::SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT :::", +#"hpux11-32bit-cc","cc:+DA2.0 -DB_ENDIAN -D_HPUX_SOURCE -Aa -Ae +ESlit::-D_REENTRANT::DES_PTR DES_UNROLL DES_RISC1:::", +#"hpux11-64bit-cc","cc:+DA2.0W -g -D_HPUX_SOURCE -Aa -Ae +ESlit::-D_REENTRANT::SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT :::", +# Use unified settings above instead. 
# Dec Alpha, OSF/1 - the alpha164-cc is the flags for a 21164A with # the new compiler diff --git a/TABLE b/TABLE index 15e71fb536..7398fd938e 100644 --- a/TABLE +++ b/TABLE @@ -937,7 +937,7 @@ $rc5_obj = *** hpux-cc $cc = cc -$cflags = -DB_ENDIAN -DBN_DIV2W -Ae +ESlit +O4 -z +$cflags = -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY -Ae +ESlit +O3 -z $unistd = $thread_cflag = (unknown) $lflags = @@ -969,11 +969,11 @@ $rc4_obj = $rmd160_obj = $rc5_obj = -*** hpux10-brokencc +*** hpux-parisc-cc $cc = cc -$cflags = -DB_ENDIAN -DBN_DIV2W -Ae +ESlit +O2 -z +$cflags = -Ae +O3 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY $unistd = -$thread_cflag = -D_REENTRANT +$thread_cflag = $lflags = $bn_ops = BN_LLONG DES_PTR DES_UNROLL DES_RISC1 $bn_obj = @@ -986,13 +986,13 @@ $rc4_obj = $rmd160_obj = $rc5_obj = -*** hpux10-brokengcc +*** hpux-parisc-gcc $cc = gcc -$cflags = -DB_ENDIAN -DBN_DIV2W -O3 +$cflags = -O3 -DB_ENDIAN -DBN_DIV2W $unistd = -$thread_cflag = -D_REENTRANT +$thread_cflag = $lflags = -$bn_ops = DES_PTR DES_UNROLL DES_RISC1 +$bn_ops = BN_LLONG DES_PTR DES_UNROLL DES_RISC1 $bn_obj = $des_obj = $bf_obj = @@ -1003,9 +1003,9 @@ $rc4_obj = $rmd160_obj = $rc5_obj = -*** hpux10-cc +*** hpux10-brokencc $cc = cc -$cflags = -DB_ENDIAN -DBN_DIV2W -Ae +ESlit +O4 -z +$cflags = -DB_ENDIAN -DBN_DIV2W -Ae +ESlit +O2 -z $unistd = $thread_cflag = -D_REENTRANT $lflags = @@ -1020,13 +1020,13 @@ $rc4_obj = $rmd160_obj = $rc5_obj = -*** hpux10-gcc +*** hpux10-brokengcc $cc = gcc $cflags = -DB_ENDIAN -DBN_DIV2W -O3 $unistd = $thread_cflag = -D_REENTRANT $lflags = -$bn_ops = BN_LLONG DES_PTR DES_UNROLL DES_RISC1 +$bn_ops = DES_PTR DES_UNROLL DES_RISC1 $bn_obj = $des_obj = $bf_obj = @@ -1037,13 +1037,30 @@ $rc4_obj = $rmd160_obj = $rc5_obj = -*** hpux11-32bit-cc +*** hpux10-cc $cc = cc -$cflags = +DA2.0 -DB_ENDIAN -D_HPUX_SOURCE -Aa -Ae +ESlit +$cflags = -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY -Ae +ESlit +O3 -z $unistd = $thread_cflag = -D_REENTRANT $lflags = -$bn_ops = DES_PTR DES_UNROLL DES_RISC1 
+$bn_ops = BN_LLONG DES_PTR DES_UNROLL DES_RISC1 +$bn_obj = +$des_obj = +$bf_obj = +$md5_obj = +$sha1_obj = +$cast_obj = +$rc4_obj = +$rmd160_obj = +$rc5_obj = + +*** hpux10-gcc +$cc = gcc +$cflags = -DB_ENDIAN -DBN_DIV2W -O3 +$unistd = +$thread_cflag = -D_REENTRANT +$lflags = +$bn_ops = BN_LLONG DES_PTR DES_UNROLL DES_RISC1 $bn_obj = $des_obj = $bf_obj = @@ -1054,9 +1071,9 @@ $rc4_obj = $rmd160_obj = $rc5_obj = -*** hpux11-64bit-cc +*** hpux64-parisc-cc $cc = cc -$cflags = +DA2.0W -g -D_HPUX_SOURCE -Aa -Ae +ESlit +$cflags = -Ae +DD64 +O3 +ESlit -z -DB_ENDIAN -DMD32_XARRAY $unistd = $thread_cflag = -D_REENTRANT $lflags = @@ -1428,6 +1445,23 @@ $rc4_obj = $rmd160_obj = $rc5_obj = +*** rhapsody-ppc-cc +$cc = cc +$cflags = -O3 -DB_ENDIAN +$unistd = +$thread_cflag = (unknown) +$lflags = +$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR +$bn_obj = +$des_obj = +$bf_obj = +$md5_obj = +$sha1_obj = +$cast_obj = +$rc4_obj = +$rmd160_obj = +$rc5_obj = + *** sco5-cc $cc = cc $cflags = diff --git a/config b/config index 8752e2304d..e9ab8befee 100755 --- a/config +++ b/config @@ -449,8 +449,10 @@ case "$GUESSOS" in BS2000-siemens-sysv4) OUT="BS2000-OSD" ;; RM*-siemens-sysv4) OUT="ReliantUNIX" ;; *-siemens-sysv4) OUT="SINIX" ;; + *-hpux1*) OUT="hpux-parisc-$CC" + options="$options -D_REENTRANT" ;; + *-hpux) OUT="hpux-parisc-$CC" ;; # these are all covered by the catchall below - # *-hpux*) OUT="hpux-$CC" ;; # *-aix) OUT="aix-$CC" ;; # *-dgux) OUT="dgux" ;; *) OUT=`echo $GUESSOS | awk -F- '{print $3}'`;; diff --git a/crypto/md5/md5_dgst.c b/crypto/md5/md5_dgst.c index 9ff9f938cc..23d196b8d4 100644 --- a/crypto/md5/md5_dgst.c +++ b/crypto/md5/md5_dgst.c @@ -186,6 +186,9 @@ void md5_block_host_order (MD5_CTX *c, const void *data, int num) #endif #ifndef md5_block_data_order +#ifdef X +#undef X +#endif void md5_block_data_order (MD5_CTX *c, const void *data_, int num) { const unsigned char *data=data_; @@ -204,16 +207,15 @@ void md5_block_data_order (MD5_CTX *c, const void 
*data_, int num) * * */ - MD5_LONG X[MD5_LBLOCK]; - /* - * In case you wonder why don't I use c->data for this. - * RISCs usually have a handful of registers and if X is - * declared as automatic array good optimizing compiler - * shall accomodate at least part of it in register bank - * instead of memory. - * - * - */ +#ifndef MD32_XARRAY + /* See comment in crypto/sha/sha_locl.h for details. */ + unsigned long XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7, + XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15; +# define X(i) XX##i +#else + MD5_LONG XX[MD5_LBLOCK]; +# define X(i) XX[i] +#endif A=c->A; B=c->B; @@ -222,75 +224,75 @@ void md5_block_data_order (MD5_CTX *c, const void *data_, int num) for (;num--;) { - HOST_c2l(data,l); X[ 0]=l; HOST_c2l(data,l); X[ 1]=l; + HOST_c2l(data,l); X( 0)=l; HOST_c2l(data,l); X( 1)=l; /* Round 0 */ - R0(A,B,C,D,X[ 0], 7,0xd76aa478L); HOST_c2l(data,l); X[ 2]=l; - R0(D,A,B,C,X[ 1],12,0xe8c7b756L); HOST_c2l(data,l); X[ 3]=l; - R0(C,D,A,B,X[ 2],17,0x242070dbL); HOST_c2l(data,l); X[ 4]=l; - R0(B,C,D,A,X[ 3],22,0xc1bdceeeL); HOST_c2l(data,l); X[ 5]=l; - R0(A,B,C,D,X[ 4], 7,0xf57c0fafL); HOST_c2l(data,l); X[ 6]=l; - R0(D,A,B,C,X[ 5],12,0x4787c62aL); HOST_c2l(data,l); X[ 7]=l; - R0(C,D,A,B,X[ 6],17,0xa8304613L); HOST_c2l(data,l); X[ 8]=l; - R0(B,C,D,A,X[ 7],22,0xfd469501L); HOST_c2l(data,l); X[ 9]=l; - R0(A,B,C,D,X[ 8], 7,0x698098d8L); HOST_c2l(data,l); X[10]=l; - R0(D,A,B,C,X[ 9],12,0x8b44f7afL); HOST_c2l(data,l); X[11]=l; - R0(C,D,A,B,X[10],17,0xffff5bb1L); HOST_c2l(data,l); X[12]=l; - R0(B,C,D,A,X[11],22,0x895cd7beL); HOST_c2l(data,l); X[13]=l; - R0(A,B,C,D,X[12], 7,0x6b901122L); HOST_c2l(data,l); X[14]=l; - R0(D,A,B,C,X[13],12,0xfd987193L); HOST_c2l(data,l); X[15]=l; - R0(C,D,A,B,X[14],17,0xa679438eL); - R0(B,C,D,A,X[15],22,0x49b40821L); + R0(A,B,C,D,X( 0), 7,0xd76aa478L); HOST_c2l(data,l); X( 2)=l; + R0(D,A,B,C,X( 1),12,0xe8c7b756L); HOST_c2l(data,l); X( 3)=l; + R0(C,D,A,B,X( 2),17,0x242070dbL); HOST_c2l(data,l); X( 4)=l; + R0(B,C,D,A,X( 
3),22,0xc1bdceeeL); HOST_c2l(data,l); X( 5)=l; + R0(A,B,C,D,X( 4), 7,0xf57c0fafL); HOST_c2l(data,l); X( 6)=l; + R0(D,A,B,C,X( 5),12,0x4787c62aL); HOST_c2l(data,l); X( 7)=l; + R0(C,D,A,B,X( 6),17,0xa8304613L); HOST_c2l(data,l); X( 8)=l; + R0(B,C,D,A,X( 7),22,0xfd469501L); HOST_c2l(data,l); X( 9)=l; + R0(A,B,C,D,X( 8), 7,0x698098d8L); HOST_c2l(data,l); X(10)=l; + R0(D,A,B,C,X( 9),12,0x8b44f7afL); HOST_c2l(data,l); X(11)=l; + R0(C,D,A,B,X(10),17,0xffff5bb1L); HOST_c2l(data,l); X(12)=l; + R0(B,C,D,A,X(11),22,0x895cd7beL); HOST_c2l(data,l); X(13)=l; + R0(A,B,C,D,X(12), 7,0x6b901122L); HOST_c2l(data,l); X(14)=l; + R0(D,A,B,C,X(13),12,0xfd987193L); HOST_c2l(data,l); X(15)=l; + R0(C,D,A,B,X(14),17,0xa679438eL); + R0(B,C,D,A,X(15),22,0x49b40821L); /* Round 1 */ - R1(A,B,C,D,X[ 1], 5,0xf61e2562L); - R1(D,A,B,C,X[ 6], 9,0xc040b340L); - R1(C,D,A,B,X[11],14,0x265e5a51L); - R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL); - R1(A,B,C,D,X[ 5], 5,0xd62f105dL); - R1(D,A,B,C,X[10], 9,0x02441453L); - R1(C,D,A,B,X[15],14,0xd8a1e681L); - R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L); - R1(A,B,C,D,X[ 9], 5,0x21e1cde6L); - R1(D,A,B,C,X[14], 9,0xc33707d6L); - R1(C,D,A,B,X[ 3],14,0xf4d50d87L); - R1(B,C,D,A,X[ 8],20,0x455a14edL); - R1(A,B,C,D,X[13], 5,0xa9e3e905L); - R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L); - R1(C,D,A,B,X[ 7],14,0x676f02d9L); - R1(B,C,D,A,X[12],20,0x8d2a4c8aL); + R1(A,B,C,D,X( 1), 5,0xf61e2562L); + R1(D,A,B,C,X( 6), 9,0xc040b340L); + R1(C,D,A,B,X(11),14,0x265e5a51L); + R1(B,C,D,A,X( 0),20,0xe9b6c7aaL); + R1(A,B,C,D,X( 5), 5,0xd62f105dL); + R1(D,A,B,C,X(10), 9,0x02441453L); + R1(C,D,A,B,X(15),14,0xd8a1e681L); + R1(B,C,D,A,X( 4),20,0xe7d3fbc8L); + R1(A,B,C,D,X( 9), 5,0x21e1cde6L); + R1(D,A,B,C,X(14), 9,0xc33707d6L); + R1(C,D,A,B,X( 3),14,0xf4d50d87L); + R1(B,C,D,A,X( 8),20,0x455a14edL); + R1(A,B,C,D,X(13), 5,0xa9e3e905L); + R1(D,A,B,C,X( 2), 9,0xfcefa3f8L); + R1(C,D,A,B,X( 7),14,0x676f02d9L); + R1(B,C,D,A,X(12),20,0x8d2a4c8aL); /* Round 2 */ - R2(A,B,C,D,X[ 5], 4,0xfffa3942L); - R2(D,A,B,C,X[ 
8],11,0x8771f681L); - R2(C,D,A,B,X[11],16,0x6d9d6122L); - R2(B,C,D,A,X[14],23,0xfde5380cL); - R2(A,B,C,D,X[ 1], 4,0xa4beea44L); - R2(D,A,B,C,X[ 4],11,0x4bdecfa9L); - R2(C,D,A,B,X[ 7],16,0xf6bb4b60L); - R2(B,C,D,A,X[10],23,0xbebfbc70L); - R2(A,B,C,D,X[13], 4,0x289b7ec6L); - R2(D,A,B,C,X[ 0],11,0xeaa127faL); - R2(C,D,A,B,X[ 3],16,0xd4ef3085L); - R2(B,C,D,A,X[ 6],23,0x04881d05L); - R2(A,B,C,D,X[ 9], 4,0xd9d4d039L); - R2(D,A,B,C,X[12],11,0xe6db99e5L); - R2(C,D,A,B,X[15],16,0x1fa27cf8L); - R2(B,C,D,A,X[ 2],23,0xc4ac5665L); + R2(A,B,C,D,X( 5), 4,0xfffa3942L); + R2(D,A,B,C,X( 8),11,0x8771f681L); + R2(C,D,A,B,X(11),16,0x6d9d6122L); + R2(B,C,D,A,X(14),23,0xfde5380cL); + R2(A,B,C,D,X( 1), 4,0xa4beea44L); + R2(D,A,B,C,X( 4),11,0x4bdecfa9L); + R2(C,D,A,B,X( 7),16,0xf6bb4b60L); + R2(B,C,D,A,X(10),23,0xbebfbc70L); + R2(A,B,C,D,X(13), 4,0x289b7ec6L); + R2(D,A,B,C,X( 0),11,0xeaa127faL); + R2(C,D,A,B,X( 3),16,0xd4ef3085L); + R2(B,C,D,A,X( 6),23,0x04881d05L); + R2(A,B,C,D,X( 9), 4,0xd9d4d039L); + R2(D,A,B,C,X(12),11,0xe6db99e5L); + R2(C,D,A,B,X(15),16,0x1fa27cf8L); + R2(B,C,D,A,X( 2),23,0xc4ac5665L); /* Round 3 */ - R3(A,B,C,D,X[ 0], 6,0xf4292244L); - R3(D,A,B,C,X[ 7],10,0x432aff97L); - R3(C,D,A,B,X[14],15,0xab9423a7L); - R3(B,C,D,A,X[ 5],21,0xfc93a039L); - R3(A,B,C,D,X[12], 6,0x655b59c3L); - R3(D,A,B,C,X[ 3],10,0x8f0ccc92L); - R3(C,D,A,B,X[10],15,0xffeff47dL); - R3(B,C,D,A,X[ 1],21,0x85845dd1L); - R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL); - R3(D,A,B,C,X[15],10,0xfe2ce6e0L); - R3(C,D,A,B,X[ 6],15,0xa3014314L); - R3(B,C,D,A,X[13],21,0x4e0811a1L); - R3(A,B,C,D,X[ 4], 6,0xf7537e82L); - R3(D,A,B,C,X[11],10,0xbd3af235L); - R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL); - R3(B,C,D,A,X[ 9],21,0xeb86d391L); + R3(A,B,C,D,X( 0), 6,0xf4292244L); + R3(D,A,B,C,X( 7),10,0x432aff97L); + R3(C,D,A,B,X(14),15,0xab9423a7L); + R3(B,C,D,A,X( 5),21,0xfc93a039L); + R3(A,B,C,D,X(12), 6,0x655b59c3L); + R3(D,A,B,C,X( 3),10,0x8f0ccc92L); + R3(C,D,A,B,X(10),15,0xffeff47dL); + R3(B,C,D,A,X( 1),21,0x85845dd1L); + R3(A,B,C,D,X( 8), 
6,0x6fa87e4fL); + R3(D,A,B,C,X(15),10,0xfe2ce6e0L); + R3(C,D,A,B,X( 6),15,0xa3014314L); + R3(B,C,D,A,X(13),21,0x4e0811a1L); + R3(A,B,C,D,X( 4), 6,0xf7537e82L); + R3(D,A,B,C,X(11),10,0xbd3af235L); + R3(C,D,A,B,X( 2),15,0x2ad7d2bbL); + R3(B,C,D,A,X( 9),21,0xeb86d391L); A = c->A += A; B = c->B += B; diff --git a/crypto/ripemd/rmd_dgst.c b/crypto/ripemd/rmd_dgst.c index be3eb2204a..bdfae270b6 100644 --- a/crypto/ripemd/rmd_dgst.c +++ b/crypto/ripemd/rmd_dgst.c @@ -85,14 +85,14 @@ void RIPEMD160_Init(RIPEMD160_CTX *c) #ifdef X #undef X #endif -#define X(i) X[(i)] +#define X(i) XX[i] void ripemd160_block_host_order (RIPEMD160_CTX *ctx, const void *p, int num) { - const RIPEMD160_LONG *X=p; + const RIPEMD160_LONG *XX=p; register unsigned long A,B,C,D,E; register unsigned long a,b,c,d,e; - for (;num--;X+=HASH_LBLOCK) + for (;num--;XX+=HASH_LBLOCK) { A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D; E=ctx->E; @@ -286,21 +286,20 @@ void ripemd160_block_host_order (RIPEMD160_CTX *ctx, const void *p, int num) #ifdef X #undef X #endif -#define X(i) X##i void ripemd160_block_data_order (RIPEMD160_CTX *ctx, const void *p, int num) { const unsigned char *data=p; register unsigned long A,B,C,D,E; unsigned long a,b,c,d,e,l; - RIPEMD160_LONG X0, X1, X2, X3, X4, X5, X6, X7, - X8, X9,X10,X11,X12,X13,X14,X15; - /* - * Originally the above was declared as RIPEMD160_LONG X[16]; - * The idea was to make RISC compilers to accomodate at - * least part of X in the register bank. Unfortunately not - * all compilers get this idea:-( - * - */ +#ifndef MD32_XARRAY + /* See comment in crypto/sha/sha_locl.h for details. 
*/ + unsigned long XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7, + XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15; +# define X(i) XX##i +#else + RIPEMD160_LONG XX[16]; +# define X(i) XX[i] +#endif for (;num--;) { diff --git a/crypto/sha/sha_locl.h b/crypto/sha/sha_locl.h index 0a78b399a9..3e6f489b87 100644 --- a/crypto/sha/sha_locl.h +++ b/crypto/sha/sha_locl.h @@ -200,18 +200,39 @@ void HASH_INIT (SHA_CTX *c) (f)=xa+(e)+K_60_79+ROTATE((a),5)+F_60_79((b),(c),(d)); \ (b)=ROTATE((b),30); +#ifdef X +#undef X +#endif +#ifndef MD32_XARRAY + /* + * Originally X was an array. As it's automatic it's natural + * to expect RISC compiler to accommodate at least part of it in + * the register bank, isn't it? Unfortunately not all compilers + * "find" this expectation reasonable:-( In order to make such + * compilers generate better code I replace X[] with a bunch of + * XX0, XX1, etc. See the function body below... + * + */ +# define X(i) XX##i +#else + /* + * However! Some compilers (most notably HP C) get overwhelmed by + * that many local variables so that we have to have a way to + * fall back to the original behavior. 
+ */ +# define X(i) XX[i] +#endif + #ifndef DONT_IMPLEMENT_BLOCK_HOST_ORDER void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, int num) { const SHA_LONG *W=d; register unsigned long A,B,C,D,E,T; -#ifdef SHA_XARRAY - SHA_LONG X[16]; -# define X(i) X[(i)] +#ifndef MD32_XARRAY + unsigned long XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7, + XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15; #else - unsigned long X0, X1, X2, X3, X4, X5, X6, X7, - X8, X9,X10,X11,X12,X13,X14,X15; -# define X(i) X##i + SHA_LONG XX[16]; #endif A=c->h0; @@ -332,13 +353,11 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, int num) { const unsigned char *data=p; register unsigned long A,B,C,D,E,T,l; -#ifdef SHA_XARRAY - SHA_LONG X[16]; -# define X(i) X[(i)] +#ifndef MD32_XARRAY + unsigned long XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7, + XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15; #else - unsigned long X0, X1, X2, X3, X4, X5, X6, X7, - X8, X9,X10,X11,X12,X13,X14,X15; -# define X(i) X##i + SHA_LONG XX[16]; #endif A=c->h0; -- 2.25.1