# *-generic* is endian-neutral target, but ./config is free to
# throw in -D[BL]_ENDIAN, whichever appropriate...
"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linix_ppc32-mont.o:::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linix_ppc32-mont.o:::::sha1-ppc_linux32.o::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
#### IA-32 targets...
"linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
####
"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# -bpowerpc64-linux is transient option, -m64 should be the one to use...
-"linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::sha1-ppc_linux64.o::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
#### IBM's AIX.
"aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::",
-"aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::::::::dlfcn:",
-"aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::::::::dlfcn::::::-X64",
+"aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:",
+"aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn::::::-X64",
# Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE
# at build time. $OBJECT_MODE is respected at ./config stage!
-"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
-"aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64",
+"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
+"aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64",
#
# Cray T90 and similar (SDSC)
##### MacOS X (a.k.a. Rhapsody or Darwin) setup
"rhapsody-ppc-cc","cc:-O3 -DB_ENDIAN::(unknown):MACOSX_RHAPSODY::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}::",
-"darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
-"darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
+"darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
+"darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::sha1-ppc_osx64.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
"darwin-i386-cc","cc:-O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
-"debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
+"debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
##### A/UX
"aux3-gcc","gcc:-O2 -DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:::",
--- /dev/null
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. Rights for redistribution and usage in source and binary
+# forms are granted according to the OpenSSL license.
+# ====================================================================
+
+# I let hardware handle unaligned input, except on page boundaries
+# (see below for details). Otherwise straightforward implementation
+# with X vector in register bank. The module is big-endian [which is
+# not big deal as there're no little-endian targets left around].
+
+# gcc-4.0.0 -m64 -m32
+# --------------------------
+# sha1 +76% +59%
+
+$output = shift;
+
+if ($output =~ /64\.s/) {
+ $SIZE_T =8;
+ $RZONE =288;
+ $UCMP ="cmpld";
+ $STU ="stdu";
+ $POP ="ld";
+ $PUSH ="std";
+} elsif ($output =~ /32\.s/) {
+ $SIZE_T =4;
+ $RZONE =224;
+ $UCMP ="cmplw";
+ $STU ="stwu";
+ $POP ="lwz";
+ $PUSH ="stw";
+} else { die "nonsense $output"; }
+
+( defined shift || open STDOUT,"| $^X ../perlasm/ppc-xlate.pl $output" ) ||
+ die "can't call ../perlasm/ppc-xlate.pl: $!";
+
+$FRAME=24*$SIZE_T;
+
+$K ="r0";
+$sp ="r1";
+$toc="r2";
+$ctx="r3";
+$inp="r4";
+$num="r5";
+$t0 ="r15";
+$t1 ="r6";
+
+$A ="r7";
+$B ="r8";
+$C ="r9";
+$D ="r10";
+$E ="r11";
+$T ="r12";
+
+@V=($A,$B,$C,$D,$E,$T);
+@X=("r16","r17","r18","r19","r20","r21","r22","r23",
+ "r24","r25","r26","r27","r28","r29","r30","r31");
+
+sub BODY_00_19 {
+my ($i,$a,$b,$c,$d,$e,$f)=@_;
+my $j=$i+1;
+$code.=<<___ if ($i==0);
+ lwz @X[$i],$i*4($inp)
+___
+$code.=<<___ if ($i<15);
+ lwz @X[$j],$j*4($inp)
+ add $f,$K,$e
+ rotlwi $e,$a,5
+ add $f,$f,@X[$i]
+ and $t0,$c,$b
+ add $f,$f,$e
+ andc $t1,$d,$b
+ rotlwi $b,$b,30
+ or $t0,$t0,$t1
+ add $f,$f,$t0
+___
+$code.=<<___ if ($i>=15);
+ add $f,$K,$e
+ rotlwi $e,$a,5
+ xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
+ add $f,$f,@X[$i%16]
+ and $t0,$c,$b
+ xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
+ add $f,$f,$e
+ andc $t1,$d,$b
+ rotlwi $b,$b,30
+ or $t0,$t0,$t1
+ xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
+ add $f,$f,$t0
+ rotlwi @X[$j%16],@X[$j%16],1
+___
+}
+
+sub BODY_20_39 {
+my ($i,$a,$b,$c,$d,$e,$f)=@_;
+my $j=$i+1;
+$code.=<<___ if ($i<79);
+ add $f,$K,$e
+ rotlwi $e,$a,5
+ xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
+ add $f,$f,@X[$i%16]
+ xor $t0,$b,$c
+ xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
+ add $f,$f,$e
+ rotlwi $b,$b,30
+ xor $t0,$t0,$d
+ xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
+ add $f,$f,$t0
+ rotlwi @X[$j%16],@X[$j%16],1
+___
+$code.=<<___ if ($i==79);
+ add $f,$K,$e
+ rotlwi $e,$a,5
+ lwz r16,0($ctx)
+ add $f,$f,@X[$i%16]
+ xor $t0,$b,$c
+ lwz r17,4($ctx)
+ add $f,$f,$e
+ rotlwi $b,$b,30
+ lwz r18,8($ctx)
+ xor $t0,$t0,$d
+ lwz r19,12($ctx)
+ add $f,$f,$t0
+ lwz r20,16($ctx)
+___
+}
+
+sub BODY_40_59 {
+my ($i,$a,$b,$c,$d,$e,$f)=@_;
+my $j=$i+1;
+$code.=<<___;
+ add $f,$K,$e
+ rotlwi $e,$a,5
+ xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
+ add $f,$f,@X[$i%16]
+ and $t0,$b,$c
+ xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
+ add $f,$f,$e
+ or $t1,$b,$c
+ rotlwi $b,$b,30
+ xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
+ and $t1,$t1,$d
+ or $t0,$t0,$t1
+ rotlwi @X[$j%16],@X[$j%16],1
+ add $f,$f,$t0
+___
+}
+
+$code=<<___;
+.text
+
+.globl .sha1_block_asm_data_order
+.align 4
+.sha1_block_asm_data_order:
+ mflr r0
+ $STU $sp,`-($FRAME+64+$RZONE)`($sp)
+ $PUSH r0,`$FRAME-$SIZE_T*18`($sp)
+ $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
+ $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
+ $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
+ $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
+ $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
+ $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
+ $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
+ $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
+ $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
+ $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
+ $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
+ $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
+ $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
+ $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
+ $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
+ $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
+ $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
+ lwz $A,0($ctx)
+ lwz $B,4($ctx)
+ lwz $C,8($ctx)
+ lwz $D,12($ctx)
+ lwz $E,16($ctx)
+ andi. r0,$inp,3
+ bne Lunaligned
+Laligned:
+ mtctr $num
+ bl Lsha1_block_private
+Ldone:
+ $POP r0,`$FRAME-$SIZE_T*18`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+ $POP r17,`$FRAME-$SIZE_T*15`($sp)
+ $POP r18,`$FRAME-$SIZE_T*14`($sp)
+ $POP r19,`$FRAME-$SIZE_T*13`($sp)
+ $POP r20,`$FRAME-$SIZE_T*12`($sp)
+ $POP r21,`$FRAME-$SIZE_T*11`($sp)
+ $POP r22,`$FRAME-$SIZE_T*10`($sp)
+ $POP r23,`$FRAME-$SIZE_T*9`($sp)
+ $POP r24,`$FRAME-$SIZE_T*8`($sp)
+ $POP r25,`$FRAME-$SIZE_T*7`($sp)
+ $POP r26,`$FRAME-$SIZE_T*6`($sp)
+ $POP r27,`$FRAME-$SIZE_T*5`($sp)
+ $POP r28,`$FRAME-$SIZE_T*4`($sp)
+ $POP r29,`$FRAME-$SIZE_T*3`($sp)
+ $POP r30,`$FRAME-$SIZE_T*2`($sp)
+ $POP r31,`$FRAME-$SIZE_T*1`($sp)
+ mtlr r0
+ addi $sp,$sp,`$FRAME+64+$RZONE`
+ blr
+___
+
+# PowerPC specification allows an implementation to be ill-behaved
+# upon unaligned access which crosses page boundary. "Better safe
+# than sorry" principle makes me treat it specially. But I don't
+# look for particular offending word, but rather for 64-byte input
+# block which crosses the boundary. Once found that block is aligned
+# and hashed separately...
+$code.=<<___;
+.align 4
+Lunaligned:
+ li $t1,4096
+ subf $t1,$inp,$t1
+ andi. $t1,$t1,4095 ; distance to closest page boundary
+ srwi. $t1,$t1,6 ; t1/=64
+ beq Lcross_page
+ $UCMP $num,$t1
+ ble- Laligned ; didn't cross the page boundary
+ mtctr $t1
+ subf $num,$t1,$num
+ bl Lsha1_block_private
+Lcross_page:
+ li $t1,16
+ mtctr $t1
+ addi r20,$sp,$FRAME ; spot below the frame
+Lmemcpy:
+ lbz r16,0($inp)
+ lbz r17,1($inp)
+ lbz r18,2($inp)
+ lbz r19,3($inp)
+ addi $inp,$inp,4
+ stb r16,0(r20)
+ stb r17,1(r20)
+ stb r18,2(r20)
+ stb r19,3(r20)
+ addi r20,r20,4
+ bdnz Lmemcpy
+
+ $PUSH $inp,`$FRAME-$SIZE_T*19`($sp)
+ li $t1,1
+ addi $inp,$sp,$FRAME
+ mtctr $t1
+ bl Lsha1_block_private
+ $POP $inp,`$FRAME-$SIZE_T*19`($sp)
+ addic. $num,$num,-1
+ bne- Lunaligned
+ b Ldone
+___
+
+# This is private block function, which uses tailored calling
+# interface, namely upon entry SHA_CTX is pre-loaded to given
+# registers and counter register contains amount of chunks to
+# digest...
+$code.=<<___;
+.align 4
+Lsha1_block_private:
+___
+$code.=<<___; # load K_00_19
+ lis $K,0x5a82
+ ori $K,$K,0x7999
+___
+for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___; # load K_20_39
+ lis $K,0x6ed9
+ ori $K,$K,0xeba1
+___
+for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___; # load K_40_59
+ lis $K,0x8f1b
+ ori $K,$K,0xbcdc
+___
+for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___; # load K_60_79
+ lis $K,0xca62
+ ori $K,$K,0xc1d6
+___
+for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+ add r16,r16,$E
+ add r17,r17,$T
+ add r18,r18,$A
+ add r19,r19,$B
+ add r20,r20,$C
+ stw r16,0($ctx)
+ mr $A,r16
+ stw r17,4($ctx)
+ mr $B,r17
+ stw r18,8($ctx)
+ mr $C,r18
+ stw r19,12($ctx)
+ mr $D,r19
+ stw r20,16($ctx)
+ mr $E,r20
+ addi $inp,$inp,`16*4`
+ bdnz- Lsha1_block_private
+ blr
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+print $code;
+close STDOUT;