my $bits1="THIRTY_TWO_BIT ";
my $bits2="SIXTY_FOUR_BIT ";
-my $x86_elf_asm="x86cpuid-elf.o:bn86-elf.o co86-elf.o mo86-elf.o:dx86-elf.o yx86-elf.o:ax86-elf.o:bx86-elf.o:mx86-elf.o:sx86-elf.o s512sse2-elf.o:cx86-elf.o:rx86-elf.o:rm86-elf.o:r586-elf.o:wp_block.o w86mmx-elf.o:";
-my $x86_coff_asm="x86cpuid-cof.o:bn86-cof.o co86-cof.o mo86-cof.o:dx86-cof.o yx86-cof.o:ax86-cof.o:bx86-cof.o:mx86-cof.o:sx86-cof.o s512sse2-cof.o:cx86-cof.o:rx86-cof.o:rm86-cof.o:r586-cof.o:wp_block.o w86mmx-cof.o:";
-my $x86_out_asm="x86cpuid-out.o:bn86-out.o co86-out.o mo86-out.o:dx86-out.o yx86-out.o:ax86-out.o:bx86-out.o:mx86-out.o:sx86-out.o s512sse2-out.o:cx86-out.o:rx86-out.o:rm86-out.o:r586-out.o:wp_block.o w86mmx-out.o:";
+my $x86_elf_asm="x86cpuid-elf.o:bn86-elf.o co86-elf.o mo86-elf.o:dx86-elf.o yx86-elf.o:ax86-elf.o:bx86-elf.o:mx86-elf.o:sx86-elf.o sha256x86-elf.o sha512x86-elf.o:cx86-elf.o:rx86-elf.o:rm86-elf.o:r586-elf.o:wp_block.o w86mmx-elf.o:";
+my $x86_coff_asm="x86cpuid-cof.o:bn86-cof.o co86-cof.o mo86-cof.o:dx86-cof.o yx86-cof.o:ax86-cof.o:bx86-cof.o:mx86-cof.o:sx86-cof.o sha256x86-cof.o sha512x86-cof.o:cx86-cof.o:rx86-cof.o:rm86-cof.o:r586-cof.o:wp_block.o w86mmx-cof.o:";
+my $x86_out_asm="x86cpuid-out.o:bn86-out.o co86-out.o mo86-out.o:dx86-out.o yx86-out.o:ax86-out.o:bx86-out.o:mx86-out.o:sx86-out.o sha256x86-out.o sha512x86-out.o:cx86-out.o:rx86-out.o:rm86-out.o:r586-out.o:wp_block.o w86mmx-out.o:";
my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:";
my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o::::";
$aes_obj = ax86-out.o
$bf_obj = bx86-out.o
$md5_obj = mx86-out.o
-$sha1_obj = sx86-out.o s512sse2-out.o
+$sha1_obj = sx86-out.o sha256x86-out.o sha512x86-out.o
$cast_obj = cx86-out.o
$rc4_obj = rx86-out.o
$rmd160_obj = rm86-out.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-cof.o
$bf_obj = bx86-cof.o
$md5_obj = mx86-cof.o
-$sha1_obj = sx86-cof.o s512sse2-cof.o
+$sha1_obj = sx86-cof.o sha256x86-cof.o sha512x86-cof.o
$cast_obj = cx86-cof.o
$rc4_obj = rx86-cof.o
$rmd160_obj = rm86-cof.o
$aes_obj = ax86-out.o
$bf_obj = bx86-out.o
$md5_obj = mx86-out.o
-$sha1_obj = sx86-out.o s512sse2-out.o
+$sha1_obj = sx86-out.o sha256x86-out.o sha512x86-out.o
$cast_obj = cx86-out.o
$rc4_obj = rx86-out.o
$rmd160_obj = rm86-out.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-out.o
$bf_obj = bx86-out.o
$md5_obj = mx86-out.o
-$sha1_obj = sx86-out.o s512sse2-out.o
+$sha1_obj = sx86-out.o sha256x86-out.o sha512x86-out.o
$cast_obj = cx86-out.o
$rc4_obj = rx86-out.o
$rmd160_obj = rm86-out.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-cof.o
$bf_obj = bx86-cof.o
$md5_obj = mx86-cof.o
-$sha1_obj = sx86-cof.o s512sse2-cof.o
+$sha1_obj = sx86-cof.o sha256x86-cof.o sha512x86-cof.o
$cast_obj = cx86-cof.o
$rc4_obj = rx86-cof.o
$rmd160_obj = rm86-cof.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
$aes_obj = ax86-elf.o
$bf_obj = bx86-elf.o
$md5_obj = mx86-elf.o
-$sha1_obj = sx86-elf.o s512sse2-elf.o
+$sha1_obj = sx86-elf.o sha256x86-elf.o sha512x86-elf.o
$cast_obj = cx86-elf.o
$rc4_obj = rx86-elf.o
$rmd160_obj = rm86-elf.o
# ELF
sx86-elf.s: asm/sha1-586.pl ../perlasm/x86asm.pl
(cd asm; $(PERL) sha1-586.pl elf $(CFLAGS) $(PROCESSOR) > ../$@)
-s512sse2-elf.s: asm/sha512-sse2.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) sha512-sse2.pl elf $(CFLAGS) $(PROCESSOR) > ../$@)
+sha256x86-elf.s: asm/sha256-586.pl ../perlasm/x86asm.pl
+ (cd asm; $(PERL) sha256-586.pl elf $(CFLAGS) $(PROCESSOR) > ../$@)
+sha512x86-elf.s: asm/sha512-586.pl ../perlasm/x86asm.pl
+ (cd asm; $(PERL) sha512-586.pl elf $(CFLAGS) $(PROCESSOR) > ../$@)
# COFF
sx86-cof.s: asm/sha1-586.pl ../perlasm/x86asm.pl
(cd asm; $(PERL) sha1-586.pl coff $(CFLAGS) $(PROCESSOR) > ../$@)
-s512sse2-cof.s: asm/sha512-sse2.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) sha512-sse2.pl coff $(CFLAGS) $(PROCESSOR) > ../$@)
+sha256x86-cof.s: asm/sha256-586.pl ../perlasm/x86asm.pl
+ (cd asm; $(PERL) sha256-586.pl coff $(CFLAGS) $(PROCESSOR) > ../$@)
+sha512x86-cof.s: asm/sha512-586.pl ../perlasm/x86asm.pl
+ (cd asm; $(PERL) sha512-586.pl coff $(CFLAGS) $(PROCESSOR) > ../$@)
# a.out
sx86-out.s: asm/sha1-586.pl ../perlasm/x86asm.pl
(cd asm; $(PERL) sha1-586.pl a.out $(CFLAGS) $(PROCESSOR) > ../$@)
-s512sse2-out.s: asm/sha512-sse2.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) sha512-sse2.pl a.out $(CFLAGS) $(PROCESSOR) > ../$@)
+sha256x86-out.s: asm/sha256-586.pl ../perlasm/x86asm.pl
+ (cd asm; $(PERL) sha256-586.pl a.out $(CFLAGS) $(PROCESSOR) > ../$@)
+sha512x86-out.s: asm/sha512-586.pl ../perlasm/x86asm.pl
+ (cd asm; $(PERL) sha512-586.pl a.out $(CFLAGS) $(PROCESSOR) > ../$@)
sha1-ia64.s: asm/sha1-ia64.pl
(cd asm; $(PERL) sha1-ia64.pl $(CFLAGS) ) > $@
&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
+$sse2=0;
+for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
+
+&external_label("OPENSSL_ia32cap_P") if ($sse2);
+
$Tlo=&DWP(0,"esp"); $Thi=&DWP(4,"esp");
$Alo=&DWP(8,"esp"); $Ahi=&DWP(8+4,"esp");
$Blo=&DWP(16,"esp"); $Bhi=&DWP(16+4,"esp");
&movq ("mm1",$E); # %mm1 is sliding right
&movq ("mm2",$E); # %mm2 is sliding left
&psrlq ("mm1",14);
- &movq ($Esse2,$E); # module-scheduled save e
+ &movq ($Esse2,$E); # modulo-scheduled save e
&psllq ("mm2",23);
&movq ("mm3","mm1"); # %mm3 is T1
&psrlq ("mm1",4);
&pxor ("mm7","mm6");
&psllq ("mm6",6);
&pxor ("mm7","mm5");
- &movq (&QWP(0,"esp"),$A); # module-scheduled save a
+ &movq (&QWP(0,"esp"),$A); # modulo-scheduled save a
&pxor ("mm7","mm6"); # T2=Sigma0_512(a)
&movq ("mm5",$A); # %mm5=a
&mov (&DWP(8,"esp"),"eax"); # inp+num*128
&mov (&DWP(12,"esp"),"ebx"); # saved sp
- &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("pic_point"));
+if ($sse2) {
+ &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
&bt (&DWP(0,"edx"),26);
&jnc (&label("loop_x86"));
&emms ();
&mov ("esp",&DWP(8*10+12,"esp")); # restore sp
&function_end_A();
-
+}
&set_label("loop_x86",16);
# copy input block to stack reversing byte and qword order
for ($i=0;$i<8;$i++) {