X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=crypto%2Fec%2Fasm%2Fecp_nistz256-x86.pl;h=edc1d5a3a9f2003aea3603d9847f13cde87b4864;hb=32be631ca1f2b73c92e4f7f5d23f1c3aee80ec69;hp=b96b1aa0171ca5e6405ff2a3f4445e3aa0b5313a;hpb=dfde4219fdebbb5a8a17602fea036f7690e517ea;p=oweals%2Fopenssl.git diff --git a/crypto/ec/asm/ecp_nistz256-x86.pl b/crypto/ec/asm/ecp_nistz256-x86.pl index b96b1aa017..edc1d5a3a9 100755 --- a/crypto/ec/asm/ecp_nistz256-x86.pl +++ b/crypto/ec/asm/ecp_nistz256-x86.pl @@ -1,7 +1,7 @@ #! /usr/bin/env perl -# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved. # -# Licensed under the OpenSSL license (the "License"). You may not use +# Licensed under the Apache License 2.0 (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy # in the file LICENSE in the source distribution or at # https://www.openssl.org/source/license.html @@ -42,10 +42,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -$output=pop; -open STDOUT,">$output"; +$output=pop and open STDOUT,">$output"; -&asm_init($ARGV[0],"ecp_nistz256-x86.pl",$ARGV[$#ARGV] eq "386"); +&asm_init($ARGV[0],$ARGV[$#ARGV] eq "386"); $sse2=0; for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } @@ -443,7 +442,7 @@ for(1..37) { &mov (&DWP(20,"esp"),"eax"); &mov (&DWP(24,"esp"),"eax"); &mov (&DWP(28,"esp"),"eax"); - + &call ("_ecp_nistz256_sub"); &stack_pop(8); @@ -1179,7 +1178,7 @@ for ($i=0;$i<7;$i++) { &mov ("esi",&wparam(1)); &mov ("ebp",&wparam(2)); - &lea ("edi",&DWP(-1,"edi","ebp")); + &lea ("edi",&DWP(0,"edi","ebp")); &mov ("ebp",64/4); &set_label("scatter_w7_loop"); &mov ("eax",&DWP(0,"esi")); @@ -1388,7 +1387,7 @@ for ($i=0;$i<7;$i++) { # above map() describes stack layout with 18 temporary # 256-bit vectors on top, then we take extra words for - # !in1infty, !in2infty, result of check for zero and + # ~in1infty, ~in2infty, result of check for zero and # OPENSSL_ia32cap_P copy. [one unused word for padding] &stack_push(8*18+5); if ($sse2) { @@ -1405,21 +1404,21 @@ for ($i=0;$i<7;$i++) { &mov ("edx",&DWP($i+12,"esi")); &mov (&DWP($i+0,"edi"),"eax"); &mov (&DWP(32*18+12,"esp"),"ebp") if ($i==0); - &mov ("ebp","eax") if ($i==0); - &or ("ebp","eax") if ($i!=0 && $i<64); + &mov ("ebp","eax") if ($i==64); + &or ("ebp","eax") if ($i>64); &mov (&DWP($i+4,"edi"),"ebx"); - &or ("ebp","ebx") if ($i<64); + &or ("ebp","ebx") if ($i>=64); &mov (&DWP($i+8,"edi"),"ecx"); - &or ("ebp","ecx") if ($i<64); + &or ("ebp","ecx") if ($i>=64); &mov (&DWP($i+12,"edi"),"edx"); - &or ("ebp","edx") if ($i<64); + &or ("ebp","edx") if ($i>=64); } &xor ("eax","eax"); &mov ("esi",&wparam(1)); &sub ("eax","ebp"); &or ("ebp","eax"); &sar ("ebp",31); - &mov (&DWP(32*18+4,"esp"),"ebp"); # !in2infty + &mov (&DWP(32*18+4,"esp"),"ebp"); # ~in2infty &lea ("edi",&DWP($in1_x,"esp")); for($i=0;$i<96;$i+=16) { @@ -1428,20 +1427,20 @@ for ($i=0;$i<7;$i++) { &mov ("ecx",&DWP($i+8,"esi")); &mov ("edx",&DWP($i+12,"esi")); &mov (&DWP($i+0,"edi"),"eax"); - &mov ("ebp","eax") if ($i==0); - &or ("ebp","eax") if ($i!=0 && $i<64); + &mov ("ebp","eax") if ($i==64); + &or ("ebp","eax") if ($i>64); &mov (&DWP($i+4,"edi"),"ebx"); - &or ("ebp","ebx") if ($i<64); + &or ("ebp","ebx") if ($i>=64); &mov (&DWP($i+8,"edi"),"ecx"); - &or ("ebp","ecx") if ($i<64); + &or ("ebp","ecx") if ($i>=64); &mov (&DWP($i+12,"edi"),"edx"); - &or ("ebp","edx") if ($i<64); + &or ("ebp","edx") if ($i>=64); } &xor ("eax","eax"); &sub ("eax","ebp"); &or ("ebp","eax"); &sar ("ebp",31); - &mov (&DWP(32*18+0,"esp"),"ebp"); # !in1infty + &mov (&DWP(32*18+0,"esp"),"ebp"); # ~in1infty &mov ("eax",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy &lea ("esi",&DWP($in2_z,"esp")); @@ -1516,23 +1515,19 @@ for ($i=0;$i<7;$i++) { &or ("eax",&DWP(0,"edi")); &or ("eax",&DWP(4,"edi")); &or ("eax",&DWP(8,"edi")); - &or ("eax",&DWP(12,"edi")); - - &data_byte(0x3e); # predict taken - &jnz (&label("add_proceed")); # is_equal(U1,U2)? + &or ("eax",&DWP(12,"edi")); # ~is_equal(U1,U2) - &mov ("eax",&DWP(32*18+0,"esp")); - &and ("eax",&DWP(32*18+4,"esp")); - &mov ("ebx",&DWP(32*18+8,"esp")); - &jz (&label("add_proceed")); # (in1infty || in2infty)? - &test ("ebx","ebx"); - &jz (&label("add_double")); # is_equal(S1,S2)? + &mov ("ebx",&DWP(32*18+0,"esp")); # ~in1infty + ¬ ("ebx"); # -1/0 -> 0/-1 + &or ("eax","ebx"); + &mov ("ebx",&DWP(32*18+4,"esp")); # ~in2infty + ¬ ("ebx"); # -1/0 -> 0/-1 + &or ("eax","ebx"); + &or ("eax",&DWP(32*18+8,"esp")); # ~is_equal(S1,S2) - &mov ("edi",&wparam(0)); - &xor ("eax","eax"); - &mov ("ecx",96/4); - &data_byte(0xfc,0xf3,0xab); # cld; stosd - &jmp (&label("add_done")); + # if (~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2)) + &data_byte(0x3e); # predict taken + &jnz (&label("add_proceed")); &set_label("add_double",16); &mov ("esi",&wparam(1)); @@ -1614,34 +1609,34 @@ for ($i=0;$i<7;$i++) { &lea ("edi",&DWP($res_y,"esp")); &call ("_ecp_nistz256_sub"); # p256_sub(res_y, res_y, S2); - &mov ("ebp",&DWP(32*18+0,"esp")); # !in1infty - &mov ("esi",&DWP(32*18+4,"esp")); # !in2infty + &mov ("ebp",&DWP(32*18+0,"esp")); # ~in1infty + &mov ("esi",&DWP(32*18+4,"esp")); # ~in2infty &mov ("edi",&wparam(0)); &mov ("edx","ebp"); ¬ ("ebp"); - &and ("edx","esi"); - &and ("ebp","esi"); - ¬ ("esi"); + &and ("edx","esi"); # ~in1infty & ~in2infty + &and ("ebp","esi"); # in1infty & ~in2infty + ¬ ("esi"); # in2infty ######################################## # conditional moves for($i=64;$i<96;$i+=4) { - &mov ("eax","edx"); + &mov ("eax","edx"); # ~in1infty & ~in2infty &and ("eax",&DWP($res_x+$i,"esp")); - &mov ("ebx","ebp"); + &mov ("ebx","ebp"); # in1infty & ~in2infty &and ("ebx",&DWP($in2_x+$i,"esp")); - &mov ("ecx","esi"); + &mov ("ecx","esi"); # in2infty &and ("ecx",&DWP($in1_x+$i,"esp")); &or ("eax","ebx"); &or ("eax","ecx"); &mov (&DWP($i,"edi"),"eax"); } for($i=0;$i<64;$i+=4) { - &mov ("eax","edx"); + &mov ("eax","edx"); # ~in1infty & ~in2infty &and ("eax",&DWP($res_x+$i,"esp")); - &mov ("ebx","ebp"); + &mov ("ebx","ebp"); # in1infty & ~in2infty &and ("ebx",&DWP($in2_x+$i,"esp")); - &mov ("ecx","esi"); + &mov ("ecx","esi"); # in2infty &and ("ecx",&DWP($in1_x+$i,"esp")); &or ("eax","ebx"); &or ("eax","ecx"); @@ -1668,7 +1663,7 @@ for ($i=0;$i<7;$i++) { # above map() describes stack layout with 15 temporary # 256-bit vectors on top, then we take extra words for - # !in1infty, !in2infty, and OPENSSL_ia32cap_P copy. + # ~in1infty, ~in2infty, and OPENSSL_ia32cap_P copy. &stack_push(8*15+3); if ($sse2) { &call ("_picup_eax"); @@ -1684,21 +1679,21 @@ for ($i=0;$i<7;$i++) { &mov ("edx",&DWP($i+12,"esi")); &mov (&DWP($i+0,"edi"),"eax"); &mov (&DWP(32*15+8,"esp"),"ebp") if ($i==0); - &mov ("ebp","eax") if ($i==0); - &or ("ebp","eax") if ($i!=0 && $i<64); + &mov ("ebp","eax") if ($i==64); + &or ("ebp","eax") if ($i>64); &mov (&DWP($i+4,"edi"),"ebx"); - &or ("ebp","ebx") if ($i<64); + &or ("ebp","ebx") if ($i>=64); &mov (&DWP($i+8,"edi"),"ecx"); - &or ("ebp","ecx") if ($i<64); + &or ("ebp","ecx") if ($i>=64); &mov (&DWP($i+12,"edi"),"edx"); - &or ("ebp","edx") if ($i<64); + &or ("ebp","edx") if ($i>=64); } &xor ("eax","eax"); &mov ("esi",&wparam(2)); &sub ("eax","ebp"); &or ("ebp","eax"); &sar ("ebp",31); - &mov (&DWP(32*15+0,"esp"),"ebp"); # !in1infty + &mov (&DWP(32*15+0,"esp"),"ebp"); # ~in1infty &lea ("edi",&DWP($in2_x,"esp")); for($i=0;$i<64;$i+=16) { @@ -1724,7 +1719,7 @@ for ($i=0;$i<7;$i++) { &lea ("ebp",&DWP($in1_z,"esp")); &sar ("ebx",31); &lea ("edi",&DWP($Z1sqr,"esp")); - &mov (&DWP(32*15+4,"esp"),"ebx"); # !in2infty + &mov (&DWP(32*15+4,"esp"),"ebx"); # ~in2infty &call ("_ecp_nistz256_mul_mont"); # p256_sqr_mont(Z1sqr, in1_z); @@ -1823,14 +1818,14 @@ for ($i=0;$i<7;$i++) { &lea ("edi",&DWP($res_y,"esp")); &call ("_ecp_nistz256_sub"); # p256_sub(res_y, res_y, S2); - &mov ("ebp",&DWP(32*15+0,"esp")); # !in1infty - &mov ("esi",&DWP(32*15+4,"esp")); # !in2infty + &mov ("ebp",&DWP(32*15+0,"esp")); # ~in1infty + &mov ("esi",&DWP(32*15+4,"esp")); # ~in2infty &mov ("edi",&wparam(0)); &mov ("edx","ebp"); ¬ ("ebp"); - &and ("edx","esi"); - &and ("ebp","esi"); - ¬ ("esi"); + &and ("edx","esi"); # ~in1infty & ~in2infty + &and ("ebp","esi"); # in1infty & ~in2infty + ¬ ("esi"); # in2infty ######################################## # conditional moves @@ -1848,11 +1843,11 @@ for ($i=0;$i<7;$i++) { &mov (&DWP($i,"edi"),"eax"); } for($i=0;$i<64;$i+=4) { - &mov ("eax","edx"); + &mov ("eax","edx"); # ~in1infty & ~in2infty &and ("eax",&DWP($res_x+$i,"esp")); - &mov ("ebx","ebp"); + &mov ("ebx","ebp"); # in1infty & ~in2infty &and ("ebx",&DWP($in2_x+$i,"esp")); - &mov ("ecx","esi"); + &mov ("ecx","esi"); # in2infty &and ("ecx",&DWP($in1_x+$i,"esp")); &or ("eax","ebx"); &or ("eax","ecx"); @@ -1863,4 +1858,4 @@ for ($i=0;$i<7;$i++) { &asm_finish(); -close STDOUT; +close STDOUT or die "error closing STDOUT";