Changes between 0.9.8a and 0.9.9 [xx XXX xxxx]
+ *) New candidate for BIGNUM assembler implementation, bn_mul_mont,
+ dedicated Montgomery multiplication procedure, is introduced.
+ BN_MONT_CTX is modified to allow bn_mul_mont to reach for higher
+ "64-bit" performance on certain 32-bit targets.
+ [Andy Polyakov]
+
*) New option SSL_OP_NO_COMP to disable use of compression selectively
in SSL structures. New SSL ctrl to set maximum send fragment size.
Save memory by setting the I/O buffer sizes dynamically instead of
$ap="%i1"; # const BN_ULONG *ap,
$bp="%i2"; # const BN_ULONG *bp,
$np="%i3"; # const BN_ULONG *np,
-$n0="%i4"; # BN_ULONG n0,
+$n0="%i4"; # const BN_ULONG *n0,
$num="%i5"; # int num);
$tp="%l0";
sethi %hi(0xffff),$mask
sll $num,3,$num ! num*=8
or $mask,%lo(0xffff),$mask
- mov %i4,$n0 ! reassigned, remember?
+ ldx [%i4],$n0 ! reassigned, remember?
add %sp,$bias,%o0 ! real top of stack
sll $num,2,%o1
&mov ("ebx",&wparam(1)); # const BN_ULONG *ap
&mov ("ecx",&wparam(2)); # const BN_ULONG *bp
&mov ("edx",&wparam(3)); # const BN_ULONG *np
- &mov ("esi",&wparam(4)); # BN_ULONG n0
+ &mov ("esi",&wparam(4)); # const BN_ULONG *n0
&mov ($num,&wparam(5)); # int num
&mov ("edi","esp"); # saved stack pointer!
&sub ($num,1); # num is restored to its original value
# and will remain constant from now...
+ &mov ("esi",&DWP(0,"esi")); # pull n0[0]
&mov ($_rp,"eax"); # ... save a copy of argument block
&mov ($_ap,"ebx");
&mov ($_bp,"ecx");
$ap="%rsi"; # const BN_ULONG *ap,
$bp="%rdx"; # const BN_ULONG *bp,
$np="%rcx"; # const BN_ULONG *np,
-$n0="%r8"; # BN_ULONG n0,
+$n0="%r8"; # const BN_ULONG *n0,
$num="%r9"; # int num);
$lo0="%r10";
$hi0="%r11";
mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp
mov %rdx,$bp # $bp reassigned, remember?
+ mov ($n0),$n0 # pull n0[0] value
+
xor $i,$i # i=0
xor $j,$j # j=0
BIGNUM N; /* The modulus */
BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1
* (Ni is only stored for bignum algorithm) */
- BN_ULONG n0; /* least significant word of Ni */
+ BN_ULONG n0[2];/* least significant word(s) of Ni */
int flags;
};
bn_pollute(a); \
}
-int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num);
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
* versions. Assembler vs. assembler improvement coefficients can
* [and are known to] differ and are to be documented elsewhere.
*/
-int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)
{
- BN_ULONG c0,c1,ml,*tp;
+ BN_ULONG c0,c1,ml,*tp,n0;
#ifdef mul64
BN_ULONG mh;
#endif
int i=0,j;
#if 0 /* template for platform-specific implementation */
- if (ap==bp) return bn_sqr_mont(rp,ap,np,n0,num);
+ if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num);
#endif
vp = tp = alloca((num+2)*sizeof(BN_ULONG));
+ n0 = *n0p;
+
tp[num] = bn_mul_words(tp,ap,num,bp[0]);
tp[num+1] = 0;
goto enter;
#endif
r->top=max;
- n0=mont->n0;
+ n0=mont->n0[0];
#ifdef BN_COUNT
fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
BIGNUM tmod;
BN_ULONG buf[2];
+ tmod.d=buf;
+ tmod.dmax=2;
+ tmod.neg=0;
+
mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
+
+#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
+ BN_zero(R);
+ if (!(BN_set_bit(R,2*BN_BITS2))) goto err;
+
+ tmod.top=0;
+ if (buf[0] = mod->d[0]) tmod.top=1;
+ if (buf[1] = mod->top>1 ? mod->d[1] : 0) tmod.top=2;
+
+ if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
+ goto err;
+ if (!BN_lshift(Ri,Ri,2*BN_BITS2)) goto err; /* R*Ri */
+ if (!BN_is_zero(Ri))
+ {
+ if (!BN_sub_word(Ri,1)) goto err;
+ }
+ else /* if N mod word size == 1 */
+ {
+ if (bn_expand(Ri,(int)sizeof(BN_ULONG)*2) == NULL)
+ goto err;
+ /* Ri-- (mod double word size) */
+ Ri->neg=0;
+ Ri->d[0]=BN_MASK2;
+ Ri->d[1]=BN_MASK2;
+ Ri->top=2;
+ }
+ if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
+ /* Ni = (R*Ri-1)/N,
+ * keep only the two least significant words: */
+ mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
+ mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
+#else
BN_zero(R);
if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
buf[0]=mod->d[0]; /* tmod = N mod word size */
buf[1]=0;
- tmod.d=buf;
tmod.top = buf[0] != 0 ? 1 : 0;
- tmod.dmax=2;
- tmod.neg=0;
/* Ri = R^-1 mod N*/
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
goto err;
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
/* Ni = (R*Ri-1)/N,
* keep only the least significant word: */
- mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
+ mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
+ mont->n0[1] = 0;
+#endif
}
#else /* !MONT_WORD */
{ /* bignum version */
if (!BN_copy(&(to->N),&(from->N))) return NULL;
if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
to->ri=from->ri;
- to->n0=from->n0;
+ to->n0[0]=from->n0[0];
+ to->n0[1]=from->n0[1];
return(to);
}