if (mtop > sizeof(storage) / sizeof(storage[0])
&& (tp = OPENSSL_malloc(mtop * sizeof(BN_ULONG))) == NULL)
- return 0;
+ return 0;
ap = a->d != NULL ? a->d : tp;
bp = b->d != NULL ? b->d : tp;
((volatile BN_ULONG *)tp)[i] = 0;
}
r->top = mtop;
+ r->flags |= BN_FLG_FIXED_TOP;
r->neg = 0;
if (tp != storage)
return BN_nnmod(r, r, m, ctx);
}
+/*
+ * BN_mod_sub variant that may be used if both a and b are non-negative,
+ * a is less than m, while b is of same bit width as m. It's implemented
+ * as subtraction followed by two conditional additions.
+ *
+ * 0 <= a < m
+ * 0 <= b < 2^w < 2*m
+ *
+ * after subtraction
+ *
+ * -2*m < r = a - b < m
+ *
+ * Thus it takes up to two conditional additions to make |r| positive.
+ */
+int bn_mod_sub_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+ const BIGNUM *m)
+{
+ size_t i, ai, bi, mtop = m->top;
+ BN_ULONG borrow, carry, ta, tb, mask, *rp;
+ const BN_ULONG *ap, *bp;
+
+ if (bn_wexpand(r, m->top) == NULL)
+ return 0;
+
+ rp = r->d;
+ ap = a->d != NULL ? a->d : rp;
+ bp = b->d != NULL ? b->d : rp;
+
+ for (i = 0, ai = 0, bi = 0, borrow = 0; i < mtop;) {
+ mask = (BN_ULONG)0 - ((i - a->top) >> (8 * sizeof(i) - 1));
+ ta = ap[ai] & mask;
+
+ mask = (BN_ULONG)0 - ((i - b->top) >> (8 * sizeof(i) - 1));
+ tb = bp[bi] & mask;
+ rp[i] = ta - tb - borrow;
+ if (ta != tb)
+ borrow = (ta < tb);
+
+ i++;
+ ai += (i - a->dmax) >> (8 * sizeof(i) - 1);
+ bi += (i - b->dmax) >> (8 * sizeof(i) - 1);
+ }
+ ap = m->d;
+ for (i = 0, mask = 0 - borrow, carry = 0; i < mtop; i++) {
+ ta = ((ap[i] & mask) + carry) & BN_MASK2;
+ carry = (ta < carry);
+ rp[i] = (rp[i] + ta) & BN_MASK2;
+ carry += (rp[i] < ta);
+ }
+ borrow -= carry;
+ for (i = 0, mask = 0 - borrow, carry = 0; i < mtop; i++) {
+ ta = ((ap[i] & mask) + carry) & BN_MASK2;
+ carry = (ta < carry);
+ rp[i] = (rp[i] + ta) & BN_MASK2;
+ carry += (rp[i] < ta);
+ }
+
+ r->top = mtop;
+ r->flags |= BN_FLG_FIXED_TOP;
+ r->neg = 0;
+
+ return 1;
+}
+
/*
* BN_mod_sub variant that may be used if both a and b are non-negative and
* less than m
bn_check_top(tmp);
if (a == b) {
- if (!BN_sqr(tmp, a, ctx))
+ if (!bn_sqr_fixed_top(tmp, a, ctx))
goto err;
} else {
- if (!BN_mul(tmp, a, b, ctx))
+ if (!bn_mul_fixed_top(tmp, a, b, ctx))
goto err;
}
/* reduce from aRR to aR */
BIGNUM *n;
BN_ULONG *ap, *np, *rp, n0, v, carry;
int nl, max, i;
+ unsigned int rtop;
n = &(mont->N);
nl = n->top;
rp = r->d;
/* clear the top words of T */
-# if 1
- for (i = r->top; i < max; i++) /* memset? XXX */
- rp[i] = 0;
-# else
- memset(&(rp[r->top]), 0, (max - r->top) * sizeof(BN_ULONG));
-# endif
+ for (rtop = r->top, i = 0; i < max; i++) {
+ v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
+ rp[i] &= v;
+ }
r->top = max;
r->flags |= BN_FLG_FIXED_TOP;
int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
BN_CTX *ctx)
+{
+ int retn;
+
+ retn = bn_from_mont_fixed_top(ret, a, mont, ctx);
+ bn_correct_top(ret);
+ bn_check_top(ret);
+
+ return retn;
+}
+
+int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
+ BN_CTX *ctx)
{
int retn = 0;
#ifdef MONT_WORD
BN_CTX_start(ctx);
if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) {
retn = bn_from_montgomery_word(ret, t, mont);
- bn_correct_top(ret);
- bn_check_top(ret);
}
BN_CTX_end(ctx);
#else /* !MONT_WORD */
#endif /* BN_RECURSION */
int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
+{
+ int ret = bn_mul_fixed_top(r, a, b, ctx);
+
+ bn_correct_top(r);
+ bn_check_top(r);
+
+ return ret;
+}
+
+int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
{
int ret = 0;
int top, al, bl;
#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
end:
#endif
- bn_correct_top(rr);
+ rr->flags |= BN_FLG_FIXED_TOP;
if (r != rr && BN_copy(r, rr) == NULL)
goto err;
* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96
*/
int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
+{
+ int ret = bn_sqr_fixed_top(r, a, ctx);
+
+ bn_correct_top(r);
+ bn_check_top(r);
+
+ return ret;
+}
+
+int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
{
int max, al;
int ret = 0;
rr->neg = 0;
rr->top = max;
- bn_correct_top(rr);
+ rr->flags |= BN_FLG_FIXED_TOP;
if (r != rr && BN_copy(r, rr) == NULL)
goto err;
*/
int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
BN_MONT_CTX *mont, BN_CTX *ctx);
+int bn_from_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
+ BN_CTX *ctx);
int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
BN_CTX *ctx);
int bn_mod_add_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m);
+int bn_mod_sub_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+ const BIGNUM *m);
+int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx);
int bn_bn2binpad(const BIGNUM *a, unsigned char *to, int tolen);