2 * Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
10 /* Copyright 2011 Google Inc.
12 * Licensed under the Apache License, Version 2.0 (the "License");
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
17 * http://www.apache.org/licenses/LICENSE-2.0
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
27 * ECDSA low level APIs are deprecated for public use, but still ok for
30 #include "internal/deprecated.h"
33 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
35 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
36 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
37 * work which got its smarts from Daniel J. Bernstein's work on the same.
#include <string.h>

#include <openssl/opensslconf.h>
#include <openssl/err.h>
#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
/* even with gcc, the typedef won't work for 32-bit platforms */
typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
                                * platforms */
typedef __int128_t int128_t;
/*
 * NOTE(review): this #error belongs on the #else branch for compilers that
 * lack __int128 support; the surrounding preprocessor lines appear to be
 * truncated in this excerpt.
 */
# error "Your compiler doesn't appear to support 128-bit integer types"

/*
 * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
 * can serialise an element of this field into 32 bytes. We call this an
 * felem_bytearray.
 */
typedef u8 felem_bytearray[32];
/*
 * These are the parameters of P256, taken from FIPS 186-3, page 86. These
 * values are big-endian.  Order of entries: p, a, b, Gx, Gy.
 */
static const felem_bytearray nistp256_curve_params[5] = {
    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},
    {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, /* b */
     0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
     0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
     0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
    {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
     0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
     0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
     0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
    {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
     0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
     0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
     0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
/*-
 * The representation of field elements.
 * ------------------------------------
 *
 * We represent field elements with either four 128-bit values, eight 128-bit
 * values, or four 64-bit values. The field element represented is:
 *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
 * or:
 *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
 *
 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
 * apart, but are 128-bits wide, the most significant bits of each limb overlap
 * with the least significant bits of the next.
 *
 * A field element with four limbs is an 'felem'. One with eight limbs is a
 * 'longfelem'
 *
 * A field element with four, 64-bit values is called a 'smallfelem'. Small
 * values are used as intermediate values before multiplication.
 */

/* NOTE(review): NLIMBS (expected to be 4) is #defined elsewhere in this file
 * — its definition is not visible in this excerpt. */
typedef uint128_t limb;
typedef limb felem[NLIMBS];
typedef limb longfelem[NLIMBS * 2];
typedef u64 smallfelem[NLIMBS];

/* This is the value of the prime as four 64-bit words, little-endian. */
static const u64 kPrime[4] =
    { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
static const u64 bottom63bits = 0x7ffffffffffffffful;
129 * bin32_to_felem takes a little-endian byte array and converts it into felem
130 * form. This assumes that the CPU is little-endian.
132 static void bin32_to_felem(felem out, const u8 in[32])
134 out[0] = *((u64 *)&in[0]);
135 out[1] = *((u64 *)&in[8]);
136 out[2] = *((u64 *)&in[16]);
137 out[3] = *((u64 *)&in[24]);
141 * smallfelem_to_bin32 takes a smallfelem and serialises into a little
142 * endian, 32 byte array. This assumes that the CPU is little-endian.
144 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
146 *((u64 *)&out[0]) = in[0];
147 *((u64 *)&out[8]) = in[1];
148 *((u64 *)&out[16]) = in[2];
149 *((u64 *)&out[24]) = in[3];
/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
static int BN_to_felem(felem out, const BIGNUM *bn)
    felem_bytearray b_out;

    /* BN_bn2lebinpad takes an unsigned value; reject negative inputs */
    if (BN_is_negative(bn)) {
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
    /* serialise as little-endian, zero-padded to 32 bytes */
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
        /* bn did not fit in 32 bytes */
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
    bin32_to_felem(out, b_out);

/* smallfelem_to_BN converts a smallfelem into an OpenSSL BIGNUM */
static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
    felem_bytearray b_out;
    smallfelem_to_bin32(b_out, in);
    return BN_lebin2bn(b_out, sizeof(b_out), out);
/* smallfelem_one: presumably sets |out| to the value 1 — body not visible
 * here; confirm against the full implementation. */
static void smallfelem_one(smallfelem out)

/* smallfelem_assign: presumably copies |in| to |out| — confirm. */
static void smallfelem_assign(smallfelem out, const smallfelem in)

/* felem_assign: presumably copies |in| to |out| — confirm. */
static void felem_assign(felem out, const felem in)

/* felem_sum sets out = out + in. */
static void felem_sum(felem out, const felem in)

/* felem_small_sum sets out = out + in. */
static void felem_small_sum(felem out, const smallfelem in)

/* felem_scalar sets out = out * scalar */
static void felem_scalar(felem out, const u64 scalar)

/* longfelem_scalar sets out = out * scalar */
static void longfelem_scalar(longfelem out, const u64 scalar)
/* 2^105-based constants; each is a limb value used to build zero105 below */
#define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
#define two105 (((limb)1) << 105)
#define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)

/* zero105 is 0 mod p */
static const felem zero105 =
    { two105m41m9, two105, two105m41p9, two105m41p9 };

/*-
 * smallfelem_neg sets |out| to |-small|
 * On exit:
 *   out[i] < out[i] + 2^105
 */
static void smallfelem_neg(felem out, const smallfelem small)
    /* In order to prevent underflow, we subtract from 0 mod p. */
    out[0] = zero105[0] - small[0];
    out[1] = zero105[1] - small[1];
    out[2] = zero105[2] - small[2];
    out[3] = zero105[3] - small[3];

/*-
 * felem_diff subtracts |in| from |out|
 * On exit:
 *   out[i] < out[i] + 2^105
 */
static void felem_diff(felem out, const felem in)
    /*
     * In order to prevent underflow, we add 0 mod p before subtracting.
     */
    out[0] += zero105[0];
    out[1] += zero105[1];
    out[2] += zero105[2];
    out[3] += zero105[3];
/* 2^107-based constants for the larger-headroom subtraction below */
#define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
#define two107 (((limb)1) << 107)
#define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)

/* zero107 is 0 mod p */
static const felem zero107 =
    { two107m43m11, two107, two107m43p11, two107m43p11 };

/*-
 * An alternative felem_diff for larger inputs |in|
 * felem_diff_zero107 subtracts |in| from |out|
 * On exit:
 *   out[i] < out[i] + 2^107
 */
static void felem_diff_zero107(felem out, const felem in)
    /*
     * In order to prevent underflow, we add 0 mod p before subtracting.
     */
    out[0] += zero107[0];
    out[1] += zero107[1];
    out[2] += zero107[2];
    out[3] += zero107[3];

/*-
 * longfelem_diff subtracts |in| from |out|
 * On exit:
 *   out[i] < out[i] + 2^70 + 2^40
 */
static void longfelem_diff(longfelem out, const longfelem in)
    static const limb two70m8p6 =
        (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
    static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
    static const limb two70 = (((limb) 1) << 70);
    static const limb two70m40m38p6 =
        (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
    static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);

    /* add 0 mod p to avoid underflow */
    out[3] += two70m40m38p6;

    /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
/* 64/110-bit constants used to build zero110 below */
#define two64m0 (((limb)1) << 64) - 1
#define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
#define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
#define two64m32 (((limb)1) << 64) - (((limb)1) << 32)

/* zero110 is 0 mod p */
static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };

/*-
 * felem_shrink converts an felem into a smallfelem. The result isn't quite
 * minimal as the value may be greater than p.
 */
static void felem_shrink(smallfelem out, const felem in)
    static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */

    /* add 0 mod p (zero110) so the later conditional subtraction of kPrime
     * cannot underflow; fold the high word of in[2] into tmp[3] */
    tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
    tmp[2] = zero110[2] + (u64)in[2];
    tmp[0] = zero110[0] + in[0];
    tmp[1] = zero110[1] + in[1];
    /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */

    /*
     * We perform two partial reductions where we eliminate the high-word of
     * tmp[3]. We don't update the other words till the end.
     */
    a = tmp[3] >> 64;           /* a < 2^46 */
    tmp[3] = (u64)tmp[3];
    tmp[3] += ((limb) a) << 32;

    a = tmp[3] >> 64;           /* a < 2^15 */
    b += a;                     /* b < 2^46 + 2^15 < 2^47 */
    tmp[3] = (u64)tmp[3];
    tmp[3] += ((limb) a) << 32;
    /* tmp[3] < 2^64 + 2^47 */

    /*
     * This adjusts the other two words to complete the two partial
     * reductions.
     */
    tmp[1] -= (((limb) b) << 32);

    /*
     * In order to make space in tmp[3] for the carry from 2 -> 3, we
     * conditionally subtract kPrime if tmp[3] is large enough.
     */
    high = (u64)(tmp[3] >> 64);
    /* As tmp[3] < 2^65, high is either 1 or 0 */

    /*-
     * all ones    if the high word of tmp[3] is 1
     * all zeros   if the high word of tmp[3] is 0
     */
    mask = 0 - (low >> 63);
    /*-
     * all ones    if the MSB of low is 1
     * all zeros   if the MSB of low is 0
     */

    /* if low was greater than kPrime3Test then the MSB is zero */
    low = 0 - (low >> 63);
    /*-
     * all ones    if low was > kPrime3Test
     * all zeros   if low was <= kPrime3Test
     */
    mask = (mask & low) | high;
    tmp[0] -= mask & kPrime[0];
    tmp[1] -= mask & kPrime[1];
    /* kPrime[2] is zero, so omitted */
    tmp[3] -= mask & kPrime[3];
    /* tmp[3] < 2**64 - 2**32 + 1 */

    /* carry-propagate tmp down into four bare 64-bit words */
    tmp[1] += ((u64)(tmp[0] >> 64));
    tmp[0] = (u64)tmp[0];
    tmp[2] += ((u64)(tmp[1] >> 64));
    tmp[1] = (u64)tmp[1];
    tmp[3] += ((u64)(tmp[2] >> 64));
    tmp[2] = (u64)tmp[2];
/* smallfelem_expand converts a smallfelem to an felem */
static void smallfelem_expand(felem out, const smallfelem in)

/*-
 * smallfelem_square sets |out| = |small|^2
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 *
 * Schoolbook squaring: each 64x64->128 partial product below is accumulated
 * into the appropriate longfelem limbs (accumulation lines not shown in
 * this excerpt).
 */
static void smallfelem_square(longfelem out, const smallfelem small)
    a = ((uint128_t) small[0]) * small[0];

    a = ((uint128_t) small[0]) * small[1];

    a = ((uint128_t) small[0]) * small[2];

    a = ((uint128_t) small[0]) * small[3];

    a = ((uint128_t) small[1]) * small[2];

    a = ((uint128_t) small[1]) * small[1];

    a = ((uint128_t) small[1]) * small[3];

    a = ((uint128_t) small[2]) * small[3];

    a = ((uint128_t) small[2]) * small[2];

    a = ((uint128_t) small[3]) * small[3];

/*-
 * felem_square sets |out| = |in|^2
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void felem_square(longfelem out, const felem in)
    /* shrink to 64-bit limbs first, then square */
    felem_shrink(small, in);
    smallfelem_square(out, small);
575 * smallfelem_mul sets |out| = |small1| * |small2|
580 * out[i] < 7 * 2^64 < 2^67
582 static void smallfelem_mul(longfelem out, const smallfelem small1,
583 const smallfelem small2)
588 a = ((uint128_t) small1[0]) * small2[0];
594 a = ((uint128_t) small1[0]) * small2[1];
600 a = ((uint128_t) small1[1]) * small2[0];
606 a = ((uint128_t) small1[0]) * small2[2];
612 a = ((uint128_t) small1[1]) * small2[1];
618 a = ((uint128_t) small1[2]) * small2[0];
624 a = ((uint128_t) small1[0]) * small2[3];
630 a = ((uint128_t) small1[1]) * small2[2];
636 a = ((uint128_t) small1[2]) * small2[1];
642 a = ((uint128_t) small1[3]) * small2[0];
648 a = ((uint128_t) small1[1]) * small2[3];
654 a = ((uint128_t) small1[2]) * small2[2];
660 a = ((uint128_t) small1[3]) * small2[1];
666 a = ((uint128_t) small1[2]) * small2[3];
672 a = ((uint128_t) small1[3]) * small2[2];
678 a = ((uint128_t) small1[3]) * small2[3];
686 * felem_mul sets |out| = |in1| * |in2|
691 * out[i] < 7 * 2^64 < 2^67
693 static void felem_mul(longfelem out, const felem in1, const felem in2)
695 smallfelem small1, small2;
696 felem_shrink(small1, in1);
697 felem_shrink(small2, in2);
698 smallfelem_mul(out, small1, small2);
702 * felem_small_mul sets |out| = |small1| * |in2|
707 * out[i] < 7 * 2^64 < 2^67
709 static void felem_small_mul(longfelem out, const smallfelem small1,
713 felem_shrink(small2, in2);
714 smallfelem_mul(out, small1, small2);
/* 2^100-based constants for felem_reduce below */
#define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
#define two100 (((limb)1) << 100)
#define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)

/* zero100 is 0 mod p */
static const felem zero100 =
    { two100m36m4, two100, two100m36p4, two100m36p4 };

/*-
 * Internal function for the different flavours of felem_reduce.
 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
 * On entry:
 *   out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
 *   out[1] >= in[7] + 2^32*in[4]
 *   out[2] >= in[5] + 2^32*in[5]
 *   out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
 * On exit:
 *   out[0] <= out[0] + in[4] + 2^32*in[5]
 *   out[1] <= out[1] + in[5] + 2^33*in[6]
 *   out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
 *   out[3] <= out[3] + 2^32*in[4] + 3*in[7]
 */
static void felem_reduce_(felem out, const longfelem in)
    /* combine common terms from below */
    c = in[4] + (in[5] << 32);

    /* the remaining terms */
    /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
    out[1] -= (in[4] << 32);
    out[3] += (in[4] << 32);

    /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
    out[2] -= (in[5] << 32);

    /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
    out[0] -= (in[6] << 32);
    out[1] += (in[6] << 33);
    out[2] += (in[6] * 2);
    out[3] -= (in[6] << 32);

    /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
    out[0] -= (in[7] << 32);
    out[2] += (in[7] << 33);
    out[3] += (in[7] * 3);

/*-
 * felem_reduce converts a longfelem into an felem.
 * To be called directly after felem_square or felem_mul.
 * On entry:
 *   in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
 *   in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
 */
static void felem_reduce(felem out, const longfelem in)
    out[0] = zero100[0] + in[0];
    out[1] = zero100[1] + in[1];
    out[2] = zero100[2] + in[2];
    out[3] = zero100[3] + in[3];

    felem_reduce_(out, in);

    /*-
     * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
     * out[1] > 2^100 - 2^64 - 7*2^96 > 0
     * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
     * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
     *
     * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
     * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
     * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
     * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
     */

/*-
 * felem_reduce_zero105 converts a larger longfelem into an felem.
 * (uses zero105 for extra headroom in the subtractions)
 */
static void felem_reduce_zero105(felem out, const longfelem in)
    out[0] = zero105[0] + in[0];
    out[1] = zero105[1] + in[1];
    out[2] = zero105[2] + in[2];
    out[3] = zero105[3] + in[3];

    felem_reduce_(out, in);

    /*-
     * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
     * out[1] > 2^105 - 2^71 - 2^103 > 0
     * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
     * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
     *
     * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
     * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
     * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
     * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
     */
/*-
 * subtract_u64 sets *result = *result - v and *carry to one if the
 * subtraction underflowed.
 */
static void subtract_u64(u64 *result, u64 *carry, u64 v)
    uint128_t r = *result;
    /* bit 64 of the 128-bit difference is the borrow */
    *carry = (r >> 64) & 1;

/*-
 * felem_contract converts |in| to its unique, minimal representation. On
 * entry: in[i] < 2^109
 */
static void felem_contract(smallfelem out, const felem in)
    u64 all_equal_so_far = 0, result = 0, carry;

    felem_shrink(out, in);
    /* small is minimal except that the value might be > p */

    /*-
     * We are doing a constant time test if out >= kPrime. We need to compare
     * each u64, from most-significant to least significant. For each one, if
     * all words so far have been equal (m is all ones) then a non-equal
     * result is the answer. Otherwise we continue.
     */
    for (i = 3; i < 4; i--) {    /* unsigned wrap-around terminates the loop */
        uint128_t a = ((uint128_t) kPrime[i]) - out[i];
        /*-
         * if out[i] > kPrime[i] then a will underflow and the high 64-bits
         * will all be set.
         */
        result |= all_equal_so_far & ((u64)(a >> 64));

        /*-
         * if kPrime[i] == out[i] then |equal| will be all zeros and the
         * decrement will make it all ones.
         */
        equal = kPrime[i] ^ out[i];
        equal &= equal << 32;
        equal &= equal << 16;
        equal = 0 - (equal >> 63);

        all_equal_so_far &= equal;

    /*-
     * if all_equal_so_far is still all ones then the two values are equal
     * and so out >= kPrime is true.
     */
    result |= all_equal_so_far;

    /* if out >= kPrime then we subtract kPrime. */
    subtract_u64(&out[0], &carry, result & kPrime[0]);
    subtract_u64(&out[1], &carry, carry);
    subtract_u64(&out[2], &carry, carry);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[1], &carry, result & kPrime[1]);
    subtract_u64(&out[2], &carry, carry);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[2], &carry, result & kPrime[2]);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[3], &carry, result & kPrime[3]);
/* smallfelem_square_contract: out = in^2, fully reduced to minimal form */
static void smallfelem_square_contract(smallfelem out, const smallfelem in)
    smallfelem_square(longtmp, in);
    felem_reduce(tmp, longtmp);
    felem_contract(out, tmp);

/* smallfelem_mul_contract: out = in1 * in2, fully reduced to minimal form */
static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
                                    const smallfelem in2)
    smallfelem_mul(longtmp, in1, in2);
    felem_reduce(tmp, longtmp);
    felem_contract(out, tmp);

/*-
 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
 * otherwise. Constant-time: no data-dependent branches, only mask folding.
 */
static limb smallfelem_is_zero(const smallfelem small)
    u64 is_zero = small[0] | small[1] | small[2] | small[3];

    /* fold all bits of is_zero into its top bit, then broadcast */
    is_zero &= is_zero << 32;
    is_zero &= is_zero << 16;
    is_zero &= is_zero << 8;
    is_zero &= is_zero << 4;
    is_zero &= is_zero << 2;
    is_zero &= is_zero << 1;
    is_zero = 0 - (is_zero >> 63);

    /* also treat the non-minimal representation p as zero */
    is_p = (small[0] ^ kPrime[0]) |
        (small[1] ^ kPrime[1]) |
        (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
    is_p = 0 - (is_p >> 63);

    result |= ((limb) is_zero) << 64;

/* boolean wrapper around smallfelem_is_zero for EC_METHOD callbacks */
static int smallfelem_is_zero_int(const void *small)
    return (int)(smallfelem_is_zero(small) & ((limb) 1));
/*-
 * felem_inv calculates |out| = |in|^{-1}
 *
 * Based on Fermat's Little Theorem:
 *   a^{p-1} = 1 (mod p)
 *   a^{p-2} = a^{-1} (mod p)
 *
 * The addition chain below builds in^(2^I - 1) for I = 2,4,8,16,32,64 and
 * combines them to reach the exponent p - 2.
 */
static void felem_inv(felem out, const felem in)
    /* each e_I will hold |in|^{2^I - 1} */
    felem e2, e4, e8, e16, e32, e64;

    felem_square(tmp, in);
    felem_reduce(ftmp, tmp);    /* 2^1 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^2 - 2^0 */
    felem_assign(e2, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 2^1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^4 - 2^2 */
    felem_mul(tmp, ftmp, e2);
    felem_reduce(ftmp, tmp);    /* 2^4 - 2^0 */
    felem_assign(e4, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^5 - 2^1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^6 - 2^2 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^7 - 2^3 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^8 - 2^4 */
    felem_mul(tmp, ftmp, e4);
    felem_reduce(ftmp, tmp);    /* 2^8 - 2^0 */
    felem_assign(e8, ftmp);
    for (i = 0; i < 8; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    felem_mul(tmp, ftmp, e8);
    felem_reduce(ftmp, tmp);    /* 2^16 - 2^0 */
    felem_assign(e16, ftmp);
    for (i = 0; i < 16; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    felem_mul(tmp, ftmp, e16);
    felem_reduce(ftmp, tmp);    /* 2^32 - 2^0 */
    felem_assign(e32, ftmp);
    for (i = 0; i < 32; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    felem_assign(e64, ftmp);
    felem_mul(tmp, ftmp, in);
    felem_reduce(ftmp, tmp);    /* 2^64 - 2^32 + 2^0 */
    for (i = 0; i < 192; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^256 - 2^224 + 2^192 */

    felem_mul(tmp, e64, e32);
    felem_reduce(ftmp2, tmp);   /* 2^64 - 2^0 */
    for (i = 0; i < 16; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    felem_mul(tmp, ftmp2, e16);
    felem_reduce(ftmp2, tmp);   /* 2^80 - 2^0 */
    for (i = 0; i < 8; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    felem_mul(tmp, ftmp2, e8);
    felem_reduce(ftmp2, tmp);   /* 2^88 - 2^0 */
    for (i = 0; i < 4; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    felem_mul(tmp, ftmp2, e4);
    felem_reduce(ftmp2, tmp);   /* 2^92 - 2^0 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^93 - 2^1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^94 - 2^2 */
    felem_mul(tmp, ftmp2, e2);
    felem_reduce(ftmp2, tmp);   /* 2^94 - 2^0 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^95 - 2^1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^96 - 2^2 */
    felem_mul(tmp, ftmp2, in);
    felem_reduce(ftmp2, tmp);   /* 2^96 - 3 */

    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(out, tmp);     /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */

/* smallfelem_inv_contract: out = in^{-1}, in minimal representation */
static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
    smallfelem_expand(tmp, in);
    felem_inv(tmp, tmp);
    felem_contract(out, tmp);
/*-
 * Building on top of the field operations we have the operations on the
 * elliptic curve group itself. Points on the curve are represented in Jacobian
 * coordinates.
 */

/*-
 * point_double calculates 2*(x_in, y_in, z_in)
 *
 * The method is taken from:
 *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
 *
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
 * while x_out == y_in is not (maybe this works, but it's not tested).
 */
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
    longfelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
    smallfelem small1, small2;

    felem_assign(ftmp, x_in);
    /* ftmp[i] < 2^106 */
    felem_assign(ftmp2, x_in);
    /* ftmp2[i] < 2^106 */

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);
    /* delta[i] < 2^101 */

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);
    /* gamma[i] < 2^101 */
    felem_shrink(small1, gamma);

    /* beta = x*gamma */
    felem_small_mul(tmp, small1, x_in);
    felem_reduce(beta, tmp);
    /* beta[i] < 2^101 */

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff(ftmp, delta);
    /* ftmp[i] < 2^105 + 2^106 < 2^107 */
    felem_sum(ftmp2, delta);
    /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
    felem_scalar(ftmp2, 3);
    /* ftmp2[i] < 3 * 2^107 < 2^109 */
    felem_mul(tmp, ftmp, ftmp2);
    felem_reduce(alpha, tmp);
    /* alpha[i] < 2^101 */
    felem_shrink(small2, alpha);

    /* x' = alpha^2 - 8*beta */
    smallfelem_square(tmp, small2);
    felem_reduce(x_out, tmp);
    felem_assign(ftmp, beta);
    felem_scalar(ftmp, 8);
    /* ftmp[i] < 8 * 2^101 = 2^104 */
    felem_diff(x_out, ftmp);
    /* x_out[i] < 2^105 + 2^101 < 2^106 */

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum(delta, gamma);
    /* delta[i] < 2^101 + 2^101 = 2^102 */
    felem_assign(ftmp, y_in);
    felem_sum(ftmp, z_in);
    /* ftmp[i] < 2^106 + 2^106 = 2^107 */
    felem_square(tmp, ftmp);
    felem_reduce(z_out, tmp);
    felem_diff(z_out, delta);
    /* z_out[i] < 2^105 + 2^101 < 2^106 */

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar(beta, 4);
    /* beta[i] < 4 * 2^101 = 2^103 */
    felem_diff_zero107(beta, x_out);
    /* beta[i] < 2^107 + 2^103 < 2^108 */
    felem_small_mul(tmp, small2, beta);
    /* tmp[i] < 7 * 2^64 < 2^67 */
    smallfelem_square(tmp2, small1);
    /* tmp2[i] < 7 * 2^64 */
    longfelem_scalar(tmp2, 8);
    /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
    longfelem_diff(tmp, tmp2);
    /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
    felem_reduce_zero105(y_out, tmp);
    /* y_out[i] < 2^106 */
/*-
 * point_double_small is the same as point_double, except that it operates on
 * smallfelems.
 */
static void
point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
                   const smallfelem x_in, const smallfelem y_in,
                   const smallfelem z_in)
    felem felem_x_out, felem_y_out, felem_z_out;
    felem felem_x_in, felem_y_in, felem_z_in;

    smallfelem_expand(felem_x_in, x_in);
    smallfelem_expand(felem_y_in, y_in);
    smallfelem_expand(felem_z_in, z_in);
    point_double(felem_x_out, felem_y_out, felem_z_out,
                 felem_x_in, felem_y_in, felem_z_in);
    felem_shrink(x_out, felem_x_out);
    felem_shrink(y_out, felem_y_out);
    felem_shrink(z_out, felem_z_out);

/* copy_conditional copies in to out iff mask is all ones. */
static void copy_conditional(felem out, const felem in, limb mask)
    /* constant-time select: xor-swap gated by mask, no branches */
    for (i = 0; i < NLIMBS; ++i) {
        const limb tmp = mask & (in[i] ^ out[i]);

/* copy_small_conditional copies in to out iff mask is all ones. */
static void copy_small_conditional(felem out, const smallfelem in, limb mask)
    const u64 mask64 = mask;
    /* constant-time select between the widened in[i] and out[i] */
    for (i = 0; i < NLIMBS; ++i) {
        out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
/*-
 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
 *
 * The method is taken from:
 *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
 *
 * This function includes a branch for checking whether the two input points
 * are equal, (while not equal to the point at infinity). This case never
 * happens during single point multiplication, so there is no timing leak for
 * ECDH or ECDSA signing.
 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const smallfelem x2,
                      const smallfelem y2, const smallfelem z2)
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
    longfelem tmp, tmp2;
    smallfelem small1, small2, small3, small4, small5;
    limb x_equal, y_equal, z1_is_zero, z2_is_zero;

    felem_shrink(small3, z1);

    z1_is_zero = smallfelem_is_zero(small3);
    z2_is_zero = smallfelem_is_zero(z2);

    /* ftmp = z1z1 = z1**2 */
    smallfelem_square(tmp, small3);
    felem_reduce(ftmp, tmp);
    /* ftmp[i] < 2^101 */
    felem_shrink(small1, ftmp);

    /* !mixed branch: z2 is a general element */
    /* ftmp2 = z2z2 = z2**2 */
    smallfelem_square(tmp, z2);
    felem_reduce(ftmp2, tmp);
    /* ftmp2[i] < 2^101 */
    felem_shrink(small2, ftmp2);

    felem_shrink(small5, x1);

    /* u1 = ftmp3 = x1*z2z2 */
    smallfelem_mul(tmp, small5, small2);
    felem_reduce(ftmp3, tmp);
    /* ftmp3[i] < 2^101 */

    /* ftmp5 = z1 + z2 */
    felem_assign(ftmp5, z1);
    felem_small_sum(ftmp5, z2);
    /* ftmp5[i] < 2^107 */

    /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
    felem_square(tmp, ftmp5);
    felem_reduce(ftmp5, tmp);
    /* ftmp2 = z2z2 + z1z1 */
    felem_sum(ftmp2, ftmp);
    /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
    felem_diff(ftmp5, ftmp2);
    /* ftmp5[i] < 2^105 + 2^101 < 2^106 */

    /* ftmp2 = z2 * z2z2 */
    smallfelem_mul(tmp, small2, z2);
    felem_reduce(ftmp2, tmp);

    /* s1 = ftmp2 = y1 * z2**3 */
    felem_mul(tmp, y1, ftmp2);
    felem_reduce(ftmp6, tmp);
    /* ftmp6[i] < 2^101 */

    /*
     * We'll assume z2 = 1 (special case z2 = 0 is handled later)
     */
    /* u1 = ftmp3 = x1*z2z2 */
    felem_assign(ftmp3, x1);
    /* ftmp3[i] < 2^106 */

    /* ftmp5 = 2z1z2 (with z2 = 1) */
    felem_assign(ftmp5, z1);
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2*2^106 = 2^107 */

    /* s1 = ftmp2 = y1 * z2**3 */
    felem_assign(ftmp6, y1);
    /* ftmp6[i] < 2^106 */

    /* u2 = x2*z1z1 */
    smallfelem_mul(tmp, x2, small1);
    felem_reduce(ftmp4, tmp);

    /* h = ftmp4 = u2 - u1 */
    felem_diff_zero107(ftmp4, ftmp3);
    /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
    felem_shrink(small4, ftmp4);

    x_equal = smallfelem_is_zero(small4);

    /* z_out = ftmp5 * h */
    felem_small_mul(tmp, small4, ftmp5);
    felem_reduce(z_out, tmp);
    /* z_out[i] < 2^101 */

    /* ftmp = z1 * z1z1 */
    smallfelem_mul(tmp, small1, small3);
    felem_reduce(ftmp, tmp);

    /* s2 = tmp = y2 * z1**3 */
    felem_small_mul(tmp, y2, ftmp);
    felem_reduce(ftmp5, tmp);

    /* r = ftmp5 = (s2 - s1)*2 */
    felem_diff_zero107(ftmp5, ftmp6);
    /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2^109 */
    felem_shrink(small1, ftmp5);
    y_equal = smallfelem_is_zero(small1);

    /*-
     * The formulae are incorrect if the points are equal, in affine coordinates
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
     * happens.
     *
     * We use bitwise operations to avoid potential side-channels introduced by
     * the short-circuiting behaviour of boolean operators.
     *
     * The special case of either point being the point at infinity (z1 and/or
     * z2 are zero), is handled separately later on in this function, so we
     * avoid jumping to point_double here in those special cases.
     */
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));

    /*
     * This is obviously not constant-time but, as mentioned before, this
     * case never happens during single point multiplication, so there is no
     * timing leak for ECDH or ECDSA signing.
     */
    point_double(x3, y3, z3, x1, y1, z1);

    /* I = ftmp = (2h)**2 */
    felem_assign(ftmp, ftmp4);
    felem_scalar(ftmp, 2);
    /* ftmp[i] < 2*2^108 = 2^109 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* J = ftmp2 = h * I */
    felem_mul(tmp, ftmp4, ftmp);
    felem_reduce(ftmp2, tmp);

    /* V = ftmp4 = U1 * I */
    felem_mul(tmp, ftmp3, ftmp);
    felem_reduce(ftmp4, tmp);

    /* x_out = r**2 - J - 2V */
    smallfelem_square(tmp, small1);
    felem_reduce(x_out, tmp);
    felem_assign(ftmp3, ftmp4);
    felem_scalar(ftmp4, 2);
    felem_sum(ftmp4, ftmp2);
    /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
    felem_diff(x_out, ftmp4);
    /* x_out[i] < 2^105 + 2^101 */

    /* y_out = r(V-x_out) - 2 * s1 * J */
    felem_diff_zero107(ftmp3, x_out);
    /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
    felem_small_mul(tmp, small1, ftmp3);
    felem_mul(tmp2, ftmp6, ftmp2);
    longfelem_scalar(tmp2, 2);
    /* tmp2[i] < 2*2^67 = 2^68 */
    longfelem_diff(tmp, tmp2);
    /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
    felem_reduce_zero105(y_out, tmp);
    /* y_out[i] < 2^106 */

    /* handle the point-at-infinity special cases with constant-time copies */
    copy_small_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_small_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_small_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);

/*-
 * point_add_small is the same as point_add, except that it operates on
 * smallfelems.
 */
static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
                            smallfelem x1, smallfelem y1, smallfelem z1,
                            smallfelem x2, smallfelem y2, smallfelem z2)
    felem felem_x3, felem_y3, felem_z3;
    felem felem_x1, felem_y1, felem_z1;
    smallfelem_expand(felem_x1, x1);
    smallfelem_expand(felem_y1, y1);
    smallfelem_expand(felem_z1, z1);
    point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0,
    felem_shrink(x3, felem_x3);
    felem_shrink(y3, felem_y3);
    felem_shrink(z3, felem_z3);
1440 * Base point pre computation
1441 * --------------------------
1443 * Two different sorts of precomputed tables are used in the following code.
1444 * Each contain various points on the curve, where each point is three field
1445 * elements (x, y, z).
1447 * For the base point table, z is usually 1 (0 for the point at infinity).
1448 * This table has 2 * 16 elements, starting with the following:
1449 * index | bits | point
1450 * ------+---------+------------------------------
 *     0 | 0 0 0 0 | the point at infinity
 *     1 | 0 0 0 1 | G
1453 * 2 | 0 0 1 0 | 2^64G
1454 * 3 | 0 0 1 1 | (2^64 + 1)G
1455 * 4 | 0 1 0 0 | 2^128G
1456 * 5 | 0 1 0 1 | (2^128 + 1)G
1457 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1458 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1459 * 8 | 1 0 0 0 | 2^192G
1460 * 9 | 1 0 0 1 | (2^192 + 1)G
1461 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1462 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1463 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1464 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1465 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1466 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1467 * followed by a copy of this with each element multiplied by 2^32.
1469 * The reason for this is so that we can clock bits into four different
1470 * locations when doing simple scalar multiplies against the base point,
1471 * and then another four locations using the second 16 elements.
1473 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1475 /* gmul is the table of precomputed base points */
1476 static const smallfelem gmul[2][16][3] = {
1480 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
1481 0x6b17d1f2e12c4247},
1482 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
1483 0x4fe342e2fe1a7f9b},
1485 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
1486 0x0fa822bc2811aaa5},
1487 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
1488 0xbff44ae8f5dba80d},
1490 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
1491 0x300a4bbc89d6726f},
1492 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
1493 0x72aac7e0d09b4644},
1495 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
1496 0x447d739beedb5e67},
1497 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
1498 0x2d4825ab834131ee},
1500 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
1501 0xef9519328a9c72ff},
1502 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
1503 0x611e9fc37dbb2c9b},
1505 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
1506 0x550663797b51f5d8},
1507 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
1508 0x157164848aecb851},
1510 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
1511 0xeb5d7745b21141ea},
1512 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
1513 0xeafd72ebdbecc17b},
1515 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
1516 0xa6d39677a7849276},
1517 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
1518 0x674f84749b0b8816},
1520 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
1521 0x4e769e7672c9ddad},
1522 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
1523 0x42b99082de830663},
1525 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
1526 0x78878ef61c6ce04d},
1527 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
1528 0xb6cb3f5d7b72c321},
1530 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
1531 0x0c88bc4d716b1287},
1532 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
1533 0xdd5ddea3f3901dc6},
1535 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
1536 0x68f344af6b317466},
1537 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
1538 0x31b9c405f8540a20},
1540 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
1541 0x4052bf4b6f461db9},
1542 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
1543 0xfecf4d5190b0fc61},
1545 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
1546 0x1eddbae2c802e41a},
1547 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
1548 0x43104d86560ebcfc},
1550 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
1551 0xb48e26b484f7a21c},
1552 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
1553 0xfac015404d4d3dab},
1558 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
1559 0x7fe36b40af22af89},
1560 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
1561 0xe697d45825b63624},
1563 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
1564 0x4a5b506612a677a6},
1565 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
1566 0xeb13461ceac089f1},
1568 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
1569 0x0781b8291c6a220a},
1570 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
1571 0x690cde8df0151593},
1573 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
1574 0x8a535f566ec73617},
1575 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
1576 0x0455c08468b08bd7},
1578 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
1579 0x06bada7ab77f8276},
1580 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
1581 0x5b476dfd0e6cb18a},
1583 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
1584 0x3e29864e8a2ec908},
1585 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
1586 0x239b90ea3dc31e7e},
1588 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
1589 0x820f4dd949f72ff7},
1590 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
1591 0x140406ec783a05ec},
1593 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
1594 0x68f6b8542783dfee},
1595 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
1596 0xcbe1feba92e40ce6},
1598 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
1599 0xd0b2f94d2f420109},
1600 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
1601 0x971459828b0719e5},
1603 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
1604 0x961610004a866aba},
1605 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
1606 0x7acb9fadcee75e44},
1608 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
1609 0x24eb9acca333bf5b},
1610 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
1611 0x69f891c5acd079cc},
1613 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
1614 0xe51f547c5972a107},
1615 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
1616 0x1c309a2b25bb1387},
1618 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
1619 0x20b87b8aa2c4e503},
1620 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
1621 0xf5c6fa49919776be},
1623 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
1624 0x1ed7d1b9332010b9},
1625 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
1626 0x3a2b03f03217257a},
1628 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
1629 0x15fee545c78dd9f6},
1630 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
1631 0x4ab5b6b2b8753f81},
1636 * select_point selects the |idx|th point from a precomputation table and
 * copies it to out, in constant time.
1639 static void select_point(const u64 idx, unsigned int size,
1640 const smallfelem pre_comp[16][3], smallfelem out[3])
1643 u64 *outlimbs = &out[0][0];
1645 memset(out, 0, sizeof(*out) * 3);
1647 for (i = 0; i < size; i++) {
1648 const u64 *inlimbs = (u64 *)&pre_comp[i][0][0];
1655 for (j = 0; j < NLIMBS * 3; j++)
1656 outlimbs[j] |= inlimbs[j] & mask;
1660 /* get_bit returns the |i|th bit in |in| */
1661 static char get_bit(const felem_bytearray in, int i)
1663 if ((i < 0) || (i >= 256))
1665 return (in[i >> 3] >> (i & 7)) & 1;
1669 * Interleaved point multiplication using precomputed point multiples: The
1670 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1671 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1672 * generator, using certain (large) precomputed multiples in g_pre_comp.
1673 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1675 static void batch_mul(felem x_out, felem y_out, felem z_out,
1676 const felem_bytearray scalars[],
1677 const unsigned num_points, const u8 *g_scalar,
1678 const int mixed, const smallfelem pre_comp[][17][3],
1679 const smallfelem g_pre_comp[2][16][3])
1682 unsigned num, gen_mul = (g_scalar != NULL);
1688 /* set nq to the point at infinity */
1689 memset(nq, 0, sizeof(nq));
1692 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1693 * of the generator (two in each of the last 32 rounds) and additions of
1694 * other points multiples (every 5th round).
1696 skip = 1; /* save two point operations in the first
1698 for (i = (num_points ? 255 : 31); i >= 0; --i) {
1701 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1703 /* add multiples of the generator */
1704 if (gen_mul && (i <= 31)) {
1705 /* first, look 32 bits upwards */
1706 bits = get_bit(g_scalar, i + 224) << 3;
1707 bits |= get_bit(g_scalar, i + 160) << 2;
1708 bits |= get_bit(g_scalar, i + 96) << 1;
1709 bits |= get_bit(g_scalar, i + 32);
1710 /* select the point to add, in constant time */
1711 select_point(bits, 16, g_pre_comp[1], tmp);
1714 /* Arg 1 below is for "mixed" */
1715 point_add(nq[0], nq[1], nq[2],
1716 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1718 smallfelem_expand(nq[0], tmp[0]);
1719 smallfelem_expand(nq[1], tmp[1]);
1720 smallfelem_expand(nq[2], tmp[2]);
1724 /* second, look at the current position */
1725 bits = get_bit(g_scalar, i + 192) << 3;
1726 bits |= get_bit(g_scalar, i + 128) << 2;
1727 bits |= get_bit(g_scalar, i + 64) << 1;
1728 bits |= get_bit(g_scalar, i);
1729 /* select the point to add, in constant time */
1730 select_point(bits, 16, g_pre_comp[0], tmp);
1731 /* Arg 1 below is for "mixed" */
1732 point_add(nq[0], nq[1], nq[2],
1733 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1736 /* do other additions every 5 doublings */
1737 if (num_points && (i % 5 == 0)) {
1738 /* loop over all scalars */
1739 for (num = 0; num < num_points; ++num) {
1740 bits = get_bit(scalars[num], i + 4) << 5;
1741 bits |= get_bit(scalars[num], i + 3) << 4;
1742 bits |= get_bit(scalars[num], i + 2) << 3;
1743 bits |= get_bit(scalars[num], i + 1) << 2;
1744 bits |= get_bit(scalars[num], i) << 1;
1745 bits |= get_bit(scalars[num], i - 1);
1746 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1749 * select the point to add or subtract, in constant time
1751 select_point(digit, 17, pre_comp[num], tmp);
1752 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
1754 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1755 felem_contract(tmp[1], ftmp);
1758 point_add(nq[0], nq[1], nq[2],
1759 nq[0], nq[1], nq[2],
1760 mixed, tmp[0], tmp[1], tmp[2]);
1762 smallfelem_expand(nq[0], tmp[0]);
1763 smallfelem_expand(nq[1], tmp[1]);
1764 smallfelem_expand(nq[2], tmp[2]);
1770 felem_assign(x_out, nq[0]);
1771 felem_assign(y_out, nq[1]);
1772 felem_assign(z_out, nq[2]);
1775 /* Precomputation for the group generator. */
1776 struct nistp256_pre_comp_st {
1777 smallfelem g_pre_comp[2][16][3];
1778 CRYPTO_REF_COUNT references;
1779 CRYPTO_RWLOCK *lock;
1782 const EC_METHOD *EC_GFp_nistp256_method(void)
1784 static const EC_METHOD ret = {
1785 EC_FLAGS_DEFAULT_OCT,
1786 NID_X9_62_prime_field,
1787 ec_GFp_nistp256_group_init,
1788 ec_GFp_simple_group_finish,
1789 ec_GFp_simple_group_clear_finish,
1790 ec_GFp_nist_group_copy,
1791 ec_GFp_nistp256_group_set_curve,
1792 ec_GFp_simple_group_get_curve,
1793 ec_GFp_simple_group_get_degree,
1794 ec_group_simple_order_bits,
1795 ec_GFp_simple_group_check_discriminant,
1796 ec_GFp_simple_point_init,
1797 ec_GFp_simple_point_finish,
1798 ec_GFp_simple_point_clear_finish,
1799 ec_GFp_simple_point_copy,
1800 ec_GFp_simple_point_set_to_infinity,
1801 ec_GFp_simple_point_set_affine_coordinates,
1802 ec_GFp_nistp256_point_get_affine_coordinates,
1803 0 /* point_set_compressed_coordinates */ ,
1808 ec_GFp_simple_invert,
1809 ec_GFp_simple_is_at_infinity,
1810 ec_GFp_simple_is_on_curve,
1812 ec_GFp_simple_make_affine,
1813 ec_GFp_simple_points_make_affine,
1814 ec_GFp_nistp256_points_mul,
1815 ec_GFp_nistp256_precompute_mult,
1816 ec_GFp_nistp256_have_precompute_mult,
1817 ec_GFp_nist_field_mul,
1818 ec_GFp_nist_field_sqr,
1820 ec_GFp_simple_field_inv,
1821 0 /* field_encode */ ,
1822 0 /* field_decode */ ,
1823 0, /* field_set_to_one */
1824 ec_key_simple_priv2oct,
1825 ec_key_simple_oct2priv,
1826 0, /* set private */
1827 ec_key_simple_generate_key,
1828 ec_key_simple_check_key,
1829 ec_key_simple_generate_public_key,
1832 ecdh_simple_compute_key,
1833 ecdsa_simple_sign_setup,
1834 ecdsa_simple_sign_sig,
1835 ecdsa_simple_verify_sig,
1836 0, /* field_inverse_mod_ord */
1837 0, /* blind_coordinates */
1839 0, /* ladder_step */
1846 /******************************************************************************/
1848 * FUNCTIONS TO MANAGE PRECOMPUTATION
1851 static NISTP256_PRE_COMP *nistp256_pre_comp_new(void)
1853 NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1856 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1860 ret->references = 1;
1862 ret->lock = CRYPTO_THREAD_lock_new();
1863 if (ret->lock == NULL) {
1864 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1871 NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)
1875 CRYPTO_UP_REF(&p->references, &i, p->lock);
1879 void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)
1886 CRYPTO_DOWN_REF(&pre->references, &i, pre->lock);
1887 REF_PRINT_COUNT("EC_nistp256", x);
1890 REF_ASSERT_ISNT(i < 0);
1892 CRYPTO_THREAD_lock_free(pre->lock);
1896 /******************************************************************************/
1898 * OPENSSL EC_METHOD FUNCTIONS
1901 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1904 ret = ec_GFp_simple_group_init(group);
1905 group->a_is_minus3 = 1;
1909 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1910 const BIGNUM *a, const BIGNUM *b,
1914 BIGNUM *curve_p, *curve_a, *curve_b;
1916 BN_CTX *new_ctx = NULL;
1919 ctx = new_ctx = BN_CTX_new();
1925 curve_p = BN_CTX_get(ctx);
1926 curve_a = BN_CTX_get(ctx);
1927 curve_b = BN_CTX_get(ctx);
1928 if (curve_b == NULL)
1930 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1931 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1932 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1933 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1934 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1935 EC_R_WRONG_CURVE_PARAMETERS);
1938 group->field_mod_func = BN_nist_mod_256;
1939 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1943 BN_CTX_free(new_ctx);
1949 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1952 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1953 const EC_POINT *point,
1954 BIGNUM *x, BIGNUM *y,
1957 felem z1, z2, x_in, y_in;
1958 smallfelem x_out, y_out;
1961 if (EC_POINT_is_at_infinity(group, point)) {
1962 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1963 EC_R_POINT_AT_INFINITY);
1966 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1967 (!BN_to_felem(z1, point->Z)))
1970 felem_square(tmp, z2);
1971 felem_reduce(z1, tmp);
1972 felem_mul(tmp, x_in, z1);
1973 felem_reduce(x_in, tmp);
1974 felem_contract(x_out, x_in);
1976 if (!smallfelem_to_BN(x, x_out)) {
1977 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1982 felem_mul(tmp, z1, z2);
1983 felem_reduce(z1, tmp);
1984 felem_mul(tmp, y_in, z1);
1985 felem_reduce(y_in, tmp);
1986 felem_contract(y_out, y_in);
1988 if (!smallfelem_to_BN(y, y_out)) {
1989 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1997 /* points below is of size |num|, and tmp_smallfelems is of size |num+1| */
1998 static void make_points_affine(size_t num, smallfelem points[][3],
1999 smallfelem tmp_smallfelems[])
2002 * Runs in constant time, unless an input is the point at infinity (which
2003 * normally shouldn't happen).
2005 ec_GFp_nistp_points_make_affine_internal(num,
2009 (void (*)(void *))smallfelem_one,
2010 smallfelem_is_zero_int,
2011 (void (*)(void *, const void *))
2013 (void (*)(void *, const void *))
2014 smallfelem_square_contract,
2016 (void *, const void *,
2018 smallfelem_mul_contract,
2019 (void (*)(void *, const void *))
2020 smallfelem_inv_contract,
2021 /* nothing to contract */
2022 (void (*)(void *, const void *))
2027 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
2028 * values. The result is stored in r (r can equal one of the inputs).
2030 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
2031 const BIGNUM *scalar, size_t num,
2032 const EC_POINT *points[],
2033 const BIGNUM *scalars[], BN_CTX *ctx)
2038 BIGNUM *x, *y, *z, *tmp_scalar;
2039 felem_bytearray g_secret;
2040 felem_bytearray *secrets = NULL;
2041 smallfelem (*pre_comp)[17][3] = NULL;
2042 smallfelem *tmp_smallfelems = NULL;
2045 int have_pre_comp = 0;
2046 size_t num_points = num;
2047 smallfelem x_in, y_in, z_in;
2048 felem x_out, y_out, z_out;
2049 NISTP256_PRE_COMP *pre = NULL;
2050 const smallfelem(*g_pre_comp)[16][3] = NULL;
2051 EC_POINT *generator = NULL;
2052 const EC_POINT *p = NULL;
2053 const BIGNUM *p_scalar = NULL;
2056 x = BN_CTX_get(ctx);
2057 y = BN_CTX_get(ctx);
2058 z = BN_CTX_get(ctx);
2059 tmp_scalar = BN_CTX_get(ctx);
2060 if (tmp_scalar == NULL)
2063 if (scalar != NULL) {
2064 pre = group->pre_comp.nistp256;
2066 /* we have precomputation, try to use it */
2067 g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;
2069 /* try to use the standard precomputation */
2070 g_pre_comp = &gmul[0];
2071 generator = EC_POINT_new(group);
2072 if (generator == NULL)
2074 /* get the generator from precomputation */
2075 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
2076 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
2077 !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
2078 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2081 if (!ec_GFp_simple_set_Jprojective_coordinates_GFp(group, generator, x,
2084 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2085 /* precomputation matches generator */
2089 * we don't have valid precomputation: treat the generator as a
2094 if (num_points > 0) {
2095 if (num_points >= 3) {
2097 * unless we precompute multiples for just one or two points,
2098 * converting those into affine form is time well spent
2102 secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);
2103 pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);
2106 OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));
2107 if ((secrets == NULL) || (pre_comp == NULL)
2108 || (mixed && (tmp_smallfelems == NULL))) {
2109 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
2114 * we treat NULL scalars as 0, and NULL points as points at infinity,
2115 * i.e., they contribute nothing to the linear combination
2117 memset(secrets, 0, sizeof(*secrets) * num_points);
2118 memset(pre_comp, 0, sizeof(*pre_comp) * num_points);
2119 for (i = 0; i < num_points; ++i) {
2122 * we didn't have a valid precomputation, so we pick the
2125 p = EC_GROUP_get0_generator(group);
2128 /* the i^th point */
2130 p_scalar = scalars[i];
2132 if ((p_scalar != NULL) && (p != NULL)) {
2133 /* reduce scalar to 0 <= scalar < 2^256 */
2134 if ((BN_num_bits(p_scalar) > 256)
2135 || (BN_is_negative(p_scalar))) {
2137 * this is an unusual input, and we don't guarantee
2140 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
2141 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2144 num_bytes = BN_bn2lebinpad(tmp_scalar,
2145 secrets[i], sizeof(secrets[i]));
2147 num_bytes = BN_bn2lebinpad(p_scalar,
2148 secrets[i], sizeof(secrets[i]));
2150 if (num_bytes < 0) {
2151 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2154 /* precompute multiples */
2155 if ((!BN_to_felem(x_out, p->X)) ||
2156 (!BN_to_felem(y_out, p->Y)) ||
2157 (!BN_to_felem(z_out, p->Z)))
2159 felem_shrink(pre_comp[i][1][0], x_out);
2160 felem_shrink(pre_comp[i][1][1], y_out);
2161 felem_shrink(pre_comp[i][1][2], z_out);
2162 for (j = 2; j <= 16; ++j) {
2164 point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],
2165 pre_comp[i][j][2], pre_comp[i][1][0],
2166 pre_comp[i][1][1], pre_comp[i][1][2],
2167 pre_comp[i][j - 1][0],
2168 pre_comp[i][j - 1][1],
2169 pre_comp[i][j - 1][2]);
2171 point_double_small(pre_comp[i][j][0],
2174 pre_comp[i][j / 2][0],
2175 pre_comp[i][j / 2][1],
2176 pre_comp[i][j / 2][2]);
2182 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
2185 /* the scalar for the generator */
2186 if ((scalar != NULL) && (have_pre_comp)) {
2187 memset(g_secret, 0, sizeof(g_secret));
2188 /* reduce scalar to 0 <= scalar < 2^256 */
2189 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
2191 * this is an unusual input, and we don't guarantee
2194 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
2195 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2198 num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
2200 num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
2202 /* do the multiplication with generator precomputation */
2203 batch_mul(x_out, y_out, z_out,
2204 (const felem_bytearray(*))secrets, num_points,
2206 mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
2208 /* do the multiplication without generator precomputation */
2209 batch_mul(x_out, y_out, z_out,
2210 (const felem_bytearray(*))secrets, num_points,
2211 NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
2213 /* reduce the output to its unique minimal representation */
2214 felem_contract(x_in, x_out);
2215 felem_contract(y_in, y_out);
2216 felem_contract(z_in, z_out);
2217 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2218 (!smallfelem_to_BN(z, z_in))) {
2219 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2222 ret = ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2226 EC_POINT_free(generator);
2227 OPENSSL_free(secrets);
2228 OPENSSL_free(pre_comp);
2229 OPENSSL_free(tmp_smallfelems);
2233 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2236 NISTP256_PRE_COMP *pre = NULL;
2239 EC_POINT *generator = NULL;
2240 smallfelem tmp_smallfelems[32];
2241 felem x_tmp, y_tmp, z_tmp;
2243 BN_CTX *new_ctx = NULL;
2246 /* throw away old precomputation */
2247 EC_pre_comp_free(group);
2251 ctx = new_ctx = BN_CTX_new();
2257 x = BN_CTX_get(ctx);
2258 y = BN_CTX_get(ctx);
2261 /* get the generator */
2262 if (group->generator == NULL)
2264 generator = EC_POINT_new(group);
2265 if (generator == NULL)
2267 BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
2268 BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
2269 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
2271 if ((pre = nistp256_pre_comp_new()) == NULL)
2274 * if the generator is the standard one, use built-in precomputation
2276 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2277 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2280 if ((!BN_to_felem(x_tmp, group->generator->X)) ||
2281 (!BN_to_felem(y_tmp, group->generator->Y)) ||
2282 (!BN_to_felem(z_tmp, group->generator->Z)))
2284 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2285 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2286 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2288 * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
2289 * 2^160*G, 2^224*G for the second one
2291 for (i = 1; i <= 8; i <<= 1) {
2292 point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2293 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
2294 pre->g_pre_comp[0][i][1],
2295 pre->g_pre_comp[0][i][2]);
2296 for (j = 0; j < 31; ++j) {
2297 point_double_small(pre->g_pre_comp[1][i][0],
2298 pre->g_pre_comp[1][i][1],
2299 pre->g_pre_comp[1][i][2],
2300 pre->g_pre_comp[1][i][0],
2301 pre->g_pre_comp[1][i][1],
2302 pre->g_pre_comp[1][i][2]);
2306 point_double_small(pre->g_pre_comp[0][2 * i][0],
2307 pre->g_pre_comp[0][2 * i][1],
2308 pre->g_pre_comp[0][2 * i][2],
2309 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2310 pre->g_pre_comp[1][i][2]);
2311 for (j = 0; j < 31; ++j) {
2312 point_double_small(pre->g_pre_comp[0][2 * i][0],
2313 pre->g_pre_comp[0][2 * i][1],
2314 pre->g_pre_comp[0][2 * i][2],
2315 pre->g_pre_comp[0][2 * i][0],
2316 pre->g_pre_comp[0][2 * i][1],
2317 pre->g_pre_comp[0][2 * i][2]);
2320 for (i = 0; i < 2; i++) {
2321 /* g_pre_comp[i][0] is the point at infinity */
2322 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2323 /* the remaining multiples */
2324 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2325 point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
2326 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
2327 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2328 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2329 pre->g_pre_comp[i][2][2]);
2330 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2331 point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
2332 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
2333 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2334 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2335 pre->g_pre_comp[i][2][2]);
2336 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2337 point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
2338 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
2339 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2340 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
2341 pre->g_pre_comp[i][4][2]);
2343 * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
2345 point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
2346 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
2347 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2348 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2349 pre->g_pre_comp[i][2][2]);
2350 for (j = 1; j < 8; ++j) {
2351 /* odd multiples: add G resp. 2^32*G */
2352 point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
2353 pre->g_pre_comp[i][2 * j + 1][1],
2354 pre->g_pre_comp[i][2 * j + 1][2],
2355 pre->g_pre_comp[i][2 * j][0],
2356 pre->g_pre_comp[i][2 * j][1],
2357 pre->g_pre_comp[i][2 * j][2],
2358 pre->g_pre_comp[i][1][0],
2359 pre->g_pre_comp[i][1][1],
2360 pre->g_pre_comp[i][1][2]);
2363 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2366 SETPRECOMP(group, nistp256, pre);
2372 EC_POINT_free(generator);
2374 BN_CTX_free(new_ctx);
2376 EC_nistp256_pre_comp_free(pre);
2380 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2382 return HAVEPRECOMP(group, nistp256);