1 /* crypto/ec/ecp_nistp256.c */
3 * Written by Adam Langley (Google) for the OpenSSL project
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
29 #ifdef EC_NISTP_64_GCC_128
#include <string.h>
#include <openssl/err.h>
36 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
37 /* even with gcc, the typedef won't work for 32-bit platforms */
38 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
39 typedef __int128_t int128_t;
41 #error "Need GCC 3.1 or later to define type uint128_t"
49 /* The underlying field.
51 * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
52 * of this field into 32 bytes. We call this an felem_bytearray. */
54 typedef u8 felem_bytearray[32];
56 /* These are the parameters of P256, taken from FIPS 186-3, page 86. These
57 * values are big-endian. */
/* These are the parameters of P256, taken from FIPS 186-3, page 86. These
 * values are big-endian. Order of the five entries: p, a, b, x, y. */
static const felem_bytearray nistp256_curve_params[5] = {
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},
{0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, /* b */
0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
{0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
{0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
81 /* The representation of field elements.
82 * ------------------------------------
84 * We represent field elements with either four 128-bit values, eight 128-bit
85 * values, or four 64-bit values. The field element represented is:
86 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
88 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
90 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
91 * apart, but are 128-bits wide, the most significant bits of each limb overlap
92 * with the least significant bits of the next.
94 * A field element with four limbs is an 'felem'. One with eight limbs is a
97 * A field element with four, 64-bit values is called a 'smallfelem'. Small
98 * values are used as intermediate values before multiplication.
/* A limb is 128 bits wide but holds a 64-bit digit plus slack, so sums of
 * products never overflow. */
typedef uint128_t limb;
typedef limb felem[NLIMBS];
typedef limb longfelem[NLIMBS * 2];
typedef u64 smallfelem[NLIMBS];
/* This is the value of the prime as four 64-bit words, little-endian. */
static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
static const limb bottom32bits = 0xffffffff;
static const u64 bottom63bits = 0x7ffffffffffffffful;
113 /* bin32_to_felem takes a little-endian byte array and converts it into felem
114 * form. This assumes that the CPU is little-endian. */
115 static void bin32_to_felem(felem out, const u8 in[32])
117 out[0] = *((u64*) &in[0]);
118 out[1] = *((u64*) &in[8]);
119 out[2] = *((u64*) &in[16]);
120 out[3] = *((u64*) &in[24]);
123 /* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
124 * 32 byte array. This assumes that the CPU is little-endian. */
125 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
127 *((u64*) &out[0]) = in[0];
128 *((u64*) &out[8]) = in[1];
129 *((u64*) &out[16]) = in[2];
130 *((u64*) &out[24]) = in[3];
133 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
134 static void flip_endian(u8 *out, const u8 *in, unsigned len)
137 for (i = 0; i < len; ++i)
138 out[i] = in[len-1-i];
/* BN_to_felem converts an OpenSSL BIGNUM into an felem.
 * Rejects values that do not fit in 32 bytes and negative values,
 * raising EC_R_BIGNUM_OUT_OF_RANGE (the early-return statements are
 * elided in this view — TODO confirm against the full file). */
static int BN_to_felem(felem out, const BIGNUM *bn)
felem_bytearray b_in;
felem_bytearray b_out;
/* BN_bn2bin eats leading zeroes */
memset(b_out, 0, sizeof b_out);
num_bytes = BN_num_bytes(bn);
if (num_bytes > sizeof b_out)
ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
if (BN_is_negative(bn))
ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
/* BN_bn2bin writes big-endian; flip into the little-endian layout that
 * bin32_to_felem expects. */
num_bytes = BN_bn2bin(bn, b_in);
flip_endian(b_out, b_in, num_bytes);
bin32_to_felem(out, b_out);
/* smallfelem_to_BN converts a smallfelem into an OpenSSL BIGNUM.
 * Returns the result of BN_bin2bn: |out| on success, NULL on allocation
 * failure. */
static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
felem_bytearray b_in, b_out;
smallfelem_to_bin32(b_in, in);
/* BN_bin2bn expects big-endian input, so reverse the little-endian bytes. */
flip_endian(b_out, b_in, sizeof b_out);
return BN_bin2bn(b_out, sizeof b_out, out);
* ---------------- */
/* smallfelem_one sets out = 1 */
static void smallfelem_one(smallfelem out)
/* smallfelem_assign copies in to out */
static void smallfelem_assign(smallfelem out, const smallfelem in)
/* felem_assign copies in to out */
static void felem_assign(felem out, const felem in)
/* felem_sum sets out = out + in. */
static void felem_sum(felem out, const felem in)
/* felem_small_sum sets out = out + in. */
static void felem_small_sum(felem out, const smallfelem in)
/* felem_scalar sets out = out * scalar */
static void felem_scalar(felem out, const u64 scalar)
/* longfelem_scalar sets out = out * scalar */
static void longfelem_scalar(longfelem out, const u64 scalar)
244 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
245 #define two105 (((limb)1) << 105)
246 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
248 /* zero105 is 0 mod p */
249 static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
/* smallfelem_neg sets |out| to |-small|
 * On exit:
 * out[i] < out[i] + 2^105
 */
static void smallfelem_neg(felem out, const smallfelem small)
/* In order to prevent underflow, we subtract from 0 mod p. */
/* zero105 >= 2^64 > any small[i], so each limb subtraction cannot wrap. */
out[0] = zero105[0] - small[0];
out[1] = zero105[1] - small[1];
out[2] = zero105[2] - small[2];
out[3] = zero105[3] - small[3];
/* felem_diff subtracts |in| from |out|
 * On exit:
 * out[i] < out[i] + 2^105
 */
static void felem_diff(felem out, const felem in)
/* In order to prevent underflow, we add 0 mod p before subtracting. */
/* Requires in[i] < 2^104 so that zero105[i] + out[i] - in[i] cannot wrap
 * (the actual subtraction statements are elided in this view). */
out[0] += zero105[0];
out[1] += zero105[1];
out[2] += zero105[2];
out[3] += zero105[3];
284 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
285 #define two107 (((limb)1) << 107)
286 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
288 /* zero107 is 0 mod p */
289 static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
/* An alternative felem_diff for larger inputs |in|
 * felem_diff_zero107 subtracts |in| from |out|
 * On exit:
 * out[i] < out[i] + 2^107
 */
static void felem_diff_zero107(felem out, const felem in)
/* In order to prevent underflow, we add 0 mod p before subtracting. */
/* zero107 tolerates in[i] up to 2^106, where felem_diff's zero105 would
 * underflow (subtraction statements elided in this view). */
out[0] += zero107[0];
out[1] += zero107[1];
out[2] += zero107[2];
out[3] += zero107[3];
/* longfelem_diff subtracts |in| from |out|
 * On exit:
 * out[i] < out[i] + 2^70 + 2^40
 */
static void longfelem_diff(longfelem out, const longfelem in)
static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
static const limb two70 = (((limb)1) << 70);
static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
/* add 0 mod p to avoid underflow */
/* The five constants above together form 0 mod p spread over the eight
 * longfelem limbs (remaining additions elided in this view). */
out[3] += two70m40m38p6;
/* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
347 #define two64m0 (((limb)1) << 64) - 1
348 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
349 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
350 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
352 /* zero110 is 0 mod p */
353 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
/* felem_shrink converts an felem into a smallfelem. The result isn't quite
 * minimal as the value may be greater than p.
 * Implemented branch-free: the conditional subtraction of kPrime is done
 * with masks so the running time is independent of the value. */
static void felem_shrink(smallfelem out, const felem in)
/* Fold the high half of in[2] into tmp[3] and add 0 mod p so later
 * subtractions cannot underflow. */
tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
tmp[2] = zero110[2] + (u64) in[2];
tmp[0] = zero110[0] + in[0];
tmp[1] = zero110[1] + in[1];
/* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
/* We perform two partial reductions where we eliminate the
 * high-word of tmp[3]. We don't update the other words till the end.
 */
u64 a = tmp[3] >> 64; /* a < 2^46 */
tmp[3] = (u64) tmp[3];
/* a*2^256 = a*(2^224 - 2^192 - 2^96 + 2^0) mod p; the 2^224 part lands
 * in the top 32 bits of tmp[3]. */
tmp[3] += ((limb)a) << 32;
a = tmp[3] >> 64; /* a < 2^15 */
b += a; /* b < 2^46 + 2^15 < 2^47 */
tmp[3] = (u64) tmp[3];
tmp[3] += ((limb)a) << 32;
/* tmp[3] < 2^64 + 2^47 */
/* This adjusts the other two words to complete the two partial
 * reductions. */
tmp[1] -= (((limb)b) << 32);
/* In order to make space in tmp[3] for the carry from 2 -> 3, we
 * conditionally subtract kPrime if tmp[3] is large enough. */
static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
s64 high = tmp[3] >> 64;
/* As tmp[3] < 2^65, high is either 1 or 0 */
/* high becomes (via arithmetic shift, elided here):
 * all ones if the high word of tmp[3] is 1
 * all zeros if the high word of tmp[3] if 0 */
u64 mask = low >> 63;
/* mask is:
 * all ones if the MSB of low is 1
 * all zeros if the MSB of low if 0 */
/* if low was greater than kPrime3Test then the MSB is zero */
/* mask becomes:
 * all ones if low was > kPrime3Test
 * all zeros if low was <= kPrime3Test */
mask = (mask & low) | high;
/* Constant-time conditional subtraction of p. */
tmp[0] -= mask & kPrime[0];
tmp[1] -= mask & kPrime[1];
/* kPrime[2] is zero, so omitted */
tmp[3] -= mask & kPrime[3];
/* tmp[3] < 2**64 - 2**32 + 1 */
/* Propagate the carries to produce four clean 64-bit words. */
tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
/* smallfelem_expand converts a smallfelem to an felem by widening each
 * 64-bit word to a 128-bit limb (loop body elided in this view). */
static void smallfelem_expand(felem out, const smallfelem in)
/* smallfelem_square sets |out| = |small|^2
 * On exit:
 * out[i] < 7 * 2^64 < 2^67
 * Schoolbook squaring: each cross product small[i]*small[j] (i < j) is
 * computed once and accumulated twice (accumulation lines elided here). */
static void smallfelem_square(longfelem out, const smallfelem small)
a = ((uint128_t) small[0]) * small[0];
a = ((uint128_t) small[0]) * small[1];
a = ((uint128_t) small[0]) * small[2];
a = ((uint128_t) small[0]) * small[3];
a = ((uint128_t) small[1]) * small[2];
a = ((uint128_t) small[1]) * small[1];
a = ((uint128_t) small[1]) * small[3];
a = ((uint128_t) small[2]) * small[3];
a = ((uint128_t) small[2]) * small[2];
a = ((uint128_t) small[3]) * small[3];
/* felem_square sets |out| = |in|^2
 * On exit:
 * out[i] < 7 * 2^64 < 2^67
 * Shrinks the felem first so the 64x64 schoolbook squaring applies. */
static void felem_square(longfelem out, const felem in)
felem_shrink(small, in);
smallfelem_square(out, small);
/* smallfelem_mul sets |out| = |small1| * |small2|
 * On exit:
 * out[i] < 7 * 2^64 < 2^67
 * Schoolbook multiplication; the sixteen 64x64->128 partial products are
 * accumulated by output index (accumulation lines elided in this view). */
static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
a = ((uint128_t) small1[0]) * small2[0];
a = ((uint128_t) small1[0]) * small2[1];
a = ((uint128_t) small1[1]) * small2[0];
a = ((uint128_t) small1[0]) * small2[2];
a = ((uint128_t) small1[1]) * small2[1];
a = ((uint128_t) small1[2]) * small2[0];
a = ((uint128_t) small1[0]) * small2[3];
a = ((uint128_t) small1[1]) * small2[2];
a = ((uint128_t) small1[2]) * small2[1];
a = ((uint128_t) small1[3]) * small2[0];
a = ((uint128_t) small1[1]) * small2[3];
a = ((uint128_t) small1[2]) * small2[2];
a = ((uint128_t) small1[3]) * small2[1];
a = ((uint128_t) small1[2]) * small2[3];
a = ((uint128_t) small1[3]) * small2[2];
a = ((uint128_t) small1[3]) * small2[3];
/* felem_mul sets |out| = |in1| * |in2|
 * On exit:
 * out[i] < 7 * 2^64 < 2^67
 * Both operands are shrunk to 64-bit words before the schoolbook multiply. */
static void felem_mul(longfelem out, const felem in1, const felem in2)
smallfelem small1, small2;
felem_shrink(small1, in1);
felem_shrink(small2, in2);
smallfelem_mul(out, small1, small2);
/* felem_small_mul sets |out| = |small1| * |in2|
 * On exit:
 * out[i] < 7 * 2^64 < 2^67
 * Only |in2| needs shrinking; |small1| is already in 64-bit form. */
static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
felem_shrink(small2, in2);
smallfelem_mul(out, small1, small2);
682 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
683 #define two100 (((limb)1) << 100)
684 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
685 /* zero100 is 0 mod p */
686 static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
/* Internal function for the different flavours of felem_reduce.
 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
 * As the code reveals, the caller is responsible for adding a suitable
 * multiple of p (zero100/zero105) to out[] first so the subtractions
 * below cannot underflow. Bounds on entry/exit:
 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
 * out[1] >= in[7] + 2^32*in[4]
 * out[2] >= in[5] + 2^32*in[5]
 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
 * out[0] <= out[0] + in[4] + 2^32*in[5]
 * out[1] <= out[1] + in[5] + 2^33*in[6]
 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
 */
static void felem_reduce_(felem out, const longfelem in)
/* combine common terms from below */
c = in[4] + (in[5] << 32);
/* the remaining terms */
/* Each annotation lists (bit position, sign) pairs: the expansion of
 * 2^k mod p for the limb being folded in. */
/* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
out[1] -= (in[4] << 32);
out[3] += (in[4] << 32);
/* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
out[2] -= (in[5] << 32);
/* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
out[0] -= (in[6] << 32);
out[1] += (in[6] << 33);
out[2] += (in[6] * 2);
out[3] -= (in[6] << 32);
/* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
out[0] -= (in[7] << 32);
out[2] += (in[7] << 33);
out[3] += (in[7] * 3);
/* felem_reduce converts a longfelem into an felem.
 * To be called directly after felem_square or felem_mul.
 * Requires on entry:
 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2^64
 */
static void felem_reduce(felem out, const longfelem in)
/* Add 0 mod p so the subtractions inside felem_reduce_ cannot underflow. */
out[0] = zero100[0] + in[0];
out[1] = zero100[1] + in[1];
out[2] = zero100[2] + in[2];
out[3] = zero100[3] + in[3];
felem_reduce_(out, in);
/* out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
 *
 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
 */
/* felem_reduce_zero105 converts a larger longfelem into an felem.
 * Same as felem_reduce but adds zero105 (a larger multiple of p) so it
 * tolerates bigger inputs, e.g. the output of longfelem_diff. */
static void felem_reduce_zero105(felem out, const longfelem in)
out[0] = zero105[0] + in[0];
out[1] = zero105[1] + in[1];
out[2] = zero105[2] + in[2];
out[3] = zero105[3] + in[3];
felem_reduce_(out, in);
/* out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
 * out[1] > 2^105 - 2^71 - 2^103 > 0
 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
 *
 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
 */
/* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
 * underflowed. The 128-bit intermediate captures the borrow in bit 64
 * (the subtraction and store-back statements are elided in this view). */
static void subtract_u64(u64* result, u64* carry, u64 v)
uint128_t r = *result;
*carry = (r >> 64) & 1;
/* felem_contract converts |in| to its unique, minimal representation.
 * felem_shrink leaves a value that may still be >= p; this routine does a
 * constant-time compare against p followed by a masked subtraction. */
static void felem_contract(smallfelem out, const felem in)
u64 all_equal_so_far = 0, result = 0, carry;
felem_shrink(out, in);
/* small is minimal except that the value might be > p */
/* We are doing a constant time test if out >= kPrime. We need to
 * compare each u64, from most-significant to least significant. For
 * each one, if all words so far have been equal (m is all ones) then a
 * non-equal result is the answer. Otherwise we continue. */
/* NOTE: i is unsigned, so the loop runs i = 3,2,1,0 and terminates when
 * the decrement wraps past zero and i < 4 becomes false. */
for (i = 3; i < 4; i--) {
uint128_t a = ((uint128_t) kPrime[i]) - out[i];
/* if out[i] > kPrime[i] then a will underflow and the high
 * 64-bits will all be set. */
result |= all_equal_so_far & ((u64) (a >> 64));
/* if kPrime[i] == out[i] then |equal| will be all zeros and
 * the decrement will make it all ones. */
u64 equal = kPrime[i] ^ out[i];
equal &= equal << 32;
equal &= equal << 16;
/* Arithmetic shift smears the sign bit: all-ones iff equal. */
equal = ((s64) equal) >> 63;
all_equal_so_far &= equal;
/* if all_equal_so_far is still all ones then the two values are equal
 * and so out >= kPrime is true. */
result |= all_equal_so_far;
/* if out >= kPrime then we subtract kPrime. */
/* Masked, constant-time multiword subtraction with borrow chain. */
subtract_u64(&out[0], &carry, result & kPrime[0]);
subtract_u64(&out[1], &carry, carry);
subtract_u64(&out[2], &carry, carry);
subtract_u64(&out[3], &carry, carry);
subtract_u64(&out[1], &carry, result & kPrime[1]);
subtract_u64(&out[2], &carry, carry);
subtract_u64(&out[3], &carry, carry);
subtract_u64(&out[2], &carry, result & kPrime[2]);
subtract_u64(&out[3], &carry, carry);
subtract_u64(&out[3], &carry, result & kPrime[3]);
/* smallfelem_square_contract squares |in| and returns the minimal
 * representation of the result. */
static void smallfelem_square_contract(smallfelem out, const smallfelem in)
smallfelem_square(longtmp, in);
felem_reduce(tmp, longtmp);
felem_contract(out, tmp);
/* smallfelem_mul_contract multiplies |in1| by |in2| and returns the
 * minimal representation of the result. */
static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
smallfelem_mul(longtmp, in1, in2);
felem_reduce(tmp, longtmp);
felem_contract(out, tmp);
/* smallfelem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
 * otherwise. Constant time: both the zero test and the test against p are
 * computed with bit-folding, no branches. */
static limb smallfelem_is_zero(const smallfelem small)
u64 is_zero = small[0] | small[1] | small[2] | small[3];
/* Fold all bits into bit 0: is_zero's low bit is clear iff any bit was set. */
is_zero &= is_zero << 32;
is_zero &= is_zero << 16;
is_zero &= is_zero << 8;
is_zero &= is_zero << 4;
is_zero &= is_zero << 2;
is_zero &= is_zero << 1;
/* Arithmetic shift smears bit 63: all ones iff small == 0. */
is_zero = ((s64) is_zero) >> 63;
/* small may hold p itself (felem_shrink is not fully minimal), which is
 * also 0 mod p. */
u64 is_p = (small[0] ^ kPrime[0]) |
(small[1] ^ kPrime[1]) |
(small[2] ^ kPrime[2]) |
(small[3] ^ kPrime[3]);
is_p = ((s64) is_p) >> 63;
result |= ((limb) is_zero) << 64;
/* smallfelem_is_zero_int returns 1 if |small| == 0 (mod p), else 0. */
static int smallfelem_is_zero_int(const smallfelem small)
return (int) (smallfelem_is_zero(small) & ((limb)1));
/* felem_inv calculates |out| = |in|^{-1}
 *
 * Based on Fermat's Little Theorem:
 * a^{p-1} = 1 (mod p)
 * a^{p-2} = a^{-1} (mod p)
 * The exponent p-2 is evaluated with a fixed addition chain, so the
 * running time is independent of |in| (constant time). */
static void felem_inv(felem out, const felem in)
/* each e_I will hold |in|^{2^I - 1} */
felem e2, e4, e8, e16, e32, e64;
/* The trailing comment on each line is the exponent held so far. */
felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
felem_assign(e2, ftmp);
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
felem_assign(e4, ftmp);
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
felem_assign(e8, ftmp);
for (i = 0; i < 8; i++) {
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
felem_assign(e16, ftmp);
for (i = 0; i < 16; i++) {
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
felem_assign(e32, ftmp);
for (i = 0; i < 32; i++) {
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
felem_assign(e64, ftmp);
felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
for (i = 0; i < 192; i++) {
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
} /* 2^256 - 2^224 + 2^192 */
/* Build the low part of the exponent in ftmp2. */
felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
for (i = 0; i < 16; i++) {
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
for (i = 0; i < 8; i++) {
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
for (i = 0; i < 4; i++) {
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
/* Combine the two halves: the full exponent is p - 2. */
felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
/* smallfelem_inv_contract inverts |in| and returns the minimal
 * representation of the result (the felem_inv call is elided here). */
static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
smallfelem_expand(tmp, in);
felem_contract(out, tmp);
1005 * Building on top of the field operations we have the operations on the
1006 * elliptic curve group itself. Points on the curve are represented in Jacobian
1009 /* point_double calculates 2*(x_in, y_in, z_in)
1011 * The method is taken from:
1012 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1014 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1015 * while x_out == y_in is not (maybe this works, but it's not tested). */
point_double(felem x_out, felem y_out, felem z_out,
const felem x_in, const felem y_in, const felem z_in)
longfelem tmp, tmp2;
felem delta, gamma, beta, alpha, ftmp, ftmp2;
smallfelem small1, small2;
felem_assign(ftmp, x_in);
/* ftmp[i] < 2^106 */
felem_assign(ftmp2, x_in);
/* ftmp2[i] < 2^106 */
/* delta = z^2 */
felem_square(tmp, z_in);
felem_reduce(delta, tmp);
/* delta[i] < 2^101 */
/* gamma = y^2 */
felem_square(tmp, y_in);
felem_reduce(gamma, tmp);
/* gamma[i] < 2^101 */
felem_shrink(small1, gamma);
/* beta = x*gamma */
felem_small_mul(tmp, small1, x_in);
felem_reduce(beta, tmp);
/* beta[i] < 2^101 */
/* alpha = 3*(x-delta)*(x+delta) */
felem_diff(ftmp, delta);
/* ftmp[i] < 2^105 + 2^106 < 2^107 */
felem_sum(ftmp2, delta);
/* ftmp2[i] < 2^105 + 2^106 < 2^107 */
felem_scalar(ftmp2, 3);
/* ftmp2[i] < 3 * 2^107 < 2^109 */
felem_mul(tmp, ftmp, ftmp2);
felem_reduce(alpha, tmp);
/* alpha[i] < 2^101 */
felem_shrink(small2, alpha);
/* x' = alpha^2 - 8*beta */
smallfelem_square(tmp, small2);
felem_reduce(x_out, tmp);
felem_assign(ftmp, beta);
felem_scalar(ftmp, 8);
/* ftmp[i] < 8 * 2^101 = 2^104 */
felem_diff(x_out, ftmp);
/* x_out[i] < 2^105 + 2^101 < 2^106 */
/* z' = (y + z)^2 - gamma - delta */
felem_sum(delta, gamma);
/* delta[i] < 2^101 + 2^101 = 2^102 */
felem_assign(ftmp, y_in);
felem_sum(ftmp, z_in);
/* ftmp[i] < 2^106 + 2^106 = 2^107 */
felem_square(tmp, ftmp);
felem_reduce(z_out, tmp);
felem_diff(z_out, delta);
/* z_out[i] < 2^105 + 2^101 < 2^106 */
/* y' = alpha*(4*beta - x') - 8*gamma^2 */
felem_scalar(beta, 4);
/* beta[i] < 4 * 2^101 = 2^103 */
felem_diff_zero107(beta, x_out);
/* beta[i] < 2^107 + 2^103 < 2^108 */
felem_small_mul(tmp, small2, beta);
/* tmp[i] < 7 * 2^64 < 2^67 */
smallfelem_square(tmp2, small1);
/* tmp2[i] < 7 * 2^64 */
longfelem_scalar(tmp2, 8);
/* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
longfelem_diff(tmp, tmp2);
/* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
/* tmp is too wide for felem_reduce; the zero105 variant accepts it. */
felem_reduce_zero105(y_out, tmp);
/* y_out[i] < 2^106 */
/* point_double_small is the same as point_double, except that it operates on
 * smallfelems: expand, double, shrink. */
point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
felem felem_x_out, felem_y_out, felem_z_out;
felem felem_x_in, felem_y_in, felem_z_in;
smallfelem_expand(felem_x_in, x_in);
smallfelem_expand(felem_y_in, y_in);
smallfelem_expand(felem_z_in, z_in);
point_double(felem_x_out, felem_y_out, felem_z_out,
felem_x_in, felem_y_in, felem_z_in);
felem_shrink(x_out, felem_x_out);
felem_shrink(y_out, felem_y_out);
felem_shrink(z_out, felem_z_out);
/* copy_conditional copies in to out iff mask is all ones.
 * Branch-free: mask must be 0 or all-ones for constant-time behaviour. */
copy_conditional(felem out, const felem in, limb mask)
for (i = 0; i < NLIMBS; ++i)
/* tmp is in[i]^out[i] when mask is all ones, 0 otherwise; xoring it
 * into out[i] (elided here) selects in[i] or leaves out[i]. */
const limb tmp = mask & (in[i] ^ out[i]);
/* copy_small_conditional copies in to out iff mask is all ones.
 * Branch-free select between the existing limb and the small word. */
copy_small_conditional(felem out, const smallfelem in, limb mask)
const u64 mask64 = mask;
for (i = 0; i < NLIMBS; ++i)
out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1137 /* point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1139 * The method is taken from:
1140 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1141 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1143 * This function includes a branch for checking whether the two input points
1144 * are equal, (while not equal to the point at infinity). This case never
1145 * happens during single point multiplication, so there is no timing leak for
1146 * ECDH or ECDSA signing. */
static void point_add(felem x3, felem y3, felem z3,
const felem x1, const felem y1, const felem z1,
const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
longfelem tmp, tmp2;
smallfelem small1, small2, small3, small4, small5;
limb x_equal, y_equal, z1_is_zero, z2_is_zero;
felem_shrink(small3, z1);
z1_is_zero = smallfelem_is_zero(small3);
z2_is_zero = smallfelem_is_zero(z2);
/* ftmp = z1z1 = z1**2 */
smallfelem_square(tmp, small3);
felem_reduce(ftmp, tmp);
/* ftmp[i] < 2^101 */
felem_shrink(small1, ftmp);
/* The !mixed path (z2 general) computes u1 and s1 with full multiplies. */
/* ftmp2 = z2z2 = z2**2 */
smallfelem_square(tmp, z2);
felem_reduce(ftmp2, tmp);
/* ftmp2[i] < 2^101 */
felem_shrink(small2, ftmp2);
felem_shrink(small5, x1);
/* u1 = ftmp3 = x1*z2z2 */
smallfelem_mul(tmp, small5, small2);
felem_reduce(ftmp3, tmp);
/* ftmp3[i] < 2^101 */
/* ftmp5 = z1 + z2 */
felem_assign(ftmp5, z1);
felem_small_sum(ftmp5, z2);
/* ftmp5[i] < 2^107 */
/* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
felem_square(tmp, ftmp5);
felem_reduce(ftmp5, tmp);
/* ftmp2 = z2z2 + z1z1 */
felem_sum(ftmp2, ftmp);
/* ftmp2[i] < 2^101 + 2^101 = 2^102 */
felem_diff(ftmp5, ftmp2);
/* ftmp5[i] < 2^105 + 2^101 < 2^106 */
/* ftmp2 = z2 * z2z2 */
smallfelem_mul(tmp, small2, z2);
felem_reduce(ftmp2, tmp);
/* s1 = ftmp2 = y1 * z2**3 */
felem_mul(tmp, y1, ftmp2);
felem_reduce(ftmp6, tmp);
/* ftmp6[i] < 2^101 */
/* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
/* The mixed path: z2 = 1 makes u1, 2z1z2 and s1 trivial. */
/* u1 = ftmp3 = x1*z2z2 */
felem_assign(ftmp3, x1);
/* ftmp3[i] < 2^106 */
felem_assign(ftmp5, z1);
felem_scalar(ftmp5, 2);
/* ftmp5[i] < 2*2^106 = 2^107 */
/* s1 = ftmp2 = y1 * z2**3 */
felem_assign(ftmp6, y1);
/* ftmp6[i] < 2^106 */
/* u2 = x2*z1z1 */
smallfelem_mul(tmp, x2, small1);
felem_reduce(ftmp4, tmp);
/* h = ftmp4 = u2 - u1 */
felem_diff_zero107(ftmp4, ftmp3);
/* ftmp4[i] < 2^107 + 2^101 < 2^108 */
felem_shrink(small4, ftmp4);
x_equal = smallfelem_is_zero(small4);
/* z_out = ftmp5 * h */
felem_small_mul(tmp, small4, ftmp5);
felem_reduce(z_out, tmp);
/* z_out[i] < 2^101 */
/* ftmp = z1 * z1z1 */
smallfelem_mul(tmp, small1, small3);
felem_reduce(ftmp, tmp);
/* s2 = tmp = y2 * z1**3 */
felem_small_mul(tmp, y2, ftmp);
felem_reduce(ftmp5, tmp);
/* r = ftmp5 = (s2 - s1)*2 */
felem_diff_zero107(ftmp5, ftmp6);
/* ftmp5[i] < 2^107 + 2^107 = 2^108*/
felem_scalar(ftmp5, 2);
/* ftmp5[i] < 2^109 */
felem_shrink(small1, ftmp5);
y_equal = smallfelem_is_zero(small1);
/* NOTE(review): this doubling branch is only safe because the scalar-mult
 * callers never add two distinct points with equal x here; later OpenSSL
 * versions hardened this check — confirm against upstream before reuse. */
if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
point_double(x3, y3, z3, x1, y1, z1);
/* I = ftmp = (2h)**2 */
felem_assign(ftmp, ftmp4);
felem_scalar(ftmp, 2);
/* ftmp[i] < 2*2^108 = 2^109 */
felem_square(tmp, ftmp);
felem_reduce(ftmp, tmp);
/* J = ftmp2 = h * I */
felem_mul(tmp, ftmp4, ftmp);
felem_reduce(ftmp2, tmp);
/* V = ftmp4 = U1 * I */
felem_mul(tmp, ftmp3, ftmp);
felem_reduce(ftmp4, tmp);
/* x_out = r**2 - J - 2V */
smallfelem_square(tmp, small1);
felem_reduce(x_out, tmp);
felem_assign(ftmp3, ftmp4);
felem_scalar(ftmp4, 2);
felem_sum(ftmp4, ftmp2);
/* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
felem_diff(x_out, ftmp4);
/* x_out[i] < 2^105 + 2^101 */
/* y_out = r(V-x_out) - 2 * s1 * J */
felem_diff_zero107(ftmp3, x_out);
/* ftmp3[i] < 2^107 + 2^101 < 2^108 */
felem_small_mul(tmp, small1, ftmp3);
felem_mul(tmp2, ftmp6, ftmp2);
longfelem_scalar(tmp2, 2);
/* tmp2[i] < 2*2^67 = 2^68 */
longfelem_diff(tmp, tmp2);
/* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
felem_reduce_zero105(y_out, tmp);
/* y_out[i] < 2^106 */
/* Constant-time handling of the point-at-infinity inputs: if z1 == 0
 * the result is (x2,y2,z2); if z2 == 0 it is (x1,y1,z1). */
copy_small_conditional(x_out, x2, z1_is_zero);
copy_conditional(x_out, x1, z2_is_zero);
copy_small_conditional(y_out, y2, z1_is_zero);
copy_conditional(y_out, y1, z2_is_zero);
copy_small_conditional(z_out, z2, z1_is_zero);
copy_conditional(z_out, z1, z2_is_zero);
felem_assign(x3, x_out);
felem_assign(y3, y_out);
felem_assign(z3, z_out);
/* point_add_small is the same as point_add, except that it operates on
 * smallfelems: expand the first point, add (mixed = 0), shrink the result. */
static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
smallfelem x1, smallfelem y1, smallfelem z1,
smallfelem x2, smallfelem y2, smallfelem z2)
felem felem_x3, felem_y3, felem_z3;
felem felem_x1, felem_y1, felem_z1;
smallfelem_expand(felem_x1, x1);
smallfelem_expand(felem_y1, y1);
smallfelem_expand(felem_z1, z1);
point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
felem_shrink(x3, felem_x3);
felem_shrink(y3, felem_y3);
felem_shrink(z3, felem_z3);
1326 /* Base point pre computation
1327 * --------------------------
1329 * Two different sorts of precomputed tables are used in the following code.
 * Each contains various points on the curve, where each point is three field
1331 * elements (x, y, z).
1333 * For the base point table, z is usually 1 (0 for the point at infinity).
1334 * This table has 2 * 16 elements, starting with the following:
1335 * index | bits | point
1336 * ------+---------+------------------------------
1339 * 2 | 0 0 1 0 | 2^64G
1340 * 3 | 0 0 1 1 | (2^64 + 1)G
1341 * 4 | 0 1 0 0 | 2^128G
1342 * 5 | 0 1 0 1 | (2^128 + 1)G
1343 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1344 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1345 * 8 | 1 0 0 0 | 2^192G
1346 * 9 | 1 0 0 1 | (2^192 + 1)G
1347 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1348 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1349 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1350 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1351 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1352 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1353 * followed by a copy of this with each element multiplied by 2^32.
1355 * The reason for this is so that we can clock bits into four different
1356 * locations when doing simple scalar multiplies against the base point,
1357 * and then another four locations using the second 16 elements.
1359 * Tables for other points have table[i] = iG for i in 0 .. 16. */
/* gmul is the table of precomputed base points */
/* NOTE(review): gmul is declared [2][16][3] -- three smallfelems (x, y, z)
 * per point -- but only two coordinate rows per entry survive in this copy
 * of the file; the z rows and the index-0 point-at-infinity entries appear
 * to have been lost.  Restore them from the upstream source before
 * building. */
static const smallfelem gmul[2][16][3] =
{{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
{0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
{{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
{0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
{{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
{0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
{{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
{0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
{{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
{0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
{{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
{0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
{{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
{0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
{{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
{0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
{{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
{0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
{{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
{0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
{{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
{0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
{{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
{0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
{{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
{0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
{{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
{0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
{{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
{0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
/* second table: the same multiples, each multiplied by 2^32 */
{{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
{0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
{{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
{0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
{{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
{0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
{{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
{0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
{{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
{0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
{{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
{0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
{{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
{0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
{{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
{0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
{{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
{0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
{{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
{0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
{{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
{0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
{{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
{0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
{{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
{0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
{{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
{0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
{{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
{0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
/* select_point selects the |index|th point from a precomputation table and
 * copies it to out. */
/* Intended to be constant time with respect to |index|: every table entry
 * is read, and entry i is ORed into |out| under a mask derived from
 * i ^ index rather than being indexed directly (cf. the "in constant time"
 * comments at the call sites in batch_mul). */
static void select_point(const u64 index, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
u64 *outlimbs = &out[0][0];
/* start from all-zero limbs (the representation of the point at infinity) */
memset(outlimbs, 0, 3 * sizeof(smallfelem));
for (i = 0; i < size; i++)
const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
u64 mask = i ^ index;
/* NOTE(review): the statements that fold |mask| down to all-ones when
 * i == index and all-zeros otherwise (mask |= mask >> 4; mask |= mask >> 2;
 * mask |= mask >> 1; mask &= 1; mask--;) are not visible in this copy of
 * the file; as written, i ^ index is used directly as the mask, which
 * cannot be correct -- restore from upstream. */
for (j = 0; j < NLIMBS * 3; j++)
outlimbs[j] |= inlimbs[j] & mask;
1482 /* get_bit returns the |i|th bit in |in| */
1483 static char get_bit(const felem_bytearray in, int i)
1485 if ((i < 0) || (i >= 256))
1487 return (in[i >> 3] >> (i & 7)) & 1;
/* Interleaved point multiplication using precomputed point multiples:
 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
/* NOTE(review): several lines of this function are not visible in this copy
 * of the file: the local declarations (nq, tmp, ftmp, bits, sign, digit, i,
 * skip, ...), all braces, and the if (!skip)/else guards implied by the
 * "skip" flag set below.  Only the surviving statements appear here. */
static void batch_mul(felem x_out, felem y_out, felem z_out,
const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
unsigned num, gen_mul = (g_scalar != NULL);
/* set nq to the point at infinity */
memset(nq, 0, 3 * sizeof(felem));
/* Loop over all scalars msb-to-lsb, interleaving additions
 * of multiples of the generator (two in each of the last 32 rounds)
 * and additions of other points multiples (every 5th round).
 */
skip = 1; /* save two point operations in the first round */
for (i = (num_points ? 255 : 31); i >= 0; --i)
/* double once per bit position */
point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
/* add multiples of the generator */
if (gen_mul && (i <= 31))
/* first, look 32 bits upwards */
bits = get_bit(g_scalar, i + 224) << 3;
bits |= get_bit(g_scalar, i + 160) << 2;
bits |= get_bit(g_scalar, i + 96) << 1;
bits |= get_bit(g_scalar, i + 32);
/* select the point to add, in constant time */
select_point(bits, 16, g_pre_comp[1], tmp);
/* NOTE(review): the if (!skip)/else branching around the following two
 * statement groups (add vs. plain load in the first round) is missing
 * from this copy. */
point_add(nq[0], nq[1], nq[2],
nq[0], nq[1], nq[2],
1 /* mixed */, tmp[0], tmp[1], tmp[2]);
smallfelem_expand(nq[0], tmp[0]);
smallfelem_expand(nq[1], tmp[1]);
smallfelem_expand(nq[2], tmp[2]);
/* second, look at the current position */
bits = get_bit(g_scalar, i + 192) << 3;
bits |= get_bit(g_scalar, i + 128) << 2;
bits |= get_bit(g_scalar, i + 64) << 1;
bits |= get_bit(g_scalar, i);
/* select the point to add, in constant time */
select_point(bits, 16, g_pre_comp[0], tmp);
point_add(nq[0], nq[1], nq[2],
nq[0], nq[1], nq[2],
1 /* mixed */, tmp[0], tmp[1], tmp[2]);
/* do other additions every 5 doublings */
if (num_points && (i % 5 == 0))
/* loop over all scalars */
for (num = 0; num < num_points; ++num)
/* take a signed 6-bit window of scalars[num] for recoding */
bits = get_bit(scalars[num], i + 4) << 5;
bits |= get_bit(scalars[num], i + 3) << 4;
bits |= get_bit(scalars[num], i + 2) << 3;
bits |= get_bit(scalars[num], i + 1) << 2;
bits |= get_bit(scalars[num], i) << 1;
bits |= get_bit(scalars[num], i - 1);
ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
/* select the point to add or subtract, in constant time */
select_point(digit, 17, pre_comp[num], tmp);
smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
/* keep +Y when sign == 1 (mask is all-ones), -Y when sign == 0 */
copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
felem_contract(tmp[1], ftmp);
point_add(nq[0], nq[1], nq[2],
nq[0], nq[1], nq[2],
mixed, tmp[0], tmp[1], tmp[2]);
smallfelem_expand(nq[0], tmp[0]);
smallfelem_expand(nq[1], tmp[1]);
smallfelem_expand(nq[2], tmp[2]);
/* write out the accumulated result */
felem_assign(x_out, nq[0]);
felem_assign(y_out, nq[1]);
felem_assign(z_out, nq[2]);
/* Precomputation for the group generator. */
/* NOTE(review): the opening "typedef struct {" line and the reference-count
 * member (updated via CRYPTO_add on ->references in the helpers below) are
 * not visible in this copy of the file. */
/* two 16-entry tables of (x, y, z) multiples, laid out like gmul above */
smallfelem g_pre_comp[2][16][3];
} NISTP256_PRE_COMP;
/* EC_GFp_nistp256_method returns the method table routing P-256 group and
 * point operations to this implementation, with generic ec_GFp_simple_*
 * handlers filling the slots that need no specialised version. */
/* NOTE(review): a few initialiser entries (between the compressed-
 * coordinates slot and invert, after is_on_curve, and after field_sqr) as
 * well as the closing "return &ret;" are not visible in this copy of the
 * file. */
const EC_METHOD *EC_GFp_nistp256_method(void)
static const EC_METHOD ret = {
EC_FLAGS_DEFAULT_OCT,
NID_X9_62_prime_field,
ec_GFp_nistp256_group_init,
ec_GFp_simple_group_finish,
ec_GFp_simple_group_clear_finish,
ec_GFp_nist_group_copy,
ec_GFp_nistp256_group_set_curve,
ec_GFp_simple_group_get_curve,
ec_GFp_simple_group_get_degree,
ec_GFp_simple_group_check_discriminant,
ec_GFp_simple_point_init,
ec_GFp_simple_point_finish,
ec_GFp_simple_point_clear_finish,
ec_GFp_simple_point_copy,
ec_GFp_simple_point_set_to_infinity,
ec_GFp_simple_set_Jprojective_coordinates_GFp,
ec_GFp_simple_get_Jprojective_coordinates_GFp,
ec_GFp_simple_point_set_affine_coordinates,
ec_GFp_nistp256_point_get_affine_coordinates,
0 /* point_set_compressed_coordinates */,
ec_GFp_simple_invert,
ec_GFp_simple_is_at_infinity,
ec_GFp_simple_is_on_curve,
ec_GFp_simple_make_affine,
ec_GFp_simple_points_make_affine,
ec_GFp_nistp256_points_mul,
ec_GFp_nistp256_precompute_mult,
ec_GFp_nistp256_have_precompute_mult,
ec_GFp_nist_field_mul,
ec_GFp_nist_field_sqr,
0 /* field_encode */,
0 /* field_decode */,
0 /* field_set_to_one */ };
1650 /******************************************************************************/
1651 /* FUNCTIONS TO MANAGE PRECOMPUTATION
/* nistp256_pre_comp_new allocates a fresh generator-precomputation record,
 * zeroes its table, and starts its reference count at 1.  Returns NULL on
 * allocation failure (with an EC error pushed). */
/* NOTE(review): the NULL-check wrapping the ECerr call and the return
 * statements are not visible in this copy of the file. */
static NISTP256_PRE_COMP *nistp256_pre_comp_new()
NISTP256_PRE_COMP *ret = NULL;
ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
ret->references = 1;
/* nistp256_pre_comp_dup "duplicates" a precomputation record by bumping its
 * reference count under the EC_PRE_COMP lock; the record is immutable after
 * creation, so no deep copy is needed. */
static void *nistp256_pre_comp_dup(void *src_)
NISTP256_PRE_COMP *src = src_;
/* no need to actually copy, these objects never change! */
CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
/* NOTE(review): the return statement (presumably "return src_;") is not
 * visible in this copy of the file. */
/* nistp256_pre_comp_free drops one reference to |pre_| and releases the
 * record when the count reaches zero. */
static void nistp256_pre_comp_free(void *pre_)
NISTP256_PRE_COMP *pre = pre_;
i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
/* NOTE(review): the declaration of i, the NULL/positive-refcount guards
 * and the final OPENSSL_free call are not visible in this copy of the
 * file. */
/* nistp256_pre_comp_clear_free is like nistp256_pre_comp_free, but scrubs
 * the record with OPENSSL_cleanse before releasing it. */
static void nistp256_pre_comp_clear_free(void *pre_)
NISTP256_PRE_COMP *pre = pre_;
i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
/* wipe the precomputed points before the memory is returned */
OPENSSL_cleanse(pre, sizeof *pre);
/* NOTE(review): the declaration of i, the NULL/positive-refcount guards
 * and the final OPENSSL_free call are not visible in this copy of the
 * file. */
1709 /******************************************************************************/
1710 /* OPENSSL EC_METHOD FUNCTIONS
/* ec_GFp_nistp256_group_init performs the generic simple-group
 * initialisation and records that the curve coefficient a is -3 (mod p),
 * which the specialised point arithmetic relies on. */
/* NOTE(review): the declaration of ret and the return statement are not
 * visible in this copy of the file. */
int ec_GFp_nistp256_group_init(EC_GROUP *group)
ret = ec_GFp_simple_group_init(group);
group->a_is_minus3 = 1;
/* ec_GFp_nistp256_group_set_curve verifies that (p, a, b) are exactly the
 * NIST P-256 parameters hard-coded in nistp256_curve_params (this
 * implementation supports no other curve), installs the fast NIST modular
 * reduction, and defers the actual setup to the simple method. */
/* NOTE(review): the ret declaration, the if (ctx == NULL) guard,
 * BN_CTX_start/BN_CTX_end, the err: label and the return are not visible
 * in this copy of the file. */
int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
BN_CTX *new_ctx = NULL;
BIGNUM *curve_p, *curve_a, *curve_b;
/* create a scratch BN_CTX only when the caller did not supply one */
if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
((curve_a = BN_CTX_get(ctx)) == NULL) ||
((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
/* load the canonical big-endian P-256 parameters */
BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
(BN_cmp(curve_b, b)))
ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
EC_R_WRONG_CURVE_PARAMETERS);
group->field_mod_func = BN_nist_mod_256;
ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
if (new_ctx != NULL)
BN_CTX_free(new_ctx);
/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
 * (X', Y') = (X/Z^2, Y/Z^3) */
/* NOTE(review): braces, the longfelem tmp declaration, the error-path
 * tails (ERR_R_BN_LIB / return 0), and -- critically -- the inversion that
 * should initialise z2 (z2 = Z^-1) before it is squared below are not
 * visible in this copy of the file; as written z2 is used uninitialised.
 * Restore from upstream before use. */
int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
felem z1, z2, x_in, y_in;
smallfelem x_out, y_out;
/* the point at infinity has no affine representation */
if (EC_POINT_is_at_infinity(group, point))
ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
EC_R_POINT_AT_INFINITY);
if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
(!BN_to_felem(z1, &point->Z))) return 0;
/* z1 = 1/Z^2; X' = X * z1 */
felem_square(tmp, z2); felem_reduce(z1, tmp);
felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
felem_contract(x_out, x_in);
if (!smallfelem_to_BN(x, x_out)) {
ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
/* z1 = 1/Z^3; Y' = Y * z1 */
felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
felem_contract(y_out, y_in);
if (!smallfelem_to_BN(y, y_out)) {
ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
/* make_points_affine converts |num| Jacobian points to affine form in
 * place, using |tmp_smallfelems| as scratch, by delegating to the shared
 * NIST helper with smallfelem-specific callbacks. */
static void make_points_affine(size_t num, smallfelem points[num][3], smallfelem tmp_smallfelems[num+1])
/* Runs in constant time, unless an input is the point at infinity
 * (which normally shouldn't happen). */
ec_GFp_nistp_points_make_affine_internal(
/* NOTE(review): the leading argument lines (num, points, element size,
 * tmp_smallfelems, ...) are not visible in this copy of the file. */
(void (*)(void *)) smallfelem_one,
(int (*)(const void *)) smallfelem_is_zero_int,
(void (*)(void *, const void *)) smallfelem_assign,
(void (*)(void *, const void *)) smallfelem_square_contract,
(void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
(void (*)(void *, const void *)) smallfelem_inv_contract,
(void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */);
/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
 * Result is stored in r (r can equal one of the inputs). */
/* NOTE(review): braces and a number of statements (the j/mixed/ret
 * declarations, several if/else and goto err lines, the error-handling
 * tails, the point_add_small/point_double_small call names whose argument
 * lists appear mid-function, and the final return) are not visible in this
 * copy of the file; only the surviving code appears below. */
int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
const BIGNUM *scalar, size_t num, const EC_POINT *points[],
const BIGNUM *scalars[], BN_CTX *ctx)
BN_CTX *new_ctx = NULL;
BIGNUM *x, *y, *z, *tmp_scalar;
felem_bytearray g_secret;
felem_bytearray *secrets = NULL;
smallfelem (*pre_comp)[17][3] = NULL;
smallfelem *tmp_smallfelems = NULL;
felem_bytearray tmp;
unsigned i, num_bytes;
int have_pre_comp = 0;
size_t num_points = num;
smallfelem x_in, y_in, z_in;
felem x_out, y_out, z_out;
NISTP256_PRE_COMP *pre = NULL;
const smallfelem (*g_pre_comp)[16][3] = NULL;
EC_POINT *generator = NULL;
const EC_POINT *p = NULL;
const BIGNUM *p_scalar = NULL;
/* create a scratch BN_CTX only when the caller did not supply one */
if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
if (((x = BN_CTX_get(ctx)) == NULL) ||
((y = BN_CTX_get(ctx)) == NULL) ||
((z = BN_CTX_get(ctx)) == NULL) ||
((tmp_scalar = BN_CTX_get(ctx)) == NULL))
/* look for generator precomputation attached to the group */
pre = EC_EX_DATA_get_data(group->extra_data,
nistp256_pre_comp_dup, nistp256_pre_comp_free,
nistp256_pre_comp_clear_free);
/* we have precomputation, try to use it */
g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
/* try to use the standard precomputation */
g_pre_comp = &gmul[0];
generator = EC_POINT_new(group);
if (generator == NULL)
/* get the generator from precomputation */
if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
!smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
!smallfelem_to_BN(z, g_pre_comp[0][1][2]))
ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
generator, x, y, z, ctx))
if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
/* precomputation matches generator */
/* we don't have valid precomputation:
 * treat the generator as a random point */
if (num_points >= 3)
/* unless we precompute multiples for just one or two points,
 * converting those into affine form is time well spent */
secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
/* we treat NULL scalars as 0, and NULL points as points at infinity,
 * i.e., they contribute nothing to the linear combination */
memset(secrets, 0, num_points * sizeof(felem_bytearray));
memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
for (i = 0; i < num_points; ++i)
/* we didn't have a valid precomputation, so we pick
p = EC_GROUP_get0_generator(group);
/* the i^th point */
p_scalar = scalars[i];
if ((p_scalar != NULL) && (p != NULL))
/* reduce scalar to 0 <= scalar < 2^256 */
if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
/* this is an unusual input, and we don't guarantee
 * constant-timeness */
if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
num_bytes = BN_bn2bin(tmp_scalar, tmp);
num_bytes = BN_bn2bin(p_scalar, tmp);
/* store the scalar little-endian for bitwise windowing */
flip_endian(secrets[i], tmp, num_bytes);
/* precompute multiples */
if ((!BN_to_felem(x_out, &p->X)) ||
(!BN_to_felem(y_out, &p->Y)) ||
(!BN_to_felem(z_out, &p->Z))) goto err;
felem_shrink(pre_comp[i][1][0], x_out);
felem_shrink(pre_comp[i][1][1], y_out);
felem_shrink(pre_comp[i][1][2], z_out);
/* build jP for j = 2..16: odd j by adding P, even j by doubling j/2 */
for (j = 2; j <= 16; ++j)
pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
/* the scalar for the generator */
if ((scalar != NULL) && (have_pre_comp))
memset(g_secret, 0, sizeof(g_secret));
/* reduce scalar to 0 <= scalar < 2^256 */
if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
/* this is an unusual input, and we don't guarantee
 * constant-timeness */
if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
num_bytes = BN_bn2bin(tmp_scalar, tmp);
num_bytes = BN_bn2bin(scalar, tmp);
flip_endian(g_secret, tmp, num_bytes);
/* do the multiplication with generator precomputation*/
batch_mul(x_out, y_out, z_out,
(const felem_bytearray (*)) secrets, num_points,
mixed, (const smallfelem (*)[17][3]) pre_comp,
/* do the multiplication without generator precomputation */
batch_mul(x_out, y_out, z_out,
(const felem_bytearray (*)) secrets, num_points,
NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
/* reduce the output to its unique minimal representation */
felem_contract(x_in, x_out);
felem_contract(y_in, y_out);
felem_contract(z_in, z_out);
if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
(!smallfelem_to_BN(z, z_in)))
ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
/* common cleanup path */
if (generator != NULL)
EC_POINT_free(generator);
if (new_ctx != NULL)
BN_CTX_free(new_ctx);
if (secrets != NULL)
OPENSSL_free(secrets);
if (pre_comp != NULL)
OPENSSL_free(pre_comp);
if (tmp_smallfelems != NULL)
OPENSSL_free(tmp_smallfelems);
/* ec_GFp_nistp256_precompute_mult builds (or copies, for the standard
 * generator) the two 16-entry tables of generator multiples used by
 * batch_mul and attaches them to |group| as extra data. */
/* NOTE(review): braces, the ret/i/j/x/y declarations, several goto err
 * lines, and the names of the point_double_small/point_add_small calls
 * whose argument lists appear below are not visible in this copy of the
 * file; only the surviving code appears here. */
int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
NISTP256_PRE_COMP *pre = NULL;
BN_CTX *new_ctx = NULL;
EC_POINT *generator = NULL;
smallfelem tmp_smallfelems[32];
felem x_tmp, y_tmp, z_tmp;
/* throw away old precomputation */
EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
/* create a scratch BN_CTX only when the caller did not supply one */
if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
if (((x = BN_CTX_get(ctx)) == NULL) ||
((y = BN_CTX_get(ctx)) == NULL))
/* get the generator */
if (group->generator == NULL) goto err;
generator = EC_POINT_new(group);
if (generator == NULL)
/* build the standard P-256 base point from the hard-coded parameters */
BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
if ((pre = nistp256_pre_comp_new()) == NULL)
/* if the generator is the standard one, use built-in precomputation */
if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
/* otherwise seed entry [0][1] with the group's own generator */
if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
(!BN_to_felem(y_tmp, &group->generator->Y)) ||
(!BN_to_felem(z_tmp, &group->generator->Z)))
felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
/* compute 2^64*G, 2^128*G, 2^192*G for the first table,
 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
 */
for (i = 1; i <= 8; i <<= 1)
pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
/* 31 doublings: [1][i] = 2^32 * [0][i] overall */
for (j = 0; j < 31; ++j)
pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
/* 31 more doublings: [0][2i] = 2^64 * [0][i] overall */
for (j = 0; j < 31; ++j)
pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
for (i = 0; i < 2; i++)
/* g_pre_comp[i][0] is the point at infinity */
memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
/* the remaining multiples */
/* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
/* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
/* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
/* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
for (j = 1; j < 8; ++j)
/* odd multiples: add G resp. 2^32*G */
pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
/* convert the 31 non-infinity entries to affine form */
make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
/* attach the precomputation to the group */
if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
if (generator != NULL)
EC_POINT_free(generator);
if (new_ctx != NULL)
BN_CTX_free(new_ctx);
nistp256_pre_comp_free(pre);
/* ec_GFp_nistp256_have_precompute_mult reports whether generator
 * precomputation is attached to |group| (1 if present, 0 otherwise). */
int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
/* NOTE(review): the comparison tail (presumably "!= NULL") and the
 * return statements are not visible in this copy of the file. */
/* Keep the translation unit non-empty (and this symbol self-referenced,
 * hence "used") when EC_NISTP_64_GCC_128 is not defined and the whole
 * implementation above is compiled out. */
static void *dummy=&dummy;