2 * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
10 /* Copyright 2011 Google Inc.
12 * Licensed under the Apache License, Version 2.0 (the "License");
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
17 * http://www.apache.org/licenses/LICENSE-2.0
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
27 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
29 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
30 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
31 * work which got its smarts from Daniel J. Bernstein's work on the same.
34 #include <openssl/opensslconf.h>
35 #ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
36 NON_EMPTY_TRANSLATION_UNIT
41 # include <openssl/err.h>
44 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
45 /* even with gcc, the typedef won't work for 32-bit platforms */
46 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
48 typedef __int128_t int128_t;
50 # error "Need GCC 3.1 or later to define type uint128_t"
59 * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
60 * can serialise an element of this field into 32 bytes. We call this an
64 typedef u8 felem_bytearray[32];
67 * These are the parameters of P256, taken from FIPS 186-3, page 86. These
68 * values are big-endian.
70 static const felem_bytearray nistp256_curve_params[5] = {
71 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
72 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
73 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
75 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
76 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
77 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
79 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
80 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
81 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
82 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
83 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
84 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
85 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
86 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
87 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
88 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
89 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
90 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
94 * The representation of field elements.
95 * ------------------------------------
97 * We represent field elements with either four 128-bit values, eight 128-bit
98 * values, or four 64-bit values. The field element represented is:
99 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
101 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
103 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
104 * apart, but are 128-bits wide, the most significant bits of each limb overlap
105 * with the least significant bits of the next.
107 * A field element with four limbs is an 'felem'. One with eight limbs is a
110 * A field element with four, 64-bit values is called a 'smallfelem'. Small
111 * values are used as intermediate values before multiplication.
116 typedef uint128_t limb;
117 typedef limb felem[NLIMBS];
118 typedef limb longfelem[NLIMBS * 2];
119 typedef u64 smallfelem[NLIMBS];
121 /* This is the value of the prime as four 64-bit words, little-endian. */
122 static const u64 kPrime[4] =
123 { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
124 static const u64 bottom63bits = 0x7ffffffffffffffful;
127 * bin32_to_felem takes a little-endian byte array and converts it into felem
128 * form. This assumes that the CPU is little-endian.
130 static void bin32_to_felem(felem out, const u8 in[32])
132 out[0] = *((u64 *)&in[0]);
133 out[1] = *((u64 *)&in[8]);
134 out[2] = *((u64 *)&in[16]);
135 out[3] = *((u64 *)&in[24]);
139 * smallfelem_to_bin32 takes a smallfelem and serialises into a little
140 * endian, 32 byte array. This assumes that the CPU is little-endian.
142 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
144 *((u64 *)&out[0]) = in[0];
145 *((u64 *)&out[8]) = in[1];
146 *((u64 *)&out[16]) = in[2];
147 *((u64 *)&out[24]) = in[3];
150 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
151 static void flip_endian(u8 *out, const u8 *in, unsigned len)
154 for (i = 0; i < len; ++i)
155 out[i] = in[len - 1 - i];
158 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
159 static int BN_to_felem(felem out, const BIGNUM *bn)
161 felem_bytearray b_in;
162 felem_bytearray b_out;
165 /* BN_bn2bin eats leading zeroes */
166 memset(b_out, 0, sizeof(b_out));
167 num_bytes = BN_num_bytes(bn);
168 if (num_bytes > sizeof b_out) {
169 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
172 if (BN_is_negative(bn)) {
173 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
176 num_bytes = BN_bn2bin(bn, b_in);
177 flip_endian(b_out, b_in, num_bytes);
178 bin32_to_felem(out, b_out);
182 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
183 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
185 felem_bytearray b_in, b_out;
186 smallfelem_to_bin32(b_in, in);
187 flip_endian(b_out, b_in, sizeof b_out);
188 return BN_bin2bn(b_out, sizeof b_out, out);
196 static void smallfelem_one(smallfelem out)
204 static void smallfelem_assign(smallfelem out, const smallfelem in)
212 static void felem_assign(felem out, const felem in)
220 /* felem_sum sets out = out + in. */
221 static void felem_sum(felem out, const felem in)
229 /* felem_small_sum sets out = out + in. */
230 static void felem_small_sum(felem out, const smallfelem in)
238 /* felem_scalar sets out = out * scalar */
239 static void felem_scalar(felem out, const u64 scalar)
247 /* longfelem_scalar sets out = out * scalar */
248 static void longfelem_scalar(longfelem out, const u64 scalar)
260 # define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
261 # define two105 (((limb)1) << 105)
262 # define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
264 /* zero105 is 0 mod p */
265 static const felem zero105 =
266 { two105m41m9, two105, two105m41p9, two105m41p9 };
269 * smallfelem_neg sets |out| to |-small|
271 * out[i] < out[i] + 2^105
273 static void smallfelem_neg(felem out, const smallfelem small)
275 /* In order to prevent underflow, we subtract from 0 mod p. */
276 out[0] = zero105[0] - small[0];
277 out[1] = zero105[1] - small[1];
278 out[2] = zero105[2] - small[2];
279 out[3] = zero105[3] - small[3];
283 * felem_diff subtracts |in| from |out|
287 * out[i] < out[i] + 2^105
289 static void felem_diff(felem out, const felem in)
292 * In order to prevent underflow, we add 0 mod p before subtracting.
294 out[0] += zero105[0];
295 out[1] += zero105[1];
296 out[2] += zero105[2];
297 out[3] += zero105[3];
305 # define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
306 # define two107 (((limb)1) << 107)
307 # define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
309 /* zero107 is 0 mod p */
310 static const felem zero107 =
311 { two107m43m11, two107, two107m43p11, two107m43p11 };
314 * An alternative felem_diff for larger inputs |in|
315 * felem_diff_zero107 subtracts |in| from |out|
319 * out[i] < out[i] + 2^107
321 static void felem_diff_zero107(felem out, const felem in)
324 * In order to prevent underflow, we add 0 mod p before subtracting.
326 out[0] += zero107[0];
327 out[1] += zero107[1];
328 out[2] += zero107[2];
329 out[3] += zero107[3];
338 * longfelem_diff subtracts |in| from |out|
342 * out[i] < out[i] + 2^70 + 2^40
344 static void longfelem_diff(longfelem out, const longfelem in)
346 static const limb two70m8p6 =
347 (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
348 static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
349 static const limb two70 = (((limb) 1) << 70);
350 static const limb two70m40m38p6 =
351 (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
353 static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);
355 /* add 0 mod p to avoid underflow */
359 out[3] += two70m40m38p6;
365 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
376 # define two64m0 (((limb)1) << 64) - 1
377 # define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
378 # define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
379 # define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
381 /* zero110 is 0 mod p */
382 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
385 * felem_shrink converts an felem into a smallfelem. The result isn't quite
386 * minimal as the value may be greater than p.
393 static void felem_shrink(smallfelem out, const felem in)
398 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
401 tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
404 tmp[2] = zero110[2] + (u64)in[2];
405 tmp[0] = zero110[0] + in[0];
406 tmp[1] = zero110[1] + in[1];
407 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
410 * We perform two partial reductions where we eliminate the high-word of
411 * tmp[3]. We don't update the other words till the end.
413 a = tmp[3] >> 64; /* a < 2^46 */
414 tmp[3] = (u64)tmp[3];
416 tmp[3] += ((limb) a) << 32;
420 a = tmp[3] >> 64; /* a < 2^15 */
421 b += a; /* b < 2^46 + 2^15 < 2^47 */
422 tmp[3] = (u64)tmp[3];
424 tmp[3] += ((limb) a) << 32;
425 /* tmp[3] < 2^64 + 2^47 */
428 * This adjusts the other two words to complete the two partial
432 tmp[1] -= (((limb) b) << 32);
435 * In order to make space in tmp[3] for the carry from 2 -> 3, we
436 * conditionally subtract kPrime if tmp[3] is large enough.
439 /* As tmp[3] < 2^65, high is either 1 or 0 */
444 * all ones if the high word of tmp[3] is 1
445 * all zeros if the high word of tmp[3] if 0 */
450 * all ones if the MSB of low is 1
451 * all zeros if the MSB of low if 0 */
454 /* if low was greater than kPrime3Test then the MSB is zero */
459 * all ones if low was > kPrime3Test
460 * all zeros if low was <= kPrime3Test */
461 mask = (mask & low) | high;
462 tmp[0] -= mask & kPrime[0];
463 tmp[1] -= mask & kPrime[1];
464 /* kPrime[2] is zero, so omitted */
465 tmp[3] -= mask & kPrime[3];
466 /* tmp[3] < 2**64 - 2**32 + 1 */
468 tmp[1] += ((u64)(tmp[0] >> 64));
469 tmp[0] = (u64)tmp[0];
470 tmp[2] += ((u64)(tmp[1] >> 64));
471 tmp[1] = (u64)tmp[1];
472 tmp[3] += ((u64)(tmp[2] >> 64));
473 tmp[2] = (u64)tmp[2];
482 /* smallfelem_expand converts a smallfelem to an felem */
483 static void smallfelem_expand(felem out, const smallfelem in)
492 * smallfelem_square sets |out| = |small|^2
496 * out[i] < 7 * 2^64 < 2^67
498 static void smallfelem_square(longfelem out, const smallfelem small)
503 a = ((uint128_t) small[0]) * small[0];
509 a = ((uint128_t) small[0]) * small[1];
516 a = ((uint128_t) small[0]) * small[2];
523 a = ((uint128_t) small[0]) * small[3];
529 a = ((uint128_t) small[1]) * small[2];
536 a = ((uint128_t) small[1]) * small[1];
542 a = ((uint128_t) small[1]) * small[3];
549 a = ((uint128_t) small[2]) * small[3];
557 a = ((uint128_t) small[2]) * small[2];
563 a = ((uint128_t) small[3]) * small[3];
571 * felem_square sets |out| = |in|^2
575 * out[i] < 7 * 2^64 < 2^67
577 static void felem_square(longfelem out, const felem in)
580 felem_shrink(small, in);
581 smallfelem_square(out, small);
585 * smallfelem_mul sets |out| = |small1| * |small2|
590 * out[i] < 7 * 2^64 < 2^67
592 static void smallfelem_mul(longfelem out, const smallfelem small1,
593 const smallfelem small2)
598 a = ((uint128_t) small1[0]) * small2[0];
604 a = ((uint128_t) small1[0]) * small2[1];
610 a = ((uint128_t) small1[1]) * small2[0];
616 a = ((uint128_t) small1[0]) * small2[2];
622 a = ((uint128_t) small1[1]) * small2[1];
628 a = ((uint128_t) small1[2]) * small2[0];
634 a = ((uint128_t) small1[0]) * small2[3];
640 a = ((uint128_t) small1[1]) * small2[2];
646 a = ((uint128_t) small1[2]) * small2[1];
652 a = ((uint128_t) small1[3]) * small2[0];
658 a = ((uint128_t) small1[1]) * small2[3];
664 a = ((uint128_t) small1[2]) * small2[2];
670 a = ((uint128_t) small1[3]) * small2[1];
676 a = ((uint128_t) small1[2]) * small2[3];
682 a = ((uint128_t) small1[3]) * small2[2];
688 a = ((uint128_t) small1[3]) * small2[3];
696 * felem_mul sets |out| = |in1| * |in2|
701 * out[i] < 7 * 2^64 < 2^67
703 static void felem_mul(longfelem out, const felem in1, const felem in2)
705 smallfelem small1, small2;
706 felem_shrink(small1, in1);
707 felem_shrink(small2, in2);
708 smallfelem_mul(out, small1, small2);
712 * felem_small_mul sets |out| = |small1| * |in2|
717 * out[i] < 7 * 2^64 < 2^67
719 static void felem_small_mul(longfelem out, const smallfelem small1,
723 felem_shrink(small2, in2);
724 smallfelem_mul(out, small1, small2);
727 # define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
728 # define two100 (((limb)1) << 100)
729 # define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
730 /* zero100 is 0 mod p */
731 static const felem zero100 =
732 { two100m36m4, two100, two100m36p4, two100m36p4 };
735 * Internal function for the different flavours of felem_reduce.
736 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
738 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
739 * out[1] >= in[7] + 2^32*in[4]
740 * out[2] >= in[5] + 2^32*in[5]
741 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
743 * out[0] <= out[0] + in[4] + 2^32*in[5]
744 * out[1] <= out[1] + in[5] + 2^33*in[6]
745 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
746 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
748 static void felem_reduce_(felem out, const longfelem in)
751 /* combine common terms from below */
752 c = in[4] + (in[5] << 32);
760 /* the remaining terms */
761 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
762 out[1] -= (in[4] << 32);
763 out[3] += (in[4] << 32);
765 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
766 out[2] -= (in[5] << 32);
768 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
770 out[0] -= (in[6] << 32);
771 out[1] += (in[6] << 33);
772 out[2] += (in[6] * 2);
773 out[3] -= (in[6] << 32);
775 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
777 out[0] -= (in[7] << 32);
778 out[2] += (in[7] << 33);
779 out[3] += (in[7] * 3);
783 * felem_reduce converts a longfelem into an felem.
784 * To be called directly after felem_square or felem_mul.
786 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
787 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
791 static void felem_reduce(felem out, const longfelem in)
793 out[0] = zero100[0] + in[0];
794 out[1] = zero100[1] + in[1];
795 out[2] = zero100[2] + in[2];
796 out[3] = zero100[3] + in[3];
798 felem_reduce_(out, in);
801 * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
802 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
803 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
804 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
806 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
807 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
808 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
809 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
814 * felem_reduce_zero105 converts a larger longfelem into an felem.
820 static void felem_reduce_zero105(felem out, const longfelem in)
822 out[0] = zero105[0] + in[0];
823 out[1] = zero105[1] + in[1];
824 out[2] = zero105[2] + in[2];
825 out[3] = zero105[3] + in[3];
827 felem_reduce_(out, in);
830 * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
831 * out[1] > 2^105 - 2^71 - 2^103 > 0
832 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
833 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
835 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
836 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
837 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
838 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
843 * subtract_u64 sets *result = *result - v and *carry to one if the
844 * subtraction underflowed.
846 static void subtract_u64(u64 *result, u64 *carry, u64 v)
848 uint128_t r = *result;
850 *carry = (r >> 64) & 1;
855 * felem_contract converts |in| to its unique, minimal representation. On
856 * entry: in[i] < 2^109
858 static void felem_contract(smallfelem out, const felem in)
861 u64 all_equal_so_far = 0, result = 0, carry;
863 felem_shrink(out, in);
864 /* small is minimal except that the value might be > p */
868 * We are doing a constant time test if out >= kPrime. We need to compare
869 * each u64, from most-significant to least significant. For each one, if
870 * all words so far have been equal (m is all ones) then a non-equal
871 * result is the answer. Otherwise we continue.
873 for (i = 3; i < 4; i--) {
875 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
877 * if out[i] > kPrime[i] then a will underflow and the high 64-bits
880 result |= all_equal_so_far & ((u64)(a >> 64));
883 * if kPrime[i] == out[i] then |equal| will be all zeros and the
884 * decrement will make it all ones.
886 equal = kPrime[i] ^ out[i];
888 equal &= equal << 32;
889 equal &= equal << 16;
894 equal = ((s64) equal) >> 63;
896 all_equal_so_far &= equal;
900 * if all_equal_so_far is still all ones then the two values are equal
901 * and so out >= kPrime is true.
903 result |= all_equal_so_far;
905 /* if out >= kPrime then we subtract kPrime. */
906 subtract_u64(&out[0], &carry, result & kPrime[0]);
907 subtract_u64(&out[1], &carry, carry);
908 subtract_u64(&out[2], &carry, carry);
909 subtract_u64(&out[3], &carry, carry);
911 subtract_u64(&out[1], &carry, result & kPrime[1]);
912 subtract_u64(&out[2], &carry, carry);
913 subtract_u64(&out[3], &carry, carry);
915 subtract_u64(&out[2], &carry, result & kPrime[2]);
916 subtract_u64(&out[3], &carry, carry);
918 subtract_u64(&out[3], &carry, result & kPrime[3]);
921 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
926 smallfelem_square(longtmp, in);
927 felem_reduce(tmp, longtmp);
928 felem_contract(out, tmp);
931 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
932 const smallfelem in2)
937 smallfelem_mul(longtmp, in1, in2);
938 felem_reduce(tmp, longtmp);
939 felem_contract(out, tmp);
943 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
948 static limb smallfelem_is_zero(const smallfelem small)
953 u64 is_zero = small[0] | small[1] | small[2] | small[3];
955 is_zero &= is_zero << 32;
956 is_zero &= is_zero << 16;
957 is_zero &= is_zero << 8;
958 is_zero &= is_zero << 4;
959 is_zero &= is_zero << 2;
960 is_zero &= is_zero << 1;
961 is_zero = ((s64) is_zero) >> 63;
963 is_p = (small[0] ^ kPrime[0]) |
964 (small[1] ^ kPrime[1]) |
965 (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
973 is_p = ((s64) is_p) >> 63;
978 result |= ((limb) is_zero) << 64;
982 static int smallfelem_is_zero_int(const smallfelem small)
984 return (int)(smallfelem_is_zero(small) & ((limb) 1));
988 * felem_inv calculates |out| = |in|^{-1}
990 * Based on Fermat's Little Theorem:
992 * a^{p-1} = 1 (mod p)
993 * a^{p-2} = a^{-1} (mod p)
995 static void felem_inv(felem out, const felem in)
998 /* each e_I will hold |in|^{2^I - 1} */
999 felem e2, e4, e8, e16, e32, e64;
1003 felem_square(tmp, in);
1004 felem_reduce(ftmp, tmp); /* 2^1 */
1005 felem_mul(tmp, in, ftmp);
1006 felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
1007 felem_assign(e2, ftmp);
1008 felem_square(tmp, ftmp);
1009 felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
1010 felem_square(tmp, ftmp);
1011 felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
1012 felem_mul(tmp, ftmp, e2);
1013 felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
1014 felem_assign(e4, ftmp);
1015 felem_square(tmp, ftmp);
1016 felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
1017 felem_square(tmp, ftmp);
1018 felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
1019 felem_square(tmp, ftmp);
1020 felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
1021 felem_square(tmp, ftmp);
1022 felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
1023 felem_mul(tmp, ftmp, e4);
1024 felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
1025 felem_assign(e8, ftmp);
1026 for (i = 0; i < 8; i++) {
1027 felem_square(tmp, ftmp);
1028 felem_reduce(ftmp, tmp);
1030 felem_mul(tmp, ftmp, e8);
1031 felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
1032 felem_assign(e16, ftmp);
1033 for (i = 0; i < 16; i++) {
1034 felem_square(tmp, ftmp);
1035 felem_reduce(ftmp, tmp);
1037 felem_mul(tmp, ftmp, e16);
1038 felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
1039 felem_assign(e32, ftmp);
1040 for (i = 0; i < 32; i++) {
1041 felem_square(tmp, ftmp);
1042 felem_reduce(ftmp, tmp);
1044 felem_assign(e64, ftmp);
1045 felem_mul(tmp, ftmp, in);
1046 felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
1047 for (i = 0; i < 192; i++) {
1048 felem_square(tmp, ftmp);
1049 felem_reduce(ftmp, tmp);
1050 } /* 2^256 - 2^224 + 2^192 */
1052 felem_mul(tmp, e64, e32);
1053 felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
1054 for (i = 0; i < 16; i++) {
1055 felem_square(tmp, ftmp2);
1056 felem_reduce(ftmp2, tmp);
1058 felem_mul(tmp, ftmp2, e16);
1059 felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
1060 for (i = 0; i < 8; i++) {
1061 felem_square(tmp, ftmp2);
1062 felem_reduce(ftmp2, tmp);
1064 felem_mul(tmp, ftmp2, e8);
1065 felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
1066 for (i = 0; i < 4; i++) {
1067 felem_square(tmp, ftmp2);
1068 felem_reduce(ftmp2, tmp);
1070 felem_mul(tmp, ftmp2, e4);
1071 felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
1072 felem_square(tmp, ftmp2);
1073 felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
1074 felem_square(tmp, ftmp2);
1075 felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
1076 felem_mul(tmp, ftmp2, e2);
1077 felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
1078 felem_square(tmp, ftmp2);
1079 felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
1080 felem_square(tmp, ftmp2);
1081 felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1082 felem_mul(tmp, ftmp2, in);
1083 felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1085 felem_mul(tmp, ftmp2, ftmp);
1086 felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1089 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1093 smallfelem_expand(tmp, in);
1094 felem_inv(tmp, tmp);
1095 felem_contract(out, tmp);
1102 * Building on top of the field operations we have the operations on the
1103 * elliptic curve group itself. Points on the curve are represented in Jacobian
1108 * point_double calculates 2*(x_in, y_in, z_in)
1110 * The method is taken from:
1111 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1113 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1114 * while x_out == y_in is not (maybe this works, but it's not tested).
1117 point_double(felem x_out, felem y_out, felem z_out,
1118 const felem x_in, const felem y_in, const felem z_in)
1120 longfelem tmp, tmp2;
1121 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1122 smallfelem small1, small2;
1124 felem_assign(ftmp, x_in);
1125 /* ftmp[i] < 2^106 */
1126 felem_assign(ftmp2, x_in);
1127 /* ftmp2[i] < 2^106 */
1130 felem_square(tmp, z_in);
1131 felem_reduce(delta, tmp);
1132 /* delta[i] < 2^101 */
1135 felem_square(tmp, y_in);
1136 felem_reduce(gamma, tmp);
1137 /* gamma[i] < 2^101 */
1138 felem_shrink(small1, gamma);
1140 /* beta = x*gamma */
1141 felem_small_mul(tmp, small1, x_in);
1142 felem_reduce(beta, tmp);
1143 /* beta[i] < 2^101 */
1145 /* alpha = 3*(x-delta)*(x+delta) */
1146 felem_diff(ftmp, delta);
1147 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1148 felem_sum(ftmp2, delta);
1149 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1150 felem_scalar(ftmp2, 3);
1151 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1152 felem_mul(tmp, ftmp, ftmp2);
1153 felem_reduce(alpha, tmp);
1154 /* alpha[i] < 2^101 */
1155 felem_shrink(small2, alpha);
1157 /* x' = alpha^2 - 8*beta */
1158 smallfelem_square(tmp, small2);
1159 felem_reduce(x_out, tmp);
1160 felem_assign(ftmp, beta);
1161 felem_scalar(ftmp, 8);
1162 /* ftmp[i] < 8 * 2^101 = 2^104 */
1163 felem_diff(x_out, ftmp);
1164 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1166 /* z' = (y + z)^2 - gamma - delta */
1167 felem_sum(delta, gamma);
1168 /* delta[i] < 2^101 + 2^101 = 2^102 */
1169 felem_assign(ftmp, y_in);
1170 felem_sum(ftmp, z_in);
1171 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1172 felem_square(tmp, ftmp);
1173 felem_reduce(z_out, tmp);
1174 felem_diff(z_out, delta);
1175 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1177 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1178 felem_scalar(beta, 4);
1179 /* beta[i] < 4 * 2^101 = 2^103 */
1180 felem_diff_zero107(beta, x_out);
1181 /* beta[i] < 2^107 + 2^103 < 2^108 */
1182 felem_small_mul(tmp, small2, beta);
1183 /* tmp[i] < 7 * 2^64 < 2^67 */
1184 smallfelem_square(tmp2, small1);
1185 /* tmp2[i] < 7 * 2^64 */
1186 longfelem_scalar(tmp2, 8);
1187 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1188 longfelem_diff(tmp, tmp2);
1189 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1190 felem_reduce_zero105(y_out, tmp);
1191 /* y_out[i] < 2^106 */
1195 * point_double_small is the same as point_double, except that it operates on
1199 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1200 const smallfelem x_in, const smallfelem y_in,
1201 const smallfelem z_in)
1203 felem felem_x_out, felem_y_out, felem_z_out;
1204 felem felem_x_in, felem_y_in, felem_z_in;
1206 smallfelem_expand(felem_x_in, x_in);
1207 smallfelem_expand(felem_y_in, y_in);
1208 smallfelem_expand(felem_z_in, z_in);
1209 point_double(felem_x_out, felem_y_out, felem_z_out,
1210 felem_x_in, felem_y_in, felem_z_in);
1211 felem_shrink(x_out, felem_x_out);
1212 felem_shrink(y_out, felem_y_out);
1213 felem_shrink(z_out, felem_z_out);
1216 /* copy_conditional copies in to out iff mask is all ones. */
1217 static void copy_conditional(felem out, const felem in, limb mask)
1220 for (i = 0; i < NLIMBS; ++i) {
1221 const limb tmp = mask & (in[i] ^ out[i]);
1226 /* copy_small_conditional copies in to out iff mask is all ones. */
1227 static void copy_small_conditional(felem out, const smallfelem in, limb mask)
1230 const u64 mask64 = mask;
1231 for (i = 0; i < NLIMBS; ++i) {
1232 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1237 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1239 * The method is taken from:
1240 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1241 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1243 * This function includes a branch for checking whether the two input points
1244 * are equal, (while not equal to the point at infinity). This case never
1245 * happens during single point multiplication, so there is no timing leak for
1246 * ECDH or ECDSA signing.
1248 static void point_add(felem x3, felem y3, felem z3,
1249 const felem x1, const felem y1, const felem z1,
1250 const int mixed, const smallfelem x2,
1251 const smallfelem y2, const smallfelem z2)
1253 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1254 longfelem tmp, tmp2;
1255 smallfelem small1, small2, small3, small4, small5;
1256 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1258 felem_shrink(small3, z1);
1260 z1_is_zero = smallfelem_is_zero(small3);
1261 z2_is_zero = smallfelem_is_zero(z2);
1263 /* ftmp = z1z1 = z1**2 */
1264 smallfelem_square(tmp, small3);
1265 felem_reduce(ftmp, tmp);
1266 /* ftmp[i] < 2^101 */
1267 felem_shrink(small1, ftmp);
1270 /* ftmp2 = z2z2 = z2**2 */
1271 smallfelem_square(tmp, z2);
1272 felem_reduce(ftmp2, tmp);
1273 /* ftmp2[i] < 2^101 */
1274 felem_shrink(small2, ftmp2);
1276 felem_shrink(small5, x1);
1278 /* u1 = ftmp3 = x1*z2z2 */
1279 smallfelem_mul(tmp, small5, small2);
1280 felem_reduce(ftmp3, tmp);
1281 /* ftmp3[i] < 2^101 */
1283 /* ftmp5 = z1 + z2 */
1284 felem_assign(ftmp5, z1);
1285 felem_small_sum(ftmp5, z2);
1286 /* ftmp5[i] < 2^107 */
1288 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1289 felem_square(tmp, ftmp5);
1290 felem_reduce(ftmp5, tmp);
1291 /* ftmp2 = z2z2 + z1z1 */
1292 felem_sum(ftmp2, ftmp);
1293 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1294 felem_diff(ftmp5, ftmp2);
1295 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1297 /* ftmp2 = z2 * z2z2 */
1298 smallfelem_mul(tmp, small2, z2);
1299 felem_reduce(ftmp2, tmp);
1301 /* s1 = ftmp2 = y1 * z2**3 */
1302 felem_mul(tmp, y1, ftmp2);
1303 felem_reduce(ftmp6, tmp);
1304 /* ftmp6[i] < 2^101 */
1307 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1310 /* u1 = ftmp3 = x1*z2z2 */
1311 felem_assign(ftmp3, x1);
1312 /* ftmp3[i] < 2^106 */
1315 felem_assign(ftmp5, z1);
1316 felem_scalar(ftmp5, 2);
1317 /* ftmp5[i] < 2*2^106 = 2^107 */
1319 /* s1 = ftmp2 = y1 * z2**3 */
1320 felem_assign(ftmp6, y1);
1321 /* ftmp6[i] < 2^106 */
1325 smallfelem_mul(tmp, x2, small1);
1326 felem_reduce(ftmp4, tmp);
1328 /* h = ftmp4 = u2 - u1 */
1329 felem_diff_zero107(ftmp4, ftmp3);
1330 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1331 felem_shrink(small4, ftmp4);
1333 x_equal = smallfelem_is_zero(small4);
1335 /* z_out = ftmp5 * h */
1336 felem_small_mul(tmp, small4, ftmp5);
1337 felem_reduce(z_out, tmp);
1338 /* z_out[i] < 2^101 */
1340 /* ftmp = z1 * z1z1 */
1341 smallfelem_mul(tmp, small1, small3);
1342 felem_reduce(ftmp, tmp);
1344 /* s2 = tmp = y2 * z1**3 */
1345 felem_small_mul(tmp, y2, ftmp);
1346 felem_reduce(ftmp5, tmp);
1348 /* r = ftmp5 = (s2 - s1)*2 */
1349 felem_diff_zero107(ftmp5, ftmp6);
1350 /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
1351 felem_scalar(ftmp5, 2);
1352 /* ftmp5[i] < 2^109 */
1353 felem_shrink(small1, ftmp5);
1354 y_equal = smallfelem_is_zero(small1);
1356 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1357 point_double(x3, y3, z3, x1, y1, z1);
1361 /* I = ftmp = (2h)**2 */
1362 felem_assign(ftmp, ftmp4);
1363 felem_scalar(ftmp, 2);
1364 /* ftmp[i] < 2*2^108 = 2^109 */
1365 felem_square(tmp, ftmp);
1366 felem_reduce(ftmp, tmp);
1368 /* J = ftmp2 = h * I */
1369 felem_mul(tmp, ftmp4, ftmp);
1370 felem_reduce(ftmp2, tmp);
1372 /* V = ftmp4 = U1 * I */
1373 felem_mul(tmp, ftmp3, ftmp);
1374 felem_reduce(ftmp4, tmp);
1376 /* x_out = r**2 - J - 2V */
1377 smallfelem_square(tmp, small1);
1378 felem_reduce(x_out, tmp);
1379 felem_assign(ftmp3, ftmp4);
1380 felem_scalar(ftmp4, 2);
1381 felem_sum(ftmp4, ftmp2);
1382 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1383 felem_diff(x_out, ftmp4);
1384 /* x_out[i] < 2^105 + 2^101 */
1386 /* y_out = r(V-x_out) - 2 * s1 * J */
1387 felem_diff_zero107(ftmp3, x_out);
1388 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1389 felem_small_mul(tmp, small1, ftmp3);
1390 felem_mul(tmp2, ftmp6, ftmp2);
1391 longfelem_scalar(tmp2, 2);
1392 /* tmp2[i] < 2*2^67 = 2^68 */
1393 longfelem_diff(tmp, tmp2);
1394 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1395 felem_reduce_zero105(y_out, tmp);
1396 /* y_out[i] < 2^106 */
1398 copy_small_conditional(x_out, x2, z1_is_zero);
1399 copy_conditional(x_out, x1, z2_is_zero);
1400 copy_small_conditional(y_out, y2, z1_is_zero);
1401 copy_conditional(y_out, y1, z2_is_zero);
1402 copy_small_conditional(z_out, z2, z1_is_zero);
1403 copy_conditional(z_out, z1, z2_is_zero);
1404 felem_assign(x3, x_out);
1405 felem_assign(y3, y_out);
1406 felem_assign(z3, z_out);
1410 * point_add_small is the same as point_add, except that it operates on
1413 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1414 smallfelem x1, smallfelem y1, smallfelem z1,
1415 smallfelem x2, smallfelem y2, smallfelem z2)
1417 felem felem_x3, felem_y3, felem_z3;
1418 felem felem_x1, felem_y1, felem_z1;
1419 smallfelem_expand(felem_x1, x1);
1420 smallfelem_expand(felem_y1, y1);
1421 smallfelem_expand(felem_z1, z1);
1422 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0,
1424 felem_shrink(x3, felem_x3);
1425 felem_shrink(y3, felem_y3);
1426 felem_shrink(z3, felem_z3);
1430 * Base point pre computation
1431 * --------------------------
1433 * Two different sorts of precomputed tables are used in the following code.
1434 * Each contain various points on the curve, where each point is three field
1435 * elements (x, y, z).
1437 * For the base point table, z is usually 1 (0 for the point at infinity).
1438 * This table has 2 * 16 elements, starting with the following:
1439 * index | bits | point
1440 * ------+---------+------------------------------
1443 * 2 | 0 0 1 0 | 2^64G
1444 * 3 | 0 0 1 1 | (2^64 + 1)G
1445 * 4 | 0 1 0 0 | 2^128G
1446 * 5 | 0 1 0 1 | (2^128 + 1)G
1447 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1448 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1449 * 8 | 1 0 0 0 | 2^192G
1450 * 9 | 1 0 0 1 | (2^192 + 1)G
1451 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1452 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1453 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1454 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1455 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1456 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1457 * followed by a copy of this with each element multiplied by 2^32.
1459 * The reason for this is so that we can clock bits into four different
1460 * locations when doing simple scalar multiplies against the base point,
1461 * and then another four locations using the second 16 elements.
1463 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1465 /* gmul is the table of precomputed base points */
1466 static const smallfelem gmul[2][16][3] = {
1470 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
1471 0x6b17d1f2e12c4247},
1472 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
1473 0x4fe342e2fe1a7f9b},
1475 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
1476 0x0fa822bc2811aaa5},
1477 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
1478 0xbff44ae8f5dba80d},
1480 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
1481 0x300a4bbc89d6726f},
1482 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
1483 0x72aac7e0d09b4644},
1485 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
1486 0x447d739beedb5e67},
1487 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
1488 0x2d4825ab834131ee},
1490 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
1491 0xef9519328a9c72ff},
1492 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
1493 0x611e9fc37dbb2c9b},
1495 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
1496 0x550663797b51f5d8},
1497 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
1498 0x157164848aecb851},
1500 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
1501 0xeb5d7745b21141ea},
1502 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
1503 0xeafd72ebdbecc17b},
1505 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
1506 0xa6d39677a7849276},
1507 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
1508 0x674f84749b0b8816},
1510 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
1511 0x4e769e7672c9ddad},
1512 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
1513 0x42b99082de830663},
1515 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
1516 0x78878ef61c6ce04d},
1517 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
1518 0xb6cb3f5d7b72c321},
1520 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
1521 0x0c88bc4d716b1287},
1522 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
1523 0xdd5ddea3f3901dc6},
1525 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
1526 0x68f344af6b317466},
1527 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
1528 0x31b9c405f8540a20},
1530 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
1531 0x4052bf4b6f461db9},
1532 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
1533 0xfecf4d5190b0fc61},
1535 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
1536 0x1eddbae2c802e41a},
1537 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
1538 0x43104d86560ebcfc},
1540 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
1541 0xb48e26b484f7a21c},
1542 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
1543 0xfac015404d4d3dab},
1548 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
1549 0x7fe36b40af22af89},
1550 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
1551 0xe697d45825b63624},
1553 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
1554 0x4a5b506612a677a6},
1555 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
1556 0xeb13461ceac089f1},
1558 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
1559 0x0781b8291c6a220a},
1560 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
1561 0x690cde8df0151593},
1563 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
1564 0x8a535f566ec73617},
1565 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
1566 0x0455c08468b08bd7},
1568 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
1569 0x06bada7ab77f8276},
1570 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
1571 0x5b476dfd0e6cb18a},
1573 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
1574 0x3e29864e8a2ec908},
1575 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
1576 0x239b90ea3dc31e7e},
1578 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
1579 0x820f4dd949f72ff7},
1580 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
1581 0x140406ec783a05ec},
1583 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
1584 0x68f6b8542783dfee},
1585 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
1586 0xcbe1feba92e40ce6},
1588 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
1589 0xd0b2f94d2f420109},
1590 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
1591 0x971459828b0719e5},
1593 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
1594 0x961610004a866aba},
1595 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
1596 0x7acb9fadcee75e44},
1598 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
1599 0x24eb9acca333bf5b},
1600 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
1601 0x69f891c5acd079cc},
1603 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
1604 0xe51f547c5972a107},
1605 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
1606 0x1c309a2b25bb1387},
1608 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
1609 0x20b87b8aa2c4e503},
1610 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
1611 0xf5c6fa49919776be},
1613 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
1614 0x1ed7d1b9332010b9},
1615 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
1616 0x3a2b03f03217257a},
1618 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
1619 0x15fee545c78dd9f6},
1620 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
1621 0x4ab5b6b2b8753f81},
1626 * select_point selects the |idx|th point from a precomputation table and
1629 static void select_point(const u64 idx, unsigned int size,
1630 const smallfelem pre_comp[16][3], smallfelem out[3])
1633 u64 *outlimbs = &out[0][0];
1635 memset(out, 0, sizeof(*out) * 3);
1637 for (i = 0; i < size; i++) {
1638 const u64 *inlimbs = (u64 *)&pre_comp[i][0][0];
1645 for (j = 0; j < NLIMBS * 3; j++)
1646 outlimbs[j] |= inlimbs[j] & mask;
1650 /* get_bit returns the |i|th bit in |in| */
1651 static char get_bit(const felem_bytearray in, int i)
1653 if ((i < 0) || (i >= 256))
1655 return (in[i >> 3] >> (i & 7)) & 1;
1659 * Interleaved point multiplication using precomputed point multiples: The
1660 * small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[], the scalars
1661 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1662 * generator, using certain (large) precomputed multiples in g_pre_comp.
1663 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1665 static void batch_mul(felem x_out, felem y_out, felem z_out,
1666 const felem_bytearray scalars[],
1667 const unsigned num_points, const u8 *g_scalar,
1668 const int mixed, const smallfelem pre_comp[][17][3],
1669 const smallfelem g_pre_comp[2][16][3])
1672 unsigned num, gen_mul = (g_scalar != NULL);
1678 /* set nq to the point at infinity */
1679 memset(nq, 0, sizeof(nq));
1682 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1683 * of the generator (two in each of the last 32 rounds) and additions of
1684 * other points multiples (every 5th round).
1686 skip = 1; /* save two point operations in the first
1688 for (i = (num_points ? 255 : 31); i >= 0; --i) {
1691 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1693 /* add multiples of the generator */
1694 if (gen_mul && (i <= 31)) {
1695 /* first, look 32 bits upwards */
1696 bits = get_bit(g_scalar, i + 224) << 3;
1697 bits |= get_bit(g_scalar, i + 160) << 2;
1698 bits |= get_bit(g_scalar, i + 96) << 1;
1699 bits |= get_bit(g_scalar, i + 32);
1700 /* select the point to add, in constant time */
1701 select_point(bits, 16, g_pre_comp[1], tmp);
1704 /* Arg 1 below is for "mixed" */
1705 point_add(nq[0], nq[1], nq[2],
1706 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1708 smallfelem_expand(nq[0], tmp[0]);
1709 smallfelem_expand(nq[1], tmp[1]);
1710 smallfelem_expand(nq[2], tmp[2]);
1714 /* second, look at the current position */
1715 bits = get_bit(g_scalar, i + 192) << 3;
1716 bits |= get_bit(g_scalar, i + 128) << 2;
1717 bits |= get_bit(g_scalar, i + 64) << 1;
1718 bits |= get_bit(g_scalar, i);
1719 /* select the point to add, in constant time */
1720 select_point(bits, 16, g_pre_comp[0], tmp);
1721 /* Arg 1 below is for "mixed" */
1722 point_add(nq[0], nq[1], nq[2],
1723 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1726 /* do other additions every 5 doublings */
1727 if (num_points && (i % 5 == 0)) {
1728 /* loop over all scalars */
1729 for (num = 0; num < num_points; ++num) {
1730 bits = get_bit(scalars[num], i + 4) << 5;
1731 bits |= get_bit(scalars[num], i + 3) << 4;
1732 bits |= get_bit(scalars[num], i + 2) << 3;
1733 bits |= get_bit(scalars[num], i + 1) << 2;
1734 bits |= get_bit(scalars[num], i) << 1;
1735 bits |= get_bit(scalars[num], i - 1);
1736 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1739 * select the point to add or subtract, in constant time
1741 select_point(digit, 17, pre_comp[num], tmp);
1742 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
1744 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1745 felem_contract(tmp[1], ftmp);
1748 point_add(nq[0], nq[1], nq[2],
1749 nq[0], nq[1], nq[2],
1750 mixed, tmp[0], tmp[1], tmp[2]);
1752 smallfelem_expand(nq[0], tmp[0]);
1753 smallfelem_expand(nq[1], tmp[1]);
1754 smallfelem_expand(nq[2], tmp[2]);
1760 felem_assign(x_out, nq[0]);
1761 felem_assign(y_out, nq[1]);
1762 felem_assign(z_out, nq[2]);
1765 /* Precomputation for the group generator. */
1766 struct nistp256_pre_comp_st {
1767 smallfelem g_pre_comp[2][16][3];
1768 CRYPTO_REF_COUNT references;
1769 CRYPTO_RWLOCK *lock;
1772 const EC_METHOD *EC_GFp_nistp256_method(void)
1774 static const EC_METHOD ret = {
1775 EC_FLAGS_DEFAULT_OCT,
1776 NID_X9_62_prime_field,
1777 ec_GFp_nistp256_group_init,
1778 ec_GFp_simple_group_finish,
1779 ec_GFp_simple_group_clear_finish,
1780 ec_GFp_nist_group_copy,
1781 ec_GFp_nistp256_group_set_curve,
1782 ec_GFp_simple_group_get_curve,
1783 ec_GFp_simple_group_get_degree,
1784 ec_group_simple_order_bits,
1785 ec_GFp_simple_group_check_discriminant,
1786 ec_GFp_simple_point_init,
1787 ec_GFp_simple_point_finish,
1788 ec_GFp_simple_point_clear_finish,
1789 ec_GFp_simple_point_copy,
1790 ec_GFp_simple_point_set_to_infinity,
1791 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1792 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1793 ec_GFp_simple_point_set_affine_coordinates,
1794 ec_GFp_nistp256_point_get_affine_coordinates,
1795 0 /* point_set_compressed_coordinates */ ,
1800 ec_GFp_simple_invert,
1801 ec_GFp_simple_is_at_infinity,
1802 ec_GFp_simple_is_on_curve,
1804 ec_GFp_simple_make_affine,
1805 ec_GFp_simple_points_make_affine,
1806 ec_GFp_nistp256_points_mul,
1807 ec_GFp_nistp256_precompute_mult,
1808 ec_GFp_nistp256_have_precompute_mult,
1809 ec_GFp_nist_field_mul,
1810 ec_GFp_nist_field_sqr,
1812 0 /* field_encode */ ,
1813 0 /* field_decode */ ,
1814 0, /* field_set_to_one */
1815 ec_key_simple_priv2oct,
1816 ec_key_simple_oct2priv,
1817 0, /* set private */
1818 ec_key_simple_generate_key,
1819 ec_key_simple_check_key,
1820 ec_key_simple_generate_public_key,
1823 ecdh_simple_compute_key
1829 /******************************************************************************/
1831 * FUNCTIONS TO MANAGE PRECOMPUTATION
1834 static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1836 NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1839 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1843 ret->references = 1;
1845 ret->lock = CRYPTO_THREAD_lock_new();
1846 if (ret->lock == NULL) {
1847 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1854 NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)
1858 CRYPTO_UP_REF(&p->references, &i, p->lock);
1862 void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)
1869 CRYPTO_DOWN_REF(&pre->references, &i, pre->lock);
1870 REF_PRINT_COUNT("EC_nistp256", x);
1873 REF_ASSERT_ISNT(i < 0);
1875 CRYPTO_THREAD_lock_free(pre->lock);
1879 /******************************************************************************/
1881 * OPENSSL EC_METHOD FUNCTIONS
1884 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1887 ret = ec_GFp_simple_group_init(group);
1888 group->a_is_minus3 = 1;
1892 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1893 const BIGNUM *a, const BIGNUM *b,
1897 BN_CTX *new_ctx = NULL;
1898 BIGNUM *curve_p, *curve_a, *curve_b;
1901 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1904 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1905 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1906 ((curve_b = BN_CTX_get(ctx)) == NULL))
1908 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1909 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1910 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1911 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1912 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1913 EC_R_WRONG_CURVE_PARAMETERS);
1916 group->field_mod_func = BN_nist_mod_256;
1917 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1920 BN_CTX_free(new_ctx);
1925 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1928 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1929 const EC_POINT *point,
1930 BIGNUM *x, BIGNUM *y,
1933 felem z1, z2, x_in, y_in;
1934 smallfelem x_out, y_out;
1937 if (EC_POINT_is_at_infinity(group, point)) {
1938 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1939 EC_R_POINT_AT_INFINITY);
1942 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1943 (!BN_to_felem(z1, point->Z)))
1946 felem_square(tmp, z2);
1947 felem_reduce(z1, tmp);
1948 felem_mul(tmp, x_in, z1);
1949 felem_reduce(x_in, tmp);
1950 felem_contract(x_out, x_in);
1952 if (!smallfelem_to_BN(x, x_out)) {
1953 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1958 felem_mul(tmp, z1, z2);
1959 felem_reduce(z1, tmp);
1960 felem_mul(tmp, y_in, z1);
1961 felem_reduce(y_in, tmp);
1962 felem_contract(y_out, y_in);
1964 if (!smallfelem_to_BN(y, y_out)) {
1965 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1973 /* points below is of size |num|, and tmp_smallfelems is of size |num+1| */
1974 static void make_points_affine(size_t num, smallfelem points[][3],
1975 smallfelem tmp_smallfelems[])
1978 * Runs in constant time, unless an input is the point at infinity (which
1979 * normally shouldn't happen).
1981 ec_GFp_nistp_points_make_affine_internal(num,
1985 (void (*)(void *))smallfelem_one,
1986 (int (*)(const void *))
1987 smallfelem_is_zero_int,
1988 (void (*)(void *, const void *))
1990 (void (*)(void *, const void *))
1991 smallfelem_square_contract,
1993 (void *, const void *,
1995 smallfelem_mul_contract,
1996 (void (*)(void *, const void *))
1997 smallfelem_inv_contract,
1998 /* nothing to contract */
1999 (void (*)(void *, const void *))
2004 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
2005 * values Result is stored in r (r can equal one of the inputs).
2007 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
2008 const BIGNUM *scalar, size_t num,
2009 const EC_POINT *points[],
2010 const BIGNUM *scalars[], BN_CTX *ctx)
2015 BN_CTX *new_ctx = NULL;
2016 BIGNUM *x, *y, *z, *tmp_scalar;
2017 felem_bytearray g_secret;
2018 felem_bytearray *secrets = NULL;
2019 smallfelem (*pre_comp)[17][3] = NULL;
2020 smallfelem *tmp_smallfelems = NULL;
2021 felem_bytearray tmp;
2022 unsigned i, num_bytes;
2023 int have_pre_comp = 0;
2024 size_t num_points = num;
2025 smallfelem x_in, y_in, z_in;
2026 felem x_out, y_out, z_out;
2027 NISTP256_PRE_COMP *pre = NULL;
2028 const smallfelem(*g_pre_comp)[16][3] = NULL;
2029 EC_POINT *generator = NULL;
2030 const EC_POINT *p = NULL;
2031 const BIGNUM *p_scalar = NULL;
2034 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2037 if (((x = BN_CTX_get(ctx)) == NULL) ||
2038 ((y = BN_CTX_get(ctx)) == NULL) ||
2039 ((z = BN_CTX_get(ctx)) == NULL) ||
2040 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
2043 if (scalar != NULL) {
2044 pre = group->pre_comp.nistp256;
2046 /* we have precomputation, try to use it */
2047 g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;
2049 /* try to use the standard precomputation */
2050 g_pre_comp = &gmul[0];
2051 generator = EC_POINT_new(group);
2052 if (generator == NULL)
2054 /* get the generator from precomputation */
2055 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
2056 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
2057 !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
2058 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2061 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
2065 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2066 /* precomputation matches generator */
2070 * we don't have valid precomputation: treat the generator as a
2075 if (num_points > 0) {
2076 if (num_points >= 3) {
2078 * unless we precompute multiples for just one or two points,
2079 * converting those into affine form is time well spent
2083 secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);
2084 pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);
2087 OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));
2088 if ((secrets == NULL) || (pre_comp == NULL)
2089 || (mixed && (tmp_smallfelems == NULL))) {
2090 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
2095 * we treat NULL scalars as 0, and NULL points as points at infinity,
2096 * i.e., they contribute nothing to the linear combination
2098 memset(secrets, 0, sizeof(*secrets) * num_points);
2099 memset(pre_comp, 0, sizeof(*pre_comp) * num_points);
2100 for (i = 0; i < num_points; ++i) {
2103 * we didn't have a valid precomputation, so we pick the
2107 p = EC_GROUP_get0_generator(group);
2110 /* the i^th point */
2113 p_scalar = scalars[i];
2115 if ((p_scalar != NULL) && (p != NULL)) {
2116 /* reduce scalar to 0 <= scalar < 2^256 */
2117 if ((BN_num_bits(p_scalar) > 256)
2118 || (BN_is_negative(p_scalar))) {
2120 * this is an unusual input, and we don't guarantee
2123 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
2124 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2127 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2129 num_bytes = BN_bn2bin(p_scalar, tmp);
2130 flip_endian(secrets[i], tmp, num_bytes);
2131 /* precompute multiples */
2132 if ((!BN_to_felem(x_out, p->X)) ||
2133 (!BN_to_felem(y_out, p->Y)) ||
2134 (!BN_to_felem(z_out, p->Z)))
2136 felem_shrink(pre_comp[i][1][0], x_out);
2137 felem_shrink(pre_comp[i][1][1], y_out);
2138 felem_shrink(pre_comp[i][1][2], z_out);
2139 for (j = 2; j <= 16; ++j) {
2141 point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],
2142 pre_comp[i][j][2], pre_comp[i][1][0],
2143 pre_comp[i][1][1], pre_comp[i][1][2],
2144 pre_comp[i][j - 1][0],
2145 pre_comp[i][j - 1][1],
2146 pre_comp[i][j - 1][2]);
2148 point_double_small(pre_comp[i][j][0],
2151 pre_comp[i][j / 2][0],
2152 pre_comp[i][j / 2][1],
2153 pre_comp[i][j / 2][2]);
2159 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
2162 /* the scalar for the generator */
2163 if ((scalar != NULL) && (have_pre_comp)) {
2164 memset(g_secret, 0, sizeof(g_secret));
2165 /* reduce scalar to 0 <= scalar < 2^256 */
2166 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
2168 * this is an unusual input, and we don't guarantee
2171 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
2172 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2175 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2177 num_bytes = BN_bn2bin(scalar, tmp);
2178 flip_endian(g_secret, tmp, num_bytes);
2179 /* do the multiplication with generator precomputation */
2180 batch_mul(x_out, y_out, z_out,
2181 (const felem_bytearray(*))secrets, num_points,
2183 mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
2185 /* do the multiplication without generator precomputation */
2186 batch_mul(x_out, y_out, z_out,
2187 (const felem_bytearray(*))secrets, num_points,
2188 NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
2189 /* reduce the output to its unique minimal representation */
2190 felem_contract(x_in, x_out);
2191 felem_contract(y_in, y_out);
2192 felem_contract(z_in, z_out);
2193 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2194 (!smallfelem_to_BN(z, z_in))) {
2195 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2198 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2202 EC_POINT_free(generator);
2203 BN_CTX_free(new_ctx);
2204 OPENSSL_free(secrets);
2205 OPENSSL_free(pre_comp);
2206 OPENSSL_free(tmp_smallfelems);
2210 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2213 NISTP256_PRE_COMP *pre = NULL;
2215 BN_CTX *new_ctx = NULL;
2217 EC_POINT *generator = NULL;
2218 smallfelem tmp_smallfelems[32];
2219 felem x_tmp, y_tmp, z_tmp;
2221 /* throw away old precomputation */
2222 EC_pre_comp_free(group);
2224 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2227 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
2229 /* get the generator */
2230 if (group->generator == NULL)
2232 generator = EC_POINT_new(group);
2233 if (generator == NULL)
2235 BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
2236 BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
2237 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2239 if ((pre = nistp256_pre_comp_new()) == NULL)
2242 * if the generator is the standard one, use built-in precomputation
2244 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2245 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2248 if ((!BN_to_felem(x_tmp, group->generator->X)) ||
2249 (!BN_to_felem(y_tmp, group->generator->Y)) ||
2250 (!BN_to_felem(z_tmp, group->generator->Z)))
2252 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2253 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2254 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2256 * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
2257 * 2^160*G, 2^224*G for the second one
2259 for (i = 1; i <= 8; i <<= 1) {
2260 point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2261 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
2262 pre->g_pre_comp[0][i][1],
2263 pre->g_pre_comp[0][i][2]);
2264 for (j = 0; j < 31; ++j) {
2265 point_double_small(pre->g_pre_comp[1][i][0],
2266 pre->g_pre_comp[1][i][1],
2267 pre->g_pre_comp[1][i][2],
2268 pre->g_pre_comp[1][i][0],
2269 pre->g_pre_comp[1][i][1],
2270 pre->g_pre_comp[1][i][2]);
2274 point_double_small(pre->g_pre_comp[0][2 * i][0],
2275 pre->g_pre_comp[0][2 * i][1],
2276 pre->g_pre_comp[0][2 * i][2],
2277 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2278 pre->g_pre_comp[1][i][2]);
2279 for (j = 0; j < 31; ++j) {
2280 point_double_small(pre->g_pre_comp[0][2 * i][0],
2281 pre->g_pre_comp[0][2 * i][1],
2282 pre->g_pre_comp[0][2 * i][2],
2283 pre->g_pre_comp[0][2 * i][0],
2284 pre->g_pre_comp[0][2 * i][1],
2285 pre->g_pre_comp[0][2 * i][2]);
2288 for (i = 0; i < 2; i++) {
2289 /* g_pre_comp[i][0] is the point at infinity */
2290 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2291 /* the remaining multiples */
2292 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2293 point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
2294 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
2295 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2296 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2297 pre->g_pre_comp[i][2][2]);
2298 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2299 point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
2300 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
2301 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2302 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2303 pre->g_pre_comp[i][2][2]);
2304 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2305 point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
2306 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
2307 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2308 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
2309 pre->g_pre_comp[i][4][2]);
2311 * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
2313 point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
2314 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
2315 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2316 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2317 pre->g_pre_comp[i][2][2]);
2318 for (j = 1; j < 8; ++j) {
2319 /* odd multiples: add G resp. 2^32*G */
2320 point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
2321 pre->g_pre_comp[i][2 * j + 1][1],
2322 pre->g_pre_comp[i][2 * j + 1][2],
2323 pre->g_pre_comp[i][2 * j][0],
2324 pre->g_pre_comp[i][2 * j][1],
2325 pre->g_pre_comp[i][2 * j][2],
2326 pre->g_pre_comp[i][1][0],
2327 pre->g_pre_comp[i][1][1],
2328 pre->g_pre_comp[i][1][2]);
2331 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2334 SETPRECOMP(group, nistp256, pre);
2340 EC_POINT_free(generator);
2341 BN_CTX_free(new_ctx);
2342 EC_nistp256_pre_comp_free(pre);
2346 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2348 return HAVEPRECOMP(group, nistp256);