1 /* crypto/ec/ecp_nistp256.c */
3 * Written by Adam Langley (Google) for the OpenSSL project
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
#ifndef OPENSSL_SYS_VMS
#include <stdint.h>
#else
#include <inttypes.h>
#endif

#include <string.h>
#include <openssl/err.h>
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 typedef __int128_t int128_t;
47 #error "Need GCC 3.1 or later to define type uint128_t"
55 /* The underlying field.
57 * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
58 * of this field into 32 bytes. We call this an felem_bytearray. */
60 typedef u8 felem_bytearray[32];
62 /* These are the parameters of P256, taken from FIPS 186-3, page 86. These
63 * values are big-endian. */
64 static const felem_bytearray nistp256_curve_params[5] = {
65 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
66 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
67 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
69 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
70 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
71 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
73 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
74 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
75 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
76 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
77 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
78 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
79 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
80 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
81 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
82 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
83 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
84 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
87 /* The representation of field elements.
88 * ------------------------------------
90 * We represent field elements with either four 128-bit values, eight 128-bit
91 * values, or four 64-bit values. The field element represented is:
92 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
94 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
96 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
97 * apart, but are 128-bits wide, the most significant bits of each limb overlap
98 * with the least significant bits of the next.
100 * A field element with four limbs is an 'felem'. One with eight limbs is a
103 * A field element with four, 64-bit values is called a 'smallfelem'. Small
104 * values are used as intermediate values before multiplication.
109 typedef uint128_t limb;
110 typedef limb felem[NLIMBS];
111 typedef limb longfelem[NLIMBS * 2];
112 typedef u64 smallfelem[NLIMBS];
114 /* This is the value of the prime as four 64-bit words, little-endian. */
115 static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
116 static const u64 bottom63bits = 0x7ffffffffffffffful;
118 /* bin32_to_felem takes a little-endian byte array and converts it into felem
119 * form. This assumes that the CPU is little-endian. */
120 static void bin32_to_felem(felem out, const u8 in[32])
122 out[0] = *((u64*) &in[0]);
123 out[1] = *((u64*) &in[8]);
124 out[2] = *((u64*) &in[16]);
125 out[3] = *((u64*) &in[24]);
128 /* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
129 * 32 byte array. This assumes that the CPU is little-endian. */
130 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
132 *((u64*) &out[0]) = in[0];
133 *((u64*) &out[8]) = in[1];
134 *((u64*) &out[16]) = in[2];
135 *((u64*) &out[24]) = in[3];
138 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
139 static void flip_endian(u8 *out, const u8 *in, unsigned len)
142 for (i = 0; i < len; ++i)
143 out[i] = in[len-1-i];
146 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
147 static int BN_to_felem(felem out, const BIGNUM *bn)
149 felem_bytearray b_in;
150 felem_bytearray b_out;
153 /* BN_bn2bin eats leading zeroes */
154 memset(b_out, 0, sizeof b_out);
155 num_bytes = BN_num_bytes(bn);
156 if (num_bytes > sizeof b_out)
158 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
161 if (BN_is_negative(bn))
163 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
166 num_bytes = BN_bn2bin(bn, b_in);
167 flip_endian(b_out, b_in, num_bytes);
168 bin32_to_felem(out, b_out);
172 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
173 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
175 felem_bytearray b_in, b_out;
176 smallfelem_to_bin32(b_in, in);
177 flip_endian(b_out, b_in, sizeof b_out);
178 return BN_bin2bn(b_out, sizeof b_out, out);
183 * ---------------- */
185 static void smallfelem_one(smallfelem out)
193 static void smallfelem_assign(smallfelem out, const smallfelem in)
201 static void felem_assign(felem out, const felem in)
209 /* felem_sum sets out = out + in. */
210 static void felem_sum(felem out, const felem in)
218 /* felem_small_sum sets out = out + in. */
219 static void felem_small_sum(felem out, const smallfelem in)
227 /* felem_scalar sets out = out * scalar */
228 static void felem_scalar(felem out, const u64 scalar)
236 /* longfelem_scalar sets out = out * scalar */
237 static void longfelem_scalar(longfelem out, const u64 scalar)
249 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
250 #define two105 (((limb)1) << 105)
251 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
253 /* zero105 is 0 mod p */
254 static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
256 /* smallfelem_neg sets |out| to |-small|
258 * out[i] < out[i] + 2^105
260 static void smallfelem_neg(felem out, const smallfelem small)
262 /* In order to prevent underflow, we subtract from 0 mod p. */
263 out[0] = zero105[0] - small[0];
264 out[1] = zero105[1] - small[1];
265 out[2] = zero105[2] - small[2];
266 out[3] = zero105[3] - small[3];
269 /* felem_diff subtracts |in| from |out|
273 * out[i] < out[i] + 2^105
275 static void felem_diff(felem out, const felem in)
277 /* In order to prevent underflow, we add 0 mod p before subtracting. */
278 out[0] += zero105[0];
279 out[1] += zero105[1];
280 out[2] += zero105[2];
281 out[3] += zero105[3];
289 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
290 #define two107 (((limb)1) << 107)
291 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
293 /* zero107 is 0 mod p */
294 static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
296 /* An alternative felem_diff for larger inputs |in|
297 * felem_diff_zero107 subtracts |in| from |out|
301 * out[i] < out[i] + 2^107
303 static void felem_diff_zero107(felem out, const felem in)
305 /* In order to prevent underflow, we add 0 mod p before subtracting. */
306 out[0] += zero107[0];
307 out[1] += zero107[1];
308 out[2] += zero107[2];
309 out[3] += zero107[3];
317 /* longfelem_diff subtracts |in| from |out|
321 * out[i] < out[i] + 2^70 + 2^40
323 static void longfelem_diff(longfelem out, const longfelem in)
325 static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
326 static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
327 static const limb two70 = (((limb)1) << 70);
328 static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
329 static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
331 /* add 0 mod p to avoid underflow */
335 out[3] += two70m40m38p6;
341 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
352 #define two64m0 (((limb)1) << 64) - 1
353 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
354 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
355 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
357 /* zero110 is 0 mod p */
358 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
360 /* felem_shrink converts an felem into a smallfelem. The result isn't quite
361 * minimal as the value may be greater than p.
368 static void felem_shrink(smallfelem out, const felem in)
373 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
376 tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
379 tmp[2] = zero110[2] + (u64) in[2];
380 tmp[0] = zero110[0] + in[0];
381 tmp[1] = zero110[1] + in[1];
382 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
384 /* We perform two partial reductions where we eliminate the
385 * high-word of tmp[3]. We don't update the other words till the end.
387 a = tmp[3] >> 64; /* a < 2^46 */
388 tmp[3] = (u64) tmp[3];
390 tmp[3] += ((limb)a) << 32;
394 a = tmp[3] >> 64; /* a < 2^15 */
395 b += a; /* b < 2^46 + 2^15 < 2^47 */
396 tmp[3] = (u64) tmp[3];
398 tmp[3] += ((limb)a) << 32;
399 /* tmp[3] < 2^64 + 2^47 */
401 /* This adjusts the other two words to complete the two partial
404 tmp[1] -= (((limb)b) << 32);
406 /* In order to make space in tmp[3] for the carry from 2 -> 3, we
407 * conditionally subtract kPrime if tmp[3] is large enough. */
409 /* As tmp[3] < 2^65, high is either 1 or 0 */
413 * all ones if the high word of tmp[3] is 1
414 * all zeros if the high word of tmp[3] if 0 */
418 * all ones if the MSB of low is 1
419 * all zeros if the MSB of low if 0 */
422 /* if low was greater than kPrime3Test then the MSB is zero */
426 * all ones if low was > kPrime3Test
427 * all zeros if low was <= kPrime3Test */
428 mask = (mask & low) | high;
429 tmp[0] -= mask & kPrime[0];
430 tmp[1] -= mask & kPrime[1];
431 /* kPrime[2] is zero, so omitted */
432 tmp[3] -= mask & kPrime[3];
433 /* tmp[3] < 2**64 - 2**32 + 1 */
435 tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
436 tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
437 tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
446 /* smallfelem_expand converts a smallfelem to an felem */
447 static void smallfelem_expand(felem out, const smallfelem in)
455 /* smallfelem_square sets |out| = |small|^2
459 * out[i] < 7 * 2^64 < 2^67
461 static void smallfelem_square(longfelem out, const smallfelem small)
466 a = ((uint128_t) small[0]) * small[0];
472 a = ((uint128_t) small[0]) * small[1];
479 a = ((uint128_t) small[0]) * small[2];
486 a = ((uint128_t) small[0]) * small[3];
492 a = ((uint128_t) small[1]) * small[2];
499 a = ((uint128_t) small[1]) * small[1];
505 a = ((uint128_t) small[1]) * small[3];
512 a = ((uint128_t) small[2]) * small[3];
520 a = ((uint128_t) small[2]) * small[2];
526 a = ((uint128_t) small[3]) * small[3];
533 /* felem_square sets |out| = |in|^2
537 * out[i] < 7 * 2^64 < 2^67
539 static void felem_square(longfelem out, const felem in)
542 felem_shrink(small, in);
543 smallfelem_square(out, small);
546 /* smallfelem_mul sets |out| = |small1| * |small2|
551 * out[i] < 7 * 2^64 < 2^67
553 static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
558 a = ((uint128_t) small1[0]) * small2[0];
565 a = ((uint128_t) small1[0]) * small2[1];
571 a = ((uint128_t) small1[1]) * small2[0];
578 a = ((uint128_t) small1[0]) * small2[2];
584 a = ((uint128_t) small1[1]) * small2[1];
590 a = ((uint128_t) small1[2]) * small2[0];
597 a = ((uint128_t) small1[0]) * small2[3];
603 a = ((uint128_t) small1[1]) * small2[2];
609 a = ((uint128_t) small1[2]) * small2[1];
615 a = ((uint128_t) small1[3]) * small2[0];
622 a = ((uint128_t) small1[1]) * small2[3];
628 a = ((uint128_t) small1[2]) * small2[2];
634 a = ((uint128_t) small1[3]) * small2[1];
641 a = ((uint128_t) small1[2]) * small2[3];
647 a = ((uint128_t) small1[3]) * small2[2];
654 a = ((uint128_t) small1[3]) * small2[3];
661 /* felem_mul sets |out| = |in1| * |in2|
666 * out[i] < 7 * 2^64 < 2^67
668 static void felem_mul(longfelem out, const felem in1, const felem in2)
670 smallfelem small1, small2;
671 felem_shrink(small1, in1);
672 felem_shrink(small2, in2);
673 smallfelem_mul(out, small1, small2);
676 /* felem_small_mul sets |out| = |small1| * |in2|
681 * out[i] < 7 * 2^64 < 2^67
683 static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
686 felem_shrink(small2, in2);
687 smallfelem_mul(out, small1, small2);
690 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
691 #define two100 (((limb)1) << 100)
692 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
693 /* zero100 is 0 mod p */
694 static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
696 /* Internal function for the different flavours of felem_reduce.
697 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
699 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
700 * out[1] >= in[7] + 2^32*in[4]
701 * out[2] >= in[5] + 2^32*in[5]
702 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
704 * out[0] <= out[0] + in[4] + 2^32*in[5]
705 * out[1] <= out[1] + in[5] + 2^33*in[6]
706 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
707 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
709 static void felem_reduce_(felem out, const longfelem in)
712 /* combine common terms from below */
713 c = in[4] + (in[5] << 32);
721 /* the remaining terms */
722 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
723 out[1] -= (in[4] << 32);
724 out[3] += (in[4] << 32);
726 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
727 out[2] -= (in[5] << 32);
729 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
731 out[0] -= (in[6] << 32);
732 out[1] += (in[6] << 33);
733 out[2] += (in[6] * 2);
734 out[3] -= (in[6] << 32);
736 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
738 out[0] -= (in[7] << 32);
739 out[2] += (in[7] << 33);
740 out[3] += (in[7] * 3);
743 /* felem_reduce converts a longfelem into an felem.
744 * To be called directly after felem_square or felem_mul.
746 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
747 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
751 static void felem_reduce(felem out, const longfelem in)
753 out[0] = zero100[0] + in[0];
754 out[1] = zero100[1] + in[1];
755 out[2] = zero100[2] + in[2];
756 out[3] = zero100[3] + in[3];
758 felem_reduce_(out, in);
760 /* out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
761 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
762 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
763 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
765 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
766 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
767 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
768 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
772 /* felem_reduce_zero105 converts a larger longfelem into an felem.
778 static void felem_reduce_zero105(felem out, const longfelem in)
780 out[0] = zero105[0] + in[0];
781 out[1] = zero105[1] + in[1];
782 out[2] = zero105[2] + in[2];
783 out[3] = zero105[3] + in[3];
785 felem_reduce_(out, in);
787 /* out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
788 * out[1] > 2^105 - 2^71 - 2^103 > 0
789 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
790 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
792 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
793 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
794 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
795 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
799 /* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
801 static void subtract_u64(u64* result, u64* carry, u64 v)
803 uint128_t r = *result;
805 *carry = (r >> 64) & 1;
809 /* felem_contract converts |in| to its unique, minimal representation.
813 static void felem_contract(smallfelem out, const felem in)
816 u64 all_equal_so_far = 0, result = 0, carry;
818 felem_shrink(out, in);
819 /* small is minimal except that the value might be > p */
822 /* We are doing a constant time test if out >= kPrime. We need to
823 * compare each u64, from most-significant to least significant. For
824 * each one, if all words so far have been equal (m is all ones) then a
825 * non-equal result is the answer. Otherwise we continue. */
826 for (i = 3; i < 4; i--)
829 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
830 /* if out[i] > kPrime[i] then a will underflow and the high
831 * 64-bits will all be set. */
832 result |= all_equal_so_far & ((u64) (a >> 64));
834 /* if kPrime[i] == out[i] then |equal| will be all zeros and
835 * the decrement will make it all ones. */
836 equal = kPrime[i] ^ out[i];
838 equal &= equal << 32;
839 equal &= equal << 16;
844 equal = ((s64) equal) >> 63;
846 all_equal_so_far &= equal;
849 /* if all_equal_so_far is still all ones then the two values are equal
850 * and so out >= kPrime is true. */
851 result |= all_equal_so_far;
853 /* if out >= kPrime then we subtract kPrime. */
854 subtract_u64(&out[0], &carry, result & kPrime[0]);
855 subtract_u64(&out[1], &carry, carry);
856 subtract_u64(&out[2], &carry, carry);
857 subtract_u64(&out[3], &carry, carry);
859 subtract_u64(&out[1], &carry, result & kPrime[1]);
860 subtract_u64(&out[2], &carry, carry);
861 subtract_u64(&out[3], &carry, carry);
863 subtract_u64(&out[2], &carry, result & kPrime[2]);
864 subtract_u64(&out[3], &carry, carry);
866 subtract_u64(&out[3], &carry, result & kPrime[3]);
869 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
874 smallfelem_square(longtmp, in);
875 felem_reduce(tmp, longtmp);
876 felem_contract(out, tmp);
879 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
884 smallfelem_mul(longtmp, in1, in2);
885 felem_reduce(tmp, longtmp);
886 felem_contract(out, tmp);
889 /* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
894 static limb smallfelem_is_zero(const smallfelem small)
899 u64 is_zero = small[0] | small[1] | small[2] | small[3];
901 is_zero &= is_zero << 32;
902 is_zero &= is_zero << 16;
903 is_zero &= is_zero << 8;
904 is_zero &= is_zero << 4;
905 is_zero &= is_zero << 2;
906 is_zero &= is_zero << 1;
907 is_zero = ((s64) is_zero) >> 63;
909 is_p = (small[0] ^ kPrime[0]) |
910 (small[1] ^ kPrime[1]) |
911 (small[2] ^ kPrime[2]) |
912 (small[3] ^ kPrime[3]);
920 is_p = ((s64) is_p) >> 63;
925 result |= ((limb) is_zero) << 64;
929 static int smallfelem_is_zero_int(const smallfelem small)
931 return (int) (smallfelem_is_zero(small) & ((limb)1));
934 /* felem_inv calculates |out| = |in|^{-1}
936 * Based on Fermat's Little Theorem:
938 * a^{p-1} = 1 (mod p)
939 * a^{p-2} = a^{-1} (mod p)
941 static void felem_inv(felem out, const felem in)
944 /* each e_I will hold |in|^{2^I - 1} */
945 felem e2, e4, e8, e16, e32, e64;
949 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
950 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
951 felem_assign(e2, ftmp);
952 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
953 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
954 felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
955 felem_assign(e4, ftmp);
956 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
957 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
958 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
959 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
960 felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
961 felem_assign(e8, ftmp);
962 for (i = 0; i < 8; i++) {
963 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
965 felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
966 felem_assign(e16, ftmp);
967 for (i = 0; i < 16; i++) {
968 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
970 felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
971 felem_assign(e32, ftmp);
972 for (i = 0; i < 32; i++) {
973 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
975 felem_assign(e64, ftmp);
976 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
977 for (i = 0; i < 192; i++) {
978 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
979 } /* 2^256 - 2^224 + 2^192 */
981 felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
982 for (i = 0; i < 16; i++) {
983 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
985 felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
986 for (i = 0; i < 8; i++) {
987 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
989 felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
990 for (i = 0; i < 4; i++) {
991 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
993 felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
994 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
995 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
996 felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
997 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
998 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
999 felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1001 felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1004 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1008 smallfelem_expand(tmp, in);
1009 felem_inv(tmp, tmp);
1010 felem_contract(out, tmp);
1016 * Building on top of the field operations we have the operations on the
1017 * elliptic curve group itself. Points on the curve are represented in Jacobian
1020 /* point_double calculates 2*(x_in, y_in, z_in)
1022 * The method is taken from:
1023 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1025 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1026 * while x_out == y_in is not (maybe this works, but it's not tested). */
1028 point_double(felem x_out, felem y_out, felem z_out,
1029 const felem x_in, const felem y_in, const felem z_in)
1031 longfelem tmp, tmp2;
1032 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1033 smallfelem small1, small2;
1035 felem_assign(ftmp, x_in);
1036 /* ftmp[i] < 2^106 */
1037 felem_assign(ftmp2, x_in);
1038 /* ftmp2[i] < 2^106 */
1041 felem_square(tmp, z_in);
1042 felem_reduce(delta, tmp);
1043 /* delta[i] < 2^101 */
1046 felem_square(tmp, y_in);
1047 felem_reduce(gamma, tmp);
1048 /* gamma[i] < 2^101 */
1049 felem_shrink(small1, gamma);
1051 /* beta = x*gamma */
1052 felem_small_mul(tmp, small1, x_in);
1053 felem_reduce(beta, tmp);
1054 /* beta[i] < 2^101 */
1056 /* alpha = 3*(x-delta)*(x+delta) */
1057 felem_diff(ftmp, delta);
1058 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1059 felem_sum(ftmp2, delta);
1060 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1061 felem_scalar(ftmp2, 3);
1062 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1063 felem_mul(tmp, ftmp, ftmp2);
1064 felem_reduce(alpha, tmp);
1065 /* alpha[i] < 2^101 */
1066 felem_shrink(small2, alpha);
1068 /* x' = alpha^2 - 8*beta */
1069 smallfelem_square(tmp, small2);
1070 felem_reduce(x_out, tmp);
1071 felem_assign(ftmp, beta);
1072 felem_scalar(ftmp, 8);
1073 /* ftmp[i] < 8 * 2^101 = 2^104 */
1074 felem_diff(x_out, ftmp);
1075 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1077 /* z' = (y + z)^2 - gamma - delta */
1078 felem_sum(delta, gamma);
1079 /* delta[i] < 2^101 + 2^101 = 2^102 */
1080 felem_assign(ftmp, y_in);
1081 felem_sum(ftmp, z_in);
1082 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1083 felem_square(tmp, ftmp);
1084 felem_reduce(z_out, tmp);
1085 felem_diff(z_out, delta);
1086 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1088 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1089 felem_scalar(beta, 4);
1090 /* beta[i] < 4 * 2^101 = 2^103 */
1091 felem_diff_zero107(beta, x_out);
1092 /* beta[i] < 2^107 + 2^103 < 2^108 */
1093 felem_small_mul(tmp, small2, beta);
1094 /* tmp[i] < 7 * 2^64 < 2^67 */
1095 smallfelem_square(tmp2, small1);
1096 /* tmp2[i] < 7 * 2^64 */
1097 longfelem_scalar(tmp2, 8);
1098 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1099 longfelem_diff(tmp, tmp2);
1100 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1101 felem_reduce_zero105(y_out, tmp);
1102 /* y_out[i] < 2^106 */
1105 /* point_double_small is the same as point_double, except that it operates on
1108 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1109 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1111 felem felem_x_out, felem_y_out, felem_z_out;
1112 felem felem_x_in, felem_y_in, felem_z_in;
1114 smallfelem_expand(felem_x_in, x_in);
1115 smallfelem_expand(felem_y_in, y_in);
1116 smallfelem_expand(felem_z_in, z_in);
1117 point_double(felem_x_out, felem_y_out, felem_z_out,
1118 felem_x_in, felem_y_in, felem_z_in);
1119 felem_shrink(x_out, felem_x_out);
1120 felem_shrink(y_out, felem_y_out);
1121 felem_shrink(z_out, felem_z_out);
1124 /* copy_conditional copies in to out iff mask is all ones. */
1126 copy_conditional(felem out, const felem in, limb mask)
1129 for (i = 0; i < NLIMBS; ++i)
1131 const limb tmp = mask & (in[i] ^ out[i]);
1136 /* copy_small_conditional copies in to out iff mask is all ones. */
1138 copy_small_conditional(felem out, const smallfelem in, limb mask)
1141 const u64 mask64 = mask;
1142 for (i = 0; i < NLIMBS; ++i)
1144 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1148 /* point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1150 * The method is taken from:
1151 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1152 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1154 * This function includes a branch for checking whether the two input points
1155 * are equal, (while not equal to the point at infinity). This case never
1156 * happens during single point multiplication, so there is no timing leak for
1157 * ECDH or ECDSA signing. */
1158 static void point_add(felem x3, felem y3, felem z3,
1159 const felem x1, const felem y1, const felem z1,
1160 const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
1162 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1163 longfelem tmp, tmp2;
1164 smallfelem small1, small2, small3, small4, small5;
1165 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1167 felem_shrink(small3, z1);
1169 z1_is_zero = smallfelem_is_zero(small3);
1170 z2_is_zero = smallfelem_is_zero(z2);
1172 /* ftmp = z1z1 = z1**2 */
1173 smallfelem_square(tmp, small3);
1174 felem_reduce(ftmp, tmp);
1175 /* ftmp[i] < 2^101 */
1176 felem_shrink(small1, ftmp);
1180 /* ftmp2 = z2z2 = z2**2 */
1181 smallfelem_square(tmp, z2);
1182 felem_reduce(ftmp2, tmp);
1183 /* ftmp2[i] < 2^101 */
1184 felem_shrink(small2, ftmp2);
1186 felem_shrink(small5, x1);
1188 /* u1 = ftmp3 = x1*z2z2 */
1189 smallfelem_mul(tmp, small5, small2);
1190 felem_reduce(ftmp3, tmp);
1191 /* ftmp3[i] < 2^101 */
1193 /* ftmp5 = z1 + z2 */
1194 felem_assign(ftmp5, z1);
1195 felem_small_sum(ftmp5, z2);
1196 /* ftmp5[i] < 2^107 */
1198 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1199 felem_square(tmp, ftmp5);
1200 felem_reduce(ftmp5, tmp);
1201 /* ftmp2 = z2z2 + z1z1 */
1202 felem_sum(ftmp2, ftmp);
1203 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1204 felem_diff(ftmp5, ftmp2);
1205 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1207 /* ftmp2 = z2 * z2z2 */
1208 smallfelem_mul(tmp, small2, z2);
1209 felem_reduce(ftmp2, tmp);
1211 /* s1 = ftmp2 = y1 * z2**3 */
1212 felem_mul(tmp, y1, ftmp2);
1213 felem_reduce(ftmp6, tmp);
1214 /* ftmp6[i] < 2^101 */
1218 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1220 /* u1 = ftmp3 = x1*z2z2 */
1221 felem_assign(ftmp3, x1);
1222 /* ftmp3[i] < 2^106 */
1225 felem_assign(ftmp5, z1);
1226 felem_scalar(ftmp5, 2);
1227 /* ftmp5[i] < 2*2^106 = 2^107 */
1229 /* s1 = ftmp2 = y1 * z2**3 */
1230 felem_assign(ftmp6, y1);
1231 /* ftmp6[i] < 2^106 */
1235 smallfelem_mul(tmp, x2, small1);
1236 felem_reduce(ftmp4, tmp);
1238 /* h = ftmp4 = u2 - u1 */
1239 felem_diff_zero107(ftmp4, ftmp3);
1240 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1241 felem_shrink(small4, ftmp4);
1243 x_equal = smallfelem_is_zero(small4);
1245 /* z_out = ftmp5 * h */
1246 felem_small_mul(tmp, small4, ftmp5);
1247 felem_reduce(z_out, tmp);
1248 /* z_out[i] < 2^101 */
1250 /* ftmp = z1 * z1z1 */
1251 smallfelem_mul(tmp, small1, small3);
1252 felem_reduce(ftmp, tmp);
1254 /* s2 = tmp = y2 * z1**3 */
1255 felem_small_mul(tmp, y2, ftmp);
1256 felem_reduce(ftmp5, tmp);
1258 /* r = ftmp5 = (s2 - s1)*2 */
1259 felem_diff_zero107(ftmp5, ftmp6);
1260 /* ftmp5[i] < 2^107 + 2^107 = 2^108*/
1261 felem_scalar(ftmp5, 2);
1262 /* ftmp5[i] < 2^109 */
1263 felem_shrink(small1, ftmp5);
1264 y_equal = smallfelem_is_zero(small1);
1266 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1268 point_double(x3, y3, z3, x1, y1, z1);
1272 /* I = ftmp = (2h)**2 */
1273 felem_assign(ftmp, ftmp4);
1274 felem_scalar(ftmp, 2);
1275 /* ftmp[i] < 2*2^108 = 2^109 */
1276 felem_square(tmp, ftmp);
1277 felem_reduce(ftmp, tmp);
1279 /* J = ftmp2 = h * I */
1280 felem_mul(tmp, ftmp4, ftmp);
1281 felem_reduce(ftmp2, tmp);
1283 /* V = ftmp4 = U1 * I */
1284 felem_mul(tmp, ftmp3, ftmp);
1285 felem_reduce(ftmp4, tmp);
1287 /* x_out = r**2 - J - 2V */
1288 smallfelem_square(tmp, small1);
1289 felem_reduce(x_out, tmp);
1290 felem_assign(ftmp3, ftmp4);
1291 felem_scalar(ftmp4, 2);
1292 felem_sum(ftmp4, ftmp2);
1293 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1294 felem_diff(x_out, ftmp4);
1295 /* x_out[i] < 2^105 + 2^101 */
1297 /* y_out = r(V-x_out) - 2 * s1 * J */
1298 felem_diff_zero107(ftmp3, x_out);
1299 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1300 felem_small_mul(tmp, small1, ftmp3);
1301 felem_mul(tmp2, ftmp6, ftmp2);
1302 longfelem_scalar(tmp2, 2);
1303 /* tmp2[i] < 2*2^67 = 2^68 */
1304 longfelem_diff(tmp, tmp2);
1305 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1306 felem_reduce_zero105(y_out, tmp);
1307 /* y_out[i] < 2^106 */
1309 copy_small_conditional(x_out, x2, z1_is_zero);
1310 copy_conditional(x_out, x1, z2_is_zero);
1311 copy_small_conditional(y_out, y2, z1_is_zero);
1312 copy_conditional(y_out, y1, z2_is_zero);
1313 copy_small_conditional(z_out, z2, z1_is_zero);
1314 copy_conditional(z_out, z1, z2_is_zero);
1315 felem_assign(x3, x_out);
1316 felem_assign(y3, y_out);
1317 felem_assign(z3, z_out);
1320 /* point_add_small is the same as point_add, except that it operates on
1322 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1323 smallfelem x1, smallfelem y1, smallfelem z1,
1324 smallfelem x2, smallfelem y2, smallfelem z2)
1326 felem felem_x3, felem_y3, felem_z3;
1327 felem felem_x1, felem_y1, felem_z1;
1328 smallfelem_expand(felem_x1, x1);
1329 smallfelem_expand(felem_y1, y1);
1330 smallfelem_expand(felem_z1, z1);
1331 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1332 felem_shrink(x3, felem_x3);
1333 felem_shrink(y3, felem_y3);
1334 felem_shrink(z3, felem_z3);
1337 /* Base point pre computation
1338 * --------------------------
1340 * Two different sorts of precomputed tables are used in the following code.
1341  * Each contains various points on the curve, where each point is three field
1342 * elements (x, y, z).
1344 * For the base point table, z is usually 1 (0 for the point at infinity).
1345 * This table has 2 * 16 elements, starting with the following:
1346 * index | bits | point
1347 * ------+---------+------------------------------
1350 * 2 | 0 0 1 0 | 2^64G
1351 * 3 | 0 0 1 1 | (2^64 + 1)G
1352 * 4 | 0 1 0 0 | 2^128G
1353 * 5 | 0 1 0 1 | (2^128 + 1)G
1354 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1355 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1356 * 8 | 1 0 0 0 | 2^192G
1357 * 9 | 1 0 0 1 | (2^192 + 1)G
1358 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1359 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1360 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1361 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1362 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1363 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1364 * followed by a copy of this with each element multiplied by 2^32.
1366 * The reason for this is so that we can clock bits into four different
1367 * locations when doing simple scalar multiplies against the base point,
1368 * and then another four locations using the second 16 elements.
1370 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1372 /* gmul is the table of precomputed base points */
1373 static const smallfelem gmul[2][16][3] =
1377 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1378 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1380 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1381 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1383 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1384 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1386 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1387 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1389 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1390 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1392 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1393 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1395 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1396 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1398 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1399 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1401 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1402 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1404 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1405 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1407 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1408 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1410 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1411 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1413 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1414 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1416 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1417 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1419 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1420 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1425 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1426 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1428 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1429 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1431 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1432 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1434 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1435 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1437 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1438 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1440 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1441 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1443 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1444 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1446 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1447 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1449 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1450 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1452 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1453 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1455 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1456 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1458 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1459 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1461 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1462 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1464 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1465 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1467 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1468 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1471 /* select_point selects the |idx|th point from a precomputation table and
1472 * copies it to out. */
1473 static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1476 u64 *outlimbs = &out[0][0];
1477 memset(outlimbs, 0, 3 * sizeof(smallfelem));
1479 for (i = 0; i < size; i++)
1481 const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1488 for (j = 0; j < NLIMBS * 3; j++)
1489 outlimbs[j] |= inlimbs[j] & mask;
1493 /* get_bit returns the |i|th bit in |in| */
1494 static char get_bit(const felem_bytearray in, int i)
1496 if ((i < 0) || (i >= 256))
1498 return (in[i >> 3] >> (i & 7)) & 1;
1501 /* Interleaved point multiplication using precomputed point multiples:
1502  * The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[],
1503  * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1504  * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1505  * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
/* NOTE(review): this listing omits several lines of the upstream function
 * (local declarations of i, skip, nq[3], ftmp, tmp[3], bits, sign, digit,
 * and the if (!skip)/else framing around the point_add calls) -- confirm
 * against the full source before editing. */
1506 static void batch_mul(felem x_out, felem y_out, felem z_out,
1507 	const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1508 	const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1511 	unsigned num, gen_mul = (g_scalar != NULL);
1517 	/* set nq to the point at infinity */
1518 	memset(nq, 0, 3 * sizeof(felem));
1520 	/* Loop over all scalars msb-to-lsb, interleaving additions
1521 	 * of multiples of the generator (two in each of the last 32 rounds)
1522 	 * and additions of other points multiples (every 5th round).
	/* skip lets the very first effective addition become a plain copy,
	 * saving two point operations */
1524 	skip = 1; /* save two point operations in the first round */
	/* 256-bit scalars need 256 doubling rounds; generator-only runs use
	 * the comb over the last 32 rounds, so start at bit 31 */
1525 	for (i = (num_points ? 255 : 31); i >= 0; --i)
1529 		point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1531 		/* add multiples of the generator */
1532 		if (gen_mul && (i <= 31))
1534 			/* first, look 32 bits upwards */
1535 			bits = get_bit(g_scalar, i + 224) << 3;
1536 			bits |= get_bit(g_scalar, i + 160) << 2;
1537 			bits |= get_bit(g_scalar, i + 96) << 1;
1538 			bits |= get_bit(g_scalar, i + 32);
1539 			/* select the point to add, in constant time */
1540 			select_point(bits, 16, g_pre_comp[1], tmp);
1544 			point_add(nq[0], nq[1], nq[2],
1545 				nq[0], nq[1], nq[2],
1546 				1 /* mixed */, tmp[0], tmp[1], tmp[2]);
	/* (skip branch) first addition is a copy of the selected point */
1550 			smallfelem_expand(nq[0], tmp[0]);
1551 			smallfelem_expand(nq[1], tmp[1]);
1552 			smallfelem_expand(nq[2], tmp[2]);
1556 			/* second, look at the current position */
1557 			bits = get_bit(g_scalar, i + 192) << 3;
1558 			bits |= get_bit(g_scalar, i + 128) << 2;
1559 			bits |= get_bit(g_scalar, i + 64) << 1;
1560 			bits |= get_bit(g_scalar, i);
1561 			/* select the point to add, in constant time */
1562 			select_point(bits, 16, g_pre_comp[0], tmp);
1563 			point_add(nq[0], nq[1], nq[2],
1564 				nq[0], nq[1], nq[2],
1565 				1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1568 		/* do other additions every 5 doublings */
1569 		if (num_points && (i % 5 == 0))
1571 			/* loop over all scalars */
1572 			for (num = 0; num < num_points; ++num)
	/* take a signed 6-bit window of the scalar around bit i */
1574 				bits = get_bit(scalars[num], i + 4) << 5;
1575 				bits |= get_bit(scalars[num], i + 3) << 4;
1576 				bits |= get_bit(scalars[num], i + 2) << 3;
1577 				bits |= get_bit(scalars[num], i + 1) << 2;
1578 				bits |= get_bit(scalars[num], i) << 1;
1579 				bits |= get_bit(scalars[num], i - 1);
1580 				ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1582 				/* select the point to add or subtract, in constant time */
1583 				select_point(digit, 17, pre_comp[num], tmp);
1584 				smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
	/* constant-time choice between Y and -Y based on the recoded sign */
1585 				copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1586 				felem_contract(tmp[1], ftmp);
1590 				point_add(nq[0], nq[1], nq[2],
1591 					nq[0], nq[1], nq[2],
1592 					mixed, tmp[0], tmp[1], tmp[2]);
1596 				smallfelem_expand(nq[0], tmp[0]);
1597 				smallfelem_expand(nq[1], tmp[1]);
1598 				smallfelem_expand(nq[2], tmp[2]);
	/* write the accumulated point out */
1604 	felem_assign(x_out, nq[0]);
1605 	felem_assign(y_out, nq[1]);
1606 	felem_assign(z_out, nq[2]);
1609 /* Precomputation for the group generator. */
/* Two 16-entry tables of generator multiples in (x, y, z) smallfelem
 * coordinates, as consumed by batch_mul. NOTE(review): the enclosing
 * "typedef struct {" and the reference-count member are not visible in
 * this listing; the object is shared via CRYPTO_add reference counting
 * (see nistp256_pre_comp_dup/free below). */
1611 	smallfelem g_pre_comp[2][16][3];
1613 	} NISTP256_PRE_COMP;
/* Returns the EC_METHOD vtable for this 64-bit P-256 implementation:
 * nistp256-specific entries for init, set_curve, get_affine_coordinates,
 * points_mul and precomputation; generic ec_GFp_simple_*/ec_GFp_nist_*
 * entries for everything else. NOTE(review): some table entries (oct
 * conversion, cmp, dbl/add, field_div) and the trailing "return &ret;"
 * are not visible in this listing. */
1615 const EC_METHOD *EC_GFp_nistp256_method(void)
1617 	static const EC_METHOD ret = {
1618 	EC_FLAGS_DEFAULT_OCT,
1619 	NID_X9_62_prime_field,
1620 	ec_GFp_nistp256_group_init,
1621 	ec_GFp_simple_group_finish,
1622 	ec_GFp_simple_group_clear_finish,
1623 	ec_GFp_nist_group_copy,
1624 	ec_GFp_nistp256_group_set_curve,
1625 	ec_GFp_simple_group_get_curve,
1626 	ec_GFp_simple_group_get_degree,
1627 	ec_GFp_simple_group_check_discriminant,
1628 	ec_GFp_simple_point_init,
1629 	ec_GFp_simple_point_finish,
1630 	ec_GFp_simple_point_clear_finish,
1631 	ec_GFp_simple_point_copy,
1632 	ec_GFp_simple_point_set_to_infinity,
1633 	ec_GFp_simple_set_Jprojective_coordinates_GFp,
1634 	ec_GFp_simple_get_Jprojective_coordinates_GFp,
1635 	ec_GFp_simple_point_set_affine_coordinates,
1636 	ec_GFp_nistp256_point_get_affine_coordinates,
1637 	0 /* point_set_compressed_coordinates */,
1642 	ec_GFp_simple_invert,
1643 	ec_GFp_simple_is_at_infinity,
1644 	ec_GFp_simple_is_on_curve,
1646 	ec_GFp_simple_make_affine,
1647 	ec_GFp_simple_points_make_affine,
1648 	ec_GFp_nistp256_points_mul,
1649 	ec_GFp_nistp256_precompute_mult,
1650 	ec_GFp_nistp256_have_precompute_mult,
1651 	ec_GFp_nist_field_mul,
1652 	ec_GFp_nist_field_sqr,
1654 	0 /* field_encode */,
1655 	0 /* field_decode */,
1656 	0 /* field_set_to_one */ };
1661 /******************************************************************************/
1662 /* FUNCTIONS TO MANAGE PRECOMPUTATION
/* Allocates a fresh, zeroed precomputation object with reference count 1.
 * Returns NULL (after raising ERR_R_MALLOC_FAILURE) on allocation failure.
 * NOTE(review): the NULL-check branch and the final return are not visible
 * in this listing. */
1665 static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1667 	NISTP256_PRE_COMP *ret = NULL;
1668 	ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1671 	ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1674 	memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1675 	ret->references = 1;
/* "Duplicates" a precomputation object by bumping its reference count
 * under CRYPTO_LOCK_EC_PRE_COMP; the underlying table is immutable so no
 * deep copy is needed. */
1679 static void *nistp256_pre_comp_dup(void *src_)
1681 	NISTP256_PRE_COMP *src = src_;
1683 	/* no need to actually copy, these objects never change! */
1684 	CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
/* Drops one reference; the object is freed when the count reaches zero
 * (the NULL check, count test and OPENSSL_free are not visible in this
 * listing -- NOTE(review): confirm against the full source). */
1689 static void nistp256_pre_comp_free(void *pre_)
1692 	NISTP256_PRE_COMP *pre = pre_;
1697 	i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
/* Like nistp256_pre_comp_free, but scrubs the memory with OPENSSL_cleanse
 * before releasing it, so precomputed secrets-adjacent data does not
 * linger on the heap. */
1704 static void nistp256_pre_comp_clear_free(void *pre_)
1707 	NISTP256_PRE_COMP *pre = pre_;
1712 	i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1716 	OPENSSL_cleanse(pre, sizeof *pre);
1720 /******************************************************************************/
1721 /* OPENSSL EC_METHOD FUNCTIONS
/* Group initialisation: defers to the generic GFp init, then records that
 * P-256 has a = -3 (mod p) so the faster a_is_minus3 doubling formulas
 * apply. Returns the generic init's status. */
1724 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1727 	ret = ec_GFp_simple_group_init(group);
1728 	group->a_is_minus3 = 1;
/* Installs the curve parameters, but only if (p, a, b) exactly match the
 * standard P-256 constants from nistp256_curve_params; otherwise raises
 * EC_R_WRONG_CURVE_PARAMETERS. Also installs the fast NIST reduction
 * BN_nist_mod_256 as the field_mod_func. A temporary BN_CTX is created
 * when the caller passes ctx == NULL and freed before returning. */
1732 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1733 	const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1736 	BN_CTX *new_ctx = NULL;
1737 	BIGNUM *curve_p, *curve_a, *curve_b;
1740 	if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1742 	if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1743 		((curve_a = BN_CTX_get(ctx)) == NULL) ||
1744 		((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
	/* load the reference big-endian P-256 constants */
1745 	BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1746 	BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1747 	BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1748 	if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1749 		(BN_cmp(curve_b, b)))
1751 		ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1752 			EC_R_WRONG_CURVE_PARAMETERS);
1755 	group->field_mod_func = BN_nist_mod_256;
1756 	ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1759 	if (new_ctx != NULL)
1760 		BN_CTX_free(new_ctx);
1764 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1765  * (X', Y') = (X/Z^2, Y/Z^3) */
/* Fails (EC_R_POINT_AT_INFINITY) for the point at infinity, since it has
 * no affine representation. NOTE(review): upstream computes z2 = 1/Z via
 * felem_inv(z2, z1) immediately before the felem_square below; that line
 * is not visible in this listing -- confirm before relying on this span. */
1766 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1767 	const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1769 	felem z1, z2, x_in, y_in;
1770 	smallfelem x_out, y_out;
1773 	if (EC_POINT_is_at_infinity(group, point))
1775 		ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1776 			EC_R_POINT_AT_INFINITY);
1779 	if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1780 		(!BN_to_felem(z1, &point->Z))) return 0;
	/* z1 := 1/Z^2; x' = X / Z^2 */
1782 	felem_square(tmp, z2); felem_reduce(z1, tmp);
1783 	felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1784 	felem_contract(x_out, x_in);
1787 	if (!smallfelem_to_BN(x, x_out)) {
1788 		ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
	/* z1 := 1/Z^3; y' = Y / Z^3 */
1793 	felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1794 	felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1795 	felem_contract(y_out, y_in);
1798 	if (!smallfelem_to_BN(y, y_out))
1800 		ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
/* Batch-converts |num| Jacobian points to affine form (Z == 1) using the
 * shared Montgomery-trick helper, passing the smallfelem arithmetic as
 * callbacks; tmp_smallfelems provides num+1 scratch elements. */
1808 static void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */])
1810 	/* Runs in constant time, unless an input is the point at infinity
1811 	 * (which normally shouldn't happen). */
1812 	ec_GFp_nistp_points_make_affine_internal(
1817 		(void (*)(void *)) smallfelem_one,
1818 		(int (*)(const void *)) smallfelem_is_zero_int,
1819 		(void (*)(void *, const void *)) smallfelem_assign,
1820 		(void (*)(void *, const void *)) smallfelem_square_contract,
1821 		(void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
1822 		(void (*)(void *, const void *)) smallfelem_inv_contract,
1823 		(void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */);
1826 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1827  * Result is stored in r (r can equal one of the inputs). */
/* NOTE(review): this listing elides several lines of the upstream
 * function (declarations of ret/j/mixed, some braces and else branches);
 * the comments below describe only what the visible statements show. */
1828 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1829 	const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1830 	const BIGNUM *scalars[], BN_CTX *ctx)
1835 	BN_CTX *new_ctx = NULL;
1836 	BIGNUM *x, *y, *z, *tmp_scalar;
1837 	felem_bytearray g_secret;
1838 	felem_bytearray *secrets = NULL;
1839 	smallfelem (*pre_comp)[17][3] = NULL;
1840 	smallfelem *tmp_smallfelems = NULL;
1841 	felem_bytearray tmp;
1842 	unsigned i, num_bytes;
1843 	int have_pre_comp = 0;
1844 	size_t num_points = num;
1845 	smallfelem x_in, y_in, z_in;
1846 	felem x_out, y_out, z_out;
1847 	NISTP256_PRE_COMP *pre = NULL;
1848 	const smallfelem (*g_pre_comp)[16][3] = NULL;
1849 	EC_POINT *generator = NULL;
1850 	const EC_POINT *p = NULL;
1851 	const BIGNUM *p_scalar = NULL;
1854 	if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1856 	if (((x = BN_CTX_get(ctx)) == NULL) ||
1857 		((y = BN_CTX_get(ctx)) == NULL) ||
1858 		((z = BN_CTX_get(ctx)) == NULL) ||
1859 		((tmp_scalar = BN_CTX_get(ctx)) == NULL))
	/* look for group-attached generator precomputation first */
1864 	pre = EC_EX_DATA_get_data(group->extra_data,
1865 		nistp256_pre_comp_dup, nistp256_pre_comp_free,
1866 		nistp256_pre_comp_clear_free);
1868 		/* we have precomputation, try to use it */
1869 		g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
1871 		/* try to use the standard precomputation */
1872 		g_pre_comp = &gmul[0];
1873 		generator = EC_POINT_new(group);
1874 		if (generator == NULL)
1876 		/* get the generator from precomputation */
1877 		if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1878 			!smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1879 			!smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1881 			ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1884 		if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1885 			generator, x, y, z, ctx))
1887 		if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1888 			/* precomputation matches generator */
1891 			/* we don't have valid precomputation:
1892 			 * treat the generator as a random point */
1897 	if (num_points >= 3)
1899 		/* unless we precompute multiples for just one or two points,
1900 		 * converting those into affine form is time well spent */
	/* per-point storage: one scalar byte-array and a 17-entry multiple
	 * table per input point */
1903 	secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1904 	pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
1906 	tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
1907 	if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1909 		ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1913 	/* we treat NULL scalars as 0, and NULL points as points at infinity,
1914 	 * i.e., they contribute nothing to the linear combination */
1915 	memset(secrets, 0, num_points * sizeof(felem_bytearray));
1916 	memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1917 	for (i = 0; i < num_points; ++i)
1920 			/* we didn't have a valid precomputation, so we pick
1923 			p = EC_GROUP_get0_generator(group);
1927 			/* the i^th point */
1930 			p_scalar = scalars[i];
1932 		if ((p_scalar != NULL) && (p != NULL))
1934 			/* reduce scalar to 0 <= scalar < 2^256 */
1935 			if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
1937 				/* this is an unusual input, and we don't guarantee
1938 				 * constant-timeness */
1939 				if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1941 					ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1944 				num_bytes = BN_bn2bin(tmp_scalar, tmp);
1947 				num_bytes = BN_bn2bin(p_scalar, tmp);
	/* batch_mul consumes little-endian scalars */
1948 			flip_endian(secrets[i], tmp, num_bytes);
1949 			/* precompute multiples */
1950 			if ((!BN_to_felem(x_out, &p->X)) ||
1951 				(!BN_to_felem(y_out, &p->Y)) ||
1952 				(!BN_to_felem(z_out, &p->Z))) goto err;
1953 			felem_shrink(pre_comp[i][1][0], x_out);
1954 			felem_shrink(pre_comp[i][1][1], y_out);
1955 			felem_shrink(pre_comp[i][1][2], z_out);
	/* fill table entry j: odd j by adding P, even j by doubling j/2 */
1956 			for (j = 2; j <= 16; ++j)
1961 						pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1962 						pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1963 						pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1968 						pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1969 						pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1975 		make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
1978 	/* the scalar for the generator */
1979 	if ((scalar != NULL) && (have_pre_comp))
1981 		memset(g_secret, 0, sizeof(g_secret));
1982 		/* reduce scalar to 0 <= scalar < 2^256 */
1983 		if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
1985 			/* this is an unusual input, and we don't guarantee
1986 			 * constant-timeness */
1987 			if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1989 				ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1992 			num_bytes = BN_bn2bin(tmp_scalar, tmp);
1995 			num_bytes = BN_bn2bin(scalar, tmp);
1996 		flip_endian(g_secret, tmp, num_bytes);
1997 		/* do the multiplication with generator precomputation*/
1998 		batch_mul(x_out, y_out, z_out,
1999 			(const felem_bytearray (*)) secrets, num_points,
2001 			mixed, (const smallfelem (*)[17][3]) pre_comp,
2005 		/* do the multiplication without generator precomputation */
2006 		batch_mul(x_out, y_out, z_out,
2007 			(const felem_bytearray (*)) secrets, num_points,
2008 			NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
2009 	/* reduce the output to its unique minimal representation */
2010 	felem_contract(x_in, x_out);
2011 	felem_contract(y_in, y_out);
2012 	felem_contract(z_in, z_out);
2013 	if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2014 		(!smallfelem_to_BN(z, z_in)))
2016 		ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2019 	ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
	/* cleanup: common exit path for success and error */
2023 	if (generator != NULL)
2024 		EC_POINT_free(generator);
2025 	if (new_ctx != NULL)
2026 		BN_CTX_free(new_ctx);
2027 	if (secrets != NULL)
2028 		OPENSSL_free(secrets);
2029 	if (pre_comp != NULL)
2030 		OPENSSL_free(pre_comp);
2031 	if (tmp_smallfelems != NULL)
2032 		OPENSSL_free(tmp_smallfelems);
/* Builds (or copies, for the standard generator) the two 16-entry tables
 * of generator multiples described above and attaches them to the group's
 * extra_data. NOTE(review): this listing elides some lines (declarations
 * of ret/i/j, a few braces/else branches and the final return). */
2036 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2039 	NISTP256_PRE_COMP *pre = NULL;
2041 	BN_CTX *new_ctx = NULL;
2043 	EC_POINT *generator = NULL;
2044 	smallfelem tmp_smallfelems[32];
2045 	felem x_tmp, y_tmp, z_tmp;
2047 	/* throw away old precomputation */
2048 	EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2049 		nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2051 	if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
2053 	if (((x = BN_CTX_get(ctx)) == NULL) ||
2054 		((y = BN_CTX_get(ctx)) == NULL))
2056 	/* get the generator */
2057 	if (group->generator == NULL) goto err;
2058 	generator = EC_POINT_new(group);
2059 	if (generator == NULL)
	/* build the standard generator from the stored x/y constants */
2061 	BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
2062 	BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
2063 	if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2065 	if ((pre = nistp256_pre_comp_new()) == NULL)
2067 	/* if the generator is the standard one, use built-in precomputation */
2068 	if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2070 		memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
	/* otherwise compute the tables from the group's own generator */
2074 	if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2075 		(!BN_to_felem(y_tmp, &group->generator->Y)) ||
2076 		(!BN_to_felem(z_tmp, &group->generator->Z)))
2078 	felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2079 	felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2080 	felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2081 	/* compute 2^64*G, 2^128*G, 2^192*G for the first table,
2082 	 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
	/* each step: table[1][i] = 2^32 * table[0][i] (32 doublings), then
	 * table[0][2i] = 2^32 * table[1][i] (another 32 doublings) */
2084 	for (i = 1; i <= 8; i <<= 1)
2087 			pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2088 			pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
2089 		for (j = 0; j < 31; ++j)
2092 				pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2093 				pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2098 			pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2099 			pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2100 		for (j = 0; j < 31; ++j)
2103 				pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2104 				pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
2107 	for (i = 0; i < 2; i++)
2109 		/* g_pre_comp[i][0] is the point at infinity */
2110 		memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2111 		/* the remaining multiples */
2112 		/* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2114 			pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2115 			pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2116 			pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2117 		/* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2119 			pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2120 			pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2121 			pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2122 		/* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2124 			pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2125 			pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2126 			pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2127 		/* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
2129 			pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2130 			pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2131 			pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2132 		for (j = 1; j < 8; ++j)
2134 			/* odd multiples: add G resp. 2^32*G */
2136 				pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
2137 				pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
2138 				pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
	/* normalise all 31 non-infinity entries to affine form */
2141 	make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2143 	if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2144 			nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
	/* cleanup: pre is still owned here unless set_data succeeded */
2150 	if (generator != NULL)
2151 		EC_POINT_free(generator);
2152 	if (new_ctx != NULL)
2153 		BN_CTX_free(new_ctx);
2155 		nistp256_pre_comp_free(pre);
/* Reports whether generator precomputation is attached to the group's
 * extra_data (the "!= NULL ? 1 : 0" tail of the expression is not visible
 * in this listing -- NOTE(review): confirm against the full source). */
2159 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2161 	if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2162 		nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
/* When OPENSSL_NO_EC_NISTP_64_GCC_128 is defined the whole file compiles
 * to nothing; this self-referential dummy keeps the translation unit
 * non-empty for toolchains (e.g. VMS) that reject empty object files. */
2169 static void *dummy=&dummy;