1 /* crypto/ec/ecp_nistp256.c */
3 * Written by Adam Langley (Google) for the OpenSSL project
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
#include <string.h>

#include <openssl/err.h>
37 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
38 /* even with gcc, the typedef won't work for 32-bit platforms */
39 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
40 typedef __int128_t int128_t;
42 #error "Need GCC 3.1 or later to define type uint128_t"
50 /* The underlying field.
52 * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
53 * of this field into 32 bytes. We call this an felem_bytearray. */
55 typedef u8 felem_bytearray[32];
57 /* These are the parameters of P256, taken from FIPS 186-3, page 86. These
58 * values are big-endian. */
59 static const felem_bytearray nistp256_curve_params[5] = {
60 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
61 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
62 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
64 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
65 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
66 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
68 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
69 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
70 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
71 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
72 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
73 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
74 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
75 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
76 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
77 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
78 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
79 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
83 * The representation of field elements.
84 * ------------------------------------
86 * We represent field elements with either four 128-bit values, eight 128-bit
87 * values, or four 64-bit values. The field element represented is:
88 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
90 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
92 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
93 * apart, but are 128-bits wide, the most significant bits of each limb overlap
94 * with the least significant bits of the next.
96 * A field element with four limbs is an 'felem'. One with eight limbs is a
99 * A field element with four, 64-bit values is called a 'smallfelem'. Small
100 * values are used as intermediate values before multiplication.
105 typedef uint128_t limb;
106 typedef limb felem[NLIMBS];
107 typedef limb longfelem[NLIMBS * 2];
108 typedef u64 smallfelem[NLIMBS];
110 /* This is the value of the prime as four 64-bit words, little-endian. */
111 static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
112 static const u64 bottom63bits = 0x7ffffffffffffffful;
114 /* bin32_to_felem takes a little-endian byte array and converts it into felem
115 * form. This assumes that the CPU is little-endian. */
116 static void bin32_to_felem(felem out, const u8 in[32])
118 out[0] = *((u64*) &in[0]);
119 out[1] = *((u64*) &in[8]);
120 out[2] = *((u64*) &in[16]);
121 out[3] = *((u64*) &in[24]);
124 /* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
125 * 32 byte array. This assumes that the CPU is little-endian. */
126 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
128 *((u64*) &out[0]) = in[0];
129 *((u64*) &out[8]) = in[1];
130 *((u64*) &out[16]) = in[2];
131 *((u64*) &out[24]) = in[3];
134 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
135 static void flip_endian(u8 *out, const u8 *in, unsigned len)
138 for (i = 0; i < len; ++i)
139 out[i] = in[len-1-i];
142 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
143 static int BN_to_felem(felem out, const BIGNUM *bn)
145 felem_bytearray b_in;
146 felem_bytearray b_out;
149 /* BN_bn2bin eats leading zeroes */
150 memset(b_out, 0, sizeof b_out);
151 num_bytes = BN_num_bytes(bn);
152 if (num_bytes > sizeof b_out)
154 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
157 if (BN_is_negative(bn))
159 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
162 num_bytes = BN_bn2bin(bn, b_in);
163 flip_endian(b_out, b_in, num_bytes);
164 bin32_to_felem(out, b_out);
168 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
169 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
171 felem_bytearray b_in, b_out;
172 smallfelem_to_bin32(b_in, in);
173 flip_endian(b_out, b_in, sizeof b_out);
174 return BN_bin2bn(b_out, sizeof b_out, out);
183 static void smallfelem_one(smallfelem out)
191 static void smallfelem_assign(smallfelem out, const smallfelem in)
199 static void felem_assign(felem out, const felem in)
207 /* felem_sum sets out = out + in. */
208 static void felem_sum(felem out, const felem in)
216 /* felem_small_sum sets out = out + in. */
217 static void felem_small_sum(felem out, const smallfelem in)
225 /* felem_scalar sets out = out * scalar */
226 static void felem_scalar(felem out, const u64 scalar)
234 /* longfelem_scalar sets out = out * scalar */
235 static void longfelem_scalar(longfelem out, const u64 scalar)
247 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
248 #define two105 (((limb)1) << 105)
249 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
251 /* zero105 is 0 mod p */
252 static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
255 * smallfelem_neg sets |out| to |-small|
257 * out[i] < out[i] + 2^105
259 static void smallfelem_neg(felem out, const smallfelem small)
261 /* In order to prevent underflow, we subtract from 0 mod p. */
262 out[0] = zero105[0] - small[0];
263 out[1] = zero105[1] - small[1];
264 out[2] = zero105[2] - small[2];
265 out[3] = zero105[3] - small[3];
269 * felem_diff subtracts |in| from |out|
273 * out[i] < out[i] + 2^105
275 static void felem_diff(felem out, const felem in)
277 /* In order to prevent underflow, we add 0 mod p before subtracting. */
278 out[0] += zero105[0];
279 out[1] += zero105[1];
280 out[2] += zero105[2];
281 out[3] += zero105[3];
289 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
290 #define two107 (((limb)1) << 107)
291 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
293 /* zero107 is 0 mod p */
294 static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
297 * An alternative felem_diff for larger inputs |in|
298 * felem_diff_zero107 subtracts |in| from |out|
302 * out[i] < out[i] + 2^107
304 static void felem_diff_zero107(felem out, const felem in)
306 /* In order to prevent underflow, we add 0 mod p before subtracting. */
307 out[0] += zero107[0];
308 out[1] += zero107[1];
309 out[2] += zero107[2];
310 out[3] += zero107[3];
319 * longfelem_diff subtracts |in| from |out|
323 * out[i] < out[i] + 2^70 + 2^40
325 static void longfelem_diff(longfelem out, const longfelem in)
327 static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
328 static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
329 static const limb two70 = (((limb)1) << 70);
330 static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
331 static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
333 /* add 0 mod p to avoid underflow */
337 out[3] += two70m40m38p6;
343 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
354 #define two64m0 (((limb)1) << 64) - 1
355 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
356 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
357 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
359 /* zero110 is 0 mod p */
360 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
363 * felem_shrink converts an felem into a smallfelem. The result isn't quite
364 * minimal as the value may be greater than p.
/* NOTE(review): this transcription appears to be missing the local
 * declarations (tmp[], a, b, high, low, mask) and a few statements
 * (e.g. the initial `b = a` / `tmp[3] -= a` steps and the computation of
 * high/low/mask) present in the canonical version -- compare against
 * upstream before building. */
static void felem_shrink(smallfelem out, const felem in)
/* 2^63 - 2^32 + 1: threshold used below to compare against kPrime[3] */
static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
/* fold the high word of in[2] into the top limb while adding 0 mod p */
tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
tmp[2] = zero110[2] + (u64) in[2];
tmp[0] = zero110[0] + in[0];
tmp[1] = zero110[1] + in[1];
/* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
/* We perform two partial reductions where we eliminate the
* high-word of tmp[3]. We don't update the other words till the end.
a = tmp[3] >> 64; /* a < 2^46 */
tmp[3] = (u64) tmp[3];
/* reduce: 2^256 == 2^224 - 2^192 - 2^96 + 1 (mod p), applied per word */
tmp[3] += ((limb)a) << 32;
a = tmp[3] >> 64; /* a < 2^15 */
b += a; /* b < 2^46 + 2^15 < 2^47 */
tmp[3] = (u64) tmp[3];
tmp[3] += ((limb)a) << 32;
/* tmp[3] < 2^64 + 2^47 */
/* This adjusts the other two words to complete the two partial
tmp[1] -= (((limb)b) << 32);
/* In order to make space in tmp[3] for the carry from 2 -> 3, we
* conditionally subtract kPrime if tmp[3] is large enough. */
/* As tmp[3] < 2^65, high is either 1 or 0 */
* all ones if the high word of tmp[3] is 1
* all zeros if the high word of tmp[3] if 0 */
* all ones if the MSB of low is 1
* all zeros if the MSB of low if 0 */
/* if low was greater than kPrime3Test then the MSB is zero */
* all ones if low was > kPrime3Test
* all zeros if low was <= kPrime3Test */
mask = (mask & low) | high;
/* constant-time conditional subtraction of kPrime */
tmp[0] -= mask & kPrime[0];
tmp[1] -= mask & kPrime[1];
/* kPrime[2] is zero, so omitted */
tmp[3] -= mask & kPrime[3];
/* tmp[3] < 2**64 - 2**32 + 1 */
/* final carry propagation down to 64-bit words */
tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
452 /* smallfelem_expand converts a smallfelem to an felem */
453 static void smallfelem_expand(felem out, const smallfelem in)
462 * smallfelem_square sets |out| = |small|^2
466 * out[i] < 7 * 2^64 < 2^67
468 static void smallfelem_square(longfelem out, const smallfelem small)
473 a = ((uint128_t) small[0]) * small[0];
479 a = ((uint128_t) small[0]) * small[1];
486 a = ((uint128_t) small[0]) * small[2];
493 a = ((uint128_t) small[0]) * small[3];
499 a = ((uint128_t) small[1]) * small[2];
506 a = ((uint128_t) small[1]) * small[1];
512 a = ((uint128_t) small[1]) * small[3];
519 a = ((uint128_t) small[2]) * small[3];
527 a = ((uint128_t) small[2]) * small[2];
533 a = ((uint128_t) small[3]) * small[3];
541 * felem_square sets |out| = |in|^2
545 * out[i] < 7 * 2^64 < 2^67
547 static void felem_square(longfelem out, const felem in)
550 felem_shrink(small, in);
551 smallfelem_square(out, small);
555 * smallfelem_mul sets |out| = |small1| * |small2|
560 * out[i] < 7 * 2^64 < 2^67
562 static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
567 a = ((uint128_t) small1[0]) * small2[0];
574 a = ((uint128_t) small1[0]) * small2[1];
580 a = ((uint128_t) small1[1]) * small2[0];
587 a = ((uint128_t) small1[0]) * small2[2];
593 a = ((uint128_t) small1[1]) * small2[1];
599 a = ((uint128_t) small1[2]) * small2[0];
606 a = ((uint128_t) small1[0]) * small2[3];
612 a = ((uint128_t) small1[1]) * small2[2];
618 a = ((uint128_t) small1[2]) * small2[1];
624 a = ((uint128_t) small1[3]) * small2[0];
631 a = ((uint128_t) small1[1]) * small2[3];
637 a = ((uint128_t) small1[2]) * small2[2];
643 a = ((uint128_t) small1[3]) * small2[1];
650 a = ((uint128_t) small1[2]) * small2[3];
656 a = ((uint128_t) small1[3]) * small2[2];
663 a = ((uint128_t) small1[3]) * small2[3];
671 * felem_mul sets |out| = |in1| * |in2|
676 * out[i] < 7 * 2^64 < 2^67
678 static void felem_mul(longfelem out, const felem in1, const felem in2)
680 smallfelem small1, small2;
681 felem_shrink(small1, in1);
682 felem_shrink(small2, in2);
683 smallfelem_mul(out, small1, small2);
687 * felem_small_mul sets |out| = |small1| * |in2|
692 * out[i] < 7 * 2^64 < 2^67
694 static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
697 felem_shrink(small2, in2);
698 smallfelem_mul(out, small1, small2);
701 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
702 #define two100 (((limb)1) << 100)
703 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
704 /* zero100 is 0 mod p */
705 static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
708 * Internal function for the different flavours of felem_reduce.
709 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
711 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
712 * out[1] >= in[7] + 2^32*in[4]
713 * out[2] >= in[5] + 2^32*in[5]
714 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
716 * out[0] <= out[0] + in[4] + 2^32*in[5]
717 * out[1] <= out[1] + in[5] + 2^33*in[6]
718 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
719 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
/* NOTE(review): this transcription looks incomplete -- the canonical
 * version also folds the combined term |c| into the output limbs and has
 * additional in[7] contributions; confirm against upstream before
 * relying on it. */
static void felem_reduce_(felem out, const longfelem in)
/* combine common terms from below */
c = in[4] + (in[5] << 32);
/* the remaining terms */
/* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
out[1] -= (in[4] << 32);
out[3] += (in[4] << 32);

/* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
out[2] -= (in[5] << 32);

/* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
out[0] -= (in[6] << 32);
out[1] += (in[6] << 33);
out[2] += (in[6] * 2);
out[3] -= (in[6] << 32);

/* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)]
 * i.e. the 2^448 coefficient contributes with these signed weights at
 * the listed bit offsets of the result. */
out[0] -= (in[7] << 32);
out[2] += (in[7] << 33);
out[3] += (in[7] * 3);
756 * felem_reduce converts a longfelem into an felem.
757 * To be called directly after felem_square or felem_mul.
759 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
* in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2^64
764 static void felem_reduce(felem out, const longfelem in)
766 out[0] = zero100[0] + in[0];
767 out[1] = zero100[1] + in[1];
768 out[2] = zero100[2] + in[2];
769 out[3] = zero100[3] + in[3];
771 felem_reduce_(out, in);
774 * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
775 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
776 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
777 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
779 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
780 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
781 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
782 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
787 * felem_reduce_zero105 converts a larger longfelem into an felem.
793 static void felem_reduce_zero105(felem out, const longfelem in)
795 out[0] = zero105[0] + in[0];
796 out[1] = zero105[1] + in[1];
797 out[2] = zero105[2] + in[2];
798 out[3] = zero105[3] + in[3];
800 felem_reduce_(out, in);
803 * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
804 * out[1] > 2^105 - 2^71 - 2^103 > 0
805 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
806 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
808 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
809 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
810 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
811 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
815 /* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
817 static void subtract_u64(u64* result, u64* carry, u64 v)
819 uint128_t r = *result;
821 *carry = (r >> 64) & 1;
/* felem_contract converts |in| to its unique, minimal representation.
 * NOTE(review): this transcription appears to be missing some lines of
 * the canonical version (the local declarations, the initial
 * `all_equal_so_far--` that turns 0 into all-ones, part of the bit-fold
 * chain on |equal|, and loop braces) -- confirm against upstream. */
static void felem_contract(smallfelem out, const felem in)
u64 all_equal_so_far = 0, result = 0, carry;

felem_shrink(out, in);
/* small is minimal except that the value might be > p */

/* We are doing a constant time test if out >= kPrime. We need to
* compare each u64, from most-significant to least significant. For
* each one, if all words so far have been equal (m is all ones) then a
* non-equal result is the answer. Otherwise we continue. */
/* unsigned wraparound loop: i runs 3, 2, 1, 0, then wraps past 3 */
for (i = 3; i < 4; i--)
uint128_t a = ((uint128_t) kPrime[i]) - out[i];
/* if out[i] > kPrime[i] then a will underflow and the high
* 64-bits will all be set. */
result |= all_equal_so_far & ((u64) (a >> 64));

/* if kPrime[i] == out[i] then |equal| will be all zeros and
* the decrement will make it all ones. */
equal = kPrime[i] ^ out[i];
equal &= equal << 32;
equal &= equal << 16;
/* arithmetic shift broadcasts the MSB: all ones iff equal */
equal = ((s64) equal) >> 63;
all_equal_so_far &= equal;

/* if all_equal_so_far is still all ones then the two values are equal
* and so out >= kPrime is true. */
result |= all_equal_so_far;

/* if out >= kPrime then we subtract kPrime (constant time, with
 * explicit borrow chains). */
subtract_u64(&out[0], &carry, result & kPrime[0]);
subtract_u64(&out[1], &carry, carry);
subtract_u64(&out[2], &carry, carry);
subtract_u64(&out[3], &carry, carry);

subtract_u64(&out[1], &carry, result & kPrime[1]);
subtract_u64(&out[2], &carry, carry);
subtract_u64(&out[3], &carry, carry);

subtract_u64(&out[2], &carry, result & kPrime[2]);
subtract_u64(&out[3], &carry, carry);

subtract_u64(&out[3], &carry, result & kPrime[3]);
885 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
890 smallfelem_square(longtmp, in);
891 felem_reduce(tmp, longtmp);
892 felem_contract(out, tmp);
895 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
900 smallfelem_mul(longtmp, in1, in2);
901 felem_reduce(tmp, longtmp);
902 felem_contract(out, tmp);
906 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
/* NOTE(review): this transcription appears to be missing the local
 * declarations and the decrement/bit-fold steps that turn |is_zero| and
 * |is_p| into all-ones masks, as well as the final combination into
 * |result| -- confirm against upstream. */
static limb smallfelem_is_zero(const smallfelem small)
/* OR of all limbs: zero iff the value is exactly 0 */
u64 is_zero = small[0] | small[1] | small[2] | small[3];
/* fold all bits down so bit 63 reflects "any bit set" */
is_zero &= is_zero << 32;
is_zero &= is_zero << 16;
is_zero &= is_zero << 8;
is_zero &= is_zero << 4;
is_zero &= is_zero << 2;
is_zero &= is_zero << 1;
/* arithmetic shift broadcasts the sign bit: all ones or all zeros */
is_zero = ((s64) is_zero) >> 63;

/* also treat the non-minimal representation p as zero mod p */
is_p = (small[0] ^ kPrime[0]) |
(small[1] ^ kPrime[1]) |
(small[2] ^ kPrime[2]) |
(small[3] ^ kPrime[3]);
is_p = ((s64) is_p) >> 63;

/* widen the 64-bit mask to a full limb of ones/zeros */
result |= ((limb) is_zero) << 64;
946 static int smallfelem_is_zero_int(const smallfelem small)
948 return (int) (smallfelem_is_zero(small) & ((limb)1));
952 * felem_inv calculates |out| = |in|^{-1}
954 * Based on Fermat's Little Theorem:
956 * a^{p-1} = 1 (mod p)
957 * a^{p-2} = a^{-1} (mod p)
/* NOTE(review): this transcription appears to be missing the local
 * declarations (ftmp, ftmp2, tmp, i) and the loop closing braces --
 * confirm against upstream. The addition chain computes in^(p-2) mod p. */
static void felem_inv(felem out, const felem in)
/* each e_I will hold |in|^{2^I - 1} */
felem e2, e4, e8, e16, e32, e64;

felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
felem_assign(e2, ftmp);
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
felem_assign(e4, ftmp);
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
felem_assign(e8, ftmp);
for (i = 0; i < 8; i++) {
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
felem_assign(e16, ftmp);
for (i = 0; i < 16; i++) {
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
felem_assign(e32, ftmp);
for (i = 0; i < 32; i++) {
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
felem_assign(e64, ftmp);
felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
for (i = 0; i < 192; i++) {
felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
} /* 2^256 - 2^224 + 2^192 */

/* build the low part of the exponent in ftmp2 */
felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
for (i = 0; i < 16; i++) {
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
for (i = 0; i < 8; i++) {
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
for (i = 0; i < 4; i++) {
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */

/* combine: p - 2 = 2^256 - 2^224 + 2^192 + 2^96 - 3 */
felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1022 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1026 smallfelem_expand(tmp, in);
1027 felem_inv(tmp, tmp);
1028 felem_contract(out, tmp);
1035 * Building on top of the field operations we have the operations on the
1036 * elliptic curve group itself. Points on the curve are represented in Jacobian
1040 * point_double calculates 2*(x_in, y_in, z_in)
1042 * The method is taken from:
1043 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1045 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1046 * while x_out == y_in is not (maybe this works, but it's not tested). */
/* NOTE(review): the storage-class specifier line ("static void") and the
 * function braces appear to be truncated in this copy; confirm against
 * upstream. The bound comments track the limb magnitudes for the
 * subsequent reduction steps. */
point_double(felem x_out, felem y_out, felem z_out,
const felem x_in, const felem y_in, const felem z_in)
longfelem tmp, tmp2;
felem delta, gamma, beta, alpha, ftmp, ftmp2;
smallfelem small1, small2;

felem_assign(ftmp, x_in);
/* ftmp[i] < 2^106 */
felem_assign(ftmp2, x_in);
/* ftmp2[i] < 2^106 */

/* delta = z^2 */
felem_square(tmp, z_in);
felem_reduce(delta, tmp);
/* delta[i] < 2^101 */

/* gamma = y^2 */
felem_square(tmp, y_in);
felem_reduce(gamma, tmp);
/* gamma[i] < 2^101 */
felem_shrink(small1, gamma);

/* beta = x*gamma */
felem_small_mul(tmp, small1, x_in);
felem_reduce(beta, tmp);
/* beta[i] < 2^101 */

/* alpha = 3*(x-delta)*(x+delta) */
felem_diff(ftmp, delta);
/* ftmp[i] < 2^105 + 2^106 < 2^107 */
felem_sum(ftmp2, delta);
/* ftmp2[i] < 2^105 + 2^106 < 2^107 */
felem_scalar(ftmp2, 3);
/* ftmp2[i] < 3 * 2^107 < 2^109 */
felem_mul(tmp, ftmp, ftmp2);
felem_reduce(alpha, tmp);
/* alpha[i] < 2^101 */
felem_shrink(small2, alpha);

/* x' = alpha^2 - 8*beta */
smallfelem_square(tmp, small2);
felem_reduce(x_out, tmp);
felem_assign(ftmp, beta);
felem_scalar(ftmp, 8);
/* ftmp[i] < 8 * 2^101 = 2^104 */
felem_diff(x_out, ftmp);
/* x_out[i] < 2^105 + 2^101 < 2^106 */

/* z' = (y + z)^2 - gamma - delta */
felem_sum(delta, gamma);
/* delta[i] < 2^101 + 2^101 = 2^102 */
felem_assign(ftmp, y_in);
felem_sum(ftmp, z_in);
/* ftmp[i] < 2^106 + 2^106 = 2^107 */
felem_square(tmp, ftmp);
felem_reduce(z_out, tmp);
felem_diff(z_out, delta);
/* z_out[i] < 2^105 + 2^101 < 2^106 */

/* y' = alpha*(4*beta - x') - 8*gamma^2 */
felem_scalar(beta, 4);
/* beta[i] < 4 * 2^101 = 2^103 */
felem_diff_zero107(beta, x_out);
/* beta[i] < 2^107 + 2^103 < 2^108 */
felem_small_mul(tmp, small2, beta);
/* tmp[i] < 7 * 2^64 < 2^67 */
smallfelem_square(tmp2, small1);
/* tmp2[i] < 7 * 2^64 */
longfelem_scalar(tmp2, 8);
/* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
longfelem_diff(tmp, tmp2);
/* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
felem_reduce_zero105(y_out, tmp);
/* y_out[i] < 2^106 */
1125 /* point_double_small is the same as point_double, except that it operates on
1128 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1129 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1131 felem felem_x_out, felem_y_out, felem_z_out;
1132 felem felem_x_in, felem_y_in, felem_z_in;
1134 smallfelem_expand(felem_x_in, x_in);
1135 smallfelem_expand(felem_y_in, y_in);
1136 smallfelem_expand(felem_z_in, z_in);
1137 point_double(felem_x_out, felem_y_out, felem_z_out,
1138 felem_x_in, felem_y_in, felem_z_in);
1139 felem_shrink(x_out, felem_x_out);
1140 felem_shrink(y_out, felem_y_out);
1141 felem_shrink(z_out, felem_z_out);
1144 /* copy_conditional copies in to out iff mask is all ones. */
1146 copy_conditional(felem out, const felem in, limb mask)
1149 for (i = 0; i < NLIMBS; ++i)
1151 const limb tmp = mask & (in[i] ^ out[i]);
1156 /* copy_small_conditional copies in to out iff mask is all ones. */
1158 copy_small_conditional(felem out, const smallfelem in, limb mask)
1161 const u64 mask64 = mask;
1162 for (i = 0; i < NLIMBS; ++i)
1164 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
* point_add calculates (x1, y1, z1) + (x2, y2, z2)
1171 * The method is taken from:
1172 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1173 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1175 * This function includes a branch for checking whether the two input points
1176 * are equal, (while not equal to the point at infinity). This case never
1177 * happens during single point multiplication, so there is no timing leak for
1178 * ECDH or ECDSA signing. */
/* NOTE(review): this copy appears to be missing function/branch braces
 * and the `return` after the point_double call; confirm against
 * upstream before building. Also NOTE(review): the x_equal/y_equal
 * detection relies on smallfelem_is_zero, which treats the non-minimal
 * representation p as zero, and on felem_shrink output that may exceed
 * p -- upstream later hardened this coordinate comparison; verify this
 * version against the fixed upstream ecp_nistp*.c. */
static void point_add(felem x3, felem y3, felem z3,
const felem x1, const felem y1, const felem z1,
const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
longfelem tmp, tmp2;
smallfelem small1, small2, small3, small4, small5;
limb x_equal, y_equal, z1_is_zero, z2_is_zero;

felem_shrink(small3, z1);

z1_is_zero = smallfelem_is_zero(small3);
z2_is_zero = smallfelem_is_zero(z2);

/* ftmp = z1z1 = z1**2 */
smallfelem_square(tmp, small3);
felem_reduce(ftmp, tmp);
/* ftmp[i] < 2^101 */
felem_shrink(small1, ftmp);

/* the !mixed path: z2 is a general field element */
/* ftmp2 = z2z2 = z2**2 */
smallfelem_square(tmp, z2);
felem_reduce(ftmp2, tmp);
/* ftmp2[i] < 2^101 */
felem_shrink(small2, ftmp2);

felem_shrink(small5, x1);

/* u1 = ftmp3 = x1*z2z2 */
smallfelem_mul(tmp, small5, small2);
felem_reduce(ftmp3, tmp);
/* ftmp3[i] < 2^101 */

/* ftmp5 = z1 + z2 */
felem_assign(ftmp5, z1);
felem_small_sum(ftmp5, z2);
/* ftmp5[i] < 2^107 */

/* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
felem_square(tmp, ftmp5);
felem_reduce(ftmp5, tmp);
/* ftmp2 = z2z2 + z1z1 */
felem_sum(ftmp2, ftmp);
/* ftmp2[i] < 2^101 + 2^101 = 2^102 */
felem_diff(ftmp5, ftmp2);
/* ftmp5[i] < 2^105 + 2^101 < 2^106 */

/* ftmp2 = z2 * z2z2 */
smallfelem_mul(tmp, small2, z2);
felem_reduce(ftmp2, tmp);

/* s1 = ftmp2 = y1 * z2**3 */
felem_mul(tmp, y1, ftmp2);
felem_reduce(ftmp6, tmp);
/* ftmp6[i] < 2^101 */

/* the mixed path: */
/* We'll assume z2 = 1 (special case z2 = 0 is handled later) */

/* u1 = ftmp3 = x1*z2z2 */
felem_assign(ftmp3, x1);
/* ftmp3[i] < 2^106 */

/* ftmp5 = 2z1z2 = 2*z1 */
felem_assign(ftmp5, z1);
felem_scalar(ftmp5, 2);
/* ftmp5[i] < 2*2^106 = 2^107 */

/* s1 = ftmp2 = y1 * z2**3 = y1 */
felem_assign(ftmp6, y1);
/* ftmp6[i] < 2^106 */

/* u2 = ftmp4 = x2*z1z1 */
smallfelem_mul(tmp, x2, small1);
felem_reduce(ftmp4, tmp);

/* h = ftmp4 = u2 - u1 */
felem_diff_zero107(ftmp4, ftmp3);
/* ftmp4[i] < 2^107 + 2^101 < 2^108 */
felem_shrink(small4, ftmp4);

x_equal = smallfelem_is_zero(small4);

/* z_out = ftmp5 * h */
felem_small_mul(tmp, small4, ftmp5);
felem_reduce(z_out, tmp);
/* z_out[i] < 2^101 */

/* ftmp = z1 * z1z1 */
smallfelem_mul(tmp, small1, small3);
felem_reduce(ftmp, tmp);

/* s2 = tmp = y2 * z1**3 */
felem_small_mul(tmp, y2, ftmp);
felem_reduce(ftmp5, tmp);

/* r = ftmp5 = (s2 - s1)*2 */
felem_diff_zero107(ftmp5, ftmp6);
/* ftmp5[i] < 2^107 + 2^107 = 2^108*/
felem_scalar(ftmp5, 2);
/* ftmp5[i] < 2^109 */
felem_shrink(small1, ftmp5);
y_equal = smallfelem_is_zero(small1);

/* equal affine points (and neither is infinity): fall back to doubling */
if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
point_double(x3, y3, z3, x1, y1, z1);

/* I = ftmp = (2h)**2 */
felem_assign(ftmp, ftmp4);
felem_scalar(ftmp, 2);
/* ftmp[i] < 2*2^108 = 2^109 */
felem_square(tmp, ftmp);
felem_reduce(ftmp, tmp);

/* J = ftmp2 = h * I */
felem_mul(tmp, ftmp4, ftmp);
felem_reduce(ftmp2, tmp);

/* V = ftmp4 = U1 * I */
felem_mul(tmp, ftmp3, ftmp);
felem_reduce(ftmp4, tmp);

/* x_out = r**2 - J - 2V */
smallfelem_square(tmp, small1);
felem_reduce(x_out, tmp);
felem_assign(ftmp3, ftmp4);
felem_scalar(ftmp4, 2);
felem_sum(ftmp4, ftmp2);
/* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
felem_diff(x_out, ftmp4);
/* x_out[i] < 2^105 + 2^101 */

/* y_out = r(V-x_out) - 2 * s1 * J */
felem_diff_zero107(ftmp3, x_out);
/* ftmp3[i] < 2^107 + 2^101 < 2^108 */
felem_small_mul(tmp, small1, ftmp3);
felem_mul(tmp2, ftmp6, ftmp2);
longfelem_scalar(tmp2, 2);
/* tmp2[i] < 2*2^67 = 2^68 */
longfelem_diff(tmp, tmp2);
/* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
felem_reduce_zero105(y_out, tmp);
/* y_out[i] < 2^106 */

/* handle the points-at-infinity cases in constant time */
copy_small_conditional(x_out, x2, z1_is_zero);
copy_conditional(x_out, x1, z2_is_zero);
copy_small_conditional(y_out, y2, z1_is_zero);
copy_conditional(y_out, y1, z2_is_zero);
copy_small_conditional(z_out, z2, z1_is_zero);
copy_conditional(z_out, z1, z2_is_zero);
felem_assign(x3, x_out);
felem_assign(y3, y_out);
felem_assign(z3, z_out);
1341 /* point_add_small is the same as point_add, except that it operates on
1343 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1344 smallfelem x1, smallfelem y1, smallfelem z1,
1345 smallfelem x2, smallfelem y2, smallfelem z2)
1347 felem felem_x3, felem_y3, felem_z3;
1348 felem felem_x1, felem_y1, felem_z1;
1349 smallfelem_expand(felem_x1, x1);
1350 smallfelem_expand(felem_y1, y1);
1351 smallfelem_expand(felem_z1, z1);
1352 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1353 felem_shrink(x3, felem_x3);
1354 felem_shrink(y3, felem_y3);
1355 felem_shrink(z3, felem_z3);
1359 * Base point pre computation
1360 * --------------------------
1362 * Two different sorts of precomputed tables are used in the following code.
1363 * Each contain various points on the curve, where each point is three field
1364 * elements (x, y, z).
1366 * For the base point table, z is usually 1 (0 for the point at infinity).
1367 * This table has 2 * 16 elements, starting with the following:
1368 * index | bits | point
1369 * ------+---------+------------------------------
1370 * 0     | 0 0 0 0 | 0G
1371 * 1     | 0 0 0 1 | 1G
1372 * 2 | 0 0 1 0 | 2^64G
1373 * 3 | 0 0 1 1 | (2^64 + 1)G
1374 * 4 | 0 1 0 0 | 2^128G
1375 * 5 | 0 1 0 1 | (2^128 + 1)G
1376 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1377 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1378 * 8 | 1 0 0 0 | 2^192G
1379 * 9 | 1 0 0 1 | (2^192 + 1)G
1380 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1381 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1382 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1383 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1384 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1385 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1386 * followed by a copy of this with each element multiplied by 2^32.
1388 * The reason for this is so that we can clock bits into four different
1389 * locations when doing simple scalar multiplies against the base point,
1390 * and then another four locations using the second 16 elements.
1392 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1394 /* gmul is the table of precomputed base points */
1395 static const smallfelem gmul[2][16][3] =
/* First sub-table: the 16 combinations of {2^192, 2^128, 2^64, 1} * G listed
 * in the layout comment above.  Each point is (x, y, z) with u64 limbs,
 * least-significant limb first (e.g. the first x row below reads, in
 * big-endian order, 6b17d1f2...d898c296).
 * NOTE(review): the index-0 (point at infinity) entries and the per-point
 * z rows do not appear in this excerpt -- verify against the full table. */
1399 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1400 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1402 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1403 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1405 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1406 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1408 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1409 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1411 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1412 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1414 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1415 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1417 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1418 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1420 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1421 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1423 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1424 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1426 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1427 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1429 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1430 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1432 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1433 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1435 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1436 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1438 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1439 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1441 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1442 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
/* Second sub-table: the same 16 combinations, each multiplied by 2^32
 * (see "followed by a copy of this with each element multiplied by 2^32"
 * in the layout comment above). */
1447 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1448 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1450 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1451 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1453 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1454 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1456 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1457 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1459 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1460 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1462 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1463 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1465 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1466 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1468 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1469 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1471 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1472 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1474 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1475 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1477 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1478 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1480 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1481 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1483 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1484 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1486 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1487 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1489 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1490 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1493 /* select_point selects the |idx|th point from a precomputation table and
1494 * copies it to out. */
1495 static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1498 u64 *outlimbs = &out[0][0];
1499 memset(outlimbs, 0, 3 * sizeof(smallfelem));
1501 for (i = 0; i < size; i++)
1503 const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1510 for (j = 0; j < NLIMBS * 3; j++)
1511 outlimbs[j] |= inlimbs[j] & mask;
1515 /* get_bit returns the |i|th bit in |in| */
1516 static char get_bit(const felem_bytearray in, int i)
1518 if ((i < 0) || (i >= 256))
1520 return (in[i >> 3] >> (i & 7)) & 1;
1523 /* Interleaved point multiplication using precomputed point multiples:
1524 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1525 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1526 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1527 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1528 static void batch_mul(felem x_out, felem y_out, felem z_out,
1529 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1530 const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1533 unsigned num, gen_mul = (g_scalar != NULL);
1539 /* set nq to the point at infinity */
1540 memset(nq, 0, 3 * sizeof(felem));
1542 /* Loop over all scalars msb-to-lsb, interleaving additions
1543 * of multiples of the generator (two in each of the last 32 rounds)
1544 * and additions of other points multiples (every 5th round).
1546 skip = 1; /* save two point operations in the first round */
1547 for (i = (num_points ? 255 : 31); i >= 0; --i)
/* one doubling per bit position (pointless while nq is still infinity,
 * hence the skip flag) */
1551 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1553 /* add multiples of the generator */
1554 if (gen_mul && (i <= 31))
1556 /* first, look 32 bits upwards */
1557 bits = get_bit(g_scalar, i + 224) << 3;
1558 bits |= get_bit(g_scalar, i + 160) << 2;
1559 bits |= get_bit(g_scalar, i + 96) << 1;
1560 bits |= get_bit(g_scalar, i + 32);
1561 /* select the point to add, in constant time */
1562 select_point(bits, 16, g_pre_comp[1], tmp);
/* while skip is set nq is infinity, so load tmp instead of adding */
1566 point_add(nq[0], nq[1], nq[2],
1567 nq[0], nq[1], nq[2],
1568 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1572 smallfelem_expand(nq[0], tmp[0]);
1573 smallfelem_expand(nq[1], tmp[1]);
1574 smallfelem_expand(nq[2], tmp[2]);
1578 /* second, look at the current position */
1579 bits = get_bit(g_scalar, i + 192) << 3;
1580 bits |= get_bit(g_scalar, i + 128) << 2;
1581 bits |= get_bit(g_scalar, i + 64) << 1;
1582 bits |= get_bit(g_scalar, i);
1583 /* select the point to add, in constant time */
1584 select_point(bits, 16, g_pre_comp[0], tmp);
1585 point_add(nq[0], nq[1], nq[2],
1586 nq[0], nq[1], nq[2],
1587 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1590 /* do other additions every 5 doublings */
1591 if (num_points && (i % 5 == 0))
1593 /* loop over all scalars */
1594 for (num = 0; num < num_points; ++num)
/* gather a 6-bit window (5 scalar bits plus the bit below, for
 * signed-digit recoding) */
1596 bits = get_bit(scalars[num], i + 4) << 5;
1597 bits |= get_bit(scalars[num], i + 3) << 4;
1598 bits |= get_bit(scalars[num], i + 2) << 3;
1599 bits |= get_bit(scalars[num], i + 1) << 2;
1600 bits |= get_bit(scalars[num], i) << 1;
1601 bits |= get_bit(scalars[num], i - 1);
1602 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1604 /* select the point to add or subtract, in constant time */
1605 select_point(digit, 17, pre_comp[num], tmp);
1606 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
/* constant-time choice between y and -y based on sign */
1607 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1608 felem_contract(tmp[1], ftmp);
1612 point_add(nq[0], nq[1], nq[2],
1613 nq[0], nq[1], nq[2],
1614 mixed, tmp[0], tmp[1], tmp[2]);
1618 smallfelem_expand(nq[0], tmp[0]);
1619 smallfelem_expand(nq[1], tmp[1]);
1620 smallfelem_expand(nq[2], tmp[2]);
/* write the accumulated point to the output parameters */
1626 felem_assign(x_out, nq[0]);
1627 felem_assign(y_out, nq[1]);
1628 felem_assign(z_out, nq[2]);
1631 /* Precomputation for the group generator. */
/* NOTE(review): the "typedef struct" opener for this type is not visible in
 * this excerpt; a reference-count field ("references", used by the
 * pre_comp_new/dup/free functions below) must be declared in the elided
 * lines -- verify against the full file. */
1633 smallfelem g_pre_comp[2][16][3];
1635 } NISTP256_PRE_COMP;
1637 const EC_METHOD *EC_GFp_nistp256_method(void)
/* Returns the method table that wires the nistp256-specific entry points
 * (group_init, group_set_curve, point_get_affine_coordinates, points_mul,
 * precompute_mult, have_precompute_mult) together with generic
 * ec_GFp_simple_* / ec_GFp_nist_* implementations for everything else. */
1639 static const EC_METHOD ret = {
1640 EC_FLAGS_DEFAULT_OCT,
1641 NID_X9_62_prime_field,
/* group lifecycle */
1642 ec_GFp_nistp256_group_init,
1643 ec_GFp_simple_group_finish,
1644 ec_GFp_simple_group_clear_finish,
1645 ec_GFp_nist_group_copy,
1646 ec_GFp_nistp256_group_set_curve,
1647 ec_GFp_simple_group_get_curve,
1648 ec_GFp_simple_group_get_degree,
1649 ec_GFp_simple_group_check_discriminant,
/* point lifecycle and coordinate accessors */
1650 ec_GFp_simple_point_init,
1651 ec_GFp_simple_point_finish,
1652 ec_GFp_simple_point_clear_finish,
1653 ec_GFp_simple_point_copy,
1654 ec_GFp_simple_point_set_to_infinity,
1655 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1656 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1657 ec_GFp_simple_point_set_affine_coordinates,
1658 ec_GFp_nistp256_point_get_affine_coordinates,
1659 0 /* point_set_compressed_coordinates */,
1664 ec_GFp_simple_invert,
1665 ec_GFp_simple_is_at_infinity,
1666 ec_GFp_simple_is_on_curve,
1668 ec_GFp_simple_make_affine,
1669 ec_GFp_simple_points_make_affine,
/* scalar multiplication with optional generator precomputation */
1670 ec_GFp_nistp256_points_mul,
1671 ec_GFp_nistp256_precompute_mult,
1672 ec_GFp_nistp256_have_precompute_mult,
1673 ec_GFp_nist_field_mul,
1674 ec_GFp_nist_field_sqr,
1676 0 /* field_encode */,
1677 0 /* field_decode */,
1678 0 /* field_set_to_one */ };
1683 /******************************************************************************/
1684 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1687 static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1689 NISTP256_PRE_COMP *ret = NULL;
1690 ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1693 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1696 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1697 ret->references = 1;
1701 static void *nistp256_pre_comp_dup(void *src_)
1703 NISTP256_PRE_COMP *src = src_;
1705 /* no need to actually copy, these objects never change! */
1706 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1711 static void nistp256_pre_comp_free(void *pre_)
1714 NISTP256_PRE_COMP *pre = pre_;
1719 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1726 static void nistp256_pre_comp_clear_free(void *pre_)
1729 NISTP256_PRE_COMP *pre = pre_;
1734 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1738 OPENSSL_cleanse(pre, sizeof *pre);
1742 /******************************************************************************/
1743 /* OPENSSL EC_METHOD FUNCTIONS
1746 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1749 ret = ec_GFp_simple_group_init(group);
1750 group->a_is_minus3 = 1;
1754 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1755 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
/* Reject any (p, a, b) that is not exactly NIST P-256, then install the
 * NIST fast-reduction function and defer to the generic set_curve. */
1758 BN_CTX *new_ctx = NULL;
1759 BIGNUM *curve_p, *curve_a, *curve_b;
/* allocate a scratch BN_CTX when the caller did not supply one */
1762 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1764 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1765 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1766 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
/* load the canonical P-256 parameters (big-endian) for comparison */
1767 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1768 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1769 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1770 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1771 (BN_cmp(curve_b, b)))
1773 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1774 EC_R_WRONG_CURVE_PARAMETERS);
/* use the dedicated mod-p reduction for this curve */
1777 group->field_mod_func = BN_nist_mod_256;
1778 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1781 if (new_ctx != NULL)
1782 BN_CTX_free(new_ctx);
1786 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1787 * (X', Y') = (X/Z^2, Y/Z^3) */
1788 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1789 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1791 felem z1, z2, x_in, y_in;
1792 smallfelem x_out, y_out;
/* the point at infinity has no affine representation */
1795 if (EC_POINT_is_at_infinity(group, point))
1797 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1798 EC_R_POINT_AT_INFINITY);
1801 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1802 (!BN_to_felem(z1, &point->Z))) return 0;
/* NOTE(review): z2 is squared here before any visible assignment; the
 * inversion step (z2 = 1/Z) is presumably in lines elided from this
 * excerpt -- verify against the full file. */
1804 felem_square(tmp, z2); felem_reduce(z1, tmp);
1805 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
/* x' = X / Z^2, contracted to the unique minimal representation */
1806 felem_contract(x_out, x_in);
1809 if (!smallfelem_to_BN(x, x_out)) {
1810 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
/* y' = Y / Z^3 */
1815 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1816 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1817 felem_contract(y_out, y_in);
1820 if (!smallfelem_to_BN(y, y_out))
1822 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1830 static void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */])
/* Convert |num| Jacobian points to affine form in place by delegating to
 * the shared batched-inversion helper, parameterised with the smallfelem
 * operation set below. */
1832 /* Runs in constant time, unless an input is the point at infinity
1833 * (which normally shouldn't happen). */
1834 ec_GFp_nistp_points_make_affine_internal(
/* operation table handed to the generic helper: set-to-one, zero test,
 * assignment, squaring, multiplication, inversion, contraction */
1839 (void (*)(void *)) smallfelem_one,
1840 (int (*)(const void *)) smallfelem_is_zero_int,
1841 (void (*)(void *, const void *)) smallfelem_assign,
1842 (void (*)(void *, const void *)) smallfelem_square_contract,
1843 (void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
1844 (void (*)(void *, const void *)) smallfelem_inv_contract,
1845 (void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */);
1848 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1849 * Result is stored in r (r can equal one of the inputs). */
1850 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1851 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1852 const BIGNUM *scalars[], BN_CTX *ctx)
1857 BN_CTX *new_ctx = NULL;
1858 BIGNUM *x, *y, *z, *tmp_scalar;
1859 felem_bytearray g_secret;
1860 felem_bytearray *secrets = NULL;
1861 smallfelem (*pre_comp)[17][3] = NULL;
1862 smallfelem *tmp_smallfelems = NULL;
1863 felem_bytearray tmp;
1864 unsigned i, num_bytes;
1865 int have_pre_comp = 0;
1866 size_t num_points = num;
1867 smallfelem x_in, y_in, z_in;
1868 felem x_out, y_out, z_out;
1869 NISTP256_PRE_COMP *pre = NULL;
1870 const smallfelem (*g_pre_comp)[16][3] = NULL;
1871 EC_POINT *generator = NULL;
1872 const EC_POINT *p = NULL;
1873 const BIGNUM *p_scalar = NULL;
/* allocate a scratch BN_CTX when the caller did not supply one */
1876 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1878 if (((x = BN_CTX_get(ctx)) == NULL) ||
1879 ((y = BN_CTX_get(ctx)) == NULL) ||
1880 ((z = BN_CTX_get(ctx)) == NULL) ||
1881 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
/* look up any generator precomputation attached to the group */
1886 pre = EC_EX_DATA_get_data(group->extra_data,
1887 nistp256_pre_comp_dup, nistp256_pre_comp_free,
1888 nistp256_pre_comp_clear_free);
1890 /* we have precomputation, try to use it */
1891 g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
1893 /* try to use the standard precomputation */
1894 g_pre_comp = &gmul[0];
1895 generator = EC_POINT_new(group);
1896 if (generator == NULL)
1898 /* get the generator from precomputation */
1899 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1900 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1901 !smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1903 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1906 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1907 generator, x, y, z, ctx))
/* the built-in table is only usable when the group's generator is G */
1909 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1910 /* precomputation matches generator */
1913 /* we don't have valid precomputation:
1914 * treat the generator as a random point */
1919 if (num_points >= 3)
1921 /* unless we precompute multiples for just one or two points,
1922 * converting those into affine form is time well spent */
1925 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1926 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
1928 tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
1929 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1931 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1935 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1936 * i.e., they contribute nothing to the linear combination */
1937 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1938 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1939 for (i = 0; i < num_points; ++i)
1942 /* we didn't have a valid precomputation, so we pick
1945 p = EC_GROUP_get0_generator(group);
1949 /* the i^th point */
1952 p_scalar = scalars[i];
1954 if ((p_scalar != NULL) && (p != NULL))
1956 /* reduce scalar to 0 <= scalar < 2^256 */
1957 if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
1959 /* this is an unusual input, and we don't guarantee
1960 * constant-timeness */
1961 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1963 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1966 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1969 num_bytes = BN_bn2bin(p_scalar, tmp);
/* secrets[] holds the scalar bytes in little-endian order */
1970 flip_endian(secrets[i], tmp, num_bytes);
1971 /* precompute multiples */
1972 if ((!BN_to_felem(x_out, &p->X)) ||
1973 (!BN_to_felem(y_out, &p->Y)) ||
1974 (!BN_to_felem(z_out, &p->Z))) goto err;
1975 felem_shrink(pre_comp[i][1][0], x_out);
1976 felem_shrink(pre_comp[i][1][1], y_out);
1977 felem_shrink(pre_comp[i][1][2], z_out);
/* fill pre_comp[i][j] = j*P: add P for odd j, double for even j */
1978 for (j = 2; j <= 16; ++j)
1983 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1984 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1985 pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1990 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1991 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1997 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
2000 /* the scalar for the generator */
2001 if ((scalar != NULL) && (have_pre_comp))
2003 memset(g_secret, 0, sizeof(g_secret));
2004 /* reduce scalar to 0 <= scalar < 2^256 */
2005 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
2007 /* this is an unusual input, and we don't guarantee
2008 * constant-timeness */
2009 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
2011 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2014 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2017 num_bytes = BN_bn2bin(scalar, tmp);
2018 flip_endian(g_secret, tmp, num_bytes);
2019 /* do the multiplication with generator precomputation*/
2020 batch_mul(x_out, y_out, z_out,
2021 (const felem_bytearray (*)) secrets, num_points,
2023 mixed, (const smallfelem (*)[17][3]) pre_comp,
2027 /* do the multiplication without generator precomputation */
2028 batch_mul(x_out, y_out, z_out,
2029 (const felem_bytearray (*)) secrets, num_points,
2030 NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
2031 /* reduce the output to its unique minimal representation */
2032 felem_contract(x_in, x_out);
2033 felem_contract(y_in, y_out);
2034 felem_contract(z_in, z_out);
2035 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2036 (!smallfelem_to_BN(z, z_in)))
2038 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2041 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
/* cleanup runs on both success and error paths */
2045 if (generator != NULL)
2046 EC_POINT_free(generator);
2047 if (new_ctx != NULL)
2048 BN_CTX_free(new_ctx);
2049 if (secrets != NULL)
2050 OPENSSL_free(secrets);
2051 if (pre_comp != NULL)
2052 OPENSSL_free(pre_comp);
2053 if (tmp_smallfelems != NULL)
2054 OPENSSL_free(tmp_smallfelems);
2058 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
/* Build the 2x16 table of generator multiples used by batch_mul (layout
 * described in the comment above gmul) and attach it to |group|. */
2061 NISTP256_PRE_COMP *pre = NULL;
2063 BN_CTX *new_ctx = NULL;
2065 EC_POINT *generator = NULL;
2066 smallfelem tmp_smallfelems[32];
2067 felem x_tmp, y_tmp, z_tmp;
2069 /* throw away old precomputation */
2070 EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2071 nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2073 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
2075 if (((x = BN_CTX_get(ctx)) == NULL) ||
2076 ((y = BN_CTX_get(ctx)) == NULL))
2078 /* get the generator */
2079 if (group->generator == NULL) goto err;
2080 generator = EC_POINT_new(group);
2081 if (generator == NULL)
/* construct the standard generator from the stored curve parameters */
2083 BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
2084 BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
2085 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2087 if ((pre = nistp256_pre_comp_new()) == NULL)
2089 /* if the generator is the standard one, use built-in precomputation */
2090 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2092 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
/* otherwise compute the table from the group's own generator */
2096 if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2097 (!BN_to_felem(y_tmp, &group->generator->Y)) ||
2098 (!BN_to_felem(z_tmp, &group->generator->Z)))
2100 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2101 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2102 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2103 /* compute 2^64*G, 2^128*G, 2^192*G for the first table,
2104 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
2106 for (i = 1; i <= 8; i <<= 1)
2109 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2110 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
/* 31 further doublings: [1][i] = 2^32 * [0][i] */
2111 for (j = 0; j < 31; ++j)
2114 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2115 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2120 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2121 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
/* and [0][2i] = 2^32 * [1][i] */
2122 for (j = 0; j < 31; ++j)
2125 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2126 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
2129 for (i = 0; i < 2; i++)
2131 /* g_pre_comp[i][0] is the point at infinity */
2132 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2133 /* the remaining multiples */
2134 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2136 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2137 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2138 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2139 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2141 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2142 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2143 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2144 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2146 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2147 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2148 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2149 /* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
2151 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2152 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2153 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2154 for (j = 1; j < 8; ++j)
2156 /* odd multiples: add G resp. 2^32*G */
2158 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
2159 pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
2160 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
/* convert the 31 table entries starting at [0][1] to affine form */
2163 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
/* hand ownership of |pre| to the group's extra_data */
2165 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2166 nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2172 if (generator != NULL)
2173 EC_POINT_free(generator);
2174 if (new_ctx != NULL)
2175 BN_CTX_free(new_ctx);
2177 nistp256_pre_comp_free(pre);
2181 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2183 if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2184 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
2191 static void *dummy=&dummy;