1 /* crypto/ec/ecp_nistp256.c */
3 * Written by Adam Langley (Google) for the OpenSSL project
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
32 #ifndef OPENSSL_SYS_VMS
39 #include <openssl/err.h>
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 typedef __int128_t int128_t;
47 #error "Need GCC 3.1 or later to define type uint128_t"
55 /* The underlying field.
57 * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
58 * of this field into 32 bytes. We call this an felem_bytearray. */
60 typedef u8 felem_bytearray[32];
62 /* These are the parameters of P256, taken from FIPS 186-3, page 86. These
63 * values are big-endian. */
64 static const felem_bytearray nistp256_curve_params[5] = {
65 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
66 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
67 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
69 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
70 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
71 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
73 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
74 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
75 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
76 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
77 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
78 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
79 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
80 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
81 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
82 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
83 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
84 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
87 /* The representation of field elements.
88 * ------------------------------------
90 * We represent field elements with either four 128-bit values, eight 128-bit
91 * values, or four 64-bit values. The field element represented is:
92 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
94 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
96 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
97 * apart, but are 128-bits wide, the most significant bits of each limb overlap
98 * with the least significant bits of the next.
100 * A field element with four limbs is an 'felem'. One with eight limbs is a
103 * A field element with four, 64-bit values is called a 'smallfelem'. Small
104 * values are used as intermediate values before multiplication.
109 typedef uint128_t limb;
110 typedef limb felem[NLIMBS];
111 typedef limb longfelem[NLIMBS * 2];
112 typedef u64 smallfelem[NLIMBS];
114 /* This is the value of the prime as four 64-bit words, little-endian. */
115 static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
116 static const limb bottom32bits = 0xffffffff;
117 static const u64 bottom63bits = 0x7ffffffffffffffful;
119 /* bin32_to_felem takes a little-endian byte array and converts it into felem
120 * form. This assumes that the CPU is little-endian. */
121 static void bin32_to_felem(felem out, const u8 in[32])
123 out[0] = *((u64*) &in[0]);
124 out[1] = *((u64*) &in[8]);
125 out[2] = *((u64*) &in[16]);
126 out[3] = *((u64*) &in[24]);
129 /* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
130 * 32 byte array. This assumes that the CPU is little-endian. */
131 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
133 *((u64*) &out[0]) = in[0];
134 *((u64*) &out[8]) = in[1];
135 *((u64*) &out[16]) = in[2];
136 *((u64*) &out[24]) = in[3];
139 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
140 static void flip_endian(u8 *out, const u8 *in, unsigned len)
143 for (i = 0; i < len; ++i)
144 out[i] = in[len-1-i];
147 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
148 static int BN_to_felem(felem out, const BIGNUM *bn)
150 felem_bytearray b_in;
151 felem_bytearray b_out;
154 /* BN_bn2bin eats leading zeroes */
155 memset(b_out, 0, sizeof b_out);
156 num_bytes = BN_num_bytes(bn);
157 if (num_bytes > sizeof b_out)
159 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
162 if (BN_is_negative(bn))
164 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
167 num_bytes = BN_bn2bin(bn, b_in);
168 flip_endian(b_out, b_in, num_bytes);
169 bin32_to_felem(out, b_out);
173 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
174 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
176 felem_bytearray b_in, b_out;
177 smallfelem_to_bin32(b_in, in);
178 flip_endian(b_out, b_in, sizeof b_out);
179 return BN_bin2bn(b_out, sizeof b_out, out);
184 * ---------------- */
186 static void smallfelem_one(smallfelem out)
194 static void smallfelem_assign(smallfelem out, const smallfelem in)
202 static void felem_assign(felem out, const felem in)
210 /* felem_sum sets out = out + in. */
211 static void felem_sum(felem out, const felem in)
219 /* felem_small_sum sets out = out + in. */
220 static void felem_small_sum(felem out, const smallfelem in)
228 /* felem_scalar sets out = out * scalar */
229 static void felem_scalar(felem out, const u64 scalar)
237 /* longfelem_scalar sets out = out * scalar */
238 static void longfelem_scalar(longfelem out, const u64 scalar)
250 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
251 #define two105 (((limb)1) << 105)
252 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
254 /* zero105 is 0 mod p */
255 static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
257 /* smallfelem_neg sets |out| to |-small|
259 * out[i] < out[i] + 2^105
261 static void smallfelem_neg(felem out, const smallfelem small)
263 /* In order to prevent underflow, we subtract from 0 mod p. */
264 out[0] = zero105[0] - small[0];
265 out[1] = zero105[1] - small[1];
266 out[2] = zero105[2] - small[2];
267 out[3] = zero105[3] - small[3];
270 /* felem_diff subtracts |in| from |out|
274 * out[i] < out[i] + 2^105
276 static void felem_diff(felem out, const felem in)
278 /* In order to prevent underflow, we add 0 mod p before subtracting. */
279 out[0] += zero105[0];
280 out[1] += zero105[1];
281 out[2] += zero105[2];
282 out[3] += zero105[3];
290 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
291 #define two107 (((limb)1) << 107)
292 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
294 /* zero107 is 0 mod p */
295 static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
297 /* An alternative felem_diff for larger inputs |in|
298 * felem_diff_zero107 subtracts |in| from |out|
302 * out[i] < out[i] + 2^107
304 static void felem_diff_zero107(felem out, const felem in)
306 /* In order to prevent underflow, we add 0 mod p before subtracting. */
307 out[0] += zero107[0];
308 out[1] += zero107[1];
309 out[2] += zero107[2];
310 out[3] += zero107[3];
318 /* longfelem_diff subtracts |in| from |out|
322 * out[i] < out[i] + 2^70 + 2^40
324 static void longfelem_diff(longfelem out, const longfelem in)
326 static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
327 static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
328 static const limb two70 = (((limb)1) << 70);
329 static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
330 static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
332 /* add 0 mod p to avoid underflow */
336 out[3] += two70m40m38p6;
342 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
353 #define two64m0 (((limb)1) << 64) - 1
354 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
355 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
356 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
358 /* zero110 is 0 mod p */
359 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
361 /* felem_shrink converts an felem into a smallfelem. The result isn't quite
362 * minimal as the value may be greater than p.
369 static void felem_shrink(smallfelem out, const felem in)
374 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
377 tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
380 tmp[2] = zero110[2] + (u64) in[2];
381 tmp[0] = zero110[0] + in[0];
382 tmp[1] = zero110[1] + in[1];
383 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
385 /* We perform two partial reductions where we eliminate the
386 * high-word of tmp[3]. We don't update the other words till the end.
388 a = tmp[3] >> 64; /* a < 2^46 */
389 tmp[3] = (u64) tmp[3];
391 tmp[3] += ((limb)a) << 32;
395 a = tmp[3] >> 64; /* a < 2^15 */
396 b += a; /* b < 2^46 + 2^15 < 2^47 */
397 tmp[3] = (u64) tmp[3];
399 tmp[3] += ((limb)a) << 32;
400 /* tmp[3] < 2^64 + 2^47 */
402 /* This adjusts the other two words to complete the two partial
405 tmp[1] -= (((limb)b) << 32);
407 /* In order to make space in tmp[3] for the carry from 2 -> 3, we
408 * conditionally subtract kPrime if tmp[3] is large enough. */
410 /* As tmp[3] < 2^65, high is either 1 or 0 */
414 * all ones if the high word of tmp[3] is 1
415 * all zeros if the high word of tmp[3] if 0 */
419 * all ones if the MSB of low is 1
420 * all zeros if the MSB of low if 0 */
423 /* if low was greater than kPrime3Test then the MSB is zero */
427 * all ones if low was > kPrime3Test
428 * all zeros if low was <= kPrime3Test */
429 mask = (mask & low) | high;
430 tmp[0] -= mask & kPrime[0];
431 tmp[1] -= mask & kPrime[1];
432 /* kPrime[2] is zero, so omitted */
433 tmp[3] -= mask & kPrime[3];
434 /* tmp[3] < 2**64 - 2**32 + 1 */
436 tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
437 tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
438 tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
447 /* smallfelem_expand converts a smallfelem to an felem */
448 static void smallfelem_expand(felem out, const smallfelem in)
456 /* smallfelem_square sets |out| = |small|^2
460 * out[i] < 7 * 2^64 < 2^67
462 static void smallfelem_square(longfelem out, const smallfelem small)
467 a = ((uint128_t) small[0]) * small[0];
473 a = ((uint128_t) small[0]) * small[1];
480 a = ((uint128_t) small[0]) * small[2];
487 a = ((uint128_t) small[0]) * small[3];
493 a = ((uint128_t) small[1]) * small[2];
500 a = ((uint128_t) small[1]) * small[1];
506 a = ((uint128_t) small[1]) * small[3];
513 a = ((uint128_t) small[2]) * small[3];
521 a = ((uint128_t) small[2]) * small[2];
527 a = ((uint128_t) small[3]) * small[3];
534 /* felem_square sets |out| = |in|^2
538 * out[i] < 7 * 2^64 < 2^67
540 static void felem_square(longfelem out, const felem in)
543 felem_shrink(small, in);
544 smallfelem_square(out, small);
547 /* smallfelem_mul sets |out| = |small1| * |small2|
552 * out[i] < 7 * 2^64 < 2^67
554 static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
559 a = ((uint128_t) small1[0]) * small2[0];
566 a = ((uint128_t) small1[0]) * small2[1];
572 a = ((uint128_t) small1[1]) * small2[0];
579 a = ((uint128_t) small1[0]) * small2[2];
585 a = ((uint128_t) small1[1]) * small2[1];
591 a = ((uint128_t) small1[2]) * small2[0];
598 a = ((uint128_t) small1[0]) * small2[3];
604 a = ((uint128_t) small1[1]) * small2[2];
610 a = ((uint128_t) small1[2]) * small2[1];
616 a = ((uint128_t) small1[3]) * small2[0];
623 a = ((uint128_t) small1[1]) * small2[3];
629 a = ((uint128_t) small1[2]) * small2[2];
635 a = ((uint128_t) small1[3]) * small2[1];
642 a = ((uint128_t) small1[2]) * small2[3];
648 a = ((uint128_t) small1[3]) * small2[2];
655 a = ((uint128_t) small1[3]) * small2[3];
662 /* felem_mul sets |out| = |in1| * |in2|
667 * out[i] < 7 * 2^64 < 2^67
669 static void felem_mul(longfelem out, const felem in1, const felem in2)
671 smallfelem small1, small2;
672 felem_shrink(small1, in1);
673 felem_shrink(small2, in2);
674 smallfelem_mul(out, small1, small2);
677 /* felem_small_mul sets |out| = |small1| * |in2|
682 * out[i] < 7 * 2^64 < 2^67
684 static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
687 felem_shrink(small2, in2);
688 smallfelem_mul(out, small1, small2);
691 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
692 #define two100 (((limb)1) << 100)
693 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
694 /* zero100 is 0 mod p */
695 static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
697 /* Internal function for the different flavours of felem_reduce.
698 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
700 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
701 * out[1] >= in[7] + 2^32*in[4]
702 * out[2] >= in[5] + 2^32*in[5]
703 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
705 * out[0] <= out[0] + in[4] + 2^32*in[5]
706 * out[1] <= out[1] + in[5] + 2^33*in[6]
707 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
708 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
710 static void felem_reduce_(felem out, const longfelem in)
713 /* combine common terms from below */
714 c = in[4] + (in[5] << 32);
722 /* the remaining terms */
723 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
724 out[1] -= (in[4] << 32);
725 out[3] += (in[4] << 32);
727 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
728 out[2] -= (in[5] << 32);
730 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
732 out[0] -= (in[6] << 32);
733 out[1] += (in[6] << 33);
734 out[2] += (in[6] * 2);
735 out[3] -= (in[6] << 32);
737 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
739 out[0] -= (in[7] << 32);
740 out[2] += (in[7] << 33);
741 out[3] += (in[7] * 3);
744 /* felem_reduce converts a longfelem into an felem.
745 * To be called directly after felem_square or felem_mul.
747 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
748 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
752 static void felem_reduce(felem out, const longfelem in)
754 out[0] = zero100[0] + in[0];
755 out[1] = zero100[1] + in[1];
756 out[2] = zero100[2] + in[2];
757 out[3] = zero100[3] + in[3];
759 felem_reduce_(out, in);
761 /* out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
762 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
763 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
764 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
766 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
767 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
768 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
769 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
773 /* felem_reduce_zero105 converts a larger longfelem into an felem.
779 static void felem_reduce_zero105(felem out, const longfelem in)
781 out[0] = zero105[0] + in[0];
782 out[1] = zero105[1] + in[1];
783 out[2] = zero105[2] + in[2];
784 out[3] = zero105[3] + in[3];
786 felem_reduce_(out, in);
788 /* out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
789 * out[1] > 2^105 - 2^71 - 2^103 > 0
790 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
791 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
793 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
794 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
795 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
796 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
800 /* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
802 static void subtract_u64(u64* result, u64* carry, u64 v)
804 uint128_t r = *result;
806 *carry = (r >> 64) & 1;
810 /* felem_contract converts |in| to its unique, minimal representation.
814 static void felem_contract(smallfelem out, const felem in)
817 u64 all_equal_so_far = 0, result = 0, carry;
819 felem_shrink(out, in);
820 /* small is minimal except that the value might be > p */
823 /* We are doing a constant time test if out >= kPrime. We need to
824 * compare each u64, from most-significant to least significant. For
825 * each one, if all words so far have been equal (m is all ones) then a
826 * non-equal result is the answer. Otherwise we continue. */
827 for (i = 3; i < 4; i--)
830 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
831 /* if out[i] > kPrime[i] then a will underflow and the high
832 * 64-bits will all be set. */
833 result |= all_equal_so_far & ((u64) (a >> 64));
835 /* if kPrime[i] == out[i] then |equal| will be all zeros and
836 * the decrement will make it all ones. */
837 equal = kPrime[i] ^ out[i];
839 equal &= equal << 32;
840 equal &= equal << 16;
845 equal = ((s64) equal) >> 63;
847 all_equal_so_far &= equal;
850 /* if all_equal_so_far is still all ones then the two values are equal
851 * and so out >= kPrime is true. */
852 result |= all_equal_so_far;
854 /* if out >= kPrime then we subtract kPrime. */
855 subtract_u64(&out[0], &carry, result & kPrime[0]);
856 subtract_u64(&out[1], &carry, carry);
857 subtract_u64(&out[2], &carry, carry);
858 subtract_u64(&out[3], &carry, carry);
860 subtract_u64(&out[1], &carry, result & kPrime[1]);
861 subtract_u64(&out[2], &carry, carry);
862 subtract_u64(&out[3], &carry, carry);
864 subtract_u64(&out[2], &carry, result & kPrime[2]);
865 subtract_u64(&out[3], &carry, carry);
867 subtract_u64(&out[3], &carry, result & kPrime[3]);
870 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
875 smallfelem_square(longtmp, in);
876 felem_reduce(tmp, longtmp);
877 felem_contract(out, tmp);
880 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
885 smallfelem_mul(longtmp, in1, in2);
886 felem_reduce(tmp, longtmp);
887 felem_contract(out, tmp);
890 /* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
895 static limb smallfelem_is_zero(const smallfelem small)
900 u64 is_zero = small[0] | small[1] | small[2] | small[3];
902 is_zero &= is_zero << 32;
903 is_zero &= is_zero << 16;
904 is_zero &= is_zero << 8;
905 is_zero &= is_zero << 4;
906 is_zero &= is_zero << 2;
907 is_zero &= is_zero << 1;
908 is_zero = ((s64) is_zero) >> 63;
910 is_p = (small[0] ^ kPrime[0]) |
911 (small[1] ^ kPrime[1]) |
912 (small[2] ^ kPrime[2]) |
913 (small[3] ^ kPrime[3]);
921 is_p = ((s64) is_p) >> 63;
926 result |= ((limb) is_zero) << 64;
930 static int smallfelem_is_zero_int(const smallfelem small)
932 return (int) (smallfelem_is_zero(small) & ((limb)1));
935 /* felem_inv calculates |out| = |in|^{-1}
937 * Based on Fermat's Little Theorem:
939 * a^{p-1} = 1 (mod p)
940 * a^{p-2} = a^{-1} (mod p)
942 static void felem_inv(felem out, const felem in)
945 /* each e_I will hold |in|^{2^I - 1} */
946 felem e2, e4, e8, e16, e32, e64;
950 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
951 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
952 felem_assign(e2, ftmp);
953 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
954 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
955 felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
956 felem_assign(e4, ftmp);
957 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
958 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
959 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
960 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
961 felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
962 felem_assign(e8, ftmp);
963 for (i = 0; i < 8; i++) {
964 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
966 felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
967 felem_assign(e16, ftmp);
968 for (i = 0; i < 16; i++) {
969 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
971 felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
972 felem_assign(e32, ftmp);
973 for (i = 0; i < 32; i++) {
974 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
976 felem_assign(e64, ftmp);
977 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
978 for (i = 0; i < 192; i++) {
979 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
980 } /* 2^256 - 2^224 + 2^192 */
982 felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
983 for (i = 0; i < 16; i++) {
984 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
986 felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
987 for (i = 0; i < 8; i++) {
988 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
990 felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
991 for (i = 0; i < 4; i++) {
992 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
994 felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
995 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
996 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
997 felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
998 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
999 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1000 felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1002 felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1005 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1009 smallfelem_expand(tmp, in);
1010 felem_inv(tmp, tmp);
1011 felem_contract(out, tmp);
1017 * Building on top of the field operations we have the operations on the
1018 * elliptic curve group itself. Points on the curve are represented in Jacobian
1021 /* point_double calculates 2*(x_in, y_in, z_in)
1023 * The method is taken from:
1024 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1026 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1027 * while x_out == y_in is not (maybe this works, but it's not tested). */
1029 point_double(felem x_out, felem y_out, felem z_out,
1030 const felem x_in, const felem y_in, const felem z_in)
1032 longfelem tmp, tmp2;
1033 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1034 smallfelem small1, small2;
1036 felem_assign(ftmp, x_in);
1037 /* ftmp[i] < 2^106 */
1038 felem_assign(ftmp2, x_in);
1039 /* ftmp2[i] < 2^106 */
1042 felem_square(tmp, z_in);
1043 felem_reduce(delta, tmp);
1044 /* delta[i] < 2^101 */
1047 felem_square(tmp, y_in);
1048 felem_reduce(gamma, tmp);
1049 /* gamma[i] < 2^101 */
1050 felem_shrink(small1, gamma);
1052 /* beta = x*gamma */
1053 felem_small_mul(tmp, small1, x_in);
1054 felem_reduce(beta, tmp);
1055 /* beta[i] < 2^101 */
1057 /* alpha = 3*(x-delta)*(x+delta) */
1058 felem_diff(ftmp, delta);
1059 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1060 felem_sum(ftmp2, delta);
1061 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1062 felem_scalar(ftmp2, 3);
1063 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1064 felem_mul(tmp, ftmp, ftmp2);
1065 felem_reduce(alpha, tmp);
1066 /* alpha[i] < 2^101 */
1067 felem_shrink(small2, alpha);
1069 /* x' = alpha^2 - 8*beta */
1070 smallfelem_square(tmp, small2);
1071 felem_reduce(x_out, tmp);
1072 felem_assign(ftmp, beta);
1073 felem_scalar(ftmp, 8);
1074 /* ftmp[i] < 8 * 2^101 = 2^104 */
1075 felem_diff(x_out, ftmp);
1076 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1078 /* z' = (y + z)^2 - gamma - delta */
1079 felem_sum(delta, gamma);
1080 /* delta[i] < 2^101 + 2^101 = 2^102 */
1081 felem_assign(ftmp, y_in);
1082 felem_sum(ftmp, z_in);
1083 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1084 felem_square(tmp, ftmp);
1085 felem_reduce(z_out, tmp);
1086 felem_diff(z_out, delta);
1087 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1089 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1090 felem_scalar(beta, 4);
1091 /* beta[i] < 4 * 2^101 = 2^103 */
1092 felem_diff_zero107(beta, x_out);
1093 /* beta[i] < 2^107 + 2^103 < 2^108 */
1094 felem_small_mul(tmp, small2, beta);
1095 /* tmp[i] < 7 * 2^64 < 2^67 */
1096 smallfelem_square(tmp2, small1);
1097 /* tmp2[i] < 7 * 2^64 */
1098 longfelem_scalar(tmp2, 8);
1099 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1100 longfelem_diff(tmp, tmp2);
1101 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1102 felem_reduce_zero105(y_out, tmp);
1103 /* y_out[i] < 2^106 */
1106 /* point_double_small is the same as point_double, except that it operates on
1109 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1110 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1112 felem felem_x_out, felem_y_out, felem_z_out;
1113 felem felem_x_in, felem_y_in, felem_z_in;
1115 smallfelem_expand(felem_x_in, x_in);
1116 smallfelem_expand(felem_y_in, y_in);
1117 smallfelem_expand(felem_z_in, z_in);
1118 point_double(felem_x_out, felem_y_out, felem_z_out,
1119 felem_x_in, felem_y_in, felem_z_in);
1120 felem_shrink(x_out, felem_x_out);
1121 felem_shrink(y_out, felem_y_out);
1122 felem_shrink(z_out, felem_z_out);
1125 /* copy_conditional copies in to out iff mask is all ones. */
1127 copy_conditional(felem out, const felem in, limb mask)
1130 for (i = 0; i < NLIMBS; ++i)
1132 const limb tmp = mask & (in[i] ^ out[i]);
1137 /* copy_small_conditional copies in to out iff mask is all ones. */
1139 copy_small_conditional(felem out, const smallfelem in, limb mask)
1142 const u64 mask64 = mask;
1143 for (i = 0; i < NLIMBS; ++i)
1145 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1149 /* point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1151 * The method is taken from:
1152 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1153 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1155 * This function includes a branch for checking whether the two input points
1156 * are equal, (while not equal to the point at infinity). This case never
1157 * happens during single point multiplication, so there is no timing leak for
1158 * ECDH or ECDSA signing. */
1159 static void point_add(felem x3, felem y3, felem z3,
1160 const felem x1, const felem y1, const felem z1,
1161 const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
1163 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1164 longfelem tmp, tmp2;
1165 smallfelem small1, small2, small3, small4, small5;
1166 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1168 felem_shrink(small3, z1);
1170 z1_is_zero = smallfelem_is_zero(small3);
1171 z2_is_zero = smallfelem_is_zero(z2);
1173 /* ftmp = z1z1 = z1**2 */
1174 smallfelem_square(tmp, small3);
1175 felem_reduce(ftmp, tmp);
1176 /* ftmp[i] < 2^101 */
1177 felem_shrink(small1, ftmp);
1181 /* ftmp2 = z2z2 = z2**2 */
1182 smallfelem_square(tmp, z2);
1183 felem_reduce(ftmp2, tmp);
1184 /* ftmp2[i] < 2^101 */
1185 felem_shrink(small2, ftmp2);
1187 felem_shrink(small5, x1);
1189 /* u1 = ftmp3 = x1*z2z2 */
1190 smallfelem_mul(tmp, small5, small2);
1191 felem_reduce(ftmp3, tmp);
1192 /* ftmp3[i] < 2^101 */
1194 /* ftmp5 = z1 + z2 */
1195 felem_assign(ftmp5, z1);
1196 felem_small_sum(ftmp5, z2);
1197 /* ftmp5[i] < 2^107 */
1199 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1200 felem_square(tmp, ftmp5);
1201 felem_reduce(ftmp5, tmp);
1202 /* ftmp2 = z2z2 + z1z1 */
1203 felem_sum(ftmp2, ftmp);
1204 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1205 felem_diff(ftmp5, ftmp2);
1206 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1208 /* ftmp2 = z2 * z2z2 */
1209 smallfelem_mul(tmp, small2, z2);
1210 felem_reduce(ftmp2, tmp);
1212 /* s1 = ftmp2 = y1 * z2**3 */
1213 felem_mul(tmp, y1, ftmp2);
1214 felem_reduce(ftmp6, tmp);
1215 /* ftmp6[i] < 2^101 */
1219 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1221 /* u1 = ftmp3 = x1*z2z2 */
1222 felem_assign(ftmp3, x1);
1223 /* ftmp3[i] < 2^106 */
1226 felem_assign(ftmp5, z1);
1227 felem_scalar(ftmp5, 2);
1228 /* ftmp5[i] < 2*2^106 = 2^107 */
1230 /* s1 = ftmp2 = y1 * z2**3 */
1231 felem_assign(ftmp6, y1);
1232 /* ftmp6[i] < 2^106 */
1236 smallfelem_mul(tmp, x2, small1);
1237 felem_reduce(ftmp4, tmp);
1239 /* h = ftmp4 = u2 - u1 */
1240 felem_diff_zero107(ftmp4, ftmp3);
1241 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1242 felem_shrink(small4, ftmp4);
1244 x_equal = smallfelem_is_zero(small4);
1246 /* z_out = ftmp5 * h */
1247 felem_small_mul(tmp, small4, ftmp5);
1248 felem_reduce(z_out, tmp);
1249 /* z_out[i] < 2^101 */
1251 /* ftmp = z1 * z1z1 */
1252 smallfelem_mul(tmp, small1, small3);
1253 felem_reduce(ftmp, tmp);
1255 /* s2 = tmp = y2 * z1**3 */
1256 felem_small_mul(tmp, y2, ftmp);
1257 felem_reduce(ftmp5, tmp);
1259 /* r = ftmp5 = (s2 - s1)*2 */
1260 felem_diff_zero107(ftmp5, ftmp6);
1261 /* ftmp5[i] < 2^107 + 2^107 = 2^108*/
1262 felem_scalar(ftmp5, 2);
1263 /* ftmp5[i] < 2^109 */
1264 felem_shrink(small1, ftmp5);
1265 y_equal = smallfelem_is_zero(small1);
1267 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1269 point_double(x3, y3, z3, x1, y1, z1);
1273 /* I = ftmp = (2h)**2 */
1274 felem_assign(ftmp, ftmp4);
1275 felem_scalar(ftmp, 2);
1276 /* ftmp[i] < 2*2^108 = 2^109 */
1277 felem_square(tmp, ftmp);
1278 felem_reduce(ftmp, tmp);
1280 /* J = ftmp2 = h * I */
1281 felem_mul(tmp, ftmp4, ftmp);
1282 felem_reduce(ftmp2, tmp);
1284 /* V = ftmp4 = U1 * I */
1285 felem_mul(tmp, ftmp3, ftmp);
1286 felem_reduce(ftmp4, tmp);
1288 /* x_out = r**2 - J - 2V */
1289 smallfelem_square(tmp, small1);
1290 felem_reduce(x_out, tmp);
1291 felem_assign(ftmp3, ftmp4);
1292 felem_scalar(ftmp4, 2);
1293 felem_sum(ftmp4, ftmp2);
1294 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1295 felem_diff(x_out, ftmp4);
1296 /* x_out[i] < 2^105 + 2^101 */
1298 /* y_out = r(V-x_out) - 2 * s1 * J */
1299 felem_diff_zero107(ftmp3, x_out);
1300 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1301 felem_small_mul(tmp, small1, ftmp3);
1302 felem_mul(tmp2, ftmp6, ftmp2);
1303 longfelem_scalar(tmp2, 2);
1304 /* tmp2[i] < 2*2^67 = 2^68 */
1305 longfelem_diff(tmp, tmp2);
1306 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1307 felem_reduce_zero105(y_out, tmp);
1308 /* y_out[i] < 2^106 */
1310 copy_small_conditional(x_out, x2, z1_is_zero);
1311 copy_conditional(x_out, x1, z2_is_zero);
1312 copy_small_conditional(y_out, y2, z1_is_zero);
1313 copy_conditional(y_out, y1, z2_is_zero);
1314 copy_small_conditional(z_out, z2, z1_is_zero);
1315 copy_conditional(z_out, z1, z2_is_zero);
1316 felem_assign(x3, x_out);
1317 felem_assign(y3, y_out);
1318 felem_assign(z3, z_out);
1321 /* point_add_small is the same as point_add, except that it operates on
1323 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1324 smallfelem x1, smallfelem y1, smallfelem z1,
1325 smallfelem x2, smallfelem y2, smallfelem z2)
1327 felem felem_x3, felem_y3, felem_z3;
1328 felem felem_x1, felem_y1, felem_z1;
1329 smallfelem_expand(felem_x1, x1);
1330 smallfelem_expand(felem_y1, y1);
1331 smallfelem_expand(felem_z1, z1);
1332 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1333 felem_shrink(x3, felem_x3);
1334 felem_shrink(y3, felem_y3);
1335 felem_shrink(z3, felem_z3);
1338 /* Base point pre computation
1339 * --------------------------
1341 * Two different sorts of precomputed tables are used in the following code.
1342 * Each contain various points on the curve, where each point is three field
1343 * elements (x, y, z).
1345 * For the base point table, z is usually 1 (0 for the point at infinity).
1346 * This table has 2 * 16 elements, starting with the following:
1347 * index | bits | point
1348 * ------+---------+------------------------------
1351 * 2 | 0 0 1 0 | 2^64G
1352 * 3 | 0 0 1 1 | (2^64 + 1)G
1353 * 4 | 0 1 0 0 | 2^128G
1354 * 5 | 0 1 0 1 | (2^128 + 1)G
1355 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1356 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1357 * 8 | 1 0 0 0 | 2^192G
1358 * 9 | 1 0 0 1 | (2^192 + 1)G
1359 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1360 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1361 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1362 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1363 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1364 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1365 * followed by a copy of this with each element multiplied by 2^32.
1367 * The reason for this is so that we can clock bits into four different
1368 * locations when doing simple scalar multiplies against the base point,
1369 * and then another four locations using the second 16 elements.
1371 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1373 /* gmul is the table of precomputed base points */
1374 static const smallfelem gmul[2][16][3] =
1378 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1379 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1381 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1382 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1384 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1385 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1387 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1388 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1390 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1391 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1393 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1394 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1396 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1397 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1399 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1400 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1402 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1403 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1405 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1406 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1408 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1409 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1411 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1412 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1414 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1415 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1417 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1418 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1420 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1421 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1426 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1427 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1429 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1430 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1432 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1433 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1435 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1436 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1438 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1439 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1441 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1442 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1444 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1445 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1447 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1448 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1450 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1451 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1453 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1454 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1456 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1457 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1459 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1460 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1462 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1463 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1465 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1466 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1468 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1469 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1472 /* select_point selects the |idx|th point from a precomputation table and
1473 * copies it to out. */
1474 static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1477 u64 *outlimbs = &out[0][0];
1478 memset(outlimbs, 0, 3 * sizeof(smallfelem));
1480 for (i = 0; i < size; i++)
1482 const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1489 for (j = 0; j < NLIMBS * 3; j++)
1490 outlimbs[j] |= inlimbs[j] & mask;
1494 /* get_bit returns the |i|th bit in |in| */
1495 static char get_bit(const felem_bytearray in, int i)
1497 if ((i < 0) || (i >= 256))
1499 return (in[i >> 3] >> (i & 7)) & 1;
1502 /* Interleaved point multiplication using precomputed point multiples:
1503 * The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[],
1504 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1505 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1506 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1507 static void batch_mul(felem x_out, felem y_out, felem z_out,
1508 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1509 const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1512 unsigned num, gen_mul = (g_scalar != NULL);
1518 /* set nq to the point at infinity */
1519 memset(nq, 0, 3 * sizeof(felem));
1521 /* Loop over all scalars msb-to-lsb, interleaving additions
1522 * of multiples of the generator (two in each of the last 32 rounds)
1523 * and additions of other points multiples (every 5th round).
1525 skip = 1; /* save two point operations in the first round */
1526 for (i = (num_points ? 255 : 31); i >= 0; --i)
1530 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1532 /* add multiples of the generator */
1533 if (gen_mul && (i <= 31))
1535 /* first, look 32 bits upwards */
1536 bits = get_bit(g_scalar, i + 224) << 3;
1537 bits |= get_bit(g_scalar, i + 160) << 2;
1538 bits |= get_bit(g_scalar, i + 96) << 1;
1539 bits |= get_bit(g_scalar, i + 32);
1540 /* select the point to add, in constant time */
1541 select_point(bits, 16, g_pre_comp[1], tmp);
1545 point_add(nq[0], nq[1], nq[2],
1546 nq[0], nq[1], nq[2],
1547 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1551 smallfelem_expand(nq[0], tmp[0]);
1552 smallfelem_expand(nq[1], tmp[1]);
1553 smallfelem_expand(nq[2], tmp[2]);
1557 /* second, look at the current position */
1558 bits = get_bit(g_scalar, i + 192) << 3;
1559 bits |= get_bit(g_scalar, i + 128) << 2;
1560 bits |= get_bit(g_scalar, i + 64) << 1;
1561 bits |= get_bit(g_scalar, i);
1562 /* select the point to add, in constant time */
1563 select_point(bits, 16, g_pre_comp[0], tmp);
1564 point_add(nq[0], nq[1], nq[2],
1565 nq[0], nq[1], nq[2],
1566 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1569 /* do other additions every 5 doublings */
1570 if (num_points && (i % 5 == 0))
1572 /* loop over all scalars */
1573 for (num = 0; num < num_points; ++num)
1575 bits = get_bit(scalars[num], i + 4) << 5;
1576 bits |= get_bit(scalars[num], i + 3) << 4;
1577 bits |= get_bit(scalars[num], i + 2) << 3;
1578 bits |= get_bit(scalars[num], i + 1) << 2;
1579 bits |= get_bit(scalars[num], i) << 1;
1580 bits |= get_bit(scalars[num], i - 1);
1581 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1583 /* select the point to add or subtract, in constant time */
1584 select_point(digit, 17, pre_comp[num], tmp);
1585 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
1586 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1587 felem_contract(tmp[1], ftmp);
1591 point_add(nq[0], nq[1], nq[2],
1592 nq[0], nq[1], nq[2],
1593 mixed, tmp[0], tmp[1], tmp[2]);
1597 smallfelem_expand(nq[0], tmp[0]);
1598 smallfelem_expand(nq[1], tmp[1]);
1599 smallfelem_expand(nq[2], tmp[2]);
1605 felem_assign(x_out, nq[0]);
1606 felem_assign(y_out, nq[1]);
1607 felem_assign(z_out, nq[2]);
1610 /* Precomputation for the group generator. */
1612 smallfelem g_pre_comp[2][16][3];
1614 } NISTP256_PRE_COMP;
1616 const EC_METHOD *EC_GFp_nistp256_method(void)
1618 static const EC_METHOD ret = {
1619 EC_FLAGS_DEFAULT_OCT,
1620 NID_X9_62_prime_field,
1621 ec_GFp_nistp256_group_init,
1622 ec_GFp_simple_group_finish,
1623 ec_GFp_simple_group_clear_finish,
1624 ec_GFp_nist_group_copy,
1625 ec_GFp_nistp256_group_set_curve,
1626 ec_GFp_simple_group_get_curve,
1627 ec_GFp_simple_group_get_degree,
1628 ec_GFp_simple_group_check_discriminant,
1629 ec_GFp_simple_point_init,
1630 ec_GFp_simple_point_finish,
1631 ec_GFp_simple_point_clear_finish,
1632 ec_GFp_simple_point_copy,
1633 ec_GFp_simple_point_set_to_infinity,
1634 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1635 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1636 ec_GFp_simple_point_set_affine_coordinates,
1637 ec_GFp_nistp256_point_get_affine_coordinates,
1638 0 /* point_set_compressed_coordinates */,
1643 ec_GFp_simple_invert,
1644 ec_GFp_simple_is_at_infinity,
1645 ec_GFp_simple_is_on_curve,
1647 ec_GFp_simple_make_affine,
1648 ec_GFp_simple_points_make_affine,
1649 ec_GFp_nistp256_points_mul,
1650 ec_GFp_nistp256_precompute_mult,
1651 ec_GFp_nistp256_have_precompute_mult,
1652 ec_GFp_nist_field_mul,
1653 ec_GFp_nist_field_sqr,
1655 0 /* field_encode */,
1656 0 /* field_decode */,
1657 0 /* field_set_to_one */ };
1662 /******************************************************************************/
1663 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1666 static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1668 NISTP256_PRE_COMP *ret = NULL;
1669 ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1672 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1675 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1676 ret->references = 1;
1680 static void *nistp256_pre_comp_dup(void *src_)
1682 NISTP256_PRE_COMP *src = src_;
1684 /* no need to actually copy, these objects never change! */
1685 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1690 static void nistp256_pre_comp_free(void *pre_)
1693 NISTP256_PRE_COMP *pre = pre_;
1698 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1705 static void nistp256_pre_comp_clear_free(void *pre_)
1708 NISTP256_PRE_COMP *pre = pre_;
1713 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1717 OPENSSL_cleanse(pre, sizeof *pre);
1721 /******************************************************************************/
1722 /* OPENSSL EC_METHOD FUNCTIONS
1725 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1728 ret = ec_GFp_simple_group_init(group);
1729 group->a_is_minus3 = 1;
1733 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1734 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1737 BN_CTX *new_ctx = NULL;
1738 BIGNUM *curve_p, *curve_a, *curve_b;
1741 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1743 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1744 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1745 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1746 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1747 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1748 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1749 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1750 (BN_cmp(curve_b, b)))
1752 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1753 EC_R_WRONG_CURVE_PARAMETERS);
1756 group->field_mod_func = BN_nist_mod_256;
1757 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1760 if (new_ctx != NULL)
1761 BN_CTX_free(new_ctx);
1765 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1766 * (X', Y') = (X/Z^2, Y/Z^3) */
1767 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1768 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1770 felem z1, z2, x_in, y_in;
1771 smallfelem x_out, y_out;
1774 if (EC_POINT_is_at_infinity(group, point))
1776 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1777 EC_R_POINT_AT_INFINITY);
1780 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1781 (!BN_to_felem(z1, &point->Z))) return 0;
1783 felem_square(tmp, z2); felem_reduce(z1, tmp);
1784 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1785 felem_contract(x_out, x_in);
1788 if (!smallfelem_to_BN(x, x_out)) {
1789 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1794 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1795 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1796 felem_contract(y_out, y_in);
1799 if (!smallfelem_to_BN(y, y_out))
1801 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1809 static void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */])
1811 /* Runs in constant time, unless an input is the point at infinity
1812 * (which normally shouldn't happen). */
1813 ec_GFp_nistp_points_make_affine_internal(
1818 (void (*)(void *)) smallfelem_one,
1819 (int (*)(const void *)) smallfelem_is_zero_int,
1820 (void (*)(void *, const void *)) smallfelem_assign,
1821 (void (*)(void *, const void *)) smallfelem_square_contract,
1822 (void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
1823 (void (*)(void *, const void *)) smallfelem_inv_contract,
1824 (void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */);
1827 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1828 * Result is stored in r (r can equal one of the inputs). */
1829 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1830 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1831 const BIGNUM *scalars[], BN_CTX *ctx)
1836 BN_CTX *new_ctx = NULL;
1837 BIGNUM *x, *y, *z, *tmp_scalar;
1838 felem_bytearray g_secret;
1839 felem_bytearray *secrets = NULL;
1840 smallfelem (*pre_comp)[17][3] = NULL;
1841 smallfelem *tmp_smallfelems = NULL;
1842 felem_bytearray tmp;
1843 unsigned i, num_bytes;
1844 int have_pre_comp = 0;
1845 size_t num_points = num;
1846 smallfelem x_in, y_in, z_in;
1847 felem x_out, y_out, z_out;
1848 NISTP256_PRE_COMP *pre = NULL;
1849 const smallfelem (*g_pre_comp)[16][3] = NULL;
1850 EC_POINT *generator = NULL;
1851 const EC_POINT *p = NULL;
1852 const BIGNUM *p_scalar = NULL;
1855 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1857 if (((x = BN_CTX_get(ctx)) == NULL) ||
1858 ((y = BN_CTX_get(ctx)) == NULL) ||
1859 ((z = BN_CTX_get(ctx)) == NULL) ||
1860 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1865 pre = EC_EX_DATA_get_data(group->extra_data,
1866 nistp256_pre_comp_dup, nistp256_pre_comp_free,
1867 nistp256_pre_comp_clear_free);
1869 /* we have precomputation, try to use it */
1870 g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
1872 /* try to use the standard precomputation */
1873 g_pre_comp = &gmul[0];
1874 generator = EC_POINT_new(group);
1875 if (generator == NULL)
1877 /* get the generator from precomputation */
1878 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1879 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1880 !smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1882 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1885 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1886 generator, x, y, z, ctx))
1888 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1889 /* precomputation matches generator */
1892 /* we don't have valid precomputation:
1893 * treat the generator as a random point */
1898 if (num_points >= 3)
1900 /* unless we precompute multiples for just one or two points,
1901 * converting those into affine form is time well spent */
1904 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1905 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
1907 tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
1908 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1910 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1914 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1915 * i.e., they contribute nothing to the linear combination */
1916 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1917 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1918 for (i = 0; i < num_points; ++i)
1921 /* we didn't have a valid precomputation, so we pick
1924 p = EC_GROUP_get0_generator(group);
1928 /* the i^th point */
1931 p_scalar = scalars[i];
1933 if ((p_scalar != NULL) && (p != NULL))
1935 /* reduce scalar to 0 <= scalar < 2^256 */
1936 if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
1938 /* this is an unusual input, and we don't guarantee
1939 * constant-timeness */
1940 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1942 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1945 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1948 num_bytes = BN_bn2bin(p_scalar, tmp);
1949 flip_endian(secrets[i], tmp, num_bytes);
1950 /* precompute multiples */
1951 if ((!BN_to_felem(x_out, &p->X)) ||
1952 (!BN_to_felem(y_out, &p->Y)) ||
1953 (!BN_to_felem(z_out, &p->Z))) goto err;
1954 felem_shrink(pre_comp[i][1][0], x_out);
1955 felem_shrink(pre_comp[i][1][1], y_out);
1956 felem_shrink(pre_comp[i][1][2], z_out);
1957 for (j = 2; j <= 16; ++j)
1962 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1963 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1964 pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1969 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1970 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1976 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
1979 /* the scalar for the generator */
1980 if ((scalar != NULL) && (have_pre_comp))
1982 memset(g_secret, 0, sizeof(g_secret));
1983 /* reduce scalar to 0 <= scalar < 2^256 */
1984 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
1986 /* this is an unusual input, and we don't guarantee
1987 * constant-timeness */
1988 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1990 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1993 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1996 num_bytes = BN_bn2bin(scalar, tmp);
1997 flip_endian(g_secret, tmp, num_bytes);
1998 /* do the multiplication with generator precomputation*/
1999 batch_mul(x_out, y_out, z_out,
2000 (const felem_bytearray (*)) secrets, num_points,
2002 mixed, (const smallfelem (*)[17][3]) pre_comp,
2006 /* do the multiplication without generator precomputation */
2007 batch_mul(x_out, y_out, z_out,
2008 (const felem_bytearray (*)) secrets, num_points,
2009 NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
2010 /* reduce the output to its unique minimal representation */
2011 felem_contract(x_in, x_out);
2012 felem_contract(y_in, y_out);
2013 felem_contract(z_in, z_out);
2014 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2015 (!smallfelem_to_BN(z, z_in)))
2017 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2020 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2024 if (generator != NULL)
2025 EC_POINT_free(generator);
2026 if (new_ctx != NULL)
2027 BN_CTX_free(new_ctx);
2028 if (secrets != NULL)
2029 OPENSSL_free(secrets);
2030 if (pre_comp != NULL)
2031 OPENSSL_free(pre_comp);
2032 if (tmp_smallfelems != NULL)
2033 OPENSSL_free(tmp_smallfelems);
2037 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2040 NISTP256_PRE_COMP *pre = NULL;
2042 BN_CTX *new_ctx = NULL;
2044 EC_POINT *generator = NULL;
2045 smallfelem tmp_smallfelems[32];
2046 felem x_tmp, y_tmp, z_tmp;
2048 /* throw away old precomputation */
2049 EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2050 nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2052 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
2054 if (((x = BN_CTX_get(ctx)) == NULL) ||
2055 ((y = BN_CTX_get(ctx)) == NULL))
2057 /* get the generator */
2058 if (group->generator == NULL) goto err;
2059 generator = EC_POINT_new(group);
2060 if (generator == NULL)
2062 BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
2063 BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
2064 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2066 if ((pre = nistp256_pre_comp_new()) == NULL)
2068 /* if the generator is the standard one, use built-in precomputation */
2069 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2071 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2075 if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2076 (!BN_to_felem(y_tmp, &group->generator->Y)) ||
2077 (!BN_to_felem(z_tmp, &group->generator->Z)))
2079 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2080 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2081 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2082 /* compute 2^64*G, 2^128*G, 2^192*G for the first table,
2083 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
2085 for (i = 1; i <= 8; i <<= 1)
2088 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2089 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
2090 for (j = 0; j < 31; ++j)
2093 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2094 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2099 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2100 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2101 for (j = 0; j < 31; ++j)
2104 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2105 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
2108 for (i = 0; i < 2; i++)
2110 /* g_pre_comp[i][0] is the point at infinity */
2111 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2112 /* the remaining multiples */
2113 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2115 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2116 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2117 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2118 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2120 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2121 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2122 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2123 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2125 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2126 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2127 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2128 /* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
2130 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2131 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2132 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2133 for (j = 1; j < 8; ++j)
2135 /* odd multiples: add G resp. 2^32*G */
2137 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
2138 pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
2139 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
2142 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2144 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2145 nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2151 if (generator != NULL)
2152 EC_POINT_free(generator);
2153 if (new_ctx != NULL)
2154 BN_CTX_free(new_ctx);
2156 nistp256_pre_comp_free(pre);
2160 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2162 if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2163 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
2170 static void *dummy=&dummy;