1 /* crypto/ec/ecp_nistp256.c */
3 * Written by Adam Langley (Google) for the OpenSSL project
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
#include <openssl/opensslconf.h>
#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128

#include <string.h>

#include <openssl/err.h>
37 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
38 /* even with gcc, the typedef won't work for 32-bit platforms */
39 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
40 typedef __int128_t int128_t;
42 #error "Need GCC 3.1 or later to define type uint128_t"
50 /* The underlying field.
52 * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
53 * of this field into 32 bytes. We call this an felem_bytearray. */
55 typedef u8 felem_bytearray[32];
57 /* These are the parameters of P256, taken from FIPS 186-3, page 86. These
58 * values are big-endian. */
59 static const felem_bytearray nistp256_curve_params[5] = {
60 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
61 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
62 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
64 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
65 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
66 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
68 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
69 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
70 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
71 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
72 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
73 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
74 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
75 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
76 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
77 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
78 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
79 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
83 * The representation of field elements.
84 * ------------------------------------
86 * We represent field elements with either four 128-bit values, eight 128-bit
87 * values, or four 64-bit values. The field element represented is:
88 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
90 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
92 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
93 * apart, but are 128-bits wide, the most significant bits of each limb overlap
94 * with the least significant bits of the next.
96 * A field element with four limbs is an 'felem'. One with eight limbs is a
99 * A field element with four, 64-bit values is called a 'smallfelem'. Small
100 * values are used as intermediate values before multiplication.
105 typedef uint128_t limb;
106 typedef limb felem[NLIMBS];
107 typedef limb longfelem[NLIMBS * 2];
108 typedef u64 smallfelem[NLIMBS];
110 /* This is the value of the prime as four 64-bit words, little-endian. */
111 static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
112 static const u64 bottom63bits = 0x7ffffffffffffffful;
114 /* bin32_to_felem takes a little-endian byte array and converts it into felem
115 * form. This assumes that the CPU is little-endian. */
116 static void bin32_to_felem(felem out, const u8 in[32])
118 out[0] = *((u64*) &in[0]);
119 out[1] = *((u64*) &in[8]);
120 out[2] = *((u64*) &in[16]);
121 out[3] = *((u64*) &in[24]);
124 /* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
125 * 32 byte array. This assumes that the CPU is little-endian. */
126 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
128 *((u64*) &out[0]) = in[0];
129 *((u64*) &out[8]) = in[1];
130 *((u64*) &out[16]) = in[2];
131 *((u64*) &out[24]) = in[3];
134 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
135 static void flip_endian(u8 *out, const u8 *in, unsigned len)
138 for (i = 0; i < len; ++i)
139 out[i] = in[len-1-i];
142 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
143 static int BN_to_felem(felem out, const BIGNUM *bn)
145 felem_bytearray b_in;
146 felem_bytearray b_out;
149 /* BN_bn2bin eats leading zeroes */
150 memset(b_out, 0, sizeof b_out);
151 num_bytes = BN_num_bytes(bn);
152 if (num_bytes > sizeof b_out)
154 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
157 if (BN_is_negative(bn))
159 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
162 num_bytes = BN_bn2bin(bn, b_in);
163 flip_endian(b_out, b_in, num_bytes);
164 bin32_to_felem(out, b_out);
168 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
169 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
171 felem_bytearray b_in, b_out;
172 smallfelem_to_bin32(b_in, in);
173 flip_endian(b_out, b_in, sizeof b_out);
174 return BN_bin2bn(b_out, sizeof b_out, out);
179 * ---------------- */
181 static void smallfelem_one(smallfelem out)
189 static void smallfelem_assign(smallfelem out, const smallfelem in)
197 static void felem_assign(felem out, const felem in)
205 /* felem_sum sets out = out + in. */
206 static void felem_sum(felem out, const felem in)
214 /* felem_small_sum sets out = out + in. */
215 static void felem_small_sum(felem out, const smallfelem in)
223 /* felem_scalar sets out = out * scalar */
224 static void felem_scalar(felem out, const u64 scalar)
232 /* longfelem_scalar sets out = out * scalar */
233 static void longfelem_scalar(longfelem out, const u64 scalar)
245 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
246 #define two105 (((limb)1) << 105)
247 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
249 /* zero105 is 0 mod p */
250 static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
253 * smallfelem_neg sets |out| to |-small|
255 * out[i] < out[i] + 2^105
257 static void smallfelem_neg(felem out, const smallfelem small)
259 /* In order to prevent underflow, we subtract from 0 mod p. */
260 out[0] = zero105[0] - small[0];
261 out[1] = zero105[1] - small[1];
262 out[2] = zero105[2] - small[2];
263 out[3] = zero105[3] - small[3];
267 * felem_diff subtracts |in| from |out|
271 * out[i] < out[i] + 2^105
273 static void felem_diff(felem out, const felem in)
275 /* In order to prevent underflow, we add 0 mod p before subtracting. */
276 out[0] += zero105[0];
277 out[1] += zero105[1];
278 out[2] += zero105[2];
279 out[3] += zero105[3];
287 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
288 #define two107 (((limb)1) << 107)
289 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
291 /* zero107 is 0 mod p */
292 static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
295 * An alternative felem_diff for larger inputs |in|
296 * felem_diff_zero107 subtracts |in| from |out|
300 * out[i] < out[i] + 2^107
302 static void felem_diff_zero107(felem out, const felem in)
304 /* In order to prevent underflow, we add 0 mod p before subtracting. */
305 out[0] += zero107[0];
306 out[1] += zero107[1];
307 out[2] += zero107[2];
308 out[3] += zero107[3];
/*-
 * longfelem_diff subtracts |in| from |out|
 * On exit:
 *   out[i] < out[i] + 2^70 + 2^40
 *
 * NOTE(review): the additions of the zero-mod-p constants to out[0..2] and
 * out[4..7], and the final out[i] -= in[i] statements, were lost in
 * extraction -- restore from the upstream source before building.
 */
static void longfelem_diff(longfelem out, const longfelem in)
    static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
    static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
    static const limb two70 = (((limb)1) << 70);
    static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
    static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);

    /* add 0 mod p to avoid underflow */
    out[3] += two70m40m38p6;

    /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
352 #define two64m0 (((limb)1) << 64) - 1
353 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
354 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
355 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
357 /* zero110 is 0 mod p */
358 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
/*-
 * felem_shrink converts an felem into a smallfelem. The result isn't quite
 * minimal as the value may be greater than p.
 *
 * NOTE(review): the local declarations (felem tmp; u64 a, b, mask; signed
 * high/low temporaries) and several statements of the partial reduction and
 * of the constant-time conditional kPrime subtraction were lost in
 * extraction -- restore from the upstream source before building.
 */
static void felem_shrink(smallfelem out, const felem in)
    static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */

    /* Fold the carry out of limb 2 into limb 3 first. */
    tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));

    tmp[2] = zero110[2] + (u64) in[2];
    tmp[0] = zero110[0] + in[0];
    tmp[1] = zero110[1] + in[1];
    /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */

    /* We perform two partial reductions where we eliminate the
     * high-word of tmp[3]. We don't update the other words till the end.
     */
    a = tmp[3] >> 64; /* a < 2^46 */
    tmp[3] = (u64) tmp[3];
    tmp[3] += ((limb)a) << 32;

    a = tmp[3] >> 64; /* a < 2^15 */
    b += a; /* b < 2^46 + 2^15 < 2^47 */
    tmp[3] = (u64) tmp[3];
    tmp[3] += ((limb)a) << 32;
    /* tmp[3] < 2^64 + 2^47 */

    /* Adjust the remaining words to complete the two partial reductions. */
    tmp[1] -= (((limb)b) << 32);

    /* In order to make space in tmp[3] for the carry from 2 -> 3, we
     * conditionally subtract kPrime if tmp[3] is large enough. */
    /* As tmp[3] < 2^65, high is either 1 or 0 */
    /* high is:
     *   all ones if the high word of tmp[3] is 1
     *   all zeros if the high word of tmp[3] if 0 */
    /* mask is:
     *   all ones if the MSB of low is 1
     *   all zeros if the MSB of low if 0 */
    /* if low was greater than kPrime3Test then the MSB is zero */
    /* low is:
     *   all ones if low was > kPrime3Test
     *   all zeros if low was <= kPrime3Test */
    mask = (mask & low) | high;
    tmp[0] -= mask & kPrime[0];
    tmp[1] -= mask & kPrime[1];
    /* kPrime[2] is zero, so omitted */
    tmp[3] -= mask & kPrime[3];
    /* tmp[3] < 2**64 - 2**32 + 1 */

    /* Propagate the remaining carries and pack into 64-bit words. */
    tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
    tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
    tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
450 /* smallfelem_expand converts a smallfelem to an felem */
451 static void smallfelem_expand(felem out, const smallfelem in)
/*-
 * smallfelem_square sets |out| = |small|^2
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 *
 * NOTE(review): the low/high splitting of each 128-bit product and the
 * out[] accumulation statements between these multiplications were lost in
 * extraction -- restore from the upstream source before building.
 */
static void smallfelem_square(longfelem out, const smallfelem small)
    a = ((uint128_t) small[0]) * small[0];

    a = ((uint128_t) small[0]) * small[1];

    a = ((uint128_t) small[0]) * small[2];

    a = ((uint128_t) small[0]) * small[3];

    a = ((uint128_t) small[1]) * small[2];

    a = ((uint128_t) small[1]) * small[1];

    a = ((uint128_t) small[1]) * small[3];

    a = ((uint128_t) small[2]) * small[3];

    a = ((uint128_t) small[2]) * small[2];

    a = ((uint128_t) small[3]) * small[3];
539 * felem_square sets |out| = |in|^2
543 * out[i] < 7 * 2^64 < 2^67
545 static void felem_square(longfelem out, const felem in)
548 felem_shrink(small, in);
549 smallfelem_square(out, small);
/*-
 * smallfelem_mul sets |out| = |small1| * |small2|
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 *
 * NOTE(review): the low/high splitting of each 128-bit product and the
 * out[] accumulation statements between these multiplications were lost in
 * extraction -- restore from the upstream source before building.
 */
static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
    a = ((uint128_t) small1[0]) * small2[0];

    a = ((uint128_t) small1[0]) * small2[1];

    a = ((uint128_t) small1[1]) * small2[0];

    a = ((uint128_t) small1[0]) * small2[2];

    a = ((uint128_t) small1[1]) * small2[1];

    a = ((uint128_t) small1[2]) * small2[0];

    a = ((uint128_t) small1[0]) * small2[3];

    a = ((uint128_t) small1[1]) * small2[2];

    a = ((uint128_t) small1[2]) * small2[1];

    a = ((uint128_t) small1[3]) * small2[0];

    a = ((uint128_t) small1[1]) * small2[3];

    a = ((uint128_t) small1[2]) * small2[2];

    a = ((uint128_t) small1[3]) * small2[1];

    a = ((uint128_t) small1[2]) * small2[3];

    a = ((uint128_t) small1[3]) * small2[2];

    a = ((uint128_t) small1[3]) * small2[3];
669 * felem_mul sets |out| = |in1| * |in2|
674 * out[i] < 7 * 2^64 < 2^67
676 static void felem_mul(longfelem out, const felem in1, const felem in2)
678 smallfelem small1, small2;
679 felem_shrink(small1, in1);
680 felem_shrink(small2, in2);
681 smallfelem_mul(out, small1, small2);
685 * felem_small_mul sets |out| = |small1| * |in2|
690 * out[i] < 7 * 2^64 < 2^67
692 static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
695 felem_shrink(small2, in2);
696 smallfelem_mul(out, small1, small2);
699 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
700 #define two100 (((limb)1) << 100)
701 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
702 /* zero100 is 0 mod p */
703 static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
/*-
 * Internal function for the different flavours of felem_reduce.
 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
 * On entry:
 *   out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
 *   out[1] >= in[7] + 2^32*in[4]
 *   out[2] >= in[5] + 2^32*in[5]
 *   out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
 * On exit:
 *   out[0] <= out[0] + in[4] + 2^32*in[5]
 *   out[1] <= out[1] + in[5] + 2^33*in[6]
 *   out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
 *   out[3] <= out[3] + 2^32*in[4] + 3*in[7]
 *
 * NOTE(review): the declaration of the 128-bit temporary c, the out[]
 * updates applying c, and some single-word terms of the 384/448 rows were
 * lost in extraction -- restore from the upstream source before building.
 */
static void felem_reduce_(felem out, const longfelem in)
    /* combine common terms from below */
    c = in[4] + (in[5] << 32);

    /* the remaining terms */
    /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
    out[1] -= (in[4] << 32);
    out[3] += (in[4] << 32);

    /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
    out[2] -= (in[5] << 32);

    /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
    out[0] -= (in[6] << 32);
    out[1] += (in[6] << 33);
    out[2] += (in[6] * 2);
    out[3] -= (in[6] << 32);

    /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
    out[0] -= (in[7] << 32);
    out[2] += (in[7] << 33);
    out[3] += (in[7] * 3);
754 * felem_reduce converts a longfelem into an felem.
755 * To be called directly after felem_square or felem_mul.
757 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
758 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
762 static void felem_reduce(felem out, const longfelem in)
764 out[0] = zero100[0] + in[0];
765 out[1] = zero100[1] + in[1];
766 out[2] = zero100[2] + in[2];
767 out[3] = zero100[3] + in[3];
769 felem_reduce_(out, in);
772 * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
773 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
774 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
775 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
777 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
778 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
779 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
780 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
785 * felem_reduce_zero105 converts a larger longfelem into an felem.
791 static void felem_reduce_zero105(felem out, const longfelem in)
793 out[0] = zero105[0] + in[0];
794 out[1] = zero105[1] + in[1];
795 out[2] = zero105[2] + in[2];
796 out[3] = zero105[3] + in[3];
798 felem_reduce_(out, in);
801 * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
802 * out[1] > 2^105 - 2^71 - 2^103 > 0
803 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
804 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
806 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
807 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
808 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
809 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
813 /* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
815 static void subtract_u64(u64* result, u64* carry, u64 v)
817 uint128_t r = *result;
819 *carry = (r >> 64) & 1;
/* felem_contract converts |in| to its unique, minimal representation.
 *
 * NOTE(review): the declarations of i and equal, the statement making
 * all_equal_so_far all-ones after its zero initialisation, the loop braces,
 * and the remaining equal &= equal << 8/4/2/1 folding steps were lost in
 * extraction -- restore from the upstream source before building. */
static void felem_contract(smallfelem out, const felem in)
    u64 all_equal_so_far = 0, result = 0, carry;

    felem_shrink(out, in);
    /* small is minimal except that the value might be > p */

    /* We are doing a constant time test if out >= kPrime. We need to
     * compare each u64, from most-significant to least significant. For
     * each one, if all words so far have been equal (m is all ones) then a
     * non-equal result is the answer. Otherwise we continue. */
    for (i = 3; i < 4; i--)
        uint128_t a = ((uint128_t) kPrime[i]) - out[i];
        /* if out[i] > kPrime[i] then a will underflow and the high
         * 64-bits will all be set. */
        result |= all_equal_so_far & ((u64) (a >> 64));

        /* if kPrime[i] == out[i] then |equal| will be all zeros and
         * the decrement will make it all ones. */
        equal = kPrime[i] ^ out[i];
        equal &= equal << 32;
        equal &= equal << 16;
        /* sign-extend the folded MSB into a full-width mask */
        equal = ((s64) equal) >> 63;

        all_equal_so_far &= equal;

    /* if all_equal_so_far is still all ones then the two values are equal
     * and so out >= kPrime is true. */
    result |= all_equal_so_far;

    /* if out >= kPrime then we subtract kPrime. */
    subtract_u64(&out[0], &carry, result & kPrime[0]);
    subtract_u64(&out[1], &carry, carry);
    subtract_u64(&out[2], &carry, carry);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[1], &carry, result & kPrime[1]);
    subtract_u64(&out[2], &carry, carry);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[2], &carry, result & kPrime[2]);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[3], &carry, result & kPrime[3]);
883 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
888 smallfelem_square(longtmp, in);
889 felem_reduce(tmp, longtmp);
890 felem_contract(out, tmp);
893 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
898 smallfelem_mul(longtmp, in1, in2);
899 felem_reduce(tmp, longtmp);
900 felem_contract(out, tmp);
/*-
 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
 * otherwise. |small| may equal p as well as 0 because the representation
 * is not minimal.
 *
 * NOTE(review): the declarations of result and is_p, the decrement before
 * each bit-folding chain, the is_p folding steps, and the combination of
 * is_zero with is_p plus the return statement were lost in extraction --
 * restore from the upstream source before building.
 */
static limb smallfelem_is_zero(const smallfelem small)
    u64 is_zero = small[0] | small[1] | small[2] | small[3];

    is_zero &= is_zero << 32;
    is_zero &= is_zero << 16;
    is_zero &= is_zero << 8;
    is_zero &= is_zero << 4;
    is_zero &= is_zero << 2;
    is_zero &= is_zero << 1;
    is_zero = ((s64) is_zero) >> 63;

    is_p = (small[0] ^ kPrime[0]) |
        (small[1] ^ kPrime[1]) |
        (small[2] ^ kPrime[2]) |
        (small[3] ^ kPrime[3]);
    is_p = ((s64) is_p) >> 63;

    result |= ((limb) is_zero) << 64;
944 static int smallfelem_is_zero_int(const smallfelem small)
946 return (int) (smallfelem_is_zero(small) & ((limb)1));
950 * felem_inv calculates |out| = |in|^{-1}
952 * Based on Fermat's Little Theorem:
954 * a^{p-1} = 1 (mod p)
955 * a^{p-2} = a^{-1} (mod p)
957 static void felem_inv(felem out, const felem in)
960 /* each e_I will hold |in|^{2^I - 1} */
961 felem e2, e4, e8, e16, e32, e64;
965 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
966 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
967 felem_assign(e2, ftmp);
968 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
969 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
970 felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
971 felem_assign(e4, ftmp);
972 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
973 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
974 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
975 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
976 felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
977 felem_assign(e8, ftmp);
978 for (i = 0; i < 8; i++) {
979 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
981 felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
982 felem_assign(e16, ftmp);
983 for (i = 0; i < 16; i++) {
984 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
986 felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
987 felem_assign(e32, ftmp);
988 for (i = 0; i < 32; i++) {
989 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
991 felem_assign(e64, ftmp);
992 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
993 for (i = 0; i < 192; i++) {
994 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
995 } /* 2^256 - 2^224 + 2^192 */
997 felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
998 for (i = 0; i < 16; i++) {
999 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
1001 felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
1002 for (i = 0; i < 8; i++) {
1003 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
1005 felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
1006 for (i = 0; i < 4; i++) {
1007 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
1009 felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
1010 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
1011 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
1012 felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
1013 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
1014 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1015 felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1017 felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1020 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1024 smallfelem_expand(tmp, in);
1025 felem_inv(tmp, tmp);
1026 felem_contract(out, tmp);
1033 * Building on top of the field operations we have the operations on the
1034 * elliptic curve group itself. Points on the curve are represented in Jacobian
1038 * point_double calculates 2*(x_in, y_in, z_in)
1040 * The method is taken from:
1041 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1043 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1044 * while x_out == y_in is not (maybe this works, but it's not tested). */
1046 point_double(felem x_out, felem y_out, felem z_out,
1047 const felem x_in, const felem y_in, const felem z_in)
1049 longfelem tmp, tmp2;
1050 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1051 smallfelem small1, small2;
1053 felem_assign(ftmp, x_in);
1054 /* ftmp[i] < 2^106 */
1055 felem_assign(ftmp2, x_in);
1056 /* ftmp2[i] < 2^106 */
1059 felem_square(tmp, z_in);
1060 felem_reduce(delta, tmp);
1061 /* delta[i] < 2^101 */
1064 felem_square(tmp, y_in);
1065 felem_reduce(gamma, tmp);
1066 /* gamma[i] < 2^101 */
1067 felem_shrink(small1, gamma);
1069 /* beta = x*gamma */
1070 felem_small_mul(tmp, small1, x_in);
1071 felem_reduce(beta, tmp);
1072 /* beta[i] < 2^101 */
1074 /* alpha = 3*(x-delta)*(x+delta) */
1075 felem_diff(ftmp, delta);
1076 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1077 felem_sum(ftmp2, delta);
1078 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1079 felem_scalar(ftmp2, 3);
1080 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1081 felem_mul(tmp, ftmp, ftmp2);
1082 felem_reduce(alpha, tmp);
1083 /* alpha[i] < 2^101 */
1084 felem_shrink(small2, alpha);
1086 /* x' = alpha^2 - 8*beta */
1087 smallfelem_square(tmp, small2);
1088 felem_reduce(x_out, tmp);
1089 felem_assign(ftmp, beta);
1090 felem_scalar(ftmp, 8);
1091 /* ftmp[i] < 8 * 2^101 = 2^104 */
1092 felem_diff(x_out, ftmp);
1093 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1095 /* z' = (y + z)^2 - gamma - delta */
1096 felem_sum(delta, gamma);
1097 /* delta[i] < 2^101 + 2^101 = 2^102 */
1098 felem_assign(ftmp, y_in);
1099 felem_sum(ftmp, z_in);
1100 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1101 felem_square(tmp, ftmp);
1102 felem_reduce(z_out, tmp);
1103 felem_diff(z_out, delta);
1104 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1106 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1107 felem_scalar(beta, 4);
1108 /* beta[i] < 4 * 2^101 = 2^103 */
1109 felem_diff_zero107(beta, x_out);
1110 /* beta[i] < 2^107 + 2^103 < 2^108 */
1111 felem_small_mul(tmp, small2, beta);
1112 /* tmp[i] < 7 * 2^64 < 2^67 */
1113 smallfelem_square(tmp2, small1);
1114 /* tmp2[i] < 7 * 2^64 */
1115 longfelem_scalar(tmp2, 8);
1116 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1117 longfelem_diff(tmp, tmp2);
1118 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1119 felem_reduce_zero105(y_out, tmp);
1120 /* y_out[i] < 2^106 */
1123 /* point_double_small is the same as point_double, except that it operates on
1126 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1127 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1129 felem felem_x_out, felem_y_out, felem_z_out;
1130 felem felem_x_in, felem_y_in, felem_z_in;
1132 smallfelem_expand(felem_x_in, x_in);
1133 smallfelem_expand(felem_y_in, y_in);
1134 smallfelem_expand(felem_z_in, z_in);
1135 point_double(felem_x_out, felem_y_out, felem_z_out,
1136 felem_x_in, felem_y_in, felem_z_in);
1137 felem_shrink(x_out, felem_x_out);
1138 felem_shrink(y_out, felem_y_out);
1139 felem_shrink(z_out, felem_z_out);
1142 /* copy_conditional copies in to out iff mask is all ones. */
1144 copy_conditional(felem out, const felem in, limb mask)
1147 for (i = 0; i < NLIMBS; ++i)
1149 const limb tmp = mask & (in[i] ^ out[i]);
1154 /* copy_small_conditional copies in to out iff mask is all ones. */
1156 copy_small_conditional(felem out, const smallfelem in, limb mask)
1159 const u64 mask64 = mask;
1160 for (i = 0; i < NLIMBS; ++i)
1162 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1167 * point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1169 * The method is taken from:
1170 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1171 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1173 * This function includes a branch for checking whether the two input points
1174 * are equal, (while not equal to the point at infinity). This case never
1175 * happens during single point multiplication, so there is no timing leak for
1176 * ECDH or ECDSA signing. */
1177 static void point_add(felem x3, felem y3, felem z3,
1178 const felem x1, const felem y1, const felem z1,
1179 const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
1181 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1182 longfelem tmp, tmp2;
1183 smallfelem small1, small2, small3, small4, small5;
1184 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1186 felem_shrink(small3, z1);
1188 z1_is_zero = smallfelem_is_zero(small3);
1189 z2_is_zero = smallfelem_is_zero(z2);
1191 /* ftmp = z1z1 = z1**2 */
1192 smallfelem_square(tmp, small3);
1193 felem_reduce(ftmp, tmp);
1194 /* ftmp[i] < 2^101 */
1195 felem_shrink(small1, ftmp);
1199 /* ftmp2 = z2z2 = z2**2 */
1200 smallfelem_square(tmp, z2);
1201 felem_reduce(ftmp2, tmp);
1202 /* ftmp2[i] < 2^101 */
1203 felem_shrink(small2, ftmp2);
1205 felem_shrink(small5, x1);
1207 /* u1 = ftmp3 = x1*z2z2 */
1208 smallfelem_mul(tmp, small5, small2);
1209 felem_reduce(ftmp3, tmp);
1210 /* ftmp3[i] < 2^101 */
1212 /* ftmp5 = z1 + z2 */
1213 felem_assign(ftmp5, z1);
1214 felem_small_sum(ftmp5, z2);
1215 /* ftmp5[i] < 2^107 */
1217 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1218 felem_square(tmp, ftmp5);
1219 felem_reduce(ftmp5, tmp);
1220 /* ftmp2 = z2z2 + z1z1 */
1221 felem_sum(ftmp2, ftmp);
1222 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1223 felem_diff(ftmp5, ftmp2);
1224 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1226 /* ftmp2 = z2 * z2z2 */
1227 smallfelem_mul(tmp, small2, z2);
1228 felem_reduce(ftmp2, tmp);
1230 /* s1 = ftmp2 = y1 * z2**3 */
1231 felem_mul(tmp, y1, ftmp2);
1232 felem_reduce(ftmp6, tmp);
1233 /* ftmp6[i] < 2^101 */
1237 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1239 /* u1 = ftmp3 = x1*z2z2 */
1240 felem_assign(ftmp3, x1);
1241 /* ftmp3[i] < 2^106 */
1244 felem_assign(ftmp5, z1);
1245 felem_scalar(ftmp5, 2);
1246 /* ftmp5[i] < 2*2^106 = 2^107 */
1248 /* s1 = ftmp2 = y1 * z2**3 */
1249 felem_assign(ftmp6, y1);
1250 /* ftmp6[i] < 2^106 */
1254 smallfelem_mul(tmp, x2, small1);
1255 felem_reduce(ftmp4, tmp);
1257 /* h = ftmp4 = u2 - u1 */
1258 felem_diff_zero107(ftmp4, ftmp3);
1259 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1260 felem_shrink(small4, ftmp4);
1262 x_equal = smallfelem_is_zero(small4);
1264 /* z_out = ftmp5 * h */
1265 felem_small_mul(tmp, small4, ftmp5);
1266 felem_reduce(z_out, tmp);
1267 /* z_out[i] < 2^101 */
1269 /* ftmp = z1 * z1z1 */
1270 smallfelem_mul(tmp, small1, small3);
1271 felem_reduce(ftmp, tmp);
1273 /* s2 = tmp = y2 * z1**3 */
1274 felem_small_mul(tmp, y2, ftmp);
1275 felem_reduce(ftmp5, tmp);
1277 /* r = ftmp5 = (s2 - s1)*2 */
1278 felem_diff_zero107(ftmp5, ftmp6);
1279 /* ftmp5[i] < 2^107 + 2^107 = 2^108*/
1280 felem_scalar(ftmp5, 2);
1281 /* ftmp5[i] < 2^109 */
1282 felem_shrink(small1, ftmp5);
1283 y_equal = smallfelem_is_zero(small1);
1285 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1287 point_double(x3, y3, z3, x1, y1, z1);
1291 /* I = ftmp = (2h)**2 */
1292 felem_assign(ftmp, ftmp4);
1293 felem_scalar(ftmp, 2);
1294 /* ftmp[i] < 2*2^108 = 2^109 */
1295 felem_square(tmp, ftmp);
1296 felem_reduce(ftmp, tmp);
1298 /* J = ftmp2 = h * I */
1299 felem_mul(tmp, ftmp4, ftmp);
1300 felem_reduce(ftmp2, tmp);
1302 /* V = ftmp4 = U1 * I */
1303 felem_mul(tmp, ftmp3, ftmp);
1304 felem_reduce(ftmp4, tmp);
1306 /* x_out = r**2 - J - 2V */
1307 smallfelem_square(tmp, small1);
1308 felem_reduce(x_out, tmp);
1309 felem_assign(ftmp3, ftmp4);
1310 felem_scalar(ftmp4, 2);
1311 felem_sum(ftmp4, ftmp2);
1312 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1313 felem_diff(x_out, ftmp4);
1314 /* x_out[i] < 2^105 + 2^101 */
1316 /* y_out = r(V-x_out) - 2 * s1 * J */
1317 felem_diff_zero107(ftmp3, x_out);
1318 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1319 felem_small_mul(tmp, small1, ftmp3);
1320 felem_mul(tmp2, ftmp6, ftmp2);
1321 longfelem_scalar(tmp2, 2);
1322 /* tmp2[i] < 2*2^67 = 2^68 */
1323 longfelem_diff(tmp, tmp2);
1324 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1325 felem_reduce_zero105(y_out, tmp);
1326 /* y_out[i] < 2^106 */
1328 copy_small_conditional(x_out, x2, z1_is_zero);
1329 copy_conditional(x_out, x1, z2_is_zero);
1330 copy_small_conditional(y_out, y2, z1_is_zero);
1331 copy_conditional(y_out, y1, z2_is_zero);
1332 copy_small_conditional(z_out, z2, z1_is_zero);
1333 copy_conditional(z_out, z1, z2_is_zero);
1334 felem_assign(x3, x_out);
1335 felem_assign(y3, y_out);
1336 felem_assign(z3, z_out);
1339 /* point_add_small is the same as point_add, except that it operates on
1341 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1342 smallfelem x1, smallfelem y1, smallfelem z1,
1343 smallfelem x2, smallfelem y2, smallfelem z2)
1345 felem felem_x3, felem_y3, felem_z3;
1346 felem felem_x1, felem_y1, felem_z1;
1347 smallfelem_expand(felem_x1, x1);
1348 smallfelem_expand(felem_y1, y1);
1349 smallfelem_expand(felem_z1, z1);
1350 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1351 felem_shrink(x3, felem_x3);
1352 felem_shrink(y3, felem_y3);
1353 felem_shrink(z3, felem_z3);
/******************************************************************************/
/*
 * Base point pre computation
 * --------------------------
 *
 * Two different sorts of precomputed tables are used in the following code.
 * Each contain various points on the curve, where each point is three field
 * elements (x, y, z).
 *
 * For the base point table, z is usually 1 (0 for the point at infinity).
 * This table has 2 * 16 elements, starting with the following:
 * index | bits    | point
 * ------+---------+------------------------------
 *     0 | 0 0 0 0 | the point at infinity
 *     1 | 0 0 0 1 | G
 *     2 | 0 0 1 0 | 2^64G
 *     3 | 0 0 1 1 | (2^64 + 1)G
 *     4 | 0 1 0 0 | 2^128G
 *     5 | 0 1 0 1 | (2^128 + 1)G
 *     6 | 0 1 1 0 | (2^128 + 2^64)G
 *     7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
 *     8 | 1 0 0 0 | 2^192G
 *     9 | 1 0 0 1 | (2^192 + 1)G
 *    10 | 1 0 1 0 | (2^192 + 2^64)G
 *    11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
 *    12 | 1 1 0 0 | (2^192 + 2^128)G
 *    13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
 *    14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
 *    15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
 * followed by a copy of this with each element multiplied by 2^32.
 *
 * The reason for this is so that we can clock bits into four different
 * locations when doing simple scalar multiplies against the base point,
 * and then another four locations using the second 16 elements.
 *
 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1392 /* gmul is the table of precomputed base points */
1393 static const smallfelem gmul[2][16][3] =
1397 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1398 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1400 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1401 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1403 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1404 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1406 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1407 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1409 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1410 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1412 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1413 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1415 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1416 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1418 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1419 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1421 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1422 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1424 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1425 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1427 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1428 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1430 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1431 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1433 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1434 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1436 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1437 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1439 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1440 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1445 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1446 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1448 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1449 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1451 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1452 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1454 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1455 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1457 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1458 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1460 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1461 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1463 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1464 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1466 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1467 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1469 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1470 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1472 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1473 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1475 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1476 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1478 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1479 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1481 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1482 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1484 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1485 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1487 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1488 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1491 /* select_point selects the |idx|th point from a precomputation table and
1492 * copies it to out. */
1493 static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1496 u64 *outlimbs = &out[0][0];
1497 memset(outlimbs, 0, 3 * sizeof(smallfelem));
1499 for (i = 0; i < size; i++)
1501 const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1508 for (j = 0; j < NLIMBS * 3; j++)
1509 outlimbs[j] |= inlimbs[j] & mask;
1513 /* get_bit returns the |i|th bit in |in| */
1514 static char get_bit(const felem_bytearray in, int i)
1516 if ((i < 0) || (i >= 256))
1518 return (in[i >> 3] >> (i & 7)) & 1;
1521 /* Interleaved point multiplication using precomputed point multiples:
1522 * The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[],
1523 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1524 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1525 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1526 static void batch_mul(felem x_out, felem y_out, felem z_out,
1527 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1528 const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1531 unsigned num, gen_mul = (g_scalar != NULL);
1537 /* set nq to the point at infinity */
1538 memset(nq, 0, 3 * sizeof(felem));
1540 /* Loop over all scalars msb-to-lsb, interleaving additions
1541 * of multiples of the generator (two in each of the last 32 rounds)
1542 * and additions of other points multiples (every 5th round).
1544 skip = 1; /* save two point operations in the first round */
1545 for (i = (num_points ? 255 : 31); i >= 0; --i)
1549 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1551 /* add multiples of the generator */
1552 if (gen_mul && (i <= 31))
1554 /* first, look 32 bits upwards */
1555 bits = get_bit(g_scalar, i + 224) << 3;
1556 bits |= get_bit(g_scalar, i + 160) << 2;
1557 bits |= get_bit(g_scalar, i + 96) << 1;
1558 bits |= get_bit(g_scalar, i + 32);
1559 /* select the point to add, in constant time */
1560 select_point(bits, 16, g_pre_comp[1], tmp);
1564 point_add(nq[0], nq[1], nq[2],
1565 nq[0], nq[1], nq[2],
1566 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1570 smallfelem_expand(nq[0], tmp[0]);
1571 smallfelem_expand(nq[1], tmp[1]);
1572 smallfelem_expand(nq[2], tmp[2]);
1576 /* second, look at the current position */
1577 bits = get_bit(g_scalar, i + 192) << 3;
1578 bits |= get_bit(g_scalar, i + 128) << 2;
1579 bits |= get_bit(g_scalar, i + 64) << 1;
1580 bits |= get_bit(g_scalar, i);
1581 /* select the point to add, in constant time */
1582 select_point(bits, 16, g_pre_comp[0], tmp);
1583 point_add(nq[0], nq[1], nq[2],
1584 nq[0], nq[1], nq[2],
1585 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1588 /* do other additions every 5 doublings */
1589 if (num_points && (i % 5 == 0))
1591 /* loop over all scalars */
1592 for (num = 0; num < num_points; ++num)
1594 bits = get_bit(scalars[num], i + 4) << 5;
1595 bits |= get_bit(scalars[num], i + 3) << 4;
1596 bits |= get_bit(scalars[num], i + 2) << 3;
1597 bits |= get_bit(scalars[num], i + 1) << 2;
1598 bits |= get_bit(scalars[num], i) << 1;
1599 bits |= get_bit(scalars[num], i - 1);
1600 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1602 /* select the point to add or subtract, in constant time */
1603 select_point(digit, 17, pre_comp[num], tmp);
1604 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
1605 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1606 felem_contract(tmp[1], ftmp);
1610 point_add(nq[0], nq[1], nq[2],
1611 nq[0], nq[1], nq[2],
1612 mixed, tmp[0], tmp[1], tmp[2]);
1616 smallfelem_expand(nq[0], tmp[0]);
1617 smallfelem_expand(nq[1], tmp[1]);
1618 smallfelem_expand(nq[2], tmp[2]);
1624 felem_assign(x_out, nq[0]);
1625 felem_assign(y_out, nq[1]);
1626 felem_assign(z_out, nq[2]);
1629 /* Precomputation for the group generator. */
1631 smallfelem g_pre_comp[2][16][3];
1633 } NISTP256_PRE_COMP;
1635 const EC_METHOD *EC_GFp_nistp256_method(void)
1637 static const EC_METHOD ret = {
1638 EC_FLAGS_DEFAULT_OCT,
1639 NID_X9_62_prime_field,
1640 ec_GFp_nistp256_group_init,
1641 ec_GFp_simple_group_finish,
1642 ec_GFp_simple_group_clear_finish,
1643 ec_GFp_nist_group_copy,
1644 ec_GFp_nistp256_group_set_curve,
1645 ec_GFp_simple_group_get_curve,
1646 ec_GFp_simple_group_get_degree,
1647 ec_GFp_simple_group_check_discriminant,
1648 ec_GFp_simple_point_init,
1649 ec_GFp_simple_point_finish,
1650 ec_GFp_simple_point_clear_finish,
1651 ec_GFp_simple_point_copy,
1652 ec_GFp_simple_point_set_to_infinity,
1653 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1654 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1655 ec_GFp_simple_point_set_affine_coordinates,
1656 ec_GFp_nistp256_point_get_affine_coordinates,
1657 0 /* point_set_compressed_coordinates */,
1662 ec_GFp_simple_invert,
1663 ec_GFp_simple_is_at_infinity,
1664 ec_GFp_simple_is_on_curve,
1666 ec_GFp_simple_make_affine,
1667 ec_GFp_simple_points_make_affine,
1668 ec_GFp_nistp256_points_mul,
1669 ec_GFp_nistp256_precompute_mult,
1670 ec_GFp_nistp256_have_precompute_mult,
1671 ec_GFp_nist_field_mul,
1672 ec_GFp_nist_field_sqr,
1674 0 /* field_encode */,
1675 0 /* field_decode */,
1676 0 /* field_set_to_one */ };
1681 /******************************************************************************/
1682 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1685 static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1687 NISTP256_PRE_COMP *ret = NULL;
1688 ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1691 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1694 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1695 ret->references = 1;
1699 static void *nistp256_pre_comp_dup(void *src_)
1701 NISTP256_PRE_COMP *src = src_;
1703 /* no need to actually copy, these objects never change! */
1704 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1709 static void nistp256_pre_comp_free(void *pre_)
1712 NISTP256_PRE_COMP *pre = pre_;
1717 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1724 static void nistp256_pre_comp_clear_free(void *pre_)
1727 NISTP256_PRE_COMP *pre = pre_;
1732 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1736 OPENSSL_cleanse(pre, sizeof *pre);
1740 /******************************************************************************/
1741 /* OPENSSL EC_METHOD FUNCTIONS
1744 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1747 ret = ec_GFp_simple_group_init(group);
1748 group->a_is_minus3 = 1;
1752 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1753 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1756 BN_CTX *new_ctx = NULL;
1757 BIGNUM *curve_p, *curve_a, *curve_b;
1760 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1762 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1763 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1764 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1765 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1766 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1767 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1768 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1769 (BN_cmp(curve_b, b)))
1771 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1772 EC_R_WRONG_CURVE_PARAMETERS);
1775 group->field_mod_func = BN_nist_mod_256;
1776 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1779 if (new_ctx != NULL)
1780 BN_CTX_free(new_ctx);
1784 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1785 * (X', Y') = (X/Z^2, Y/Z^3) */
1786 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1787 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1789 felem z1, z2, x_in, y_in;
1790 smallfelem x_out, y_out;
1793 if (EC_POINT_is_at_infinity(group, point))
1795 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1796 EC_R_POINT_AT_INFINITY);
1799 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1800 (!BN_to_felem(z1, &point->Z))) return 0;
1802 felem_square(tmp, z2); felem_reduce(z1, tmp);
1803 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1804 felem_contract(x_out, x_in);
1807 if (!smallfelem_to_BN(x, x_out)) {
1808 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1813 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1814 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1815 felem_contract(y_out, y_in);
1818 if (!smallfelem_to_BN(y, y_out))
1820 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1828 static void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */])
1830 /* Runs in constant time, unless an input is the point at infinity
1831 * (which normally shouldn't happen). */
1832 ec_GFp_nistp_points_make_affine_internal(
1837 (void (*)(void *)) smallfelem_one,
1838 (int (*)(const void *)) smallfelem_is_zero_int,
1839 (void (*)(void *, const void *)) smallfelem_assign,
1840 (void (*)(void *, const void *)) smallfelem_square_contract,
1841 (void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
1842 (void (*)(void *, const void *)) smallfelem_inv_contract,
1843 (void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */);
1846 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1847 * Result is stored in r (r can equal one of the inputs). */
1848 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1849 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1850 const BIGNUM *scalars[], BN_CTX *ctx)
1855 BN_CTX *new_ctx = NULL;
1856 BIGNUM *x, *y, *z, *tmp_scalar;
1857 felem_bytearray g_secret;
1858 felem_bytearray *secrets = NULL;
1859 smallfelem (*pre_comp)[17][3] = NULL;
1860 smallfelem *tmp_smallfelems = NULL;
1861 felem_bytearray tmp;
1862 unsigned i, num_bytes;
1863 int have_pre_comp = 0;
1864 size_t num_points = num;
1865 smallfelem x_in, y_in, z_in;
1866 felem x_out, y_out, z_out;
1867 NISTP256_PRE_COMP *pre = NULL;
1868 const smallfelem (*g_pre_comp)[16][3] = NULL;
1869 EC_POINT *generator = NULL;
1870 const EC_POINT *p = NULL;
1871 const BIGNUM *p_scalar = NULL;
1874 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1876 if (((x = BN_CTX_get(ctx)) == NULL) ||
1877 ((y = BN_CTX_get(ctx)) == NULL) ||
1878 ((z = BN_CTX_get(ctx)) == NULL) ||
1879 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1884 pre = EC_EX_DATA_get_data(group->extra_data,
1885 nistp256_pre_comp_dup, nistp256_pre_comp_free,
1886 nistp256_pre_comp_clear_free);
1888 /* we have precomputation, try to use it */
1889 g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
1891 /* try to use the standard precomputation */
1892 g_pre_comp = &gmul[0];
1893 generator = EC_POINT_new(group);
1894 if (generator == NULL)
1896 /* get the generator from precomputation */
1897 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1898 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1899 !smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1901 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1904 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1905 generator, x, y, z, ctx))
1907 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1908 /* precomputation matches generator */
1911 /* we don't have valid precomputation:
1912 * treat the generator as a random point */
1917 if (num_points >= 3)
1919 /* unless we precompute multiples for just one or two points,
1920 * converting those into affine form is time well spent */
1923 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1924 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
1926 tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
1927 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1929 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1933 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1934 * i.e., they contribute nothing to the linear combination */
1935 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1936 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1937 for (i = 0; i < num_points; ++i)
1940 /* we didn't have a valid precomputation, so we pick
1943 p = EC_GROUP_get0_generator(group);
1947 /* the i^th point */
1950 p_scalar = scalars[i];
1952 if ((p_scalar != NULL) && (p != NULL))
1954 /* reduce scalar to 0 <= scalar < 2^256 */
1955 if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
1957 /* this is an unusual input, and we don't guarantee
1958 * constant-timeness */
1959 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1961 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1964 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1967 num_bytes = BN_bn2bin(p_scalar, tmp);
1968 flip_endian(secrets[i], tmp, num_bytes);
1969 /* precompute multiples */
1970 if ((!BN_to_felem(x_out, &p->X)) ||
1971 (!BN_to_felem(y_out, &p->Y)) ||
1972 (!BN_to_felem(z_out, &p->Z))) goto err;
1973 felem_shrink(pre_comp[i][1][0], x_out);
1974 felem_shrink(pre_comp[i][1][1], y_out);
1975 felem_shrink(pre_comp[i][1][2], z_out);
1976 for (j = 2; j <= 16; ++j)
1981 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1982 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1983 pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1988 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1989 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1995 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
1998 /* the scalar for the generator */
1999 if ((scalar != NULL) && (have_pre_comp))
2001 memset(g_secret, 0, sizeof(g_secret));
2002 /* reduce scalar to 0 <= scalar < 2^256 */
2003 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
2005 /* this is an unusual input, and we don't guarantee
2006 * constant-timeness */
2007 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
2009 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2012 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2015 num_bytes = BN_bn2bin(scalar, tmp);
2016 flip_endian(g_secret, tmp, num_bytes);
2017 /* do the multiplication with generator precomputation*/
2018 batch_mul(x_out, y_out, z_out,
2019 (const felem_bytearray (*)) secrets, num_points,
2021 mixed, (const smallfelem (*)[17][3]) pre_comp,
2025 /* do the multiplication without generator precomputation */
2026 batch_mul(x_out, y_out, z_out,
2027 (const felem_bytearray (*)) secrets, num_points,
2028 NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
2029 /* reduce the output to its unique minimal representation */
2030 felem_contract(x_in, x_out);
2031 felem_contract(y_in, y_out);
2032 felem_contract(z_in, z_out);
2033 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2034 (!smallfelem_to_BN(z, z_in)))
2036 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2039 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2043 if (generator != NULL)
2044 EC_POINT_free(generator);
2045 if (new_ctx != NULL)
2046 BN_CTX_free(new_ctx);
2047 if (secrets != NULL)
2048 OPENSSL_free(secrets);
2049 if (pre_comp != NULL)
2050 OPENSSL_free(pre_comp);
2051 if (tmp_smallfelems != NULL)
2052 OPENSSL_free(tmp_smallfelems);
2056 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2059 NISTP256_PRE_COMP *pre = NULL;
2061 BN_CTX *new_ctx = NULL;
2063 EC_POINT *generator = NULL;
2064 smallfelem tmp_smallfelems[32];
2065 felem x_tmp, y_tmp, z_tmp;
2067 /* throw away old precomputation */
2068 EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2069 nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2071 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
2073 if (((x = BN_CTX_get(ctx)) == NULL) ||
2074 ((y = BN_CTX_get(ctx)) == NULL))
2076 /* get the generator */
2077 if (group->generator == NULL) goto err;
2078 generator = EC_POINT_new(group);
2079 if (generator == NULL)
2081 BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
2082 BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
2083 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2085 if ((pre = nistp256_pre_comp_new()) == NULL)
2087 /* if the generator is the standard one, use built-in precomputation */
2088 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2090 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2094 if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2095 (!BN_to_felem(y_tmp, &group->generator->Y)) ||
2096 (!BN_to_felem(z_tmp, &group->generator->Z)))
2098 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2099 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2100 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2101 /* compute 2^64*G, 2^128*G, 2^192*G for the first table,
2102 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
2104 for (i = 1; i <= 8; i <<= 1)
2107 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2108 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
2109 for (j = 0; j < 31; ++j)
2112 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2113 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2118 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2119 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2120 for (j = 0; j < 31; ++j)
2123 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2124 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
2127 for (i = 0; i < 2; i++)
2129 /* g_pre_comp[i][0] is the point at infinity */
2130 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2131 /* the remaining multiples */
2132 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2134 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2135 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2136 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2137 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2139 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2140 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2141 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2142 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2144 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2145 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2146 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2147 /* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
2149 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2150 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2151 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2152 for (j = 1; j < 8; ++j)
2154 /* odd multiples: add G resp. 2^32*G */
2156 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
2157 pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
2158 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
2161 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2163 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2164 nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2170 if (generator != NULL)
2171 EC_POINT_free(generator);
2172 if (new_ctx != NULL)
2173 BN_CTX_free(new_ctx);
2175 nistp256_pre_comp_free(pre);
2179 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2181 if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2182 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
/* Keep the translation unit non-empty when the 64-bit NISTP code is compiled
 * out (ISO C forbids an empty translation unit). */
static void *dummy=&dummy;