1 /* crypto/ec/ecp_nistp256.c */
3 * Written by Adam Langley (Google) for the OpenSSL project
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
34 #include <openssl/err.h>
37 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
38 /* even with gcc, the typedef won't work for 32-bit platforms */
39 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
40 typedef __int128_t int128_t;
42 #error "Need GCC 3.1 or later to define type uint128_t"
50 /* The underlying field.
52 * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
53 * of this field into 32 bytes. We call this an felem_bytearray. */
55 typedef u8 felem_bytearray[32];
57 /* These are the parameters of P256, taken from FIPS 186-3, page 86. These
58 * values are big-endian. */
59 static const felem_bytearray nistp256_curve_params[5] = {
60 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
61 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
62 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
64 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
65 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
66 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
68 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
69 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
70 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
71 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
72 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
73 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
74 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
75 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
76 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
77 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
78 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
79 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
82 /* The representation of field elements.
83 * ------------------------------------
85 * We represent field elements with either four 128-bit values, eight 128-bit
86 * values, or four 64-bit values. The field element represented is:
87 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
89 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
91 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
92 * apart, but are 128-bits wide, the most significant bits of each limb overlap
93 * with the least significant bits of the next.
95 * A field element with four limbs is an 'felem'. One with eight limbs is a
98 * A field element with four, 64-bit values is called a 'smallfelem'. Small
99 * values are used as intermediate values before multiplication.
104 typedef uint128_t limb;
105 typedef limb felem[NLIMBS];
106 typedef limb longfelem[NLIMBS * 2];
107 typedef u64 smallfelem[NLIMBS];
109 /* This is the value of the prime as four 64-bit words, little-endian. */
110 static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
111 static const u64 bottom63bits = 0x7ffffffffffffffful;
113 /* bin32_to_felem takes a little-endian byte array and converts it into felem
114 * form. This assumes that the CPU is little-endian. */
115 static void bin32_to_felem(felem out, const u8 in[32])
117 out[0] = *((u64*) &in[0]);
118 out[1] = *((u64*) &in[8]);
119 out[2] = *((u64*) &in[16]);
120 out[3] = *((u64*) &in[24]);
123 /* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
124 * 32 byte array. This assumes that the CPU is little-endian. */
125 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
127 *((u64*) &out[0]) = in[0];
128 *((u64*) &out[8]) = in[1];
129 *((u64*) &out[16]) = in[2];
130 *((u64*) &out[24]) = in[3];
133 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
134 static void flip_endian(u8 *out, const u8 *in, unsigned len)
137 for (i = 0; i < len; ++i)
138 out[i] = in[len-1-i];
141 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
142 static int BN_to_felem(felem out, const BIGNUM *bn)
144 felem_bytearray b_in;
145 felem_bytearray b_out;
148 /* BN_bn2bin eats leading zeroes */
149 memset(b_out, 0, sizeof b_out);
150 num_bytes = BN_num_bytes(bn);
151 if (num_bytes > sizeof b_out)
153 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
156 if (BN_is_negative(bn))
158 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
161 num_bytes = BN_bn2bin(bn, b_in);
162 flip_endian(b_out, b_in, num_bytes);
163 bin32_to_felem(out, b_out);
167 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
168 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
170 felem_bytearray b_in, b_out;
171 smallfelem_to_bin32(b_in, in);
172 flip_endian(b_out, b_in, sizeof b_out);
173 return BN_bin2bn(b_out, sizeof b_out, out);
178 * ---------------- */
180 static void smallfelem_one(smallfelem out)
188 static void smallfelem_assign(smallfelem out, const smallfelem in)
196 static void felem_assign(felem out, const felem in)
204 /* felem_sum sets out = out + in. */
205 static void felem_sum(felem out, const felem in)
213 /* felem_small_sum sets out = out + in. */
214 static void felem_small_sum(felem out, const smallfelem in)
222 /* felem_scalar sets out = out * scalar */
223 static void felem_scalar(felem out, const u64 scalar)
231 /* longfelem_scalar sets out = out * scalar */
232 static void longfelem_scalar(longfelem out, const u64 scalar)
244 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
245 #define two105 (((limb)1) << 105)
246 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
248 /* zero105 is 0 mod p */
249 static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
251 /* smallfelem_neg sets |out| to |-small|
253 * out[i] < out[i] + 2^105
255 static void smallfelem_neg(felem out, const smallfelem small)
257 /* In order to prevent underflow, we subtract from 0 mod p. */
258 out[0] = zero105[0] - small[0];
259 out[1] = zero105[1] - small[1];
260 out[2] = zero105[2] - small[2];
261 out[3] = zero105[3] - small[3];
264 /* felem_diff subtracts |in| from |out|
268 * out[i] < out[i] + 2^105
270 static void felem_diff(felem out, const felem in)
272 /* In order to prevent underflow, we add 0 mod p before subtracting. */
273 out[0] += zero105[0];
274 out[1] += zero105[1];
275 out[2] += zero105[2];
276 out[3] += zero105[3];
284 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
285 #define two107 (((limb)1) << 107)
286 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
288 /* zero107 is 0 mod p */
289 static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
291 /* An alternative felem_diff for larger inputs |in|
292 * felem_diff_zero107 subtracts |in| from |out|
296 * out[i] < out[i] + 2^107
298 static void felem_diff_zero107(felem out, const felem in)
300 /* In order to prevent underflow, we add 0 mod p before subtracting. */
301 out[0] += zero107[0];
302 out[1] += zero107[1];
303 out[2] += zero107[2];
304 out[3] += zero107[3];
312 /* longfelem_diff subtracts |in| from |out|
316 * out[i] < out[i] + 2^70 + 2^40
318 static void longfelem_diff(longfelem out, const longfelem in)
320 static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
321 static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
322 static const limb two70 = (((limb)1) << 70);
323 static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
324 static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
326 /* add 0 mod p to avoid underflow */
330 out[3] += two70m40m38p6;
336 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
347 #define two64m0 (((limb)1) << 64) - 1
348 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
349 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
350 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
352 /* zero110 is 0 mod p */
353 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
355 /* felem_shrink converts an felem into a smallfelem. The result isn't quite
356 * minimal as the value may be greater than p.
363 static void felem_shrink(smallfelem out, const felem in)
368 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
371 tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
374 tmp[2] = zero110[2] + (u64) in[2];
375 tmp[0] = zero110[0] + in[0];
376 tmp[1] = zero110[1] + in[1];
377 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
379 /* We perform two partial reductions where we eliminate the
380 * high-word of tmp[3]. We don't update the other words till the end.
382 a = tmp[3] >> 64; /* a < 2^46 */
383 tmp[3] = (u64) tmp[3];
385 tmp[3] += ((limb)a) << 32;
389 a = tmp[3] >> 64; /* a < 2^15 */
390 b += a; /* b < 2^46 + 2^15 < 2^47 */
391 tmp[3] = (u64) tmp[3];
393 tmp[3] += ((limb)a) << 32;
394 /* tmp[3] < 2^64 + 2^47 */
396 /* This adjusts the other two words to complete the two partial
399 tmp[1] -= (((limb)b) << 32);
401 /* In order to make space in tmp[3] for the carry from 2 -> 3, we
402 * conditionally subtract kPrime if tmp[3] is large enough. */
404 /* As tmp[3] < 2^65, high is either 1 or 0 */
408 * all ones if the high word of tmp[3] is 1
409 * all zeros if the high word of tmp[3] if 0 */
413 * all ones if the MSB of low is 1
414 * all zeros if the MSB of low if 0 */
417 /* if low was greater than kPrime3Test then the MSB is zero */
421 * all ones if low was > kPrime3Test
422 * all zeros if low was <= kPrime3Test */
423 mask = (mask & low) | high;
424 tmp[0] -= mask & kPrime[0];
425 tmp[1] -= mask & kPrime[1];
426 /* kPrime[2] is zero, so omitted */
427 tmp[3] -= mask & kPrime[3];
428 /* tmp[3] < 2**64 - 2**32 + 1 */
430 tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
431 tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
432 tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
441 /* smallfelem_expand converts a smallfelem to an felem */
442 static void smallfelem_expand(felem out, const smallfelem in)
450 /* smallfelem_square sets |out| = |small|^2
454 * out[i] < 7 * 2^64 < 2^67
456 static void smallfelem_square(longfelem out, const smallfelem small)
461 a = ((uint128_t) small[0]) * small[0];
467 a = ((uint128_t) small[0]) * small[1];
474 a = ((uint128_t) small[0]) * small[2];
481 a = ((uint128_t) small[0]) * small[3];
487 a = ((uint128_t) small[1]) * small[2];
494 a = ((uint128_t) small[1]) * small[1];
500 a = ((uint128_t) small[1]) * small[3];
507 a = ((uint128_t) small[2]) * small[3];
515 a = ((uint128_t) small[2]) * small[2];
521 a = ((uint128_t) small[3]) * small[3];
528 /* felem_square sets |out| = |in|^2
532 * out[i] < 7 * 2^64 < 2^67
534 static void felem_square(longfelem out, const felem in)
537 felem_shrink(small, in);
538 smallfelem_square(out, small);
541 /* smallfelem_mul sets |out| = |small1| * |small2|
546 * out[i] < 7 * 2^64 < 2^67
548 static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
553 a = ((uint128_t) small1[0]) * small2[0];
560 a = ((uint128_t) small1[0]) * small2[1];
566 a = ((uint128_t) small1[1]) * small2[0];
573 a = ((uint128_t) small1[0]) * small2[2];
579 a = ((uint128_t) small1[1]) * small2[1];
585 a = ((uint128_t) small1[2]) * small2[0];
592 a = ((uint128_t) small1[0]) * small2[3];
598 a = ((uint128_t) small1[1]) * small2[2];
604 a = ((uint128_t) small1[2]) * small2[1];
610 a = ((uint128_t) small1[3]) * small2[0];
617 a = ((uint128_t) small1[1]) * small2[3];
623 a = ((uint128_t) small1[2]) * small2[2];
629 a = ((uint128_t) small1[3]) * small2[1];
636 a = ((uint128_t) small1[2]) * small2[3];
642 a = ((uint128_t) small1[3]) * small2[2];
649 a = ((uint128_t) small1[3]) * small2[3];
656 /* felem_mul sets |out| = |in1| * |in2|
661 * out[i] < 7 * 2^64 < 2^67
663 static void felem_mul(longfelem out, const felem in1, const felem in2)
665 smallfelem small1, small2;
666 felem_shrink(small1, in1);
667 felem_shrink(small2, in2);
668 smallfelem_mul(out, small1, small2);
671 /* felem_small_mul sets |out| = |small1| * |in2|
676 * out[i] < 7 * 2^64 < 2^67
678 static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
681 felem_shrink(small2, in2);
682 smallfelem_mul(out, small1, small2);
685 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
686 #define two100 (((limb)1) << 100)
687 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
688 /* zero100 is 0 mod p */
689 static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
691 /* Internal function for the different flavours of felem_reduce.
692 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
694 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
695 * out[1] >= in[7] + 2^32*in[4]
696 * out[2] >= in[5] + 2^32*in[5]
697 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
699 * out[0] <= out[0] + in[4] + 2^32*in[5]
700 * out[1] <= out[1] + in[5] + 2^33*in[6]
701 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
702 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
704 static void felem_reduce_(felem out, const longfelem in)
707 /* combine common terms from below */
708 c = in[4] + (in[5] << 32);
716 /* the remaining terms */
717 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
718 out[1] -= (in[4] << 32);
719 out[3] += (in[4] << 32);
721 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
722 out[2] -= (in[5] << 32);
724 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
726 out[0] -= (in[6] << 32);
727 out[1] += (in[6] << 33);
728 out[2] += (in[6] * 2);
729 out[3] -= (in[6] << 32);
731 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
733 out[0] -= (in[7] << 32);
734 out[2] += (in[7] << 33);
735 out[3] += (in[7] * 3);
738 /* felem_reduce converts a longfelem into an felem.
739 * To be called directly after felem_square or felem_mul.
741 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
742 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
746 static void felem_reduce(felem out, const longfelem in)
748 out[0] = zero100[0] + in[0];
749 out[1] = zero100[1] + in[1];
750 out[2] = zero100[2] + in[2];
751 out[3] = zero100[3] + in[3];
753 felem_reduce_(out, in);
755 /* out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
756 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
757 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
758 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
760 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
761 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
762 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
763 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
767 /* felem_reduce_zero105 converts a larger longfelem into an felem.
773 static void felem_reduce_zero105(felem out, const longfelem in)
775 out[0] = zero105[0] + in[0];
776 out[1] = zero105[1] + in[1];
777 out[2] = zero105[2] + in[2];
778 out[3] = zero105[3] + in[3];
780 felem_reduce_(out, in);
782 /* out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
783 * out[1] > 2^105 - 2^71 - 2^103 > 0
784 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
785 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
787 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
788 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
789 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
790 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
794 /* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
796 static void subtract_u64(u64* result, u64* carry, u64 v)
798 uint128_t r = *result;
800 *carry = (r >> 64) & 1;
804 /* felem_contract converts |in| to its unique, minimal representation.
808 static void felem_contract(smallfelem out, const felem in)
811 u64 all_equal_so_far = 0, result = 0, carry;
813 felem_shrink(out, in);
814 /* small is minimal except that the value might be > p */
817 /* We are doing a constant time test if out >= kPrime. We need to
818 * compare each u64, from most-significant to least significant. For
819 * each one, if all words so far have been equal (m is all ones) then a
820 * non-equal result is the answer. Otherwise we continue. */
821 for (i = 3; i < 4; i--)
824 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
825 /* if out[i] > kPrime[i] then a will underflow and the high
826 * 64-bits will all be set. */
827 result |= all_equal_so_far & ((u64) (a >> 64));
829 /* if kPrime[i] == out[i] then |equal| will be all zeros and
830 * the decrement will make it all ones. */
831 equal = kPrime[i] ^ out[i];
833 equal &= equal << 32;
834 equal &= equal << 16;
839 equal = ((s64) equal) >> 63;
841 all_equal_so_far &= equal;
844 /* if all_equal_so_far is still all ones then the two values are equal
845 * and so out >= kPrime is true. */
846 result |= all_equal_so_far;
848 /* if out >= kPrime then we subtract kPrime. */
849 subtract_u64(&out[0], &carry, result & kPrime[0]);
850 subtract_u64(&out[1], &carry, carry);
851 subtract_u64(&out[2], &carry, carry);
852 subtract_u64(&out[3], &carry, carry);
854 subtract_u64(&out[1], &carry, result & kPrime[1]);
855 subtract_u64(&out[2], &carry, carry);
856 subtract_u64(&out[3], &carry, carry);
858 subtract_u64(&out[2], &carry, result & kPrime[2]);
859 subtract_u64(&out[3], &carry, carry);
861 subtract_u64(&out[3], &carry, result & kPrime[3]);
864 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
869 smallfelem_square(longtmp, in);
870 felem_reduce(tmp, longtmp);
871 felem_contract(out, tmp);
874 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
879 smallfelem_mul(longtmp, in1, in2);
880 felem_reduce(tmp, longtmp);
881 felem_contract(out, tmp);
884 /* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
889 static limb smallfelem_is_zero(const smallfelem small)
894 u64 is_zero = small[0] | small[1] | small[2] | small[3];
896 is_zero &= is_zero << 32;
897 is_zero &= is_zero << 16;
898 is_zero &= is_zero << 8;
899 is_zero &= is_zero << 4;
900 is_zero &= is_zero << 2;
901 is_zero &= is_zero << 1;
902 is_zero = ((s64) is_zero) >> 63;
904 is_p = (small[0] ^ kPrime[0]) |
905 (small[1] ^ kPrime[1]) |
906 (small[2] ^ kPrime[2]) |
907 (small[3] ^ kPrime[3]);
915 is_p = ((s64) is_p) >> 63;
920 result |= ((limb) is_zero) << 64;
924 static int smallfelem_is_zero_int(const smallfelem small)
926 return (int) (smallfelem_is_zero(small) & ((limb)1));
929 /* felem_inv calculates |out| = |in|^{-1}
931 * Based on Fermat's Little Theorem:
933 * a^{p-1} = 1 (mod p)
934 * a^{p-2} = a^{-1} (mod p)
936 static void felem_inv(felem out, const felem in)
939 /* each e_I will hold |in|^{2^I - 1} */
940 felem e2, e4, e8, e16, e32, e64;
944 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
945 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
946 felem_assign(e2, ftmp);
947 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
948 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
949 felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
950 felem_assign(e4, ftmp);
951 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
952 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
953 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
954 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
955 felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
956 felem_assign(e8, ftmp);
957 for (i = 0; i < 8; i++) {
958 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
960 felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
961 felem_assign(e16, ftmp);
962 for (i = 0; i < 16; i++) {
963 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
965 felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
966 felem_assign(e32, ftmp);
967 for (i = 0; i < 32; i++) {
968 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
970 felem_assign(e64, ftmp);
971 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
972 for (i = 0; i < 192; i++) {
973 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
974 } /* 2^256 - 2^224 + 2^192 */
976 felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
977 for (i = 0; i < 16; i++) {
978 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
980 felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
981 for (i = 0; i < 8; i++) {
982 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
984 felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
985 for (i = 0; i < 4; i++) {
986 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
988 felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
989 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
990 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
991 felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
992 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
993 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
994 felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
996 felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
999 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1003 smallfelem_expand(tmp, in);
1004 felem_inv(tmp, tmp);
1005 felem_contract(out, tmp);
1011 * Building on top of the field operations we have the operations on the
1012 * elliptic curve group itself. Points on the curve are represented in Jacobian
1015 /* point_double calculates 2*(x_in, y_in, z_in)
1017 * The method is taken from:
1018 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1020 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1021 * while x_out == y_in is not (maybe this works, but it's not tested). */
1023 point_double(felem x_out, felem y_out, felem z_out,
1024 const felem x_in, const felem y_in, const felem z_in)
1026 longfelem tmp, tmp2;
1027 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1028 smallfelem small1, small2;
1030 felem_assign(ftmp, x_in);
1031 /* ftmp[i] < 2^106 */
1032 felem_assign(ftmp2, x_in);
1033 /* ftmp2[i] < 2^106 */
1036 felem_square(tmp, z_in);
1037 felem_reduce(delta, tmp);
1038 /* delta[i] < 2^101 */
1041 felem_square(tmp, y_in);
1042 felem_reduce(gamma, tmp);
1043 /* gamma[i] < 2^101 */
1044 felem_shrink(small1, gamma);
1046 /* beta = x*gamma */
1047 felem_small_mul(tmp, small1, x_in);
1048 felem_reduce(beta, tmp);
1049 /* beta[i] < 2^101 */
1051 /* alpha = 3*(x-delta)*(x+delta) */
1052 felem_diff(ftmp, delta);
1053 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1054 felem_sum(ftmp2, delta);
1055 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1056 felem_scalar(ftmp2, 3);
1057 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1058 felem_mul(tmp, ftmp, ftmp2);
1059 felem_reduce(alpha, tmp);
1060 /* alpha[i] < 2^101 */
1061 felem_shrink(small2, alpha);
1063 /* x' = alpha^2 - 8*beta */
1064 smallfelem_square(tmp, small2);
1065 felem_reduce(x_out, tmp);
1066 felem_assign(ftmp, beta);
1067 felem_scalar(ftmp, 8);
1068 /* ftmp[i] < 8 * 2^101 = 2^104 */
1069 felem_diff(x_out, ftmp);
1070 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1072 /* z' = (y + z)^2 - gamma - delta */
1073 felem_sum(delta, gamma);
1074 /* delta[i] < 2^101 + 2^101 = 2^102 */
1075 felem_assign(ftmp, y_in);
1076 felem_sum(ftmp, z_in);
1077 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1078 felem_square(tmp, ftmp);
1079 felem_reduce(z_out, tmp);
1080 felem_diff(z_out, delta);
1081 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1083 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1084 felem_scalar(beta, 4);
1085 /* beta[i] < 4 * 2^101 = 2^103 */
1086 felem_diff_zero107(beta, x_out);
1087 /* beta[i] < 2^107 + 2^103 < 2^108 */
1088 felem_small_mul(tmp, small2, beta);
1089 /* tmp[i] < 7 * 2^64 < 2^67 */
1090 smallfelem_square(tmp2, small1);
1091 /* tmp2[i] < 7 * 2^64 */
1092 longfelem_scalar(tmp2, 8);
1093 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1094 longfelem_diff(tmp, tmp2);
1095 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1096 felem_reduce_zero105(y_out, tmp);
1097 /* y_out[i] < 2^106 */
1100 /* point_double_small is the same as point_double, except that it operates on
1103 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1104 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1106 felem felem_x_out, felem_y_out, felem_z_out;
1107 felem felem_x_in, felem_y_in, felem_z_in;
1109 smallfelem_expand(felem_x_in, x_in);
1110 smallfelem_expand(felem_y_in, y_in);
1111 smallfelem_expand(felem_z_in, z_in);
1112 point_double(felem_x_out, felem_y_out, felem_z_out,
1113 felem_x_in, felem_y_in, felem_z_in);
1114 felem_shrink(x_out, felem_x_out);
1115 felem_shrink(y_out, felem_y_out);
1116 felem_shrink(z_out, felem_z_out);
1119 /* copy_conditional copies in to out iff mask is all ones. */
1121 copy_conditional(felem out, const felem in, limb mask)
1124 for (i = 0; i < NLIMBS; ++i)
1126 const limb tmp = mask & (in[i] ^ out[i]);
1131 /* copy_small_conditional copies in to out iff mask is all ones. */
1133 copy_small_conditional(felem out, const smallfelem in, limb mask)
1136 const u64 mask64 = mask;
1137 for (i = 0; i < NLIMBS; ++i)
1139 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1143 /* point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1145 * The method is taken from:
1146 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1147 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1149 * This function includes a branch for checking whether the two input points
1150 * are equal, (while not equal to the point at infinity). This case never
1151 * happens during single point multiplication, so there is no timing leak for
1152 * ECDH or ECDSA signing. */
1153 static void point_add(felem x3, felem y3, felem z3,
1154 const felem x1, const felem y1, const felem z1,
1155 const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
1157 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1158 longfelem tmp, tmp2;
1159 smallfelem small1, small2, small3, small4, small5;
1160 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1162 felem_shrink(small3, z1);
1164 z1_is_zero = smallfelem_is_zero(small3);
1165 z2_is_zero = smallfelem_is_zero(z2);
1167 /* ftmp = z1z1 = z1**2 */
1168 smallfelem_square(tmp, small3);
1169 felem_reduce(ftmp, tmp);
1170 /* ftmp[i] < 2^101 */
1171 felem_shrink(small1, ftmp);
1175 /* ftmp2 = z2z2 = z2**2 */
1176 smallfelem_square(tmp, z2);
1177 felem_reduce(ftmp2, tmp);
1178 /* ftmp2[i] < 2^101 */
1179 felem_shrink(small2, ftmp2);
1181 felem_shrink(small5, x1);
1183 /* u1 = ftmp3 = x1*z2z2 */
1184 smallfelem_mul(tmp, small5, small2);
1185 felem_reduce(ftmp3, tmp);
1186 /* ftmp3[i] < 2^101 */
1188 /* ftmp5 = z1 + z2 */
1189 felem_assign(ftmp5, z1);
1190 felem_small_sum(ftmp5, z2);
1191 /* ftmp5[i] < 2^107 */
1193 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1194 felem_square(tmp, ftmp5);
1195 felem_reduce(ftmp5, tmp);
1196 /* ftmp2 = z2z2 + z1z1 */
1197 felem_sum(ftmp2, ftmp);
1198 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1199 felem_diff(ftmp5, ftmp2);
1200 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1202 /* ftmp2 = z2 * z2z2 */
1203 smallfelem_mul(tmp, small2, z2);
1204 felem_reduce(ftmp2, tmp);
1206 /* s1 = ftmp2 = y1 * z2**3 */
1207 felem_mul(tmp, y1, ftmp2);
1208 felem_reduce(ftmp6, tmp);
1209 /* ftmp6[i] < 2^101 */
1213 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1215 /* u1 = ftmp3 = x1*z2z2 */
1216 felem_assign(ftmp3, x1);
1217 /* ftmp3[i] < 2^106 */
1220 felem_assign(ftmp5, z1);
1221 felem_scalar(ftmp5, 2);
1222 /* ftmp5[i] < 2*2^106 = 2^107 */
1224 /* s1 = ftmp2 = y1 * z2**3 */
1225 felem_assign(ftmp6, y1);
1226 /* ftmp6[i] < 2^106 */
1230 smallfelem_mul(tmp, x2, small1);
1231 felem_reduce(ftmp4, tmp);
1233 /* h = ftmp4 = u2 - u1 */
1234 felem_diff_zero107(ftmp4, ftmp3);
1235 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1236 felem_shrink(small4, ftmp4);
1238 x_equal = smallfelem_is_zero(small4);
1240 /* z_out = ftmp5 * h */
1241 felem_small_mul(tmp, small4, ftmp5);
1242 felem_reduce(z_out, tmp);
1243 /* z_out[i] < 2^101 */
1245 /* ftmp = z1 * z1z1 */
1246 smallfelem_mul(tmp, small1, small3);
1247 felem_reduce(ftmp, tmp);
1249 /* s2 = tmp = y2 * z1**3 */
1250 felem_small_mul(tmp, y2, ftmp);
1251 felem_reduce(ftmp5, tmp);
1253 /* r = ftmp5 = (s2 - s1)*2 */
1254 felem_diff_zero107(ftmp5, ftmp6);
1255 /* ftmp5[i] < 2^107 + 2^107 = 2^108*/
1256 felem_scalar(ftmp5, 2);
1257 /* ftmp5[i] < 2^109 */
1258 felem_shrink(small1, ftmp5);
1259 y_equal = smallfelem_is_zero(small1);
1261 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1263 point_double(x3, y3, z3, x1, y1, z1);
1267 /* I = ftmp = (2h)**2 */
1268 felem_assign(ftmp, ftmp4);
1269 felem_scalar(ftmp, 2);
1270 /* ftmp[i] < 2*2^108 = 2^109 */
1271 felem_square(tmp, ftmp);
1272 felem_reduce(ftmp, tmp);
1274 /* J = ftmp2 = h * I */
1275 felem_mul(tmp, ftmp4, ftmp);
1276 felem_reduce(ftmp2, tmp);
1278 /* V = ftmp4 = U1 * I */
1279 felem_mul(tmp, ftmp3, ftmp);
1280 felem_reduce(ftmp4, tmp);
1282 /* x_out = r**2 - J - 2V */
1283 smallfelem_square(tmp, small1);
1284 felem_reduce(x_out, tmp);
1285 felem_assign(ftmp3, ftmp4);
1286 felem_scalar(ftmp4, 2);
1287 felem_sum(ftmp4, ftmp2);
1288 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1289 felem_diff(x_out, ftmp4);
1290 /* x_out[i] < 2^105 + 2^101 */
1292 /* y_out = r(V-x_out) - 2 * s1 * J */
1293 felem_diff_zero107(ftmp3, x_out);
1294 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1295 felem_small_mul(tmp, small1, ftmp3);
1296 felem_mul(tmp2, ftmp6, ftmp2);
1297 longfelem_scalar(tmp2, 2);
1298 /* tmp2[i] < 2*2^67 = 2^68 */
1299 longfelem_diff(tmp, tmp2);
1300 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1301 felem_reduce_zero105(y_out, tmp);
1302 /* y_out[i] < 2^106 */
1304 copy_small_conditional(x_out, x2, z1_is_zero);
1305 copy_conditional(x_out, x1, z2_is_zero);
1306 copy_small_conditional(y_out, y2, z1_is_zero);
1307 copy_conditional(y_out, y1, z2_is_zero);
1308 copy_small_conditional(z_out, z2, z1_is_zero);
1309 copy_conditional(z_out, z1, z2_is_zero);
1310 felem_assign(x3, x_out);
1311 felem_assign(y3, y_out);
1312 felem_assign(z3, z_out);
1315 /* point_add_small is the same as point_add, except that it operates on
1317 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1318 smallfelem x1, smallfelem y1, smallfelem z1,
1319 smallfelem x2, smallfelem y2, smallfelem z2)
1321 felem felem_x3, felem_y3, felem_z3;
1322 felem felem_x1, felem_y1, felem_z1;
1323 smallfelem_expand(felem_x1, x1);
1324 smallfelem_expand(felem_y1, y1);
1325 smallfelem_expand(felem_z1, z1);
1326 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1327 felem_shrink(x3, felem_x3);
1328 felem_shrink(y3, felem_y3);
1329 felem_shrink(z3, felem_z3);
1332 /* Base point pre computation
1333 * --------------------------
1335 * Two different sorts of precomputed tables are used in the following code.
1336 * Each contain various points on the curve, where each point is three field
1337 * elements (x, y, z).
1339 * For the base point table, z is usually 1 (0 for the point at infinity).
1340 * This table has 2 * 16 elements, starting with the following:
1341 * index | bits | point
1342 * ------+---------+------------------------------
1345 * 2 | 0 0 1 0 | 2^64G
1346 * 3 | 0 0 1 1 | (2^64 + 1)G
1347 * 4 | 0 1 0 0 | 2^128G
1348 * 5 | 0 1 0 1 | (2^128 + 1)G
1349 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1350 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1351 * 8 | 1 0 0 0 | 2^192G
1352 * 9 | 1 0 0 1 | (2^192 + 1)G
1353 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1354 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1355 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1356 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1357 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1358 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1359 * followed by a copy of this with each element multiplied by 2^32.
1361 * The reason for this is so that we can clock bits into four different
1362 * locations when doing simple scalar multiplies against the base point,
1363 * and then another four locations using the second 16 elements.
1365 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1367 /* gmul is the table of precomputed base points */
1368 static const smallfelem gmul[2][16][3] =
1372 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1373 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1375 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1376 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1378 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1379 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1381 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1382 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1384 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1385 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1387 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1388 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1390 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1391 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1393 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1394 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1396 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1397 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1399 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1400 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1402 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1403 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1405 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1406 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1408 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1409 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1411 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1412 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1414 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1415 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1420 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1421 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1423 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1424 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1426 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1427 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1429 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1430 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1432 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1433 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1435 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1436 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1438 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1439 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1441 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1442 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1444 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1445 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1447 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1448 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1450 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1451 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1453 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1454 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1456 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1457 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1459 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1460 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1462 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1463 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1466 /* select_point selects the |idx|th point from a precomputation table and
1467 * copies it to out. */
1468 static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1471 u64 *outlimbs = &out[0][0];
1472 memset(outlimbs, 0, 3 * sizeof(smallfelem));
1474 for (i = 0; i < size; i++)
1476 const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1483 for (j = 0; j < NLIMBS * 3; j++)
1484 outlimbs[j] |= inlimbs[j] & mask;
1488 /* get_bit returns the |i|th bit in |in| */
1489 static char get_bit(const felem_bytearray in, int i)
1491 if ((i < 0) || (i >= 256))
1493 return (in[i >> 3] >> (i & 7)) & 1;
1496 /* Interleaved point multiplication using precomputed point multiples:
1497 * The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[],
1498 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1499 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1500 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1501 static void batch_mul(felem x_out, felem y_out, felem z_out,
1502 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1503 const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1506 unsigned num, gen_mul = (g_scalar != NULL);
1512 /* set nq to the point at infinity */
1513 memset(nq, 0, 3 * sizeof(felem));
1515 /* Loop over all scalars msb-to-lsb, interleaving additions
1516 * of multiples of the generator (two in each of the last 32 rounds)
1517 * and additions of other points multiples (every 5th round).
1519 skip = 1; /* save two point operations in the first round */
1520 for (i = (num_points ? 255 : 31); i >= 0; --i)
1524 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1526 /* add multiples of the generator */
1527 if (gen_mul && (i <= 31))
1529 /* first, look 32 bits upwards */
1530 bits = get_bit(g_scalar, i + 224) << 3;
1531 bits |= get_bit(g_scalar, i + 160) << 2;
1532 bits |= get_bit(g_scalar, i + 96) << 1;
1533 bits |= get_bit(g_scalar, i + 32);
1534 /* select the point to add, in constant time */
1535 select_point(bits, 16, g_pre_comp[1], tmp);
1539 point_add(nq[0], nq[1], nq[2],
1540 nq[0], nq[1], nq[2],
1541 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1545 smallfelem_expand(nq[0], tmp[0]);
1546 smallfelem_expand(nq[1], tmp[1]);
1547 smallfelem_expand(nq[2], tmp[2]);
1551 /* second, look at the current position */
1552 bits = get_bit(g_scalar, i + 192) << 3;
1553 bits |= get_bit(g_scalar, i + 128) << 2;
1554 bits |= get_bit(g_scalar, i + 64) << 1;
1555 bits |= get_bit(g_scalar, i);
1556 /* select the point to add, in constant time */
1557 select_point(bits, 16, g_pre_comp[0], tmp);
1558 point_add(nq[0], nq[1], nq[2],
1559 nq[0], nq[1], nq[2],
1560 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1563 /* do other additions every 5 doublings */
1564 if (num_points && (i % 5 == 0))
1566 /* loop over all scalars */
1567 for (num = 0; num < num_points; ++num)
1569 bits = get_bit(scalars[num], i + 4) << 5;
1570 bits |= get_bit(scalars[num], i + 3) << 4;
1571 bits |= get_bit(scalars[num], i + 2) << 3;
1572 bits |= get_bit(scalars[num], i + 1) << 2;
1573 bits |= get_bit(scalars[num], i) << 1;
1574 bits |= get_bit(scalars[num], i - 1);
1575 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1577 /* select the point to add or subtract, in constant time */
1578 select_point(digit, 17, pre_comp[num], tmp);
1579 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
1580 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1581 felem_contract(tmp[1], ftmp);
1585 point_add(nq[0], nq[1], nq[2],
1586 nq[0], nq[1], nq[2],
1587 mixed, tmp[0], tmp[1], tmp[2]);
1591 smallfelem_expand(nq[0], tmp[0]);
1592 smallfelem_expand(nq[1], tmp[1]);
1593 smallfelem_expand(nq[2], tmp[2]);
1599 felem_assign(x_out, nq[0]);
1600 felem_assign(y_out, nq[1]);
1601 felem_assign(z_out, nq[2]);
1604 /* Precomputation for the group generator. */
1606 smallfelem g_pre_comp[2][16][3];
1608 } NISTP256_PRE_COMP;
1610 const EC_METHOD *EC_GFp_nistp256_method(void)
1612 static const EC_METHOD ret = {
1613 EC_FLAGS_DEFAULT_OCT,
1614 NID_X9_62_prime_field,
1615 ec_GFp_nistp256_group_init,
1616 ec_GFp_simple_group_finish,
1617 ec_GFp_simple_group_clear_finish,
1618 ec_GFp_nist_group_copy,
1619 ec_GFp_nistp256_group_set_curve,
1620 ec_GFp_simple_group_get_curve,
1621 ec_GFp_simple_group_get_degree,
1622 ec_GFp_simple_group_check_discriminant,
1623 ec_GFp_simple_point_init,
1624 ec_GFp_simple_point_finish,
1625 ec_GFp_simple_point_clear_finish,
1626 ec_GFp_simple_point_copy,
1627 ec_GFp_simple_point_set_to_infinity,
1628 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1629 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1630 ec_GFp_simple_point_set_affine_coordinates,
1631 ec_GFp_nistp256_point_get_affine_coordinates,
1632 0 /* point_set_compressed_coordinates */,
1637 ec_GFp_simple_invert,
1638 ec_GFp_simple_is_at_infinity,
1639 ec_GFp_simple_is_on_curve,
1641 ec_GFp_simple_make_affine,
1642 ec_GFp_simple_points_make_affine,
1643 ec_GFp_nistp256_points_mul,
1644 ec_GFp_nistp256_precompute_mult,
1645 ec_GFp_nistp256_have_precompute_mult,
1646 ec_GFp_nist_field_mul,
1647 ec_GFp_nist_field_sqr,
1649 0 /* field_encode */,
1650 0 /* field_decode */,
1651 0 /* field_set_to_one */ };
1656 /******************************************************************************/
1657 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1660 static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1662 NISTP256_PRE_COMP *ret = NULL;
1663 ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1666 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1669 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1670 ret->references = 1;
1674 static void *nistp256_pre_comp_dup(void *src_)
1676 NISTP256_PRE_COMP *src = src_;
1678 /* no need to actually copy, these objects never change! */
1679 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1684 static void nistp256_pre_comp_free(void *pre_)
1687 NISTP256_PRE_COMP *pre = pre_;
1692 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1699 static void nistp256_pre_comp_clear_free(void *pre_)
1702 NISTP256_PRE_COMP *pre = pre_;
1707 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1711 OPENSSL_cleanse(pre, sizeof *pre);
1715 /******************************************************************************/
1716 /* OPENSSL EC_METHOD FUNCTIONS
1719 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1722 ret = ec_GFp_simple_group_init(group);
1723 group->a_is_minus3 = 1;
1727 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1728 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1731 BN_CTX *new_ctx = NULL;
1732 BIGNUM *curve_p, *curve_a, *curve_b;
1735 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1737 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1738 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1739 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1740 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1741 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1742 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1743 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1744 (BN_cmp(curve_b, b)))
1746 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1747 EC_R_WRONG_CURVE_PARAMETERS);
1750 group->field_mod_func = BN_nist_mod_256;
1751 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1754 if (new_ctx != NULL)
1755 BN_CTX_free(new_ctx);
1759 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1760 * (X', Y') = (X/Z^2, Y/Z^3) */
1761 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1762 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1764 felem z1, z2, x_in, y_in;
1765 smallfelem x_out, y_out;
1768 if (EC_POINT_is_at_infinity(group, point))
1770 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1771 EC_R_POINT_AT_INFINITY);
1774 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1775 (!BN_to_felem(z1, &point->Z))) return 0;
1777 felem_square(tmp, z2); felem_reduce(z1, tmp);
1778 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1779 felem_contract(x_out, x_in);
1782 if (!smallfelem_to_BN(x, x_out)) {
1783 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1788 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1789 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1790 felem_contract(y_out, y_in);
1793 if (!smallfelem_to_BN(y, y_out))
1795 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1803 static void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */])
1805 /* Runs in constant time, unless an input is the point at infinity
1806 * (which normally shouldn't happen). */
1807 ec_GFp_nistp_points_make_affine_internal(
1812 (void (*)(void *)) smallfelem_one,
1813 (int (*)(const void *)) smallfelem_is_zero_int,
1814 (void (*)(void *, const void *)) smallfelem_assign,
1815 (void (*)(void *, const void *)) smallfelem_square_contract,
1816 (void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
1817 (void (*)(void *, const void *)) smallfelem_inv_contract,
1818 (void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */);
1821 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1822 * Result is stored in r (r can equal one of the inputs). */
1823 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1824 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1825 const BIGNUM *scalars[], BN_CTX *ctx)
1830 BN_CTX *new_ctx = NULL;
1831 BIGNUM *x, *y, *z, *tmp_scalar;
1832 felem_bytearray g_secret;
1833 felem_bytearray *secrets = NULL;
1834 smallfelem (*pre_comp)[17][3] = NULL;
1835 smallfelem *tmp_smallfelems = NULL;
1836 felem_bytearray tmp;
1837 unsigned i, num_bytes;
1838 int have_pre_comp = 0;
1839 size_t num_points = num;
1840 smallfelem x_in, y_in, z_in;
1841 felem x_out, y_out, z_out;
1842 NISTP256_PRE_COMP *pre = NULL;
1843 const smallfelem (*g_pre_comp)[16][3] = NULL;
1844 EC_POINT *generator = NULL;
1845 const EC_POINT *p = NULL;
1846 const BIGNUM *p_scalar = NULL;
1849 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1851 if (((x = BN_CTX_get(ctx)) == NULL) ||
1852 ((y = BN_CTX_get(ctx)) == NULL) ||
1853 ((z = BN_CTX_get(ctx)) == NULL) ||
1854 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1859 pre = EC_EX_DATA_get_data(group->extra_data,
1860 nistp256_pre_comp_dup, nistp256_pre_comp_free,
1861 nistp256_pre_comp_clear_free);
1863 /* we have precomputation, try to use it */
1864 g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
1866 /* try to use the standard precomputation */
1867 g_pre_comp = &gmul[0];
1868 generator = EC_POINT_new(group);
1869 if (generator == NULL)
1871 /* get the generator from precomputation */
1872 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1873 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1874 !smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1876 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1879 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1880 generator, x, y, z, ctx))
1882 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1883 /* precomputation matches generator */
1886 /* we don't have valid precomputation:
1887 * treat the generator as a random point */
1892 if (num_points >= 3)
1894 /* unless we precompute multiples for just one or two points,
1895 * converting those into affine form is time well spent */
1898 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1899 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
1901 tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
1902 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1904 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1908 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1909 * i.e., they contribute nothing to the linear combination */
1910 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1911 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1912 for (i = 0; i < num_points; ++i)
1915 /* we didn't have a valid precomputation, so we pick
1918 p = EC_GROUP_get0_generator(group);
1922 /* the i^th point */
1925 p_scalar = scalars[i];
1927 if ((p_scalar != NULL) && (p != NULL))
1929 /* reduce scalar to 0 <= scalar < 2^256 */
1930 if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
1932 /* this is an unusual input, and we don't guarantee
1933 * constant-timeness */
1934 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1936 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1939 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1942 num_bytes = BN_bn2bin(p_scalar, tmp);
1943 flip_endian(secrets[i], tmp, num_bytes);
1944 /* precompute multiples */
1945 if ((!BN_to_felem(x_out, &p->X)) ||
1946 (!BN_to_felem(y_out, &p->Y)) ||
1947 (!BN_to_felem(z_out, &p->Z))) goto err;
1948 felem_shrink(pre_comp[i][1][0], x_out);
1949 felem_shrink(pre_comp[i][1][1], y_out);
1950 felem_shrink(pre_comp[i][1][2], z_out);
1951 for (j = 2; j <= 16; ++j)
1956 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1957 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1958 pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1963 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1964 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1970 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
1973 /* the scalar for the generator */
1974 if ((scalar != NULL) && (have_pre_comp))
1976 memset(g_secret, 0, sizeof(g_secret));
1977 /* reduce scalar to 0 <= scalar < 2^256 */
1978 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
1980 /* this is an unusual input, and we don't guarantee
1981 * constant-timeness */
1982 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1984 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1987 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1990 num_bytes = BN_bn2bin(scalar, tmp);
1991 flip_endian(g_secret, tmp, num_bytes);
1992 /* do the multiplication with generator precomputation*/
1993 batch_mul(x_out, y_out, z_out,
1994 (const felem_bytearray (*)) secrets, num_points,
1996 mixed, (const smallfelem (*)[17][3]) pre_comp,
2000 /* do the multiplication without generator precomputation */
2001 batch_mul(x_out, y_out, z_out,
2002 (const felem_bytearray (*)) secrets, num_points,
2003 NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
2004 /* reduce the output to its unique minimal representation */
2005 felem_contract(x_in, x_out);
2006 felem_contract(y_in, y_out);
2007 felem_contract(z_in, z_out);
2008 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2009 (!smallfelem_to_BN(z, z_in)))
2011 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2014 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2018 if (generator != NULL)
2019 EC_POINT_free(generator);
2020 if (new_ctx != NULL)
2021 BN_CTX_free(new_ctx);
2022 if (secrets != NULL)
2023 OPENSSL_free(secrets);
2024 if (pre_comp != NULL)
2025 OPENSSL_free(pre_comp);
2026 if (tmp_smallfelems != NULL)
2027 OPENSSL_free(tmp_smallfelems);
2031 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2034 NISTP256_PRE_COMP *pre = NULL;
2036 BN_CTX *new_ctx = NULL;
2038 EC_POINT *generator = NULL;
2039 smallfelem tmp_smallfelems[32];
2040 felem x_tmp, y_tmp, z_tmp;
2042 /* throw away old precomputation */
2043 EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2044 nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2046 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
2048 if (((x = BN_CTX_get(ctx)) == NULL) ||
2049 ((y = BN_CTX_get(ctx)) == NULL))
2051 /* get the generator */
2052 if (group->generator == NULL) goto err;
2053 generator = EC_POINT_new(group);
2054 if (generator == NULL)
2056 BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
2057 BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
2058 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2060 if ((pre = nistp256_pre_comp_new()) == NULL)
2062 /* if the generator is the standard one, use built-in precomputation */
2063 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2065 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2069 if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2070 (!BN_to_felem(y_tmp, &group->generator->Y)) ||
2071 (!BN_to_felem(z_tmp, &group->generator->Z)))
2073 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2074 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2075 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2076 /* compute 2^64*G, 2^128*G, 2^192*G for the first table,
2077 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
2079 for (i = 1; i <= 8; i <<= 1)
2082 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2083 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
2084 for (j = 0; j < 31; ++j)
2087 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2088 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2093 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2094 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2095 for (j = 0; j < 31; ++j)
2098 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2099 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
2102 for (i = 0; i < 2; i++)
2104 /* g_pre_comp[i][0] is the point at infinity */
2105 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2106 /* the remaining multiples */
2107 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2109 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2110 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2111 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2112 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2114 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2115 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2116 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2117 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2119 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2120 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2121 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2122 /* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
2124 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2125 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2126 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2127 for (j = 1; j < 8; ++j)
2129 /* odd multiples: add G resp. 2^32*G */
2131 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
2132 pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
2133 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
2136 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2138 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2139 nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2145 if (generator != NULL)
2146 EC_POINT_free(generator);
2147 if (new_ctx != NULL)
2148 BN_CTX_free(new_ctx);
2150 nistp256_pre_comp_free(pre);
2154 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2156 if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2157 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
2164 static void *dummy=&dummy;