1 /* crypto/ec/ecp_nistp256.c */
3 * Written by Adam Langley (Google) for the OpenSSL project
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
32 #ifndef OPENSSL_SYS_VMS
39 #include <openssl/err.h>
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 typedef __int128_t int128_t;
47 #error "Need GCC 3.1 or later to define type uint128_t"
55 /* The underlying field.
57 * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
58 * of this field into 32 bytes. We call this an felem_bytearray. */
60 typedef u8 felem_bytearray[32];
62 /* These are the parameters of P256, taken from FIPS 186-3, page 86. These
63 * values are big-endian. */
64 static const felem_bytearray nistp256_curve_params[5] = {
65 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
66 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
67 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
69 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
70 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
71 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
73 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
74 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
75 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
76 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
77 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
78 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
79 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
80 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
81 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
82 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
83 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
84 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
88 * The representation of field elements.
89 * ------------------------------------
91 * We represent field elements with either four 128-bit values, eight 128-bit
92 * values, or four 64-bit values. The field element represented is:
93 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
95 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
97 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
98 * apart, but are 128-bits wide, the most significant bits of each limb overlap
99 * with the least significant bits of the next.
101 * A field element with four limbs is an 'felem'. One with eight limbs is a
104 * A field element with four, 64-bit values is called a 'smallfelem'. Small
105 * values are used as intermediate values before multiplication.
110 typedef uint128_t limb;
111 typedef limb felem[NLIMBS];
112 typedef limb longfelem[NLIMBS * 2];
113 typedef u64 smallfelem[NLIMBS];
115 /* This is the value of the prime as four 64-bit words, little-endian. */
116 static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
117 static const u64 bottom63bits = 0x7ffffffffffffffful;
119 /* bin32_to_felem takes a little-endian byte array and converts it into felem
120 * form. This assumes that the CPU is little-endian. */
121 static void bin32_to_felem(felem out, const u8 in[32])
123 out[0] = *((u64*) &in[0]);
124 out[1] = *((u64*) &in[8]);
125 out[2] = *((u64*) &in[16]);
126 out[3] = *((u64*) &in[24]);
129 /* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
130 * 32 byte array. This assumes that the CPU is little-endian. */
131 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
133 *((u64*) &out[0]) = in[0];
134 *((u64*) &out[8]) = in[1];
135 *((u64*) &out[16]) = in[2];
136 *((u64*) &out[24]) = in[3];
139 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
140 static void flip_endian(u8 *out, const u8 *in, unsigned len)
143 for (i = 0; i < len; ++i)
144 out[i] = in[len-1-i];
147 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
148 static int BN_to_felem(felem out, const BIGNUM *bn)
150 felem_bytearray b_in;
151 felem_bytearray b_out;
154 /* BN_bn2bin eats leading zeroes */
155 memset(b_out, 0, sizeof b_out);
156 num_bytes = BN_num_bytes(bn);
157 if (num_bytes > sizeof b_out)
159 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
162 if (BN_is_negative(bn))
164 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
167 num_bytes = BN_bn2bin(bn, b_in);
168 flip_endian(b_out, b_in, num_bytes);
169 bin32_to_felem(out, b_out);
173 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
174 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
176 felem_bytearray b_in, b_out;
177 smallfelem_to_bin32(b_in, in);
178 flip_endian(b_out, b_in, sizeof b_out);
179 return BN_bin2bn(b_out, sizeof b_out, out);
188 static void smallfelem_one(smallfelem out)
196 static void smallfelem_assign(smallfelem out, const smallfelem in)
204 static void felem_assign(felem out, const felem in)
212 /* felem_sum sets out = out + in. */
213 static void felem_sum(felem out, const felem in)
221 /* felem_small_sum sets out = out + in. */
222 static void felem_small_sum(felem out, const smallfelem in)
230 /* felem_scalar sets out = out * scalar */
231 static void felem_scalar(felem out, const u64 scalar)
239 /* longfelem_scalar sets out = out * scalar */
240 static void longfelem_scalar(longfelem out, const u64 scalar)
252 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
253 #define two105 (((limb)1) << 105)
254 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
256 /* zero105 is 0 mod p */
257 static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
260 * smallfelem_neg sets |out| to |-small|
262 * out[i] < out[i] + 2^105
264 static void smallfelem_neg(felem out, const smallfelem small)
266 /* In order to prevent underflow, we subtract from 0 mod p. */
267 out[0] = zero105[0] - small[0];
268 out[1] = zero105[1] - small[1];
269 out[2] = zero105[2] - small[2];
270 out[3] = zero105[3] - small[3];
274 * felem_diff subtracts |in| from |out|
278 * out[i] < out[i] + 2^105
280 static void felem_diff(felem out, const felem in)
282 /* In order to prevent underflow, we add 0 mod p before subtracting. */
283 out[0] += zero105[0];
284 out[1] += zero105[1];
285 out[2] += zero105[2];
286 out[3] += zero105[3];
294 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
295 #define two107 (((limb)1) << 107)
296 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
298 /* zero107 is 0 mod p */
299 static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
302 * An alternative felem_diff for larger inputs |in|
303 * felem_diff_zero107 subtracts |in| from |out|
307 * out[i] < out[i] + 2^107
309 static void felem_diff_zero107(felem out, const felem in)
311 /* In order to prevent underflow, we add 0 mod p before subtracting. */
312 out[0] += zero107[0];
313 out[1] += zero107[1];
314 out[2] += zero107[2];
315 out[3] += zero107[3];
324 * longfelem_diff subtracts |in| from |out|
328 * out[i] < out[i] + 2^70 + 2^40
330 static void longfelem_diff(longfelem out, const longfelem in)
332 static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
333 static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
334 static const limb two70 = (((limb)1) << 70);
335 static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
336 static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
338 /* add 0 mod p to avoid underflow */
342 out[3] += two70m40m38p6;
348 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
359 #define two64m0 (((limb)1) << 64) - 1
360 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
361 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
362 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
364 /* zero110 is 0 mod p */
365 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
368 * felem_shrink converts an felem into a smallfelem. The result isn't quite
369 * minimal as the value may be greater than p.
376 static void felem_shrink(smallfelem out, const felem in)
381 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
384 tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
387 tmp[2] = zero110[2] + (u64) in[2];
388 tmp[0] = zero110[0] + in[0];
389 tmp[1] = zero110[1] + in[1];
390 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
392 /* We perform two partial reductions where we eliminate the
393 * high-word of tmp[3]. We don't update the other words till the end.
395 a = tmp[3] >> 64; /* a < 2^46 */
396 tmp[3] = (u64) tmp[3];
398 tmp[3] += ((limb)a) << 32;
402 a = tmp[3] >> 64; /* a < 2^15 */
403 b += a; /* b < 2^46 + 2^15 < 2^47 */
404 tmp[3] = (u64) tmp[3];
406 tmp[3] += ((limb)a) << 32;
407 /* tmp[3] < 2^64 + 2^47 */
409 /* This adjusts the other two words to complete the two partial
412 tmp[1] -= (((limb)b) << 32);
414 /* In order to make space in tmp[3] for the carry from 2 -> 3, we
415 * conditionally subtract kPrime if tmp[3] is large enough. */
417 /* As tmp[3] < 2^65, high is either 1 or 0 */
422 * all ones if the high word of tmp[3] is 1
423 * all zeros if the high word of tmp[3] if 0 */
428 * all ones if the MSB of low is 1
429 * all zeros if the MSB of low if 0 */
432 /* if low was greater than kPrime3Test then the MSB is zero */
437 * all ones if low was > kPrime3Test
438 * all zeros if low was <= kPrime3Test */
439 mask = (mask & low) | high;
440 tmp[0] -= mask & kPrime[0];
441 tmp[1] -= mask & kPrime[1];
442 /* kPrime[2] is zero, so omitted */
443 tmp[3] -= mask & kPrime[3];
444 /* tmp[3] < 2**64 - 2**32 + 1 */
446 tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
447 tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
448 tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
457 /* smallfelem_expand converts a smallfelem to an felem */
458 static void smallfelem_expand(felem out, const smallfelem in)
467 * smallfelem_square sets |out| = |small|^2
471 * out[i] < 7 * 2^64 < 2^67
473 static void smallfelem_square(longfelem out, const smallfelem small)
478 a = ((uint128_t) small[0]) * small[0];
484 a = ((uint128_t) small[0]) * small[1];
491 a = ((uint128_t) small[0]) * small[2];
498 a = ((uint128_t) small[0]) * small[3];
504 a = ((uint128_t) small[1]) * small[2];
511 a = ((uint128_t) small[1]) * small[1];
517 a = ((uint128_t) small[1]) * small[3];
524 a = ((uint128_t) small[2]) * small[3];
532 a = ((uint128_t) small[2]) * small[2];
538 a = ((uint128_t) small[3]) * small[3];
546 * felem_square sets |out| = |in|^2
550 * out[i] < 7 * 2^64 < 2^67
552 static void felem_square(longfelem out, const felem in)
555 felem_shrink(small, in);
556 smallfelem_square(out, small);
560 * smallfelem_mul sets |out| = |small1| * |small2|
565 * out[i] < 7 * 2^64 < 2^67
567 static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
572 a = ((uint128_t) small1[0]) * small2[0];
579 a = ((uint128_t) small1[0]) * small2[1];
585 a = ((uint128_t) small1[1]) * small2[0];
592 a = ((uint128_t) small1[0]) * small2[2];
598 a = ((uint128_t) small1[1]) * small2[1];
604 a = ((uint128_t) small1[2]) * small2[0];
611 a = ((uint128_t) small1[0]) * small2[3];
617 a = ((uint128_t) small1[1]) * small2[2];
623 a = ((uint128_t) small1[2]) * small2[1];
629 a = ((uint128_t) small1[3]) * small2[0];
636 a = ((uint128_t) small1[1]) * small2[3];
642 a = ((uint128_t) small1[2]) * small2[2];
648 a = ((uint128_t) small1[3]) * small2[1];
655 a = ((uint128_t) small1[2]) * small2[3];
661 a = ((uint128_t) small1[3]) * small2[2];
668 a = ((uint128_t) small1[3]) * small2[3];
676 * felem_mul sets |out| = |in1| * |in2|
681 * out[i] < 7 * 2^64 < 2^67
683 static void felem_mul(longfelem out, const felem in1, const felem in2)
685 smallfelem small1, small2;
686 felem_shrink(small1, in1);
687 felem_shrink(small2, in2);
688 smallfelem_mul(out, small1, small2);
692 * felem_small_mul sets |out| = |small1| * |in2|
697 * out[i] < 7 * 2^64 < 2^67
699 static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
702 felem_shrink(small2, in2);
703 smallfelem_mul(out, small1, small2);
706 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
707 #define two100 (((limb)1) << 100)
708 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
709 /* zero100 is 0 mod p */
710 static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
713 * Internal function for the different flavours of felem_reduce.
714 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
716 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
717 * out[1] >= in[7] + 2^32*in[4]
718 * out[2] >= in[5] + 2^32*in[5]
719 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
721 * out[0] <= out[0] + in[4] + 2^32*in[5]
722 * out[1] <= out[1] + in[5] + 2^33*in[6]
723 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
724 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
726 static void felem_reduce_(felem out, const longfelem in)
729 /* combine common terms from below */
730 c = in[4] + (in[5] << 32);
738 /* the remaining terms */
739 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
740 out[1] -= (in[4] << 32);
741 out[3] += (in[4] << 32);
743 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
744 out[2] -= (in[5] << 32);
746 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
748 out[0] -= (in[6] << 32);
749 out[1] += (in[6] << 33);
750 out[2] += (in[6] * 2);
751 out[3] -= (in[6] << 32);
753 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
755 out[0] -= (in[7] << 32);
756 out[2] += (in[7] << 33);
757 out[3] += (in[7] * 3);
761 * felem_reduce converts a longfelem into an felem.
762 * To be called directly after felem_square or felem_mul.
764 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
765 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
769 static void felem_reduce(felem out, const longfelem in)
771 out[0] = zero100[0] + in[0];
772 out[1] = zero100[1] + in[1];
773 out[2] = zero100[2] + in[2];
774 out[3] = zero100[3] + in[3];
776 felem_reduce_(out, in);
779 * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
780 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
781 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
782 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
784 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
785 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
786 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
787 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
792 * felem_reduce_zero105 converts a larger longfelem into an felem.
798 static void felem_reduce_zero105(felem out, const longfelem in)
800 out[0] = zero105[0] + in[0];
801 out[1] = zero105[1] + in[1];
802 out[2] = zero105[2] + in[2];
803 out[3] = zero105[3] + in[3];
805 felem_reduce_(out, in);
808 * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
809 * out[1] > 2^105 - 2^71 - 2^103 > 0
810 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
811 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
813 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
814 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
815 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
816 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
820 /* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
822 static void subtract_u64(u64* result, u64* carry, u64 v)
824 uint128_t r = *result;
826 *carry = (r >> 64) & 1;
830 /* felem_contract converts |in| to its unique, minimal representation.
834 static void felem_contract(smallfelem out, const felem in)
837 u64 all_equal_so_far = 0, result = 0, carry;
839 felem_shrink(out, in);
840 /* small is minimal except that the value might be > p */
843 /* We are doing a constant time test if out >= kPrime. We need to
844 * compare each u64, from most-significant to least significant. For
845 * each one, if all words so far have been equal (m is all ones) then a
846 * non-equal result is the answer. Otherwise we continue. */
847 for (i = 3; i < 4; i--)
850 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
851 /* if out[i] > kPrime[i] then a will underflow and the high
852 * 64-bits will all be set. */
853 result |= all_equal_so_far & ((u64) (a >> 64));
855 /* if kPrime[i] == out[i] then |equal| will be all zeros and
856 * the decrement will make it all ones. */
857 equal = kPrime[i] ^ out[i];
859 equal &= equal << 32;
860 equal &= equal << 16;
865 equal = ((s64) equal) >> 63;
867 all_equal_so_far &= equal;
870 /* if all_equal_so_far is still all ones then the two values are equal
871 * and so out >= kPrime is true. */
872 result |= all_equal_so_far;
874 /* if out >= kPrime then we subtract kPrime. */
875 subtract_u64(&out[0], &carry, result & kPrime[0]);
876 subtract_u64(&out[1], &carry, carry);
877 subtract_u64(&out[2], &carry, carry);
878 subtract_u64(&out[3], &carry, carry);
880 subtract_u64(&out[1], &carry, result & kPrime[1]);
881 subtract_u64(&out[2], &carry, carry);
882 subtract_u64(&out[3], &carry, carry);
884 subtract_u64(&out[2], &carry, result & kPrime[2]);
885 subtract_u64(&out[3], &carry, carry);
887 subtract_u64(&out[3], &carry, result & kPrime[3]);
890 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
895 smallfelem_square(longtmp, in);
896 felem_reduce(tmp, longtmp);
897 felem_contract(out, tmp);
900 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
905 smallfelem_mul(longtmp, in1, in2);
906 felem_reduce(tmp, longtmp);
907 felem_contract(out, tmp);
911 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
916 static limb smallfelem_is_zero(const smallfelem small)
921 u64 is_zero = small[0] | small[1] | small[2] | small[3];
923 is_zero &= is_zero << 32;
924 is_zero &= is_zero << 16;
925 is_zero &= is_zero << 8;
926 is_zero &= is_zero << 4;
927 is_zero &= is_zero << 2;
928 is_zero &= is_zero << 1;
929 is_zero = ((s64) is_zero) >> 63;
931 is_p = (small[0] ^ kPrime[0]) |
932 (small[1] ^ kPrime[1]) |
933 (small[2] ^ kPrime[2]) |
934 (small[3] ^ kPrime[3]);
942 is_p = ((s64) is_p) >> 63;
947 result |= ((limb) is_zero) << 64;
951 static int smallfelem_is_zero_int(const smallfelem small)
953 return (int) (smallfelem_is_zero(small) & ((limb)1));
957 * felem_inv calculates |out| = |in|^{-1}
959 * Based on Fermat's Little Theorem:
961 * a^{p-1} = 1 (mod p)
962 * a^{p-2} = a^{-1} (mod p)
964 static void felem_inv(felem out, const felem in)
967 /* each e_I will hold |in|^{2^I - 1} */
968 felem e2, e4, e8, e16, e32, e64;
972 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
973 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
974 felem_assign(e2, ftmp);
975 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
976 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
977 felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
978 felem_assign(e4, ftmp);
979 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
980 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
981 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
982 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
983 felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
984 felem_assign(e8, ftmp);
985 for (i = 0; i < 8; i++) {
986 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
988 felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
989 felem_assign(e16, ftmp);
990 for (i = 0; i < 16; i++) {
991 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
993 felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
994 felem_assign(e32, ftmp);
995 for (i = 0; i < 32; i++) {
996 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
998 felem_assign(e64, ftmp);
999 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
1000 for (i = 0; i < 192; i++) {
1001 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
1002 } /* 2^256 - 2^224 + 2^192 */
1004 felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
1005 for (i = 0; i < 16; i++) {
1006 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
1008 felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
1009 for (i = 0; i < 8; i++) {
1010 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
1012 felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
1013 for (i = 0; i < 4; i++) {
1014 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
1016 felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
1017 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
1018 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
1019 felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
1020 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
1021 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1022 felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1024 felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1027 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1031 smallfelem_expand(tmp, in);
1032 felem_inv(tmp, tmp);
1033 felem_contract(out, tmp);
1040 * Building on top of the field operations we have the operations on the
1041 * elliptic curve group itself. Points on the curve are represented in Jacobian
1045 * point_double calculates 2*(x_in, y_in, z_in)
1047 * The method is taken from:
1048 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1050 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1051 * while x_out == y_in is not (maybe this works, but it's not tested). */
1053 point_double(felem x_out, felem y_out, felem z_out,
1054 const felem x_in, const felem y_in, const felem z_in)
1056 longfelem tmp, tmp2;
1057 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1058 smallfelem small1, small2;
1060 felem_assign(ftmp, x_in);
1061 /* ftmp[i] < 2^106 */
1062 felem_assign(ftmp2, x_in);
1063 /* ftmp2[i] < 2^106 */
1066 felem_square(tmp, z_in);
1067 felem_reduce(delta, tmp);
1068 /* delta[i] < 2^101 */
1071 felem_square(tmp, y_in);
1072 felem_reduce(gamma, tmp);
1073 /* gamma[i] < 2^101 */
1074 felem_shrink(small1, gamma);
1076 /* beta = x*gamma */
1077 felem_small_mul(tmp, small1, x_in);
1078 felem_reduce(beta, tmp);
1079 /* beta[i] < 2^101 */
1081 /* alpha = 3*(x-delta)*(x+delta) */
1082 felem_diff(ftmp, delta);
1083 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1084 felem_sum(ftmp2, delta);
1085 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1086 felem_scalar(ftmp2, 3);
1087 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1088 felem_mul(tmp, ftmp, ftmp2);
1089 felem_reduce(alpha, tmp);
1090 /* alpha[i] < 2^101 */
1091 felem_shrink(small2, alpha);
1093 /* x' = alpha^2 - 8*beta */
1094 smallfelem_square(tmp, small2);
1095 felem_reduce(x_out, tmp);
1096 felem_assign(ftmp, beta);
1097 felem_scalar(ftmp, 8);
1098 /* ftmp[i] < 8 * 2^101 = 2^104 */
1099 felem_diff(x_out, ftmp);
1100 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1102 /* z' = (y + z)^2 - gamma - delta */
1103 felem_sum(delta, gamma);
1104 /* delta[i] < 2^101 + 2^101 = 2^102 */
1105 felem_assign(ftmp, y_in);
1106 felem_sum(ftmp, z_in);
1107 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1108 felem_square(tmp, ftmp);
1109 felem_reduce(z_out, tmp);
1110 felem_diff(z_out, delta);
1111 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1113 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1114 felem_scalar(beta, 4);
1115 /* beta[i] < 4 * 2^101 = 2^103 */
1116 felem_diff_zero107(beta, x_out);
1117 /* beta[i] < 2^107 + 2^103 < 2^108 */
1118 felem_small_mul(tmp, small2, beta);
1119 /* tmp[i] < 7 * 2^64 < 2^67 */
1120 smallfelem_square(tmp2, small1);
1121 /* tmp2[i] < 7 * 2^64 */
1122 longfelem_scalar(tmp2, 8);
1123 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1124 longfelem_diff(tmp, tmp2);
1125 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1126 felem_reduce_zero105(y_out, tmp);
1127 /* y_out[i] < 2^106 */
1130 /* point_double_small is the same as point_double, except that it operates on
1133 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1134 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1136 felem felem_x_out, felem_y_out, felem_z_out;
1137 felem felem_x_in, felem_y_in, felem_z_in;
1139 smallfelem_expand(felem_x_in, x_in);
1140 smallfelem_expand(felem_y_in, y_in);
1141 smallfelem_expand(felem_z_in, z_in);
1142 point_double(felem_x_out, felem_y_out, felem_z_out,
1143 felem_x_in, felem_y_in, felem_z_in);
1144 felem_shrink(x_out, felem_x_out);
1145 felem_shrink(y_out, felem_y_out);
1146 felem_shrink(z_out, felem_z_out);
1149 /* copy_conditional copies in to out iff mask is all ones. */
1151 copy_conditional(felem out, const felem in, limb mask)
1154 for (i = 0; i < NLIMBS; ++i)
1156 const limb tmp = mask & (in[i] ^ out[i]);
1161 /* copy_small_conditional copies in to out iff mask is all ones. */
1163 copy_small_conditional(felem out, const smallfelem in, limb mask)
1166 const u64 mask64 = mask;
1167 for (i = 0; i < NLIMBS; ++i)
1169 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1174 * point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1176 * The method is taken from:
1177 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1178 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1180 * This function includes a branch for checking whether the two input points
1181 * are equal, (while not equal to the point at infinity). This case never
1182 * happens during single point multiplication, so there is no timing leak for
1183 * ECDH or ECDSA signing. */
1184 static void point_add(felem x3, felem y3, felem z3,
1185 const felem x1, const felem y1, const felem z1,
1186 const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
1188 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1189 longfelem tmp, tmp2;
1190 smallfelem small1, small2, small3, small4, small5;
1191 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1193 felem_shrink(small3, z1);
1195 z1_is_zero = smallfelem_is_zero(small3);
1196 z2_is_zero = smallfelem_is_zero(z2);
1198 /* ftmp = z1z1 = z1**2 */
1199 smallfelem_square(tmp, small3);
1200 felem_reduce(ftmp, tmp);
1201 /* ftmp[i] < 2^101 */
1202 felem_shrink(small1, ftmp);
1206 /* ftmp2 = z2z2 = z2**2 */
1207 smallfelem_square(tmp, z2);
1208 felem_reduce(ftmp2, tmp);
1209 /* ftmp2[i] < 2^101 */
1210 felem_shrink(small2, ftmp2);
1212 felem_shrink(small5, x1);
1214 /* u1 = ftmp3 = x1*z2z2 */
1215 smallfelem_mul(tmp, small5, small2);
1216 felem_reduce(ftmp3, tmp);
1217 /* ftmp3[i] < 2^101 */
1219 /* ftmp5 = z1 + z2 */
1220 felem_assign(ftmp5, z1);
1221 felem_small_sum(ftmp5, z2);
1222 /* ftmp5[i] < 2^107 */
1224 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1225 felem_square(tmp, ftmp5);
1226 felem_reduce(ftmp5, tmp);
1227 /* ftmp2 = z2z2 + z1z1 */
1228 felem_sum(ftmp2, ftmp);
1229 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1230 felem_diff(ftmp5, ftmp2);
1231 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1233 /* ftmp2 = z2 * z2z2 */
1234 smallfelem_mul(tmp, small2, z2);
1235 felem_reduce(ftmp2, tmp);
1237 /* s1 = ftmp2 = y1 * z2**3 */
1238 felem_mul(tmp, y1, ftmp2);
1239 felem_reduce(ftmp6, tmp);
1240 /* ftmp6[i] < 2^101 */
1244 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1246 /* u1 = ftmp3 = x1*z2z2 */
1247 felem_assign(ftmp3, x1);
1248 /* ftmp3[i] < 2^106 */
1251 felem_assign(ftmp5, z1);
1252 felem_scalar(ftmp5, 2);
1253 /* ftmp5[i] < 2*2^106 = 2^107 */
1255 /* s1 = ftmp2 = y1 * z2**3 */
1256 felem_assign(ftmp6, y1);
1257 /* ftmp6[i] < 2^106 */
1261 smallfelem_mul(tmp, x2, small1);
1262 felem_reduce(ftmp4, tmp);
1264 /* h = ftmp4 = u2 - u1 */
1265 felem_diff_zero107(ftmp4, ftmp3);
1266 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1267 felem_shrink(small4, ftmp4);
1269 x_equal = smallfelem_is_zero(small4);
1271 /* z_out = ftmp5 * h */
1272 felem_small_mul(tmp, small4, ftmp5);
1273 felem_reduce(z_out, tmp);
1274 /* z_out[i] < 2^101 */
1276 /* ftmp = z1 * z1z1 */
1277 smallfelem_mul(tmp, small1, small3);
1278 felem_reduce(ftmp, tmp);
1280 /* s2 = tmp = y2 * z1**3 */
1281 felem_small_mul(tmp, y2, ftmp);
1282 felem_reduce(ftmp5, tmp);
1284 /* r = ftmp5 = (s2 - s1)*2 */
1285 felem_diff_zero107(ftmp5, ftmp6);
1286 /* ftmp5[i] < 2^107 + 2^107 = 2^108*/
1287 felem_scalar(ftmp5, 2);
1288 /* ftmp5[i] < 2^109 */
1289 felem_shrink(small1, ftmp5);
1290 y_equal = smallfelem_is_zero(small1);
1292 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1294 point_double(x3, y3, z3, x1, y1, z1);
1298 /* I = ftmp = (2h)**2 */
1299 felem_assign(ftmp, ftmp4);
1300 felem_scalar(ftmp, 2);
1301 /* ftmp[i] < 2*2^108 = 2^109 */
1302 felem_square(tmp, ftmp);
1303 felem_reduce(ftmp, tmp);
1305 /* J = ftmp2 = h * I */
1306 felem_mul(tmp, ftmp4, ftmp);
1307 felem_reduce(ftmp2, tmp);
1309 /* V = ftmp4 = U1 * I */
1310 felem_mul(tmp, ftmp3, ftmp);
1311 felem_reduce(ftmp4, tmp);
1313 /* x_out = r**2 - J - 2V */
1314 smallfelem_square(tmp, small1);
1315 felem_reduce(x_out, tmp);
1316 felem_assign(ftmp3, ftmp4);
1317 felem_scalar(ftmp4, 2);
1318 felem_sum(ftmp4, ftmp2);
1319 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1320 felem_diff(x_out, ftmp4);
1321 /* x_out[i] < 2^105 + 2^101 */
1323 /* y_out = r(V-x_out) - 2 * s1 * J */
1324 felem_diff_zero107(ftmp3, x_out);
1325 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1326 felem_small_mul(tmp, small1, ftmp3);
1327 felem_mul(tmp2, ftmp6, ftmp2);
1328 longfelem_scalar(tmp2, 2);
1329 /* tmp2[i] < 2*2^67 = 2^68 */
1330 longfelem_diff(tmp, tmp2);
1331 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1332 felem_reduce_zero105(y_out, tmp);
1333 /* y_out[i] < 2^106 */
1335 copy_small_conditional(x_out, x2, z1_is_zero);
1336 copy_conditional(x_out, x1, z2_is_zero);
1337 copy_small_conditional(y_out, y2, z1_is_zero);
1338 copy_conditional(y_out, y1, z2_is_zero);
1339 copy_small_conditional(z_out, z2, z1_is_zero);
1340 copy_conditional(z_out, z1, z2_is_zero);
1341 felem_assign(x3, x_out);
1342 felem_assign(y3, y_out);
1343 felem_assign(z3, z_out);
1346 /* point_add_small is the same as point_add, except that it operates on
1348 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1349 smallfelem x1, smallfelem y1, smallfelem z1,
1350 smallfelem x2, smallfelem y2, smallfelem z2)
1352 felem felem_x3, felem_y3, felem_z3;
1353 felem felem_x1, felem_y1, felem_z1;
1354 smallfelem_expand(felem_x1, x1);
1355 smallfelem_expand(felem_y1, y1);
1356 smallfelem_expand(felem_z1, z1);
1357 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1358 felem_shrink(x3, felem_x3);
1359 felem_shrink(y3, felem_y3);
1360 felem_shrink(z3, felem_z3);
1364 * Base point pre computation
1365 * --------------------------
1367 * Two different sorts of precomputed tables are used in the following code.
1368 * Each contain various points on the curve, where each point is three field
1369 * elements (x, y, z).
1371 * For the base point table, z is usually 1 (0 for the point at infinity).
1372 * This table has 2 * 16 elements, starting with the following:
1373 * index | bits | point
1374 * ------+---------+------------------------------
1377 * 2 | 0 0 1 0 | 2^64G
1378 * 3 | 0 0 1 1 | (2^64 + 1)G
1379 * 4 | 0 1 0 0 | 2^128G
1380 * 5 | 0 1 0 1 | (2^128 + 1)G
1381 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1382 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1383 * 8 | 1 0 0 0 | 2^192G
1384 * 9 | 1 0 0 1 | (2^192 + 1)G
1385 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1386 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1387 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1388 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1389 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1390 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1391 * followed by a copy of this with each element multiplied by 2^32.
1393 * The reason for this is so that we can clock bits into four different
1394 * locations when doing simple scalar multiplies against the base point,
1395 * and then another four locations using the second 16 elements.
1397 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1399 /* gmul is the table of precomputed base points */
1400 static const smallfelem gmul[2][16][3] = {
1404 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1405 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1407 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1408 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1410 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1411 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1413 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1414 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1416 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1417 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1419 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1420 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1422 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1423 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1425 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1426 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1428 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1429 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1431 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1432 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1434 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1435 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1437 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1438 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1440 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1441 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1443 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1444 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1446 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1447 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1452 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1453 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1455 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1456 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1458 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1459 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1461 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1462 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1464 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1465 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1467 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1468 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1470 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1471 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1473 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1474 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1476 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1477 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1479 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1480 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1482 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1483 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1485 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1486 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1488 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1489 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1491 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1492 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1494 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1495 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1498 /* select_point selects the |idx|th point from a precomputation table and
1499 * copies it to out. */
1500 static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1503 u64 *outlimbs = &out[0][0];
1504 memset(outlimbs, 0, 3 * sizeof(smallfelem));
1506 for (i = 0; i < size; i++)
1508 const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1515 for (j = 0; j < NLIMBS * 3; j++)
1516 outlimbs[j] |= inlimbs[j] & mask;
1520 /* get_bit returns the |i|th bit in |in| */
1521 static char get_bit(const felem_bytearray in, int i)
1523 if ((i < 0) || (i >= 256))
1525 return (in[i >> 3] >> (i & 7)) & 1;
1528 /* Interleaved point multiplication using precomputed point multiples:
1529 * The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[],
1530 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1531 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1532 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1533 static void batch_mul(felem x_out, felem y_out, felem z_out,
1534 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1535 const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1538 unsigned num, gen_mul = (g_scalar != NULL);
1544 /* set nq to the point at infinity */
1545 memset(nq, 0, 3 * sizeof(felem));
1547 /* Loop over all scalars msb-to-lsb, interleaving additions
1548 * of multiples of the generator (two in each of the last 32 rounds)
1549 * and additions of other points multiples (every 5th round).
1551 skip = 1; /* save two point operations in the first round */
1552 for (i = (num_points ? 255 : 31); i >= 0; --i)
1556 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1558 /* add multiples of the generator */
1559 if (gen_mul && (i <= 31))
1561 /* first, look 32 bits upwards */
1562 bits = get_bit(g_scalar, i + 224) << 3;
1563 bits |= get_bit(g_scalar, i + 160) << 2;
1564 bits |= get_bit(g_scalar, i + 96) << 1;
1565 bits |= get_bit(g_scalar, i + 32);
1566 /* select the point to add, in constant time */
1567 select_point(bits, 16, g_pre_comp[1], tmp);
1571 /* Arg 1 below is for "mixed" */
1572 point_add(nq[0], nq[1], nq[2],
1573 nq[0], nq[1], nq[2],
1574 1, tmp[0], tmp[1], tmp[2]);
1578 smallfelem_expand(nq[0], tmp[0]);
1579 smallfelem_expand(nq[1], tmp[1]);
1580 smallfelem_expand(nq[2], tmp[2]);
1584 /* second, look at the current position */
1585 bits = get_bit(g_scalar, i + 192) << 3;
1586 bits |= get_bit(g_scalar, i + 128) << 2;
1587 bits |= get_bit(g_scalar, i + 64) << 1;
1588 bits |= get_bit(g_scalar, i);
1589 /* select the point to add, in constant time */
1590 select_point(bits, 16, g_pre_comp[0], tmp);
1591 /* Arg 1 below is for "mixed" */
1592 point_add(nq[0], nq[1], nq[2],
1593 nq[0], nq[1], nq[2],
1594 1, tmp[0], tmp[1], tmp[2]);
1597 /* do other additions every 5 doublings */
1598 if (num_points && (i % 5 == 0))
1600 /* loop over all scalars */
1601 for (num = 0; num < num_points; ++num)
1603 bits = get_bit(scalars[num], i + 4) << 5;
1604 bits |= get_bit(scalars[num], i + 3) << 4;
1605 bits |= get_bit(scalars[num], i + 2) << 3;
1606 bits |= get_bit(scalars[num], i + 1) << 2;
1607 bits |= get_bit(scalars[num], i) << 1;
1608 bits |= get_bit(scalars[num], i - 1);
1609 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1611 /* select the point to add or subtract, in constant time */
1612 select_point(digit, 17, pre_comp[num], tmp);
1613 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
1614 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1615 felem_contract(tmp[1], ftmp);
1619 point_add(nq[0], nq[1], nq[2],
1620 nq[0], nq[1], nq[2],
1621 mixed, tmp[0], tmp[1], tmp[2]);
1625 smallfelem_expand(nq[0], tmp[0]);
1626 smallfelem_expand(nq[1], tmp[1]);
1627 smallfelem_expand(nq[2], tmp[2]);
1633 felem_assign(x_out, nq[0]);
1634 felem_assign(y_out, nq[1]);
1635 felem_assign(z_out, nq[2]);
1638 /* Precomputation for the group generator. */
1640 smallfelem g_pre_comp[2][16][3];
1642 } NISTP256_PRE_COMP;
1644 const EC_METHOD *EC_GFp_nistp256_method(void)
1646 static const EC_METHOD ret = {
1647 EC_FLAGS_DEFAULT_OCT,
1648 NID_X9_62_prime_field,
1649 ec_GFp_nistp256_group_init,
1650 ec_GFp_simple_group_finish,
1651 ec_GFp_simple_group_clear_finish,
1652 ec_GFp_nist_group_copy,
1653 ec_GFp_nistp256_group_set_curve,
1654 ec_GFp_simple_group_get_curve,
1655 ec_GFp_simple_group_get_degree,
1656 ec_GFp_simple_group_check_discriminant,
1657 ec_GFp_simple_point_init,
1658 ec_GFp_simple_point_finish,
1659 ec_GFp_simple_point_clear_finish,
1660 ec_GFp_simple_point_copy,
1661 ec_GFp_simple_point_set_to_infinity,
1662 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1663 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1664 ec_GFp_simple_point_set_affine_coordinates,
1665 ec_GFp_nistp256_point_get_affine_coordinates,
1666 0 /* point_set_compressed_coordinates */,
1671 ec_GFp_simple_invert,
1672 ec_GFp_simple_is_at_infinity,
1673 ec_GFp_simple_is_on_curve,
1675 ec_GFp_simple_make_affine,
1676 ec_GFp_simple_points_make_affine,
1677 ec_GFp_nistp256_points_mul,
1678 ec_GFp_nistp256_precompute_mult,
1679 ec_GFp_nistp256_have_precompute_mult,
1680 ec_GFp_nist_field_mul,
1681 ec_GFp_nist_field_sqr,
1683 0 /* field_encode */,
1684 0 /* field_decode */,
1685 0 /* field_set_to_one */ };
1690 /******************************************************************************/
1691 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1694 static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1696 NISTP256_PRE_COMP *ret = NULL;
1697 ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1700 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1703 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1704 ret->references = 1;
1708 static void *nistp256_pre_comp_dup(void *src_)
1710 NISTP256_PRE_COMP *src = src_;
1712 /* no need to actually copy, these objects never change! */
1713 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1718 static void nistp256_pre_comp_free(void *pre_)
1721 NISTP256_PRE_COMP *pre = pre_;
1726 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1733 static void nistp256_pre_comp_clear_free(void *pre_)
1736 NISTP256_PRE_COMP *pre = pre_;
1741 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1745 OPENSSL_cleanse(pre, sizeof *pre);
1749 /******************************************************************************/
1750 /* OPENSSL EC_METHOD FUNCTIONS
1753 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1756 ret = ec_GFp_simple_group_init(group);
1757 group->a_is_minus3 = 1;
1761 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1762 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1765 BN_CTX *new_ctx = NULL;
1766 BIGNUM *curve_p, *curve_a, *curve_b;
1769 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1771 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1772 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1773 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1774 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1775 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1776 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1777 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1778 (BN_cmp(curve_b, b)))
1780 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1781 EC_R_WRONG_CURVE_PARAMETERS);
1784 group->field_mod_func = BN_nist_mod_256;
1785 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1788 if (new_ctx != NULL)
1789 BN_CTX_free(new_ctx);
1793 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1794 * (X', Y') = (X/Z^2, Y/Z^3) */
1795 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1796 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1798 felem z1, z2, x_in, y_in;
1799 smallfelem x_out, y_out;
1802 if (EC_POINT_is_at_infinity(group, point))
1804 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1805 EC_R_POINT_AT_INFINITY);
1808 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1809 (!BN_to_felem(z1, &point->Z))) return 0;
1811 felem_square(tmp, z2); felem_reduce(z1, tmp);
1812 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1813 felem_contract(x_out, x_in);
1816 if (!smallfelem_to_BN(x, x_out)) {
1817 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1822 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1823 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1824 felem_contract(y_out, y_in);
1827 if (!smallfelem_to_BN(y, y_out))
1829 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1837 /* points below is of size |num|, and tmp_smallfelems is of size |num+1| */
1838 static void make_points_affine(size_t num, smallfelem points[][3], smallfelem tmp_smallfelems[])
1840 /* Runs in constant time, unless an input is the point at infinity
1841 * (which normally shouldn't happen). */
1842 ec_GFp_nistp_points_make_affine_internal(
1847 (void (*)(void *)) smallfelem_one,
1848 (int (*)(const void *)) smallfelem_is_zero_int,
1849 (void (*)(void *, const void *)) smallfelem_assign,
1850 (void (*)(void *, const void *)) smallfelem_square_contract,
1851 (void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
1852 (void (*)(void *, const void *)) smallfelem_inv_contract,
1853 /* nothing to contract */
1854 (void (*)(void *, const void *)) smallfelem_assign);
1857 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1858 * Result is stored in r (r can equal one of the inputs). */
1859 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1860 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1861 const BIGNUM *scalars[], BN_CTX *ctx)
1866 BN_CTX *new_ctx = NULL;
1867 BIGNUM *x, *y, *z, *tmp_scalar;
1868 felem_bytearray g_secret;
1869 felem_bytearray *secrets = NULL;
1870 smallfelem (*pre_comp)[17][3] = NULL;
1871 smallfelem *tmp_smallfelems = NULL;
1872 felem_bytearray tmp;
1873 unsigned i, num_bytes;
1874 int have_pre_comp = 0;
1875 size_t num_points = num;
1876 smallfelem x_in, y_in, z_in;
1877 felem x_out, y_out, z_out;
1878 NISTP256_PRE_COMP *pre = NULL;
1879 const smallfelem (*g_pre_comp)[16][3] = NULL;
1880 EC_POINT *generator = NULL;
1881 const EC_POINT *p = NULL;
1882 const BIGNUM *p_scalar = NULL;
1885 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1887 if (((x = BN_CTX_get(ctx)) == NULL) ||
1888 ((y = BN_CTX_get(ctx)) == NULL) ||
1889 ((z = BN_CTX_get(ctx)) == NULL) ||
1890 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1895 pre = EC_EX_DATA_get_data(group->extra_data,
1896 nistp256_pre_comp_dup, nistp256_pre_comp_free,
1897 nistp256_pre_comp_clear_free);
1899 /* we have precomputation, try to use it */
1900 g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
1902 /* try to use the standard precomputation */
1903 g_pre_comp = &gmul[0];
1904 generator = EC_POINT_new(group);
1905 if (generator == NULL)
1907 /* get the generator from precomputation */
1908 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1909 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1910 !smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1912 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1915 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1916 generator, x, y, z, ctx))
1918 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1919 /* precomputation matches generator */
1922 /* we don't have valid precomputation:
1923 * treat the generator as a random point */
1928 if (num_points >= 3)
1930 /* unless we precompute multiples for just one or two points,
1931 * converting those into affine form is time well spent */
1934 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1935 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
1937 tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
1938 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1940 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1944 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1945 * i.e., they contribute nothing to the linear combination */
1946 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1947 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1948 for (i = 0; i < num_points; ++i)
1951 /* we didn't have a valid precomputation, so we pick
1954 p = EC_GROUP_get0_generator(group);
1958 /* the i^th point */
1961 p_scalar = scalars[i];
1963 if ((p_scalar != NULL) && (p != NULL))
1965 /* reduce scalar to 0 <= scalar < 2^256 */
1966 if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
1968 /* this is an unusual input, and we don't guarantee
1969 * constant-timeness */
1970 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1972 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1975 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1978 num_bytes = BN_bn2bin(p_scalar, tmp);
1979 flip_endian(secrets[i], tmp, num_bytes);
1980 /* precompute multiples */
1981 if ((!BN_to_felem(x_out, &p->X)) ||
1982 (!BN_to_felem(y_out, &p->Y)) ||
1983 (!BN_to_felem(z_out, &p->Z))) goto err;
1984 felem_shrink(pre_comp[i][1][0], x_out);
1985 felem_shrink(pre_comp[i][1][1], y_out);
1986 felem_shrink(pre_comp[i][1][2], z_out);
1987 for (j = 2; j <= 16; ++j)
1992 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1993 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1994 pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1999 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
2000 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
2006 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
2009 /* the scalar for the generator */
2010 if ((scalar != NULL) && (have_pre_comp))
2012 memset(g_secret, 0, sizeof(g_secret));
2013 /* reduce scalar to 0 <= scalar < 2^256 */
2014 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
2016 /* this is an unusual input, and we don't guarantee
2017 * constant-timeness */
2018 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
2020 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2023 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2026 num_bytes = BN_bn2bin(scalar, tmp);
2027 flip_endian(g_secret, tmp, num_bytes);
2028 /* do the multiplication with generator precomputation*/
2029 batch_mul(x_out, y_out, z_out,
2030 (const felem_bytearray (*)) secrets, num_points,
2032 mixed, (const smallfelem (*)[17][3]) pre_comp,
2036 /* do the multiplication without generator precomputation */
2037 batch_mul(x_out, y_out, z_out,
2038 (const felem_bytearray (*)) secrets, num_points,
2039 NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
2040 /* reduce the output to its unique minimal representation */
2041 felem_contract(x_in, x_out);
2042 felem_contract(y_in, y_out);
2043 felem_contract(z_in, z_out);
2044 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2045 (!smallfelem_to_BN(z, z_in)))
2047 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2050 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2054 if (generator != NULL)
2055 EC_POINT_free(generator);
2056 if (new_ctx != NULL)
2057 BN_CTX_free(new_ctx);
2058 if (secrets != NULL)
2059 OPENSSL_free(secrets);
2060 if (pre_comp != NULL)
2061 OPENSSL_free(pre_comp);
2062 if (tmp_smallfelems != NULL)
2063 OPENSSL_free(tmp_smallfelems);
2067 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2070 NISTP256_PRE_COMP *pre = NULL;
2072 BN_CTX *new_ctx = NULL;
2074 EC_POINT *generator = NULL;
2075 smallfelem tmp_smallfelems[32];
2076 felem x_tmp, y_tmp, z_tmp;
2078 /* throw away old precomputation */
2079 EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2080 nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2082 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
2084 if (((x = BN_CTX_get(ctx)) == NULL) ||
2085 ((y = BN_CTX_get(ctx)) == NULL))
2087 /* get the generator */
2088 if (group->generator == NULL) goto err;
2089 generator = EC_POINT_new(group);
2090 if (generator == NULL)
2092 BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
2093 BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
2094 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2096 if ((pre = nistp256_pre_comp_new()) == NULL)
2098 /* if the generator is the standard one, use built-in precomputation */
2099 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2101 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2105 if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2106 (!BN_to_felem(y_tmp, &group->generator->Y)) ||
2107 (!BN_to_felem(z_tmp, &group->generator->Z)))
2109 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2110 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2111 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2112 /* compute 2^64*G, 2^128*G, 2^192*G for the first table,
2113 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
2115 for (i = 1; i <= 8; i <<= 1)
2118 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2119 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
2120 for (j = 0; j < 31; ++j)
2123 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2124 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2129 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2130 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2131 for (j = 0; j < 31; ++j)
2134 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2135 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
2138 for (i = 0; i < 2; i++)
2140 /* g_pre_comp[i][0] is the point at infinity */
2141 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2142 /* the remaining multiples */
2143 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2145 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2146 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2147 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2148 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2150 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2151 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2152 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2153 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2155 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2156 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2157 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2158 /* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
2160 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2161 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2162 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2163 for (j = 1; j < 8; ++j)
2165 /* odd multiples: add G resp. 2^32*G */
2167 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
2168 pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
2169 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
2172 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2174 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2175 nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2181 if (generator != NULL)
2182 EC_POINT_free(generator);
2183 if (new_ctx != NULL)
2184 BN_CTX_free(new_ctx);
2186 nistp256_pre_comp_free(pre);
2190 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2192 if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2193 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
2200 static void *dummy=&dummy;