1 /* crypto/ec/ecp_nistp256.c */
3 * Written by Adam Langley (Google) for the OpenSSL project
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
34 #include <openssl/err.h>
37 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
38 /* even with gcc, the typedef won't work for 32-bit platforms */
39 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
40 typedef __int128_t int128_t;
42 #error "Need GCC 3.1 or later to define type uint128_t"
50 /* The underlying field.
52 * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
53 * of this field into 32 bytes. We call this an felem_bytearray. */
55 typedef u8 felem_bytearray[32];
57 /* These are the parameters of P256, taken from FIPS 186-3, page 86. These
58 * values are big-endian. */
59 static const felem_bytearray nistp256_curve_params[5] = {
60 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
61 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
62 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
64 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
65 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
66 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
68 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
69 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
70 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
71 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
72 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
73 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
74 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
75 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
76 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
77 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
78 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
79 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
82 /* The representation of field elements.
83 * ------------------------------------
85 * We represent field elements with either four 128-bit values, eight 128-bit
86 * values, or four 64-bit values. The field element represented is:
87 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
89 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
91 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
92 * apart, but are 128-bits wide, the most significant bits of each limb overlap
93 * with the least significant bits of the next.
95 * A field element with four limbs is an 'felem'. One with eight limbs is a
98 * A field element with four, 64-bit values is called a 'smallfelem'. Small
99 * values are used as intermediate values before multiplication.
104 typedef uint128_t limb;
105 typedef limb felem[NLIMBS];
106 typedef limb longfelem[NLIMBS * 2];
107 typedef u64 smallfelem[NLIMBS];
109 /* This is the value of the prime as four 64-bit words, little-endian. */
110 static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
111 static const limb bottom32bits = 0xffffffff;
112 static const u64 bottom63bits = 0x7ffffffffffffffful;
114 /* bin32_to_felem takes a little-endian byte array and converts it into felem
115 * form. This assumes that the CPU is little-endian. */
116 static void bin32_to_felem(felem out, const u8 in[32])
118 out[0] = *((u64*) &in[0]);
119 out[1] = *((u64*) &in[8]);
120 out[2] = *((u64*) &in[16]);
121 out[3] = *((u64*) &in[24]);
124 /* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
125 * 32 byte array. This assumes that the CPU is little-endian. */
126 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
128 *((u64*) &out[0]) = in[0];
129 *((u64*) &out[8]) = in[1];
130 *((u64*) &out[16]) = in[2];
131 *((u64*) &out[24]) = in[3];
134 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
135 static void flip_endian(u8 *out, const u8 *in, unsigned len)
138 for (i = 0; i < len; ++i)
139 out[i] = in[len-1-i];
142 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
143 static int BN_to_felem(felem out, const BIGNUM *bn)
145 felem_bytearray b_in;
146 felem_bytearray b_out;
149 /* BN_bn2bin eats leading zeroes */
150 memset(b_out, 0, sizeof b_out);
151 num_bytes = BN_num_bytes(bn);
152 if (num_bytes > sizeof b_out)
154 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
157 if (BN_is_negative(bn))
159 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
162 num_bytes = BN_bn2bin(bn, b_in);
163 flip_endian(b_out, b_in, num_bytes);
164 bin32_to_felem(out, b_out);
168 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
169 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
171 felem_bytearray b_in, b_out;
172 smallfelem_to_bin32(b_in, in);
173 flip_endian(b_out, b_in, sizeof b_out);
174 return BN_bin2bn(b_out, sizeof b_out, out);
179 * ---------------- */
181 static void smallfelem_one(smallfelem out)
189 static void smallfelem_assign(smallfelem out, const smallfelem in)
197 static void felem_assign(felem out, const felem in)
205 /* felem_sum sets out = out + in. */
206 static void felem_sum(felem out, const felem in)
214 /* felem_small_sum sets out = out + in. */
215 static void felem_small_sum(felem out, const smallfelem in)
223 /* felem_scalar sets out = out * scalar */
224 static void felem_scalar(felem out, const u64 scalar)
232 /* longfelem_scalar sets out = out * scalar */
233 static void longfelem_scalar(longfelem out, const u64 scalar)
245 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
246 #define two105 (((limb)1) << 105)
247 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
249 /* zero105 is 0 mod p */
250 static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
252 /* smallfelem_neg sets |out| to |-small|
254 * out[i] < out[i] + 2^105
256 static void smallfelem_neg(felem out, const smallfelem small)
258 /* In order to prevent underflow, we subtract from 0 mod p. */
259 out[0] = zero105[0] - small[0];
260 out[1] = zero105[1] - small[1];
261 out[2] = zero105[2] - small[2];
262 out[3] = zero105[3] - small[3];
265 /* felem_diff subtracts |in| from |out|
269 * out[i] < out[i] + 2^105
271 static void felem_diff(felem out, const felem in)
273 /* In order to prevent underflow, we add 0 mod p before subtracting. */
274 out[0] += zero105[0];
275 out[1] += zero105[1];
276 out[2] += zero105[2];
277 out[3] += zero105[3];
285 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
286 #define two107 (((limb)1) << 107)
287 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
289 /* zero107 is 0 mod p */
290 static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
292 /* An alternative felem_diff for larger inputs |in|
293 * felem_diff_zero107 subtracts |in| from |out|
297 * out[i] < out[i] + 2^107
299 static void felem_diff_zero107(felem out, const felem in)
301 /* In order to prevent underflow, we add 0 mod p before subtracting. */
302 out[0] += zero107[0];
303 out[1] += zero107[1];
304 out[2] += zero107[2];
305 out[3] += zero107[3];
313 /* longfelem_diff subtracts |in| from |out|
317 * out[i] < out[i] + 2^70 + 2^40
319 static void longfelem_diff(longfelem out, const longfelem in)
321 static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
322 static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
323 static const limb two70 = (((limb)1) << 70);
324 static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
325 static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
327 /* add 0 mod p to avoid underflow */
331 out[3] += two70m40m38p6;
337 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
348 #define two64m0 (((limb)1) << 64) - 1
349 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
350 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
351 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
353 /* zero110 is 0 mod p */
354 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
356 /* felem_shrink converts an felem into a smallfelem. The result isn't quite
357 * minimal as the value may be greater than p.
364 static void felem_shrink(smallfelem out, const felem in)
369 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
372 tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
375 tmp[2] = zero110[2] + (u64) in[2];
376 tmp[0] = zero110[0] + in[0];
377 tmp[1] = zero110[1] + in[1];
378 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
380 /* We perform two partial reductions where we eliminate the
381 * high-word of tmp[3]. We don't update the other words till the end.
383 a = tmp[3] >> 64; /* a < 2^46 */
384 tmp[3] = (u64) tmp[3];
386 tmp[3] += ((limb)a) << 32;
390 a = tmp[3] >> 64; /* a < 2^15 */
391 b += a; /* b < 2^46 + 2^15 < 2^47 */
392 tmp[3] = (u64) tmp[3];
394 tmp[3] += ((limb)a) << 32;
395 /* tmp[3] < 2^64 + 2^47 */
397 /* This adjusts the other two words to complete the two partial
400 tmp[1] -= (((limb)b) << 32);
402 /* In order to make space in tmp[3] for the carry from 2 -> 3, we
403 * conditionally subtract kPrime if tmp[3] is large enough. */
405 /* As tmp[3] < 2^65, high is either 1 or 0 */
409 * all ones if the high word of tmp[3] is 1
410 * all zeros if the high word of tmp[3] if 0 */
414 * all ones if the MSB of low is 1
415 * all zeros if the MSB of low if 0 */
418 /* if low was greater than kPrime3Test then the MSB is zero */
422 * all ones if low was > kPrime3Test
423 * all zeros if low was <= kPrime3Test */
424 mask = (mask & low) | high;
425 tmp[0] -= mask & kPrime[0];
426 tmp[1] -= mask & kPrime[1];
427 /* kPrime[2] is zero, so omitted */
428 tmp[3] -= mask & kPrime[3];
429 /* tmp[3] < 2**64 - 2**32 + 1 */
431 tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
432 tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
433 tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
442 /* smallfelem_expand converts a smallfelem to an felem */
443 static void smallfelem_expand(felem out, const smallfelem in)
451 /* smallfelem_square sets |out| = |small|^2
455 * out[i] < 7 * 2^64 < 2^67
457 static void smallfelem_square(longfelem out, const smallfelem small)
462 a = ((uint128_t) small[0]) * small[0];
468 a = ((uint128_t) small[0]) * small[1];
475 a = ((uint128_t) small[0]) * small[2];
482 a = ((uint128_t) small[0]) * small[3];
488 a = ((uint128_t) small[1]) * small[2];
495 a = ((uint128_t) small[1]) * small[1];
501 a = ((uint128_t) small[1]) * small[3];
508 a = ((uint128_t) small[2]) * small[3];
516 a = ((uint128_t) small[2]) * small[2];
522 a = ((uint128_t) small[3]) * small[3];
529 /* felem_square sets |out| = |in|^2
533 * out[i] < 7 * 2^64 < 2^67
535 static void felem_square(longfelem out, const felem in)
538 felem_shrink(small, in);
539 smallfelem_square(out, small);
542 /* smallfelem_mul sets |out| = |small1| * |small2|
547 * out[i] < 7 * 2^64 < 2^67
549 static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
554 a = ((uint128_t) small1[0]) * small2[0];
561 a = ((uint128_t) small1[0]) * small2[1];
567 a = ((uint128_t) small1[1]) * small2[0];
574 a = ((uint128_t) small1[0]) * small2[2];
580 a = ((uint128_t) small1[1]) * small2[1];
586 a = ((uint128_t) small1[2]) * small2[0];
593 a = ((uint128_t) small1[0]) * small2[3];
599 a = ((uint128_t) small1[1]) * small2[2];
605 a = ((uint128_t) small1[2]) * small2[1];
611 a = ((uint128_t) small1[3]) * small2[0];
618 a = ((uint128_t) small1[1]) * small2[3];
624 a = ((uint128_t) small1[2]) * small2[2];
630 a = ((uint128_t) small1[3]) * small2[1];
637 a = ((uint128_t) small1[2]) * small2[3];
643 a = ((uint128_t) small1[3]) * small2[2];
650 a = ((uint128_t) small1[3]) * small2[3];
657 /* felem_mul sets |out| = |in1| * |in2|
662 * out[i] < 7 * 2^64 < 2^67
664 static void felem_mul(longfelem out, const felem in1, const felem in2)
666 smallfelem small1, small2;
667 felem_shrink(small1, in1);
668 felem_shrink(small2, in2);
669 smallfelem_mul(out, small1, small2);
672 /* felem_small_mul sets |out| = |small1| * |in2|
677 * out[i] < 7 * 2^64 < 2^67
679 static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
682 felem_shrink(small2, in2);
683 smallfelem_mul(out, small1, small2);
686 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
687 #define two100 (((limb)1) << 100)
688 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
689 /* zero100 is 0 mod p */
690 static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
692 /* Internal function for the different flavours of felem_reduce.
693 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
695 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
696 * out[1] >= in[7] + 2^32*in[4]
697 * out[2] >= in[5] + 2^32*in[5]
698 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
700 * out[0] <= out[0] + in[4] + 2^32*in[5]
701 * out[1] <= out[1] + in[5] + 2^33*in[6]
702 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
703 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
705 static void felem_reduce_(felem out, const longfelem in)
708 /* combine common terms from below */
709 c = in[4] + (in[5] << 32);
717 /* the remaining terms */
718 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
719 out[1] -= (in[4] << 32);
720 out[3] += (in[4] << 32);
722 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
723 out[2] -= (in[5] << 32);
725 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
727 out[0] -= (in[6] << 32);
728 out[1] += (in[6] << 33);
729 out[2] += (in[6] * 2);
730 out[3] -= (in[6] << 32);
732 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
734 out[0] -= (in[7] << 32);
735 out[2] += (in[7] << 33);
736 out[3] += (in[7] * 3);
739 /* felem_reduce converts a longfelem into an felem.
740 * To be called directly after felem_square or felem_mul.
742 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
743 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
747 static void felem_reduce(felem out, const longfelem in)
749 out[0] = zero100[0] + in[0];
750 out[1] = zero100[1] + in[1];
751 out[2] = zero100[2] + in[2];
752 out[3] = zero100[3] + in[3];
754 felem_reduce_(out, in);
756 /* out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
757 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
758 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
759 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
761 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
762 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
763 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
764 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
768 /* felem_reduce_zero105 converts a larger longfelem into an felem.
774 static void felem_reduce_zero105(felem out, const longfelem in)
776 out[0] = zero105[0] + in[0];
777 out[1] = zero105[1] + in[1];
778 out[2] = zero105[2] + in[2];
779 out[3] = zero105[3] + in[3];
781 felem_reduce_(out, in);
783 /* out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
784 * out[1] > 2^105 - 2^71 - 2^103 > 0
785 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
786 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
788 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
789 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
790 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
791 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
795 /* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
797 static void subtract_u64(u64* result, u64* carry, u64 v)
799 uint128_t r = *result;
801 *carry = (r >> 64) & 1;
805 /* felem_contract converts |in| to its unique, minimal representation.
809 static void felem_contract(smallfelem out, const felem in)
812 u64 all_equal_so_far = 0, result = 0, carry;
814 felem_shrink(out, in);
815 /* small is minimal except that the value might be > p */
818 /* We are doing a constant time test if out >= kPrime. We need to
819 * compare each u64, from most-significant to least significant. For
820 * each one, if all words so far have been equal (m is all ones) then a
821 * non-equal result is the answer. Otherwise we continue. */
822 for (i = 3; i < 4; i--)
825 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
826 /* if out[i] > kPrime[i] then a will underflow and the high
827 * 64-bits will all be set. */
828 result |= all_equal_so_far & ((u64) (a >> 64));
830 /* if kPrime[i] == out[i] then |equal| will be all zeros and
831 * the decrement will make it all ones. */
832 equal = kPrime[i] ^ out[i];
834 equal &= equal << 32;
835 equal &= equal << 16;
840 equal = ((s64) equal) >> 63;
842 all_equal_so_far &= equal;
845 /* if all_equal_so_far is still all ones then the two values are equal
846 * and so out >= kPrime is true. */
847 result |= all_equal_so_far;
849 /* if out >= kPrime then we subtract kPrime. */
850 subtract_u64(&out[0], &carry, result & kPrime[0]);
851 subtract_u64(&out[1], &carry, carry);
852 subtract_u64(&out[2], &carry, carry);
853 subtract_u64(&out[3], &carry, carry);
855 subtract_u64(&out[1], &carry, result & kPrime[1]);
856 subtract_u64(&out[2], &carry, carry);
857 subtract_u64(&out[3], &carry, carry);
859 subtract_u64(&out[2], &carry, result & kPrime[2]);
860 subtract_u64(&out[3], &carry, carry);
862 subtract_u64(&out[3], &carry, result & kPrime[3]);
865 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
870 smallfelem_square(longtmp, in);
871 felem_reduce(tmp, longtmp);
872 felem_contract(out, tmp);
875 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
880 smallfelem_mul(longtmp, in1, in2);
881 felem_reduce(tmp, longtmp);
882 felem_contract(out, tmp);
885 /* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
890 static limb smallfelem_is_zero(const smallfelem small)
895 u64 is_zero = small[0] | small[1] | small[2] | small[3];
897 is_zero &= is_zero << 32;
898 is_zero &= is_zero << 16;
899 is_zero &= is_zero << 8;
900 is_zero &= is_zero << 4;
901 is_zero &= is_zero << 2;
902 is_zero &= is_zero << 1;
903 is_zero = ((s64) is_zero) >> 63;
905 is_p = (small[0] ^ kPrime[0]) |
906 (small[1] ^ kPrime[1]) |
907 (small[2] ^ kPrime[2]) |
908 (small[3] ^ kPrime[3]);
916 is_p = ((s64) is_p) >> 63;
921 result |= ((limb) is_zero) << 64;
925 static int smallfelem_is_zero_int(const smallfelem small)
927 return (int) (smallfelem_is_zero(small) & ((limb)1));
930 /* felem_inv calculates |out| = |in|^{-1}
932 * Based on Fermat's Little Theorem:
934 * a^{p-1} = 1 (mod p)
935 * a^{p-2} = a^{-1} (mod p)
937 static void felem_inv(felem out, const felem in)
940 /* each e_I will hold |in|^{2^I - 1} */
941 felem e2, e4, e8, e16, e32, e64;
945 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
946 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
947 felem_assign(e2, ftmp);
948 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
949 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
950 felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
951 felem_assign(e4, ftmp);
952 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
953 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
954 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
955 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
956 felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
957 felem_assign(e8, ftmp);
958 for (i = 0; i < 8; i++) {
959 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
961 felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
962 felem_assign(e16, ftmp);
963 for (i = 0; i < 16; i++) {
964 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
966 felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
967 felem_assign(e32, ftmp);
968 for (i = 0; i < 32; i++) {
969 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
971 felem_assign(e64, ftmp);
972 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
973 for (i = 0; i < 192; i++) {
974 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
975 } /* 2^256 - 2^224 + 2^192 */
977 felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
978 for (i = 0; i < 16; i++) {
979 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
981 felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
982 for (i = 0; i < 8; i++) {
983 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
985 felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
986 for (i = 0; i < 4; i++) {
987 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
989 felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
990 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
991 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
992 felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
993 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
994 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
995 felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
997 felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1000 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1004 smallfelem_expand(tmp, in);
1005 felem_inv(tmp, tmp);
1006 felem_contract(out, tmp);
1012 * Building on top of the field operations we have the operations on the
1013 * elliptic curve group itself. Points on the curve are represented in Jacobian
1016 /* point_double calculates 2*(x_in, y_in, z_in)
1018 * The method is taken from:
1019 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1021 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1022 * while x_out == y_in is not (maybe this works, but it's not tested). */
1024 point_double(felem x_out, felem y_out, felem z_out,
1025 const felem x_in, const felem y_in, const felem z_in)
1027 longfelem tmp, tmp2;
1028 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1029 smallfelem small1, small2;
1031 felem_assign(ftmp, x_in);
1032 /* ftmp[i] < 2^106 */
1033 felem_assign(ftmp2, x_in);
1034 /* ftmp2[i] < 2^106 */
1037 felem_square(tmp, z_in);
1038 felem_reduce(delta, tmp);
1039 /* delta[i] < 2^101 */
1042 felem_square(tmp, y_in);
1043 felem_reduce(gamma, tmp);
1044 /* gamma[i] < 2^101 */
1045 felem_shrink(small1, gamma);
1047 /* beta = x*gamma */
1048 felem_small_mul(tmp, small1, x_in);
1049 felem_reduce(beta, tmp);
1050 /* beta[i] < 2^101 */
1052 /* alpha = 3*(x-delta)*(x+delta) */
1053 felem_diff(ftmp, delta);
1054 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1055 felem_sum(ftmp2, delta);
1056 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1057 felem_scalar(ftmp2, 3);
1058 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1059 felem_mul(tmp, ftmp, ftmp2);
1060 felem_reduce(alpha, tmp);
1061 /* alpha[i] < 2^101 */
1062 felem_shrink(small2, alpha);
1064 /* x' = alpha^2 - 8*beta */
1065 smallfelem_square(tmp, small2);
1066 felem_reduce(x_out, tmp);
1067 felem_assign(ftmp, beta);
1068 felem_scalar(ftmp, 8);
1069 /* ftmp[i] < 8 * 2^101 = 2^104 */
1070 felem_diff(x_out, ftmp);
1071 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1073 /* z' = (y + z)^2 - gamma - delta */
1074 felem_sum(delta, gamma);
1075 /* delta[i] < 2^101 + 2^101 = 2^102 */
1076 felem_assign(ftmp, y_in);
1077 felem_sum(ftmp, z_in);
1078 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1079 felem_square(tmp, ftmp);
1080 felem_reduce(z_out, tmp);
1081 felem_diff(z_out, delta);
1082 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1084 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1085 felem_scalar(beta, 4);
1086 /* beta[i] < 4 * 2^101 = 2^103 */
1087 felem_diff_zero107(beta, x_out);
1088 /* beta[i] < 2^107 + 2^103 < 2^108 */
1089 felem_small_mul(tmp, small2, beta);
1090 /* tmp[i] < 7 * 2^64 < 2^67 */
1091 smallfelem_square(tmp2, small1);
1092 /* tmp2[i] < 7 * 2^64 */
1093 longfelem_scalar(tmp2, 8);
1094 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1095 longfelem_diff(tmp, tmp2);
1096 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1097 felem_reduce_zero105(y_out, tmp);
1098 /* y_out[i] < 2^106 */
1101 /* point_double_small is the same as point_double, except that it operates on
1104 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1105 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1107 felem felem_x_out, felem_y_out, felem_z_out;
1108 felem felem_x_in, felem_y_in, felem_z_in;
1110 smallfelem_expand(felem_x_in, x_in);
1111 smallfelem_expand(felem_y_in, y_in);
1112 smallfelem_expand(felem_z_in, z_in);
1113 point_double(felem_x_out, felem_y_out, felem_z_out,
1114 felem_x_in, felem_y_in, felem_z_in);
1115 felem_shrink(x_out, felem_x_out);
1116 felem_shrink(y_out, felem_y_out);
1117 felem_shrink(z_out, felem_z_out);
1120 /* copy_conditional copies in to out iff mask is all ones. */
1122 copy_conditional(felem out, const felem in, limb mask)
1125 for (i = 0; i < NLIMBS; ++i)
1127 const limb tmp = mask & (in[i] ^ out[i]);
1132 /* copy_small_conditional copies in to out iff mask is all ones. */
1134 copy_small_conditional(felem out, const smallfelem in, limb mask)
1137 const u64 mask64 = mask;
1138 for (i = 0; i < NLIMBS; ++i)
1140 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1144 /* point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1146 * The method is taken from:
1147 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1148 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1150 * This function includes a branch for checking whether the two input points
1151 * are equal, (while not equal to the point at infinity). This case never
1152 * happens during single point multiplication, so there is no timing leak for
1153 * ECDH or ECDSA signing. */
1154 static void point_add(felem x3, felem y3, felem z3,
1155 const felem x1, const felem y1, const felem z1,
1156 const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
1158 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1159 longfelem tmp, tmp2;
1160 smallfelem small1, small2, small3, small4, small5;
1161 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1163 felem_shrink(small3, z1);
1165 z1_is_zero = smallfelem_is_zero(small3);
1166 z2_is_zero = smallfelem_is_zero(z2);
1168 /* ftmp = z1z1 = z1**2 */
1169 smallfelem_square(tmp, small3);
1170 felem_reduce(ftmp, tmp);
1171 /* ftmp[i] < 2^101 */
1172 felem_shrink(small1, ftmp);
1176 /* ftmp2 = z2z2 = z2**2 */
1177 smallfelem_square(tmp, z2);
1178 felem_reduce(ftmp2, tmp);
1179 /* ftmp2[i] < 2^101 */
1180 felem_shrink(small2, ftmp2);
1182 felem_shrink(small5, x1);
1184 /* u1 = ftmp3 = x1*z2z2 */
1185 smallfelem_mul(tmp, small5, small2);
1186 felem_reduce(ftmp3, tmp);
1187 /* ftmp3[i] < 2^101 */
1189 /* ftmp5 = z1 + z2 */
1190 felem_assign(ftmp5, z1);
1191 felem_small_sum(ftmp5, z2);
1192 /* ftmp5[i] < 2^107 */
1194 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1195 felem_square(tmp, ftmp5);
1196 felem_reduce(ftmp5, tmp);
1197 /* ftmp2 = z2z2 + z1z1 */
1198 felem_sum(ftmp2, ftmp);
1199 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1200 felem_diff(ftmp5, ftmp2);
1201 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1203 /* ftmp2 = z2 * z2z2 */
1204 smallfelem_mul(tmp, small2, z2);
1205 felem_reduce(ftmp2, tmp);
1207 /* s1 = ftmp2 = y1 * z2**3 */
1208 felem_mul(tmp, y1, ftmp2);
1209 felem_reduce(ftmp6, tmp);
1210 /* ftmp6[i] < 2^101 */
1214 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1216 /* u1 = ftmp3 = x1*z2z2 */
1217 felem_assign(ftmp3, x1);
1218 /* ftmp3[i] < 2^106 */
1221 felem_assign(ftmp5, z1);
1222 felem_scalar(ftmp5, 2);
1223 /* ftmp5[i] < 2*2^106 = 2^107 */
1225 /* s1 = ftmp2 = y1 * z2**3 */
1226 felem_assign(ftmp6, y1);
1227 /* ftmp6[i] < 2^106 */
1231 smallfelem_mul(tmp, x2, small1);
1232 felem_reduce(ftmp4, tmp);
1234 /* h = ftmp4 = u2 - u1 */
1235 felem_diff_zero107(ftmp4, ftmp3);
1236 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1237 felem_shrink(small4, ftmp4);
1239 x_equal = smallfelem_is_zero(small4);
1241 /* z_out = ftmp5 * h */
1242 felem_small_mul(tmp, small4, ftmp5);
1243 felem_reduce(z_out, tmp);
1244 /* z_out[i] < 2^101 */
1246 /* ftmp = z1 * z1z1 */
1247 smallfelem_mul(tmp, small1, small3);
1248 felem_reduce(ftmp, tmp);
1250 /* s2 = tmp = y2 * z1**3 */
1251 felem_small_mul(tmp, y2, ftmp);
1252 felem_reduce(ftmp5, tmp);
1254 /* r = ftmp5 = (s2 - s1)*2 */
1255 felem_diff_zero107(ftmp5, ftmp6);
1256 /* ftmp5[i] < 2^107 + 2^107 = 2^108*/
1257 felem_scalar(ftmp5, 2);
1258 /* ftmp5[i] < 2^109 */
1259 felem_shrink(small1, ftmp5);
1260 y_equal = smallfelem_is_zero(small1);
1262 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1264 point_double(x3, y3, z3, x1, y1, z1);
1268 /* I = ftmp = (2h)**2 */
1269 felem_assign(ftmp, ftmp4);
1270 felem_scalar(ftmp, 2);
1271 /* ftmp[i] < 2*2^108 = 2^109 */
1272 felem_square(tmp, ftmp);
1273 felem_reduce(ftmp, tmp);
1275 /* J = ftmp2 = h * I */
1276 felem_mul(tmp, ftmp4, ftmp);
1277 felem_reduce(ftmp2, tmp);
1279 /* V = ftmp4 = U1 * I */
1280 felem_mul(tmp, ftmp3, ftmp);
1281 felem_reduce(ftmp4, tmp);
1283 /* x_out = r**2 - J - 2V */
1284 smallfelem_square(tmp, small1);
1285 felem_reduce(x_out, tmp);
1286 felem_assign(ftmp3, ftmp4);
1287 felem_scalar(ftmp4, 2);
1288 felem_sum(ftmp4, ftmp2);
1289 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1290 felem_diff(x_out, ftmp4);
1291 /* x_out[i] < 2^105 + 2^101 */
1293 /* y_out = r(V-x_out) - 2 * s1 * J */
1294 felem_diff_zero107(ftmp3, x_out);
1295 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1296 felem_small_mul(tmp, small1, ftmp3);
1297 felem_mul(tmp2, ftmp6, ftmp2);
1298 longfelem_scalar(tmp2, 2);
1299 /* tmp2[i] < 2*2^67 = 2^68 */
1300 longfelem_diff(tmp, tmp2);
1301 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1302 felem_reduce_zero105(y_out, tmp);
1303 /* y_out[i] < 2^106 */
1305 copy_small_conditional(x_out, x2, z1_is_zero);
1306 copy_conditional(x_out, x1, z2_is_zero);
1307 copy_small_conditional(y_out, y2, z1_is_zero);
1308 copy_conditional(y_out, y1, z2_is_zero);
1309 copy_small_conditional(z_out, z2, z1_is_zero);
1310 copy_conditional(z_out, z1, z2_is_zero);
1311 felem_assign(x3, x_out);
1312 felem_assign(y3, y_out);
1313 felem_assign(z3, z_out);
1316 /* point_add_small is the same as point_add, except that it operates on
1318 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1319 smallfelem x1, smallfelem y1, smallfelem z1,
1320 smallfelem x2, smallfelem y2, smallfelem z2)
1322 felem felem_x3, felem_y3, felem_z3;
1323 felem felem_x1, felem_y1, felem_z1;
1324 smallfelem_expand(felem_x1, x1);
1325 smallfelem_expand(felem_y1, y1);
1326 smallfelem_expand(felem_z1, z1);
1327 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1328 felem_shrink(x3, felem_x3);
1329 felem_shrink(y3, felem_y3);
1330 felem_shrink(z3, felem_z3);
1333 /* Base point pre computation
1334 * --------------------------
1336 * Two different sorts of precomputed tables are used in the following code.
1337 * Each contain various points on the curve, where each point is three field
1338 * elements (x, y, z).
1340 * For the base point table, z is usually 1 (0 for the point at infinity).
1341 * This table has 2 * 16 elements, starting with the following:
1342 * index | bits | point
1343 * ------+---------+------------------------------
1346 * 2 | 0 0 1 0 | 2^64G
1347 * 3 | 0 0 1 1 | (2^64 + 1)G
1348 * 4 | 0 1 0 0 | 2^128G
1349 * 5 | 0 1 0 1 | (2^128 + 1)G
1350 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1351 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1352 * 8 | 1 0 0 0 | 2^192G
1353 * 9 | 1 0 0 1 | (2^192 + 1)G
1354 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1355 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1356 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1357 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1358 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1359 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1360 * followed by a copy of this with each element multiplied by 2^32.
1362 * The reason for this is so that we can clock bits into four different
1363 * locations when doing simple scalar multiplies against the base point,
1364 * and then another four locations using the second 16 elements.
1366 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1368 /* gmul is the table of precomputed base points */
1369 static const smallfelem gmul[2][16][3] =
1373 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1374 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1376 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1377 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1379 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1380 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1382 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1383 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1385 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1386 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1388 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1389 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1391 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1392 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1394 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1395 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1397 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1398 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1400 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1401 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1403 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1404 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1406 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1407 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1409 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1410 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1412 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1413 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1415 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1416 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1421 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1422 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1424 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1425 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1427 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1428 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1430 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1431 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1433 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1434 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1436 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1437 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1439 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1440 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1442 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1443 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1445 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1446 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1448 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1449 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1451 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1452 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1454 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1455 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1457 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1458 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1460 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1461 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1463 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1464 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1467 /* select_point selects the |idx|th point from a precomputation table and
1468 * copies it to out. */
1469 static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1472 u64 *outlimbs = &out[0][0];
1473 memset(outlimbs, 0, 3 * sizeof(smallfelem));
1475 for (i = 0; i < size; i++)
1477 const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1484 for (j = 0; j < NLIMBS * 3; j++)
1485 outlimbs[j] |= inlimbs[j] & mask;
1489 /* get_bit returns the |i|th bit in |in| */
1490 static char get_bit(const felem_bytearray in, int i)
1492 if ((i < 0) || (i >= 256))
1494 return (in[i >> 3] >> (i & 7)) & 1;
1497 /* Interleaved point multiplication using precomputed point multiples:
1498 * The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[],
1499 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1500 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1501 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1502 static void batch_mul(felem x_out, felem y_out, felem z_out,
1503 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1504 const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1507 unsigned num, gen_mul = (g_scalar != NULL);
1513 /* set nq to the point at infinity */
1514 memset(nq, 0, 3 * sizeof(felem));
1516 /* Loop over all scalars msb-to-lsb, interleaving additions
1517 * of multiples of the generator (two in each of the last 32 rounds)
1518 * and additions of other points multiples (every 5th round).
1520 skip = 1; /* save two point operations in the first round */
1521 for (i = (num_points ? 255 : 31); i >= 0; --i)
1525 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1527 /* add multiples of the generator */
1528 if (gen_mul && (i <= 31))
1530 /* first, look 32 bits upwards */
1531 bits = get_bit(g_scalar, i + 224) << 3;
1532 bits |= get_bit(g_scalar, i + 160) << 2;
1533 bits |= get_bit(g_scalar, i + 96) << 1;
1534 bits |= get_bit(g_scalar, i + 32);
1535 /* select the point to add, in constant time */
1536 select_point(bits, 16, g_pre_comp[1], tmp);
1540 point_add(nq[0], nq[1], nq[2],
1541 nq[0], nq[1], nq[2],
1542 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1546 smallfelem_expand(nq[0], tmp[0]);
1547 smallfelem_expand(nq[1], tmp[1]);
1548 smallfelem_expand(nq[2], tmp[2]);
1552 /* second, look at the current position */
1553 bits = get_bit(g_scalar, i + 192) << 3;
1554 bits |= get_bit(g_scalar, i + 128) << 2;
1555 bits |= get_bit(g_scalar, i + 64) << 1;
1556 bits |= get_bit(g_scalar, i);
1557 /* select the point to add, in constant time */
1558 select_point(bits, 16, g_pre_comp[0], tmp);
1559 point_add(nq[0], nq[1], nq[2],
1560 nq[0], nq[1], nq[2],
1561 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1564 /* do other additions every 5 doublings */
1565 if (num_points && (i % 5 == 0))
1567 /* loop over all scalars */
1568 for (num = 0; num < num_points; ++num)
1570 bits = get_bit(scalars[num], i + 4) << 5;
1571 bits |= get_bit(scalars[num], i + 3) << 4;
1572 bits |= get_bit(scalars[num], i + 2) << 3;
1573 bits |= get_bit(scalars[num], i + 1) << 2;
1574 bits |= get_bit(scalars[num], i) << 1;
1575 bits |= get_bit(scalars[num], i - 1);
1576 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1578 /* select the point to add or subtract, in constant time */
1579 select_point(digit, 17, pre_comp[num], tmp);
1580 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
1581 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1582 felem_contract(tmp[1], ftmp);
1586 point_add(nq[0], nq[1], nq[2],
1587 nq[0], nq[1], nq[2],
1588 mixed, tmp[0], tmp[1], tmp[2]);
1592 smallfelem_expand(nq[0], tmp[0]);
1593 smallfelem_expand(nq[1], tmp[1]);
1594 smallfelem_expand(nq[2], tmp[2]);
1600 felem_assign(x_out, nq[0]);
1601 felem_assign(y_out, nq[1]);
1602 felem_assign(z_out, nq[2]);
1605 /* Precomputation for the group generator. */
1607 smallfelem g_pre_comp[2][16][3];
1609 } NISTP256_PRE_COMP;
1611 const EC_METHOD *EC_GFp_nistp256_method(void)
1613 static const EC_METHOD ret = {
1614 EC_FLAGS_DEFAULT_OCT,
1615 NID_X9_62_prime_field,
1616 ec_GFp_nistp256_group_init,
1617 ec_GFp_simple_group_finish,
1618 ec_GFp_simple_group_clear_finish,
1619 ec_GFp_nist_group_copy,
1620 ec_GFp_nistp256_group_set_curve,
1621 ec_GFp_simple_group_get_curve,
1622 ec_GFp_simple_group_get_degree,
1623 ec_GFp_simple_group_check_discriminant,
1624 ec_GFp_simple_point_init,
1625 ec_GFp_simple_point_finish,
1626 ec_GFp_simple_point_clear_finish,
1627 ec_GFp_simple_point_copy,
1628 ec_GFp_simple_point_set_to_infinity,
1629 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1630 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1631 ec_GFp_simple_point_set_affine_coordinates,
1632 ec_GFp_nistp256_point_get_affine_coordinates,
1633 0 /* point_set_compressed_coordinates */,
1638 ec_GFp_simple_invert,
1639 ec_GFp_simple_is_at_infinity,
1640 ec_GFp_simple_is_on_curve,
1642 ec_GFp_simple_make_affine,
1643 ec_GFp_simple_points_make_affine,
1644 ec_GFp_nistp256_points_mul,
1645 ec_GFp_nistp256_precompute_mult,
1646 ec_GFp_nistp256_have_precompute_mult,
1647 ec_GFp_nist_field_mul,
1648 ec_GFp_nist_field_sqr,
1650 0 /* field_encode */,
1651 0 /* field_decode */,
1652 0 /* field_set_to_one */ };
1657 /******************************************************************************/
1658 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1661 static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1663 NISTP256_PRE_COMP *ret = NULL;
1664 ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1667 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1670 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1671 ret->references = 1;
1675 static void *nistp256_pre_comp_dup(void *src_)
1677 NISTP256_PRE_COMP *src = src_;
1679 /* no need to actually copy, these objects never change! */
1680 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1685 static void nistp256_pre_comp_free(void *pre_)
1688 NISTP256_PRE_COMP *pre = pre_;
1693 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1700 static void nistp256_pre_comp_clear_free(void *pre_)
1703 NISTP256_PRE_COMP *pre = pre_;
1708 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1712 OPENSSL_cleanse(pre, sizeof *pre);
1716 /******************************************************************************/
1717 /* OPENSSL EC_METHOD FUNCTIONS
1720 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1723 ret = ec_GFp_simple_group_init(group);
1724 group->a_is_minus3 = 1;
1728 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1729 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1732 BN_CTX *new_ctx = NULL;
1733 BIGNUM *curve_p, *curve_a, *curve_b;
1736 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1738 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1739 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1740 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1741 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1742 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1743 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1744 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1745 (BN_cmp(curve_b, b)))
1747 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1748 EC_R_WRONG_CURVE_PARAMETERS);
1751 group->field_mod_func = BN_nist_mod_256;
1752 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1755 if (new_ctx != NULL)
1756 BN_CTX_free(new_ctx);
1760 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1761 * (X', Y') = (X/Z^2, Y/Z^3) */
1762 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1763 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1765 felem z1, z2, x_in, y_in;
1766 smallfelem x_out, y_out;
1769 if (EC_POINT_is_at_infinity(group, point))
1771 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1772 EC_R_POINT_AT_INFINITY);
1775 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1776 (!BN_to_felem(z1, &point->Z))) return 0;
1778 felem_square(tmp, z2); felem_reduce(z1, tmp);
1779 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1780 felem_contract(x_out, x_in);
1783 if (!smallfelem_to_BN(x, x_out)) {
1784 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1789 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1790 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1791 felem_contract(y_out, y_in);
1794 if (!smallfelem_to_BN(y, y_out))
1796 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1804 static void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */])
1806 /* Runs in constant time, unless an input is the point at infinity
1807 * (which normally shouldn't happen). */
1808 ec_GFp_nistp_points_make_affine_internal(
1813 (void (*)(void *)) smallfelem_one,
1814 (int (*)(const void *)) smallfelem_is_zero_int,
1815 (void (*)(void *, const void *)) smallfelem_assign,
1816 (void (*)(void *, const void *)) smallfelem_square_contract,
1817 (void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
1818 (void (*)(void *, const void *)) smallfelem_inv_contract,
1819 (void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */);
1822 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1823 * Result is stored in r (r can equal one of the inputs). */
1824 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1825 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1826 const BIGNUM *scalars[], BN_CTX *ctx)
1831 BN_CTX *new_ctx = NULL;
1832 BIGNUM *x, *y, *z, *tmp_scalar;
1833 felem_bytearray g_secret;
1834 felem_bytearray *secrets = NULL;
1835 smallfelem (*pre_comp)[17][3] = NULL;
1836 smallfelem *tmp_smallfelems = NULL;
1837 felem_bytearray tmp;
1838 unsigned i, num_bytes;
1839 int have_pre_comp = 0;
1840 size_t num_points = num;
1841 smallfelem x_in, y_in, z_in;
1842 felem x_out, y_out, z_out;
1843 NISTP256_PRE_COMP *pre = NULL;
1844 const smallfelem (*g_pre_comp)[16][3] = NULL;
1845 EC_POINT *generator = NULL;
1846 const EC_POINT *p = NULL;
1847 const BIGNUM *p_scalar = NULL;
1850 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1852 if (((x = BN_CTX_get(ctx)) == NULL) ||
1853 ((y = BN_CTX_get(ctx)) == NULL) ||
1854 ((z = BN_CTX_get(ctx)) == NULL) ||
1855 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1860 pre = EC_EX_DATA_get_data(group->extra_data,
1861 nistp256_pre_comp_dup, nistp256_pre_comp_free,
1862 nistp256_pre_comp_clear_free);
1864 /* we have precomputation, try to use it */
1865 g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
1867 /* try to use the standard precomputation */
1868 g_pre_comp = &gmul[0];
1869 generator = EC_POINT_new(group);
1870 if (generator == NULL)
1872 /* get the generator from precomputation */
1873 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1874 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1875 !smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1877 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1880 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1881 generator, x, y, z, ctx))
1883 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1884 /* precomputation matches generator */
1887 /* we don't have valid precomputation:
1888 * treat the generator as a random point */
1893 if (num_points >= 3)
1895 /* unless we precompute multiples for just one or two points,
1896 * converting those into affine form is time well spent */
1899 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1900 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
1902 tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
1903 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1905 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1909 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1910 * i.e., they contribute nothing to the linear combination */
1911 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1912 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1913 for (i = 0; i < num_points; ++i)
1916 /* we didn't have a valid precomputation, so we pick
1919 p = EC_GROUP_get0_generator(group);
1923 /* the i^th point */
1926 p_scalar = scalars[i];
1928 if ((p_scalar != NULL) && (p != NULL))
1930 /* reduce scalar to 0 <= scalar < 2^256 */
1931 if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
1933 /* this is an unusual input, and we don't guarantee
1934 * constant-timeness */
1935 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1937 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1940 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1943 num_bytes = BN_bn2bin(p_scalar, tmp);
1944 flip_endian(secrets[i], tmp, num_bytes);
1945 /* precompute multiples */
1946 if ((!BN_to_felem(x_out, &p->X)) ||
1947 (!BN_to_felem(y_out, &p->Y)) ||
1948 (!BN_to_felem(z_out, &p->Z))) goto err;
1949 felem_shrink(pre_comp[i][1][0], x_out);
1950 felem_shrink(pre_comp[i][1][1], y_out);
1951 felem_shrink(pre_comp[i][1][2], z_out);
1952 for (j = 2; j <= 16; ++j)
1957 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1958 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1959 pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1964 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1965 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1971 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
1974 /* the scalar for the generator */
1975 if ((scalar != NULL) && (have_pre_comp))
1977 memset(g_secret, 0, sizeof(g_secret));
1978 /* reduce scalar to 0 <= scalar < 2^256 */
1979 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
1981 /* this is an unusual input, and we don't guarantee
1982 * constant-timeness */
1983 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1985 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1988 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1991 num_bytes = BN_bn2bin(scalar, tmp);
1992 flip_endian(g_secret, tmp, num_bytes);
1993 /* do the multiplication with generator precomputation*/
1994 batch_mul(x_out, y_out, z_out,
1995 (const felem_bytearray (*)) secrets, num_points,
1997 mixed, (const smallfelem (*)[17][3]) pre_comp,
2001 /* do the multiplication without generator precomputation */
2002 batch_mul(x_out, y_out, z_out,
2003 (const felem_bytearray (*)) secrets, num_points,
2004 NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
2005 /* reduce the output to its unique minimal representation */
2006 felem_contract(x_in, x_out);
2007 felem_contract(y_in, y_out);
2008 felem_contract(z_in, z_out);
2009 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2010 (!smallfelem_to_BN(z, z_in)))
2012 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2015 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2019 if (generator != NULL)
2020 EC_POINT_free(generator);
2021 if (new_ctx != NULL)
2022 BN_CTX_free(new_ctx);
2023 if (secrets != NULL)
2024 OPENSSL_free(secrets);
2025 if (pre_comp != NULL)
2026 OPENSSL_free(pre_comp);
2027 if (tmp_smallfelems != NULL)
2028 OPENSSL_free(tmp_smallfelems);
2032 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2035 NISTP256_PRE_COMP *pre = NULL;
2037 BN_CTX *new_ctx = NULL;
2039 EC_POINT *generator = NULL;
2040 smallfelem tmp_smallfelems[32];
2041 felem x_tmp, y_tmp, z_tmp;
2043 /* throw away old precomputation */
2044 EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2045 nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2047 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
2049 if (((x = BN_CTX_get(ctx)) == NULL) ||
2050 ((y = BN_CTX_get(ctx)) == NULL))
2052 /* get the generator */
2053 if (group->generator == NULL) goto err;
2054 generator = EC_POINT_new(group);
2055 if (generator == NULL)
2057 BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
2058 BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
2059 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2061 if ((pre = nistp256_pre_comp_new()) == NULL)
2063 /* if the generator is the standard one, use built-in precomputation */
2064 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2066 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2070 if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2071 (!BN_to_felem(y_tmp, &group->generator->Y)) ||
2072 (!BN_to_felem(z_tmp, &group->generator->Z)))
2074 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2075 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2076 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2077 /* compute 2^64*G, 2^128*G, 2^192*G for the first table,
2078 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
2080 for (i = 1; i <= 8; i <<= 1)
2083 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2084 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
2085 for (j = 0; j < 31; ++j)
2088 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2089 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2094 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2095 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2096 for (j = 0; j < 31; ++j)
2099 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2100 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
2103 for (i = 0; i < 2; i++)
2105 /* g_pre_comp[i][0] is the point at infinity */
2106 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2107 /* the remaining multiples */
2108 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2110 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2111 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2112 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2113 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2115 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2116 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2117 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2118 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2120 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2121 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2122 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2123 /* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
2125 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2126 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2127 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2128 for (j = 1; j < 8; ++j)
2130 /* odd multiples: add G resp. 2^32*G */
2132 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
2133 pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
2134 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
2137 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2139 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2140 nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2146 if (generator != NULL)
2147 EC_POINT_free(generator);
2148 if (new_ctx != NULL)
2149 BN_CTX_free(new_ctx);
2151 nistp256_pre_comp_free(pre);
2155 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2157 if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2158 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
2165 static void *dummy=&dummy;