1 /* crypto/ec/ecp_nistp224.c */
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
27 #ifdef EC_NISTP_64_GCC_128
30 #include <openssl/err.h>
33 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
34 /* even with gcc, the typedef won't work for 32-bit platforms */
35 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
37 #error "Need GCC 3.1 or later to define type uint128_t"
45 /******************************************************************************/
46 /* INTERNAL REPRESENTATION OF FIELD ELEMENTS
48 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
49 * using 64-bit coefficients called 'limbs',
50 * and sometimes (for multiplication results) as
51 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
52 * using 128-bit coefficients called 'widelimbs'.
53 * A 4-limb representation is an 'felem';
54 * a 7-widelimb representation is a 'widefelem'.
55 * Even within felems, bits of adjacent limbs overlap, and we don't always
56 * reduce the representations: we ensure that inputs to each felem
57 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
58 * and fit into a 128-bit word without overflow. The coefficients are then
59 * again partially reduced to obtain an felem satisfying a_i < 2^57.
60 * We only reduce to the unique minimal representation at the end of the
64 typedef uint64_t limb;
65 typedef uint128_t widelimb;
67 typedef limb felem[4];
68 typedef widelimb widefelem[7];
70 /* Field element represented as a byte arrary.
71 * 28*8 = 224 bits is also the group order size for the elliptic curve,
72 * and we also use this type for scalars for point multiplication.
74 typedef u8 felem_bytearray[28];
76 static const felem_bytearray nistp224_curve_params[5] = {
77 {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* p */
78 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
79 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01},
80 {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* a */
81 0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF,
82 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE},
83 {0xB4,0x05,0x0A,0x85,0x0C,0x04,0xB3,0xAB,0xF5,0x41, /* b */
84 0x32,0x56,0x50,0x44,0xB0,0xB7,0xD7,0xBF,0xD8,0xBA,
85 0x27,0x0B,0x39,0x43,0x23,0x55,0xFF,0xB4},
86 {0xB7,0x0E,0x0C,0xBD,0x6B,0xB4,0xBF,0x7F,0x32,0x13, /* x */
87 0x90,0xB9,0x4A,0x03,0xC1,0xD3,0x56,0xC2,0x11,0x22,
88 0x34,0x32,0x80,0xD6,0x11,0x5C,0x1D,0x21},
89 {0xbd,0x37,0x63,0x88,0xb5,0xf7,0x23,0xfb,0x4c,0x22, /* y */
90 0xdf,0xe6,0xcd,0x43,0x75,0xa0,0x5a,0x07,0x47,0x64,
91 0x44,0xd5,0x81,0x99,0x85,0x00,0x7e,0x34}
94 /* Precomputed multiples of the standard generator
95 * Points are given in coordinates (X, Y, Z) where Z normally is 1
96 * (0 for the point at infinity).
97 * For each field element, slice a_0 is word 0, etc.
99 * The table has 2 * 16 elements, starting with the following:
100 * index | bits | point
101 * ------+---------+------------------------------
104 * 2 | 0 0 1 0 | 2^56G
105 * 3 | 0 0 1 1 | (2^56 + 1)G
106 * 4 | 0 1 0 0 | 2^112G
107 * 5 | 0 1 0 1 | (2^112 + 1)G
108 * 6 | 0 1 1 0 | (2^112 + 2^56)G
109 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
110 * 8 | 1 0 0 0 | 2^168G
111 * 9 | 1 0 0 1 | (2^168 + 1)G
112 * 10 | 1 0 1 0 | (2^168 + 2^56)G
113 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
114 * 12 | 1 1 0 0 | (2^168 + 2^112)G
115 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
116 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
117 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
118 * followed by a copy of this with each element multiplied by 2^28.
120 * The reason for this is so that we can clock bits into four different
121 * locations when doing simple scalar multiplies against the base point,
122 * and then another four locations using the second 16 elements.
124 static const felem gmul[2][16][3] =
128 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
129 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
131 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
132 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
134 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
135 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
137 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
138 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
140 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
141 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
143 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
144 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
146 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
147 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
149 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
150 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
152 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
153 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
155 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
156 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
158 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
159 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
161 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
162 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
164 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
165 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
167 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
168 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
170 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
171 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
176 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
177 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
179 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
180 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
182 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
183 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
185 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
186 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
188 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
189 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
191 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
192 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
194 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
195 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
197 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
198 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
200 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
201 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
203 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
204 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
206 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
207 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
209 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
210 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
212 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
213 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
215 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
216 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
218 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
219 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
222 /* Precomputation for the group generator. */
224 felem g_pre_comp[2][16][3];
228 const EC_METHOD *EC_GFp_nistp224_method(void)
230 static const EC_METHOD ret = {
231 EC_FLAGS_DEFAULT_OCT,
232 NID_X9_62_prime_field,
233 ec_GFp_nistp224_group_init,
234 ec_GFp_simple_group_finish,
235 ec_GFp_simple_group_clear_finish,
236 ec_GFp_nist_group_copy,
237 ec_GFp_nistp224_group_set_curve,
238 ec_GFp_simple_group_get_curve,
239 ec_GFp_simple_group_get_degree,
240 ec_GFp_simple_group_check_discriminant,
241 ec_GFp_simple_point_init,
242 ec_GFp_simple_point_finish,
243 ec_GFp_simple_point_clear_finish,
244 ec_GFp_simple_point_copy,
245 ec_GFp_simple_point_set_to_infinity,
246 ec_GFp_simple_set_Jprojective_coordinates_GFp,
247 ec_GFp_simple_get_Jprojective_coordinates_GFp,
248 ec_GFp_simple_point_set_affine_coordinates,
249 ec_GFp_nistp224_point_get_affine_coordinates,
250 0 /* point_set_compressed_coordinates */,
255 ec_GFp_simple_invert,
256 ec_GFp_simple_is_at_infinity,
257 ec_GFp_simple_is_on_curve,
259 ec_GFp_simple_make_affine,
260 ec_GFp_simple_points_make_affine,
261 ec_GFp_nistp224_points_mul,
262 ec_GFp_nistp224_precompute_mult,
263 ec_GFp_nistp224_have_precompute_mult,
264 ec_GFp_nist_field_mul,
265 ec_GFp_nist_field_sqr,
267 0 /* field_encode */,
268 0 /* field_decode */,
269 0 /* field_set_to_one */ };
274 /* Helper functions to convert field elements to/from internal representation */
275 static void bin28_to_felem(felem out, const u8 in[28])
277 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
278 out[1] = (*((const uint64_t *)(in+7))) & 0x00ffffffffffffff;
279 out[2] = (*((const uint64_t *)(in+14))) & 0x00ffffffffffffff;
280 out[3] = (*((const uint64_t *)(in+21))) & 0x00ffffffffffffff;
283 static void felem_to_bin28(u8 out[28], const felem in)
286 for (i = 0; i < 7; ++i)
288 out[i] = in[0]>>(8*i);
289 out[i+7] = in[1]>>(8*i);
290 out[i+14] = in[2]>>(8*i);
291 out[i+21] = in[3]>>(8*i);
295 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
296 static void flip_endian(u8 *out, const u8 *in, unsigned len)
299 for (i = 0; i < len; ++i)
300 out[i] = in[len-1-i];
303 /* From OpenSSL BIGNUM to internal representation */
304 static int BN_to_felem(felem out, const BIGNUM *bn)
306 felem_bytearray b_in;
307 felem_bytearray b_out;
310 /* BN_bn2bin eats leading zeroes */
311 memset(b_out, 0, sizeof b_out);
312 num_bytes = BN_num_bytes(bn);
313 if (num_bytes > sizeof b_out)
315 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
318 if (BN_is_negative(bn))
320 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
323 num_bytes = BN_bn2bin(bn, b_in);
324 flip_endian(b_out, b_in, num_bytes);
325 bin28_to_felem(out, b_out);
329 /* From internal representation to OpenSSL BIGNUM */
330 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
332 felem_bytearray b_in, b_out;
333 felem_to_bin28(b_in, in);
334 flip_endian(b_out, b_in, sizeof b_out);
335 return BN_bin2bn(b_out, sizeof b_out, out);
338 /******************************************************************************/
341 * Field operations, using the internal representation of field elements.
342 * NB! These operations are specific to our point multiplication and cannot be
343 * expected to be correct in general - e.g., multiplication with a large scalar
344 * will cause an overflow.
348 static void felem_one(felem out)
356 static void felem_assign(felem out, const felem in)
364 /* Sum two field elements: out += in */
365 static void felem_sum(felem out, const felem in)
373 /* Get negative value: out = -in */
374 /* Assumes in[i] < 2^57 */
375 static void felem_neg(felem out, const felem in)
377 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
378 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
379 static const limb two58m42m2 = (((limb) 1) << 58) -
380 (((limb) 1) << 42) - (((limb) 1) << 2);
382 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
383 out[0] = two58p2 - in[0];
384 out[1] = two58m42m2 - in[1];
385 out[2] = two58m2 - in[2];
386 out[3] = two58m2 - in[3];
389 /* Subtract field elements: out -= in */
390 /* Assumes in[i] < 2^57 */
391 static void felem_diff(felem out, const felem in)
393 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
394 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
395 static const limb two58m42m2 = (((limb) 1) << 58) -
396 (((limb) 1) << 42) - (((limb) 1) << 2);
398 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
400 out[1] += two58m42m2;
410 /* Subtract in unreduced 128-bit mode: out -= in */
411 /* Assumes in[i] < 2^119 */
412 static void widefelem_diff(widefelem out, const widefelem in)
414 static const widelimb two120 = ((widelimb) 1) << 120;
415 static const widelimb two120m64 = (((widelimb) 1) << 120) -
416 (((widelimb) 1) << 64);
417 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
418 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
420 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
425 out[4] += two120m104m64;
438 /* Subtract in mixed mode: out128 -= in64 */
440 static void felem_diff_128_64(widefelem out, const felem in)
442 static const widelimb two64p8 = (((widelimb) 1) << 64) +
443 (((widelimb) 1) << 8);
444 static const widelimb two64m8 = (((widelimb) 1) << 64) -
445 (((widelimb) 1) << 8);
446 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
447 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
449 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
451 out[1] += two64m48m8;
461 /* Multiply a field element by a scalar: out = out * scalar
462 * The scalars we actually use are small, so results fit without overflow */
463 static void felem_scalar(felem out, const limb scalar)
471 /* Multiply an unreduced field element by a scalar: out = out * scalar
472 * The scalars we actually use are small, so results fit without overflow */
473 static void widefelem_scalar(widefelem out, const widelimb scalar)
484 /* Square a field element: out = in^2 */
485 static void felem_square(widefelem out, const felem in)
487 limb tmp0, tmp1, tmp2;
488 tmp0 = 2 * in[0]; tmp1 = 2 * in[1]; tmp2 = 2 * in[2];
489 out[0] = ((widelimb) in[0]) * in[0];
490 out[1] = ((widelimb) in[0]) * tmp1;
491 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
492 out[3] = ((widelimb) in[3]) * tmp0 +
493 ((widelimb) in[1]) * tmp2;
494 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
495 out[5] = ((widelimb) in[3]) * tmp2;
496 out[6] = ((widelimb) in[3]) * in[3];
499 /* Multiply two field elements: out = in1 * in2 */
500 static void felem_mul(widefelem out, const felem in1, const felem in2)
502 out[0] = ((widelimb) in1[0]) * in2[0];
503 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
504 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
505 ((widelimb) in1[2]) * in2[0];
506 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
507 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
508 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
509 ((widelimb) in1[3]) * in2[1];
510 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
511 out[6] = ((widelimb) in1[3]) * in2[3];
514 /* Reduce seven 128-bit coefficients to four 64-bit coefficients.
515 * Requires in[i] < 2^126,
516 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
517 static void felem_reduce(felem out, const widefelem in)
519 static const widelimb two127p15 = (((widelimb) 1) << 127) +
520 (((widelimb) 1) << 15);
521 static const widelimb two127m71 = (((widelimb) 1) << 127) -
522 (((widelimb) 1) << 71);
523 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
524 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
527 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
528 output[0] = in[0] + two127p15;
529 output[1] = in[1] + two127m71m55;
530 output[2] = in[2] + two127m71;
534 /* Eliminate in[4], in[5], in[6] */
535 output[4] += in[6] >> 16;
536 output[3] += (in[6] & 0xffff) << 40;
539 output[3] += in[5] >> 16;
540 output[2] += (in[5] & 0xffff) << 40;
543 output[2] += output[4] >> 16;
544 output[1] += (output[4] & 0xffff) << 40;
545 output[0] -= output[4];
547 /* Carry 2 -> 3 -> 4 */
548 output[3] += output[2] >> 56;
549 output[2] &= 0x00ffffffffffffff;
551 output[4] = output[3] >> 56;
552 output[3] &= 0x00ffffffffffffff;
554 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
556 /* Eliminate output[4] */
557 output[2] += output[4] >> 16;
558 /* output[2] < 2^56 + 2^56 = 2^57 */
559 output[1] += (output[4] & 0xffff) << 40;
560 output[0] -= output[4];
562 /* Carry 0 -> 1 -> 2 -> 3 */
563 output[1] += output[0] >> 56;
564 out[0] = output[0] & 0x00ffffffffffffff;
566 output[2] += output[1] >> 56;
567 /* output[2] < 2^57 + 2^72 */
568 out[1] = output[1] & 0x00ffffffffffffff;
569 output[3] += output[2] >> 56;
570 /* output[3] <= 2^56 + 2^16 */
571 out[2] = output[2] & 0x00ffffffffffffff;
573 /* out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
574 * out[3] <= 2^56 + 2^16 (due to final carry),
579 static void felem_square_reduce(felem out, const felem in)
582 felem_square(tmp, in);
583 felem_reduce(out, tmp);
586 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
589 felem_mul(tmp, in1, in2);
590 felem_reduce(out, tmp);
593 /* Reduce to unique minimal representation.
594 * Requires 0 <= in < 2*p (always call felem_reduce first) */
595 static void felem_contract(felem out, const felem in)
597 static const int64_t two56 = ((limb) 1) << 56;
598 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
599 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
605 /* Case 1: a = 1 iff in >= 2^224 */
609 tmp[3] &= 0x00ffffffffffffff;
610 /* Case 2: a = 0 iff p <= in < 2^224, i.e.,
611 * the high 128 bits are all 1 and the lower part is non-zero */
612 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
613 (((int64_t)(in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
614 a &= 0x00ffffffffffffff;
615 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
617 /* subtract 2^224 - 2^96 + 1 if a is all-one*/
618 tmp[3] &= a ^ 0xffffffffffffffff;
619 tmp[2] &= a ^ 0xffffffffffffffff;
620 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
623 /* eliminate negative coefficients: if tmp[0] is negative, tmp[1] must
624 * be non-zero, so we only need one step */
629 /* carry 1 -> 2 -> 3 */
630 tmp[2] += tmp[1] >> 56;
631 tmp[1] &= 0x00ffffffffffffff;
633 tmp[3] += tmp[2] >> 56;
634 tmp[2] &= 0x00ffffffffffffff;
636 /* Now 0 <= out < p */
643 /* Zero-check: returns 1 if input is 0, and 0 otherwise.
644 * We know that field elements are reduced to in < 2^225,
645 * so we only need to check three cases: 0, 2^224 - 2^96 + 1,
646 * and 2^225 - 2^97 + 2 */
647 static limb felem_is_zero(const felem in)
649 limb zero, two224m96p1, two225m97p2;
651 zero = in[0] | in[1] | in[2] | in[3];
652 zero = (((int64_t)(zero) - 1) >> 63) & 1;
653 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
654 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
655 two224m96p1 = (((int64_t)(two224m96p1) - 1) >> 63) & 1;
656 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
657 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
658 two225m97p2 = (((int64_t)(two225m97p2) - 1) >> 63) & 1;
659 return (zero | two224m96p1 | two225m97p2);
662 static limb felem_is_zero_int(const felem in)
664 return (int) (felem_is_zero(in) & ((limb)1));
667 /* Invert a field element */
668 /* Computation chain copied from djb's code */
669 static void felem_inv(felem out, const felem in)
671 felem ftmp, ftmp2, ftmp3, ftmp4;
675 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2 */
676 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 1 */
677 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2 */
678 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 1 */
679 felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
680 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
681 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
682 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 1 */
683 felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
684 for (i = 0; i < 5; ++i) /* 2^12 - 2^6 */
686 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
688 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
689 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
690 for (i = 0; i < 11; ++i) /* 2^24 - 2^12 */
692 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
694 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
695 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
696 for (i = 0; i < 23; ++i) /* 2^48 - 2^24 */
698 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
700 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
701 felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
702 for (i = 0; i < 47; ++i) /* 2^96 - 2^48 */
704 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
706 felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
707 felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
708 for (i = 0; i < 23; ++i) /* 2^120 - 2^24 */
710 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
712 felem_mul(tmp, ftmp2, ftmp4); felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
713 for (i = 0; i < 6; ++i) /* 2^126 - 2^6 */
715 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
717 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^126 - 1 */
718 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^127 - 2 */
719 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^127 - 1 */
720 for (i = 0; i < 97; ++i) /* 2^224 - 2^97 */
722 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
724 felem_mul(tmp, ftmp, ftmp3); felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
727 /* Copy in constant time:
728 * if icopy == 1, copy in to out,
729 * if icopy == 0, copy out to itself. */
731 copy_conditional(felem out, const felem in, limb icopy)
734 /* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one */
735 const limb copy = -icopy;
736 for (i = 0; i < 4; ++i)
738 const limb tmp = copy & (in[i] ^ out[i]);
743 /******************************************************************************/
744 /* ELLIPTIC CURVE POINT OPERATIONS
746 * Points are represented in Jacobian projective coordinates:
747 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
748 * or to the point at infinity if Z == 0.
752 /* Double an elliptic curve point:
753 * (X', Y', Z') = 2 * (X, Y, Z), where
754 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
755 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
756 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
757 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
758 * while x_out == y_in is not (maybe this works, but it's not tested). */
760 point_double(felem x_out, felem y_out, felem z_out,
761 const felem x_in, const felem y_in, const felem z_in)
764 felem delta, gamma, beta, alpha, ftmp, ftmp2;
766 felem_assign(ftmp, x_in);
767 felem_assign(ftmp2, x_in);
770 felem_square(tmp, z_in);
771 felem_reduce(delta, tmp);
774 felem_square(tmp, y_in);
775 felem_reduce(gamma, tmp);
778 felem_mul(tmp, x_in, gamma);
779 felem_reduce(beta, tmp);
781 /* alpha = 3*(x-delta)*(x+delta) */
782 felem_diff(ftmp, delta);
783 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
784 felem_sum(ftmp2, delta);
785 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
786 felem_scalar(ftmp2, 3);
787 /* ftmp2[i] < 3 * 2^58 < 2^60 */
788 felem_mul(tmp, ftmp, ftmp2);
789 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
790 felem_reduce(alpha, tmp);
792 /* x' = alpha^2 - 8*beta */
793 felem_square(tmp, alpha);
794 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
795 felem_assign(ftmp, beta);
796 felem_scalar(ftmp, 8);
797 /* ftmp[i] < 8 * 2^57 = 2^60 */
798 felem_diff_128_64(tmp, ftmp);
799 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
800 felem_reduce(x_out, tmp);
802 /* z' = (y + z)^2 - gamma - delta */
803 felem_sum(delta, gamma);
804 /* delta[i] < 2^57 + 2^57 = 2^58 */
805 felem_assign(ftmp, y_in);
806 felem_sum(ftmp, z_in);
807 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
808 felem_square(tmp, ftmp);
809 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
810 felem_diff_128_64(tmp, delta);
811 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
812 felem_reduce(z_out, tmp);
814 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
815 felem_scalar(beta, 4);
816 /* beta[i] < 4 * 2^57 = 2^59 */
817 felem_diff(beta, x_out);
818 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
819 felem_mul(tmp, alpha, beta);
820 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
821 felem_square(tmp2, gamma);
822 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
823 widefelem_scalar(tmp2, 8);
824 /* tmp2[i] < 8 * 2^116 = 2^119 */
825 widefelem_diff(tmp, tmp2);
826 /* tmp[i] < 2^119 + 2^120 < 2^121 */
827 felem_reduce(y_out, tmp);
830 /* Add two elliptic curve points:
831 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
832 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
833 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
834 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
835 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
836 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
838 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
841 /* This function is not entirely constant-time:
842 * it includes a branch for checking whether the two input points are equal,
843 * (while not equal to the point at infinity).
844 * This case never happens during single point multiplication,
845 * so there is no timing leak for ECDH or ECDSA signing. */
846 static void point_add(felem x3, felem y3, felem z3,
847 const felem x1, const felem y1, const felem z1,
848 const int mixed, const felem x2, const felem y2, const felem z2)
850 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
852 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
857 felem_square(tmp, z2);
858 felem_reduce(ftmp2, tmp);
861 felem_mul(tmp, ftmp2, z2);
862 felem_reduce(ftmp4, tmp);
864 /* ftmp4 = z2^3*y1 */
865 felem_mul(tmp2, ftmp4, y1);
866 felem_reduce(ftmp4, tmp2);
868 /* ftmp2 = z2^2*x1 */
869 felem_mul(tmp2, ftmp2, x1);
870 felem_reduce(ftmp2, tmp2);
874 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
876 /* ftmp4 = z2^3*y1 */
877 felem_assign(ftmp4, y1);
879 /* ftmp2 = z2^2*x1 */
880 felem_assign(ftmp2, x1);
884 felem_square(tmp, z1);
885 felem_reduce(ftmp, tmp);
888 felem_mul(tmp, ftmp, z1);
889 felem_reduce(ftmp3, tmp);
892 felem_mul(tmp, ftmp3, y2);
893 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
895 /* ftmp3 = z1^3*y2 - z2^3*y1 */
896 felem_diff_128_64(tmp, ftmp4);
897 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
898 felem_reduce(ftmp3, tmp);
901 felem_mul(tmp, ftmp, x2);
902 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
904 /* ftmp = z1^2*x2 - z2^2*x1 */
905 felem_diff_128_64(tmp, ftmp2);
906 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
907 felem_reduce(ftmp, tmp);
909 /* the formulae are incorrect if the points are equal
910 * so we check for this and do doubling if this happens */
911 x_equal = felem_is_zero(ftmp);
912 y_equal = felem_is_zero(ftmp3);
913 z1_is_zero = felem_is_zero(z1);
914 z2_is_zero = felem_is_zero(z2);
915 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
916 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
918 point_double(x3, y3, z3, x1, y1, z1);
925 felem_mul(tmp, z1, z2);
926 felem_reduce(ftmp5, tmp);
930 /* special case z2 = 0 is handled later */
931 felem_assign(ftmp5, z1);
934 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
935 felem_mul(tmp, ftmp, ftmp5);
936 felem_reduce(z_out, tmp);
938 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
939 felem_assign(ftmp5, ftmp);
940 felem_square(tmp, ftmp);
941 felem_reduce(ftmp, tmp);
943 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
944 felem_mul(tmp, ftmp, ftmp5);
945 felem_reduce(ftmp5, tmp);
947 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
948 felem_mul(tmp, ftmp2, ftmp);
949 felem_reduce(ftmp2, tmp);
951 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
952 felem_mul(tmp, ftmp4, ftmp5);
953 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
955 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
956 felem_square(tmp2, ftmp3);
957 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
959 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
960 felem_diff_128_64(tmp2, ftmp5);
961 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
963 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
964 felem_assign(ftmp5, ftmp2);
965 felem_scalar(ftmp5, 2);
966 /* ftmp5[i] < 2 * 2^57 = 2^58 */
968 /* x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
969 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
970 felem_diff_128_64(tmp2, ftmp5);
971 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
972 felem_reduce(x_out, tmp2);
974 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
975 felem_diff(ftmp2, x_out);
976 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
978 /* tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) */
979 felem_mul(tmp2, ftmp3, ftmp2);
980 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
982 /* y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
983 z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
984 widefelem_diff(tmp2, tmp);
985 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
986 felem_reduce(y_out, tmp2);
988 /* the result (x_out, y_out, z_out) is incorrect if one of the inputs is
989 * the point at infinity, so we need to check for this separately */
991 /* if point 1 is at infinity, copy point 2 to output, and vice versa */
992 copy_conditional(x_out, x2, z1_is_zero);
993 copy_conditional(x_out, x1, z2_is_zero);
994 copy_conditional(y_out, y2, z1_is_zero);
995 copy_conditional(y_out, y1, z2_is_zero);
996 copy_conditional(z_out, z2, z1_is_zero);
997 copy_conditional(z_out, z1, z2_is_zero);
998 felem_assign(x3, x_out);
999 felem_assign(y3, y_out);
1000 felem_assign(z3, z_out);
1003 /* select_point selects the |index|th point from a precomputation table and
1004 * copies it to out. */
1005 static void select_point(const u64 index, unsigned int size, const felem pre_comp[/*size*/][3], felem out[3])
1008 limb *outlimbs = &out[0][0];
1009 memset(outlimbs, 0, 3 * sizeof(felem));
1011 for (i = 0; i < size; i++)
1013 const limb *inlimbs = &pre_comp[i][0][0];
1014 u64 mask = i ^ index;
1020 for (j = 0; j < 4 * 3; j++)
1021 outlimbs[j] |= inlimbs[j] & mask;
1025 /* get_bit returns the |i|th bit in |in| */
1026 static char get_bit(const felem_bytearray in, unsigned i)
1030 return (in[i >> 3] >> (i & 7)) & 1;
1033 /* Interleaved point multiplication using precomputed point multiples:
1034 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1035 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1036 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1037 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1038 static void batch_mul(felem x_out, felem y_out, felem z_out,
1039 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1040 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[2][16][3])
1044 unsigned gen_mul = (g_scalar != NULL);
1045 felem nq[3], tmp[4];
1049 /* set nq to the point at infinity */
1050 memset(nq, 0, 3 * sizeof(felem));
1052 /* Loop over all scalars msb-to-lsb, interleaving additions
1053 * of multiples of the generator (two in each of the last 28 rounds)
1054 * and additions of other points multiples (every 5th round).
1056 skip = 1; /* save two point operations in the first round */
1057 for (i = (num_points ? 220 : 27); i >= 0; --i)
1061 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1063 /* add multiples of the generator */
1064 if (gen_mul && (i <= 27))
1066 /* first, look 28 bits upwards */
1067 bits = get_bit(g_scalar, i + 196) << 3;
1068 bits |= get_bit(g_scalar, i + 140) << 2;
1069 bits |= get_bit(g_scalar, i + 84) << 1;
1070 bits |= get_bit(g_scalar, i + 28);
1071 /* select the point to add, in constant time */
1072 select_point(bits, 16, g_pre_comp[1], tmp);
1076 point_add(nq[0], nq[1], nq[2],
1077 nq[0], nq[1], nq[2],
1078 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1082 memcpy(nq, tmp, 3 * sizeof(felem));
1086 /* second, look at the current position */
1087 bits = get_bit(g_scalar, i + 168) << 3;
1088 bits |= get_bit(g_scalar, i + 112) << 2;
1089 bits |= get_bit(g_scalar, i + 56) << 1;
1090 bits |= get_bit(g_scalar, i);
1091 /* select the point to add, in constant time */
1092 select_point(bits, 16, g_pre_comp[0], tmp);
1093 point_add(nq[0], nq[1], nq[2],
1094 nq[0], nq[1], nq[2],
1095 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1098 /* do other additions every 5 doublings */
1099 if (num_points && (i % 5 == 0))
1101 /* loop over all scalars */
1102 for (num = 0; num < num_points; ++num)
1104 bits = get_bit(scalars[num], i + 4) << 5;
1105 bits |= get_bit(scalars[num], i + 3) << 4;
1106 bits |= get_bit(scalars[num], i + 2) << 3;
1107 bits |= get_bit(scalars[num], i + 1) << 2;
1108 bits |= get_bit(scalars[num], i) << 1;
1109 bits |= get_bit(scalars[num], i - 1);
1110 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1112 /* select the point to add or subtract */
1113 select_point(digit, 17, pre_comp[num], tmp);
1114 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
1115 copy_conditional(tmp[1], tmp[3], sign);
1119 point_add(nq[0], nq[1], nq[2],
1120 nq[0], nq[1], nq[2],
1121 mixed, tmp[0], tmp[1], tmp[2]);
1125 memcpy(nq, tmp, 3 * sizeof(felem));
1131 felem_assign(x_out, nq[0]);
1132 felem_assign(y_out, nq[1]);
1133 felem_assign(z_out, nq[2]);
1136 /******************************************************************************/
1137 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1140 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1142 NISTP224_PRE_COMP *ret = NULL;
1143 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1146 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1149 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1150 ret->references = 1;
1154 static void *nistp224_pre_comp_dup(void *src_)
1156 NISTP224_PRE_COMP *src = src_;
1158 /* no need to actually copy, these objects never change! */
1159 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1164 static void nistp224_pre_comp_free(void *pre_)
1167 NISTP224_PRE_COMP *pre = pre_;
1172 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1179 static void nistp224_pre_comp_clear_free(void *pre_)
1182 NISTP224_PRE_COMP *pre = pre_;
1187 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1191 OPENSSL_cleanse(pre, sizeof *pre);
1195 /******************************************************************************/
1196 /* OPENSSL EC_METHOD FUNCTIONS
1199 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1202 ret = ec_GFp_simple_group_init(group);
1203 group->a_is_minus3 = 1;
1207 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1208 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1211 BN_CTX *new_ctx = NULL;
1212 BIGNUM *curve_p, *curve_a, *curve_b;
1215 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1217 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1218 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1219 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1220 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1221 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1222 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1223 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1224 (BN_cmp(curve_b, b)))
1226 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1227 EC_R_WRONG_CURVE_PARAMETERS);
1230 group->field_mod_func = BN_nist_mod_224;
1231 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1234 if (new_ctx != NULL)
1235 BN_CTX_free(new_ctx);
1239 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1240 * (X', Y') = (X/Z^2, Y/Z^3) */
1241 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1242 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1244 felem z1, z2, x_in, y_in, x_out, y_out;
1247 if (EC_POINT_is_at_infinity(group, point))
1249 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1250 EC_R_POINT_AT_INFINITY);
1253 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1254 (!BN_to_felem(z1, &point->Z))) return 0;
1256 felem_square(tmp, z2); felem_reduce(z1, tmp);
1257 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1258 felem_contract(x_out, x_in);
1261 if (!felem_to_BN(x, x_out)) {
1262 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1267 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1268 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1269 felem_contract(y_out, y_in);
1272 if (!felem_to_BN(y, y_out)) {
1273 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1281 static void make_points_affine(size_t num, felem points[/*num*/][3], felem tmp_felems[/*num+1*/])
1283 /* Runs in constant time, unless an input is the point at infinity
1284 * (which normally shouldn't happen). */
1285 ec_GFp_nistp_points_make_affine_internal(
1290 (void (*)(void *)) felem_one,
1291 (int (*)(const void *)) felem_is_zero_int,
1292 (void (*)(void *, const void *)) felem_assign,
1293 (void (*)(void *, const void *)) felem_square_reduce,
1294 (void (*)(void *, const void *, const void *)) felem_mul_reduce,
1295 (void (*)(void *, const void *)) felem_inv,
1296 (void (*)(void *, const void *)) felem_contract);
1299 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1300 * Result is stored in r (r can equal one of the inputs). */
1301 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1302 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1303 const BIGNUM *scalars[], BN_CTX *ctx)
1309 BN_CTX *new_ctx = NULL;
1310 BIGNUM *x, *y, *z, *tmp_scalar;
1311 felem_bytearray g_secret;
1312 felem_bytearray *secrets = NULL;
1313 felem (*pre_comp)[17][3] = NULL;
1314 felem *tmp_felems = NULL;
1315 felem_bytearray tmp;
1317 int have_pre_comp = 0;
1318 size_t num_points = num;
1319 felem x_in, y_in, z_in, x_out, y_out, z_out;
1320 NISTP224_PRE_COMP *pre = NULL;
1321 const felem (*g_pre_comp)[16][3] = NULL;
1322 EC_POINT *generator = NULL;
1323 const EC_POINT *p = NULL;
1324 const BIGNUM *p_scalar = NULL;
1327 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1329 if (((x = BN_CTX_get(ctx)) == NULL) ||
1330 ((y = BN_CTX_get(ctx)) == NULL) ||
1331 ((z = BN_CTX_get(ctx)) == NULL) ||
1332 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1337 pre = EC_EX_DATA_get_data(group->extra_data,
1338 nistp224_pre_comp_dup, nistp224_pre_comp_free,
1339 nistp224_pre_comp_clear_free);
1341 /* we have precomputation, try to use it */
1342 g_pre_comp = (const felem (*)[16][3]) pre->g_pre_comp;
1344 /* try to use the standard precomputation */
1345 g_pre_comp = &gmul[0];
1346 generator = EC_POINT_new(group);
1347 if (generator == NULL)
1349 /* get the generator from precomputation */
1350 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1351 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1352 !felem_to_BN(z, g_pre_comp[0][1][2]))
1354 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1357 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1358 generator, x, y, z, ctx))
1360 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1361 /* precomputation matches generator */
1364 /* we don't have valid precomputation:
1365 * treat the generator as a random point */
1366 num_points = num_points + 1;
1371 if (num_points >= 3)
1373 /* unless we precompute multiples for just one or two points,
1374 * converting those into affine form is time well spent */
1377 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1378 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1380 tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1381 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
1383 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1387 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1388 * i.e., they contribute nothing to the linear combination */
1389 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1390 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1391 for (i = 0; i < num_points; ++i)
1396 p = EC_GROUP_get0_generator(group);
1400 /* the i^th point */
1403 p_scalar = scalars[i];
1405 if ((p_scalar != NULL) && (p != NULL))
1407 /* reduce scalar to 0 <= scalar < 2^224 */
1408 if ((BN_num_bits(p_scalar) > 224) || (BN_is_negative(p_scalar)))
1410 /* this is an unusual input, and we don't guarantee
1411 * constant-timeness */
1412 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1414 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1417 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1420 num_bytes = BN_bn2bin(p_scalar, tmp);
1421 flip_endian(secrets[i], tmp, num_bytes);
1422 /* precompute multiples */
1423 if ((!BN_to_felem(x_out, &p->X)) ||
1424 (!BN_to_felem(y_out, &p->Y)) ||
1425 (!BN_to_felem(z_out, &p->Z))) goto err;
1426 felem_assign(pre_comp[i][1][0], x_out);
1427 felem_assign(pre_comp[i][1][1], y_out);
1428 felem_assign(pre_comp[i][1][2], z_out);
1429 for (j = 2; j <= 16; ++j)
1434 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1435 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1436 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1441 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1442 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1448 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1451 /* the scalar for the generator */
1452 if ((scalar != NULL) && (have_pre_comp))
1454 memset(g_secret, 0, sizeof g_secret);
1455 /* reduce scalar to 0 <= scalar < 2^224 */
1456 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar)))
1458 /* this is an unusual input, and we don't guarantee
1459 * constant-timeness */
1460 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1462 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1465 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1468 num_bytes = BN_bn2bin(scalar, tmp);
1469 flip_endian(g_secret, tmp, num_bytes);
1470 /* do the multiplication with generator precomputation*/
1471 batch_mul(x_out, y_out, z_out,
1472 (const felem_bytearray (*)) secrets, num_points,
1474 mixed, (const felem (*)[17][3]) pre_comp,
1478 /* do the multiplication without generator precomputation */
1479 batch_mul(x_out, y_out, z_out,
1480 (const felem_bytearray (*)) secrets, num_points,
1481 NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
1482 /* reduce the output to its unique minimal representation */
1483 felem_contract(x_in, x_out);
1484 felem_contract(y_in, y_out);
1485 felem_contract(z_in, z_out);
1486 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1487 (!felem_to_BN(z, z_in)))
1489 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1492 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1496 if (generator != NULL)
1497 EC_POINT_free(generator);
1498 if (new_ctx != NULL)
1499 BN_CTX_free(new_ctx);
1500 if (secrets != NULL)
1501 OPENSSL_free(secrets);
1502 if (pre_comp != NULL)
1503 OPENSSL_free(pre_comp);
1504 if (tmp_felems != NULL)
1505 OPENSSL_free(tmp_felems);
1509 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1512 NISTP224_PRE_COMP *pre = NULL;
1514 BN_CTX *new_ctx = NULL;
1516 EC_POINT *generator = NULL;
1517 felem tmp_felems[32];
1519 /* throw away old precomputation */
1520 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1521 nistp224_pre_comp_free, nistp224_pre_comp_clear_free);
1523 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1525 if (((x = BN_CTX_get(ctx)) == NULL) ||
1526 ((y = BN_CTX_get(ctx)) == NULL))
1528 /* get the generator */
1529 if (group->generator == NULL) goto err;
1530 generator = EC_POINT_new(group);
1531 if (generator == NULL)
1533 BN_bin2bn(nistp224_curve_params[3], sizeof (felem_bytearray), x);
1534 BN_bin2bn(nistp224_curve_params[4], sizeof (felem_bytearray), y);
1535 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1537 if ((pre = nistp224_pre_comp_new()) == NULL)
1539 /* if the generator is the standard one, use built-in precomputation */
1540 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1542 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1546 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1547 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1548 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1550 /* compute 2^56*G, 2^112*G, 2^168*G for the first table,
1551 * 2^28*G, 2^84*G, 2^140*G, 2^196*G for the second one
1553 for (i = 1; i <= 8; i <<= 1)
1556 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1557 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1558 for (j = 0; j < 27; ++j)
1561 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1562 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1567 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
1568 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1569 for (j = 0; j < 27; ++j)
1572 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
1573 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
1576 for (i = 0; i < 2; i++)
1578 /* g_pre_comp[i][0] is the point at infinity */
1579 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1580 /* the remaining multiples */
1581 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1583 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1584 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1585 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1586 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1587 pre->g_pre_comp[i][2][2]);
1588 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1590 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1591 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1592 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1593 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1594 pre->g_pre_comp[i][2][2]);
1595 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1597 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1598 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1599 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1600 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1601 pre->g_pre_comp[i][4][2]);
1602 /* 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G */
1604 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1605 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1606 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1607 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1608 pre->g_pre_comp[i][2][2]);
1609 for (j = 1; j < 8; ++j)
1611 /* odd multiples: add G resp. 2^28*G */
1613 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1],
1614 pre->g_pre_comp[i][2*j+1][2], pre->g_pre_comp[i][2*j][0],
1615 pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
1616 0, pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1617 pre->g_pre_comp[i][1][2]);
1620 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1622 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1623 nistp224_pre_comp_free, nistp224_pre_comp_clear_free))
1629 if (generator != NULL)
1630 EC_POINT_free(generator);
1631 if (new_ctx != NULL)
1632 BN_CTX_free(new_ctx);
1634 nistp224_pre_comp_free(pre);
1638 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1640 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1641 nistp224_pre_comp_free, nistp224_pre_comp_clear_free)
1649 static void *dummy=&dummy;