2 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 /* Copyright 2011 Google Inc.
6 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
24 * and Adam Langley's public domain 64-bit C implementation of curve25519
27 #include <openssl/opensslconf.h>
28 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
32 # include <openssl/err.h>
35 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
36 /* even with gcc, the typedef won't work for 32-bit platforms */
37 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
40 # error "Need GCC 3.1 or later to define type uint128_t"
47 /******************************************************************************/
49 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
51 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
52 * using 64-bit coefficients called 'limbs',
53 * and sometimes (for multiplication results) as
54 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
55 * using 128-bit coefficients called 'widelimbs'.
56 * A 4-limb representation is an 'felem';
57 * a 7-widelimb representation is a 'widefelem'.
58 * Even within felems, bits of adjacent limbs overlap, and we don't always
59 * reduce the representations: we ensure that inputs to each felem
60 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
61 * and fit into a 128-bit word without overflow. The coefficients are then
62 * again partially reduced to obtain an felem satisfying a_i < 2^57.
63 * We only reduce to the unique minimal representation at the end of the
67 typedef uint64_t limb;
68 typedef uint128_t widelimb;
70 typedef limb felem[4];
71 typedef widelimb widefelem[7];
74 * Field element represented as a byte array. 28*8 = 224 bits is also the
75 * group order size for the elliptic curve, and we also use this type for
76 * scalars for point multiplication.
78 typedef u8 felem_bytearray[28];
80 static const felem_bytearray nistp224_curve_params[5] = {
81 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
82 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
83 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
84 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
85 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
86 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
87 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
88 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
89 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
90 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
91 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
92 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
93 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
94 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
95 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
99 * Precomputed multiples of the standard generator
100 * Points are given in coordinates (X, Y, Z) where Z normally is 1
101 * (0 for the point at infinity).
102 * For each field element, slice a_0 is word 0, etc.
104 * The table has 2 * 16 elements, starting with the following:
105 * index | bits | point
106 * ------+---------+------------------------------
109 * 2 | 0 0 1 0 | 2^56G
110 * 3 | 0 0 1 1 | (2^56 + 1)G
111 * 4 | 0 1 0 0 | 2^112G
112 * 5 | 0 1 0 1 | (2^112 + 1)G
113 * 6 | 0 1 1 0 | (2^112 + 2^56)G
114 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
115 * 8 | 1 0 0 0 | 2^168G
116 * 9 | 1 0 0 1 | (2^168 + 1)G
117 * 10 | 1 0 1 0 | (2^168 + 2^56)G
118 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
119 * 12 | 1 1 0 0 | (2^168 + 2^112)G
120 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
121 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
122 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
123 * followed by a copy of this with each element multiplied by 2^28.
125 * The reason for this is so that we can clock bits into four different
126 * locations when doing simple scalar multiplies against the base point,
127 * and then another four locations using the second 16 elements.
129 static const felem gmul[2][16][3] = {
133 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
134 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
136 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
137 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
139 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
140 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
142 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
143 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
145 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
146 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
148 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
149 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
151 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
152 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
154 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
155 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
157 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
158 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
160 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
161 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
163 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
164 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
166 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
167 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
169 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
170 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
172 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
173 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
175 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
176 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
181 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
182 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
184 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
185 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
187 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
188 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
190 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
191 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
193 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
194 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
196 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
197 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
199 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
200 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
202 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
203 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
205 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
206 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
208 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
209 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
211 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
212 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
214 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
215 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
217 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
218 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
220 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
221 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
223 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
224 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
228 /* Precomputation for the group generator. */
229 struct nistp224_pre_comp_st {
230 felem g_pre_comp[2][16][3];
234 const EC_METHOD *EC_GFp_nistp224_method(void)
236 static const EC_METHOD ret = {
237 EC_FLAGS_DEFAULT_OCT,
238 NID_X9_62_prime_field,
239 ec_GFp_nistp224_group_init,
240 ec_GFp_simple_group_finish,
241 ec_GFp_simple_group_clear_finish,
242 ec_GFp_nist_group_copy,
243 ec_GFp_nistp224_group_set_curve,
244 ec_GFp_simple_group_get_curve,
245 ec_GFp_simple_group_get_degree,
246 ec_GFp_simple_group_check_discriminant,
247 ec_GFp_simple_point_init,
248 ec_GFp_simple_point_finish,
249 ec_GFp_simple_point_clear_finish,
250 ec_GFp_simple_point_copy,
251 ec_GFp_simple_point_set_to_infinity,
252 ec_GFp_simple_set_Jprojective_coordinates_GFp,
253 ec_GFp_simple_get_Jprojective_coordinates_GFp,
254 ec_GFp_simple_point_set_affine_coordinates,
255 ec_GFp_nistp224_point_get_affine_coordinates,
256 0 /* point_set_compressed_coordinates */ ,
261 ec_GFp_simple_invert,
262 ec_GFp_simple_is_at_infinity,
263 ec_GFp_simple_is_on_curve,
265 ec_GFp_simple_make_affine,
266 ec_GFp_simple_points_make_affine,
267 ec_GFp_nistp224_points_mul,
268 ec_GFp_nistp224_precompute_mult,
269 ec_GFp_nistp224_have_precompute_mult,
270 ec_GFp_nist_field_mul,
271 ec_GFp_nist_field_sqr,
273 0 /* field_encode */ ,
274 0 /* field_decode */ ,
275 0 /* field_set_to_one */
282 * Helper functions to convert field elements to/from internal representation
/*
 * Convert a 28-byte little-endian array into the internal 4-limb felem
 * representation: each limb takes 56 bits (7 bytes) of the input.
 * NOTE(review): the casted uint64_t loads below read 8 bytes at unaligned
 * offsets and type-pun through an incompatible pointer — this relies on the
 * target tolerating unaligned access and on the compiler not exploiting
 * strict aliasing here; confirm against the build flags used for this file.
 */
284 static void bin28_to_felem(felem out, const u8 in[28])
/* Bytes 0-6 -> limb 0 (mask keeps the low 56 bits of the 64-bit load). */
286 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
/* Bytes 7-13 -> limb 1. */
287 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
/* Bytes 14-20 -> limb 2. */
288 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
/* Bytes 21-27 -> limb 3: load at offset 20 and shift out the overlap byte. */
289 out[3] = (*((const uint64_t *)(in+20))) >> 8;
/*
 * Convert a 4-limb felem into a 28-byte little-endian array.
 * Inverse of bin28_to_felem: limb k is serialized into bytes [7k, 7k+6].
 * Assumes the input is fully reduced so each limb fits in 56 bits
 * (otherwise adjacent 7-byte groups would have to carry into each other).
 */
292 static void felem_to_bin28(u8 out[28], const felem in)
/* Emit byte i of each limb; four limbs are laid out 7 bytes apart. */
295 for (i = 0; i < 7; ++i) {
296 out[i] = in[0] >> (8 * i);
297 out[i + 7] = in[1] >> (8 * i);
298 out[i + 14] = in[2] >> (8 * i);
299 out[i + 21] = in[3] >> (8 * i);
303 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
/*
 * Reverse the byte order of |in| (length |len|) into |out|.
 * Used to bridge BN_bn2bin/BN_bin2bn, which use big-endian byte strings,
 * and the little-endian felem_bytearray layout. |out| and |in| must not
 * overlap.
 */
304 static void flip_endian(u8 *out, const u8 *in, unsigned len)
307 for (i = 0; i < len; ++i)
308 out[i] = in[len - 1 - i];
311 /* From OpenSSL BIGNUM to internal representation */
/*
 * Convert an OpenSSL BIGNUM to the internal felem representation.
 * Rejects (raising EC_R_BIGNUM_OUT_OF_RANGE) values wider than 28 bytes
 * and negative values; return statements for those error paths are on
 * lines not shown in this view.
 */
312 static int BN_to_felem(felem out, const BIGNUM *bn)
314 felem_bytearray b_in;
315 felem_bytearray b_out;
/* Zero-fill first: BN_bn2bin emits only the significant (non-leading-zero)
 * bytes, so shorter values must land on a zeroed buffer. */
318 /* BN_bn2bin eats leading zeroes */
319 memset(b_out, 0, sizeof(b_out));
320 num_bytes = BN_num_bytes(bn);
321 if (num_bytes > sizeof b_out) {
322 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
325 if (BN_is_negative(bn)) {
326 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
/* Big-endian BIGNUM bytes -> little-endian byte array -> 4 limbs. */
329 num_bytes = BN_bn2bin(bn, b_in);
330 flip_endian(b_out, b_in, num_bytes);
331 bin28_to_felem(out, b_out);
335 /* From internal representation to OpenSSL BIGNUM */
/*
 * Convert an internal felem back to an OpenSSL BIGNUM.
 * Serializes the limbs little-endian, byte-reverses to the big-endian
 * order BN_bin2bn expects, and returns BN_bin2bn's result (NULL on
 * allocation failure).
 */
336 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
338 felem_bytearray b_in, b_out;
339 felem_to_bin28(b_in, in);
340 flip_endian(b_out, b_in, sizeof b_out);
341 return BN_bin2bn(b_out, sizeof b_out, out);
344 /******************************************************************************/
348 * Field operations, using the internal representation of field elements.
349 * NB! These operations are specific to our point multiplication and cannot be
350 * expected to be correct in general - e.g., multiplication with a large scalar
351 * will cause an overflow.
355 static void felem_one(felem out)
363 static void felem_assign(felem out, const felem in)
371 /* Sum two field elements: out += in */
372 static void felem_sum(felem out, const felem in)
380 /* Get negative value: out = -in */
381 /* Assumes in[i] < 2^57 */
/*
 * Negate a field element: out = -in (mod p), without reducing.
 * Subtracts each limb from a per-limb constant; the constants together
 * form a multiple of p = 2^224 - 2^96 + 1 chosen large enough
 * (each limb ~2^58) that no limb difference can go negative given the
 * precondition in[i] < 2^57.
 */
382 static void felem_neg(felem out, const felem in)
384 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
385 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
386 static const limb two58m42m2 = (((limb) 1) << 58) -
387 (((limb) 1) << 42) - (((limb) 1) << 2);
389 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
390 out[0] = two58p2 - in[0];
391 out[1] = two58m42m2 - in[1];
392 out[2] = two58m2 - in[2];
393 out[3] = two58m2 - in[3];
396 /* Subtract field elements: out -= in */
397 /* Assumes in[i] < 2^57 */
/*
 * Subtract field elements: out -= in (mod p), without reducing.
 * Same constant trick as felem_neg: first add a multiple of p
 * (limbs ~2^58) so the subtraction cannot underflow for in[i] < 2^57.
 * Only the out[1] addition is visible here; the remaining additions and
 * the per-limb subtractions are on lines not shown in this view.
 */
398 static void felem_diff(felem out, const felem in)
400 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
401 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
402 static const limb two58m42m2 = (((limb) 1) << 58) -
403 (((limb) 1) << 42) - (((limb) 1) << 2);
405 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
407 out[1] += two58m42m2;
417 /* Subtract in unreduced 128-bit mode: out -= in */
418 /* Assumes in[i] < 2^119 */
/*
 * Subtract in the unreduced 7-widelimb representation: out -= in.
 * Adds a multiple of p with ~2^120-sized widelimb constants so no
 * widelimb can underflow given the precondition in[i] < 2^119.
 * Only the out[4] addition is visible here; the remaining additions
 * and subtractions are on lines not shown in this view.
 */
419 static void widefelem_diff(widefelem out, const widefelem in)
421 static const widelimb two120 = ((widelimb) 1) << 120;
422 static const widelimb two120m64 = (((widelimb) 1) << 120) -
423 (((widelimb) 1) << 64);
424 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
425 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
427 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
432 out[4] += two120m104m64;
445 /* Subtract in mixed mode: out128 -= in64 */
/*
 * Mixed-width subtraction: subtract a 4-limb felem from a 7-widelimb
 * widefelem in place (out128 -= in64). As in felem_diff, a multiple of p
 * (here with ~2^64-sized constants) is added first so the low widelimbs
 * cannot underflow. Only the out[1] addition is visible here; the other
 * additions and the subtractions are on lines not shown in this view.
 */
447 static void felem_diff_128_64(widefelem out, const felem in)
449 static const widelimb two64p8 = (((widelimb) 1) << 64) +
450 (((widelimb) 1) << 8);
451 static const widelimb two64m8 = (((widelimb) 1) << 64) -
452 (((widelimb) 1) << 8);
453 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
454 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
456 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
458 out[1] += two64m48m8;
469 * Multiply a field element by a scalar: out = out * scalar The scalars we
470 * actually use are small, so results fit without overflow
472 static void felem_scalar(felem out, const limb scalar)
481 * Multiply an unreduced field element by a scalar: out = out * scalar The
482 * scalars we actually use are small, so results fit without overflow
484 static void widefelem_scalar(widefelem out, const widelimb scalar)
495 /* Square a field element: out = in^2 */
/*
 * Square a field element: out (7 widelimbs) = in^2, unreduced.
 * Schoolbook squaring that exploits symmetry: cross terms appear once
 * against doubled operands. tmp0/tmp1/tmp2 are set on lines not shown in
 * this view — presumably the doubled limbs 2*in[0], 2*in[1], 2*in[2]
 * (TODO confirm), which makes e.g. out[1] = 2*in[0]*in[1].
 */
496 static void felem_square(widefelem out, const felem in)
498 limb tmp0, tmp1, tmp2;
502 out[0] = ((widelimb) in[0]) * in[0];
503 out[1] = ((widelimb) in[0]) * tmp1;
504 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
505 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
506 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
507 out[5] = ((widelimb) in[3]) * tmp2;
508 out[6] = ((widelimb) in[3]) * in[3];
511 /* Multiply two field elements: out = in1 * in2 */
/*
 * Multiply two field elements: out (7 widelimbs) = in1 * in2, unreduced.
 * Plain schoolbook product: out[k] collects every in1[i]*in2[j] with
 * i + j == k. Each product is widened to 128 bits before multiplying,
 * so with inputs below 2^60 per limb the sums cannot overflow a widelimb.
 * Callers reduce with felem_reduce.
 */
512 static void felem_mul(widefelem out, const felem in1, const felem in2)
514 out[0] = ((widelimb) in1[0]) * in2[0];
515 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
516 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
517 ((widelimb) in1[2]) * in2[0];
518 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
519 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
520 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
521 ((widelimb) in1[3]) * in2[1];
522 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
523 out[6] = ((widelimb) in1[3]) * in2[3];
527 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
528 * Requires in[i] < 2^126,
529 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
/*
 * Partially reduce a 7-widelimb product back to a 4-limb felem.
 * Repeatedly folds the high widelimbs down using
 * 2^224 == 2^96 - 1 (mod p): each eliminated coefficient contributes a
 * shifted addition two limbs down plus a subtraction at limb 0. Large
 * constants (multiples of p) are added up front so every intermediate
 * stays non-negative. `output` is a local widefelem working copy declared
 * on a line not shown in this view.
 */
530 static void felem_reduce(felem out, const widefelem in)
532 static const widelimb two127p15 = (((widelimb) 1) << 127) +
533 (((widelimb) 1) << 15);
534 static const widelimb two127m71 = (((widelimb) 1) << 127) -
535 (((widelimb) 1) << 71);
536 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
537 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
540 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
541 output[0] = in[0] + two127p15;
542 output[1] = in[1] + two127m71m55;
543 output[2] = in[2] + two127m71;
547 /* Eliminate in[4], in[5], in[6] */
548 output[4] += in[6] >> 16;
549 output[3] += (in[6] & 0xffff) << 40;
552 output[3] += in[5] >> 16;
553 output[2] += (in[5] & 0xffff) << 40;
556 output[2] += output[4] >> 16;
557 output[1] += (output[4] & 0xffff) << 40;
558 output[0] -= output[4];
560 /* Carry 2 -> 3 -> 4 */
561 output[3] += output[2] >> 56;
562 output[2] &= 0x00ffffffffffffff;
564 output[4] = output[3] >> 56;
565 output[3] &= 0x00ffffffffffffff;
567 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
569 /* Eliminate output[4] */
570 output[2] += output[4] >> 16;
571 /* output[2] < 2^56 + 2^56 = 2^57 */
572 output[1] += (output[4] & 0xffff) << 40;
573 output[0] -= output[4];
575 /* Carry 0 -> 1 -> 2 -> 3 */
576 output[1] += output[0] >> 56;
577 out[0] = output[0] & 0x00ffffffffffffff;
579 output[2] += output[1] >> 56;
580 /* output[2] < 2^57 + 2^72 */
581 out[1] = output[1] & 0x00ffffffffffffff;
582 output[3] += output[2] >> 56;
583 /* output[3] <= 2^56 + 2^16 */
584 out[2] = output[2] & 0x00ffffffffffffff;
587 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
588 * out[3] <= 2^56 + 2^16 (due to final carry),
/* Convenience wrapper: out = in^2, partially reduced (square then reduce
 * through a local widefelem temporary). */
594 static void felem_square_reduce(felem out, const felem in)
597 felem_square(tmp, in);
598 felem_reduce(out, tmp);
/* Convenience wrapper: out = in1 * in2, partially reduced (multiply then
 * reduce through a local widefelem temporary). */
601 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
604 felem_mul(tmp, in1, in2);
605 felem_reduce(out, tmp);
609 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
610 * call felem_reduce first)
/*
 * Reduce to the unique minimal representative in [0, p), in constant time.
 * Requires 0 <= in < 2*p, i.e. callers must run felem_reduce first.
 * Strategy: decide with branch-free masks whether in >= p (two cases:
 * in >= 2^224, or p <= in < 2^224), then conditionally subtract p via
 * masked AND operations, and finally fix up negative limbs and propagate
 * carries. `tmp` is a working copy set up on lines not shown in this view.
 */
612 static void felem_contract(felem out, const felem in)
614 static const int64_t two56 = ((limb) 1) << 56;
615 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
616 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
622 /* Case 1: a = 1 iff in >= 2^224 */
626 tmp[3] &= 0x00ffffffffffffff;
628 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
629 * and the lower part is non-zero
631 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
632 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
633 a &= 0x00ffffffffffffff;
634 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
636 /* subtract 2^224 - 2^96 + 1 if a is all-one */
637 tmp[3] &= a ^ 0xffffffffffffffff;
638 tmp[2] &= a ^ 0xffffffffffffffff;
639 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
643 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
644 * non-zero, so we only need one step
650 /* carry 1 -> 2 -> 3 */
651 tmp[2] += tmp[1] >> 56;
652 tmp[1] &= 0x00ffffffffffffff;
654 tmp[3] += tmp[2] >> 56;
655 tmp[2] &= 0x00ffffffffffffff;
657 /* Now 0 <= out < p */
665 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
666 * elements are reduced to in < 2^225, so we only need to check three cases:
667 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
/*
 * Constant-time zero test on a partially reduced element (in < 2^225):
 * returns 1 if in represents 0 mod p, else 0. Only three bit patterns can
 * represent zero in that range: 0 itself, p = 2^224 - 2^96 + 1, and 2*p.
 * Each candidate is tested by XOR-ing against its limb pattern and mapping
 * "all limbs matched" to 1 via the ((x - 1) >> 63) & 1 sign trick.
 */
669 static limb felem_is_zero(const felem in)
671 limb zero, two224m96p1, two225m97p2;
/* zero == 1 iff all four limbs are 0 */
673 zero = in[0] | in[1] | in[2] | in[3];
674 zero = (((int64_t) (zero) - 1) >> 63) & 1;
/* two224m96p1 == 1 iff in == p */
675 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
676 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
677 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
/* two225m97p2 == 1 iff in == 2*p */
678 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
679 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
680 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
681 return (zero | two224m96p1 | two225m97p2);
/* int-flavoured wrapper around felem_is_zero: masks the result to its low
 * bit and casts, for callers that want a plain 0/1 int. */
684 static limb felem_is_zero_int(const felem in)
686 return (int)(felem_is_zero(in) & ((limb) 1));
689 /* Invert a field element */
690 /* Computation chain copied from djb's code */
691 static void felem_inv(felem out, const felem in)
693 felem ftmp, ftmp2, ftmp3, ftmp4;
697 felem_square(tmp, in);
698 felem_reduce(ftmp, tmp); /* 2 */
699 felem_mul(tmp, in, ftmp);
700 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
701 felem_square(tmp, ftmp);
702 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
703 felem_mul(tmp, in, ftmp);
704 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
705 felem_square(tmp, ftmp);
706 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
707 felem_square(tmp, ftmp2);
708 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
709 felem_square(tmp, ftmp2);
710 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
711 felem_mul(tmp, ftmp2, ftmp);
712 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
713 felem_square(tmp, ftmp);
714 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
715 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
716 felem_square(tmp, ftmp2);
717 felem_reduce(ftmp2, tmp);
719 felem_mul(tmp, ftmp2, ftmp);
720 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
721 felem_square(tmp, ftmp2);
722 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
723 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
724 felem_square(tmp, ftmp3);
725 felem_reduce(ftmp3, tmp);
727 felem_mul(tmp, ftmp3, ftmp2);
728 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
729 felem_square(tmp, ftmp2);
730 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
731 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
732 felem_square(tmp, ftmp3);
733 felem_reduce(ftmp3, tmp);
735 felem_mul(tmp, ftmp3, ftmp2);
736 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
737 felem_square(tmp, ftmp3);
738 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
739 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
740 felem_square(tmp, ftmp4);
741 felem_reduce(ftmp4, tmp);
743 felem_mul(tmp, ftmp3, ftmp4);
744 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
745 felem_square(tmp, ftmp3);
746 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
747 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
748 felem_square(tmp, ftmp4);
749 felem_reduce(ftmp4, tmp);
751 felem_mul(tmp, ftmp2, ftmp4);
752 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
753 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
754 felem_square(tmp, ftmp2);
755 felem_reduce(ftmp2, tmp);
757 felem_mul(tmp, ftmp2, ftmp);
758 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
759 felem_square(tmp, ftmp);
760 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
761 felem_mul(tmp, ftmp, in);
762 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
763 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
764 felem_square(tmp, ftmp);
765 felem_reduce(ftmp, tmp);
767 felem_mul(tmp, ftmp, ftmp3);
768 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
772 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
/*
 * Constant-time conditional copy: if icopy == 1 set out = in, if icopy == 0
 * leave out unchanged. No branches on icopy, so the choice does not leak
 * through timing. The final `out[i] ^= tmp;` per iteration is on a line
 * not shown in this view.
 */
775 static void copy_conditional(felem out, const felem in, limb icopy)
779 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
781 const limb copy = -icopy;
782 for (i = 0; i < 4; ++i) {
/* XOR-mask trick: tmp is (in[i] ^ out[i]) when copying, 0 otherwise. */
783 const limb tmp = copy & (in[i] ^ out[i]);
788 /******************************************************************************/
790 * ELLIPTIC CURVE POINT OPERATIONS
792 * Points are represented in Jacobian projective coordinates:
793 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
794 * or to the point at infinity if Z == 0.
799 * Double an elliptic curve point:
800 * (X', Y', Z') = 2 * (X, Y, Z), where
801 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
802 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
803 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
804 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
805 * while x_out == y_in is not (maybe this works, but it's not tested).
808 point_double(felem x_out, felem y_out, felem z_out,
809 const felem x_in, const felem y_in, const felem z_in)
812 felem delta, gamma, beta, alpha, ftmp, ftmp2;
814 felem_assign(ftmp, x_in);
815 felem_assign(ftmp2, x_in);
818 felem_square(tmp, z_in);
819 felem_reduce(delta, tmp);
822 felem_square(tmp, y_in);
823 felem_reduce(gamma, tmp);
826 felem_mul(tmp, x_in, gamma);
827 felem_reduce(beta, tmp);
829 /* alpha = 3*(x-delta)*(x+delta) */
830 felem_diff(ftmp, delta);
831 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
832 felem_sum(ftmp2, delta);
833 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
834 felem_scalar(ftmp2, 3);
835 /* ftmp2[i] < 3 * 2^58 < 2^60 */
836 felem_mul(tmp, ftmp, ftmp2);
837 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
838 felem_reduce(alpha, tmp);
840 /* x' = alpha^2 - 8*beta */
841 felem_square(tmp, alpha);
842 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
843 felem_assign(ftmp, beta);
844 felem_scalar(ftmp, 8);
845 /* ftmp[i] < 8 * 2^57 = 2^60 */
846 felem_diff_128_64(tmp, ftmp);
847 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
848 felem_reduce(x_out, tmp);
850 /* z' = (y + z)^2 - gamma - delta */
851 felem_sum(delta, gamma);
852 /* delta[i] < 2^57 + 2^57 = 2^58 */
853 felem_assign(ftmp, y_in);
854 felem_sum(ftmp, z_in);
855 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
856 felem_square(tmp, ftmp);
857 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
858 felem_diff_128_64(tmp, delta);
859 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
860 felem_reduce(z_out, tmp);
862 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
863 felem_scalar(beta, 4);
864 /* beta[i] < 4 * 2^57 = 2^59 */
865 felem_diff(beta, x_out);
866 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
867 felem_mul(tmp, alpha, beta);
868 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
869 felem_square(tmp2, gamma);
870 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
871 widefelem_scalar(tmp2, 8);
872 /* tmp2[i] < 8 * 2^116 = 2^119 */
873 widefelem_diff(tmp, tmp2);
874 /* tmp[i] < 2^119 + 2^120 < 2^121 */
875 felem_reduce(y_out, tmp);
879 * Add two elliptic curve points:
880 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
881 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
882 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
883 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
884 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
885 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
887 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
891 * This function is not entirely constant-time: it includes a branch for
892 * checking whether the two input points are equal, (while not equal to the
893 * point at infinity). This case never happens during single point
894 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
896 static void point_add(felem x3, felem y3, felem z3,
897 const felem x1, const felem y1, const felem z1,
898 const int mixed, const felem x2, const felem y2,
901 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
903 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
907 felem_square(tmp, z2);
908 felem_reduce(ftmp2, tmp);
911 felem_mul(tmp, ftmp2, z2);
912 felem_reduce(ftmp4, tmp);
914 /* ftmp4 = z2^3*y1 */
915 felem_mul(tmp2, ftmp4, y1);
916 felem_reduce(ftmp4, tmp2);
918 /* ftmp2 = z2^2*x1 */
919 felem_mul(tmp2, ftmp2, x1);
920 felem_reduce(ftmp2, tmp2);
923 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
926 /* ftmp4 = z2^3*y1 */
927 felem_assign(ftmp4, y1);
929 /* ftmp2 = z2^2*x1 */
930 felem_assign(ftmp2, x1);
934 felem_square(tmp, z1);
935 felem_reduce(ftmp, tmp);
938 felem_mul(tmp, ftmp, z1);
939 felem_reduce(ftmp3, tmp);
942 felem_mul(tmp, ftmp3, y2);
943 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
945 /* ftmp3 = z1^3*y2 - z2^3*y1 */
946 felem_diff_128_64(tmp, ftmp4);
947 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
948 felem_reduce(ftmp3, tmp);
951 felem_mul(tmp, ftmp, x2);
952 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
954 /* ftmp = z1^2*x2 - z2^2*x1 */
955 felem_diff_128_64(tmp, ftmp2);
956 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
957 felem_reduce(ftmp, tmp);
960 * the formulae are incorrect if the points are equal so we check for
961 * this and do doubling if this happens
963 x_equal = felem_is_zero(ftmp);
964 y_equal = felem_is_zero(ftmp3);
965 z1_is_zero = felem_is_zero(z1);
966 z2_is_zero = felem_is_zero(z2);
967 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
968 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
969 point_double(x3, y3, z3, x1, y1, z1);
975 felem_mul(tmp, z1, z2);
976 felem_reduce(ftmp5, tmp);
978 /* special case z2 = 0 is handled later */
979 felem_assign(ftmp5, z1);
982 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
983 felem_mul(tmp, ftmp, ftmp5);
984 felem_reduce(z_out, tmp);
986 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
987 felem_assign(ftmp5, ftmp);
988 felem_square(tmp, ftmp);
989 felem_reduce(ftmp, tmp);
991 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
992 felem_mul(tmp, ftmp, ftmp5);
993 felem_reduce(ftmp5, tmp);
995 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
996 felem_mul(tmp, ftmp2, ftmp);
997 felem_reduce(ftmp2, tmp);
999 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1000 felem_mul(tmp, ftmp4, ftmp5);
1001 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1003 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1004 felem_square(tmp2, ftmp3);
1005 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1007 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1008 felem_diff_128_64(tmp2, ftmp5);
1009 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1011 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1012 felem_assign(ftmp5, ftmp2);
1013 felem_scalar(ftmp5, 2);
1014 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1017 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1018 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1020 felem_diff_128_64(tmp2, ftmp5);
1021 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1022 felem_reduce(x_out, tmp2);
1024 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1025 felem_diff(ftmp2, x_out);
1026 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1029 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1031 felem_mul(tmp2, ftmp3, ftmp2);
1032 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1035 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1036 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1038 widefelem_diff(tmp2, tmp);
1039 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1040 felem_reduce(y_out, tmp2);
1043 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1044 * the point at infinity, so we need to check for this separately
1048 * if point 1 is at infinity, copy point 2 to output, and vice versa
1050 copy_conditional(x_out, x2, z1_is_zero);
1051 copy_conditional(x_out, x1, z2_is_zero);
1052 copy_conditional(y_out, y2, z1_is_zero);
1053 copy_conditional(y_out, y1, z2_is_zero);
1054 copy_conditional(z_out, z2, z1_is_zero);
1055 copy_conditional(z_out, z1, z2_is_zero);
1056 felem_assign(x3, x_out);
1057 felem_assign(y3, y_out);
1058 felem_assign(z3, z_out);
1062 * select_point selects the |idx|th point from a precomputation table and
1064 * The pre_comp array argument should be size of |size| argument
/*
 * Constant-time table lookup: copy the |idx|th point (3 felems) from
 * |pre_comp| (which holds |size| points) into |out|. Every entry is
 * scanned and OR-ed under a mask so the access pattern is independent of
 * |idx|. `mask` is computed from i and idx on lines not shown in this
 * view — presumably all-ones only when i == idx (TODO confirm).
 */
1066 static void select_point(const u64 idx, unsigned int size,
1067 const felem pre_comp[][3], felem out[3])
1070 limb *outlimbs = &out[0][0];
/* Start from zero; exactly one masked entry will survive the ORs. */
1072 memset(out, 0, sizeof(*out) * 3);
1073 for (i = 0; i < size; i++) {
1074 const limb *inlimbs = &pre_comp[i][0][0];
/* 3 coordinates x 4 limbs per entry. */
1081 for (j = 0; j < 4 * 3; j++)
1082 outlimbs[j] |= inlimbs[j] & mask;
1086 /* get_bit returns the |i|th bit in |in| */
1087 static char get_bit(const felem_bytearray in, unsigned i)
/* byte i/8 holds the bit; LSB-first numbering within each byte */
1091 return (in[i >> 3] >> (i & 7)) & 1;
/*
 * Core scalar-multiplication loop. Two windowing schemes are interleaved:
 *  - the generator scalar uses a fixed comb: 4 bits spaced 56 apart
 *    (tables g_pre_comp[0] at offset 0 and g_pre_comp[1] at offset 28);
 *  - arbitrary points use a signed window processed every 5th doubling,
 *    with ec_GFp_nistp_recode_scalar_bits producing (sign, digit).
 * Table lookups go through select_point so secret scalar bits do not
 * determine memory access patterns.
 */
1095 * Interleaved point multiplication using precomputed point multiples: The
1096 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1097 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1098 * generator, using certain (large) precomputed multiples in g_pre_comp.
1099 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1101 static void batch_mul(felem x_out, felem y_out, felem z_out,
1102 const felem_bytearray scalars[],
1103 const unsigned num_points, const u8 *g_scalar,
1104 const int mixed, const felem pre_comp[][17][3],
1105 const felem g_pre_comp[2][16][3])
1109 unsigned gen_mul = (g_scalar != NULL);
1110 felem nq[3], tmp[4];
1114 /* set nq to the point at infinity */
1115 memset(nq, 0, sizeof(nq));
1118 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1119 * of the generator (two in each of the last 28 rounds) and additions of
1120 * other points multiples (every 5th round).
/*
 * 'skip' avoids adding to the point at infinity on the first addition;
 * NOTE(review): where it is cleared is on lines elided from this view.
 */
1122 skip = 1; /* save two point operations in the first
/* 220 rounds when arbitrary points are present, else only the 28 comb rounds */
1124 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1127 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1129 /* add multiples of the generator */
1130 if (gen_mul && (i <= 27)) {
1131 /* first, look 28 bits upwards */
1132 bits = get_bit(g_scalar, i + 196) << 3;
1133 bits |= get_bit(g_scalar, i + 140) << 2;
1134 bits |= get_bit(g_scalar, i + 84) << 1;
1135 bits |= get_bit(g_scalar, i + 28);
1136 /* select the point to add, in constant time */
1137 select_point(bits, 16, g_pre_comp[1], tmp);
1140 /* value 1 below is argument for "mixed" */
1141 point_add(nq[0], nq[1], nq[2],
1142 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
/* first addition with nq at infinity: just copy the selected point */
1144 memcpy(nq, tmp, 3 * sizeof(felem));
1148 /* second, look at the current position */
1149 bits = get_bit(g_scalar, i + 168) << 3;
1150 bits |= get_bit(g_scalar, i + 112) << 2;
1151 bits |= get_bit(g_scalar, i + 56) << 1;
1152 bits |= get_bit(g_scalar, i);
1153 /* select the point to add, in constant time */
1154 select_point(bits, 16, g_pre_comp[0], tmp);
1155 point_add(nq[0], nq[1], nq[2],
1156 nq[0], nq[1], nq[2],
1157 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1160 /* do other additions every 5 doublings */
1161 if (num_points && (i % 5 == 0)) {
1162 /* loop over all scalars */
1163 for (num = 0; num < num_points; ++num) {
/* gather a 6-bit signed-window slice; i-1 may read bit -1 of the scalar */
1164 bits = get_bit(scalars[num], i + 4) << 5;
1165 bits |= get_bit(scalars[num], i + 3) << 4;
1166 bits |= get_bit(scalars[num], i + 2) << 3;
1167 bits |= get_bit(scalars[num], i + 1) << 2;
1168 bits |= get_bit(scalars[num], i) << 1;
1169 bits |= get_bit(scalars[num], i - 1);
1170 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1172 /* select the point to add or subtract */
1173 select_point(digit, 17, pre_comp[num], tmp);
1174 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
/* constant-time conditional negate: use -Y when 'sign' says subtract */
1176 copy_conditional(tmp[1], tmp[3], sign);
1179 point_add(nq[0], nq[1], nq[2],
1180 nq[0], nq[1], nq[2],
1181 mixed, tmp[0], tmp[1], tmp[2]);
1183 memcpy(nq, tmp, 3 * sizeof(felem));
1189 felem_assign(x_out, nq[0]);
1190 felem_assign(y_out, nq[1]);
1191 felem_assign(z_out, nq[2]);
1194 /******************************************************************************/
1196 * FUNCTIONS TO MANAGE PRECOMPUTATION
/*
 * Allocates a zero-initialized precomputation structure with an initial
 * reference count of 1. Returns NULL (with ERR_R_MALLOC_FAILURE raised)
 * on allocation failure.
 */
1199 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
/* zalloc: the precomputed-point tables start out all-zero */
1201 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1204 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1207 ret->references = 1;
/*
 * "Duplicates" the precomputation by taking another reference — the
 * underlying table is shared, not copied.
 */
1211 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1214 CRYPTO_add(&p->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
/*
 * Drops one reference; the structure is actually freed only when the
 * count reaches zero (lines elided from this view presumably do the
 * free — confirm against full source). NULL-safe via the elided guard.
 */
1218 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1221 || CRYPTO_add(&p->references, -1, CRYPTO_LOCK_EC_PRE_COMP) > 0)
1226 /******************************************************************************/
1228 * OPENSSL EC_METHOD FUNCTIONS
/*
 * Group init: defers to the generic GFp init, then records that the
 * curve coefficient a is -3 mod p, which the optimized point formulas
 * in this file assume.
 */
1231 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1234 ret = ec_GFp_simple_group_init(group);
1235 group->a_is_minus3 = 1;
/*
 * Accepts only the exact NIST P-224 parameters: the supplied (p, a, b)
 * are compared against the built-in nistp224_curve_params and rejected
 * with EC_R_WRONG_CURVE_PARAMETERS on any mismatch. On success installs
 * the fast NIST reduction and delegates to the generic set_curve.
 */
1239 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1240 const BIGNUM *a, const BIGNUM *b,
1244 BN_CTX *new_ctx = NULL;
1245 BIGNUM *curve_p, *curve_a, *curve_b;
/* caller may pass ctx == NULL; allocate a local one in that case */
1248 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1251 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1252 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1253 ((curve_b = BN_CTX_get(ctx)) == NULL))
1255 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1256 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1257 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
/* BN_cmp returns nonzero on difference, so this reads "any param differs" */
1258 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1259 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1260 EC_R_WRONG_CURVE_PARAMETERS);
1263 group->field_mod_func = BN_nist_mod_224;
1264 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
/* BN_CTX_free(NULL) is a no-op, so this is safe when the caller gave us ctx */
1267 BN_CTX_free(new_ctx);
1272 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
/*
 * i.e. affine x = X/Z^2, y = Y/Z^3. Fails for the point at infinity
 * (no affine representation). Either output BIGNUM may be NULL if the
 * caller wants only the other coordinate (the !x / !y guards are on
 * lines elided here — confirm against full source).
 */
1275 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1276 const EC_POINT *point,
1277 BIGNUM *x, BIGNUM *y,
1280 felem z1, z2, x_in, y_in, x_out, y_out;
1283 if (EC_POINT_is_at_infinity(group, point)) {
1284 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1285 EC_R_POINT_AT_INFINITY);
1288 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1289 (!BN_to_felem(z1, point->Z)))
/*
 * NOTE(review): the inversion of Z (producing z2 = Z^-1, presumably via
 * felem_inv) is on lines elided from this view.
 */
/* z1 = Z^-2, then x = X * Z^-2 */
1292 felem_square(tmp, z2);
1293 felem_reduce(z1, tmp);
1294 felem_mul(tmp, x_in, z1);
1295 felem_reduce(x_in, tmp);
/* contract to the unique minimal representation before exporting */
1296 felem_contract(x_out, x_in);
1298 if (!felem_to_BN(x, x_out)) {
1299 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/* z1 = Z^-3, then y = Y * Z^-3 */
1304 felem_mul(tmp, z1, z2);
1305 felem_reduce(z1, tmp);
1306 felem_mul(tmp, y_in, z1);
1307 felem_reduce(y_in, tmp);
1308 felem_contract(y_out, y_in);
1310 if (!felem_to_BN(y, y_out)) {
1311 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/*
 * Batch-converts |num| Jacobian points to affine (Z = 1) form using the
 * shared nistp helper with this file's felem primitives as callbacks.
 * tmp_felems must hold num+1 scratch elements (Montgomery-trick batch
 * inversion needs one extra).
 */
1319 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1320 felem tmp_felems[ /* num+1 */ ])
1323 * Runs in constant time, unless an input is the point at infinity (which
1324 * normally shouldn't happen).
/*
 * The callback list is partially elided from this view; the casts adapt
 * the felem-typed primitives to the generic void* interface.
 */
1326 ec_GFp_nistp_points_make_affine_internal(num,
1330 (void (*)(void *))felem_one,
1331 (int (*)(const void *))
1333 (void (*)(void *, const void *))
1335 (void (*)(void *, const void *))
1336 felem_square_reduce, (void (*)
1343 (void (*)(void *, const void *))
1345 (void (*)(void *, const void *))
1350 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1351 * values Result is stored in r (r can equal one of the inputs).
/*
 * Top-level multi-scalar multiplication. Strategy:
 *  - use cached (or built-in) generator precomputation when it matches
 *    the group's generator; otherwise fold the generator in as an
 *    ordinary point (num_points + 1);
 *  - build per-point tables of 0*P..16*P, optionally batch-converted to
 *    affine ("mixed" addition) when there are >= 3 points;
 *  - scalars outside [0, 2^224) are first reduced mod the group order
 *    (documented as not constant-time for such unusual inputs);
 *  - batch_mul does the actual interleaved ladder; the Jacobian result
 *    is contracted and written back into |r|.
 */
1353 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1354 const BIGNUM *scalar, size_t num,
1355 const EC_POINT *points[],
1356 const BIGNUM *scalars[], BN_CTX *ctx)
1362 BN_CTX *new_ctx = NULL;
1363 BIGNUM *x, *y, *z, *tmp_scalar;
1364 felem_bytearray g_secret;
1365 felem_bytearray *secrets = NULL;
1366 felem (*pre_comp)[17][3] = NULL;
1367 felem *tmp_felems = NULL;
1368 felem_bytearray tmp;
1370 int have_pre_comp = 0;
1371 size_t num_points = num;
1372 felem x_in, y_in, z_in, x_out, y_out, z_out;
1373 NISTP224_PRE_COMP *pre = NULL;
1374 const felem(*g_pre_comp)[16][3] = NULL;
1375 EC_POINT *generator = NULL;
1376 const EC_POINT *p = NULL;
1377 const BIGNUM *p_scalar = NULL;
/* caller may pass ctx == NULL; allocate a local one in that case */
1380 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1383 if (((x = BN_CTX_get(ctx)) == NULL) ||
1384 ((y = BN_CTX_get(ctx)) == NULL) ||
1385 ((z = BN_CTX_get(ctx)) == NULL) ||
1386 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1389 if (scalar != NULL) {
1390 pre = group->pre_comp.nistp224;
1392 /* we have precomputation, try to use it */
1393 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1395 /* try to use the standard precomputation */
1396 g_pre_comp = &gmul[0];
1397 generator = EC_POINT_new(group);
1398 if (generator == NULL)
1400 /* get the generator from precomputation */
1401 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1402 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1403 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1404 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1407 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1411 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1412 /* precomputation matches generator */
1416 * we don't have valid precomputation: treat the generator as a
/* ... random point: it joins the per-point tables below */
1419 num_points = num_points + 1;
1422 if (num_points > 0) {
1423 if (num_points >= 3) {
1425 * unless we precompute multiples for just one or two points,
1426 * converting those into affine form is time well spent
1430 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1431 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
/* +1 scratch element required by make_points_affine */
1434 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1435 if ((secrets == NULL) || (pre_comp == NULL)
1436 || (mixed && (tmp_felems == NULL))) {
1437 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1442 * we treat NULL scalars as 0, and NULL points as points at infinity,
1443 * i.e., they contribute nothing to the linear combination
1445 for (i = 0; i < num_points; ++i) {
/* the extra slot (i == num) holds the generator when precomp was unusable */
1449 p = EC_GROUP_get0_generator(group);
1452 /* the i^th point */
1455 p_scalar = scalars[i];
1457 if ((p_scalar != NULL) && (p != NULL)) {
1458 /* reduce scalar to 0 <= scalar < 2^224 */
1459 if ((BN_num_bits(p_scalar) > 224)
1460 || (BN_is_negative(p_scalar))) {
1462 * this is an unusual input, and we don't guarantee
1465 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1466 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1469 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1471 num_bytes = BN_bn2bin(p_scalar, tmp);
/* batch_mul/get_bit expect little-endian byte order */
1472 flip_endian(secrets[i], tmp, num_bytes);
1473 /* precompute multiples */
1474 if ((!BN_to_felem(x_out, p->X)) ||
1475 (!BN_to_felem(y_out, p->Y)) ||
1476 (!BN_to_felem(z_out, p->Z)))
1478 felem_assign(pre_comp[i][1][0], x_out);
1479 felem_assign(pre_comp[i][1][1], y_out);
1480 felem_assign(pre_comp[i][1][2], z_out);
/* fill 2*P..16*P: odd entries by add, even entries by doubling j/2 */
1481 for (j = 2; j <= 16; ++j) {
1483 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1484 pre_comp[i][j][2], pre_comp[i][1][0],
1485 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1486 pre_comp[i][j - 1][0],
1487 pre_comp[i][j - 1][1],
1488 pre_comp[i][j - 1][2]);
1490 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1491 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1492 pre_comp[i][j / 2][1],
1493 pre_comp[i][j / 2][2]);
1499 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1502 /* the scalar for the generator */
1503 if ((scalar != NULL) && (have_pre_comp)) {
1504 memset(g_secret, 0, sizeof(g_secret));
1505 /* reduce scalar to 0 <= scalar < 2^224 */
1506 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1508 * this is an unusual input, and we don't guarantee
1511 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1512 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1515 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1517 num_bytes = BN_bn2bin(scalar, tmp);
1518 flip_endian(g_secret, tmp, num_bytes);
1519 /* do the multiplication with generator precomputation */
1520 batch_mul(x_out, y_out, z_out,
1521 (const felem_bytearray(*))secrets, num_points,
1523 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1525 /* do the multiplication without generator precomputation */
1526 batch_mul(x_out, y_out, z_out,
1527 (const felem_bytearray(*))secrets, num_points,
1528 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1529 /* reduce the output to its unique minimal representation */
1530 felem_contract(x_in, x_out);
1531 felem_contract(y_in, y_out);
1532 felem_contract(z_in, z_out);
1533 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1534 (!felem_to_BN(z, z_in))) {
1535 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1538 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
/* common cleanup path: all of these are NULL-safe */
1542 EC_POINT_free(generator);
1543 BN_CTX_free(new_ctx);
1544 OPENSSL_free(secrets);
1545 OPENSSL_free(pre_comp);
1546 OPENSSL_free(tmp_felems);
/*
 * Builds (or copies) the generator precomputation used by the comb in
 * batch_mul: two 16-entry tables of sums of {G, 2^56*G, 2^112*G, 2^168*G}
 * (table 0) and the same shifted by 2^28 (table 1). If the group uses the
 * standard generator, the built-in 'gmul' table is copied instead of
 * recomputed. The result is installed on the group via SETPRECOMP.
 */
1550 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1553 NISTP224_PRE_COMP *pre = NULL;
1555 BN_CTX *new_ctx = NULL;
1557 EC_POINT *generator = NULL;
/* scratch for make_points_affine over the 31 non-infinity table entries */
1558 felem tmp_felems[32];
1560 /* throw away old precomputation */
1561 EC_pre_comp_free(group);
1563 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1566 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1568 /* get the generator */
1569 if (group->generator == NULL)
1571 generator = EC_POINT_new(group);
1572 if (generator == NULL)
/* params[3], params[4] are the standard generator's affine x, y */
1574 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1575 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1576 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1578 if ((pre = nistp224_pre_comp_new()) == NULL)
1581 * if the generator is the standard one, use built-in precomputation
1583 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1584 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
/* non-standard generator: seed table 0 entry 1 with G itself */
1587 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1588 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1589 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1592 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1593 * 2^140*G, 2^196*G for the second one
/* i walks the power-of-two slots 1, 2, 4, 8 */
1595 for (i = 1; i <= 8; i <<= 1) {
/* table1[i] = 2^28 * table0[i]: one doubling here + 27 in the loop below */
1596 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1597 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1598 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1599 for (j = 0; j < 27; ++j) {
1600 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1601 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1602 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
/* table0[2i] = 2^28 * table1[i], i.e. another 28 doublings in total */
1606 point_double(pre->g_pre_comp[0][2 * i][0],
1607 pre->g_pre_comp[0][2 * i][1],
1608 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1609 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1610 for (j = 0; j < 27; ++j) {
1611 point_double(pre->g_pre_comp[0][2 * i][0],
1612 pre->g_pre_comp[0][2 * i][1],
1613 pre->g_pre_comp[0][2 * i][2],
1614 pre->g_pre_comp[0][2 * i][0],
1615 pre->g_pre_comp[0][2 * i][1],
1616 pre->g_pre_comp[0][2 * i][2]);
/* fill in the composite-index entries of both tables */
1619 for (i = 0; i < 2; i++) {
1620 /* g_pre_comp[i][0] is the point at infinity */
1621 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1622 /* the remaining multiples */
1623 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1624 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1625 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1626 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1627 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1628 pre->g_pre_comp[i][2][2]);
1629 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1630 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1631 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1632 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1633 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1634 pre->g_pre_comp[i][2][2]);
1635 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1636 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1637 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1638 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1639 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1640 pre->g_pre_comp[i][4][2]);
1642 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1644 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1645 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1646 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1647 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1648 pre->g_pre_comp[i][2][2]);
1649 for (j = 1; j < 8; ++j) {
1650 /* odd multiples: add G resp. 2^28*G */
1651 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1652 pre->g_pre_comp[i][2 * j + 1][1],
1653 pre->g_pre_comp[i][2 * j + 1][2],
1654 pre->g_pre_comp[i][2 * j][0],
1655 pre->g_pre_comp[i][2 * j][1],
1656 pre->g_pre_comp[i][2 * j][2], 0,
1657 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1658 pre->g_pre_comp[i][1][2]);
/* skip entry [0][0] (infinity); 31 remaining entries become affine */
1661 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
/* hand ownership of 'pre' to the group */
1664 SETPRECOMP(group, nistp224, pre);
/* cleanup path: all of these are NULL-safe */
1669 EC_POINT_free(generator);
1670 BN_CTX_free(new_ctx);
1671 EC_nistp224_pre_comp_free(pre);
/* Reports whether generator precomputation is cached on this group. */
1675 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1677 return HAVEPRECOMP(group, nistp224);
1681 static void *dummy = &dummy;