1 /* crypto/ec/ecp_nistp224.c */
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
28 #include <openssl/opensslconf.h>
29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
33 # include <openssl/err.h>
36 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
37 /* even with gcc, the typedef won't work for 32-bit platforms */
38 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
41 # error "Need GCC 3.1 or later to define type uint128_t"
48 /******************************************************************************/
50 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
52 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
53 * using 64-bit coefficients called 'limbs',
54 * and sometimes (for multiplication results) as
55 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
56 * using 128-bit coefficients called 'widelimbs'.
57 * A 4-limb representation is an 'felem';
58 * a 7-widelimb representation is a 'widefelem'.
59 * Even within felems, bits of adjacent limbs overlap, and we don't always
60 * reduce the representations: we ensure that inputs to each felem
61 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
62 * and fit into a 128-bit word without overflow. The coefficients are then
63 * again partially reduced to obtain an felem satisfying a_i < 2^57.
64 * We only reduce to the unique minimal representation at the end of the
68 typedef uint64_t limb;
69 typedef uint128_t widelimb;
71 typedef limb felem[4];
72 typedef widelimb widefelem[7];
75 * Field element represented as a byte array. 28*8 = 224 bits is also the
76 * group order size for the elliptic curve, and we also use this type for
77 * scalars for point multiplication.
79 typedef u8 felem_bytearray[28];
81 static const felem_bytearray nistp224_curve_params[5] = {
82 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
83 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
84 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
85 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
86 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
87 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
88 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
89 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
90 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
91 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
92 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
93 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
94 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
95 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
96 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
100 * Precomputed multiples of the standard generator
101 * Points are given in coordinates (X, Y, Z) where Z normally is 1
102 * (0 for the point at infinity).
103 * For each field element, slice a_0 is word 0, etc.
105 * The table has 2 * 16 elements, starting with the following:
106 * index | bits | point
107 * ------+---------+------------------------------
110 * 2 | 0 0 1 0 | 2^56G
111 * 3 | 0 0 1 1 | (2^56 + 1)G
112 * 4 | 0 1 0 0 | 2^112G
113 * 5 | 0 1 0 1 | (2^112 + 1)G
114 * 6 | 0 1 1 0 | (2^112 + 2^56)G
115 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
116 * 8 | 1 0 0 0 | 2^168G
117 * 9 | 1 0 0 1 | (2^168 + 1)G
118 * 10 | 1 0 1 0 | (2^168 + 2^56)G
119 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
120 * 12 | 1 1 0 0 | (2^168 + 2^112)G
121 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
122 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
123 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
124 * followed by a copy of this with each element multiplied by 2^28.
126 * The reason for this is so that we can clock bits into four different
127 * locations when doing simple scalar multiplies against the base point,
128 * and then another four locations using the second 16 elements.
130 static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
133 {{0x3280d6115c1d21, 0xc1d356c2112234,
134 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
135 {0xd5819985007e34, 0x75a05a07476444,
136 0xfb4c22dfe6cd43, 0xbd376388b5f723},
138 {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
139 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
140 {0x29e0b892dc9c43, 0xece8608436e662,
141 0xdc858f185310d0, 0x9812dd4eb8d321},
143 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
144 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
145 {0xf19f90ed50266d, 0xabf2b4bf65f9df,
146 0x313865468fafec, 0x5cb379ba910a17},
148 {{0x0641966cab26e3, 0x91fb2991fab0a0,
149 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
150 {0x7510407766af5d, 0x84d929610d5450,
151 0x81d77aae82f706, 0x6916f6d4338c5b},
153 {{0xea95ac3b1f15c6, 0x086000905e82d4,
154 0xdd323ae4d1c8b1, 0x932b56be7685a3},
155 {0x9ef93dea25dbbf, 0x41665960f390f0,
156 0xfdec76dbe2a8a7, 0x523e80f019062a},
158 {{0x822fdd26732c73, 0xa01c83531b5d0f,
159 0x363f37347c1ba4, 0xc391b45c84725c},
160 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
161 0xc393da7e222a7f, 0x1efb7890ede244},
163 {{0x4c9e90ca217da1, 0xd11beca79159bb,
164 0xff8d33c2c98b7c, 0x2610b39409f849},
165 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
166 0x966c079b753c89, 0xfe67e4e820b112},
168 {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
169 0x79b7619a3e7c4c, 0x05c73240899b47},
170 {0x9f7f6382c73e3a, 0x18615165c56bda,
171 0x641fab2116fd56, 0x72855882b08394},
173 {{0x0469182f161c09, 0x74a98ca8d00fb5,
174 0xb89da93489a3e0, 0x41c98768fb0c1d},
175 {0xe5ea05fb32da81, 0x3dce9ffbca6855,
176 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
178 {{0xdab22b2333e87f, 0x4430137a5dd2f6,
179 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
180 {0x764a7df0c8fda5, 0x185ba5c3fa2044,
181 0x9281d688bcbe50, 0xc40331df893881},
183 {{0xb89530796f0f60, 0xade92bd26909a3,
184 0x1a0c83fb4884da, 0x1765bf22a5a984},
185 {0x772a9ee75db09e, 0x23bc6c67cec16f,
186 0x4c1edba8b14e2f, 0xe2a215d9611369},
188 {{0x571e509fb5efb3, 0xade88696410552,
189 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
190 {0xff9f51160f4652, 0xb47ce2495a6539,
191 0xa2946c53b582f4, 0x286d2db3ee9a60},
193 {{0x40bbd5081a44af, 0x0995183b13926c,
194 0xbcefba6f47f6d0, 0x215619e9cc0057},
195 {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
196 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
198 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
199 0x1c29819435d2c6, 0xc813132f4c07e9},
200 {0x2891425503b11f, 0x08781030579fea,
201 0xf5426ba5cc9674, 0x1e28ebf18562bc},
203 {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
204 0xff17036691a973, 0xf1aef351497c58},
205 {0xdd1f2d600564ff, 0xdead073b1402db,
206 0x74a684435bd693, 0xeea7471f962558},
211 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
212 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
214 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
215 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
217 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
218 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
220 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
221 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
223 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
224 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
226 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
227 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
229 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
230 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
232 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
233 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
235 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
236 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
238 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
239 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
241 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
242 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
244 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
245 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
247 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
248 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
250 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
251 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
253 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
254 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
258 /* Precomputation for the group generator. */
260 felem g_pre_comp[2][16][3];
/*
 * Returns the EC_METHOD vtable for this optimized NIST P-224 implementation.
 * Operations not specialised here fall back to the generic ec_GFp_simple_*
 * or ec_GFp_nist_* routines.
 * NOTE(review): several initializer entries (e.g. point2oct/oct2point,
 * add/dbl, cmp, field_div) and the trailing `return &ret;` are elided in
 * this excerpt — confirm against the full file.
 */
264 const EC_METHOD *EC_GFp_nistp224_method(void)
266 static const EC_METHOD ret = {
267 EC_FLAGS_DEFAULT_OCT,
268 NID_X9_62_prime_field,
269 ec_GFp_nistp224_group_init,
270 ec_GFp_simple_group_finish,
271 ec_GFp_simple_group_clear_finish,
272 ec_GFp_nist_group_copy,
273 ec_GFp_nistp224_group_set_curve,
274 ec_GFp_simple_group_get_curve,
275 ec_GFp_simple_group_get_degree,
276 ec_GFp_simple_group_check_discriminant,
277 ec_GFp_simple_point_init,
278 ec_GFp_simple_point_finish,
279 ec_GFp_simple_point_clear_finish,
280 ec_GFp_simple_point_copy,
281 ec_GFp_simple_point_set_to_infinity,
282 ec_GFp_simple_set_Jprojective_coordinates_GFp,
283 ec_GFp_simple_get_Jprojective_coordinates_GFp,
284 ec_GFp_simple_point_set_affine_coordinates,
285 ec_GFp_nistp224_point_get_affine_coordinates,
286 0 /* point_set_compressed_coordinates */ ,
291 ec_GFp_simple_invert,
292 ec_GFp_simple_is_at_infinity,
293 ec_GFp_simple_is_on_curve,
295 ec_GFp_simple_make_affine,
296 ec_GFp_simple_points_make_affine,
297 ec_GFp_nistp224_points_mul,
298 ec_GFp_nistp224_precompute_mult,
299 ec_GFp_nistp224_have_precompute_mult,
300 ec_GFp_nist_field_mul,
301 ec_GFp_nist_field_sqr,
303 0 /* field_encode */ ,
304 0 /* field_decode */ ,
305 0 /* field_set_to_one */
312 * Helper functions to convert field elements to/from internal representation
314 static void bin28_to_felem(felem out, const u8 in[28])
316 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
317 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
318 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
319 out[3] = (*((const uint64_t *)(in+20))) >> 8;
322 static void felem_to_bin28(u8 out[28], const felem in)
325 for (i = 0; i < 7; ++i) {
326 out[i] = in[0] >> (8 * i);
327 out[i + 7] = in[1] >> (8 * i);
328 out[i + 14] = in[2] >> (8 * i);
329 out[i + 21] = in[3] >> (8 * i);
333 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
334 static void flip_endian(u8 *out, const u8 *in, unsigned len)
337 for (i = 0; i < len; ++i)
338 out[i] = in[len - 1 - i];
341 /* From OpenSSL BIGNUM to internal representation */
342 static int BN_to_felem(felem out, const BIGNUM *bn)
344 felem_bytearray b_in;
345 felem_bytearray b_out;
348 /* BN_bn2bin eats leading zeroes */
349 memset(b_out, 0, sizeof b_out);
350 num_bytes = BN_num_bytes(bn);
351 if (num_bytes > sizeof b_out) {
352 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
355 if (BN_is_negative(bn)) {
356 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
359 num_bytes = BN_bn2bin(bn, b_in);
360 flip_endian(b_out, b_in, num_bytes);
361 bin28_to_felem(out, b_out);
365 /* From internal representation to OpenSSL BIGNUM */
366 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
368 felem_bytearray b_in, b_out;
369 felem_to_bin28(b_in, in);
370 flip_endian(b_out, b_in, sizeof b_out);
371 return BN_bin2bn(b_out, sizeof b_out, out);
374 /******************************************************************************/
378 * Field operations, using the internal representation of field elements.
379 * NB! These operations are specific to our point multiplication and cannot be
380 * expected to be correct in general - e.g., multiplication with a large scalar
381 * will cause an overflow.
385 static void felem_one(felem out)
393 static void felem_assign(felem out, const felem in)
401 /* Sum two field elements: out += in */
402 static void felem_sum(felem out, const felem in)
410 /* Get negative value: out = -in */
411 /* Assumes in[i] < 2^57 */
412 static void felem_neg(felem out, const felem in)
414 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
415 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
416 static const limb two58m42m2 = (((limb) 1) << 58) -
417 (((limb) 1) << 42) - (((limb) 1) << 2);
419 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
420 out[0] = two58p2 - in[0];
421 out[1] = two58m42m2 - in[1];
422 out[2] = two58m2 - in[2];
423 out[3] = two58m2 - in[3];
426 /* Subtract field elements: out -= in */
427 /* Assumes in[i] < 2^57 */
428 static void felem_diff(felem out, const felem in)
430 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
431 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
432 static const limb two58m42m2 = (((limb) 1) << 58) -
433 (((limb) 1) << 42) - (((limb) 1) << 2);
435 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
437 out[1] += two58m42m2;
447 /* Subtract in unreduced 128-bit mode: out -= in */
448 /* Assumes in[i] < 2^119 */
449 static void widefelem_diff(widefelem out, const widefelem in)
451 static const widelimb two120 = ((widelimb) 1) << 120;
452 static const widelimb two120m64 = (((widelimb) 1) << 120) -
453 (((widelimb) 1) << 64);
454 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
455 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
457 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
462 out[4] += two120m104m64;
475 /* Subtract in mixed mode: out128 -= in64 */
477 static void felem_diff_128_64(widefelem out, const felem in)
479 static const widelimb two64p8 = (((widelimb) 1) << 64) +
480 (((widelimb) 1) << 8);
481 static const widelimb two64m8 = (((widelimb) 1) << 64) -
482 (((widelimb) 1) << 8);
483 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
484 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
486 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
488 out[1] += two64m48m8;
499 * Multiply a field element by a scalar: out = out * scalar The scalars we
500 * actually use are small, so results fit without overflow
502 static void felem_scalar(felem out, const limb scalar)
511 * Multiply an unreduced field element by a scalar: out = out * scalar The
512 * scalars we actually use are small, so results fit without overflow
514 static void widefelem_scalar(widefelem out, const widelimb scalar)
525 /* Square a field element: out = in^2 */
526 static void felem_square(widefelem out, const felem in)
528 limb tmp0, tmp1, tmp2;
532 out[0] = ((widelimb) in[0]) * in[0];
533 out[1] = ((widelimb) in[0]) * tmp1;
534 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
535 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
536 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
537 out[5] = ((widelimb) in[3]) * tmp2;
538 out[6] = ((widelimb) in[3]) * in[3];
541 /* Multiply two field elements: out = in1 * in2 */
542 static void felem_mul(widefelem out, const felem in1, const felem in2)
544 out[0] = ((widelimb) in1[0]) * in2[0];
545 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
546 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
547 ((widelimb) in1[2]) * in2[0];
548 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
549 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
550 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
551 ((widelimb) in1[3]) * in2[1];
552 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
553 out[6] = ((widelimb) in1[3]) * in2[3];
557 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
558 * Requires in[i] < 2^126,
559 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
/*
 * Modular reduction: folds the high widelimbs down using
 * 2^224 == 2^96 - 1 (mod p), then propagates carries.
 * NOTE(review): the declaration of the working array `output` (and a few
 * carry lines) are elided in this excerpt — confirm against the full file.
 */
560 static void felem_reduce(felem out, const widefelem in)
562 static const widelimb two127p15 = (((widelimb) 1) << 127) +
563 (((widelimb) 1) << 15);
564 static const widelimb two127m71 = (((widelimb) 1) << 127) -
565 (((widelimb) 1) << 71);
566 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
567 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
570 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
571 output[0] = in[0] + two127p15;
572 output[1] = in[1] + two127m71m55;
573 output[2] = in[2] + two127m71;
/* Fold the top widelimbs into the low ones via 2^224 = 2^96 - 1 mod p */
577 /* Eliminate in[4], in[5], in[6] */
578 output[4] += in[6] >> 16;
579 output[3] += (in[6] & 0xffff) << 40;
582 output[3] += in[5] >> 16;
583 output[2] += (in[5] & 0xffff) << 40;
586 output[2] += output[4] >> 16;
587 output[1] += (output[4] & 0xffff) << 40;
588 output[0] -= output[4];
590 /* Carry 2 -> 3 -> 4 */
591 output[3] += output[2] >> 56;
592 output[2] &= 0x00ffffffffffffff;
594 output[4] = output[3] >> 56;
595 output[3] &= 0x00ffffffffffffff;
597 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
599 /* Eliminate output[4] */
600 output[2] += output[4] >> 16;
601 /* output[2] < 2^56 + 2^56 = 2^57 */
602 output[1] += (output[4] & 0xffff) << 40;
603 output[0] -= output[4];
605 /* Carry 0 -> 1 -> 2 -> 3 */
606 output[1] += output[0] >> 56;
607 out[0] = output[0] & 0x00ffffffffffffff;
609 output[2] += output[1] >> 56;
610 /* output[2] < 2^57 + 2^72 */
611 out[1] = output[1] & 0x00ffffffffffffff;
612 output[3] += output[2] >> 56;
613 /* output[3] <= 2^56 + 2^16 */
614 out[2] = output[2] & 0x00ffffffffffffff;
617 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
618 * out[3] <= 2^56 + 2^16 (due to final carry),
624 static void felem_square_reduce(felem out, const felem in)
627 felem_square(tmp, in);
628 felem_reduce(out, tmp);
631 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
634 felem_mul(tmp, in1, in2);
635 felem_reduce(out, tmp);
639 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
640 * call felem_reduce first)
/*
 * Constant-time conditional subtraction of p to reach the canonical
 * representative.  A mask `a` is computed (all-one exactly when in >= p)
 * and used to subtract 2^224 - 2^96 + 1 without branching.
 * NOTE(review): the declarations of `tmp` and `a`, the initial copy of
 * `in` into `tmp`, and several carry lines are elided in this excerpt —
 * confirm against the full file.
 */
642 static void felem_contract(felem out, const felem in)
644 static const int64_t two56 = ((limb) 1) << 56;
645 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
646 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
652 /* Case 1: a = 1 iff in >= 2^224 */
656 tmp[3] &= 0x00ffffffffffffff;
658 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
659 * and the lower part is non-zero
661 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
662 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
663 a &= 0x00ffffffffffffff;
664 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
666 /* subtract 2^224 - 2^96 + 1 if a is all-one */
667 tmp[3] &= a ^ 0xffffffffffffffff;
668 tmp[2] &= a ^ 0xffffffffffffffff;
669 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
673 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
674 * non-zero, so we only need one step
680 /* carry 1 -> 2 -> 3 */
681 tmp[2] += tmp[1] >> 56;
682 tmp[1] &= 0x00ffffffffffffff;
684 tmp[3] += tmp[2] >> 56;
685 tmp[2] &= 0x00ffffffffffffff;
687 /* Now 0 <= out < p */
695 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
696 * elements are reduced to in < 2^225, so we only need to check three cases:
697 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
699 static limb felem_is_zero(const felem in)
701 limb zero, two224m96p1, two225m97p2;
703 zero = in[0] | in[1] | in[2] | in[3];
704 zero = (((int64_t) (zero) - 1) >> 63) & 1;
705 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
706 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
707 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
708 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
709 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
710 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
711 return (zero | two224m96p1 | two225m97p2);
714 static limb felem_is_zero_int(const felem in)
716 return (int)(felem_is_zero(in) & ((limb) 1));
719 /* Invert a field element */
720 /* Computation chain copied from djb's code */
/*
 * Computes out = in^-1 mod p as in^(p-2) (Fermat's little theorem) using
 * a fixed square-and-multiply addition chain; the trailing comments track
 * the exponent accumulated so far.  Constant sequence of operations, so
 * constant time for any input.
 * NOTE(review): the declarations of `tmp` (widefelem) and `i`, plus the
 * braces, are elided in this excerpt — confirm against the full file.
 */
721 static void felem_inv(felem out, const felem in)
723 felem ftmp, ftmp2, ftmp3, ftmp4;
727 felem_square(tmp, in);
728 felem_reduce(ftmp, tmp); /* 2 */
729 felem_mul(tmp, in, ftmp);
730 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
731 felem_square(tmp, ftmp);
732 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
733 felem_mul(tmp, in, ftmp);
734 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
735 felem_square(tmp, ftmp);
736 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
737 felem_square(tmp, ftmp2);
738 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
739 felem_square(tmp, ftmp2);
740 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
741 felem_mul(tmp, ftmp2, ftmp);
742 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
743 felem_square(tmp, ftmp);
744 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
745 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
746 felem_square(tmp, ftmp2);
747 felem_reduce(ftmp2, tmp);
749 felem_mul(tmp, ftmp2, ftmp);
750 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
751 felem_square(tmp, ftmp2);
752 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
753 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
754 felem_square(tmp, ftmp3);
755 felem_reduce(ftmp3, tmp);
757 felem_mul(tmp, ftmp3, ftmp2);
758 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
759 felem_square(tmp, ftmp2);
760 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
761 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
762 felem_square(tmp, ftmp3);
763 felem_reduce(ftmp3, tmp);
765 felem_mul(tmp, ftmp3, ftmp2);
766 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
767 felem_square(tmp, ftmp3);
768 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
769 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
770 felem_square(tmp, ftmp4);
771 felem_reduce(ftmp4, tmp);
773 felem_mul(tmp, ftmp3, ftmp4);
774 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
775 felem_square(tmp, ftmp3);
776 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
777 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
778 felem_square(tmp, ftmp4);
779 felem_reduce(ftmp4, tmp);
781 felem_mul(tmp, ftmp2, ftmp4);
782 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
783 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
784 felem_square(tmp, ftmp2);
785 felem_reduce(ftmp2, tmp);
787 felem_mul(tmp, ftmp2, ftmp);
788 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
789 felem_square(tmp, ftmp);
790 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
791 felem_mul(tmp, ftmp, in);
792 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
793 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
794 felem_square(tmp, ftmp);
795 felem_reduce(ftmp, tmp);
797 felem_mul(tmp, ftmp, ftmp3);
798 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
802 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
805 static void copy_conditional(felem out, const felem in, limb icopy)
809 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
811 const limb copy = -icopy;
812 for (i = 0; i < 4; ++i) {
813 const limb tmp = copy & (in[i] ^ out[i]);
818 /******************************************************************************/
820 * ELLIPTIC CURVE POINT OPERATIONS
822 * Points are represented in Jacobian projective coordinates:
823 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
824 * or to the point at infinity if Z == 0.
829 * Double an elliptic curve point:
830 * (X', Y', Z') = 2 * (X, Y, Z), where
831 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
832 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
833 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
834 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
835 * while x_out == y_in is not (maybe this works, but it's not tested).
/*
 * Jacobian point doubling using the standard "dbl-2001-b"-style formulas
 * (delta = Z^2, gamma = Y^2, beta = X*gamma, alpha = 3(X-delta)(X+delta)).
 * The interleaved comments track per-limb magnitude bounds so each
 * felem_mul/felem_square input stays below 2^60 (see felem_reduce).
 * NOTE(review): the `static void` storage line, the declarations of
 * `tmp`/`tmp2` (widefelems), and braces are elided in this excerpt.
 */
838 point_double(felem x_out, felem y_out, felem z_out,
839 const felem x_in, const felem y_in, const felem z_in)
842 felem delta, gamma, beta, alpha, ftmp, ftmp2;
844 felem_assign(ftmp, x_in);
845 felem_assign(ftmp2, x_in);
848 felem_square(tmp, z_in);
849 felem_reduce(delta, tmp);
852 felem_square(tmp, y_in);
853 felem_reduce(gamma, tmp);
856 felem_mul(tmp, x_in, gamma);
857 felem_reduce(beta, tmp);
859 /* alpha = 3*(x-delta)*(x+delta) */
860 felem_diff(ftmp, delta);
861 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
862 felem_sum(ftmp2, delta);
863 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
864 felem_scalar(ftmp2, 3);
865 /* ftmp2[i] < 3 * 2^58 < 2^60 */
866 felem_mul(tmp, ftmp, ftmp2);
867 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
868 felem_reduce(alpha, tmp);
870 /* x' = alpha^2 - 8*beta */
871 felem_square(tmp, alpha);
872 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
873 felem_assign(ftmp, beta);
874 felem_scalar(ftmp, 8);
875 /* ftmp[i] < 8 * 2^57 = 2^60 */
876 felem_diff_128_64(tmp, ftmp);
877 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
878 felem_reduce(x_out, tmp);
880 /* z' = (y + z)^2 - gamma - delta */
881 felem_sum(delta, gamma);
882 /* delta[i] < 2^57 + 2^57 = 2^58 */
883 felem_assign(ftmp, y_in);
884 felem_sum(ftmp, z_in);
885 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
886 felem_square(tmp, ftmp);
887 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
888 felem_diff_128_64(tmp, delta);
889 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
890 felem_reduce(z_out, tmp);
892 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
893 felem_scalar(beta, 4);
894 /* beta[i] < 4 * 2^57 = 2^59 */
895 felem_diff(beta, x_out);
896 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
897 felem_mul(tmp, alpha, beta);
898 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
899 felem_square(tmp2, gamma);
900 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
901 widefelem_scalar(tmp2, 8);
902 /* tmp2[i] < 8 * 2^116 = 2^119 */
903 widefelem_diff(tmp, tmp2);
904 /* tmp[i] < 2^119 + 2^120 < 2^121 */
905 felem_reduce(y_out, tmp);
909 * Add two elliptic curve points:
910 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
911 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
912 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
913 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
914 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
915 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
917 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
921 * This function is not entirely constant-time: it includes a branch for
922 * checking whether the two input points are equal, (while not equal to the
923 * point at infinity). This case never happens during single point
924 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
/*
 * Jacobian point addition (see header comment above for the formulas).
 * When `mixed` is set, z2 is assumed to be 1 or 0, skipping the z2^2/z2^3
 * work.  The equal-points case branches to point_double (documented
 * timing caveat above); infinity inputs are patched up at the end with
 * constant-time conditional copies.
 * NOTE(review): the z2 parameter, `tmp`/`tmp2` widefelem declarations,
 * the if/else framing around the mixed path, and closing braces are
 * elided in this excerpt — confirm against the full file.
 */
926 static void point_add(felem x3, felem y3, felem z3,
927 const felem x1, const felem y1, const felem z1,
928 const int mixed, const felem x2, const felem y2,
931 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
933 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
937 felem_square(tmp, z2);
938 felem_reduce(ftmp2, tmp);
941 felem_mul(tmp, ftmp2, z2);
942 felem_reduce(ftmp4, tmp);
944 /* ftmp4 = z2^3*y1 */
945 felem_mul(tmp2, ftmp4, y1);
946 felem_reduce(ftmp4, tmp2);
948 /* ftmp2 = z2^2*x1 */
949 felem_mul(tmp2, ftmp2, x1);
950 felem_reduce(ftmp2, tmp2);
953 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
956 /* ftmp4 = z2^3*y1 */
957 felem_assign(ftmp4, y1);
959 /* ftmp2 = z2^2*x1 */
960 felem_assign(ftmp2, x1);
964 felem_square(tmp, z1);
965 felem_reduce(ftmp, tmp);
968 felem_mul(tmp, ftmp, z1);
969 felem_reduce(ftmp3, tmp);
972 felem_mul(tmp, ftmp3, y2);
973 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
975 /* ftmp3 = z1^3*y2 - z2^3*y1 */
976 felem_diff_128_64(tmp, ftmp4);
977 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
978 felem_reduce(ftmp3, tmp);
981 felem_mul(tmp, ftmp, x2);
982 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
984 /* ftmp = z1^2*x2 - z2^2*x1 */
985 felem_diff_128_64(tmp, ftmp2);
986 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
987 felem_reduce(ftmp, tmp);
990 * the formulae are incorrect if the points are equal so we check for
991 * this and do doubling if this happens
993 x_equal = felem_is_zero(ftmp);
994 y_equal = felem_is_zero(ftmp3);
995 z1_is_zero = felem_is_zero(z1);
996 z2_is_zero = felem_is_zero(z2);
997 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
998 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
999 point_double(x3, y3, z3, x1, y1, z1);
1005 felem_mul(tmp, z1, z2);
1006 felem_reduce(ftmp5, tmp);
1008 /* special case z2 = 0 is handled later */
1009 felem_assign(ftmp5, z1);
1012 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1013 felem_mul(tmp, ftmp, ftmp5);
1014 felem_reduce(z_out, tmp);
1016 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1017 felem_assign(ftmp5, ftmp);
1018 felem_square(tmp, ftmp);
1019 felem_reduce(ftmp, tmp);
1021 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1022 felem_mul(tmp, ftmp, ftmp5);
1023 felem_reduce(ftmp5, tmp);
1025 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1026 felem_mul(tmp, ftmp2, ftmp);
1027 felem_reduce(ftmp2, tmp);
1029 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1030 felem_mul(tmp, ftmp4, ftmp5);
1031 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1033 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1034 felem_square(tmp2, ftmp3);
1035 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1037 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1038 felem_diff_128_64(tmp2, ftmp5);
1039 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1041 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1042 felem_assign(ftmp5, ftmp2);
1043 felem_scalar(ftmp5, 2);
1044 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1047 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1048 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1050 felem_diff_128_64(tmp2, ftmp5);
1051 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1052 felem_reduce(x_out, tmp2);
1054 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1055 felem_diff(ftmp2, x_out);
1056 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1059 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1061 felem_mul(tmp2, ftmp3, ftmp2);
1062 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1065 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1066 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1068 widefelem_diff(tmp2, tmp);
1069 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1070 felem_reduce(y_out, tmp2);
1073 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1074 * the point at infinity, so we need to check for this separately
1078 * if point 1 is at infinity, copy point 2 to output, and vice versa
1080 copy_conditional(x_out, x2, z1_is_zero);
1081 copy_conditional(x_out, x1, z2_is_zero);
1082 copy_conditional(y_out, y2, z1_is_zero);
1083 copy_conditional(y_out, y1, z2_is_zero);
1084 copy_conditional(z_out, z2, z1_is_zero);
1085 copy_conditional(z_out, z1, z2_is_zero);
1086 felem_assign(x3, x_out);
1087 felem_assign(y3, y_out);
1088 felem_assign(z3, z_out);
/*-
 * Constant-time table lookup: every entry of |pre_comp| is read and
 * combined under a mask, so the memory access pattern is independent of
 * the (secret) index |idx|.
 */
1092 * select_point selects the |idx|th point from a precomputation table and
1094 * The pre_comp array argument should be size of |size| argument
1096 static void select_point(const u64 idx, unsigned int size,
1097 const felem pre_comp[][3], felem out[3])
/* view the three output felems (X, Y, Z) as one flat array of 4*3 limbs */
1100 limb *outlimbs = &out[0][0];
1101 memset(outlimbs, 0, 3 * sizeof(felem));
/* scan the whole table; only the entry with i == idx may contribute */
1103 for (i = 0; i < size; i++) {
1104 const limb *inlimbs = &pre_comp[i][0][0];
/*
 * NOTE(review): |mask| is presumably all-ones iff i == idx and zero
 * otherwise; its computation is not visible in this view -- confirm
 * against the full source.
 */
1111 for (j = 0; j < 4 * 3; j++)
1112 outlimbs[j] |= inlimbs[j] & mask;
1116 /* get_bit returns the |i|th bit in |in| */
1117 static char get_bit(const felem_bytearray in, unsigned i)
1121 return (in[i >> 3] >> (i & 7)) & 1;
/*-
 * batch_mul computes the linear combination
 *     g_scalar * G  +  sum_i scalars[i] * points[i]
 * with a single shared double-and-add ladder.  All table lookups go
 * through select_point, so the scalar bits do not influence the memory
 * access pattern.  |mixed| selects mixed (affine) addition for the
 * per-point tables; the generator tables are always affine (mixed == 1).
 */
1125 * Interleaved point multiplication using precomputed point multiples: The
1126 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1127 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1128 * generator, using certain (large) precomputed multiples in g_pre_comp.
1129 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1131 static void batch_mul(felem x_out, felem y_out, felem z_out,
1132 const felem_bytearray scalars[],
1133 const unsigned num_points, const u8 *g_scalar,
1134 const int mixed, const felem pre_comp[][17][3],
1135 const felem g_pre_comp[2][16][3])
1139 unsigned gen_mul = (g_scalar != NULL);
1140 felem nq[3], tmp[4];
1144 /* set nq to the point at infinity */
1145 memset(nq, 0, 3 * sizeof(felem));
1148 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1149 * of the generator (two in each of the last 28 rounds) and additions of
1150 * other points multiples (every 5th round).
/*
 * |skip| stays set until the first real addition: adding into the point
 * at infinity is replaced by a plain copy (point_add does not handle
 * P + infinity for the running total).
 */
1152 skip = 1; /* save two point operations in the first
/* 220 iterations cover 224-bit scalars at 5-bit strides; 27 when only the generator is used */
1154 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1157 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1159 /* add multiples of the generator */
1160 if (gen_mul && (i <= 27)) {
1161 /* first, look 28 bits upwards */
1162 bits = get_bit(g_scalar, i + 196) << 3;
1163 bits |= get_bit(g_scalar, i + 140) << 2;
1164 bits |= get_bit(g_scalar, i + 84) << 1;
1165 bits |= get_bit(g_scalar, i + 28);
1166 /* select the point to add, in constant time */
1167 select_point(bits, 16, g_pre_comp[1], tmp);
1170 /* value 1 below is argument for "mixed" */
1171 point_add(nq[0], nq[1], nq[2],
1172 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1174 memcpy(nq, tmp, 3 * sizeof(felem));
1178 /* second, look at the current position */
1179 bits = get_bit(g_scalar, i + 168) << 3;
1180 bits |= get_bit(g_scalar, i + 112) << 2;
1181 bits |= get_bit(g_scalar, i + 56) << 1;
1182 bits |= get_bit(g_scalar, i);
1183 /* select the point to add, in constant time */
1184 select_point(bits, 16, g_pre_comp[0], tmp);
1185 point_add(nq[0], nq[1], nq[2],
1186 nq[0], nq[1], nq[2],
1187 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1190 /* do other additions every 5 doublings */
1191 if (num_points && (i % 5 == 0)) {
1192 /* loop over all scalars */
1193 for (num = 0; num < num_points; ++num) {
/* take a signed 6-bit window of the scalar around position i */
1194 bits = get_bit(scalars[num], i + 4) << 5;
1195 bits |= get_bit(scalars[num], i + 3) << 4;
1196 bits |= get_bit(scalars[num], i + 2) << 3;
1197 bits |= get_bit(scalars[num], i + 1) << 2;
1198 bits |= get_bit(scalars[num], i) << 1;
1199 bits |= get_bit(scalars[num], i - 1);
1200 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1202 /* select the point to add or subtract */
1203 select_point(digit, 17, pre_comp[num], tmp);
1204 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
/* conditionally (constant time) use the negated Y when sign is set */
1206 copy_conditional(tmp[1], tmp[3], sign);
1209 point_add(nq[0], nq[1], nq[2],
1210 nq[0], nq[1], nq[2],
1211 mixed, tmp[0], tmp[1], tmp[2]);
1213 memcpy(nq, tmp, 3 * sizeof(felem));
/* hand the accumulated Jacobian result back to the caller */
1219 felem_assign(x_out, nq[0]);
1220 felem_assign(y_out, nq[1]);
1221 felem_assign(z_out, nq[2]);
1224 /******************************************************************************/
1226 * FUNCTIONS TO MANAGE PRECOMPUTATION
/*
 * Allocate a fresh, zeroed NISTP224_PRE_COMP with a reference count of 1.
 * Returns NULL (after raising ERR_R_MALLOC_FAILURE) on allocation failure.
 */
1229 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1231 NISTP224_PRE_COMP *ret = NULL;
1232 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1234 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
/* start with an empty generator table; filled in by precompute_mult */
1237 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1238 ret->references = 1;
/*
 * "Duplicate" a precomputation object for EC_EX_DATA.  The object is
 * immutable once built, so duplication is just a thread-safe reference
 * count increment; the same pointer is shared by all holders.
 */
1242 static void *nistp224_pre_comp_dup(void *src_)
1244 NISTP224_PRE_COMP *src = src_;
1246 /* no need to actually copy, these objects never change! */
1247 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
/*
 * Drop one reference to a precomputation object; the object itself is
 * released only when the last reference goes away (NULL input is a no-op
 * in the full source -- the guard is elided from this view).
 */
1252 static void nistp224_pre_comp_free(void *pre_)
1255 NISTP224_PRE_COMP *pre = pre_;
1260 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
/*
 * Like nistp224_pre_comp_free, but scrubs the memory with
 * OPENSSL_cleanse before releasing it, so table contents do not linger
 * on the heap after the last reference is dropped.
 */
1267 static void nistp224_pre_comp_clear_free(void *pre_)
1270 NISTP224_PRE_COMP *pre = pre_;
1275 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1279 OPENSSL_cleanse(pre, sizeof *pre);
1283 /******************************************************************************/
1285 * OPENSSL EC_METHOD FUNCTIONS
/*
 * Initialize a group via the generic GFp path, then record that the
 * curve coefficient a is -3 mod p (true for NIST P-224), which lets the
 * point arithmetic use the faster a = -3 formulas.
 */
1288 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1291 ret = ec_GFp_simple_group_init(group);
1292 group->a_is_minus3 = 1;
/*
 * Set the curve parameters, but only accept the exact NIST P-224
 * constants: (p, a, b) are compared against the built-in
 * nistp224_curve_params table and anything else is rejected with
 * EC_R_WRONG_CURVE_PARAMETERS, since this EC_METHOD hard-codes P-224
 * arithmetic.  A temporary BN_CTX is created if the caller passed NULL.
 */
1296 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1297 const BIGNUM *a, const BIGNUM *b,
1301 BN_CTX *new_ctx = NULL;
1302 BIGNUM *curve_p, *curve_a, *curve_b;
1305 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1308 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1309 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1310 ((curve_b = BN_CTX_get(ctx)) == NULL))
/* load the canonical P-224 constants for comparison */
1312 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1313 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1314 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1315 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1316 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1317 EC_R_WRONG_CURVE_PARAMETERS);
/* use the fast NIST-specific reduction for this field */
1320 group->field_mod_func = BN_nist_mod_224;
1321 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1324 if (new_ctx != NULL)
1325 BN_CTX_free(new_ctx);
/*
 * Convert a point from Jacobian (X, Y, Z) to affine coordinates:
 * x = X / Z^2, y = Y / Z^3.  Fails for the point at infinity (Z == 0),
 * which has no affine representation.
 */
1330 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1333 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1334 const EC_POINT *point,
1335 BIGNUM *x, BIGNUM *y,
1338 felem z1, z2, x_in, y_in, x_out, y_out;
1341 if (EC_POINT_is_at_infinity(group, point)) {
1342 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1343 EC_R_POINT_AT_INFINITY);
1346 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1347 (!BN_to_felem(z1, &point->Z)))
/*
 * NOTE(review): z2 is presumably Z^-1 here (the field inversion step is
 * not visible in this view -- confirm against the full source), so
 * squaring it yields Z^-2 for the x coordinate.
 */
1350 felem_square(tmp, z2);
1351 felem_reduce(z1, tmp);
1352 felem_mul(tmp, x_in, z1);
1353 felem_reduce(x_in, tmp);
/* contract to the unique minimal representation before exporting */
1354 felem_contract(x_out, x_in);
1356 if (!felem_to_BN(x, x_out)) {
1357 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/* y = Y * Z^-3: multiply the Z^-2 computed above by Z^-1 once more */
1362 felem_mul(tmp, z1, z2);
1363 felem_reduce(z1, tmp);
1364 felem_mul(tmp, y_in, z1);
1365 felem_reduce(y_in, tmp);
1366 felem_contract(y_out, y_in);
1368 if (!felem_to_BN(y, y_out)) {
1369 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/*
 * Batch-convert |num| Jacobian points to affine form (Z == 1) using the
 * shared helper ec_GFp_nistp_points_make_affine_internal, which needs
 * |num|+1 scratch felems in |tmp_felems|.  The function-pointer casts
 * adapt the felem-typed callbacks to the helper's void* interface.
 */
1377 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1378 felem tmp_felems[ /* num+1 */ ])
1381 * Runs in constant time, unless an input is the point at infinity (which
1382 * normally shouldn't happen).
1384 ec_GFp_nistp_points_make_affine_internal(num,
1388 (void (*)(void *))felem_one,
1389 (int (*)(const void *))
1391 (void (*)(void *, const void *))
1393 (void (*)(void *, const void *))
1394 felem_square_reduce, (void (*)
1401 (void (*)(void *, const void *))
1403 (void (*)(void *, const void *))
/*-
 * Top-level multi-scalar multiplication:
 *     r = scalar*G + sum_i scalars[i]*points[i]
 * NULL scalar/point entries contribute nothing.  When valid generator
 * precomputation is available (stored via EC_EX_DATA or matching the
 * built-in gmul table) the generator term uses the large precomputed
 * tables; otherwise the generator is folded in as an ordinary point.
 * Scalars outside [0, 2^224) are first reduced mod the group order.
 */
1408 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1409 * values Result is stored in r (r can equal one of the inputs).
1411 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1412 const BIGNUM *scalar, size_t num,
1413 const EC_POINT *points[],
1414 const BIGNUM *scalars[], BN_CTX *ctx)
1420 BN_CTX *new_ctx = NULL;
1421 BIGNUM *x, *y, *z, *tmp_scalar;
1422 felem_bytearray g_secret;
1423 felem_bytearray *secrets = NULL;
1424 felem(*pre_comp)[17][3] = NULL;
1425 felem *tmp_felems = NULL;
1426 felem_bytearray tmp;
1428 int have_pre_comp = 0;
1429 size_t num_points = num;
1430 felem x_in, y_in, z_in, x_out, y_out, z_out;
1431 NISTP224_PRE_COMP *pre = NULL;
1432 const felem(*g_pre_comp)[16][3] = NULL;
1433 EC_POINT *generator = NULL;
1434 const EC_POINT *p = NULL;
1435 const BIGNUM *p_scalar = NULL;
1438 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1441 if (((x = BN_CTX_get(ctx)) == NULL) ||
1442 ((y = BN_CTX_get(ctx)) == NULL) ||
1443 ((z = BN_CTX_get(ctx)) == NULL) ||
1444 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
/* look for previously stored generator precomputation */
1447 if (scalar != NULL) {
1448 pre = EC_EX_DATA_get_data(group->extra_data,
1449 nistp224_pre_comp_dup,
1450 nistp224_pre_comp_free,
1451 nistp224_pre_comp_clear_free);
1453 /* we have precomputation, try to use it */
1454 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1456 /* try to use the standard precomputation */
1457 g_pre_comp = &gmul[0];
1458 generator = EC_POINT_new(group);
1459 if (generator == NULL)
1461 /* get the generator from precomputation */
1462 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1463 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1464 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1465 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1468 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
/* the built-in table is only usable if it matches this group's generator */
1472 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1473 /* precomputation matches generator */
1477 * we don't have valid precomputation: treat the generator as a
/* generator becomes an extra "ordinary" point in the batch */
1480 num_points = num_points + 1;
1483 if (num_points > 0) {
1484 if (num_points >= 3) {
1486 * unless we precompute multiples for just one or two points,
1487 * converting those into affine form is time well spent
1491 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1492 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1495 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1496 if ((secrets == NULL) || (pre_comp == NULL)
1497 || (mixed && (tmp_felems == NULL))) {
1498 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1503 * we treat NULL scalars as 0, and NULL points as points at infinity,
1504 * i.e., they contribute nothing to the linear combination
1506 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1507 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1508 for (i = 0; i < num_points; ++i) {
1512 p = EC_GROUP_get0_generator(group);
1515 /* the i^th point */
1518 p_scalar = scalars[i];
1520 if ((p_scalar != NULL) && (p != NULL)) {
1521 /* reduce scalar to 0 <= scalar < 2^224 */
1522 if ((BN_num_bits(p_scalar) > 224)
1523 || (BN_is_negative(p_scalar))) {
1525 * this is an unusual input, and we don't guarantee
1528 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1529 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1532 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1534 num_bytes = BN_bn2bin(p_scalar, tmp);
/* batch_mul expects little-endian scalar bytes */
1535 flip_endian(secrets[i], tmp, num_bytes);
1536 /* precompute multiples */
1537 if ((!BN_to_felem(x_out, &p->X)) ||
1538 (!BN_to_felem(y_out, &p->Y)) ||
1539 (!BN_to_felem(z_out, &p->Z)))
1541 felem_assign(pre_comp[i][1][0], x_out);
1542 felem_assign(pre_comp[i][1][1], y_out);
1543 felem_assign(pre_comp[i][1][2], z_out);
/* build 2*P..16*P: odd multiples by addition, even ones by doubling */
1544 for (j = 2; j <= 16; ++j) {
1546 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1547 pre_comp[i][j][2], pre_comp[i][1][0],
1548 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1549 pre_comp[i][j - 1][0],
1550 pre_comp[i][j - 1][1],
1551 pre_comp[i][j - 1][2]);
1553 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1554 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1555 pre_comp[i][j / 2][1],
1556 pre_comp[i][j / 2][2]);
1562 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1565 /* the scalar for the generator */
1566 if ((scalar != NULL) && (have_pre_comp)) {
1567 memset(g_secret, 0, sizeof g_secret);
1568 /* reduce scalar to 0 <= scalar < 2^224 */
1569 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1571 * this is an unusual input, and we don't guarantee
1574 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1575 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1578 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1580 num_bytes = BN_bn2bin(scalar, tmp);
1581 flip_endian(g_secret, tmp, num_bytes);
1582 /* do the multiplication with generator precomputation */
1583 batch_mul(x_out, y_out, z_out,
1584 (const felem_bytearray(*))secrets, num_points,
1586 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1588 /* do the multiplication without generator precomputation */
1589 batch_mul(x_out, y_out, z_out,
1590 (const felem_bytearray(*))secrets, num_points,
1591 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1592 /* reduce the output to its unique minimal representation */
1593 felem_contract(x_in, x_out);
1594 felem_contract(y_in, y_out);
1595 felem_contract(z_in, z_out);
1596 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1597 (!felem_to_BN(z, z_in))) {
1598 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1601 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
/* common cleanup for all exit paths */
1605 if (generator != NULL)
1606 EC_POINT_free(generator);
1607 if (new_ctx != NULL)
1608 BN_CTX_free(new_ctx);
1609 if (secrets != NULL)
1610 OPENSSL_free(secrets);
1611 if (pre_comp != NULL)
1612 OPENSSL_free(pre_comp);
1613 if (tmp_felems != NULL)
1614 OPENSSL_free(tmp_felems);
/*-
 * Build and attach generator precomputation for this group.  Two tables
 * of 16 points each are produced: table 0 holds combinations of
 * G, 2^56*G, 2^112*G, 2^168*G and table 1 the same shifted by 2^28, so
 * batch_mul can consume a 224-bit scalar in 28 rounds of two lookups.
 * If the group's generator is the standard one the built-in |gmul|
 * table is copied instead of recomputing.
 */
1618 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1621 NISTP224_PRE_COMP *pre = NULL;
1623 BN_CTX *new_ctx = NULL;
1625 EC_POINT *generator = NULL;
1626 felem tmp_felems[32];
1628 /* throw away old precomputation */
1629 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1630 nistp224_pre_comp_free,
1631 nistp224_pre_comp_clear_free);
1633 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1636 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1638 /* get the generator */
1639 if (group->generator == NULL)
1641 generator = EC_POINT_new(group);
1642 if (generator == NULL)
/* construct the standard P-224 generator for comparison */
1644 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1645 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1646 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1648 if ((pre = nistp224_pre_comp_new()) == NULL)
1651 * if the generator is the standard one, use built-in precomputation
1653 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1654 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
/* non-standard generator: seed table 0 entry 1 with G itself */
1658 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1659 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1660 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1663 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1664 * 2^140*G, 2^196*G for the second one
1666 for (i = 1; i <= 8; i <<= 1) {
/* 28 doublings take a table-0 entry into the corresponding table-1 entry */
1667 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1668 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1669 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1670 for (j = 0; j < 27; ++j) {
1671 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1672 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1673 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
/* another 28 doublings yield the next power-of-two entry of table 0 */
1677 point_double(pre->g_pre_comp[0][2 * i][0],
1678 pre->g_pre_comp[0][2 * i][1],
1679 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1680 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1681 for (j = 0; j < 27; ++j) {
1682 point_double(pre->g_pre_comp[0][2 * i][0],
1683 pre->g_pre_comp[0][2 * i][1],
1684 pre->g_pre_comp[0][2 * i][2],
1685 pre->g_pre_comp[0][2 * i][0],
1686 pre->g_pre_comp[0][2 * i][1],
1687 pre->g_pre_comp[0][2 * i][2]);
/* fill in the remaining table entries as sums of the power-of-two entries */
1690 for (i = 0; i < 2; i++) {
1691 /* g_pre_comp[i][0] is the point at infinity */
1692 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1693 /* the remaining multiples */
1694 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1695 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1696 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1697 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1698 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1699 pre->g_pre_comp[i][2][2]);
1700 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1701 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1702 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1703 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1704 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1705 pre->g_pre_comp[i][2][2]);
1706 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1707 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1708 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1709 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1710 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1711 pre->g_pre_comp[i][4][2]);
1713 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1715 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1716 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1717 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1718 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1719 pre->g_pre_comp[i][2][2]);
1720 for (j = 1; j < 8; ++j) {
1721 /* odd multiples: add G resp. 2^28*G */
1722 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1723 pre->g_pre_comp[i][2 * j + 1][1],
1724 pre->g_pre_comp[i][2 * j + 1][2],
1725 pre->g_pre_comp[i][2 * j][0],
1726 pre->g_pre_comp[i][2 * j][1],
1727 pre->g_pre_comp[i][2 * j][2], 0,
1728 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1729 pre->g_pre_comp[i][1][2]);
/* convert the 31 non-infinity entries to affine for mixed addition */
1732 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1734 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1735 nistp224_pre_comp_free,
1736 nistp224_pre_comp_clear_free))
1742 if (generator != NULL)
1743 EC_POINT_free(generator);
1744 if (new_ctx != NULL)
1745 BN_CTX_free(new_ctx);
/* on error paths |pre| is still owned here and must be released */
1747 nistp224_pre_comp_free(pre);
/*
 * Report whether generator precomputation has been attached to this
 * group (i.e. whether ec_GFp_nistp224_precompute_mult has run).
 */
1751 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1753 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1754 nistp224_pre_comp_free,
1755 nistp224_pre_comp_clear_free)
/*
 * When OPENSSL_NO_EC_NISTP_64_GCC_128 is defined the whole file compiles
 * away; this self-referencing definition keeps the translation unit
 * non-empty, which ISO C requires.
 */
1763 static void *dummy = &dummy;