1 /* crypto/ec/ecp_nistp224.c */
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
28 #include <openssl/opensslconf.h>
29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31 #ifndef OPENSSL_SYS_VMS
38 #include <openssl/err.h>
41 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42 /* even with gcc, the typedef won't work for 32-bit platforms */
43 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 #error "Need GCC 3.1 or later to define type uint128_t"
53 /******************************************************************************/
54 /* INTERNAL REPRESENTATION OF FIELD ELEMENTS
56 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
57 * using 64-bit coefficients called 'limbs',
58 * and sometimes (for multiplication results) as
59 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
60 * using 128-bit coefficients called 'widelimbs'.
61 * A 4-limb representation is an 'felem';
62 * a 7-widelimb representation is a 'widefelem'.
63 * Even within felems, bits of adjacent limbs overlap, and we don't always
64 * reduce the representations: we ensure that inputs to each felem
65 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
66 * and fit into a 128-bit word without overflow. The coefficients are then
67 * again partially reduced to obtain an felem satisfying a_i < 2^57.
68 * We only reduce to the unique minimal representation at the end of the
72 typedef uint64_t limb;
73 typedef uint128_t widelimb;
75 typedef limb felem[4];
76 typedef widelimb widefelem[7];
78 /* Field element represented as a byte arrary.
79 * 28*8 = 224 bits is also the group order size for the elliptic curve,
80 * and we also use this type for scalars for point multiplication.
82 typedef u8 felem_bytearray[28];
84 static const felem_bytearray nistp224_curve_params[5] = {
85 {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* p */
86 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
87 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01},
88 {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* a */
89 0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF,
90 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE},
91 {0xB4,0x05,0x0A,0x85,0x0C,0x04,0xB3,0xAB,0xF5,0x41, /* b */
92 0x32,0x56,0x50,0x44,0xB0,0xB7,0xD7,0xBF,0xD8,0xBA,
93 0x27,0x0B,0x39,0x43,0x23,0x55,0xFF,0xB4},
94 {0xB7,0x0E,0x0C,0xBD,0x6B,0xB4,0xBF,0x7F,0x32,0x13, /* x */
95 0x90,0xB9,0x4A,0x03,0xC1,0xD3,0x56,0xC2,0x11,0x22,
96 0x34,0x32,0x80,0xD6,0x11,0x5C,0x1D,0x21},
97 {0xbd,0x37,0x63,0x88,0xb5,0xf7,0x23,0xfb,0x4c,0x22, /* y */
98 0xdf,0xe6,0xcd,0x43,0x75,0xa0,0x5a,0x07,0x47,0x64,
99 0x44,0xd5,0x81,0x99,0x85,0x00,0x7e,0x34}
102 /* Precomputed multiples of the standard generator
103 * Points are given in coordinates (X, Y, Z) where Z normally is 1
104 * (0 for the point at infinity).
105 * For each field element, slice a_0 is word 0, etc.
107 * The table has 2 * 16 elements, starting with the following:
108 * index | bits | point
109 * ------+---------+------------------------------
112 * 2 | 0 0 1 0 | 2^56G
113 * 3 | 0 0 1 1 | (2^56 + 1)G
114 * 4 | 0 1 0 0 | 2^112G
115 * 5 | 0 1 0 1 | (2^112 + 1)G
116 * 6 | 0 1 1 0 | (2^112 + 2^56)G
117 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
118 * 8 | 1 0 0 0 | 2^168G
119 * 9 | 1 0 0 1 | (2^168 + 1)G
120 * 10 | 1 0 1 0 | (2^168 + 2^56)G
121 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
122 * 12 | 1 1 0 0 | (2^168 + 2^112)G
123 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
124 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
125 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
126 * followed by a copy of this with each element multiplied by 2^28.
128 * The reason for this is so that we can clock bits into four different
129 * locations when doing simple scalar multiplies against the base point,
130 * and then another four locations using the second 16 elements.
132 static const felem gmul[2][16][3] =
136 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
137 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
139 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
140 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
142 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
143 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
145 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
146 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
148 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
149 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
151 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
152 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
154 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
155 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
157 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
158 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
160 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
161 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
163 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
164 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
166 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
167 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
169 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
170 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
172 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
173 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
175 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
176 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
178 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
179 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
184 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
185 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
187 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
188 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
190 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
191 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
193 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
194 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
196 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
197 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
199 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
200 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
202 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
203 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
205 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
206 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
208 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
209 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
211 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
212 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
214 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
215 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
217 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
218 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
220 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
221 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
223 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
224 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
226 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
227 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
230 /* Precomputation for the group generator. */
232 felem g_pre_comp[2][16][3];
236 const EC_METHOD *EC_GFp_nistp224_method(void)
238 static const EC_METHOD ret = {
239 EC_FLAGS_DEFAULT_OCT,
240 NID_X9_62_prime_field,
241 ec_GFp_nistp224_group_init,
242 ec_GFp_simple_group_finish,
243 ec_GFp_simple_group_clear_finish,
244 ec_GFp_nist_group_copy,
245 ec_GFp_nistp224_group_set_curve,
246 ec_GFp_simple_group_get_curve,
247 ec_GFp_simple_group_get_degree,
248 ec_GFp_simple_group_check_discriminant,
249 ec_GFp_simple_point_init,
250 ec_GFp_simple_point_finish,
251 ec_GFp_simple_point_clear_finish,
252 ec_GFp_simple_point_copy,
253 ec_GFp_simple_point_set_to_infinity,
254 ec_GFp_simple_set_Jprojective_coordinates_GFp,
255 ec_GFp_simple_get_Jprojective_coordinates_GFp,
256 ec_GFp_simple_point_set_affine_coordinates,
257 ec_GFp_nistp224_point_get_affine_coordinates,
258 0 /* point_set_compressed_coordinates */,
263 ec_GFp_simple_invert,
264 ec_GFp_simple_is_at_infinity,
265 ec_GFp_simple_is_on_curve,
267 ec_GFp_simple_make_affine,
268 ec_GFp_simple_points_make_affine,
269 ec_GFp_nistp224_points_mul,
270 ec_GFp_nistp224_precompute_mult,
271 ec_GFp_nistp224_have_precompute_mult,
272 ec_GFp_nist_field_mul,
273 ec_GFp_nist_field_sqr,
275 0 /* field_encode */,
276 0 /* field_decode */,
277 0 /* field_set_to_one */ };
282 /* Helper functions to convert field elements to/from internal representation */
283 static void bin28_to_felem(felem out, const u8 in[28])
285 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
286 out[1] = (*((const uint64_t *)(in+7))) & 0x00ffffffffffffff;
287 out[2] = (*((const uint64_t *)(in+14))) & 0x00ffffffffffffff;
288 out[3] = (*((const uint64_t *)(in+21))) & 0x00ffffffffffffff;
291 static void felem_to_bin28(u8 out[28], const felem in)
294 for (i = 0; i < 7; ++i)
296 out[i] = in[0]>>(8*i);
297 out[i+7] = in[1]>>(8*i);
298 out[i+14] = in[2]>>(8*i);
299 out[i+21] = in[3]>>(8*i);
303 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
304 static void flip_endian(u8 *out, const u8 *in, unsigned len)
307 for (i = 0; i < len; ++i)
308 out[i] = in[len-1-i];
311 /* From OpenSSL BIGNUM to internal representation */
312 static int BN_to_felem(felem out, const BIGNUM *bn)
314 felem_bytearray b_in;
315 felem_bytearray b_out;
318 /* BN_bn2bin eats leading zeroes */
319 memset(b_out, 0, sizeof b_out);
320 num_bytes = BN_num_bytes(bn);
321 if (num_bytes > sizeof b_out)
323 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
326 if (BN_is_negative(bn))
328 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
331 num_bytes = BN_bn2bin(bn, b_in);
332 flip_endian(b_out, b_in, num_bytes);
333 bin28_to_felem(out, b_out);
337 /* From internal representation to OpenSSL BIGNUM */
338 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
340 felem_bytearray b_in, b_out;
341 felem_to_bin28(b_in, in);
342 flip_endian(b_out, b_in, sizeof b_out);
343 return BN_bin2bn(b_out, sizeof b_out, out);
346 /******************************************************************************/
349 * Field operations, using the internal representation of field elements.
350 * NB! These operations are specific to our point multiplication and cannot be
351 * expected to be correct in general - e.g., multiplication with a large scalar
352 * will cause an overflow.
356 static void felem_one(felem out)
364 static void felem_assign(felem out, const felem in)
372 /* Sum two field elements: out += in */
373 static void felem_sum(felem out, const felem in)
381 /* Get negative value: out = -in */
382 /* Assumes in[i] < 2^57 */
383 static void felem_neg(felem out, const felem in)
385 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
386 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
387 static const limb two58m42m2 = (((limb) 1) << 58) -
388 (((limb) 1) << 42) - (((limb) 1) << 2);
390 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
391 out[0] = two58p2 - in[0];
392 out[1] = two58m42m2 - in[1];
393 out[2] = two58m2 - in[2];
394 out[3] = two58m2 - in[3];
397 /* Subtract field elements: out -= in */
398 /* Assumes in[i] < 2^57 */
399 static void felem_diff(felem out, const felem in)
401 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
402 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
403 static const limb two58m42m2 = (((limb) 1) << 58) -
404 (((limb) 1) << 42) - (((limb) 1) << 2);
406 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
408 out[1] += two58m42m2;
418 /* Subtract in unreduced 128-bit mode: out -= in */
419 /* Assumes in[i] < 2^119 */
420 static void widefelem_diff(widefelem out, const widefelem in)
422 static const widelimb two120 = ((widelimb) 1) << 120;
423 static const widelimb two120m64 = (((widelimb) 1) << 120) -
424 (((widelimb) 1) << 64);
425 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
426 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
428 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
433 out[4] += two120m104m64;
446 /* Subtract in mixed mode: out128 -= in64 */
448 static void felem_diff_128_64(widefelem out, const felem in)
450 static const widelimb two64p8 = (((widelimb) 1) << 64) +
451 (((widelimb) 1) << 8);
452 static const widelimb two64m8 = (((widelimb) 1) << 64) -
453 (((widelimb) 1) << 8);
454 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
455 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
457 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
459 out[1] += two64m48m8;
469 /* Multiply a field element by a scalar: out = out * scalar
470 * The scalars we actually use are small, so results fit without overflow */
471 static void felem_scalar(felem out, const limb scalar)
479 /* Multiply an unreduced field element by a scalar: out = out * scalar
480 * The scalars we actually use are small, so results fit without overflow */
481 static void widefelem_scalar(widefelem out, const widelimb scalar)
492 /* Square a field element: out = in^2 */
493 static void felem_square(widefelem out, const felem in)
495 limb tmp0, tmp1, tmp2;
496 tmp0 = 2 * in[0]; tmp1 = 2 * in[1]; tmp2 = 2 * in[2];
497 out[0] = ((widelimb) in[0]) * in[0];
498 out[1] = ((widelimb) in[0]) * tmp1;
499 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
500 out[3] = ((widelimb) in[3]) * tmp0 +
501 ((widelimb) in[1]) * tmp2;
502 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
503 out[5] = ((widelimb) in[3]) * tmp2;
504 out[6] = ((widelimb) in[3]) * in[3];
507 /* Multiply two field elements: out = in1 * in2 */
508 static void felem_mul(widefelem out, const felem in1, const felem in2)
510 out[0] = ((widelimb) in1[0]) * in2[0];
511 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
512 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
513 ((widelimb) in1[2]) * in2[0];
514 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
515 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
516 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
517 ((widelimb) in1[3]) * in2[1];
518 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
519 out[6] = ((widelimb) in1[3]) * in2[3];
522 /* Reduce seven 128-bit coefficients to four 64-bit coefficients.
523 * Requires in[i] < 2^126,
524 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
525 static void felem_reduce(felem out, const widefelem in)
527 static const widelimb two127p15 = (((widelimb) 1) << 127) +
528 (((widelimb) 1) << 15);
529 static const widelimb two127m71 = (((widelimb) 1) << 127) -
530 (((widelimb) 1) << 71);
531 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
532 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
535 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
536 output[0] = in[0] + two127p15;
537 output[1] = in[1] + two127m71m55;
538 output[2] = in[2] + two127m71;
542 /* Eliminate in[4], in[5], in[6] */
543 output[4] += in[6] >> 16;
544 output[3] += (in[6] & 0xffff) << 40;
547 output[3] += in[5] >> 16;
548 output[2] += (in[5] & 0xffff) << 40;
551 output[2] += output[4] >> 16;
552 output[1] += (output[4] & 0xffff) << 40;
553 output[0] -= output[4];
555 /* Carry 2 -> 3 -> 4 */
556 output[3] += output[2] >> 56;
557 output[2] &= 0x00ffffffffffffff;
559 output[4] = output[3] >> 56;
560 output[3] &= 0x00ffffffffffffff;
562 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
564 /* Eliminate output[4] */
565 output[2] += output[4] >> 16;
566 /* output[2] < 2^56 + 2^56 = 2^57 */
567 output[1] += (output[4] & 0xffff) << 40;
568 output[0] -= output[4];
570 /* Carry 0 -> 1 -> 2 -> 3 */
571 output[1] += output[0] >> 56;
572 out[0] = output[0] & 0x00ffffffffffffff;
574 output[2] += output[1] >> 56;
575 /* output[2] < 2^57 + 2^72 */
576 out[1] = output[1] & 0x00ffffffffffffff;
577 output[3] += output[2] >> 56;
578 /* output[3] <= 2^56 + 2^16 */
579 out[2] = output[2] & 0x00ffffffffffffff;
581 /* out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
582 * out[3] <= 2^56 + 2^16 (due to final carry),
587 static void felem_square_reduce(felem out, const felem in)
590 felem_square(tmp, in);
591 felem_reduce(out, tmp);
594 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
597 felem_mul(tmp, in1, in2);
598 felem_reduce(out, tmp);
601 /* Reduce to unique minimal representation.
602 * Requires 0 <= in < 2*p (always call felem_reduce first) */
603 static void felem_contract(felem out, const felem in)
605 static const int64_t two56 = ((limb) 1) << 56;
606 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
607 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
613 /* Case 1: a = 1 iff in >= 2^224 */
617 tmp[3] &= 0x00ffffffffffffff;
618 /* Case 2: a = 0 iff p <= in < 2^224, i.e.,
619 * the high 128 bits are all 1 and the lower part is non-zero */
620 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
621 (((int64_t)(in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
622 a &= 0x00ffffffffffffff;
623 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
625 /* subtract 2^224 - 2^96 + 1 if a is all-one*/
626 tmp[3] &= a ^ 0xffffffffffffffff;
627 tmp[2] &= a ^ 0xffffffffffffffff;
628 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
631 /* eliminate negative coefficients: if tmp[0] is negative, tmp[1] must
632 * be non-zero, so we only need one step */
637 /* carry 1 -> 2 -> 3 */
638 tmp[2] += tmp[1] >> 56;
639 tmp[1] &= 0x00ffffffffffffff;
641 tmp[3] += tmp[2] >> 56;
642 tmp[2] &= 0x00ffffffffffffff;
644 /* Now 0 <= out < p */
651 /* Zero-check: returns 1 if input is 0, and 0 otherwise.
652 * We know that field elements are reduced to in < 2^225,
653 * so we only need to check three cases: 0, 2^224 - 2^96 + 1,
654 * and 2^225 - 2^97 + 2 */
655 static limb felem_is_zero(const felem in)
657 limb zero, two224m96p1, two225m97p2;
659 zero = in[0] | in[1] | in[2] | in[3];
660 zero = (((int64_t)(zero) - 1) >> 63) & 1;
661 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
662 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
663 two224m96p1 = (((int64_t)(two224m96p1) - 1) >> 63) & 1;
664 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
665 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
666 two225m97p2 = (((int64_t)(two225m97p2) - 1) >> 63) & 1;
667 return (zero | two224m96p1 | two225m97p2);
670 static limb felem_is_zero_int(const felem in)
672 return (int) (felem_is_zero(in) & ((limb)1));
675 /* Invert a field element */
676 /* Computation chain copied from djb's code */
677 static void felem_inv(felem out, const felem in)
679 felem ftmp, ftmp2, ftmp3, ftmp4;
683 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2 */
684 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 1 */
685 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2 */
686 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 1 */
687 felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
688 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
689 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
690 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 1 */
691 felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
692 for (i = 0; i < 5; ++i) /* 2^12 - 2^6 */
694 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
696 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
697 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
698 for (i = 0; i < 11; ++i) /* 2^24 - 2^12 */
700 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
702 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
703 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
704 for (i = 0; i < 23; ++i) /* 2^48 - 2^24 */
706 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
708 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
709 felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
710 for (i = 0; i < 47; ++i) /* 2^96 - 2^48 */
712 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
714 felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
715 felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
716 for (i = 0; i < 23; ++i) /* 2^120 - 2^24 */
718 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
720 felem_mul(tmp, ftmp2, ftmp4); felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
721 for (i = 0; i < 6; ++i) /* 2^126 - 2^6 */
723 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
725 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^126 - 1 */
726 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^127 - 2 */
727 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^127 - 1 */
728 for (i = 0; i < 97; ++i) /* 2^224 - 2^97 */
730 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
732 felem_mul(tmp, ftmp, ftmp3); felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
735 /* Copy in constant time:
736 * if icopy == 1, copy in to out,
737 * if icopy == 0, copy out to itself. */
739 copy_conditional(felem out, const felem in, limb icopy)
742 /* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one */
743 const limb copy = -icopy;
744 for (i = 0; i < 4; ++i)
746 const limb tmp = copy & (in[i] ^ out[i]);
751 /******************************************************************************/
752 /* ELLIPTIC CURVE POINT OPERATIONS
754 * Points are represented in Jacobian projective coordinates:
755 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
756 * or to the point at infinity if Z == 0.
760 /* Double an elliptic curve point:
761 * (X', Y', Z') = 2 * (X, Y, Z), where
762 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
763 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
764 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
765 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
766 * while x_out == y_in is not (maybe this works, but it's not tested). */
768 point_double(felem x_out, felem y_out, felem z_out,
769 const felem x_in, const felem y_in, const felem z_in)
772 felem delta, gamma, beta, alpha, ftmp, ftmp2;
774 felem_assign(ftmp, x_in);
775 felem_assign(ftmp2, x_in);
778 felem_square(tmp, z_in);
779 felem_reduce(delta, tmp);
782 felem_square(tmp, y_in);
783 felem_reduce(gamma, tmp);
786 felem_mul(tmp, x_in, gamma);
787 felem_reduce(beta, tmp);
789 /* alpha = 3*(x-delta)*(x+delta) */
790 felem_diff(ftmp, delta);
791 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
792 felem_sum(ftmp2, delta);
793 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
794 felem_scalar(ftmp2, 3);
795 /* ftmp2[i] < 3 * 2^58 < 2^60 */
796 felem_mul(tmp, ftmp, ftmp2);
797 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
798 felem_reduce(alpha, tmp);
800 /* x' = alpha^2 - 8*beta */
801 felem_square(tmp, alpha);
802 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
803 felem_assign(ftmp, beta);
804 felem_scalar(ftmp, 8);
805 /* ftmp[i] < 8 * 2^57 = 2^60 */
806 felem_diff_128_64(tmp, ftmp);
807 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
808 felem_reduce(x_out, tmp);
810 /* z' = (y + z)^2 - gamma - delta */
811 felem_sum(delta, gamma);
812 /* delta[i] < 2^57 + 2^57 = 2^58 */
813 felem_assign(ftmp, y_in);
814 felem_sum(ftmp, z_in);
815 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
816 felem_square(tmp, ftmp);
817 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
818 felem_diff_128_64(tmp, delta);
819 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
820 felem_reduce(z_out, tmp);
822 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
823 felem_scalar(beta, 4);
824 /* beta[i] < 4 * 2^57 = 2^59 */
825 felem_diff(beta, x_out);
826 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
827 felem_mul(tmp, alpha, beta);
828 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
829 felem_square(tmp2, gamma);
830 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
831 widefelem_scalar(tmp2, 8);
832 /* tmp2[i] < 8 * 2^116 = 2^119 */
833 widefelem_diff(tmp, tmp2);
834 /* tmp[i] < 2^119 + 2^120 < 2^121 */
835 felem_reduce(y_out, tmp);
838 /* Add two elliptic curve points:
839 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
840 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
841 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
842 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
843 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
844 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
846 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
849 /* This function is not entirely constant-time:
850 * it includes a branch for checking whether the two input points are equal,
851 * (while not equal to the point at infinity).
852 * This case never happens during single point multiplication,
853 * so there is no timing leak for ECDH or ECDSA signing. */
854 static void point_add(felem x3, felem y3, felem z3,
855 const felem x1, const felem y1, const felem z1,
856 const int mixed, const felem x2, const felem y2, const felem z2)
858 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
860 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
865 felem_square(tmp, z2);
866 felem_reduce(ftmp2, tmp);
869 felem_mul(tmp, ftmp2, z2);
870 felem_reduce(ftmp4, tmp);
872 /* ftmp4 = z2^3*y1 */
873 felem_mul(tmp2, ftmp4, y1);
874 felem_reduce(ftmp4, tmp2);
876 /* ftmp2 = z2^2*x1 */
877 felem_mul(tmp2, ftmp2, x1);
878 felem_reduce(ftmp2, tmp2);
882 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
884 /* ftmp4 = z2^3*y1 */
885 felem_assign(ftmp4, y1);
887 /* ftmp2 = z2^2*x1 */
888 felem_assign(ftmp2, x1);
892 felem_square(tmp, z1);
893 felem_reduce(ftmp, tmp);
896 felem_mul(tmp, ftmp, z1);
897 felem_reduce(ftmp3, tmp);
900 felem_mul(tmp, ftmp3, y2);
901 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
903 /* ftmp3 = z1^3*y2 - z2^3*y1 */
904 felem_diff_128_64(tmp, ftmp4);
905 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
906 felem_reduce(ftmp3, tmp);
909 felem_mul(tmp, ftmp, x2);
910 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
912 /* ftmp = z1^2*x2 - z2^2*x1 */
913 felem_diff_128_64(tmp, ftmp2);
914 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
915 felem_reduce(ftmp, tmp);
917 /* the formulae are incorrect if the points are equal
918 * so we check for this and do doubling if this happens */
919 x_equal = felem_is_zero(ftmp);
920 y_equal = felem_is_zero(ftmp3);
921 z1_is_zero = felem_is_zero(z1);
922 z2_is_zero = felem_is_zero(z2);
923 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
924 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
926 point_double(x3, y3, z3, x1, y1, z1);
933 felem_mul(tmp, z1, z2);
934 felem_reduce(ftmp5, tmp);
938 /* special case z2 = 0 is handled later */
939 felem_assign(ftmp5, z1);
942 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
943 felem_mul(tmp, ftmp, ftmp5);
944 felem_reduce(z_out, tmp);
946 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
947 felem_assign(ftmp5, ftmp);
948 felem_square(tmp, ftmp);
949 felem_reduce(ftmp, tmp);
951 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
952 felem_mul(tmp, ftmp, ftmp5);
953 felem_reduce(ftmp5, tmp);
955 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
956 felem_mul(tmp, ftmp2, ftmp);
957 felem_reduce(ftmp2, tmp);
959 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
960 felem_mul(tmp, ftmp4, ftmp5);
961 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
963 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
964 felem_square(tmp2, ftmp3);
965 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
967 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
968 felem_diff_128_64(tmp2, ftmp5);
969 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
971 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
972 felem_assign(ftmp5, ftmp2);
973 felem_scalar(ftmp5, 2);
974 /* ftmp5[i] < 2 * 2^57 = 2^58 */
976 /* x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
977 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
978 felem_diff_128_64(tmp2, ftmp5);
979 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
980 felem_reduce(x_out, tmp2);
982 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
983 felem_diff(ftmp2, x_out);
984 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
986 /* tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) */
987 felem_mul(tmp2, ftmp3, ftmp2);
988 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
990 /* y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
991 z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
992 widefelem_diff(tmp2, tmp);
993 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
994 felem_reduce(y_out, tmp2);
996 /* the result (x_out, y_out, z_out) is incorrect if one of the inputs is
997 * the point at infinity, so we need to check for this separately */
999 /* if point 1 is at infinity, copy point 2 to output, and vice versa */
1000 copy_conditional(x_out, x2, z1_is_zero);
1001 copy_conditional(x_out, x1, z2_is_zero);
1002 copy_conditional(y_out, y2, z1_is_zero);
1003 copy_conditional(y_out, y1, z2_is_zero);
1004 copy_conditional(z_out, z2, z1_is_zero);
1005 copy_conditional(z_out, z1, z2_is_zero);
1006 felem_assign(x3, x_out);
1007 felem_assign(y3, y_out);
1008 felem_assign(z3, z_out);
1011 /* select_point selects the |idx|th point from a precomputation table and
1012 * copies it to out. */
1013 static void select_point(const u64 idx, unsigned int size, const felem pre_comp[/*size*/][3], felem out[3])
1016 limb *outlimbs = &out[0][0];
1017 memset(outlimbs, 0, 3 * sizeof(felem));
1019 for (i = 0; i < size; i++)
1021 const limb *inlimbs = &pre_comp[i][0][0];
1028 for (j = 0; j < 4 * 3; j++)
1029 outlimbs[j] |= inlimbs[j] & mask;
1033 /* get_bit returns the |i|th bit in |in| */
1034 static char get_bit(const felem_bytearray in, unsigned i)
1038 return (in[i >> 3] >> (i & 7)) & 1;
1041 /* Interleaved point multiplication using precomputed point multiples:
1042 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1043 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1044 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1045 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1046 static void batch_mul(felem x_out, felem y_out, felem z_out,
1047 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1048 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[2][16][3])
1052 unsigned gen_mul = (g_scalar != NULL);
1053 felem nq[3], tmp[4];
1057 /* set nq to the point at infinity */
1058 memset(nq, 0, 3 * sizeof(felem));
1060 /* Loop over all scalars msb-to-lsb, interleaving additions
1061 * of multiples of the generator (two in each of the last 28 rounds)
1062 * and additions of other points multiples (every 5th round).
1064 skip = 1; /* save two point operations in the first round */
1065 for (i = (num_points ? 220 : 27); i >= 0; --i)
1069 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1071 /* add multiples of the generator */
1072 if (gen_mul && (i <= 27))
1074 /* first, look 28 bits upwards */
1075 bits = get_bit(g_scalar, i + 196) << 3;
1076 bits |= get_bit(g_scalar, i + 140) << 2;
1077 bits |= get_bit(g_scalar, i + 84) << 1;
1078 bits |= get_bit(g_scalar, i + 28);
1079 /* select the point to add, in constant time */
1080 select_point(bits, 16, g_pre_comp[1], tmp);
1084 point_add(nq[0], nq[1], nq[2],
1085 nq[0], nq[1], nq[2],
1086 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1090 memcpy(nq, tmp, 3 * sizeof(felem));
1094 /* second, look at the current position */
1095 bits = get_bit(g_scalar, i + 168) << 3;
1096 bits |= get_bit(g_scalar, i + 112) << 2;
1097 bits |= get_bit(g_scalar, i + 56) << 1;
1098 bits |= get_bit(g_scalar, i);
1099 /* select the point to add, in constant time */
1100 select_point(bits, 16, g_pre_comp[0], tmp);
1101 point_add(nq[0], nq[1], nq[2],
1102 nq[0], nq[1], nq[2],
1103 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1106 /* do other additions every 5 doublings */
1107 if (num_points && (i % 5 == 0))
1109 /* loop over all scalars */
1110 for (num = 0; num < num_points; ++num)
1112 bits = get_bit(scalars[num], i + 4) << 5;
1113 bits |= get_bit(scalars[num], i + 3) << 4;
1114 bits |= get_bit(scalars[num], i + 2) << 3;
1115 bits |= get_bit(scalars[num], i + 1) << 2;
1116 bits |= get_bit(scalars[num], i) << 1;
1117 bits |= get_bit(scalars[num], i - 1);
1118 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1120 /* select the point to add or subtract */
1121 select_point(digit, 17, pre_comp[num], tmp);
1122 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
1123 copy_conditional(tmp[1], tmp[3], sign);
1127 point_add(nq[0], nq[1], nq[2],
1128 nq[0], nq[1], nq[2],
1129 mixed, tmp[0], tmp[1], tmp[2]);
1133 memcpy(nq, tmp, 3 * sizeof(felem));
1139 felem_assign(x_out, nq[0]);
1140 felem_assign(y_out, nq[1]);
1141 felem_assign(z_out, nq[2]);
1144 /******************************************************************************/
1145 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1148 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1150 NISTP224_PRE_COMP *ret = NULL;
1151 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1154 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1157 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1158 ret->references = 1;
1162 static void *nistp224_pre_comp_dup(void *src_)
1164 NISTP224_PRE_COMP *src = src_;
1166 /* no need to actually copy, these objects never change! */
1167 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1172 static void nistp224_pre_comp_free(void *pre_)
1175 NISTP224_PRE_COMP *pre = pre_;
1180 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1187 static void nistp224_pre_comp_clear_free(void *pre_)
1190 NISTP224_PRE_COMP *pre = pre_;
1195 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1199 OPENSSL_cleanse(pre, sizeof *pre);
1203 /******************************************************************************/
1204 /* OPENSSL EC_METHOD FUNCTIONS
1207 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1210 ret = ec_GFp_simple_group_init(group);
1211 group->a_is_minus3 = 1;
1215 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1216 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1219 BN_CTX *new_ctx = NULL;
1220 BIGNUM *curve_p, *curve_a, *curve_b;
1223 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1225 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1226 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1227 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1228 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1229 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1230 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1231 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1232 (BN_cmp(curve_b, b)))
1234 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1235 EC_R_WRONG_CURVE_PARAMETERS);
1238 group->field_mod_func = BN_nist_mod_224;
1239 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1242 if (new_ctx != NULL)
1243 BN_CTX_free(new_ctx);
1247 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1248 * (X', Y') = (X/Z^2, Y/Z^3) */
1249 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1250 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1252 felem z1, z2, x_in, y_in, x_out, y_out;
1255 if (EC_POINT_is_at_infinity(group, point))
1257 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1258 EC_R_POINT_AT_INFINITY);
1261 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1262 (!BN_to_felem(z1, &point->Z))) return 0;
1264 felem_square(tmp, z2); felem_reduce(z1, tmp);
1265 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1266 felem_contract(x_out, x_in);
1269 if (!felem_to_BN(x, x_out)) {
1270 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1275 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1276 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1277 felem_contract(y_out, y_in);
1280 if (!felem_to_BN(y, y_out)) {
1281 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1289 static void make_points_affine(size_t num, felem points[/*num*/][3], felem tmp_felems[/*num+1*/])
1291 /* Runs in constant time, unless an input is the point at infinity
1292 * (which normally shouldn't happen). */
1293 ec_GFp_nistp_points_make_affine_internal(
1298 (void (*)(void *)) felem_one,
1299 (int (*)(const void *)) felem_is_zero_int,
1300 (void (*)(void *, const void *)) felem_assign,
1301 (void (*)(void *, const void *)) felem_square_reduce,
1302 (void (*)(void *, const void *, const void *)) felem_mul_reduce,
1303 (void (*)(void *, const void *)) felem_inv,
1304 (void (*)(void *, const void *)) felem_contract);
1307 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1308 * Result is stored in r (r can equal one of the inputs). */
1309 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1310 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1311 const BIGNUM *scalars[], BN_CTX *ctx)
1317 BN_CTX *new_ctx = NULL;
1318 BIGNUM *x, *y, *z, *tmp_scalar;
1319 felem_bytearray g_secret;
1320 felem_bytearray *secrets = NULL;
1321 felem (*pre_comp)[17][3] = NULL;
1322 felem *tmp_felems = NULL;
1323 felem_bytearray tmp;
1325 int have_pre_comp = 0;
1326 size_t num_points = num;
1327 felem x_in, y_in, z_in, x_out, y_out, z_out;
1328 NISTP224_PRE_COMP *pre = NULL;
1329 const felem (*g_pre_comp)[16][3] = NULL;
1330 EC_POINT *generator = NULL;
1331 const EC_POINT *p = NULL;
1332 const BIGNUM *p_scalar = NULL;
1335 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1337 if (((x = BN_CTX_get(ctx)) == NULL) ||
1338 ((y = BN_CTX_get(ctx)) == NULL) ||
1339 ((z = BN_CTX_get(ctx)) == NULL) ||
1340 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1345 pre = EC_EX_DATA_get_data(group->extra_data,
1346 nistp224_pre_comp_dup, nistp224_pre_comp_free,
1347 nistp224_pre_comp_clear_free);
1349 /* we have precomputation, try to use it */
1350 g_pre_comp = (const felem (*)[16][3]) pre->g_pre_comp;
1352 /* try to use the standard precomputation */
1353 g_pre_comp = &gmul[0];
1354 generator = EC_POINT_new(group);
1355 if (generator == NULL)
1357 /* get the generator from precomputation */
1358 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1359 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1360 !felem_to_BN(z, g_pre_comp[0][1][2]))
1362 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1365 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1366 generator, x, y, z, ctx))
1368 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1369 /* precomputation matches generator */
1372 /* we don't have valid precomputation:
1373 * treat the generator as a random point */
1374 num_points = num_points + 1;
1379 if (num_points >= 3)
1381 /* unless we precompute multiples for just one or two points,
1382 * converting those into affine form is time well spent */
1385 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1386 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1388 tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1389 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
1391 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1395 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1396 * i.e., they contribute nothing to the linear combination */
1397 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1398 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1399 for (i = 0; i < num_points; ++i)
1404 p = EC_GROUP_get0_generator(group);
1408 /* the i^th point */
1411 p_scalar = scalars[i];
1413 if ((p_scalar != NULL) && (p != NULL))
1415 /* reduce scalar to 0 <= scalar < 2^224 */
1416 if ((BN_num_bits(p_scalar) > 224) || (BN_is_negative(p_scalar)))
1418 /* this is an unusual input, and we don't guarantee
1419 * constant-timeness */
1420 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1422 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1425 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1428 num_bytes = BN_bn2bin(p_scalar, tmp);
1429 flip_endian(secrets[i], tmp, num_bytes);
1430 /* precompute multiples */
1431 if ((!BN_to_felem(x_out, &p->X)) ||
1432 (!BN_to_felem(y_out, &p->Y)) ||
1433 (!BN_to_felem(z_out, &p->Z))) goto err;
1434 felem_assign(pre_comp[i][1][0], x_out);
1435 felem_assign(pre_comp[i][1][1], y_out);
1436 felem_assign(pre_comp[i][1][2], z_out);
1437 for (j = 2; j <= 16; ++j)
1442 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1443 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1444 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1449 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1450 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1456 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1459 /* the scalar for the generator */
1460 if ((scalar != NULL) && (have_pre_comp))
1462 memset(g_secret, 0, sizeof g_secret);
1463 /* reduce scalar to 0 <= scalar < 2^224 */
1464 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar)))
1466 /* this is an unusual input, and we don't guarantee
1467 * constant-timeness */
1468 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1470 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1473 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1476 num_bytes = BN_bn2bin(scalar, tmp);
1477 flip_endian(g_secret, tmp, num_bytes);
1478 /* do the multiplication with generator precomputation*/
1479 batch_mul(x_out, y_out, z_out,
1480 (const felem_bytearray (*)) secrets, num_points,
1482 mixed, (const felem (*)[17][3]) pre_comp,
1486 /* do the multiplication without generator precomputation */
1487 batch_mul(x_out, y_out, z_out,
1488 (const felem_bytearray (*)) secrets, num_points,
1489 NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
1490 /* reduce the output to its unique minimal representation */
1491 felem_contract(x_in, x_out);
1492 felem_contract(y_in, y_out);
1493 felem_contract(z_in, z_out);
1494 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1495 (!felem_to_BN(z, z_in)))
1497 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1500 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1504 if (generator != NULL)
1505 EC_POINT_free(generator);
1506 if (new_ctx != NULL)
1507 BN_CTX_free(new_ctx);
1508 if (secrets != NULL)
1509 OPENSSL_free(secrets);
1510 if (pre_comp != NULL)
1511 OPENSSL_free(pre_comp);
1512 if (tmp_felems != NULL)
1513 OPENSSL_free(tmp_felems);
1517 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1520 NISTP224_PRE_COMP *pre = NULL;
1522 BN_CTX *new_ctx = NULL;
1524 EC_POINT *generator = NULL;
1525 felem tmp_felems[32];
1527 /* throw away old precomputation */
1528 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1529 nistp224_pre_comp_free, nistp224_pre_comp_clear_free);
1531 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1533 if (((x = BN_CTX_get(ctx)) == NULL) ||
1534 ((y = BN_CTX_get(ctx)) == NULL))
1536 /* get the generator */
1537 if (group->generator == NULL) goto err;
1538 generator = EC_POINT_new(group);
1539 if (generator == NULL)
1541 BN_bin2bn(nistp224_curve_params[3], sizeof (felem_bytearray), x);
1542 BN_bin2bn(nistp224_curve_params[4], sizeof (felem_bytearray), y);
1543 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1545 if ((pre = nistp224_pre_comp_new()) == NULL)
1547 /* if the generator is the standard one, use built-in precomputation */
1548 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1550 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1554 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1555 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1556 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1558 /* compute 2^56*G, 2^112*G, 2^168*G for the first table,
1559 * 2^28*G, 2^84*G, 2^140*G, 2^196*G for the second one
1561 for (i = 1; i <= 8; i <<= 1)
1564 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1565 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1566 for (j = 0; j < 27; ++j)
1569 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1570 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1575 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
1576 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1577 for (j = 0; j < 27; ++j)
1580 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
1581 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
1584 for (i = 0; i < 2; i++)
1586 /* g_pre_comp[i][0] is the point at infinity */
1587 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1588 /* the remaining multiples */
1589 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1591 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1592 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1593 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1594 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1595 pre->g_pre_comp[i][2][2]);
1596 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1598 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1599 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1600 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1601 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1602 pre->g_pre_comp[i][2][2]);
1603 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1605 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1606 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1607 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1608 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1609 pre->g_pre_comp[i][4][2]);
1610 /* 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G */
1612 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1613 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1614 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1615 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1616 pre->g_pre_comp[i][2][2]);
1617 for (j = 1; j < 8; ++j)
1619 /* odd multiples: add G resp. 2^28*G */
1621 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1],
1622 pre->g_pre_comp[i][2*j+1][2], pre->g_pre_comp[i][2*j][0],
1623 pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
1624 0, pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1625 pre->g_pre_comp[i][1][2]);
1628 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1630 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1631 nistp224_pre_comp_free, nistp224_pre_comp_clear_free))
1637 if (generator != NULL)
1638 EC_POINT_free(generator);
1639 if (new_ctx != NULL)
1640 BN_CTX_free(new_ctx);
1642 nistp224_pre_comp_free(pre);
1646 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1648 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1649 nistp224_pre_comp_free, nistp224_pre_comp_clear_free)
1657 static void *dummy=&dummy;