1 /* crypto/ec/ecp_nistp224.c */
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
28 #include <openssl/opensslconf.h>
29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31 #ifndef OPENSSL_SYS_VMS
38 #include <openssl/err.h>
41 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42 /* even with gcc, the typedef won't work for 32-bit platforms */
43 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 #error "Need GCC 3.1 or later to define type uint128_t"
53 /******************************************************************************/
55 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
57 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58 * using 64-bit coefficients called 'limbs',
59 * and sometimes (for multiplication results) as
60 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61 * using 128-bit coefficients called 'widelimbs'.
62 * A 4-limb representation is an 'felem';
63 * a 7-widelimb representation is a 'widefelem'.
64 * Even within felems, bits of adjacent limbs overlap, and we don't always
65 * reduce the representations: we ensure that inputs to each felem
66 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67 * and fit into a 128-bit word without overflow. The coefficients are then
68 * again partially reduced to obtain an felem satisfying a_i < 2^57.
69 * We only reduce to the unique minimal representation at the end of the
typedef uint64_t limb;          /* 64-bit coefficient ("limb") */
typedef uint128_t widelimb;     /* 128-bit coefficient, for multiplication results */

typedef limb felem[4];          /* field element: a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3 */
typedef widelimb widefelem[7];  /* unreduced product: seven 128-bit coefficients */

/* Field element represented as a byte array.
 * 28*8 = 224 bits is also the group order size for the elliptic curve,
 * and we also use this type for scalars for point multiplication. */
typedef u8 felem_bytearray[28];
/* NIST P-224 domain parameters: the prime p, curve coefficients a and b,
 * and the affine coordinates (x, y) of the standard generator. */
static const felem_bytearray nistp224_curve_params[5] = {
    {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* p */
     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01},
    {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* a */
     0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF,
     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE},
    {0xB4,0x05,0x0A,0x85,0x0C,0x04,0xB3,0xAB,0xF5,0x41, /* b */
     0x32,0x56,0x50,0x44,0xB0,0xB7,0xD7,0xBF,0xD8,0xBA,
     0x27,0x0B,0x39,0x43,0x23,0x55,0xFF,0xB4},
    {0xB7,0x0E,0x0C,0xBD,0x6B,0xB4,0xBF,0x7F,0x32,0x13, /* x */
     0x90,0xB9,0x4A,0x03,0xC1,0xD3,0x56,0xC2,0x11,0x22,
     0x34,0x32,0x80,0xD6,0x11,0x5C,0x1D,0x21},
    {0xbd,0x37,0x63,0x88,0xb5,0xf7,0x23,0xfb,0x4c,0x22, /* y */
     0xdf,0xe6,0xcd,0x43,0x75,0xa0,0x5a,0x07,0x47,0x64,
     0x44,0xd5,0x81,0x99,0x85,0x00,0x7e,0x34}
104 * Precomputed multiples of the standard generator
105 * Points are given in coordinates (X, Y, Z) where Z normally is 1
106 * (0 for the point at infinity).
107 * For each field element, slice a_0 is word 0, etc.
109 * The table has 2 * 16 elements, starting with the following:
110 * index | bits | point
111 * ------+---------+------------------------------
114 * 2 | 0 0 1 0 | 2^56G
115 * 3 | 0 0 1 1 | (2^56 + 1)G
116 * 4 | 0 1 0 0 | 2^112G
117 * 5 | 0 1 0 1 | (2^112 + 1)G
118 * 6 | 0 1 1 0 | (2^112 + 2^56)G
119 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
120 * 8 | 1 0 0 0 | 2^168G
121 * 9 | 1 0 0 1 | (2^168 + 1)G
122 * 10 | 1 0 1 0 | (2^168 + 2^56)G
123 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
124 * 12 | 1 1 0 0 | (2^168 + 2^112)G
125 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
126 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
127 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
128 * followed by a copy of this with each element multiplied by 2^28.
130 * The reason for this is so that we can clock bits into four different
131 * locations when doing simple scalar multiplies against the base point,
132 * and then another four locations using the second 16 elements.
134 static const felem gmul[2][16][3] =
138 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
139 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
141 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
142 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
144 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
145 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
147 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
148 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
150 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
151 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
153 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
154 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
156 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
157 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
159 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
160 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
162 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
163 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
165 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
166 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
168 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
169 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
171 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
172 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
174 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
175 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
177 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
178 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
180 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
181 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
187 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
189 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
190 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
192 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
193 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
195 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
196 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
198 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
199 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
201 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
202 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
204 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
205 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
207 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
208 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
210 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
211 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
213 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
214 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
216 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
217 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
219 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
220 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
222 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
223 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
225 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
226 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
228 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
229 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
232 /* Precomputation for the group generator. */
234 felem g_pre_comp[2][16][3];
/* Method table for the 64-bit optimised NIST P-224 implementation.
 * Slots not specialised here fall back to the ec_GFp_simple_* /
 * ec_GFp_nist_* helpers.
 * NOTE(review): this fragment appears truncated -- the opening brace,
 * a few table slots, and the closing `return &ret;` are not visible. */
const EC_METHOD *EC_GFp_nistp224_method(void)
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        ec_GFp_nistp224_group_init,
        ec_GFp_simple_group_finish,
        ec_GFp_simple_group_clear_finish,
        ec_GFp_nist_group_copy,
        ec_GFp_nistp224_group_set_curve,
        ec_GFp_simple_group_get_curve,
        ec_GFp_simple_group_get_degree,
        ec_GFp_simple_group_check_discriminant,
        ec_GFp_simple_point_init,
        ec_GFp_simple_point_finish,
        ec_GFp_simple_point_clear_finish,
        ec_GFp_simple_point_copy,
        ec_GFp_simple_point_set_to_infinity,
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
        ec_GFp_simple_point_set_affine_coordinates,
        ec_GFp_nistp224_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */,
        ec_GFp_simple_invert,
        ec_GFp_simple_is_at_infinity,
        ec_GFp_simple_is_on_curve,
        ec_GFp_simple_make_affine,
        ec_GFp_simple_points_make_affine,
        ec_GFp_nistp224_points_mul,
        ec_GFp_nistp224_precompute_mult,
        ec_GFp_nistp224_have_precompute_mult,
        ec_GFp_nist_field_mul,
        ec_GFp_nist_field_sqr,
        0 /* field_encode */,
        0 /* field_decode */,
        0 /* field_set_to_one */ };
284 /* Helper functions to convert field elements to/from internal representation */
285 static void bin28_to_felem(felem out, const u8 in[28])
287 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
288 out[1] = (*((const uint64_t *)(in+7))) & 0x00ffffffffffffff;
289 out[2] = (*((const uint64_t *)(in+14))) & 0x00ffffffffffffff;
290 out[3] = (*((const uint64_t *)(in+21))) & 0x00ffffffffffffff;
293 static void felem_to_bin28(u8 out[28], const felem in)
296 for (i = 0; i < 7; ++i)
298 out[i] = in[0]>>(8*i);
299 out[i+7] = in[1]>>(8*i);
300 out[i+14] = in[2]>>(8*i);
301 out[i+21] = in[3]>>(8*i);
305 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
306 static void flip_endian(u8 *out, const u8 *in, unsigned len)
309 for (i = 0; i < len; ++i)
310 out[i] = in[len-1-i];
/* From OpenSSL BIGNUM to internal representation.
 * Rejects negative or over-long inputs with EC_R_BIGNUM_OUT_OF_RANGE. */
static int BN_to_felem(felem out, const BIGNUM *bn)
    felem_bytearray b_in;
    felem_bytearray b_out;

    /* NOTE(review): this fragment appears truncated -- the declaration of
     * num_bytes and the `return` statements on the error and success paths
     * are not visible here. */

    /* BN_bn2bin eats leading zeroes */
    memset(b_out, 0, sizeof b_out);
    num_bytes = BN_num_bytes(bn);
    if (num_bytes > sizeof b_out)
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
    if (BN_is_negative(bn))
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
    /* big-endian BN bytes -> little-endian byte array -> 56-bit limbs */
    num_bytes = BN_bn2bin(bn, b_in);
    flip_endian(b_out, b_in, num_bytes);
    bin28_to_felem(out, b_out);
339 /* From internal representation to OpenSSL BIGNUM */
340 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
342 felem_bytearray b_in, b_out;
343 felem_to_bin28(b_in, in);
344 flip_endian(b_out, b_in, sizeof b_out);
345 return BN_bin2bn(b_out, sizeof b_out, out);
348 /******************************************************************************/
351 * Field operations, using the internal representation of field elements.
352 * NB! These operations are specific to our point multiplication and cannot be
353 * expected to be correct in general - e.g., multiplication with a large scalar
354 * will cause an overflow.
358 static void felem_one(felem out)
366 static void felem_assign(felem out, const felem in)
374 /* Sum two field elements: out += in */
375 static void felem_sum(felem out, const felem in)
383 /* Get negative value: out = -in */
384 /* Assumes in[i] < 2^57 */
385 static void felem_neg(felem out, const felem in)
387 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
388 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
389 static const limb two58m42m2 = (((limb) 1) << 58) -
390 (((limb) 1) << 42) - (((limb) 1) << 2);
392 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
393 out[0] = two58p2 - in[0];
394 out[1] = two58m42m2 - in[1];
395 out[2] = two58m2 - in[2];
396 out[3] = two58m2 - in[3];
/* Subtract field elements: out -= in */
/* Assumes in[i] < 2^57 */
static void felem_diff(felem out, const felem in)
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
    static const limb two58m42m2 = (((limb) 1) << 58) -
        (((limb) 1) << 42) - (((limb) 1) << 2);

    /* NOTE(review): this fragment appears truncated -- only the out[1]
     * addition is visible; the additions to out[0]/out[2]/out[3] and the
     * out[i] -= in[i] statements are not. */

    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
    out[1] += two58m42m2;
/* Subtract in unreduced 128-bit mode: out -= in */
/* Assumes in[i] < 2^119 */
static void widefelem_diff(widefelem out, const widefelem in)
    static const widelimb two120 = ((widelimb) 1) << 120;
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
        (((widelimb) 1) << 64);
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);

    /* NOTE(review): this fragment appears truncated -- only the out[4]
     * addition is visible; the remaining additions and the
     * out[i] -= in[i] statements are not. */

    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
    out[4] += two120m104m64;
/* Subtract in mixed mode: out128 -= in64 */
static void felem_diff_128_64(widefelem out, const felem in)
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
        (((widelimb) 1) << 8);
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
        (((widelimb) 1) << 8);
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);

    /* NOTE(review): this fragment appears truncated -- only the out[1]
     * addition is visible; the remaining additions and the
     * out[i] -= in[i] statements are not. */

    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
    out[1] += two64m48m8;
471 /* Multiply a field element by a scalar: out = out * scalar
472 * The scalars we actually use are small, so results fit without overflow */
473 static void felem_scalar(felem out, const limb scalar)
481 /* Multiply an unreduced field element by a scalar: out = out * scalar
482 * The scalars we actually use are small, so results fit without overflow */
483 static void widefelem_scalar(widefelem out, const widelimb scalar)
494 /* Square a field element: out = in^2 */
495 static void felem_square(widefelem out, const felem in)
497 limb tmp0, tmp1, tmp2;
498 tmp0 = 2 * in[0]; tmp1 = 2 * in[1]; tmp2 = 2 * in[2];
499 out[0] = ((widelimb) in[0]) * in[0];
500 out[1] = ((widelimb) in[0]) * tmp1;
501 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
502 out[3] = ((widelimb) in[3]) * tmp0 +
503 ((widelimb) in[1]) * tmp2;
504 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
505 out[5] = ((widelimb) in[3]) * tmp2;
506 out[6] = ((widelimb) in[3]) * in[3];
509 /* Multiply two field elements: out = in1 * in2 */
510 static void felem_mul(widefelem out, const felem in1, const felem in2)
512 out[0] = ((widelimb) in1[0]) * in2[0];
513 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
514 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
515 ((widelimb) in1[2]) * in2[0];
516 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
517 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
518 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
519 ((widelimb) in1[3]) * in2[1];
520 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
521 out[6] = ((widelimb) in1[3]) * in2[3];
/* Reduce seven 128-bit coefficients to four 64-bit coefficients.
 * Requires in[i] < 2^126,
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
static void felem_reduce(felem out, const widefelem in)
    /* Constants forming 0 mod 2^224-2^96+1, to keep intermediates positive */
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
        (((widelimb) 1) << 15);
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71);
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);

    /* NOTE(review): this fragment appears truncated -- the `output`
     * working-array declaration, the initialisation of output[3]/output[4],
     * the subtractions of in[5]/in[6]/output[4], the masking of output[4],
     * and the final store to out[3] are not visible here. */

    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
    output[0] = in[0] + two127p15;
    output[1] = in[1] + two127m71m55;
    output[2] = in[2] + two127m71;

    /* Eliminate in[4], in[5], in[6] */
    output[4] += in[6] >> 16;
    output[3] += (in[6] & 0xffff) << 40;

    output[3] += in[5] >> 16;
    output[2] += (in[5] & 0xffff) << 40;

    output[2] += output[4] >> 16;
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 2 -> 3 -> 4 */
    output[3] += output[2] >> 56;
    output[2] &= 0x00ffffffffffffff;

    output[4] = output[3] >> 56;
    output[3] &= 0x00ffffffffffffff;

    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */

    /* Eliminate output[4] */
    output[2] += output[4] >> 16;
    /* output[2] < 2^56 + 2^56 = 2^57 */
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 0 -> 1 -> 2 -> 3 */
    output[1] += output[0] >> 56;
    out[0] = output[0] & 0x00ffffffffffffff;

    output[2] += output[1] >> 56;
    /* output[2] < 2^57 + 2^72 */
    out[1] = output[1] & 0x00ffffffffffffff;
    output[3] += output[2] >> 56;
    /* output[3] <= 2^56 + 2^16 */
    out[2] = output[2] & 0x00ffffffffffffff;

    /* out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
     * out[3] <= 2^56 + 2^16 (due to final carry) */
591 static void felem_square_reduce(felem out, const felem in)
594 felem_square(tmp, in);
595 felem_reduce(out, tmp);
598 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
601 felem_mul(tmp, in1, in2);
602 felem_reduce(out, tmp);
/* Reduce to unique minimal representation.
 * Requires 0 <= in < 2*p (always call felem_reduce first) */
static void felem_contract(felem out, const felem in)
    static const int64_t two56 = ((limb) 1) << 56;
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */

    /* NOTE(review): this fragment appears truncated -- the declarations of
     * the working variables (tmp[], a, ...), the Case 1 computation of a,
     * the negative-coefficient fix-up and the final stores to out[] are
     * not visible here. */

    /* Case 1: a = 1 iff in >= 2^224 */
    tmp[3] &= 0x00ffffffffffffff;
    /* Case 2: a = 0 iff p <= in < 2^224, i.e.,
     * the high 128 bits are all 1 and the lower part is non-zero */
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
        (((int64_t)(in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
    a &= 0x00ffffffffffffff;
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
    tmp[3] &= a ^ 0xffffffffffffffff;
    tmp[2] &= a ^ 0xffffffffffffffff;
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;

    /* eliminate negative coefficients: if tmp[0] is negative, tmp[1] must
     * be non-zero, so we only need one step */

    /* carry 1 -> 2 -> 3 */
    tmp[2] += tmp[1] >> 56;
    tmp[1] &= 0x00ffffffffffffff;

    tmp[3] += tmp[2] >> 56;
    tmp[2] &= 0x00ffffffffffffff;

    /* Now 0 <= out < p */
655 /* Zero-check: returns 1 if input is 0, and 0 otherwise.
656 * We know that field elements are reduced to in < 2^225,
657 * so we only need to check three cases: 0, 2^224 - 2^96 + 1,
658 * and 2^225 - 2^97 + 2 */
659 static limb felem_is_zero(const felem in)
661 limb zero, two224m96p1, two225m97p2;
663 zero = in[0] | in[1] | in[2] | in[3];
664 zero = (((int64_t)(zero) - 1) >> 63) & 1;
665 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
666 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
667 two224m96p1 = (((int64_t)(two224m96p1) - 1) >> 63) & 1;
668 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
669 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
670 two225m97p2 = (((int64_t)(two225m97p2) - 1) >> 63) & 1;
671 return (zero | two224m96p1 | two225m97p2);
674 static limb felem_is_zero_int(const felem in)
676 return (int) (felem_is_zero(in) & ((limb)1));
679 /* Invert a field element */
680 /* Computation chain copied from djb's code */
681 static void felem_inv(felem out, const felem in)
683 felem ftmp, ftmp2, ftmp3, ftmp4;
687 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2 */
688 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 1 */
689 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2 */
690 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 1 */
691 felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
692 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
693 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
694 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 1 */
695 felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
696 for (i = 0; i < 5; ++i) /* 2^12 - 2^6 */
698 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
700 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
701 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
702 for (i = 0; i < 11; ++i) /* 2^24 - 2^12 */
704 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
706 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
707 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
708 for (i = 0; i < 23; ++i) /* 2^48 - 2^24 */
710 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
712 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
713 felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
714 for (i = 0; i < 47; ++i) /* 2^96 - 2^48 */
716 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
718 felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
719 felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
720 for (i = 0; i < 23; ++i) /* 2^120 - 2^24 */
722 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
724 felem_mul(tmp, ftmp2, ftmp4); felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
725 for (i = 0; i < 6; ++i) /* 2^126 - 2^6 */
727 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
729 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^126 - 1 */
730 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^127 - 2 */
731 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^127 - 1 */
732 for (i = 0; i < 97; ++i) /* 2^224 - 2^97 */
734 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
736 felem_mul(tmp, ftmp, ftmp3); felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
/* Copy in constant time:
 * if icopy == 1, copy in to out,
 * if icopy == 0, copy out to itself. */
copy_conditional(felem out, const felem in, limb icopy)
    /* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one */
    const limb copy = -icopy;
    for (i = 0; i < 4; ++i)
        const limb tmp = copy & (in[i] ^ out[i]);
        /* NOTE(review): this fragment appears truncated -- the return-type
         * line, the declaration of i, the loop braces, and the
         * `out[i] ^= tmp;` store that completes the masked copy are not
         * visible here. */
755 /******************************************************************************/
756 /* ELLIPTIC CURVE POINT OPERATIONS
758 * Points are represented in Jacobian projective coordinates:
759 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
760 * or to the point at infinity if Z == 0.
765 * Double an elliptic curve point:
766 * (X', Y', Z') = 2 * (X, Y, Z), where
767 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
768 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
769 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
770 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
771 * while x_out == y_in is not (maybe this works, but it's not tested).
774 point_double(felem x_out, felem y_out, felem z_out,
775 const felem x_in, const felem y_in, const felem z_in)
778 felem delta, gamma, beta, alpha, ftmp, ftmp2;
780 felem_assign(ftmp, x_in);
781 felem_assign(ftmp2, x_in);
784 felem_square(tmp, z_in);
785 felem_reduce(delta, tmp);
788 felem_square(tmp, y_in);
789 felem_reduce(gamma, tmp);
792 felem_mul(tmp, x_in, gamma);
793 felem_reduce(beta, tmp);
795 /* alpha = 3*(x-delta)*(x+delta) */
796 felem_diff(ftmp, delta);
797 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
798 felem_sum(ftmp2, delta);
799 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
800 felem_scalar(ftmp2, 3);
801 /* ftmp2[i] < 3 * 2^58 < 2^60 */
802 felem_mul(tmp, ftmp, ftmp2);
803 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
804 felem_reduce(alpha, tmp);
806 /* x' = alpha^2 - 8*beta */
807 felem_square(tmp, alpha);
808 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
809 felem_assign(ftmp, beta);
810 felem_scalar(ftmp, 8);
811 /* ftmp[i] < 8 * 2^57 = 2^60 */
812 felem_diff_128_64(tmp, ftmp);
813 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
814 felem_reduce(x_out, tmp);
816 /* z' = (y + z)^2 - gamma - delta */
817 felem_sum(delta, gamma);
818 /* delta[i] < 2^57 + 2^57 = 2^58 */
819 felem_assign(ftmp, y_in);
820 felem_sum(ftmp, z_in);
821 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
822 felem_square(tmp, ftmp);
823 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
824 felem_diff_128_64(tmp, delta);
825 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
826 felem_reduce(z_out, tmp);
828 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
829 felem_scalar(beta, 4);
830 /* beta[i] < 4 * 2^57 = 2^59 */
831 felem_diff(beta, x_out);
832 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
833 felem_mul(tmp, alpha, beta);
834 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
835 felem_square(tmp2, gamma);
836 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
837 widefelem_scalar(tmp2, 8);
838 /* tmp2[i] < 8 * 2^116 = 2^119 */
839 widefelem_diff(tmp, tmp2);
840 /* tmp[i] < 2^119 + 2^120 < 2^121 */
841 felem_reduce(y_out, tmp);
845 * Add two elliptic curve points:
846 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
847 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
848 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
849 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
850 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
851 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
853 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
856 /* This function is not entirely constant-time:
857 * it includes a branch for checking whether the two input points are equal,
858 * (while not equal to the point at infinity).
859 * This case never happens during single point multiplication,
860 * so there is no timing leak for ECDH or ECDSA signing. */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const felem x2, const felem y2, const felem z2)
    /* NOTE(review): this fragment appears truncated -- the opening brace,
     * the `widefelem tmp, tmp2;` declarations, the `if (!mixed)`/`else`
     * braces separating the general z2 set-up from the z2 == 1 shortcut
     * (and the corresponding ftmp5 set-up further down), and the `return;`
     * after the point_double call are not visible here. */
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;

    /* general case: ftmp2 = z2^2 */
    felem_square(tmp, z2);
    felem_reduce(ftmp2, tmp);

    /* ftmp4 = z2^3 */
    felem_mul(tmp, ftmp2, z2);
    felem_reduce(ftmp4, tmp);

    /* ftmp4 = z2^3*y1 */
    felem_mul(tmp2, ftmp4, y1);
    felem_reduce(ftmp4, tmp2);

    /* ftmp2 = z2^2*x1 */
    felem_mul(tmp2, ftmp2, x1);
    felem_reduce(ftmp2, tmp2);

    /* mixed case: we'll assume z2 = 1 (special case z2 = 0 is handled later) */

    /* ftmp4 = z2^3*y1 */
    felem_assign(ftmp4, y1);

    /* ftmp2 = z2^2*x1 */
    felem_assign(ftmp2, x1);

    /* ftmp = z1^2 */
    felem_square(tmp, z1);
    felem_reduce(ftmp, tmp);

    /* ftmp3 = z1^3 */
    felem_mul(tmp, ftmp, z1);
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^3*y2 */
    felem_mul(tmp, ftmp3, y2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp3 = z1^3*y2 - z2^3*y1 */
    felem_diff_128_64(tmp, ftmp4);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^2*x2 */
    felem_mul(tmp, ftmp, x2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp = z1^2*x2 - z2^2*x1 */
    felem_diff_128_64(tmp, ftmp2);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp, tmp);

    /* the formulae are incorrect if the points are equal
     * so we check for this and do doubling if this happens */
    x_equal = felem_is_zero(ftmp);
    y_equal = felem_is_zero(ftmp3);
    z1_is_zero = felem_is_zero(z1);
    z2_is_zero = felem_is_zero(z2);
    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
        point_double(x3, y3, z3, x1, y1, z1);

    /* ftmp5 = z1*z2 */
    felem_mul(tmp, z1, z2);
    felem_reduce(ftmp5, tmp);

    /* special case z2 = 0 is handled later */
    felem_assign(ftmp5, z1);

    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(z_out, tmp);

    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(ftmp5, tmp);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);

    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp4, ftmp5);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
    felem_square(tmp2, ftmp3);
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */

    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp2);
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2 * 2^57 = 2^58 */

    /*
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
     *         2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
     */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
    felem_reduce(x_out, tmp2);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
    felem_diff(ftmp2, x_out);
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) */
    felem_mul(tmp2, ftmp3, ftmp2);
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */

    /*
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
     *         z2^3*y1*(z1^2*x2 - z2^2*x1)^3
     */
    widefelem_diff(tmp2, tmp);
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp2);

    /* the result (x_out, y_out, z_out) is incorrect if one of the inputs is
     * the point at infinity, so we need to check for this separately */

    /* if point 1 is at infinity, copy point 2 to output, and vice versa */
    copy_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
/* select_point selects the |idx|th point from a precomputation table and
 * copies it to out; every table entry is scanned and masked so the access
 * pattern does not depend on idx. */
static void select_point(const u64 idx, unsigned int size, const felem pre_comp[/*size*/][3], felem out[3])
    limb *outlimbs = &out[0][0];
    memset(outlimbs, 0, 3 * sizeof(felem));

    for (i = 0; i < size; i++)
        const limb *inlimbs = &pre_comp[i][0][0];
        /* NOTE(review): this fragment appears truncated -- the loop-variable
         * declarations and the branchless computation of `mask` (all-one
         * iff i == idx) are not visible here. */
        for (j = 0; j < 4 * 3; j++)
            outlimbs[j] |= inlimbs[j] & mask;
1044 /* get_bit returns the |i|th bit in |in| */
1045 static char get_bit(const felem_bytearray in, unsigned i)
1049 return (in[i >> 3] >> (i & 7)) & 1;
/* Interleaved point multiplication using precomputed point multiples:
 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
static void batch_mul(felem x_out, felem y_out, felem z_out,
    const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
    const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[2][16][3])
    /* NOTE(review): local declarations (i, skip, num, bits, sign, digit)
     * and several braces / if-else guards are not visible in this chunk;
     * confirm against the full file before relying on exact control flow. */
    unsigned gen_mul = (g_scalar != NULL);
    felem nq[3], tmp[4];
    /* set nq to the point at infinity */
    memset(nq, 0, 3 * sizeof(felem));
    /* Loop over all scalars msb-to-lsb, interleaving additions
     * of multiples of the generator (two in each of the last 28 rounds)
     * and additions of other points multiples (every 5th round).
     */
    skip = 1; /* save two point operations in the first round */
    for (i = (num_points ? 220 : 27); i >= 0; --i)
        /* one Jacobian doubling per bit position */
        point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
        /* add multiples of the generator */
        if (gen_mul && (i <= 27))
            /* first, look 28 bits upwards */
            bits = get_bit(g_scalar, i + 196) << 3;
            bits |= get_bit(g_scalar, i + 140) << 2;
            bits |= get_bit(g_scalar, i + 84) << 1;
            bits |= get_bit(g_scalar, i + 28);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[1], tmp);
            point_add(nq[0], nq[1], nq[2],
                nq[0], nq[1], nq[2],
                1 /* mixed */, tmp[0], tmp[1], tmp[2]);
            /* NOTE(review): this memcpy is presumably the `skip` branch
             * (nq is still infinity in the first round, so the sum is just
             * the selected point) — the guarding if/else is not visible. */
            memcpy(nq, tmp, 3 * sizeof(felem));
            /* second, look at the current position */
            bits = get_bit(g_scalar, i + 168) << 3;
            bits |= get_bit(g_scalar, i + 112) << 2;
            bits |= get_bit(g_scalar, i + 56) << 1;
            bits |= get_bit(g_scalar, i);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[0], tmp);
            point_add(nq[0], nq[1], nq[2],
                nq[0], nq[1], nq[2],
                1 /* mixed */, tmp[0], tmp[1], tmp[2]);
        /* do other additions every 5 doublings */
        if (num_points && (i % 5 == 0))
            /* loop over all scalars */
            for (num = 0; num < num_points; ++num)
                /* gather a 6-bit window; recoded below into sign + digit */
                bits = get_bit(scalars[num], i + 4) << 5;
                bits |= get_bit(scalars[num], i + 3) << 4;
                bits |= get_bit(scalars[num], i + 2) << 3;
                bits |= get_bit(scalars[num], i + 1) << 2;
                bits |= get_bit(scalars[num], i) << 1;
                bits |= get_bit(scalars[num], i - 1);
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
                /* select the point to add or subtract */
                select_point(digit, 17, pre_comp[num], tmp);
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
                copy_conditional(tmp[1], tmp[3], sign);
                point_add(nq[0], nq[1], nq[2],
                    nq[0], nq[1], nq[2],
                    mixed, tmp[0], tmp[1], tmp[2]);
                /* NOTE(review): presumably the `skip` first-round copy
                 * again — guarding branch not visible here. */
                memcpy(nq, tmp, 3 * sizeof(felem));
    felem_assign(x_out, nq[0]);
    felem_assign(y_out, nq[1]);
    felem_assign(z_out, nq[2]);
1155 /******************************************************************************/
1156 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1159 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1161 NISTP224_PRE_COMP *ret = NULL;
1162 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1165 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1168 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1169 ret->references = 1;
1173 static void *nistp224_pre_comp_dup(void *src_)
1175 NISTP224_PRE_COMP *src = src_;
1177 /* no need to actually copy, these objects never change! */
1178 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1183 static void nistp224_pre_comp_free(void *pre_)
1186 NISTP224_PRE_COMP *pre = pre_;
1191 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1198 static void nistp224_pre_comp_clear_free(void *pre_)
1201 NISTP224_PRE_COMP *pre = pre_;
1206 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1210 OPENSSL_cleanse(pre, sizeof *pre);
1214 /******************************************************************************/
1215 /* OPENSSL EC_METHOD FUNCTIONS
1218 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1221 ret = ec_GFp_simple_group_init(group);
1222 group->a_is_minus3 = 1;
1226 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1227 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1230 BN_CTX *new_ctx = NULL;
1231 BIGNUM *curve_p, *curve_a, *curve_b;
1234 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1236 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1237 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1238 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1239 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1240 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1241 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1242 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1243 (BN_cmp(curve_b, b)))
1245 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1246 EC_R_WRONG_CURVE_PARAMETERS);
1249 group->field_mod_func = BN_nist_mod_224;
1250 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1253 if (new_ctx != NULL)
1254 BN_CTX_free(new_ctx);
1258 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1259 * (X', Y') = (X/Z^2, Y/Z^3) */
1260 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1261 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1263 felem z1, z2, x_in, y_in, x_out, y_out;
1266 if (EC_POINT_is_at_infinity(group, point))
1268 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1269 EC_R_POINT_AT_INFINITY);
1272 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1273 (!BN_to_felem(z1, &point->Z))) return 0;
1275 felem_square(tmp, z2); felem_reduce(z1, tmp);
1276 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1277 felem_contract(x_out, x_in);
1280 if (!felem_to_BN(x, x_out)) {
1281 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1286 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1287 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1288 felem_contract(y_out, y_in);
1291 if (!felem_to_BN(y, y_out)) {
1292 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1300 static void make_points_affine(size_t num, felem points[/*num*/][3], felem tmp_felems[/*num+1*/])
1302 /* Runs in constant time, unless an input is the point at infinity
1303 * (which normally shouldn't happen). */
1304 ec_GFp_nistp_points_make_affine_internal(
1309 (void (*)(void *)) felem_one,
1310 (int (*)(const void *)) felem_is_zero_int,
1311 (void (*)(void *, const void *)) felem_assign,
1312 (void (*)(void *, const void *)) felem_square_reduce,
1313 (void (*)(void *, const void *, const void *)) felem_mul_reduce,
1314 (void (*)(void *, const void *)) felem_inv,
1315 (void (*)(void *, const void *)) felem_contract);
/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
 * Result is stored in r (r can equal one of the inputs). */
int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
    const BIGNUM *scalar, size_t num, const EC_POINT *points[],
    const BIGNUM *scalars[], BN_CTX *ctx)
    /* NOTE(review): declarations of ret, i, j, mixed, num_bytes, skip
     * and several braces / BN_CTX_start / err-label lines are not visible
     * in this chunk; confirm against the full file. */
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;           /* generator scalar, byte-reversed */
    felem_bytearray *secrets = NULL;    /* per-point scalars, byte-reversed */
    felem (*pre_comp)[17][3] = NULL;    /* 0*P .. 16*P per input point */
    felem *tmp_felems = NULL;           /* scratch for make_points_affine */
    felem_bytearray tmp;
    int have_pre_comp = 0;
    size_t num_points = num;
    felem x_in, y_in, z_in, x_out, y_out, z_out;
    NISTP224_PRE_COMP *pre = NULL;
    const felem (*g_pre_comp)[16][3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;
    if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL) ||
        ((z = BN_CTX_get(ctx)) == NULL) ||
        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
    /* look for precomputation attached to the group as ex-data */
    pre = EC_EX_DATA_get_data(group->extra_data,
        nistp224_pre_comp_dup, nistp224_pre_comp_free,
        nistp224_pre_comp_clear_free);
    /* we have precomputation, try to use it */
    g_pre_comp = (const felem (*)[16][3]) pre->g_pre_comp;
    /* try to use the standard precomputation */
    g_pre_comp = &gmul[0];
    generator = EC_POINT_new(group);
    if (generator == NULL)
    /* get the generator from precomputation */
    if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
        !felem_to_BN(y, g_pre_comp[0][1][1]) ||
        !felem_to_BN(z, g_pre_comp[0][1][2]))
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
    if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
        generator, x, y, z, ctx))
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
        /* precomputation matches generator */
    /* we don't have valid precomputation:
     * treat the generator as a random point */
    num_points = num_points + 1;
    if (num_points >= 3)
        /* unless we precompute multiples for just one or two points,
         * converting those into affine form is time well spent */
    secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
    pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
    tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
    if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
    /* we treat NULL scalars as 0, and NULL points as points at infinity,
     * i.e., they contribute nothing to the linear combination */
    memset(secrets, 0, num_points * sizeof(felem_bytearray));
    memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
    for (i = 0; i < num_points; ++i)
        /* NOTE(review): presumably the i == num branch, i.e. the generator
         * treated as an extra random point — guarding if not visible */
        p = EC_GROUP_get0_generator(group);
        /* the i^th point */
        p_scalar = scalars[i];
    if ((p_scalar != NULL) && (p != NULL))
        /* reduce scalar to 0 <= scalar < 2^224 */
        if ((BN_num_bits(p_scalar) > 224) || (BN_is_negative(p_scalar)))
            /* this is an unusual input, and we don't guarantee
             * constant-timeness */
            if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
            num_bytes = BN_bn2bin(tmp_scalar, tmp);
        num_bytes = BN_bn2bin(p_scalar, tmp);
        flip_endian(secrets[i], tmp, num_bytes);
        /* precompute multiples */
        if ((!BN_to_felem(x_out, &p->X)) ||
            (!BN_to_felem(y_out, &p->Y)) ||
            (!BN_to_felem(z_out, &p->Z))) goto err;
        felem_assign(pre_comp[i][1][0], x_out);
        felem_assign(pre_comp[i][1][1], y_out);
        felem_assign(pre_comp[i][1][2], z_out);
        for (j = 2; j <= 16; ++j)
            /* NOTE(review): point_add / point_double call heads for the
             * odd/even j cases are not visible in this chunk */
            pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
            pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
            0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
            pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
            pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
    make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp))
        memset(g_secret, 0, sizeof g_secret);
        /* reduce scalar to 0 <= scalar < 2^224 */
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar)))
            /* this is an unusual input, and we don't guarantee
             * constant-timeness */
            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
            num_bytes = BN_bn2bin(tmp_scalar, tmp);
        num_bytes = BN_bn2bin(scalar, tmp);
        flip_endian(g_secret, tmp, num_bytes);
        /* do the multiplication with generator precomputation*/
        batch_mul(x_out, y_out, z_out,
            (const felem_bytearray (*)) secrets, num_points,
            mixed, (const felem (*)[17][3]) pre_comp,
    /* do the multiplication without generator precomputation */
    batch_mul(x_out, y_out, z_out,
        (const felem_bytearray (*)) secrets, num_points,
        NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
        (!felem_to_BN(z, z_in)))
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
    /* cleanup (shared error/success path) */
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (secrets != NULL)
        OPENSSL_free(secrets);
    if (pre_comp != NULL)
        OPENSSL_free(pre_comp);
    if (tmp_felems != NULL)
        OPENSSL_free(tmp_felems);
/* Build (or copy) the generator precomputation tables and attach them to
 * the group as ex-data, replacing any stale precomputation. */
int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
    /* NOTE(review): declarations of ret, i, j, x, y and the point_double /
     * point_add call heads, braces and err label are not visible in this
     * chunk; confirm against the full file. */
    NISTP224_PRE_COMP *pre = NULL;
    BN_CTX *new_ctx = NULL;
    EC_POINT *generator = NULL;
    felem tmp_felems[32];
    /* throw away old precomputation */
    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
        nistp224_pre_comp_free, nistp224_pre_comp_clear_free);
    if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL))
    /* get the generator */
    if (group->generator == NULL) goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
    /* build the standard P-224 generator from the stored constants */
    BN_bin2bn(nistp224_curve_params[3], sizeof (felem_bytearray), x);
    BN_bin2bn(nistp224_curve_params[4], sizeof (felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
    if ((pre = nistp224_pre_comp_new()) == NULL)
    /* if the generator is the standard one, use built-in precomputation */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
    /* otherwise seed table[0][1] with the group's own generator */
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
    /* compute 2^56*G, 2^112*G, 2^168*G for the first table,
     * 2^28*G, 2^84*G, 2^140*G, 2^196*G for the second one
     */
    for (i = 1; i <= 8; i <<= 1)
        /* table[1][i] = 2^28 * table[0][i] (28 doublings) */
        pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
        pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
        for (j = 0; j < 27; ++j)
            pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
            pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        /* table[0][2i] = 2^28 * table[1][i] (another 28 doublings) */
        pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
        pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        for (j = 0; j < 27; ++j)
            pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
            pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
    for (i = 0; i < 2; i++)
        /* g_pre_comp[i][0] is the point at infinity */
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
        /* the remaining multiples */
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
        pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
        pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
        pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
        0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
        pre->g_pre_comp[i][2][2]);
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
        pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
        pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
        pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
        0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
        pre->g_pre_comp[i][2][2]);
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
        pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
        pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
        pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
        0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
        pre->g_pre_comp[i][4][2]);
        /* 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G */
        pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
        pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
        pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
        0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
        pre->g_pre_comp[i][2][2]);
        for (j = 1; j < 8; ++j)
            /* odd multiples: add G resp. 2^28*G */
            pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1],
            pre->g_pre_comp[i][2*j+1][2], pre->g_pre_comp[i][2*j][0],
            pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
            0, pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
            pre->g_pre_comp[i][1][2]);
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
        nistp224_pre_comp_free, nistp224_pre_comp_clear_free))
    /* cleanup */
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    /* NOTE(review): presumably only reached on the error path, after
     * ownership failed to transfer to the group's ex-data */
    nistp224_pre_comp_free(pre);
1657 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1659 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1660 nistp224_pre_comp_free, nistp224_pre_comp_clear_free)
/* NOTE(review): presumably the #else branch of the
 * OPENSSL_NO_EC_NISTP_64_GCC_128 guard at the top of the file — a
 * self-referential dummy to keep the translation unit non-empty when the
 * implementation is compiled out. Confirm the surrounding #if structure. */
static void *dummy=&dummy;