crypto/ec/ecp_nistp224.c

   1 /*
   2  * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
   3  *
   4  * Licensed under the Apache License 2.0 (the "License").  You may not use
   5  * this file except in compliance with the License.  You can obtain a copy
   6  * in the file LICENSE in the source distribution or at
   7  * https://www.openssl.org/source/license.html
   8  */
   9
  10 /* Copyright 2011 Google Inc.
  11  *
  12  * Licensed under the Apache License, Version 2.0 (the "License");
  13  *
  14  * you may not use this file except in compliance with the License.
  15  * You may obtain a copy of the License at
  16  *
  17  *     http://www.apache.org/licenses/LICENSE-2.0
  18  *
  19  *  Unless required by applicable law or agreed to in writing, software
  20  *  distributed under the License is distributed on an "AS IS" BASIS,
  21  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22  *  See the License for the specific language governing permissions and
  23  *  limitations under the License.
  24  */
  25
  26 /*
  27  * ECDSA low level APIs are deprecated for public use, but still ok for
  28  * internal use.
  29  */
  30 #include "internal/deprecated.h"
  31
  32 /*
  33  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  34  *
  35  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  36  * and Adam Langley's public domain 64-bit C implementation of curve25519
  37  */
  38
  39 #include <openssl/opensslconf.h>
  40
  41 #include <stdint.h>
  42 #include <string.h>
  43 #include <openssl/err.h>
  44 #include "ec_local.h"
  45
  46 #if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
  47   /* even with gcc, the typedef won't work for 32-bit platforms */
  48 typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
  49                                  * platforms */
  50 #else
  51 # error "Your compiler doesn't appear to support 128-bit integer types"
  52 #endif
  53
  54 typedef uint8_t u8;
  55 typedef uint64_t u64;
  56
  57 /******************************************************************************/
  58 /*-
  59  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
  60  *
  61  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  62  * using 64-bit coefficients called 'limbs',
  63  * and sometimes (for multiplication results) as
  64  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  65  * using 128-bit coefficients called 'widelimbs'.
  66  * A 4-limb representation is an 'felem';
  67  * a 7-widelimb representation is a 'widefelem'.
  68  * Even within felems, bits of adjacent limbs overlap, and we don't always
  69  * reduce the representations: we ensure that inputs to each felem
  70  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  71  * and fit into a 128-bit word without overflow. The coefficients are then
  72  * again partially reduced to obtain an felem satisfying a_i < 2^57.
  73  * We only reduce to the unique minimal representation at the end of the
  74  * computation.
  75  */
  76
  77 typedef uint64_t limb;
  78 typedef uint128_t widelimb;
  79
  80 typedef limb felem[4];
  81 typedef widelimb widefelem[7];
  82
  83 /*
  84  * Field element represented as a byte array. 28*8 = 224 bits is also the
  85  * group order size for the elliptic curve, and we also use this type for
  86  * scalars for point multiplication.
  87  */
  88 typedef u8 felem_bytearray[28];
  89
  90 static const felem_bytearray nistp224_curve_params[5] = {
  91     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
  92      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
  93      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
  94     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
  95      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
  96      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
  97     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
  98      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
  99      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
 100     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
 101      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
 102      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
 103     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
 104      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
 105      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
 106 };
 107
 108 /*-
 109  * Precomputed multiples of the standard generator
 110  * Points are given in coordinates (X, Y, Z) where Z normally is 1
 111  * (0 for the point at infinity).
 112  * For each field element, slice a_0 is word 0, etc.
 113  *
 114  * The table has 2 * 16 elements, starting with the following:
 115  * index | bits    | point
 116  * ------+---------+------------------------------
 117  *     0 | 0 0 0 0 | 0G
 118  *     1 | 0 0 0 1 | 1G
 119  *     2 | 0 0 1 0 | 2^56G
 120  *     3 | 0 0 1 1 | (2^56 + 1)G
 121  *     4 | 0 1 0 0 | 2^112G
 122  *     5 | 0 1 0 1 | (2^112 + 1)G
 123  *     6 | 0 1 1 0 | (2^112 + 2^56)G
 124  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 125  *     8 | 1 0 0 0 | 2^168G
 126  *     9 | 1 0 0 1 | (2^168 + 1)G
 127  *    10 | 1 0 1 0 | (2^168 + 2^56)G
 128  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 129  *    12 | 1 1 0 0 | (2^168 + 2^112)G
 130  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 131  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 132  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 133  * followed by a copy of this with each element multiplied by 2^28.
 134  *
 135  * The reason for this is so that we can clock bits into four different
 136  * locations when doing simple scalar multiplies against the base point,
 137  * and then another four locations using the second 16 elements.
 138  */
 139 static const felem gmul[2][16][3] = {
 140 {{{0, 0, 0, 0},
 141   {0, 0, 0, 0},
 142   {0, 0, 0, 0}},
 143  {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
 144   {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
 145   {1, 0, 0, 0}},
 146  {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
 147   {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
 148   {1, 0, 0, 0}},
 149  {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
 150   {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
 151   {1, 0, 0, 0}},
 152  {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
 153   {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
 154   {1, 0, 0, 0}},
 155  {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
 156   {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
 157   {1, 0, 0, 0}},
 158  {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
 159   {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
 160   {1, 0, 0, 0}},
 161  {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
 162   {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
 163   {1, 0, 0, 0}},
 164  {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
 165   {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
 166   {1, 0, 0, 0}},
 167  {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
 168   {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
 169   {1, 0, 0, 0}},
 170  {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
 171   {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
 172   {1, 0, 0, 0}},
 173  {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
 174   {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
 175   {1, 0, 0, 0}},
 176  {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
 177   {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
 178   {1, 0, 0, 0}},
 179  {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
 180   {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
 181   {1, 0, 0, 0}},
 182  {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
 183   {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
 184   {1, 0, 0, 0}},
 185  {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
 186   {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
 187   {1, 0, 0, 0}}},
 188 {{{0, 0, 0, 0},
 189   {0, 0, 0, 0},
 190   {0, 0, 0, 0}},
 191  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
 192   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
 193   {1, 0, 0, 0}},
 194  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
 195   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
 196   {1, 0, 0, 0}},
 197  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
 198   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
 199   {1, 0, 0, 0}},
 200  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
 201   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
 202   {1, 0, 0, 0}},
 203  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
 204   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
 205   {1, 0, 0, 0}},
 206  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
 207   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
 208   {1, 0, 0, 0}},
 209  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
 210   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
 211   {1, 0, 0, 0}},
 212  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
 213   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
 214   {1, 0, 0, 0}},
 215  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
 216   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
 217   {1, 0, 0, 0}},
 218  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
 219   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
 220   {1, 0, 0, 0}},
 221  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
 222   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
 223   {1, 0, 0, 0}},
 224  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
 225   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
 226   {1, 0, 0, 0}},
 227  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
 228   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
 229   {1, 0, 0, 0}},
 230  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
 231   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
 232   {1, 0, 0, 0}},
 233  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
 234   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
 235   {1, 0, 0, 0}}}
 236 };
 237
 238 /* Precomputation for the group generator. */
 239 struct nistp224_pre_comp_st {
 240     felem g_pre_comp[2][16][3];
 241     CRYPTO_REF_COUNT references;
 242     CRYPTO_RWLOCK *lock;
 243 };
 244
 245 const EC_METHOD *EC_GFp_nistp224_method(void)
 246 {
 247     static const EC_METHOD ret = {
 248         EC_FLAGS_DEFAULT_OCT,
 249         NID_X9_62_prime_field,
 250         ec_GFp_nistp224_group_init,
 251         ec_GFp_simple_group_finish,
 252         ec_GFp_simple_group_clear_finish,
 253         ec_GFp_nist_group_copy,
 254         ec_GFp_nistp224_group_set_curve,
 255         ec_GFp_simple_group_get_curve,
 256         ec_GFp_simple_group_get_degree,
 257         ec_group_simple_order_bits,
 258         ec_GFp_simple_group_check_discriminant,
 259         ec_GFp_simple_point_init,
 260         ec_GFp_simple_point_finish,
 261         ec_GFp_simple_point_clear_finish,
 262         ec_GFp_simple_point_copy,
 263         ec_GFp_simple_point_set_to_infinity,
 264         ec_GFp_simple_set_Jprojective_coordinates_GFp,
 265         ec_GFp_simple_get_Jprojective_coordinates_GFp,
 266         ec_GFp_simple_point_set_affine_coordinates,
 267         ec_GFp_nistp224_point_get_affine_coordinates,
 268         0 /* point_set_compressed_coordinates */ ,
 269         0 /* point2oct */ ,
 270         0 /* oct2point */ ,
 271         ec_GFp_simple_add,
 272         ec_GFp_simple_dbl,
 273         ec_GFp_simple_invert,
 274         ec_GFp_simple_is_at_infinity,
 275         ec_GFp_simple_is_on_curve,
 276         ec_GFp_simple_cmp,
 277         ec_GFp_simple_make_affine,
 278         ec_GFp_simple_points_make_affine,
 279         ec_GFp_nistp224_points_mul,
 280         ec_GFp_nistp224_precompute_mult,
 281         ec_GFp_nistp224_have_precompute_mult,
 282         ec_GFp_nist_field_mul,
 283         ec_GFp_nist_field_sqr,
 284         0 /* field_div */ ,
 285         ec_GFp_simple_field_inv,
 286         0 /* field_encode */ ,
 287         0 /* field_decode */ ,
 288         0,                      /* field_set_to_one */
 289         ec_key_simple_priv2oct,
 290         ec_key_simple_oct2priv,
 291         0, /* set private */
 292         ec_key_simple_generate_key,
 293         ec_key_simple_check_key,
 294         ec_key_simple_generate_public_key,
 295         0, /* keycopy */
 296         0, /* keyfinish */
 297         ecdh_simple_compute_key,
 298         ecdsa_simple_sign_setup,
 299         ecdsa_simple_sign_sig,
 300         ecdsa_simple_verify_sig,
 301         0, /* field_inverse_mod_ord */
 302         0, /* blind_coordinates */
 303         0, /* ladder_pre */
 304         0, /* ladder_step */
 305         0  /* ladder_post */
 306     };
 307
 308     return &ret;
 309 }
 310
 311 /*
 312  * Helper functions to convert field elements to/from internal representation
 313  */
 314 static void bin28_to_felem(felem out, const u8 in[28])
 315 {
 316     out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
 317     out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
 318     out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
 319     out[3] = (*((const uint64_t *)(in+20))) >> 8;
 320 }
 321
 322 static void felem_to_bin28(u8 out[28], const felem in)
 323 {
 324     unsigned i;
 325     for (i = 0; i < 7; ++i) {
 326         out[i] = in[0] >> (8 * i);
 327         out[i + 7] = in[1] >> (8 * i);
 328         out[i + 14] = in[2] >> (8 * i);
 329         out[i + 21] = in[3] >> (8 * i);
 330     }
 331 }
 332
 333 /* From OpenSSL BIGNUM to internal representation */
 334 static int BN_to_felem(felem out, const BIGNUM *bn)
 335 {
 336     felem_bytearray b_out;
 337     int num_bytes;
 338
 339     if (BN_is_negative(bn)) {
 340         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 341         return 0;
 342     }
 343     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
 344     if (num_bytes < 0) {
 345         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 346         return 0;
 347     }
 348     bin28_to_felem(out, b_out);
 349     return 1;
 350 }
 351
 352 /* From internal representation to OpenSSL BIGNUM */
 353 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
 354 {
 355     felem_bytearray b_out;
 356     felem_to_bin28(b_out, in);
 357     return BN_lebin2bn(b_out, sizeof(b_out), out);
 358 }
 359
 360 /******************************************************************************/
 361 /*-
 362  *                              FIELD OPERATIONS
 363  *
 364  * Field operations, using the internal representation of field elements.
 365  * NB! These operations are specific to our point multiplication and cannot be
 366  * expected to be correct in general - e.g., multiplication with a large scalar
 367  * will cause an overflow.
 368  *
 369  */
 370
 371 static void felem_one(felem out)
 372 {
 373     out[0] = 1;
 374     out[1] = 0;
 375     out[2] = 0;
 376     out[3] = 0;
 377 }
 378
 379 static void felem_assign(felem out, const felem in)
 380 {
 381     out[0] = in[0];
 382     out[1] = in[1];
 383     out[2] = in[2];
 384     out[3] = in[3];
 385 }
 386
 387 /* Sum two field elements: out += in */
 388 static void felem_sum(felem out, const felem in)
 389 {
 390     out[0] += in[0];
 391     out[1] += in[1];
 392     out[2] += in[2];
 393     out[3] += in[3];
 394 }
 395
 396 /* Subtract field elements: out -= in */
 397 /* Assumes in[i] < 2^57 */
 398 static void felem_diff(felem out, const felem in)
 399 {
 400     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 401     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 402     static const limb two58m42m2 = (((limb) 1) << 58) -
 403         (((limb) 1) << 42) - (((limb) 1) << 2);
 404
 405     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 406     out[0] += two58p2;
 407     out[1] += two58m42m2;
 408     out[2] += two58m2;
 409     out[3] += two58m2;
 410
 411     out[0] -= in[0];
 412     out[1] -= in[1];
 413     out[2] -= in[2];
 414     out[3] -= in[3];
 415 }
 416
 417 /* Subtract in unreduced 128-bit mode: out -= in */
 418 /* Assumes in[i] < 2^119 */
 419 static void widefelem_diff(widefelem out, const widefelem in)
 420 {
 421     static const widelimb two120 = ((widelimb) 1) << 120;
 422     static const widelimb two120m64 = (((widelimb) 1) << 120) -
 423         (((widelimb) 1) << 64);
 424     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
 425         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
 426
 427     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 428     out[0] += two120;
 429     out[1] += two120m64;
 430     out[2] += two120m64;
 431     out[3] += two120;
 432     out[4] += two120m104m64;
 433     out[5] += two120m64;
 434     out[6] += two120m64;
 435
 436     out[0] -= in[0];
 437     out[1] -= in[1];
 438     out[2] -= in[2];
 439     out[3] -= in[3];
 440     out[4] -= in[4];
 441     out[5] -= in[5];
 442     out[6] -= in[6];
 443 }
 444
 445 /* Subtract in mixed mode: out128 -= in64 */
 446 /* in[i] < 2^63 */
 447 static void felem_diff_128_64(widefelem out, const felem in)
 448 {
 449     static const widelimb two64p8 = (((widelimb) 1) << 64) +
 450         (((widelimb) 1) << 8);
 451     static const widelimb two64m8 = (((widelimb) 1) << 64) -
 452         (((widelimb) 1) << 8);
 453     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
 454         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
 455
 456     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 457     out[0] += two64p8;
 458     out[1] += two64m48m8;
 459     out[2] += two64m8;
 460     out[3] += two64m8;
 461
 462     out[0] -= in[0];
 463     out[1] -= in[1];
 464     out[2] -= in[2];
 465     out[3] -= in[3];
 466 }
 467
 468 /*
 469  * Multiply a field element by a scalar: out = out * scalar The scalars we
 470  * actually use are small, so results fit without overflow
 471  */
 472 static void felem_scalar(felem out, const limb scalar)
 473 {
 474     out[0] *= scalar;
 475     out[1] *= scalar;
 476     out[2] *= scalar;
 477     out[3] *= scalar;
 478 }
 479
 480 /*
 481  * Multiply an unreduced field element by a scalar: out = out * scalar The
 482  * scalars we actually use are small, so results fit without overflow
 483  */
 484 static void widefelem_scalar(widefelem out, const widelimb scalar)
 485 {
 486     out[0] *= scalar;
 487     out[1] *= scalar;
 488     out[2] *= scalar;
 489     out[3] *= scalar;
 490     out[4] *= scalar;
 491     out[5] *= scalar;
 492     out[6] *= scalar;
 493 }
 494
 495 /* Square a field element: out = in^2 */
 496 static void felem_square(widefelem out, const felem in)
 497 {
 498     limb tmp0, tmp1, tmp2;
 499     tmp0 = 2 * in[0];
 500     tmp1 = 2 * in[1];
 501     tmp2 = 2 * in[2];
 502     out[0] = ((widelimb) in[0]) * in[0];
 503     out[1] = ((widelimb) in[0]) * tmp1;
 504     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
 505     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
 506     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
 507     out[5] = ((widelimb) in[3]) * tmp2;
 508     out[6] = ((widelimb) in[3]) * in[3];
 509 }
 510
 511 /* Multiply two field elements: out = in1 * in2 */
 512 static void felem_mul(widefelem out, const felem in1, const felem in2)
 513 {
 514     out[0] = ((widelimb) in1[0]) * in2[0];
 515     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
 516     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
 517              ((widelimb) in1[2]) * in2[0];
 518     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
 519              ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
 520     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
 521              ((widelimb) in1[3]) * in2[1];
 522     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
 523     out[6] = ((widelimb) in1[3]) * in2[3];
 524 }
 525
 526 /*-
 527  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 528  * Requires in[i] < 2^126,
 529  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
 530 static void felem_reduce(felem out, const widefelem in)
 531 {
 532     static const widelimb two127p15 = (((widelimb) 1) << 127) +
 533         (((widelimb) 1) << 15);
 534     static const widelimb two127m71 = (((widelimb) 1) << 127) -
 535         (((widelimb) 1) << 71);
 536     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
 537         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
 538     widelimb output[5];
 539
 540     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
 541     output[0] = in[0] + two127p15;
 542     output[1] = in[1] + two127m71m55;
 543     output[2] = in[2] + two127m71;
 544     output[3] = in[3];
 545     output[4] = in[4];
 546
 547     /* Eliminate in[4], in[5], in[6] */
 548     output[4] += in[6] >> 16;
 549     output[3] += (in[6] & 0xffff) << 40;
 550     output[2] -= in[6];
 551
 552     output[3] += in[5] >> 16;
 553     output[2] += (in[5] & 0xffff) << 40;
 554     output[1] -= in[5];
 555
 556     output[2] += output[4] >> 16;
 557     output[1] += (output[4] & 0xffff) << 40;
 558     output[0] -= output[4];
 559
 560     /* Carry 2 -> 3 -> 4 */
 561     output[3] += output[2] >> 56;
 562     output[2] &= 0x00ffffffffffffff;
 563
 564     output[4] = output[3] >> 56;
 565     output[3] &= 0x00ffffffffffffff;
 566
 567     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
 568
 569     /* Eliminate output[4] */
 570     output[2] += output[4] >> 16;
 571     /* output[2] < 2^56 + 2^56 = 2^57 */
 572     output[1] += (output[4] & 0xffff) << 40;
 573     output[0] -= output[4];
 574
 575     /* Carry 0 -> 1 -> 2 -> 3 */
 576     output[1] += output[0] >> 56;
 577     out[0] = output[0] & 0x00ffffffffffffff;
 578
 579     output[2] += output[1] >> 56;
 580     /* output[2] < 2^57 + 2^72 */
 581     out[1] = output[1] & 0x00ffffffffffffff;
 582     output[3] += output[2] >> 56;
 583     /* output[3] <= 2^56 + 2^16 */
 584     out[2] = output[2] & 0x00ffffffffffffff;
 585
 586     /*-
 587      * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
 588      * out[3] <= 2^56 + 2^16 (due to final carry),
 589      * so out < 2*p
 590      */
 591     out[3] = output[3];
 592 }
 593
 594 static void felem_square_reduce(felem out, const felem in)
 595 {
 596     widefelem tmp;
 597     felem_square(tmp, in);
 598     felem_reduce(out, tmp);
 599 }
 600
 601 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
 602 {
 603     widefelem tmp;
 604     felem_mul(tmp, in1, in2);
 605     felem_reduce(out, tmp);
 606 }
 607
 608 /*
 609  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 610  * call felem_reduce first)
 611  */
 612 static void felem_contract(felem out, const felem in)
 613 {
 614     static const int64_t two56 = ((limb) 1) << 56;
 615     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
 616     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
 617     int64_t tmp[4], a;
 618     tmp[0] = in[0];
 619     tmp[1] = in[1];
 620     tmp[2] = in[2];
 621     tmp[3] = in[3];
 622     /* Case 1: a = 1 iff in >= 2^224 */
 623     a = (in[3] >> 56);
 624     tmp[0] -= a;
 625     tmp[1] += a << 40;
 626     tmp[3] &= 0x00ffffffffffffff;
 627     /*
 628      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
 629      * and the lower part is non-zero
 630      */
 631     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
 632         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
 633     a &= 0x00ffffffffffffff;
 634     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
 635     a = (a - 1) >> 63;
 636     /* subtract 2^224 - 2^96 + 1 if a is all-one */
 637     tmp[3] &= a ^ 0xffffffffffffffff;
 638     tmp[2] &= a ^ 0xffffffffffffffff;
 639     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
 640     tmp[0] -= 1 & a;
 641
 642     /*
 643      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
 644      * non-zero, so we only need one step
 645      */
 646     a = tmp[0] >> 63;
 647     tmp[0] += two56 & a;
 648     tmp[1] -= 1 & a;
 649
 650     /* carry 1 -> 2 -> 3 */
 651     tmp[2] += tmp[1] >> 56;
 652     tmp[1] &= 0x00ffffffffffffff;
 653
 654     tmp[3] += tmp[2] >> 56;
 655     tmp[2] &= 0x00ffffffffffffff;
 656
 657     /* Now 0 <= out < p */
 658     out[0] = tmp[0];
 659     out[1] = tmp[1];
 660     out[2] = tmp[2];
 661     out[3] = tmp[3];
 662 }
 663
 664 /*
 665  * Get negative value: out = -in
 666  * Requires in[i] < 2^63,
 667  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
 668  */
 669 static void felem_neg(felem out, const felem in)
 670 {
 671     widefelem tmp;
 672
 673     memset(tmp, 0, sizeof(tmp));
 674     felem_diff_128_64(tmp, in);
 675     felem_reduce(out, tmp);
 676 }
 677
 678 /*
 679  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 680  * elements are reduced to in < 2^225, so we only need to check three cases:
 681  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 682  */
 683 static limb felem_is_zero(const felem in)
 684 {
 685     limb zero, two224m96p1, two225m97p2;
 686
 687     zero = in[0] | in[1] | in[2] | in[3];
 688     zero = (((int64_t) (zero) - 1) >> 63) & 1;
 689     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
 690         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
 691     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
 692     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
 693         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
 694     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
 695     return (zero | two224m96p1 | two225m97p2);
 696 }
 697
 698 static int felem_is_zero_int(const void *in)
 699 {
 700     return (int)(felem_is_zero(in) & ((limb) 1));
 701 }
 702
 703 /* Invert a field element */
 704 /* Computation chain copied from djb's code */
 705 static void felem_inv(felem out, const felem in)
 706 {
 707     felem ftmp, ftmp2, ftmp3, ftmp4;
 708     widefelem tmp;
 709     unsigned i;
 710
 711     felem_square(tmp, in);
 712     felem_reduce(ftmp, tmp);    /* 2 */
 713     felem_mul(tmp, in, ftmp);
 714     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
 715     felem_square(tmp, ftmp);
 716     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
 717     felem_mul(tmp, in, ftmp);
 718     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
 719     felem_square(tmp, ftmp);
 720     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
 721     felem_square(tmp, ftmp2);
 722     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
 723     felem_square(tmp, ftmp2);
 724     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
 725     felem_mul(tmp, ftmp2, ftmp);
 726     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
 727     felem_square(tmp, ftmp);
 728     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
 729     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
 730         felem_square(tmp, ftmp2);
 731         felem_reduce(ftmp2, tmp);
 732     }
 733     felem_mul(tmp, ftmp2, ftmp);
 734     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
 735     felem_square(tmp, ftmp2);
 736     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
 737     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
 738         felem_square(tmp, ftmp3);
 739         felem_reduce(ftmp3, tmp);
 740     }
 741     felem_mul(tmp, ftmp3, ftmp2);
 742     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
 743     felem_square(tmp, ftmp2);
 744     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
 745     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
 746         felem_square(tmp, ftmp3);
 747         felem_reduce(ftmp3, tmp);
 748     }
 749     felem_mul(tmp, ftmp3, ftmp2);
 750     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
 751     felem_square(tmp, ftmp3);
 752     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
 753     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
 754         felem_square(tmp, ftmp4);
 755         felem_reduce(ftmp4, tmp);
 756     }
 757     felem_mul(tmp, ftmp3, ftmp4);
 758     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
 759     felem_square(tmp, ftmp3);
 760     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
 761     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
 762         felem_square(tmp, ftmp4);
 763         felem_reduce(ftmp4, tmp);
 764     }
 765     felem_mul(tmp, ftmp2, ftmp4);
 766     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
 767     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
 768         felem_square(tmp, ftmp2);
 769         felem_reduce(ftmp2, tmp);
 770     }
 771     felem_mul(tmp, ftmp2, ftmp);
 772     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
 773     felem_square(tmp, ftmp);
 774     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
 775     felem_mul(tmp, ftmp, in);
 776     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
 777     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
 778         felem_square(tmp, ftmp);
 779         felem_reduce(ftmp, tmp);
 780     }
 781     felem_mul(tmp, ftmp, ftmp3);
 782     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
 783 }
 784
 785 /*
 786  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 787  * out to itself.
 788  */
 789 static void copy_conditional(felem out, const felem in, limb icopy)
 790 {
 791     unsigned i;
 792     /*
 793      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
 794      */
 795     const limb copy = -icopy;
 796     for (i = 0; i < 4; ++i) {
 797         const limb tmp = copy & (in[i] ^ out[i]);
 798         out[i] ^= tmp;
 799     }
 800 }
 801
 802 /******************************************************************************/
 803 /*-
 804  *                       ELLIPTIC CURVE POINT OPERATIONS
 805  *
 806  * Points are represented in Jacobian projective coordinates:
 807  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 808  * or to the point at infinity if Z == 0.
 809  *
 810  */
 811
 812 /*-
 813  * Double an elliptic curve point:
 814  * (X', Y', Z') = 2 * (X, Y, Z), where
 815  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 816  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
 817  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 818  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 819  * while x_out == y_in is not (maybe this works, but it's not tested).
 820  */
 821 static void
 822 point_double(felem x_out, felem y_out, felem z_out,
 823              const felem x_in, const felem y_in, const felem z_in)
 824 {
 825     widefelem tmp, tmp2;
 826     felem delta, gamma, beta, alpha, ftmp, ftmp2;
 827
 828     felem_assign(ftmp, x_in);
 829     felem_assign(ftmp2, x_in);
 830
 831     /* delta = z^2 */
 832     felem_square(tmp, z_in);
 833     felem_reduce(delta, tmp);
 834
 835     /* gamma = y^2 */
 836     felem_square(tmp, y_in);
 837     felem_reduce(gamma, tmp);
 838
 839     /* beta = x*gamma */
 840     felem_mul(tmp, x_in, gamma);
 841     felem_reduce(beta, tmp);
 842
 843     /* alpha = 3*(x-delta)*(x+delta) */
 844     felem_diff(ftmp, delta);
 845     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
 846     felem_sum(ftmp2, delta);
 847     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
 848     felem_scalar(ftmp2, 3);
 849     /* ftmp2[i] < 3 * 2^58 < 2^60 */
 850     felem_mul(tmp, ftmp, ftmp2);
 851     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
 852     felem_reduce(alpha, tmp);
 853
 854     /* x' = alpha^2 - 8*beta */
 855     felem_square(tmp, alpha);
 856     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 857     felem_assign(ftmp, beta);
 858     felem_scalar(ftmp, 8);
 859     /* ftmp[i] < 8 * 2^57 = 2^60 */
 860     felem_diff_128_64(tmp, ftmp);
 861     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 862     felem_reduce(x_out, tmp);
 863
 864     /* z' = (y + z)^2 - gamma - delta */
 865     felem_sum(delta, gamma);
 866     /* delta[i] < 2^57 + 2^57 = 2^58 */
 867     felem_assign(ftmp, y_in);
 868     felem_sum(ftmp, z_in);
 869     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
 870     felem_square(tmp, ftmp);
 871     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
 872     felem_diff_128_64(tmp, delta);
 873     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
 874     felem_reduce(z_out, tmp);
 875
 876     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
 877     felem_scalar(beta, 4);
 878     /* beta[i] < 4 * 2^57 = 2^59 */
 879     felem_diff(beta, x_out);
 880     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
 881     felem_mul(tmp, alpha, beta);
 882     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
 883     felem_square(tmp2, gamma);
 884     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
 885     widefelem_scalar(tmp2, 8);
 886     /* tmp2[i] < 8 * 2^116 = 2^119 */
 887     widefelem_diff(tmp, tmp2);
 888     /* tmp[i] < 2^119 + 2^120 < 2^121 */
 889     felem_reduce(y_out, tmp);
 890 }
 891
 892 /*-
 893  * Add two elliptic curve points:
 894  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 895  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 896  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 897  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 898  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 899  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 900  *
 901  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 902  */
 903
 904 /*
 905  * This function is not entirely constant-time: it includes a branch for
 906  * checking whether the two input points are equal, (while not equal to the
 907  * point at infinity). This case never happens during single point
 908  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 909  */
 910 static void point_add(felem x3, felem y3, felem z3,
 911                       const felem x1, const felem y1, const felem z1,
 912                       const int mixed, const felem x2, const felem y2,
 913                       const felem z2)
 914 {
 915     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
 916     widefelem tmp, tmp2;
 917     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
 918     limb points_equal;
 919
 920     if (!mixed) {
 921         /* ftmp2 = z2^2 */
 922         felem_square(tmp, z2);
 923         felem_reduce(ftmp2, tmp);
 924
 925         /* ftmp4 = z2^3 */
 926         felem_mul(tmp, ftmp2, z2);
 927         felem_reduce(ftmp4, tmp);
 928
 929         /* ftmp4 = z2^3*y1 */
 930         felem_mul(tmp2, ftmp4, y1);
 931         felem_reduce(ftmp4, tmp2);
 932
 933         /* ftmp2 = z2^2*x1 */
 934         felem_mul(tmp2, ftmp2, x1);
 935         felem_reduce(ftmp2, tmp2);
 936     } else {
 937         /*
 938          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
 939          */
 940
 941         /* ftmp4 = z2^3*y1 */
 942         felem_assign(ftmp4, y1);
 943
 944         /* ftmp2 = z2^2*x1 */
 945         felem_assign(ftmp2, x1);
 946     }
 947
 948     /* ftmp = z1^2 */
 949     felem_square(tmp, z1);
 950     felem_reduce(ftmp, tmp);
 951
 952     /* ftmp3 = z1^3 */
 953     felem_mul(tmp, ftmp, z1);
 954     felem_reduce(ftmp3, tmp);
 955
 956     /* tmp = z1^3*y2 */
 957     felem_mul(tmp, ftmp3, y2);
 958     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 959
 960     /* ftmp3 = z1^3*y2 - z2^3*y1 */
 961     felem_diff_128_64(tmp, ftmp4);
 962     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 963     felem_reduce(ftmp3, tmp);
 964
 965     /* tmp = z1^2*x2 */
 966     felem_mul(tmp, ftmp, x2);
 967     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 968
 969     /* ftmp = z1^2*x2 - z2^2*x1 */
 970     felem_diff_128_64(tmp, ftmp2);
 971     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 972     felem_reduce(ftmp, tmp);
 973
 974     /*
 975      * The formulae are incorrect if the points are equal, in affine coordinates
 976      * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
 977      * happens.
 978      *
 979      * We use bitwise operations to avoid potential side-channels introduced by
 980      * the short-circuiting behaviour of boolean operators.
 981      */
 982     x_equal = felem_is_zero(ftmp);
 983     y_equal = felem_is_zero(ftmp3);
 984     /*
 985      * The special case of either point being the point at infinity (z1 and/or
 986      * z2 are zero), is handled separately later on in this function, so we
 987      * avoid jumping to point_double here in those special cases.
 988      */
 989     z1_is_zero = felem_is_zero(z1);
 990     z2_is_zero = felem_is_zero(z2);
 991
 992     /*
 993      * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
 994      * specific implementation `felem_is_zero()` returns truth as `0x1`
 995      * (rather than `0xff..ff`).
 996      *
 997      * This implies that `~true` in this implementation becomes
 998      * `0xff..fe` (rather than `0x0`): for this reason, to be used in
 999      * the if expression, we mask out only the last bit in the next
1000      * line.
1001      */
1002     points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1003
1004     if (points_equal) {
1005         /*
1006          * This is obviously not constant-time but, as mentioned before, this
1007          * case never happens during single point multiplication, so there is no
1008          * timing leak for ECDH or ECDSA signing.
1009          */
1010         point_double(x3, y3, z3, x1, y1, z1);
1011         return;
1012     }
1013
1014     /* ftmp5 = z1*z2 */
1015     if (!mixed) {
1016         felem_mul(tmp, z1, z2);
1017         felem_reduce(ftmp5, tmp);
1018     } else {
1019         /* special case z2 = 0 is handled later */
1020         felem_assign(ftmp5, z1);
1021     }
1022
1023     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1024     felem_mul(tmp, ftmp, ftmp5);
1025     felem_reduce(z_out, tmp);
1026
1027     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1028     felem_assign(ftmp5, ftmp);
1029     felem_square(tmp, ftmp);
1030     felem_reduce(ftmp, tmp);
1031
1032     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1033     felem_mul(tmp, ftmp, ftmp5);
1034     felem_reduce(ftmp5, tmp);
1035
1036     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1037     felem_mul(tmp, ftmp2, ftmp);
1038     felem_reduce(ftmp2, tmp);
1039
1040     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1041     felem_mul(tmp, ftmp4, ftmp5);
1042     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1043
1044     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1045     felem_square(tmp2, ftmp3);
1046     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1047
1048     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1049     felem_diff_128_64(tmp2, ftmp5);
1050     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1051
1052     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1053     felem_assign(ftmp5, ftmp2);
1054     felem_scalar(ftmp5, 2);
1055     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1056
1057     /*-
1058      * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1059      *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1060      */
1061     felem_diff_128_64(tmp2, ftmp5);
1062     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1063     felem_reduce(x_out, tmp2);
1064
1065     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1066     felem_diff(ftmp2, x_out);
1067     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1068
1069     /*
1070      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1071      */
1072     felem_mul(tmp2, ftmp3, ftmp2);
1073     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1074
1075     /*-
1076      * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1077      *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1078      */
1079     widefelem_diff(tmp2, tmp);
1080     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1081     felem_reduce(y_out, tmp2);
1082
1083     /*
1084      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1085      * the point at infinity, so we need to check for this separately
1086      */
1087
1088     /*
1089      * if point 1 is at infinity, copy point 2 to output, and vice versa
1090      */
1091     copy_conditional(x_out, x2, z1_is_zero);
1092     copy_conditional(x_out, x1, z2_is_zero);
1093     copy_conditional(y_out, y2, z1_is_zero);
1094     copy_conditional(y_out, y1, z2_is_zero);
1095     copy_conditional(z_out, z2, z1_is_zero);
1096     copy_conditional(z_out, z1, z2_is_zero);
1097     felem_assign(x3, x_out);
1098     felem_assign(y3, y_out);
1099     felem_assign(z3, z_out);
1100 }
1101
1102 /*
1103  * select_point selects the |idx|th point from a precomputation table and
1104  * copies it to out.
1105  * The pre_comp array argument should be size of |size| argument
1106  */
1107 static void select_point(const u64 idx, unsigned int size,
1108                          const felem pre_comp[][3], felem out[3])
1109 {
1110     unsigned i, j;
1111     limb *outlimbs = &out[0][0];
1112
1113     memset(out, 0, sizeof(*out) * 3);
1114     for (i = 0; i < size; i++) {
1115         const limb *inlimbs = &pre_comp[i][0][0];
1116         u64 mask = i ^ idx;
1117         mask |= mask >> 4;
1118         mask |= mask >> 2;
1119         mask |= mask >> 1;
1120         mask &= 1;
1121         mask--;
1122         for (j = 0; j < 4 * 3; j++)
1123             outlimbs[j] |= inlimbs[j] & mask;
1124     }
1125 }
1126
1127 /* get_bit returns the |i|th bit in |in| */
1128 static char get_bit(const felem_bytearray in, unsigned i)
1129 {
1130     if (i >= 224)
1131         return 0;
1132     return (in[i >> 3] >> (i & 7)) & 1;
1133 }
1134
1135 /*
1136  * Interleaved point multiplication using precomputed point multiples: The
1137  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1138  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1139  * generator, using certain (large) precomputed multiples in g_pre_comp.
1140  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1141  */
1142 static void batch_mul(felem x_out, felem y_out, felem z_out,
1143                       const felem_bytearray scalars[],
1144                       const unsigned num_points, const u8 *g_scalar,
1145                       const int mixed, const felem pre_comp[][17][3],
1146                       const felem g_pre_comp[2][16][3])
1147 {
1148     int i, skip;
1149     unsigned num;
1150     unsigned gen_mul = (g_scalar != NULL);
1151     felem nq[3], tmp[4];
1152     u64 bits;
1153     u8 sign, digit;
1154
1155     /* set nq to the point at infinity */
1156     memset(nq, 0, sizeof(nq));
1157
1158     /*
1159      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1160      * of the generator (two in each of the last 28 rounds) and additions of
1161      * other points multiples (every 5th round).
1162      */
1163     skip = 1;                   /* save two point operations in the first
1164                                  * round */
1165     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1166         /* double */
1167         if (!skip)
1168             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1169
1170         /* add multiples of the generator */
1171         if (gen_mul && (i <= 27)) {
1172             /* first, look 28 bits upwards */
1173             bits = get_bit(g_scalar, i + 196) << 3;
1174             bits |= get_bit(g_scalar, i + 140) << 2;
1175             bits |= get_bit(g_scalar, i + 84) << 1;
1176             bits |= get_bit(g_scalar, i + 28);
1177             /* select the point to add, in constant time */
1178             select_point(bits, 16, g_pre_comp[1], tmp);
1179
1180             if (!skip) {
1181                 /* value 1 below is argument for "mixed" */
1182                 point_add(nq[0], nq[1], nq[2],
1183                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1184             } else {
1185                 memcpy(nq, tmp, 3 * sizeof(felem));
1186                 skip = 0;
1187             }
1188
1189             /* second, look at the current position */
1190             bits = get_bit(g_scalar, i + 168) << 3;
1191             bits |= get_bit(g_scalar, i + 112) << 2;
1192             bits |= get_bit(g_scalar, i + 56) << 1;
1193             bits |= get_bit(g_scalar, i);
1194             /* select the point to add, in constant time */
1195             select_point(bits, 16, g_pre_comp[0], tmp);
1196             point_add(nq[0], nq[1], nq[2],
1197                       nq[0], nq[1], nq[2],
1198                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1199         }
1200
1201         /* do other additions every 5 doublings */
1202         if (num_points && (i % 5 == 0)) {
1203             /* loop over all scalars */
1204             for (num = 0; num < num_points; ++num) {
1205                 bits = get_bit(scalars[num], i + 4) << 5;
1206                 bits |= get_bit(scalars[num], i + 3) << 4;
1207                 bits |= get_bit(scalars[num], i + 2) << 3;
1208                 bits |= get_bit(scalars[num], i + 1) << 2;
1209                 bits |= get_bit(scalars[num], i) << 1;
1210                 bits |= get_bit(scalars[num], i - 1);
1211                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1212
1213                 /* select the point to add or subtract */
1214                 select_point(digit, 17, pre_comp[num], tmp);
1215                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1216                                             * point */
1217                 copy_conditional(tmp[1], tmp[3], sign);
1218
1219                 if (!skip) {
1220                     point_add(nq[0], nq[1], nq[2],
1221                               nq[0], nq[1], nq[2],
1222                               mixed, tmp[0], tmp[1], tmp[2]);
1223                 } else {
1224                     memcpy(nq, tmp, 3 * sizeof(felem));
1225                     skip = 0;
1226                 }
1227             }
1228         }
1229     }
1230     felem_assign(x_out, nq[0]);
1231     felem_assign(y_out, nq[1]);
1232     felem_assign(z_out, nq[2]);
1233 }
1234
1235 /******************************************************************************/
1236 /*
1237  * FUNCTIONS TO MANAGE PRECOMPUTATION
1238  */
1239
1240 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1241 {
1242     NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1243
1244     if (!ret) {
1245         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1246         return ret;
1247     }
1248
1249     ret->references = 1;
1250
1251     ret->lock = CRYPTO_THREAD_lock_new();
1252     if (ret->lock == NULL) {
1253         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1254         OPENSSL_free(ret);
1255         return NULL;
1256     }
1257     return ret;
1258 }
1259
1260 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1261 {
1262     int i;
1263     if (p != NULL)
1264         CRYPTO_UP_REF(&p->references, &i, p->lock);
1265     return p;
1266 }
1267
1268 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1269 {
1270     int i;
1271
1272     if (p == NULL)
1273         return;
1274
1275     CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1276     REF_PRINT_COUNT("EC_nistp224", x);
1277     if (i > 0)
1278         return;
1279     REF_ASSERT_ISNT(i < 0);
1280
1281     CRYPTO_THREAD_lock_free(p->lock);
1282     OPENSSL_free(p);
1283 }
1284
1285 /******************************************************************************/
1286 /*
1287  * OPENSSL EC_METHOD FUNCTIONS
1288  */
1289
1290 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1291 {
1292     int ret;
1293     ret = ec_GFp_simple_group_init(group);
1294     group->a_is_minus3 = 1;
1295     return ret;
1296 }
1297
1298 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1299                                     const BIGNUM *a, const BIGNUM *b,
1300                                     BN_CTX *ctx)
1301 {
1302     int ret = 0;
1303     BIGNUM *curve_p, *curve_a, *curve_b;
1304 #ifndef FIPS_MODE
1305     BN_CTX *new_ctx = NULL;
1306
1307     if (ctx == NULL)
1308         ctx = new_ctx = BN_CTX_new();
1309 #endif
1310     if (ctx == NULL)
1311         return 0;
1312
1313     BN_CTX_start(ctx);
1314     curve_p = BN_CTX_get(ctx);
1315     curve_a = BN_CTX_get(ctx);
1316     curve_b = BN_CTX_get(ctx);
1317     if (curve_b == NULL)
1318         goto err;
1319     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1320     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1321     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1322     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1323         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1324               EC_R_WRONG_CURVE_PARAMETERS);
1325         goto err;
1326     }
1327     group->field_mod_func = BN_nist_mod_224;
1328     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1329  err:
1330     BN_CTX_end(ctx);
1331 #ifndef FIPS_MODE
1332     BN_CTX_free(new_ctx);
1333 #endif
1334     return ret;
1335 }
1336
1337 /*
1338  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1339  * (X/Z^2, Y/Z^3)
1340  */
1341 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1342                                                  const EC_POINT *point,
1343                                                  BIGNUM *x, BIGNUM *y,
1344                                                  BN_CTX *ctx)
1345 {
1346     felem z1, z2, x_in, y_in, x_out, y_out;
1347     widefelem tmp;
1348
1349     if (EC_POINT_is_at_infinity(group, point)) {
1350         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1351               EC_R_POINT_AT_INFINITY);
1352         return 0;
1353     }
1354     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1355         (!BN_to_felem(z1, point->Z)))
1356         return 0;
1357     felem_inv(z2, z1);
1358     felem_square(tmp, z2);
1359     felem_reduce(z1, tmp);
1360     felem_mul(tmp, x_in, z1);
1361     felem_reduce(x_in, tmp);
1362     felem_contract(x_out, x_in);
1363     if (x != NULL) {
1364         if (!felem_to_BN(x, x_out)) {
1365             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1366                   ERR_R_BN_LIB);
1367             return 0;
1368         }
1369     }
1370     felem_mul(tmp, z1, z2);
1371     felem_reduce(z1, tmp);
1372     felem_mul(tmp, y_in, z1);
1373     felem_reduce(y_in, tmp);
1374     felem_contract(y_out, y_in);
1375     if (y != NULL) {
1376         if (!felem_to_BN(y, y_out)) {
1377             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1378                   ERR_R_BN_LIB);
1379             return 0;
1380         }
1381     }
1382     return 1;
1383 }
1384
1385 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1386                                felem tmp_felems[ /* num+1 */ ])
1387 {
1388     /*
1389      * Runs in constant time, unless an input is the point at infinity (which
1390      * normally shouldn't happen).
1391      */
1392     ec_GFp_nistp_points_make_affine_internal(num,
1393                                              points,
1394                                              sizeof(felem),
1395                                              tmp_felems,
1396                                              (void (*)(void *))felem_one,
1397                                              felem_is_zero_int,
1398                                              (void (*)(void *, const void *))
1399                                              felem_assign,
1400                                              (void (*)(void *, const void *))
1401                                              felem_square_reduce, (void (*)
1402                                                                    (void *,
1403                                                                     const void
1404                                                                     *,
1405                                                                     const void
1406                                                                     *))
1407                                              felem_mul_reduce,
1408                                              (void (*)(void *, const void *))
1409                                              felem_inv,
1410                                              (void (*)(void *, const void *))
1411                                              felem_contract);
1412 }
1413
1414 /*
1415  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1416  * values Result is stored in r (r can equal one of the inputs).
1417  */
1418 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1419                                const BIGNUM *scalar, size_t num,
1420                                const EC_POINT *points[],
1421                                const BIGNUM *scalars[], BN_CTX *ctx)
1422 {
1423     int ret = 0;
1424     int j;
1425     unsigned i;
1426     int mixed = 0;
1427     BIGNUM *x, *y, *z, *tmp_scalar;
1428     felem_bytearray g_secret;
1429     felem_bytearray *secrets = NULL;
1430     felem (*pre_comp)[17][3] = NULL;
1431     felem *tmp_felems = NULL;
1432     int num_bytes;
1433     int have_pre_comp = 0;
1434     size_t num_points = num;
1435     felem x_in, y_in, z_in, x_out, y_out, z_out;
1436     NISTP224_PRE_COMP *pre = NULL;
1437     const felem(*g_pre_comp)[16][3] = NULL;
1438     EC_POINT *generator = NULL;
1439     const EC_POINT *p = NULL;
1440     const BIGNUM *p_scalar = NULL;
1441
1442     BN_CTX_start(ctx);
1443     x = BN_CTX_get(ctx);
1444     y = BN_CTX_get(ctx);
1445     z = BN_CTX_get(ctx);
1446     tmp_scalar = BN_CTX_get(ctx);
1447     if (tmp_scalar == NULL)
1448         goto err;
1449
1450     if (scalar != NULL) {
1451         pre = group->pre_comp.nistp224;
1452         if (pre)
1453             /* we have precomputation, try to use it */
1454             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1455         else
1456             /* try to use the standard precomputation */
1457             g_pre_comp = &gmul[0];
1458         generator = EC_POINT_new(group);
1459         if (generator == NULL)
1460             goto err;
1461         /* get the generator from precomputation */
1462         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1463             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1464             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1465             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1466             goto err;
1467         }
1468         if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1469                                                       generator, x, y, z,
1470                                                       ctx))
1471             goto err;
1472         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1473             /* precomputation matches generator */
1474             have_pre_comp = 1;
1475         else
1476             /*
1477              * we don't have valid precomputation: treat the generator as a
1478              * random point
1479              */
1480             num_points = num_points + 1;
1481     }
1482
1483     if (num_points > 0) {
1484         if (num_points >= 3) {
1485             /*
1486              * unless we precompute multiples for just one or two points,
1487              * converting those into affine form is time well spent
1488              */
1489             mixed = 1;
1490         }
1491         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1492         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1493         if (mixed)
1494             tmp_felems =
1495                 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1496         if ((secrets == NULL) || (pre_comp == NULL)
1497             || (mixed && (tmp_felems == NULL))) {
1498             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1499             goto err;
1500         }
1501
1502         /*
1503          * we treat NULL scalars as 0, and NULL points as points at infinity,
1504          * i.e., they contribute nothing to the linear combination
1505          */
1506         for (i = 0; i < num_points; ++i) {
1507             if (i == num) {
1508                 /* the generator */
1509                 p = EC_GROUP_get0_generator(group);
1510                 p_scalar = scalar;
1511             } else {
1512                 /* the i^th point */
1513                 p = points[i];
1514                 p_scalar = scalars[i];
1515             }
1516             if ((p_scalar != NULL) && (p != NULL)) {
1517                 /* reduce scalar to 0 <= scalar < 2^224 */
1518                 if ((BN_num_bits(p_scalar) > 224)
1519                     || (BN_is_negative(p_scalar))) {
1520                     /*
1521                      * this is an unusual input, and we don't guarantee
1522                      * constant-timeness
1523                      */
1524                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1525                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1526                         goto err;
1527                     }
1528                     num_bytes = BN_bn2lebinpad(tmp_scalar,
1529                                                secrets[i], sizeof(secrets[i]));
1530                 } else {
1531                     num_bytes = BN_bn2lebinpad(p_scalar,
1532                                                secrets[i], sizeof(secrets[i]));
1533                 }
1534                 if (num_bytes < 0) {
1535                     ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1536                     goto err;
1537                 }
1538                 /* precompute multiples */
1539                 if ((!BN_to_felem(x_out, p->X)) ||
1540                     (!BN_to_felem(y_out, p->Y)) ||
1541                     (!BN_to_felem(z_out, p->Z)))
1542                     goto err;
1543                 felem_assign(pre_comp[i][1][0], x_out);
1544                 felem_assign(pre_comp[i][1][1], y_out);
1545                 felem_assign(pre_comp[i][1][2], z_out);
1546                 for (j = 2; j <= 16; ++j) {
1547                     if (j & 1) {
1548                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1549                                   pre_comp[i][j][2], pre_comp[i][1][0],
1550                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1551                                   pre_comp[i][j - 1][0],
1552                                   pre_comp[i][j - 1][1],
1553                                   pre_comp[i][j - 1][2]);
1554                     } else {
1555                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1556                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1557                                      pre_comp[i][j / 2][1],
1558                                      pre_comp[i][j / 2][2]);
1559                     }
1560                 }
1561             }
1562         }
1563         if (mixed)
1564             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1565     }
1566
1567     /* the scalar for the generator */
1568     if ((scalar != NULL) && (have_pre_comp)) {
1569         memset(g_secret, 0, sizeof(g_secret));
1570         /* reduce scalar to 0 <= scalar < 2^224 */
1571         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1572             /*
1573              * this is an unusual input, and we don't guarantee
1574              * constant-timeness
1575              */
1576             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1577                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1578                 goto err;
1579             }
1580             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1581         } else {
1582             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1583         }
1584         /* do the multiplication with generator precomputation */
1585         batch_mul(x_out, y_out, z_out,
1586                   (const felem_bytearray(*))secrets, num_points,
1587                   g_secret,
1588                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1589     } else {
1590         /* do the multiplication without generator precomputation */
1591         batch_mul(x_out, y_out, z_out,
1592                   (const felem_bytearray(*))secrets, num_points,
1593                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1594     }
1595     /* reduce the output to its unique minimal representation */
1596     felem_contract(x_in, x_out);
1597     felem_contract(y_in, y_out);
1598     felem_contract(z_in, z_out);
1599     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1600         (!felem_to_BN(z, z_in))) {
1601         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1602         goto err;
1603     }
1604     ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1605
1606  err:
1607     BN_CTX_end(ctx);
1608     EC_POINT_free(generator);
1609     OPENSSL_free(secrets);
1610     OPENSSL_free(pre_comp);
1611     OPENSSL_free(tmp_felems);
1612     return ret;
1613 }
1614
1615 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1616 {
1617     int ret = 0;
1618     NISTP224_PRE_COMP *pre = NULL;
1619     int i, j;
1620     BIGNUM *x, *y;
1621     EC_POINT *generator = NULL;
1622     felem tmp_felems[32];
1623 #ifndef FIPS_MODE
1624     BN_CTX *new_ctx = NULL;
1625 #endif
1626
1627     /* throw away old precomputation */
1628     EC_pre_comp_free(group);
1629
1630 #ifndef FIPS_MODE
1631     if (ctx == NULL)
1632         ctx = new_ctx = BN_CTX_new();
1633 #endif
1634     if (ctx == NULL)
1635         return 0;
1636
1637     BN_CTX_start(ctx);
1638     x = BN_CTX_get(ctx);
1639     y = BN_CTX_get(ctx);
1640     if (y == NULL)
1641         goto err;
1642     /* get the generator */
1643     if (group->generator == NULL)
1644         goto err;
1645     generator = EC_POINT_new(group);
1646     if (generator == NULL)
1647         goto err;
1648     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1649     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1650     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1651         goto err;
1652     if ((pre = nistp224_pre_comp_new()) == NULL)
1653         goto err;
1654     /*
1655      * if the generator is the standard one, use built-in precomputation
1656      */
1657     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1658         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1659         goto done;
1660     }
1661     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1662         (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1663         (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1664         goto err;
1665     /*
1666      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1667      * 2^140*G, 2^196*G for the second one
1668      */
1669     for (i = 1; i <= 8; i <<= 1) {
1670         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1671                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1672                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1673         for (j = 0; j < 27; ++j) {
1674             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1675                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1676                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1677         }
1678         if (i == 8)
1679             break;
1680         point_double(pre->g_pre_comp[0][2 * i][0],
1681                      pre->g_pre_comp[0][2 * i][1],
1682                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1683                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1684         for (j = 0; j < 27; ++j) {
1685             point_double(pre->g_pre_comp[0][2 * i][0],
1686                          pre->g_pre_comp[0][2 * i][1],
1687                          pre->g_pre_comp[0][2 * i][2],
1688                          pre->g_pre_comp[0][2 * i][0],
1689                          pre->g_pre_comp[0][2 * i][1],
1690                          pre->g_pre_comp[0][2 * i][2]);
1691         }
1692     }
1693     for (i = 0; i < 2; i++) {
1694         /* g_pre_comp[i][0] is the point at infinity */
1695         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1696         /* the remaining multiples */
1697         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1698         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1699                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1700                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1701                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702                   pre->g_pre_comp[i][2][2]);
1703         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1704         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1705                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1706                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1707                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1708                   pre->g_pre_comp[i][2][2]);
1709         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1710         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1711                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1712                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1713                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1714                   pre->g_pre_comp[i][4][2]);
1715         /*
1716          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1717          */
1718         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1719                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1720                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1721                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1722                   pre->g_pre_comp[i][2][2]);
1723         for (j = 1; j < 8; ++j) {
1724             /* odd multiples: add G resp. 2^28*G */
1725             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1726                       pre->g_pre_comp[i][2 * j + 1][1],
1727                       pre->g_pre_comp[i][2 * j + 1][2],
1728                       pre->g_pre_comp[i][2 * j][0],
1729                       pre->g_pre_comp[i][2 * j][1],
1730                       pre->g_pre_comp[i][2 * j][2], 0,
1731                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1732                       pre->g_pre_comp[i][1][2]);
1733         }
1734     }
1735     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1736
1737  done:
1738     SETPRECOMP(group, nistp224, pre);
1739     pre = NULL;
1740     ret = 1;
1741  err:
1742     BN_CTX_end(ctx);
1743     EC_POINT_free(generator);
1744 #ifndef FIPS_MODE
1745     BN_CTX_free(new_ctx);
1746 #endif
1747     EC_nistp224_pre_comp_free(pre);
1748     return ret;
1749 }
1750
1751 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1752 {
1753     return HAVEPRECOMP(group, nistp224);
1754 }