crypto/ec/ecp_nistp224.c

   1 /*
   2  * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
   3  *
   4  * Licensed under the Apache License 2.0 (the "License").  You may not use
   5  * this file except in compliance with the License.  You can obtain a copy
   6  * in the file LICENSE in the source distribution or at
   7  * https://www.openssl.org/source/license.html
   8  */
   9
  10 /* Copyright 2011 Google Inc.
  11  *
  12  * Licensed under the Apache License, Version 2.0 (the "License");
  13  *
  14  * you may not use this file except in compliance with the License.
  15  * You may obtain a copy of the License at
  16  *
  17  *     http://www.apache.org/licenses/LICENSE-2.0
  18  *
  19  *  Unless required by applicable law or agreed to in writing, software
  20  *  distributed under the License is distributed on an "AS IS" BASIS,
  21  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22  *  See the License for the specific language governing permissions and
  23  *  limitations under the License.
  24  */
  25
  26 /*
  27  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  28  *
  29  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  30  * and Adam Langley's public domain 64-bit C implementation of curve25519
  31  */
  32
  33 #include <openssl/opensslconf.h>
  34 #ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
  35 NON_EMPTY_TRANSLATION_UNIT
  36 #else
  37
  38 # include <stdint.h>
  39 # include <string.h>
  40 # include <openssl/err.h>
  41 # include "ec_local.h"
  42
/*
 * The arithmetic in this file needs a 128-bit unsigned integer type.
 * __SIZEOF_INT128__ is tested so that a compiler without such a type stops
 * with an explicit error message instead of failing on the typedef.
 */
# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
  /* even with gcc, the typedef won't work for 32-bit platforms */
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
                                 * platforms */
# else
#  error "Your compiler doesn't appear to support 128-bit integer types"
# endif
  50
/* Short aliases for fixed-width unsigned integer types */
typedef uint8_t u8;             /* a single byte */
typedef uint64_t u64;           /* a 64-bit unsigned word */
  53
  54 /******************************************************************************/
  55 /*-
  56  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
  57  *
  58  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  59  * using 64-bit coefficients called 'limbs',
  60  * and sometimes (for multiplication results) as
  61  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  62  * using 128-bit coefficients called 'widelimbs'.
  63  * A 4-limb representation is an 'felem';
  64  * a 7-widelimb representation is a 'widefelem'.
  65  * Even within felems, bits of adjacent limbs overlap, and we don't always
  66  * reduce the representations: we ensure that inputs to each felem
  67  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  68  * and fit into a 128-bit word without overflow. The coefficients are then
  69  * again partially reduced to obtain an felem satisfying a_i < 2^57.
  70  * We only reduce to the unique minimal representation at the end of the
  71  * computation.
  72  */
  73
/* One coefficient of a field element: 64 bits, or 128 bits when "wide" */
typedef uint64_t limb;
typedef uint128_t widelimb;

/* felem: four limbs; widefelem: seven widelimbs (multiplication results) */
typedef limb felem[4];
typedef widelimb widefelem[7];
  79
/*
 * Field element represented as an array of 28 bytes. 28*8 = 224 bits is
 * also the size of the group order of the elliptic curve, so the same type
 * is used for the scalars in point multiplication.
 */
typedef u8 felem_bytearray[28];
  86
/*
 * The five P-224 curve constants, one 28-byte array each, in the order
 * p, a, b, x, y (see the tag on the first line of every entry).
 * The bytes are written most-significant first: p starts with sixteen 0xFF
 * bytes and ends in 0x01, matching p = 2^224 - 2^96 + 1.
 */
static const felem_bytearray nistp224_curve_params[5] = {
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
};
 104
 105 /*-
 106  * Precomputed multiples of the standard generator
 107  * Points are given in coordinates (X, Y, Z) where Z normally is 1
 108  * (0 for the point at infinity).
 109  * For each field element, slice a_0 is word 0, etc.
 110  *
 111  * The table has 2 * 16 elements, starting with the following:
 112  * index | bits    | point
 113  * ------+---------+------------------------------
 114  *     0 | 0 0 0 0 | 0G
 115  *     1 | 0 0 0 1 | 1G
 116  *     2 | 0 0 1 0 | 2^56G
 117  *     3 | 0 0 1 1 | (2^56 + 1)G
 118  *     4 | 0 1 0 0 | 2^112G
 119  *     5 | 0 1 0 1 | (2^112 + 1)G
 120  *     6 | 0 1 1 0 | (2^112 + 2^56)G
 121  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 122  *     8 | 1 0 0 0 | 2^168G
 123  *     9 | 1 0 0 1 | (2^168 + 1)G
 124  *    10 | 1 0 1 0 | (2^168 + 2^56)G
 125  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 126  *    12 | 1 1 0 0 | (2^168 + 2^112)G
 127  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 128  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 129  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 130  * followed by a copy of this with each element multiplied by 2^28.
 131  *
 132  * The reason for this is so that we can clock bits into four different
 133  * locations when doing simple scalar multiplies against the base point,
 134  * and then another four locations using the second 16 elements.
 135  */
/*
 * gmul[t][i] is one point stored as three felems {X, Y, Z}. Entry 0 of each
 * sub-table is all zeros (Z = 0, the point at infinity); every other entry
 * has Z = 1.
 */
static const felem gmul[2][16][3] = {
/* gmul[0]: the sixteen combinations of G, 2^56G, 2^112G and 2^168G */
{{{0, 0, 0, 0},
  {0, 0, 0, 0},
  {0, 0, 0, 0}},
 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
  {1, 0, 0, 0}},
 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
  {1, 0, 0, 0}},
 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
  {1, 0, 0, 0}},
 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
  {1, 0, 0, 0}},
 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
  {1, 0, 0, 0}},
 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
  {1, 0, 0, 0}},
 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
  {1, 0, 0, 0}},
 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
  {1, 0, 0, 0}},
 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
  {1, 0, 0, 0}},
 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
  {1, 0, 0, 0}},
 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
  {1, 0, 0, 0}},
 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
  {1, 0, 0, 0}},
 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
  {1, 0, 0, 0}},
 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
  {1, 0, 0, 0}},
 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
  {1, 0, 0, 0}}},
/* gmul[1]: the same sixteen combinations, each multiplied by 2^28 */
{{{0, 0, 0, 0},
  {0, 0, 0, 0},
  {0, 0, 0, 0}},
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
  {1, 0, 0, 0}},
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
  {1, 0, 0, 0}},
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
  {1, 0, 0, 0}},
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
  {1, 0, 0, 0}},
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
  {1, 0, 0, 0}},
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
  {1, 0, 0, 0}},
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
  {1, 0, 0, 0}},
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
  {1, 0, 0, 0}},
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
  {1, 0, 0, 0}},
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
  {1, 0, 0, 0}},
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
  {1, 0, 0, 0}},
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
  {1, 0, 0, 0}},
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
  {1, 0, 0, 0}},
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
  {1, 0, 0, 0}},
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
  {1, 0, 0, 0}}}
};
 234
/*
 * Precomputation for the group generator.
 * g_pre_comp has the same [2][16][3] shape as the static gmul table.
 */
struct nistp224_pre_comp_st {
    felem g_pre_comp[2][16][3];     /* precomputed points as {X, Y, Z} */
    CRYPTO_REF_COUNT references;    /* reference count for this object */
    CRYPTO_RWLOCK *lock;            /* NOTE(review): presumably guards
                                     * 'references' - usage is not visible
                                     * in this part of the file */
};
 241
/*
 * Return the EC_METHOD table for the 64-bit NIST P-224 implementation.
 *
 * The table is a function-local static const object, so every call returns
 * the same pointer. The initializer is positional: the order of the entries
 * has to match the member order of EC_METHOD, and unused slots are filled
 * with 0. Only the entries named ec_GFp_nistp224_* are specific to this
 * file; the remaining ones reuse the generic ec_GFp_simple_*, ec_GFp_nist_*,
 * ec_key_simple_*, ecdh_simple_* and ecdsa_simple_* routines.
 */
const EC_METHOD *EC_GFp_nistp224_method(void)
{
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        /* group handling */
        ec_GFp_nistp224_group_init,
        ec_GFp_simple_group_finish,
        ec_GFp_simple_group_clear_finish,
        ec_GFp_nist_group_copy,
        ec_GFp_nistp224_group_set_curve,
        ec_GFp_simple_group_get_curve,
        ec_GFp_simple_group_get_degree,
        ec_group_simple_order_bits,
        ec_GFp_simple_group_check_discriminant,
        /* point handling */
        ec_GFp_simple_point_init,
        ec_GFp_simple_point_finish,
        ec_GFp_simple_point_clear_finish,
        ec_GFp_simple_point_copy,
        ec_GFp_simple_point_set_to_infinity,
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
        ec_GFp_simple_point_set_affine_coordinates,
        ec_GFp_nistp224_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */ ,
        0 /* point2oct */ ,
        0 /* oct2point */ ,
        /* point arithmetic */
        ec_GFp_simple_add,
        ec_GFp_simple_dbl,
        ec_GFp_simple_invert,
        ec_GFp_simple_is_at_infinity,
        ec_GFp_simple_is_on_curve,
        ec_GFp_simple_cmp,
        ec_GFp_simple_make_affine,
        ec_GFp_simple_points_make_affine,
        ec_GFp_nistp224_points_mul,
        ec_GFp_nistp224_precompute_mult,
        ec_GFp_nistp224_have_precompute_mult,
        /* field arithmetic */
        ec_GFp_nist_field_mul,
        ec_GFp_nist_field_sqr,
        0 /* field_div */ ,
        ec_GFp_simple_field_inv,
        0 /* field_encode */ ,
        0 /* field_decode */ ,
        0,                      /* field_set_to_one */
        /* key handling, ECDH and ECDSA */
        ec_key_simple_priv2oct,
        ec_key_simple_oct2priv,
        0, /* set private */
        ec_key_simple_generate_key,
        ec_key_simple_check_key,
        ec_key_simple_generate_public_key,
        0, /* keycopy */
        0, /* keyfinish */
        ecdh_simple_compute_key,
        ecdsa_simple_sign_setup,
        ecdsa_simple_sign_sig,
        ecdsa_simple_verify_sig,
        0, /* field_inverse_mod_ord */
        0, /* blind_coordinates */
        0, /* ladder_pre */
        0, /* ladder_step */
        0  /* ladder_post */
    };

    return &ret;
}
 307
 308 /*
 309  * Helper functions to convert field elements to/from internal representation
 310  */
 311 static void bin28_to_felem(felem out, const u8 in[28])
 312 {
 313     out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
 314     out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
 315     out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
 316     out[3] = (*((const uint64_t *)(in+20))) >> 8;
 317 }
 318
 319 static void felem_to_bin28(u8 out[28], const felem in)
 320 {
 321     unsigned i;
 322     for (i = 0; i < 7; ++i) {
 323         out[i] = in[0] >> (8 * i);
 324         out[i + 7] = in[1] >> (8 * i);
 325         out[i + 14] = in[2] >> (8 * i);
 326         out[i + 21] = in[3] >> (8 * i);
 327     }
 328 }
 329
 330 /* From OpenSSL BIGNUM to internal representation */
 331 static int BN_to_felem(felem out, const BIGNUM *bn)
 332 {
 333     felem_bytearray b_out;
 334     int num_bytes;
 335
 336     if (BN_is_negative(bn)) {
 337         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 338         return 0;
 339     }
 340     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
 341     if (num_bytes < 0) {
 342         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 343         return 0;
 344     }
 345     bin28_to_felem(out, b_out);
 346     return 1;
 347 }
 348
 349 /* From internal representation to OpenSSL BIGNUM */
 350 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
 351 {
 352     felem_bytearray b_out;
 353     felem_to_bin28(b_out, in);
 354     return BN_lebin2bn(b_out, sizeof(b_out), out);
 355 }
 356
 357 /******************************************************************************/
 358 /*-
 359  *                              FIELD OPERATIONS
 360  *
 361  * Field operations, using the internal representation of field elements.
 362  * NB! These operations are specific to our point multiplication and cannot be
 363  * expected to be correct in general - e.g., multiplication with a large scalar
 364  * will cause an overflow.
 365  *
 366  */
 367
 368 static void felem_one(felem out)
 369 {
 370     out[0] = 1;
 371     out[1] = 0;
 372     out[2] = 0;
 373     out[3] = 0;
 374 }
 375
 376 static void felem_assign(felem out, const felem in)
 377 {
 378     out[0] = in[0];
 379     out[1] = in[1];
 380     out[2] = in[2];
 381     out[3] = in[3];
 382 }
 383
 384 /* Sum two field elements: out += in */
 385 static void felem_sum(felem out, const felem in)
 386 {
 387     out[0] += in[0];
 388     out[1] += in[1];
 389     out[2] += in[2];
 390     out[3] += in[3];
 391 }
 392
 393 /* Subtract field elements: out -= in */
 394 /* Assumes in[i] < 2^57 */
 395 static void felem_diff(felem out, const felem in)
 396 {
 397     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 398     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 399     static const limb two58m42m2 = (((limb) 1) << 58) -
 400         (((limb) 1) << 42) - (((limb) 1) << 2);
 401
 402     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 403     out[0] += two58p2;
 404     out[1] += two58m42m2;
 405     out[2] += two58m2;
 406     out[3] += two58m2;
 407
 408     out[0] -= in[0];
 409     out[1] -= in[1];
 410     out[2] -= in[2];
 411     out[3] -= in[3];
 412 }
 413
 414 /* Subtract in unreduced 128-bit mode: out -= in */
 415 /* Assumes in[i] < 2^119 */
 416 static void widefelem_diff(widefelem out, const widefelem in)
 417 {
 418     static const widelimb two120 = ((widelimb) 1) << 120;
 419     static const widelimb two120m64 = (((widelimb) 1) << 120) -
 420         (((widelimb) 1) << 64);
 421     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
 422         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
 423
 424     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 425     out[0] += two120;
 426     out[1] += two120m64;
 427     out[2] += two120m64;
 428     out[3] += two120;
 429     out[4] += two120m104m64;
 430     out[5] += two120m64;
 431     out[6] += two120m64;
 432
 433     out[0] -= in[0];
 434     out[1] -= in[1];
 435     out[2] -= in[2];
 436     out[3] -= in[3];
 437     out[4] -= in[4];
 438     out[5] -= in[5];
 439     out[6] -= in[6];
 440 }
 441
 442 /* Subtract in mixed mode: out128 -= in64 */
 443 /* in[i] < 2^63 */
 444 static void felem_diff_128_64(widefelem out, const felem in)
 445 {
 446     static const widelimb two64p8 = (((widelimb) 1) << 64) +
 447         (((widelimb) 1) << 8);
 448     static const widelimb two64m8 = (((widelimb) 1) << 64) -
 449         (((widelimb) 1) << 8);
 450     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
 451         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
 452
 453     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 454     out[0] += two64p8;
 455     out[1] += two64m48m8;
 456     out[2] += two64m8;
 457     out[3] += two64m8;
 458
 459     out[0] -= in[0];
 460     out[1] -= in[1];
 461     out[2] -= in[2];
 462     out[3] -= in[3];
 463 }
 464
 465 /*
 466  * Multiply a field element by a scalar: out = out * scalar The scalars we
 467  * actually use are small, so results fit without overflow
 468  */
 469 static void felem_scalar(felem out, const limb scalar)
 470 {
 471     out[0] *= scalar;
 472     out[1] *= scalar;
 473     out[2] *= scalar;
 474     out[3] *= scalar;
 475 }
 476
 477 /*
 478  * Multiply an unreduced field element by a scalar: out = out * scalar The
 479  * scalars we actually use are small, so results fit without overflow
 480  */
 481 static void widefelem_scalar(widefelem out, const widelimb scalar)
 482 {
 483     out[0] *= scalar;
 484     out[1] *= scalar;
 485     out[2] *= scalar;
 486     out[3] *= scalar;
 487     out[4] *= scalar;
 488     out[5] *= scalar;
 489     out[6] *= scalar;
 490 }
 491
 492 /* Square a field element: out = in^2 */
 493 static void felem_square(widefelem out, const felem in)
 494 {
 495     limb tmp0, tmp1, tmp2;
 496     tmp0 = 2 * in[0];
 497     tmp1 = 2 * in[1];
 498     tmp2 = 2 * in[2];
 499     out[0] = ((widelimb) in[0]) * in[0];
 500     out[1] = ((widelimb) in[0]) * tmp1;
 501     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
 502     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
 503     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
 504     out[5] = ((widelimb) in[3]) * tmp2;
 505     out[6] = ((widelimb) in[3]) * in[3];
 506 }
 507
 508 /* Multiply two field elements: out = in1 * in2 */
 509 static void felem_mul(widefelem out, const felem in1, const felem in2)
 510 {
 511     out[0] = ((widelimb) in1[0]) * in2[0];
 512     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
 513     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
 514              ((widelimb) in1[2]) * in2[0];
 515     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
 516              ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
 517     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
 518              ((widelimb) in1[3]) * in2[1];
 519     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
 520     out[6] = ((widelimb) in1[3]) * in2[3];
 521 }
 522
/*-
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 * Requires in[i] < 2^126,
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
 *
 * The elimination steps use 2^224 = 2^96 - 1 (mod p): a coefficient c at
 * limb position k + 4 is worth c * 2^40 at position k + 1 minus c at
 * position k. c * 2^40 is split so that its low part, (c & 0xffff) << 40,
 * goes to position k + 1 and its high part, c >> 16, to position k + 2.
 */
static void felem_reduce(felem out, const widefelem in)
{
    /* limbs of a multiple of p, large enough to absorb the subtractions */
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
        (((widelimb) 1) << 15);
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71);
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
    widelimb output[5];

    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
    output[0] = in[0] + two127p15;
    output[1] = in[1] + two127m71m55;
    output[2] = in[2] + two127m71;
    output[3] = in[3];
    output[4] = in[4];

    /* Eliminate in[4], in[5], in[6] */
    /* in[6] (position 6) folds into positions 4, 3 and 2 */
    output[4] += in[6] >> 16;
    output[3] += (in[6] & 0xffff) << 40;
    output[2] -= in[6];

    /* in[5] (position 5) folds into positions 3, 2 and 1 */
    output[3] += in[5] >> 16;
    output[2] += (in[5] & 0xffff) << 40;
    output[1] -= in[5];

    /* output[4] (position 4) folds into positions 2, 1 and 0 */
    output[2] += output[4] >> 16;
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 2 -> 3 -> 4 */
    output[3] += output[2] >> 56;
    output[2] &= 0x00ffffffffffffff;

    /* output[4] is reused here for the carry out of limb 3 */
    output[4] = output[3] >> 56;
    output[3] &= 0x00ffffffffffffff;

    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */

    /* Eliminate output[4] */
    output[2] += output[4] >> 16;
    /* output[2] < 2^56 + 2^56 = 2^57 */
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 0 -> 1 -> 2 -> 3 */
    output[1] += output[0] >> 56;
    out[0] = output[0] & 0x00ffffffffffffff;

    output[2] += output[1] >> 56;
    /* output[2] < 2^57 + 2^72 */
    out[1] = output[1] & 0x00ffffffffffffff;
    output[3] += output[2] >> 56;
    /* output[3] <= 2^56 + 2^16 */
    out[2] = output[2] & 0x00ffffffffffffff;

    /*-
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
     * out[3] <= 2^56 + 2^16 (due to final carry),
     * so out < 2*p
     */
    out[3] = output[3];
}
 590
 591 static void felem_square_reduce(felem out, const felem in)
 592 {
 593     widefelem tmp;
 594     felem_square(tmp, in);
 595     felem_reduce(out, tmp);
 596 }
 597
 598 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
 599 {
 600     widefelem tmp;
 601     felem_mul(tmp, in1, in2);
 602     felem_reduce(out, tmp);
 603 }
 604
/*
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 * call felem_reduce first)
 *
 * The work is done on signed 64-bit copies of the limbs. Every decision is
 * turned into an all-zero or all-one mask and applied with '&', so the code
 * contains no branch that depends on the value of in.
 */
static void felem_contract(felem out, const felem in)
{
    static const int64_t two56 = ((limb) 1) << 56;
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
    /* if in >= p , reduce in = in - 2^224 + 2^96 - 1 */
    int64_t tmp[4], a;
    tmp[0] = in[0];
    tmp[1] = in[1];
    tmp[2] = in[2];
    tmp[3] = in[3];
    /* Case 1: a = 1 iff in >= 2^224 */
    a = (in[3] >> 56);
    /* subtract a * p: -a at limb 0, +a * 2^96 at limb 1, drop bit 224 */
    tmp[0] -= a;
    tmp[1] += a << 40;
    tmp[3] &= 0x00ffffffffffffff;
    /*
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
     * and the lower part is non-zero
     */
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
    a &= 0x00ffffffffffffff;
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
    a = (a - 1) >> 63;
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
    tmp[3] &= a ^ 0xffffffffffffffff;
    tmp[2] &= a ^ 0xffffffffffffffff;
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
    tmp[0] -= 1 & a;

    /*
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
     * non-zero, so we only need one step
     */
    a = tmp[0] >> 63;           /* all-one mask iff tmp[0] < 0 */
    tmp[0] += two56 & a;        /* borrow 2^56 from limb 1 ... */
    tmp[1] -= 1 & a;            /* ... and pay it back there */

    /* carry 1 -> 2 -> 3 */
    tmp[2] += tmp[1] >> 56;
    tmp[1] &= 0x00ffffffffffffff;

    tmp[3] += tmp[2] >> 56;
    tmp[2] &= 0x00ffffffffffffff;

    /* Now 0 <= out < p */
    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}
 660
 661 /*
 662  * Get negative value: out = -in
 663  * Requires in[i] < 2^63,
 664  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
 665  */
 666 static void felem_neg(felem out, const felem in)
 667 {
 668     widefelem tmp;
 669
 670     memset(tmp, 0, sizeof(tmp));
 671     felem_diff_128_64(tmp, in);
 672     felem_reduce(out, tmp);
 673 }
 674
 675 /*
 676  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 677  * elements are reduced to in < 2^225, so we only need to check three cases:
 678  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 679  */
 680 static limb felem_is_zero(const felem in)
 681 {
 682     limb zero, two224m96p1, two225m97p2;
 683
 684     zero = in[0] | in[1] | in[2] | in[3];
 685     zero = (((int64_t) (zero) - 1) >> 63) & 1;
 686     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
 687         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
 688     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
 689     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
 690         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
 691     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
 692     return (zero | two224m96p1 | two225m97p2);
 693 }
 694
 695 static int felem_is_zero_int(const void *in)
 696 {
 697     return (int)(felem_is_zero(in) & ((limb) 1));
 698 }
 699
 700 /* Invert a field element */
 701 /* Computation chain copied from djb's code */
 702 static void felem_inv(felem out, const felem in)
 703 {
 704     felem ftmp, ftmp2, ftmp3, ftmp4;
 705     widefelem tmp;
 706     unsigned i;
 707
 708     felem_square(tmp, in);
 709     felem_reduce(ftmp, tmp);    /* 2 */
 710     felem_mul(tmp, in, ftmp);
 711     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
 712     felem_square(tmp, ftmp);
 713     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
 714     felem_mul(tmp, in, ftmp);
 715     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
 716     felem_square(tmp, ftmp);
 717     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
 718     felem_square(tmp, ftmp2);
 719     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
 720     felem_square(tmp, ftmp2);
 721     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
 722     felem_mul(tmp, ftmp2, ftmp);
 723     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
 724     felem_square(tmp, ftmp);
 725     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
 726     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
 727         felem_square(tmp, ftmp2);
 728         felem_reduce(ftmp2, tmp);
 729     }
 730     felem_mul(tmp, ftmp2, ftmp);
 731     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
 732     felem_square(tmp, ftmp2);
 733     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
 734     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
 735         felem_square(tmp, ftmp3);
 736         felem_reduce(ftmp3, tmp);
 737     }
 738     felem_mul(tmp, ftmp3, ftmp2);
 739     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
 740     felem_square(tmp, ftmp2);
 741     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
 742     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
 743         felem_square(tmp, ftmp3);
 744         felem_reduce(ftmp3, tmp);
 745     }
 746     felem_mul(tmp, ftmp3, ftmp2);
 747     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
 748     felem_square(tmp, ftmp3);
 749     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
 750     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
 751         felem_square(tmp, ftmp4);
 752         felem_reduce(ftmp4, tmp);
 753     }
 754     felem_mul(tmp, ftmp3, ftmp4);
 755     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
 756     felem_square(tmp, ftmp3);
 757     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
 758     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
 759         felem_square(tmp, ftmp4);
 760         felem_reduce(ftmp4, tmp);
 761     }
 762     felem_mul(tmp, ftmp2, ftmp4);
 763     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
 764     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
 765         felem_square(tmp, ftmp2);
 766         felem_reduce(ftmp2, tmp);
 767     }
 768     felem_mul(tmp, ftmp2, ftmp);
 769     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
 770     felem_square(tmp, ftmp);
 771     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
 772     felem_mul(tmp, ftmp, in);
 773     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
 774     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
 775         felem_square(tmp, ftmp);
 776         felem_reduce(ftmp, tmp);
 777     }
 778     felem_mul(tmp, ftmp, ftmp3);
 779     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
 780 }
 781
 782 /*
 783  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 784  * out to itself.
 785  */
 786 static void copy_conditional(felem out, const felem in, limb icopy)
 787 {
 788     unsigned i;
 789     /*
 790      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
 791      */
 792     const limb copy = -icopy;
 793     for (i = 0; i < 4; ++i) {
 794         const limb tmp = copy & (in[i] ^ out[i]);
 795         out[i] ^= tmp;
 796     }
 797 }
 798
 799 /******************************************************************************/
 800 /*-
 801  *                       ELLIPTIC CURVE POINT OPERATIONS
 802  *
 803  * Points are represented in Jacobian projective coordinates:
 804  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 805  * or to the point at infinity if Z == 0.
 806  *
 807  */
 808
 809 /*-
 810  * Double an elliptic curve point:
 811  * (X', Y', Z') = 2 * (X, Y, Z), where
 812  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 813  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
 814  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 815  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 816  * while x_out == y_in is not (maybe this works, but it's not tested).
 817  */
 818 static void
 819 point_double(felem x_out, felem y_out, felem z_out,
 820              const felem x_in, const felem y_in, const felem z_in)
 821 {
 822     widefelem tmp, tmp2;
 823     felem delta, gamma, beta, alpha, ftmp, ftmp2;
 824
 825     felem_assign(ftmp, x_in);
 826     felem_assign(ftmp2, x_in);
 827
 828     /* delta = z^2 */
 829     felem_square(tmp, z_in);
 830     felem_reduce(delta, tmp);
 831
 832     /* gamma = y^2 */
 833     felem_square(tmp, y_in);
 834     felem_reduce(gamma, tmp);
 835
 836     /* beta = x*gamma */
 837     felem_mul(tmp, x_in, gamma);
 838     felem_reduce(beta, tmp);
 839
 840     /* alpha = 3*(x-delta)*(x+delta) */
 841     felem_diff(ftmp, delta);
 842     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
 843     felem_sum(ftmp2, delta);
 844     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
 845     felem_scalar(ftmp2, 3);
 846     /* ftmp2[i] < 3 * 2^58 < 2^60 */
 847     felem_mul(tmp, ftmp, ftmp2);
 848     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
 849     felem_reduce(alpha, tmp);
 850
 851     /* x' = alpha^2 - 8*beta */
 852     felem_square(tmp, alpha);
 853     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 854     felem_assign(ftmp, beta);
 855     felem_scalar(ftmp, 8);
 856     /* ftmp[i] < 8 * 2^57 = 2^60 */
 857     felem_diff_128_64(tmp, ftmp);
 858     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 859     felem_reduce(x_out, tmp);
 860
 861     /* z' = (y + z)^2 - gamma - delta */
 862     felem_sum(delta, gamma);
 863     /* delta[i] < 2^57 + 2^57 = 2^58 */
 864     felem_assign(ftmp, y_in);
 865     felem_sum(ftmp, z_in);
 866     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
 867     felem_square(tmp, ftmp);
 868     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
 869     felem_diff_128_64(tmp, delta);
 870     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
 871     felem_reduce(z_out, tmp);
 872
 873     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
 874     felem_scalar(beta, 4);
 875     /* beta[i] < 4 * 2^57 = 2^59 */
 876     felem_diff(beta, x_out);
 877     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
 878     felem_mul(tmp, alpha, beta);
 879     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
 880     felem_square(tmp2, gamma);
 881     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
 882     widefelem_scalar(tmp2, 8);
 883     /* tmp2[i] < 8 * 2^116 = 2^119 */
 884     widefelem_diff(tmp, tmp2);
 885     /* tmp[i] < 2^119 + 2^120 < 2^121 */
 886     felem_reduce(y_out, tmp);
 887 }
 888
/*-
 * Add two elliptic curve points:
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 *
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 *
 * Parameters:
 *   x3, y3, z3  receive the Jacobian coordinates of the sum. On the generic
 *               path they are written only at the very end, from local
 *               copies; batch_mul() in this file passes the same buffers
 *               for (x3, y3, z3) and (x1, y1, z1).
 *   x1, y1, z1  first summand.
 *   mixed       non-zero to skip the multiplications by powers of z2.
 *   x2, y2, z2  second summand.
 *
 * Special cases:
 *   - If both inputs are the same finite point, the work is delegated to
 *     point_double().
 *   - If either input has Z == 0 (point at infinity), the other input is
 *     copied to the output with copy_conditional().
 */

/*
 * This function is not entirely constant-time: it includes a branch for
 * checking whether the two input points are equal, (while not equal to the
 * point at infinity). This case never happens during single point
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const felem x2, const felem y2,
                      const felem z2)
{
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
    widefelem tmp, tmp2;
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
    limb points_equal;

    if (!mixed) {
        /* ftmp2 = z2^2 */
        felem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);

        /* ftmp4 = z2^3 */
        felem_mul(tmp, ftmp2, z2);
        felem_reduce(ftmp4, tmp);

        /* ftmp4 = z2^3*y1 */
        felem_mul(tmp2, ftmp4, y1);
        felem_reduce(ftmp4, tmp2);

        /* ftmp2 = z2^2*x1 */
        felem_mul(tmp2, ftmp2, x1);
        felem_reduce(ftmp2, tmp2);
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* ftmp4 = z2^3*y1 */
        felem_assign(ftmp4, y1);

        /* ftmp2 = z2^2*x1 */
        felem_assign(ftmp2, x1);
    }

    /* ftmp = z1^2 */
    felem_square(tmp, z1);
    felem_reduce(ftmp, tmp);

    /* ftmp3 = z1^3 */
    felem_mul(tmp, ftmp, z1);
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^3*y2 */
    felem_mul(tmp, ftmp3, y2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp3 = z1^3*y2 - z2^3*y1 */
    felem_diff_128_64(tmp, ftmp4);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^2*x2 */
    felem_mul(tmp, ftmp, x2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp = z1^2*x2 - z2^2*x1 */
    felem_diff_128_64(tmp, ftmp2);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp, tmp);

    /*
     * The formulae are incorrect if the points are equal, in affine coordinates
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
     * happens.
     *
     * We use bitwise operations to avoid potential side-channels introduced by
     * the short-circuiting behaviour of boolean operators.
     */
    x_equal = felem_is_zero(ftmp);
    y_equal = felem_is_zero(ftmp3);
    /*
     * The special case of either point being the point at infinity (z1 and/or
     * z2 are zero), is handled separately later on in this function, so we
     * avoid jumping to point_double here in those special cases.
     */
    z1_is_zero = felem_is_zero(z1);
    z2_is_zero = felem_is_zero(z2);

    /*
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
     * specific implementation `felem_is_zero()` returns truth as `0x1`
     * (rather than `0xff..ff`).
     *
     * This implies that `~true` in this implementation becomes
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
     * the if expression, we mask out only the last bit in the next
     * line.
     */
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;

    if (points_equal) {
        /*
         * This is obviously not constant-time but, as mentioned before, this
         * case never happens during single point multiplication, so there is no
         * timing leak for ECDH or ECDSA signing.
         */
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* ftmp5 = z1*z2 */
    if (!mixed) {
        felem_mul(tmp, z1, z2);
        felem_reduce(ftmp5, tmp);
    } else {
        /* special case z2 = 0 is handled later */
        felem_assign(ftmp5, z1);
    }

    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(z_out, tmp);

    /*
     * ftmp = (z1^2*x2 - z2^2*x1)^2
     * (ftmp5 first saves the unsquared difference for the cube below)
     */
    felem_assign(ftmp5, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(ftmp5, tmp);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);

    /*
     * tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3
     * (kept unreduced in tmp until it is subtracted for y_out)
     */
    felem_mul(tmp, ftmp4, ftmp5);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
    felem_square(tmp2, ftmp3);
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */

    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp2);
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2 * 2^57 = 2^58 */

    /*-
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
     */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
    felem_reduce(x_out, tmp2);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
    felem_diff(ftmp2, x_out);
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */

    /*
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
     */
    felem_mul(tmp2, ftmp3, ftmp2);
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */

    /*-
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
     */
    widefelem_diff(tmp2, tmp);
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp2);

    /*
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
     * the point at infinity, so we need to check for this separately
     */

    /*
     * if point 1 is at infinity, copy point 2 to output, and vice versa
     * (if both are at infinity the second copy wins, so z_out ends up as z1,
     * which is zero)
     */
    copy_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    /* only now touch the caller's output buffers, which may alias inputs */
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}
1098
1099 /*
1100  * select_point selects the |idx|th point from a precomputation table and
1101  * copies it to out.
1102  * The pre_comp array argument should be size of |size| argument
1103  */
1104 static void select_point(const u64 idx, unsigned int size,
1105                          const felem pre_comp[][3], felem out[3])
1106 {
1107     unsigned i, j;
1108     limb *outlimbs = &out[0][0];
1109
1110     memset(out, 0, sizeof(*out) * 3);
1111     for (i = 0; i < size; i++) {
1112         const limb *inlimbs = &pre_comp[i][0][0];
1113         u64 mask = i ^ idx;
1114         mask |= mask >> 4;
1115         mask |= mask >> 2;
1116         mask |= mask >> 1;
1117         mask &= 1;
1118         mask--;
1119         for (j = 0; j < 4 * 3; j++)
1120             outlimbs[j] |= inlimbs[j] & mask;
1121     }
1122 }
1123
1124 /* get_bit returns the |i|th bit in |in| */
1125 static char get_bit(const felem_bytearray in, unsigned i)
1126 {
1127     if (i >= 224)
1128         return 0;
1129     return (in[i >> 3] >> (i & 7)) & 1;
1130 }
1131
1132 /*
1133  * Interleaved point multiplication using precomputed point multiples: The
1134  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1135  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1136  * generator, using certain (large) precomputed multiples in g_pre_comp.
1137  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1138  */
1139 static void batch_mul(felem x_out, felem y_out, felem z_out,
1140                       const felem_bytearray scalars[],
1141                       const unsigned num_points, const u8 *g_scalar,
1142                       const int mixed, const felem pre_comp[][17][3],
1143                       const felem g_pre_comp[2][16][3])
1144 {
1145     int i, skip;
1146     unsigned num;
1147     unsigned gen_mul = (g_scalar != NULL);
1148     felem nq[3], tmp[4];
1149     u64 bits;
1150     u8 sign, digit;
1151
1152     /* set nq to the point at infinity */
1153     memset(nq, 0, sizeof(nq));
1154
1155     /*
1156      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1157      * of the generator (two in each of the last 28 rounds) and additions of
1158      * other points multiples (every 5th round).
1159      */
1160     skip = 1;                   /* save two point operations in the first
1161                                  * round */
1162     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1163         /* double */
1164         if (!skip)
1165             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1166
1167         /* add multiples of the generator */
1168         if (gen_mul && (i <= 27)) {
1169             /* first, look 28 bits upwards */
1170             bits = get_bit(g_scalar, i + 196) << 3;
1171             bits |= get_bit(g_scalar, i + 140) << 2;
1172             bits |= get_bit(g_scalar, i + 84) << 1;
1173             bits |= get_bit(g_scalar, i + 28);
1174             /* select the point to add, in constant time */
1175             select_point(bits, 16, g_pre_comp[1], tmp);
1176
1177             if (!skip) {
1178                 /* value 1 below is argument for "mixed" */
1179                 point_add(nq[0], nq[1], nq[2],
1180                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1181             } else {
1182                 memcpy(nq, tmp, 3 * sizeof(felem));
1183                 skip = 0;
1184             }
1185
1186             /* second, look at the current position */
1187             bits = get_bit(g_scalar, i + 168) << 3;
1188             bits |= get_bit(g_scalar, i + 112) << 2;
1189             bits |= get_bit(g_scalar, i + 56) << 1;
1190             bits |= get_bit(g_scalar, i);
1191             /* select the point to add, in constant time */
1192             select_point(bits, 16, g_pre_comp[0], tmp);
1193             point_add(nq[0], nq[1], nq[2],
1194                       nq[0], nq[1], nq[2],
1195                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1196         }
1197
1198         /* do other additions every 5 doublings */
1199         if (num_points && (i % 5 == 0)) {
1200             /* loop over all scalars */
1201             for (num = 0; num < num_points; ++num) {
1202                 bits = get_bit(scalars[num], i + 4) << 5;
1203                 bits |= get_bit(scalars[num], i + 3) << 4;
1204                 bits |= get_bit(scalars[num], i + 2) << 3;
1205                 bits |= get_bit(scalars[num], i + 1) << 2;
1206                 bits |= get_bit(scalars[num], i) << 1;
1207                 bits |= get_bit(scalars[num], i - 1);
1208                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1209
1210                 /* select the point to add or subtract */
1211                 select_point(digit, 17, pre_comp[num], tmp);
1212                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1213                                             * point */
1214                 copy_conditional(tmp[1], tmp[3], sign);
1215
1216                 if (!skip) {
1217                     point_add(nq[0], nq[1], nq[2],
1218                               nq[0], nq[1], nq[2],
1219                               mixed, tmp[0], tmp[1], tmp[2]);
1220                 } else {
1221                     memcpy(nq, tmp, 3 * sizeof(felem));
1222                     skip = 0;
1223                 }
1224             }
1225         }
1226     }
1227     felem_assign(x_out, nq[0]);
1228     felem_assign(y_out, nq[1]);
1229     felem_assign(z_out, nq[2]);
1230 }
1231
1232 /******************************************************************************/
1233 /*
1234  * FUNCTIONS TO MANAGE PRECOMPUTATION
1235  */
1236
1237 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1238 {
1239     NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1240
1241     if (!ret) {
1242         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1243         return ret;
1244     }
1245
1246     ret->references = 1;
1247
1248     ret->lock = CRYPTO_THREAD_lock_new();
1249     if (ret->lock == NULL) {
1250         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1251         OPENSSL_free(ret);
1252         return NULL;
1253     }
1254     return ret;
1255 }
1256
1257 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1258 {
1259     int i;
1260     if (p != NULL)
1261         CRYPTO_UP_REF(&p->references, &i, p->lock);
1262     return p;
1263 }
1264
1265 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1266 {
1267     int i;
1268
1269     if (p == NULL)
1270         return;
1271
1272     CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1273     REF_PRINT_COUNT("EC_nistp224", x);
1274     if (i > 0)
1275         return;
1276     REF_ASSERT_ISNT(i < 0);
1277
1278     CRYPTO_THREAD_lock_free(p->lock);
1279     OPENSSL_free(p);
1280 }
1281
1282 /******************************************************************************/
1283 /*
1284  * OPENSSL EC_METHOD FUNCTIONS
1285  */
1286
1287 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1288 {
1289     int ret;
1290     ret = ec_GFp_simple_group_init(group);
1291     group->a_is_minus3 = 1;
1292     return ret;
1293 }
1294
1295 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1296                                     const BIGNUM *a, const BIGNUM *b,
1297                                     BN_CTX *ctx)
1298 {
1299     int ret = 0;
1300     BIGNUM *curve_p, *curve_a, *curve_b;
1301 #ifndef FIPS_MODE
1302     BN_CTX *new_ctx = NULL;
1303
1304     if (ctx == NULL)
1305         ctx = new_ctx = BN_CTX_new();
1306 #endif
1307     if (ctx == NULL)
1308         return 0;
1309
1310     BN_CTX_start(ctx);
1311     curve_p = BN_CTX_get(ctx);
1312     curve_a = BN_CTX_get(ctx);
1313     curve_b = BN_CTX_get(ctx);
1314     if (curve_b == NULL)
1315         goto err;
1316     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1317     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1318     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1319     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1320         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1321               EC_R_WRONG_CURVE_PARAMETERS);
1322         goto err;
1323     }
1324     group->field_mod_func = BN_nist_mod_224;
1325     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1326  err:
1327     BN_CTX_end(ctx);
1328 #ifndef FIPS_MODE
1329     BN_CTX_free(new_ctx);
1330 #endif
1331     return ret;
1332 }
1333
1334 /*
1335  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1336  * (X/Z^2, Y/Z^3)
1337  */
1338 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1339                                                  const EC_POINT *point,
1340                                                  BIGNUM *x, BIGNUM *y,
1341                                                  BN_CTX *ctx)
1342 {
1343     felem z1, z2, x_in, y_in, x_out, y_out;
1344     widefelem tmp;
1345
1346     if (EC_POINT_is_at_infinity(group, point)) {
1347         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1348               EC_R_POINT_AT_INFINITY);
1349         return 0;
1350     }
1351     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1352         (!BN_to_felem(z1, point->Z)))
1353         return 0;
1354     felem_inv(z2, z1);
1355     felem_square(tmp, z2);
1356     felem_reduce(z1, tmp);
1357     felem_mul(tmp, x_in, z1);
1358     felem_reduce(x_in, tmp);
1359     felem_contract(x_out, x_in);
1360     if (x != NULL) {
1361         if (!felem_to_BN(x, x_out)) {
1362             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1363                   ERR_R_BN_LIB);
1364             return 0;
1365         }
1366     }
1367     felem_mul(tmp, z1, z2);
1368     felem_reduce(z1, tmp);
1369     felem_mul(tmp, y_in, z1);
1370     felem_reduce(y_in, tmp);
1371     felem_contract(y_out, y_in);
1372     if (y != NULL) {
1373         if (!felem_to_BN(y, y_out)) {
1374             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1375                   ERR_R_BN_LIB);
1376             return 0;
1377         }
1378     }
1379     return 1;
1380 }
1381
1382 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1383                                felem tmp_felems[ /* num+1 */ ])
1384 {
1385     /*
1386      * Runs in constant time, unless an input is the point at infinity (which
1387      * normally shouldn't happen).
1388      */
1389     ec_GFp_nistp_points_make_affine_internal(num,
1390                                              points,
1391                                              sizeof(felem),
1392                                              tmp_felems,
1393                                              (void (*)(void *))felem_one,
1394                                              felem_is_zero_int,
1395                                              (void (*)(void *, const void *))
1396                                              felem_assign,
1397                                              (void (*)(void *, const void *))
1398                                              felem_square_reduce, (void (*)
1399                                                                    (void *,
1400                                                                     const void
1401                                                                     *,
1402                                                                     const void
1403                                                                     *))
1404                                              felem_mul_reduce,
1405                                              (void (*)(void *, const void *))
1406                                              felem_inv,
1407                                              (void (*)(void *, const void *))
1408                                              felem_contract);
1409 }
1410
1411 /*
1412  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1413  * values Result is stored in r (r can equal one of the inputs).
1414  */
1415 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1416                                const BIGNUM *scalar, size_t num,
1417                                const EC_POINT *points[],
1418                                const BIGNUM *scalars[], BN_CTX *ctx)
1419 {
1420     int ret = 0;
1421     int j;
1422     unsigned i;
1423     int mixed = 0;
1424     BIGNUM *x, *y, *z, *tmp_scalar;
1425     felem_bytearray g_secret;
1426     felem_bytearray *secrets = NULL;
1427     felem (*pre_comp)[17][3] = NULL;
1428     felem *tmp_felems = NULL;
1429     int num_bytes;
1430     int have_pre_comp = 0;
1431     size_t num_points = num;
1432     felem x_in, y_in, z_in, x_out, y_out, z_out;
1433     NISTP224_PRE_COMP *pre = NULL;
1434     const felem(*g_pre_comp)[16][3] = NULL;
1435     EC_POINT *generator = NULL;
1436     const EC_POINT *p = NULL;
1437     const BIGNUM *p_scalar = NULL;
1438
1439     BN_CTX_start(ctx);
1440     x = BN_CTX_get(ctx);
1441     y = BN_CTX_get(ctx);
1442     z = BN_CTX_get(ctx);
1443     tmp_scalar = BN_CTX_get(ctx);
1444     if (tmp_scalar == NULL)
1445         goto err;
1446
1447     if (scalar != NULL) {
1448         pre = group->pre_comp.nistp224;
1449         if (pre)
1450             /* we have precomputation, try to use it */
1451             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1452         else
1453             /* try to use the standard precomputation */
1454             g_pre_comp = &gmul[0];
1455         generator = EC_POINT_new(group);
1456         if (generator == NULL)
1457             goto err;
1458         /* get the generator from precomputation */
1459         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1460             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1461             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1462             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1463             goto err;
1464         }
1465         if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1466                                                       generator, x, y, z,
1467                                                       ctx))
1468             goto err;
1469         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1470             /* precomputation matches generator */
1471             have_pre_comp = 1;
1472         else
1473             /*
1474              * we don't have valid precomputation: treat the generator as a
1475              * random point
1476              */
1477             num_points = num_points + 1;
1478     }
1479
1480     if (num_points > 0) {
1481         if (num_points >= 3) {
1482             /*
1483              * unless we precompute multiples for just one or two points,
1484              * converting those into affine form is time well spent
1485              */
1486             mixed = 1;
1487         }
1488         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1489         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1490         if (mixed)
1491             tmp_felems =
1492                 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1493         if ((secrets == NULL) || (pre_comp == NULL)
1494             || (mixed && (tmp_felems == NULL))) {
1495             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1496             goto err;
1497         }
1498
1499         /*
1500          * we treat NULL scalars as 0, and NULL points as points at infinity,
1501          * i.e., they contribute nothing to the linear combination
1502          */
1503         for (i = 0; i < num_points; ++i) {
1504             if (i == num) {
1505                 /* the generator */
1506                 p = EC_GROUP_get0_generator(group);
1507                 p_scalar = scalar;
1508             } else {
1509                 /* the i^th point */
1510                 p = points[i];
1511                 p_scalar = scalars[i];
1512             }
1513             if ((p_scalar != NULL) && (p != NULL)) {
1514                 /* reduce scalar to 0 <= scalar < 2^224 */
1515                 if ((BN_num_bits(p_scalar) > 224)
1516                     || (BN_is_negative(p_scalar))) {
1517                     /*
1518                      * this is an unusual input, and we don't guarantee
1519                      * constant-timeness
1520                      */
1521                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1522                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1523                         goto err;
1524                     }
1525                     num_bytes = BN_bn2lebinpad(tmp_scalar,
1526                                                secrets[i], sizeof(secrets[i]));
1527                 } else {
1528                     num_bytes = BN_bn2lebinpad(p_scalar,
1529                                                secrets[i], sizeof(secrets[i]));
1530                 }
1531                 if (num_bytes < 0) {
1532                     ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1533                     goto err;
1534                 }
1535                 /* precompute multiples */
1536                 if ((!BN_to_felem(x_out, p->X)) ||
1537                     (!BN_to_felem(y_out, p->Y)) ||
1538                     (!BN_to_felem(z_out, p->Z)))
1539                     goto err;
1540                 felem_assign(pre_comp[i][1][0], x_out);
1541                 felem_assign(pre_comp[i][1][1], y_out);
1542                 felem_assign(pre_comp[i][1][2], z_out);
1543                 for (j = 2; j <= 16; ++j) {
1544                     if (j & 1) {
1545                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1546                                   pre_comp[i][j][2], pre_comp[i][1][0],
1547                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1548                                   pre_comp[i][j - 1][0],
1549                                   pre_comp[i][j - 1][1],
1550                                   pre_comp[i][j - 1][2]);
1551                     } else {
1552                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1553                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1554                                      pre_comp[i][j / 2][1],
1555                                      pre_comp[i][j / 2][2]);
1556                     }
1557                 }
1558             }
1559         }
1560         if (mixed)
1561             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1562     }
1563
1564     /* the scalar for the generator */
1565     if ((scalar != NULL) && (have_pre_comp)) {
1566         memset(g_secret, 0, sizeof(g_secret));
1567         /* reduce scalar to 0 <= scalar < 2^224 */
1568         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1569             /*
1570              * this is an unusual input, and we don't guarantee
1571              * constant-timeness
1572              */
1573             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1574                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1575                 goto err;
1576             }
1577             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1578         } else {
1579             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1580         }
1581         /* do the multiplication with generator precomputation */
1582         batch_mul(x_out, y_out, z_out,
1583                   (const felem_bytearray(*))secrets, num_points,
1584                   g_secret,
1585                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1586     } else {
1587         /* do the multiplication without generator precomputation */
1588         batch_mul(x_out, y_out, z_out,
1589                   (const felem_bytearray(*))secrets, num_points,
1590                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1591     }
1592     /* reduce the output to its unique minimal representation */
1593     felem_contract(x_in, x_out);
1594     felem_contract(y_in, y_out);
1595     felem_contract(z_in, z_out);
1596     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1597         (!felem_to_BN(z, z_in))) {
1598         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1599         goto err;
1600     }
1601     ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1602
1603  err:
1604     BN_CTX_end(ctx);
1605     EC_POINT_free(generator);
1606     OPENSSL_free(secrets);
1607     OPENSSL_free(pre_comp);
1608     OPENSSL_free(tmp_felems);
1609     return ret;
1610 }
1611
1612 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1613 {
1614     int ret = 0;
1615     NISTP224_PRE_COMP *pre = NULL;
1616     int i, j;
1617     BIGNUM *x, *y;
1618     EC_POINT *generator = NULL;
1619     felem tmp_felems[32];
1620 #ifndef FIPS_MODE
1621     BN_CTX *new_ctx = NULL;
1622 #endif
1623
1624     /* throw away old precomputation */
1625     EC_pre_comp_free(group);
1626
1627 #ifndef FIPS_MODE
1628     if (ctx == NULL)
1629         ctx = new_ctx = BN_CTX_new();
1630 #endif
1631     if (ctx == NULL)
1632         return 0;
1633
1634     BN_CTX_start(ctx);
1635     x = BN_CTX_get(ctx);
1636     y = BN_CTX_get(ctx);
1637     if (y == NULL)
1638         goto err;
1639     /* get the generator */
1640     if (group->generator == NULL)
1641         goto err;
1642     generator = EC_POINT_new(group);
1643     if (generator == NULL)
1644         goto err;
1645     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1646     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1647     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1648         goto err;
1649     if ((pre = nistp224_pre_comp_new()) == NULL)
1650         goto err;
1651     /*
1652      * if the generator is the standard one, use built-in precomputation
1653      */
1654     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1655         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1656         goto done;
1657     }
1658     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1659         (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1660         (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1661         goto err;
1662     /*
1663      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1664      * 2^140*G, 2^196*G for the second one
1665      */
1666     for (i = 1; i <= 8; i <<= 1) {
1667         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1668                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1669                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1670         for (j = 0; j < 27; ++j) {
1671             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1672                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1673                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1674         }
1675         if (i == 8)
1676             break;
1677         point_double(pre->g_pre_comp[0][2 * i][0],
1678                      pre->g_pre_comp[0][2 * i][1],
1679                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1680                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1681         for (j = 0; j < 27; ++j) {
1682             point_double(pre->g_pre_comp[0][2 * i][0],
1683                          pre->g_pre_comp[0][2 * i][1],
1684                          pre->g_pre_comp[0][2 * i][2],
1685                          pre->g_pre_comp[0][2 * i][0],
1686                          pre->g_pre_comp[0][2 * i][1],
1687                          pre->g_pre_comp[0][2 * i][2]);
1688         }
1689     }
1690     for (i = 0; i < 2; i++) {
1691         /* g_pre_comp[i][0] is the point at infinity */
1692         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1693         /* the remaining multiples */
1694         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1695         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1696                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1697                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1698                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1699                   pre->g_pre_comp[i][2][2]);
1700         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1701         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1702                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1703                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1704                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1705                   pre->g_pre_comp[i][2][2]);
1706         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1707         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1708                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1709                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1710                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1711                   pre->g_pre_comp[i][4][2]);
1712         /*
1713          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1714          */
1715         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1716                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1717                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1718                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1719                   pre->g_pre_comp[i][2][2]);
1720         for (j = 1; j < 8; ++j) {
1721             /* odd multiples: add G resp. 2^28*G */
1722             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1723                       pre->g_pre_comp[i][2 * j + 1][1],
1724                       pre->g_pre_comp[i][2 * j + 1][2],
1725                       pre->g_pre_comp[i][2 * j][0],
1726                       pre->g_pre_comp[i][2 * j][1],
1727                       pre->g_pre_comp[i][2 * j][2], 0,
1728                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1729                       pre->g_pre_comp[i][1][2]);
1730         }
1731     }
1732     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1733
1734  done:
1735     SETPRECOMP(group, nistp224, pre);
1736     pre = NULL;
1737     ret = 1;
1738  err:
1739     BN_CTX_end(ctx);
1740     EC_POINT_free(generator);
1741 #ifndef FIPS_MODE
1742     BN_CTX_free(new_ctx);
1743 #endif
1744     EC_nistp224_pre_comp_free(pre);
1745     return ret;
1746 }
1747
1748 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1749 {
1750     return HAVEPRECOMP(group, nistp224);
1751 }
1752
1753 #endif