2 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 /* Copyright 2011 Google Inc.
6 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
24 * and Adam Langley's public domain 64-bit C implementation of curve25519
27 #include <openssl/opensslconf.h>
28 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
32 # include <openssl/err.h>
35 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
36 /* even with gcc, the typedef won't work for 32-bit platforms */
37 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
40 # error "Need GCC 3.1 or later to define type uint128_t"
47 /******************************************************************************/
49 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
51 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
52 * using 64-bit coefficients called 'limbs',
53 * and sometimes (for multiplication results) as
54 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
55 * using 128-bit coefficients called 'widelimbs'.
56 * A 4-limb representation is an 'felem';
57 * a 7-widelimb representation is a 'widefelem'.
58 * Even within felems, bits of adjacent limbs overlap, and we don't always
59 * reduce the representations: we ensure that inputs to each felem
60 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
61 * and fit into a 128-bit word without overflow. The coefficients are then
62 * again partially reduced to obtain an felem satisfying a_i < 2^57.
63 * We only reduce to the unique minimal representation at the end of the
67 typedef uint64_t limb;
68 typedef uint128_t widelimb;
70 typedef limb felem[4];
71 typedef widelimb widefelem[7];
74 * Field element represented as a byte array. 28*8 = 224 bits is also the
75 * group order size for the elliptic curve, and we also use this type for
76 * scalars for point multiplication.
78 typedef u8 felem_bytearray[28];
80 static const felem_bytearray nistp224_curve_params[5] = {
81 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
82 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
83 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
84 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
85 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
86 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
87 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
88 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
89 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
90 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
91 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
92 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
93 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
94 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
95 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
99 * Precomputed multiples of the standard generator
100 * Points are given in coordinates (X, Y, Z) where Z normally is 1
101 * (0 for the point at infinity).
102 * For each field element, slice a_0 is word 0, etc.
104 * The table has 2 * 16 elements, starting with the following:
105 * index | bits | point
106 * ------+---------+------------------------------
109 * 2 | 0 0 1 0 | 2^56G
110 * 3 | 0 0 1 1 | (2^56 + 1)G
111 * 4 | 0 1 0 0 | 2^112G
112 * 5 | 0 1 0 1 | (2^112 + 1)G
113 * 6 | 0 1 1 0 | (2^112 + 2^56)G
114 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
115 * 8 | 1 0 0 0 | 2^168G
116 * 9 | 1 0 0 1 | (2^168 + 1)G
117 * 10 | 1 0 1 0 | (2^168 + 2^56)G
118 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
119 * 12 | 1 1 0 0 | (2^168 + 2^112)G
120 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
121 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
122 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
123 * followed by a copy of this with each element multiplied by 2^28.
125 * The reason for this is so that we can clock bits into four different
126 * locations when doing simple scalar multiplies against the base point,
127 * and then another four locations using the second 16 elements.
129 static const felem gmul[2][16][3] = {
133 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
134 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
136 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
137 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
139 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
140 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
142 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
143 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
145 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
146 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
148 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
149 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
151 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
152 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
154 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
155 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
157 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
158 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
160 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
161 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
163 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
164 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
166 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
167 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
169 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
170 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
172 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
173 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
175 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
176 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
181 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
182 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
184 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
185 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
187 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
188 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
190 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
191 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
193 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
194 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
196 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
197 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
199 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
200 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
202 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
203 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
205 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
206 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
208 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
209 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
211 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
212 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
214 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
215 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
217 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
218 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
220 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
221 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
223 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
224 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
228 /* Precomputation for the group generator. */
229 struct nistp224_pre_comp_st {
230 felem g_pre_comp[2][16][3];
234 const EC_METHOD *EC_GFp_nistp224_method(void)
236 static const EC_METHOD ret = {
237 EC_FLAGS_DEFAULT_OCT,
238 NID_X9_62_prime_field,
239 ec_GFp_nistp224_group_init,
240 ec_GFp_simple_group_finish,
241 ec_GFp_simple_group_clear_finish,
242 ec_GFp_nist_group_copy,
243 ec_GFp_nistp224_group_set_curve,
244 ec_GFp_simple_group_get_curve,
245 ec_GFp_simple_group_get_degree,
246 ec_GFp_simple_group_check_discriminant,
247 ec_GFp_simple_point_init,
248 ec_GFp_simple_point_finish,
249 ec_GFp_simple_point_clear_finish,
250 ec_GFp_simple_point_copy,
251 ec_GFp_simple_point_set_to_infinity,
252 ec_GFp_simple_set_Jprojective_coordinates_GFp,
253 ec_GFp_simple_get_Jprojective_coordinates_GFp,
254 ec_GFp_simple_point_set_affine_coordinates,
255 ec_GFp_nistp224_point_get_affine_coordinates,
256 0 /* point_set_compressed_coordinates */ ,
261 ec_GFp_simple_invert,
262 ec_GFp_simple_is_at_infinity,
263 ec_GFp_simple_is_on_curve,
265 ec_GFp_simple_make_affine,
266 ec_GFp_simple_points_make_affine,
267 ec_GFp_nistp224_points_mul,
268 ec_GFp_nistp224_precompute_mult,
269 ec_GFp_nistp224_have_precompute_mult,
270 ec_GFp_nist_field_mul,
271 ec_GFp_nist_field_sqr,
273 0 /* field_encode */ ,
274 0 /* field_decode */ ,
275 0 /* field_set_to_one */
282 * Helper functions to convert field elements to/from internal representation
/*
 * Convert a 28-byte little-endian array into the internal 4-limb felem
 * representation: each limb takes 56 bits (7 bytes) of the input.
 * NOTE(review): the casted uint64_t loads below read 8 bytes at unaligned
 * offsets and type-pun through an incompatible pointer — this relies on the
 * target tolerating unaligned access and on the compiler not exploiting
 * strict aliasing here; confirm against the build flags used for this file.
 */
284 static void bin28_to_felem(felem out, const u8 in[28])
/* Bytes 0-6 -> limb 0 (mask keeps the low 56 bits of the 64-bit load). */
286 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
/* Bytes 7-13 -> limb 1. */
287 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
/* Bytes 14-20 -> limb 2. */
288 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
/* Bytes 21-27 -> limb 3: load at offset 20 and shift out the overlap byte. */
289 out[3] = (*((const uint64_t *)(in+20))) >> 8;
/*
 * Convert a 4-limb felem into a 28-byte little-endian array.
 * Inverse of bin28_to_felem: limb k is serialized into bytes [7k, 7k+6].
 * Assumes the input is fully reduced so each limb fits in 56 bits
 * (otherwise adjacent 7-byte groups would have to carry into each other).
 */
292 static void felem_to_bin28(u8 out[28], const felem in)
/* Emit byte i of each limb; four limbs are laid out 7 bytes apart. */
295 for (i = 0; i < 7; ++i) {
296 out[i] = in[0] >> (8 * i);
297 out[i + 7] = in[1] >> (8 * i);
298 out[i + 14] = in[2] >> (8 * i);
299 out[i + 21] = in[3] >> (8 * i);
303 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
/*
 * Reverse the byte order of |in| (length |len|) into |out|.
 * Used to bridge BN_bn2bin/BN_bin2bn, which use big-endian byte strings,
 * and the little-endian felem_bytearray layout. |out| and |in| must not
 * overlap.
 */
304 static void flip_endian(u8 *out, const u8 *in, unsigned len)
307 for (i = 0; i < len; ++i)
308 out[i] = in[len - 1 - i];
311 /* From OpenSSL BIGNUM to internal representation */
/*
 * Convert an OpenSSL BIGNUM to the internal felem representation.
 * Rejects (raising EC_R_BIGNUM_OUT_OF_RANGE) values wider than 28 bytes
 * and negative values; return statements for those error paths are on
 * lines not shown in this view.
 */
312 static int BN_to_felem(felem out, const BIGNUM *bn)
314 felem_bytearray b_in;
315 felem_bytearray b_out;
/* Zero-fill first: BN_bn2bin emits only the significant (non-leading-zero)
 * bytes, so shorter values must land on a zeroed buffer. */
318 /* BN_bn2bin eats leading zeroes */
319 memset(b_out, 0, sizeof(b_out));
320 num_bytes = BN_num_bytes(bn);
321 if (num_bytes > sizeof b_out) {
322 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
325 if (BN_is_negative(bn)) {
326 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
/* Big-endian BIGNUM bytes -> little-endian byte array -> 4 limbs. */
329 num_bytes = BN_bn2bin(bn, b_in);
330 flip_endian(b_out, b_in, num_bytes);
331 bin28_to_felem(out, b_out);
335 /* From internal representation to OpenSSL BIGNUM */
/*
 * Convert an internal felem back to an OpenSSL BIGNUM.
 * Serializes the limbs little-endian, byte-reverses to the big-endian
 * order BN_bin2bn expects, and returns BN_bin2bn's result (NULL on
 * allocation failure).
 */
336 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
338 felem_bytearray b_in, b_out;
339 felem_to_bin28(b_in, in);
340 flip_endian(b_out, b_in, sizeof b_out);
341 return BN_bin2bn(b_out, sizeof b_out, out);
344 /******************************************************************************/
348 * Field operations, using the internal representation of field elements.
349 * NB! These operations are specific to our point multiplication and cannot be
350 * expected to be correct in general - e.g., multiplication with a large scalar
351 * will cause an overflow.
355 static void felem_one(felem out)
363 static void felem_assign(felem out, const felem in)
371 /* Sum two field elements: out += in */
372 static void felem_sum(felem out, const felem in)
380 /* Get negative value: out = -in */
381 /* Assumes in[i] < 2^57 */
/*
 * Negate a field element: out = -in (mod p), without reducing.
 * Subtracts each limb from a per-limb constant; the constants together
 * form a multiple of p = 2^224 - 2^96 + 1 chosen large enough
 * (each limb ~2^58) that no limb difference can go negative given the
 * precondition in[i] < 2^57.
 */
382 static void felem_neg(felem out, const felem in)
384 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
385 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
386 static const limb two58m42m2 = (((limb) 1) << 58) -
387 (((limb) 1) << 42) - (((limb) 1) << 2);
389 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
390 out[0] = two58p2 - in[0];
391 out[1] = two58m42m2 - in[1];
392 out[2] = two58m2 - in[2];
393 out[3] = two58m2 - in[3];
396 /* Subtract field elements: out -= in */
397 /* Assumes in[i] < 2^57 */
/*
 * Subtract field elements: out -= in (mod p), without reducing.
 * Same constant trick as felem_neg: first add a multiple of p
 * (limbs ~2^58) so the subtraction cannot underflow for in[i] < 2^57.
 * Only the out[1] addition is visible here; the remaining additions and
 * the per-limb subtractions are on lines not shown in this view.
 */
398 static void felem_diff(felem out, const felem in)
400 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
401 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
402 static const limb two58m42m2 = (((limb) 1) << 58) -
403 (((limb) 1) << 42) - (((limb) 1) << 2);
405 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
407 out[1] += two58m42m2;
417 /* Subtract in unreduced 128-bit mode: out -= in */
418 /* Assumes in[i] < 2^119 */
/*
 * Subtract in the unreduced 7-widelimb representation: out -= in.
 * Adds a multiple of p with ~2^120-sized widelimb constants so no
 * widelimb can underflow given the precondition in[i] < 2^119.
 * Only the out[4] addition is visible here; the remaining additions
 * and subtractions are on lines not shown in this view.
 */
419 static void widefelem_diff(widefelem out, const widefelem in)
421 static const widelimb two120 = ((widelimb) 1) << 120;
422 static const widelimb two120m64 = (((widelimb) 1) << 120) -
423 (((widelimb) 1) << 64);
424 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
425 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
427 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
432 out[4] += two120m104m64;
445 /* Subtract in mixed mode: out128 -= in64 */
/*
 * Mixed-width subtraction: subtract a 4-limb felem from a 7-widelimb
 * widefelem in place (out128 -= in64). As in felem_diff, a multiple of p
 * (here with ~2^64-sized constants) is added first so the low widelimbs
 * cannot underflow. Only the out[1] addition is visible here; the other
 * additions and the subtractions are on lines not shown in this view.
 */
447 static void felem_diff_128_64(widefelem out, const felem in)
449 static const widelimb two64p8 = (((widelimb) 1) << 64) +
450 (((widelimb) 1) << 8);
451 static const widelimb two64m8 = (((widelimb) 1) << 64) -
452 (((widelimb) 1) << 8);
453 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
454 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
456 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
458 out[1] += two64m48m8;
469 * Multiply a field element by a scalar: out = out * scalar The scalars we
470 * actually use are small, so results fit without overflow
472 static void felem_scalar(felem out, const limb scalar)
481 * Multiply an unreduced field element by a scalar: out = out * scalar The
482 * scalars we actually use are small, so results fit without overflow
484 static void widefelem_scalar(widefelem out, const widelimb scalar)
495 /* Square a field element: out = in^2 */
/*
 * Square a field element: out (7 widelimbs) = in^2, unreduced.
 * Schoolbook squaring that exploits symmetry: cross terms appear once
 * against doubled operands. tmp0/tmp1/tmp2 are set on lines not shown in
 * this view — presumably the doubled limbs 2*in[0], 2*in[1], 2*in[2]
 * (TODO confirm), which makes e.g. out[1] = 2*in[0]*in[1].
 */
496 static void felem_square(widefelem out, const felem in)
498 limb tmp0, tmp1, tmp2;
502 out[0] = ((widelimb) in[0]) * in[0];
503 out[1] = ((widelimb) in[0]) * tmp1;
504 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
505 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
506 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
507 out[5] = ((widelimb) in[3]) * tmp2;
508 out[6] = ((widelimb) in[3]) * in[3];
511 /* Multiply two field elements: out = in1 * in2 */
/*
 * Multiply two field elements: out (7 widelimbs) = in1 * in2, unreduced.
 * Plain schoolbook product: out[k] collects every in1[i]*in2[j] with
 * i + j == k. Each product is widened to 128 bits before multiplying,
 * so with inputs below 2^60 per limb the sums cannot overflow a widelimb.
 * Callers reduce with felem_reduce.
 */
512 static void felem_mul(widefelem out, const felem in1, const felem in2)
514 out[0] = ((widelimb) in1[0]) * in2[0];
515 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
516 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
517 ((widelimb) in1[2]) * in2[0];
518 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
519 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
520 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
521 ((widelimb) in1[3]) * in2[1];
522 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
523 out[6] = ((widelimb) in1[3]) * in2[3];
527 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
528 * Requires in[i] < 2^126,
529 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
/*
 * Partially reduce a 7-widelimb product back to a 4-limb felem.
 * Repeatedly folds the high widelimbs down using
 * 2^224 == 2^96 - 1 (mod p): each eliminated coefficient contributes a
 * shifted addition two limbs down plus a subtraction at limb 0. Large
 * constants (multiples of p) are added up front so every intermediate
 * stays non-negative. `output` is a local widefelem working copy declared
 * on a line not shown in this view.
 */
530 static void felem_reduce(felem out, const widefelem in)
532 static const widelimb two127p15 = (((widelimb) 1) << 127) +
533 (((widelimb) 1) << 15);
534 static const widelimb two127m71 = (((widelimb) 1) << 127) -
535 (((widelimb) 1) << 71);
536 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
537 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
540 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
541 output[0] = in[0] + two127p15;
542 output[1] = in[1] + two127m71m55;
543 output[2] = in[2] + two127m71;
547 /* Eliminate in[4], in[5], in[6] */
548 output[4] += in[6] >> 16;
549 output[3] += (in[6] & 0xffff) << 40;
552 output[3] += in[5] >> 16;
553 output[2] += (in[5] & 0xffff) << 40;
556 output[2] += output[4] >> 16;
557 output[1] += (output[4] & 0xffff) << 40;
558 output[0] -= output[4];
560 /* Carry 2 -> 3 -> 4 */
561 output[3] += output[2] >> 56;
562 output[2] &= 0x00ffffffffffffff;
564 output[4] = output[3] >> 56;
565 output[3] &= 0x00ffffffffffffff;
567 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
569 /* Eliminate output[4] */
570 output[2] += output[4] >> 16;
571 /* output[2] < 2^56 + 2^56 = 2^57 */
572 output[1] += (output[4] & 0xffff) << 40;
573 output[0] -= output[4];
575 /* Carry 0 -> 1 -> 2 -> 3 */
576 output[1] += output[0] >> 56;
577 out[0] = output[0] & 0x00ffffffffffffff;
579 output[2] += output[1] >> 56;
580 /* output[2] < 2^57 + 2^72 */
581 out[1] = output[1] & 0x00ffffffffffffff;
582 output[3] += output[2] >> 56;
583 /* output[3] <= 2^56 + 2^16 */
584 out[2] = output[2] & 0x00ffffffffffffff;
587 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
588 * out[3] <= 2^56 + 2^16 (due to final carry),
/* Convenience wrapper: out = in^2, partially reduced (square then reduce
 * through a local widefelem temporary). */
594 static void felem_square_reduce(felem out, const felem in)
597 felem_square(tmp, in);
598 felem_reduce(out, tmp);
/* Convenience wrapper: out = in1 * in2, partially reduced (multiply then
 * reduce through a local widefelem temporary). */
601 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
604 felem_mul(tmp, in1, in2);
605 felem_reduce(out, tmp);
609 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
610 * call felem_reduce first)
/*
 * Reduce to the unique minimal representative in [0, p), in constant time.
 * Requires 0 <= in < 2*p, i.e. callers must run felem_reduce first.
 * Strategy: decide with branch-free masks whether in >= p (two cases:
 * in >= 2^224, or p <= in < 2^224), then conditionally subtract p via
 * masked AND operations, and finally fix up negative limbs and propagate
 * carries. `tmp` is a working copy set up on lines not shown in this view.
 */
612 static void felem_contract(felem out, const felem in)
614 static const int64_t two56 = ((limb) 1) << 56;
615 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
616 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
622 /* Case 1: a = 1 iff in >= 2^224 */
626 tmp[3] &= 0x00ffffffffffffff;
628 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
629 * and the lower part is non-zero
631 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
632 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
633 a &= 0x00ffffffffffffff;
634 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
636 /* subtract 2^224 - 2^96 + 1 if a is all-one */
637 tmp[3] &= a ^ 0xffffffffffffffff;
638 tmp[2] &= a ^ 0xffffffffffffffff;
639 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
643 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
644 * non-zero, so we only need one step
650 /* carry 1 -> 2 -> 3 */
651 tmp[2] += tmp[1] >> 56;
652 tmp[1] &= 0x00ffffffffffffff;
654 tmp[3] += tmp[2] >> 56;
655 tmp[2] &= 0x00ffffffffffffff;
657 /* Now 0 <= out < p */
665 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
666 * elements are reduced to in < 2^225, so we only need to check three cases:
667 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
/*
 * Constant-time zero test on a partially reduced element (in < 2^225):
 * returns 1 if in represents 0 mod p, else 0. Only three bit patterns can
 * represent zero in that range: 0 itself, p = 2^224 - 2^96 + 1, and 2*p.
 * Each candidate is tested by XOR-ing against its limb pattern and mapping
 * "all limbs matched" to 1 via the ((x - 1) >> 63) & 1 sign trick.
 */
669 static limb felem_is_zero(const felem in)
671 limb zero, two224m96p1, two225m97p2;
/* zero == 1 iff all four limbs are 0 */
673 zero = in[0] | in[1] | in[2] | in[3];
674 zero = (((int64_t) (zero) - 1) >> 63) & 1;
/* two224m96p1 == 1 iff in == p */
675 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
676 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
677 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
/* two225m97p2 == 1 iff in == 2*p */
678 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
679 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
680 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
681 return (zero | two224m96p1 | two225m97p2);
/* int-flavoured wrapper around felem_is_zero: masks the result to its low
 * bit and casts, for callers that want a plain 0/1 int. */
684 static limb felem_is_zero_int(const felem in)
686 return (int)(felem_is_zero(in) & ((limb) 1));
689 /* Invert a field element */
690 /* Computation chain copied from djb's code */
691 static void felem_inv(felem out, const felem in)
693 felem ftmp, ftmp2, ftmp3, ftmp4;
697 felem_square(tmp, in);
698 felem_reduce(ftmp, tmp); /* 2 */
699 felem_mul(tmp, in, ftmp);
700 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
701 felem_square(tmp, ftmp);
702 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
703 felem_mul(tmp, in, ftmp);
704 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
705 felem_square(tmp, ftmp);
706 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
707 felem_square(tmp, ftmp2);
708 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
709 felem_square(tmp, ftmp2);
710 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
711 felem_mul(tmp, ftmp2, ftmp);
712 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
713 felem_square(tmp, ftmp);
714 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
715 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
716 felem_square(tmp, ftmp2);
717 felem_reduce(ftmp2, tmp);
719 felem_mul(tmp, ftmp2, ftmp);
720 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
721 felem_square(tmp, ftmp2);
722 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
723 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
724 felem_square(tmp, ftmp3);
725 felem_reduce(ftmp3, tmp);
727 felem_mul(tmp, ftmp3, ftmp2);
728 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
729 felem_square(tmp, ftmp2);
730 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
731 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
732 felem_square(tmp, ftmp3);
733 felem_reduce(ftmp3, tmp);
735 felem_mul(tmp, ftmp3, ftmp2);
736 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
737 felem_square(tmp, ftmp3);
738 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
739 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
740 felem_square(tmp, ftmp4);
741 felem_reduce(ftmp4, tmp);
743 felem_mul(tmp, ftmp3, ftmp4);
744 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
745 felem_square(tmp, ftmp3);
746 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
747 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
748 felem_square(tmp, ftmp4);
749 felem_reduce(ftmp4, tmp);
751 felem_mul(tmp, ftmp2, ftmp4);
752 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
753 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
754 felem_square(tmp, ftmp2);
755 felem_reduce(ftmp2, tmp);
757 felem_mul(tmp, ftmp2, ftmp);
758 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
759 felem_square(tmp, ftmp);
760 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
761 felem_mul(tmp, ftmp, in);
762 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
763 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
764 felem_square(tmp, ftmp);
765 felem_reduce(ftmp, tmp);
767 felem_mul(tmp, ftmp, ftmp3);
768 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
772 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
/*
 * Constant-time conditional copy: if icopy == 1 set out = in, if icopy == 0
 * leave out unchanged. No branches on icopy, so the choice does not leak
 * through timing. The final `out[i] ^= tmp;` per iteration is on a line
 * not shown in this view.
 */
775 static void copy_conditional(felem out, const felem in, limb icopy)
779 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
781 const limb copy = -icopy;
782 for (i = 0; i < 4; ++i) {
/* XOR-mask trick: tmp is (in[i] ^ out[i]) when copying, 0 otherwise. */
783 const limb tmp = copy & (in[i] ^ out[i]);
788 /******************************************************************************/
790 * ELLIPTIC CURVE POINT OPERATIONS
792 * Points are represented in Jacobian projective coordinates:
793 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
794 * or to the point at infinity if Z == 0.
799 * Double an elliptic curve point:
800 * (X', Y', Z') = 2 * (X, Y, Z), where
801 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
802 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
803 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
804 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
805 * while x_out == y_in is not (maybe this works, but it's not tested).
808 point_double(felem x_out, felem y_out, felem z_out,
809 const felem x_in, const felem y_in, const felem z_in)
812 felem delta, gamma, beta, alpha, ftmp, ftmp2;
814 felem_assign(ftmp, x_in);
815 felem_assign(ftmp2, x_in);
818 felem_square(tmp, z_in);
819 felem_reduce(delta, tmp);
822 felem_square(tmp, y_in);
823 felem_reduce(gamma, tmp);
826 felem_mul(tmp, x_in, gamma);
827 felem_reduce(beta, tmp);
829 /* alpha = 3*(x-delta)*(x+delta) */
830 felem_diff(ftmp, delta);
831 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
832 felem_sum(ftmp2, delta);
833 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
834 felem_scalar(ftmp2, 3);
835 /* ftmp2[i] < 3 * 2^58 < 2^60 */
836 felem_mul(tmp, ftmp, ftmp2);
837 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
838 felem_reduce(alpha, tmp);
840 /* x' = alpha^2 - 8*beta */
841 felem_square(tmp, alpha);
842 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
843 felem_assign(ftmp, beta);
844 felem_scalar(ftmp, 8);
845 /* ftmp[i] < 8 * 2^57 = 2^60 */
846 felem_diff_128_64(tmp, ftmp);
847 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
848 felem_reduce(x_out, tmp);
850 /* z' = (y + z)^2 - gamma - delta */
851 felem_sum(delta, gamma);
852 /* delta[i] < 2^57 + 2^57 = 2^58 */
853 felem_assign(ftmp, y_in);
854 felem_sum(ftmp, z_in);
855 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
856 felem_square(tmp, ftmp);
857 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
858 felem_diff_128_64(tmp, delta);
859 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
860 felem_reduce(z_out, tmp);
862 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
863 felem_scalar(beta, 4);
864 /* beta[i] < 4 * 2^57 = 2^59 */
865 felem_diff(beta, x_out);
866 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
867 felem_mul(tmp, alpha, beta);
868 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
869 felem_square(tmp2, gamma);
870 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
871 widefelem_scalar(tmp2, 8);
872 /* tmp2[i] < 8 * 2^116 = 2^119 */
873 widefelem_diff(tmp, tmp2);
874 /* tmp[i] < 2^119 + 2^120 < 2^121 */
875 felem_reduce(y_out, tmp);
879 * Add two elliptic curve points:
880 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
881 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
882 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
883 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
884 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
885 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
887 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
891 * This function is not entirely constant-time: it includes a branch for
892 * checking whether the two input points are equal, (while not equal to the
893 * point at infinity). This case never happens during single point
894 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
896 static void point_add(felem x3, felem y3, felem z3,
897 const felem x1, const felem y1, const felem z1,
898 const int mixed, const felem x2, const felem y2,
901 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
903 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
907 felem_square(tmp, z2);
908 felem_reduce(ftmp2, tmp);
911 felem_mul(tmp, ftmp2, z2);
912 felem_reduce(ftmp4, tmp);
914 /* ftmp4 = z2^3*y1 */
915 felem_mul(tmp2, ftmp4, y1);
916 felem_reduce(ftmp4, tmp2);
918 /* ftmp2 = z2^2*x1 */
919 felem_mul(tmp2, ftmp2, x1);
920 felem_reduce(ftmp2, tmp2);
923 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
926 /* ftmp4 = z2^3*y1 */
927 felem_assign(ftmp4, y1);
929 /* ftmp2 = z2^2*x1 */
930 felem_assign(ftmp2, x1);
934 felem_square(tmp, z1);
935 felem_reduce(ftmp, tmp);
938 felem_mul(tmp, ftmp, z1);
939 felem_reduce(ftmp3, tmp);
942 felem_mul(tmp, ftmp3, y2);
943 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
945 /* ftmp3 = z1^3*y2 - z2^3*y1 */
946 felem_diff_128_64(tmp, ftmp4);
947 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
948 felem_reduce(ftmp3, tmp);
951 felem_mul(tmp, ftmp, x2);
952 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
954 /* ftmp = z1^2*x2 - z2^2*x1 */
955 felem_diff_128_64(tmp, ftmp2);
956 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
957 felem_reduce(ftmp, tmp);
960 * the formulae are incorrect if the points are equal so we check for
961 * this and do doubling if this happens
963 x_equal = felem_is_zero(ftmp);
964 y_equal = felem_is_zero(ftmp3);
965 z1_is_zero = felem_is_zero(z1);
966 z2_is_zero = felem_is_zero(z2);
967 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
968 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
969 point_double(x3, y3, z3, x1, y1, z1);
975 felem_mul(tmp, z1, z2);
976 felem_reduce(ftmp5, tmp);
978 /* special case z2 = 0 is handled later */
979 felem_assign(ftmp5, z1);
982 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
983 felem_mul(tmp, ftmp, ftmp5);
984 felem_reduce(z_out, tmp);
986 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
987 felem_assign(ftmp5, ftmp);
988 felem_square(tmp, ftmp);
989 felem_reduce(ftmp, tmp);
991 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
992 felem_mul(tmp, ftmp, ftmp5);
993 felem_reduce(ftmp5, tmp);
995 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
996 felem_mul(tmp, ftmp2, ftmp);
997 felem_reduce(ftmp2, tmp);
999 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1000 felem_mul(tmp, ftmp4, ftmp5);
1001 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1003 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1004 felem_square(tmp2, ftmp3);
1005 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1007 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1008 felem_diff_128_64(tmp2, ftmp5);
1009 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1011 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1012 felem_assign(ftmp5, ftmp2);
1013 felem_scalar(ftmp5, 2);
1014 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1017 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1018 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1020 felem_diff_128_64(tmp2, ftmp5);
1021 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1022 felem_reduce(x_out, tmp2);
1024 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1025 felem_diff(ftmp2, x_out);
1026 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1029 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1031 felem_mul(tmp2, ftmp3, ftmp2);
1032 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1035 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1036 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1038 widefelem_diff(tmp2, tmp);
1039 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1040 felem_reduce(y_out, tmp2);
1043 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1044 * the point at infinity, so we need to check for this separately
1048 * if point 1 is at infinity, copy point 2 to output, and vice versa
1050 copy_conditional(x_out, x2, z1_is_zero);
1051 copy_conditional(x_out, x1, z2_is_zero);
1052 copy_conditional(y_out, y2, z1_is_zero);
1053 copy_conditional(y_out, y1, z2_is_zero);
1054 copy_conditional(z_out, z2, z1_is_zero);
1055 copy_conditional(z_out, z1, z2_is_zero);
1056 felem_assign(x3, x_out);
1057 felem_assign(y3, y_out);
1058 felem_assign(z3, z_out);
1062 * select_point selects the |idx|th point from a precomputation table and
1064 * The pre_comp array argument should be size of |size| argument
/*
 * Constant-time table lookup: copy the |idx|th point (3 felems) from
 * |pre_comp| (which holds |size| points) into |out|. Every entry is
 * scanned and OR-ed under a mask so the access pattern is independent of
 * |idx|. `mask` is computed from i and idx on lines not shown in this
 * view — presumably all-ones only when i == idx (TODO confirm).
 */
1066 static void select_point(const u64 idx, unsigned int size,
1067 const felem pre_comp[][3], felem out[3])
1070 limb *outlimbs = &out[0][0];
/* Start from zero; exactly one masked entry will survive the ORs. */
1072 memset(out, 0, sizeof(*out) * 3);
1073 for (i = 0; i < size; i++) {
1074 const limb *inlimbs = &pre_comp[i][0][0];
/* 3 coordinates x 4 limbs per entry. */
1081 for (j = 0; j < 4 * 3; j++)
1082 outlimbs[j] |= inlimbs[j] & mask;
1086 /* get_bit returns the |i|th bit in |in| */
1087 static char get_bit(const felem_bytearray in, unsigned i)
/* byte i/8 holds the bit; LSB-first numbering within each byte */
1091 return (in[i >> 3] >> (i & 7)) & 1;
/*
 * Core scalar-multiplication loop. Two windowing schemes are interleaved:
 *  - the generator scalar uses a fixed comb: 4 bits spaced 56 apart
 *    (tables g_pre_comp[0] at offset 0 and g_pre_comp[1] at offset 28);
 *  - arbitrary points use a signed window processed every 5th doubling,
 *    with ec_GFp_nistp_recode_scalar_bits producing (sign, digit).
 * Table lookups go through select_point so secret scalar bits do not
 * determine memory access patterns.
 */
1095 * Interleaved point multiplication using precomputed point multiples: The
1096 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1097 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1098 * generator, using certain (large) precomputed multiples in g_pre_comp.
1099 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1101 static void batch_mul(felem x_out, felem y_out, felem z_out,
1102 const felem_bytearray scalars[],
1103 const unsigned num_points, const u8 *g_scalar,
1104 const int mixed, const felem pre_comp[][17][3],
1105 const felem g_pre_comp[2][16][3])
1109 unsigned gen_mul = (g_scalar != NULL);
1110 felem nq[3], tmp[4];
1114 /* set nq to the point at infinity */
1115 memset(nq, 0, sizeof(nq));
1118 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1119 * of the generator (two in each of the last 28 rounds) and additions of
1120 * other points multiples (every 5th round).
/*
 * 'skip' avoids adding to the point at infinity on the first addition;
 * NOTE(review): where it is cleared is on lines elided from this view.
 */
1122 skip = 1; /* save two point operations in the first
/* 220 rounds when arbitrary points are present, else only the 28 comb rounds */
1124 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1127 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1129 /* add multiples of the generator */
1130 if (gen_mul && (i <= 27)) {
1131 /* first, look 28 bits upwards */
1132 bits = get_bit(g_scalar, i + 196) << 3;
1133 bits |= get_bit(g_scalar, i + 140) << 2;
1134 bits |= get_bit(g_scalar, i + 84) << 1;
1135 bits |= get_bit(g_scalar, i + 28);
1136 /* select the point to add, in constant time */
1137 select_point(bits, 16, g_pre_comp[1], tmp);
1140 /* value 1 below is argument for "mixed" */
1141 point_add(nq[0], nq[1], nq[2],
1142 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
/* first addition with nq at infinity: just copy the selected point */
1144 memcpy(nq, tmp, 3 * sizeof(felem));
1148 /* second, look at the current position */
1149 bits = get_bit(g_scalar, i + 168) << 3;
1150 bits |= get_bit(g_scalar, i + 112) << 2;
1151 bits |= get_bit(g_scalar, i + 56) << 1;
1152 bits |= get_bit(g_scalar, i);
1153 /* select the point to add, in constant time */
1154 select_point(bits, 16, g_pre_comp[0], tmp);
1155 point_add(nq[0], nq[1], nq[2],
1156 nq[0], nq[1], nq[2],
1157 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1160 /* do other additions every 5 doublings */
1161 if (num_points && (i % 5 == 0)) {
1162 /* loop over all scalars */
1163 for (num = 0; num < num_points; ++num) {
/* gather a 6-bit signed-window slice; i-1 may read bit -1 of the scalar */
1164 bits = get_bit(scalars[num], i + 4) << 5;
1165 bits |= get_bit(scalars[num], i + 3) << 4;
1166 bits |= get_bit(scalars[num], i + 2) << 3;
1167 bits |= get_bit(scalars[num], i + 1) << 2;
1168 bits |= get_bit(scalars[num], i) << 1;
1169 bits |= get_bit(scalars[num], i - 1);
1170 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1172 /* select the point to add or subtract */
1173 select_point(digit, 17, pre_comp[num], tmp);
1174 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
/* constant-time conditional negate: use -Y when 'sign' says subtract */
1176 copy_conditional(tmp[1], tmp[3], sign);
1179 point_add(nq[0], nq[1], nq[2],
1180 nq[0], nq[1], nq[2],
1181 mixed, tmp[0], tmp[1], tmp[2]);
1183 memcpy(nq, tmp, 3 * sizeof(felem));
1189 felem_assign(x_out, nq[0]);
1190 felem_assign(y_out, nq[1]);
1191 felem_assign(z_out, nq[2]);
1194 /******************************************************************************/
1196 * FUNCTIONS TO MANAGE PRECOMPUTATION
/*
 * Allocates a zero-initialized precomputation structure with an initial
 * reference count of 1. Returns NULL (with ERR_R_MALLOC_FAILURE raised)
 * on allocation failure.
 */
1199 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
/* zalloc: the precomputed-point tables start out all-zero */
1201 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1204 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1207 ret->references = 1;
/*
 * "Duplicates" the precomputation by taking another reference — the
 * underlying table is shared, not copied.
 */
1211 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1214 CRYPTO_add(&p->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
/*
 * Drops one reference; the structure is actually freed only when the
 * count reaches zero (lines elided from this view presumably do the
 * free — confirm against full source). NULL-safe via the elided guard.
 */
1218 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1221 || CRYPTO_add(&p->references, -1, CRYPTO_LOCK_EC_PRE_COMP) > 0)
1226 /******************************************************************************/
1228 * OPENSSL EC_METHOD FUNCTIONS
/*
 * Group init: defers to the generic GFp init, then records that the
 * curve coefficient a is -3 mod p, which the optimized point formulas
 * in this file assume.
 */
1231 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1234 ret = ec_GFp_simple_group_init(group);
1235 group->a_is_minus3 = 1;
/*
 * Accepts only the exact NIST P-224 parameters: the supplied (p, a, b)
 * are compared against the built-in nistp224_curve_params and rejected
 * with EC_R_WRONG_CURVE_PARAMETERS on any mismatch. On success installs
 * the fast NIST reduction and delegates to the generic set_curve.
 */
1239 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1240 const BIGNUM *a, const BIGNUM *b,
1244 BN_CTX *new_ctx = NULL;
1245 BIGNUM *curve_p, *curve_a, *curve_b;
/* caller may pass ctx == NULL; allocate a local one in that case */
1248 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1251 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1252 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1253 ((curve_b = BN_CTX_get(ctx)) == NULL))
1255 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1256 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1257 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
/* BN_cmp returns nonzero on difference, so this reads "any param differs" */
1258 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1259 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1260 EC_R_WRONG_CURVE_PARAMETERS);
1263 group->field_mod_func = BN_nist_mod_224;
1264 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
/* BN_CTX_free(NULL) is a no-op, so this is safe when the caller gave us ctx */
1267 BN_CTX_free(new_ctx);
1272 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
/*
 * i.e. affine x = X/Z^2, y = Y/Z^3. Fails for the point at infinity
 * (no affine representation). Either output BIGNUM may be NULL if the
 * caller wants only the other coordinate (the !x / !y guards are on
 * lines elided here — confirm against full source).
 */
1275 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1276 const EC_POINT *point,
1277 BIGNUM *x, BIGNUM *y,
1280 felem z1, z2, x_in, y_in, x_out, y_out;
1283 if (EC_POINT_is_at_infinity(group, point)) {
1284 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1285 EC_R_POINT_AT_INFINITY);
1288 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1289 (!BN_to_felem(z1, point->Z)))
/*
 * NOTE(review): the inversion of Z (producing z2 = Z^-1, presumably via
 * felem_inv) is on lines elided from this view.
 */
/* z1 = Z^-2, then x = X * Z^-2 */
1292 felem_square(tmp, z2);
1293 felem_reduce(z1, tmp);
1294 felem_mul(tmp, x_in, z1);
1295 felem_reduce(x_in, tmp);
/* contract to the unique minimal representation before exporting */
1296 felem_contract(x_out, x_in);
1298 if (!felem_to_BN(x, x_out)) {
1299 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/* z1 = Z^-3, then y = Y * Z^-3 */
1304 felem_mul(tmp, z1, z2);
1305 felem_reduce(z1, tmp);
1306 felem_mul(tmp, y_in, z1);
1307 felem_reduce(y_in, tmp);
1308 felem_contract(y_out, y_in);
1310 if (!felem_to_BN(y, y_out)) {
1311 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/*
 * Batch-converts |num| Jacobian points to affine (Z = 1) form using the
 * shared nistp helper with this file's felem primitives as callbacks.
 * tmp_felems must hold num+1 scratch elements (Montgomery-trick batch
 * inversion needs one extra).
 */
1319 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1320 felem tmp_felems[ /* num+1 */ ])
1323 * Runs in constant time, unless an input is the point at infinity (which
1324 * normally shouldn't happen).
/*
 * The callback list is partially elided from this view; the casts adapt
 * the felem-typed primitives to the generic void* interface.
 */
1326 ec_GFp_nistp_points_make_affine_internal(num,
1330 (void (*)(void *))felem_one,
1331 (int (*)(const void *))
1333 (void (*)(void *, const void *))
1335 (void (*)(void *, const void *))
1336 felem_square_reduce, (void (*)
1343 (void (*)(void *, const void *))
1345 (void (*)(void *, const void *))
1350 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1351 * values Result is stored in r (r can equal one of the inputs).
/*
 * Top-level multi-scalar multiplication. Strategy:
 *  - use cached (or built-in) generator precomputation when it matches
 *    the group's generator; otherwise fold the generator in as an
 *    ordinary point (num_points + 1);
 *  - build per-point tables of 0*P..16*P, optionally batch-converted to
 *    affine ("mixed" addition) when there are >= 3 points;
 *  - scalars outside [0, 2^224) are first reduced mod the group order
 *    (documented as not constant-time for such unusual inputs);
 *  - batch_mul does the actual interleaved ladder; the Jacobian result
 *    is contracted and written back into |r|.
 */
1353 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1354 const BIGNUM *scalar, size_t num,
1355 const EC_POINT *points[],
1356 const BIGNUM *scalars[], BN_CTX *ctx)
1362 BN_CTX *new_ctx = NULL;
1363 BIGNUM *x, *y, *z, *tmp_scalar;
1364 felem_bytearray g_secret;
1365 felem_bytearray *secrets = NULL;
1366 felem (*pre_comp)[17][3] = NULL;
1367 felem *tmp_felems = NULL;
1368 felem_bytearray tmp;
1370 int have_pre_comp = 0;
1371 size_t num_points = num;
1372 felem x_in, y_in, z_in, x_out, y_out, z_out;
1373 NISTP224_PRE_COMP *pre = NULL;
1374 const felem(*g_pre_comp)[16][3] = NULL;
1375 EC_POINT *generator = NULL;
1376 const EC_POINT *p = NULL;
1377 const BIGNUM *p_scalar = NULL;
/* caller may pass ctx == NULL; allocate a local one in that case */
1380 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1383 if (((x = BN_CTX_get(ctx)) == NULL) ||
1384 ((y = BN_CTX_get(ctx)) == NULL) ||
1385 ((z = BN_CTX_get(ctx)) == NULL) ||
1386 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1389 if (scalar != NULL) {
1390 pre = group->pre_comp.nistp224;
1392 /* we have precomputation, try to use it */
1393 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1395 /* try to use the standard precomputation */
1396 g_pre_comp = &gmul[0];
1397 generator = EC_POINT_new(group);
1398 if (generator == NULL)
1400 /* get the generator from precomputation */
1401 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1402 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1403 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1404 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1407 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1411 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1412 /* precomputation matches generator */
1416 * we don't have valid precomputation: treat the generator as a
/* ... random point: it joins the per-point tables below */
1419 num_points = num_points + 1;
1422 if (num_points > 0) {
1423 if (num_points >= 3) {
1425 * unless we precompute multiples for just one or two points,
1426 * converting those into affine form is time well spent
1430 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1431 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
/* +1 scratch element required by make_points_affine */
1434 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1435 if ((secrets == NULL) || (pre_comp == NULL)
1436 || (mixed && (tmp_felems == NULL))) {
1437 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1442 * we treat NULL scalars as 0, and NULL points as points at infinity,
1443 * i.e., they contribute nothing to the linear combination
1445 for (i = 0; i < num_points; ++i) {
/* the extra slot (i == num) holds the generator when precomp was unusable */
1449 p = EC_GROUP_get0_generator(group);
1452 /* the i^th point */
1455 p_scalar = scalars[i];
1457 if ((p_scalar != NULL) && (p != NULL)) {
1458 /* reduce scalar to 0 <= scalar < 2^224 */
1459 if ((BN_num_bits(p_scalar) > 224)
1460 || (BN_is_negative(p_scalar))) {
1462 * this is an unusual input, and we don't guarantee
1465 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1466 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1469 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1471 num_bytes = BN_bn2bin(p_scalar, tmp);
/* batch_mul/get_bit expect little-endian byte order */
1472 flip_endian(secrets[i], tmp, num_bytes);
1473 /* precompute multiples */
1474 if ((!BN_to_felem(x_out, p->X)) ||
1475 (!BN_to_felem(y_out, p->Y)) ||
1476 (!BN_to_felem(z_out, p->Z)))
1478 felem_assign(pre_comp[i][1][0], x_out);
1479 felem_assign(pre_comp[i][1][1], y_out);
1480 felem_assign(pre_comp[i][1][2], z_out);
/* fill 2*P..16*P: odd entries by add, even entries by doubling j/2 */
1481 for (j = 2; j <= 16; ++j) {
1483 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1484 pre_comp[i][j][2], pre_comp[i][1][0],
1485 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1486 pre_comp[i][j - 1][0],
1487 pre_comp[i][j - 1][1],
1488 pre_comp[i][j - 1][2]);
1490 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1491 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1492 pre_comp[i][j / 2][1],
1493 pre_comp[i][j / 2][2]);
1499 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1502 /* the scalar for the generator */
1503 if ((scalar != NULL) && (have_pre_comp)) {
1504 memset(g_secret, 0, sizeof(g_secret));
1505 /* reduce scalar to 0 <= scalar < 2^224 */
1506 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1508 * this is an unusual input, and we don't guarantee
1511 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1512 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1515 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1517 num_bytes = BN_bn2bin(scalar, tmp);
1518 flip_endian(g_secret, tmp, num_bytes);
1519 /* do the multiplication with generator precomputation */
1520 batch_mul(x_out, y_out, z_out,
1521 (const felem_bytearray(*))secrets, num_points,
1523 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1525 /* do the multiplication without generator precomputation */
1526 batch_mul(x_out, y_out, z_out,
1527 (const felem_bytearray(*))secrets, num_points,
1528 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1529 /* reduce the output to its unique minimal representation */
1530 felem_contract(x_in, x_out);
1531 felem_contract(y_in, y_out);
1532 felem_contract(z_in, z_out);
1533 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1534 (!felem_to_BN(z, z_in))) {
1535 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1538 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
/* common cleanup path: all of these are NULL-safe */
1542 EC_POINT_free(generator);
1543 BN_CTX_free(new_ctx);
1544 OPENSSL_free(secrets);
1545 OPENSSL_free(pre_comp);
1546 OPENSSL_free(tmp_felems);
/*
 * Builds (or copies) the generator precomputation used by the comb in
 * batch_mul: two 16-entry tables of sums of {G, 2^56*G, 2^112*G, 2^168*G}
 * (table 0) and the same shifted by 2^28 (table 1). If the group uses the
 * standard generator, the built-in 'gmul' table is copied instead of
 * recomputed. The result is installed on the group via SETPRECOMP.
 */
1550 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1553 NISTP224_PRE_COMP *pre = NULL;
1555 BN_CTX *new_ctx = NULL;
1557 EC_POINT *generator = NULL;
/* scratch for make_points_affine over the 31 non-infinity table entries */
1558 felem tmp_felems[32];
1560 /* throw away old precomputation */
1561 EC_pre_comp_free(group);
1563 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1566 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1568 /* get the generator */
1569 if (group->generator == NULL)
1571 generator = EC_POINT_new(group);
1572 if (generator == NULL)
/* params[3], params[4] are the standard generator's affine x, y */
1574 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1575 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1576 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1578 if ((pre = nistp224_pre_comp_new()) == NULL)
1581 * if the generator is the standard one, use built-in precomputation
1583 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1584 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
/* non-standard generator: seed table 0 entry 1 with G itself */
1587 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1588 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1589 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1592 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1593 * 2^140*G, 2^196*G for the second one
/* i walks the power-of-two slots 1, 2, 4, 8 */
1595 for (i = 1; i <= 8; i <<= 1) {
/* table1[i] = 2^28 * table0[i]: one doubling here + 27 in the loop below */
1596 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1597 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1598 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1599 for (j = 0; j < 27; ++j) {
1600 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1601 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1602 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
/* table0[2i] = 2^28 * table1[i], i.e. another 28 doublings in total */
1606 point_double(pre->g_pre_comp[0][2 * i][0],
1607 pre->g_pre_comp[0][2 * i][1],
1608 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1609 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1610 for (j = 0; j < 27; ++j) {
1611 point_double(pre->g_pre_comp[0][2 * i][0],
1612 pre->g_pre_comp[0][2 * i][1],
1613 pre->g_pre_comp[0][2 * i][2],
1614 pre->g_pre_comp[0][2 * i][0],
1615 pre->g_pre_comp[0][2 * i][1],
1616 pre->g_pre_comp[0][2 * i][2]);
/* fill in the composite-index entries of both tables */
1619 for (i = 0; i < 2; i++) {
1620 /* g_pre_comp[i][0] is the point at infinity */
1621 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1622 /* the remaining multiples */
1623 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1624 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1625 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1626 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1627 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1628 pre->g_pre_comp[i][2][2]);
1629 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1630 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1631 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1632 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1633 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1634 pre->g_pre_comp[i][2][2]);
1635 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1636 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1637 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1638 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1639 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1640 pre->g_pre_comp[i][4][2]);
1642 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1644 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1645 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1646 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1647 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1648 pre->g_pre_comp[i][2][2]);
1649 for (j = 1; j < 8; ++j) {
1650 /* odd multiples: add G resp. 2^28*G */
1651 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1652 pre->g_pre_comp[i][2 * j + 1][1],
1653 pre->g_pre_comp[i][2 * j + 1][2],
1654 pre->g_pre_comp[i][2 * j][0],
1655 pre->g_pre_comp[i][2 * j][1],
1656 pre->g_pre_comp[i][2 * j][2], 0,
1657 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1658 pre->g_pre_comp[i][1][2]);
/* skip entry [0][0] (infinity); 31 remaining entries become affine */
1661 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
/* hand ownership of 'pre' to the group */
1664 SETPRECOMP(group, nistp224, pre);
/* cleanup path: all of these are NULL-safe */
1669 EC_POINT_free(generator);
1670 BN_CTX_free(new_ctx);
1671 EC_nistp224_pre_comp_free(pre);
/* Reports whether generator precomputation is cached on this group. */
1675 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1677 return HAVEPRECOMP(group, nistp224);
1681 static void *dummy = &dummy;