From: Matt Caswell <matt@openssl.org>
Date: Wed, 15 Nov 2017 15:59:27 +0000 (+0000)
Subject: Flatten the Curve 448 source structure
X-Git-Tag: OpenSSL_1_1_1-pre2~126
X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=abcd22bf621b25e5db724b0ad9bcb4bcc189b1d3;p=oweals%2Fopenssl.git

Flatten the Curve 448 source structure

Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
(Merged from https://github.com/openssl/openssl/pull/5105)
---

diff --git a/crypto/ec/curve448/GENERATED/c/ed448goldilocks/decaf.c b/crypto/ec/curve448/GENERATED/c/ed448goldilocks/decaf.c
deleted file mode 100644
index 3fdc491db6..0000000000
--- a/crypto/ec/curve448/GENERATED/c/ed448goldilocks/decaf.c
+++ /dev/null
@@ -1,1598 +0,0 @@
-/**
- * @file ed448goldilocks/decaf.c
- * @author Mike Hamburg
- *
- * @copyright
- *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- *
- * @brief Decaf high-level functions.
- *
- * @warning This file was automatically generated in Python.
- * Please do not edit it.
- */
-#define _XOPEN_SOURCE 600 /* for posix_memalign */
-#include "word.h"
-#include "field.h"
-
-#include <decaf.h>
-#include <decaf/ed448.h>
-
-/* Template stuff: names and parameters that specialise this generated file for decaf_448. */
-#define API_NS(_id) decaf_448_##_id /* pastes the decaf_448_ prefix onto _id */
-#define SCALAR_BITS DECAF_448_SCALAR_BITS
-#define SCALAR_SER_BYTES DECAF_448_SCALAR_BYTES
-#define SCALAR_LIMBS DECAF_448_SCALAR_LIMBS
-#define scalar_t API_NS(scalar_t)
-#define point_t API_NS(point_t)
-#define precomputed_s API_NS(precomputed_s)
-#define IMAGINE_TWIST 0 /* 0: every IMAGINE_TWIST-only branch in this file is compiled out */
-#define COFACTOR 4 /* only 4 or 8 pass the #error checks further down */
-
-/* Comb config: number of combs, n, t, s. */
-#define COMBS_N 5 /* n: number of comb tables */
-#define COMBS_T 5 /* t: scalar bits gathered into one table index */
-#define COMBS_S 18 /* s: distance in bits between the t bits of one index */
-#define DECAF_WINDOW_BITS 5 /* window width of the fixed-window scalar multiplications */
-#define DECAF_WNAF_FIXED_TABLE_BITS 5
-#define DECAF_WNAF_VAR_TABLE_BITS 3
-
-#define EDDSA_USE_SIGMA_ISOGENY 0
-
-static const int EDWARDS_D = -39081;
-static const scalar_t point_scalarmul_adjustment = {{{ /* added to a scalar before it is halved in the point_*scalarmul routines */
-    SC_LIMB(0xc873d6d54a7bb0cf), SC_LIMB(0xe933d8d723a70aad), SC_LIMB(0xbb124b65129c96fd), SC_LIMB(0x00000008335dc163)
-}}}, precomputed_scalarmul_adjustment = {{{ /* same limb values; used the same way by precomputed_scalarmul */
-    SC_LIMB(0xc873d6d54a7bb0cf), SC_LIMB(0xe933d8d723a70aad), SC_LIMB(0xbb124b65129c96fd), SC_LIMB(0x00000008335dc163)
-}}};
-
-const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES] = { 0x05 }; /* first byte 5, remaining bytes zero-initialised */
-
-#define RISTRETTO_FACTOR DECAF_448_RISTRETTO_FACTOR
-const gf RISTRETTO_FACTOR = {{{ /* field constant multiplied in before the sign decisions of encode/decode */
-    0x42ef0f45572736, 0x7bf6aa20ce5296, 0xf4fd6eded26033, 0x968c14ba839a66, 0xb8d54b64a2d780, 0x6aa0a1f1a7b8a5, 0x683bf68d722fa2, 0x22d962fbeb24f7
-}}};
-
-#if IMAGINE_TWIST
-#define TWISTED_D (-(EDWARDS_D))
-#else
-#define TWISTED_D ((EDWARDS_D)-1) /* the definition in effect here, since IMAGINE_TWIST is 0 */
-#endif
-/* NOTE(review): EDWARDS_D is a C object, not a macro, so the preprocessor
- * replaces it with 0 inside the #if below; the test is effectively
- * ((0)-1) < 0.  That picks the branch matching the real value (-39081 - 1),
- * but it would not follow a change of sign in EDWARDS_D.
- */
-#if TWISTED_D < 0
-#define EFF_D (-(TWISTED_D)) /* magnitude of TWISTED_D, as passed to gf_mulw in point_add/point_sub */
-#define NEG_D 1 /* tells point_add/point_sub that the sign was stripped */
-#else
-#define EFF_D TWISTED_D
-#define NEG_D 0
-#endif
-
-/* End of template stuff */
-
-/* Sanity: compile-time checks on the template parameters defined above. */
-#if (COFACTOR == 8) && !IMAGINE_TWIST && !UNSAFE_CURVE_HAS_POINTS_AT_INFINITY
-/* FUTURE MAGIC: Curve41417 doesn't have these properties. */
-#error "Currently require IMAGINE_TWIST (and thus p=5 mod 8) for cofactor 8"
-        /* OK, but why?
-         * Two reasons: #1: There are bugs when COFACTOR == && IMAGINE_TWIST
-         * #2:
-         * NOTE(review): this explanation is incomplete as written (the
-         * cofactor value and the second reason are missing); left as found.
-         */
-#endif
-
-#if IMAGINE_TWIST && (P_MOD_8 != 5)
-    #error "Cannot use IMAGINE_TWIST except for p == 5 mod 8"
-#endif
-
-#if (COFACTOR != 8) && (COFACTOR != 4)
-    #error "COFACTOR must be 4 or 8"
-#endif
- 
-#if IMAGINE_TWIST
-    extern const gf SQRT_MINUS_ONE; /* only referenced by the IMAGINE_TWIST code paths */
-#endif
-
-#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */
-
-extern const point_t API_NS(point_base); /* defined elsewhere; used by direct_scalarmul as the fallback point */
-
-/* Projective Niels coordinates.
- * As filled in by pt_to_pniels: a = y - x, b = x + y, c = 2*TWISTED_D*t,
- * and for the projective form z = 2*z of the source point.
- */
-typedef struct { gf a, b, c; } niels_s, niels_t[1];
-typedef struct { niels_t n; gf z; } VECTOR_ALIGNED pniels_s, pniels_t[1];
-
-/* Precomputed base: COMBS_N tables of 2^(COMBS_T-1) Niels points each. */
-struct precomputed_s { niels_t table [COMBS_N<<(COMBS_T-1)]; };
-
-extern const gf API_NS(precomputed_base_as_fe)[]; /* table data, declared as a flat array of field elements */
-const precomputed_s *API_NS(precomputed_base) =
-    (const precomputed_s *) &API_NS(precomputed_base_as_fe); /* the same storage viewed as a precomputed_s */
-
-const size_t API_NS(sizeof_precomputed_s) = sizeof(precomputed_s);
-const size_t API_NS(alignof_precomputed_s) = sizeof(big_register_t); /* exported alignment is the size of big_register_t */
-
-/**
- * Inverse: writes 1/x to y.
- *
- * Squares x, takes the inverse square root of that square with gf_isr
- * (giving +-1/x), squares the result to drop the sign ambiguity, and
- * multiplies by x.
- *
- * @param y              Output.  May alias x: the result is built in a
- *                       temporary and copied out last.
- * @param x              Element to invert.
- * @param assert_nonzero If nonzero, assert() on the mask returned by gf_isr.
- *                       NOTE(review): presumably that mask is clear when x
- *                       is zero -- confirm against gf_isr.  The check
- *                       disappears when assert() is compiled out.
- */
-static void
-gf_invert(gf y, const gf x, int assert_nonzero) {
-    gf t1, t2;
-    gf_sqr(t1, x); // o^2
-    mask_t ret = gf_isr(t2, t1); // +-1/sqrt(o^2) = +-1/o
-    (void)ret; /* keeps ret "used" when assert() expands to nothing */
-    if (assert_nonzero) assert(ret);
-    gf_sqr(t1, t2); /* 1/o^2 */
-    gf_mul(t2, t1, x); // not direct to y in case of alias.
-    gf_copy(y, t2);
-}
-
-/**
- * identity = (0,1): the four coordinates are initialised to 0, 1, 1, 0.
- * NOTE(review): reading that as x=0, y=1, z=1, t=0 assumes the point struct
- * stores x, y, z, t in that order; the struct is declared elsewhere.
- */
-const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};
-
-/**
- * Compute the field element s that point_encode serialises for p, plus two
- * helper outputs.  Predeclare because not static: called by elligator.
- *
- * In the cofactor-4 branch (the one this file's COFACTOR/IMAGINE_TWIST
- * settings select) the three output pointers double as scratch registers
- * while the routine runs, so every one of them is overwritten.
- *
- * @param s               Output: final value has had gf_cond_neg applied
- *                        with its own gf_lobit mask.
- * @param inv_el_sum      Output/scratch.
- * @param inv_el_m1       Output/scratch.
- * @param p               Point to process (read only).
- * @param toggle_s        Mask XORed into the sign choice for inv_el_m1.
- * @param toggle_altx     Mask XORed into the negx sign choice.
- * @param toggle_rotation Mask XORed into the rotation choice; ignored in
- *                        the cofactor-4 branch.
- */
-void API_NS(deisogenize) (
-    gf_s *__restrict__ s,
-    gf_s *__restrict__ inv_el_sum,
-    gf_s *__restrict__ inv_el_m1,
-    const point_t p,
-    mask_t toggle_s,
-    mask_t toggle_altx,
-    mask_t toggle_rotation
-);
-
-void API_NS(deisogenize) (
-    gf_s *__restrict__ s,
-    gf_s *__restrict__ inv_el_sum,
-    gf_s *__restrict__ inv_el_m1,
-    const point_t p,
-    mask_t toggle_s,
-    mask_t toggle_altx,
-    mask_t toggle_rotation
-) {
-#if COFACTOR == 4 && !IMAGINE_TWIST
-    (void)toggle_rotation; /* Only applies to cofactor 8 */
-    gf t1;
-    gf_s *t2 = s, *t3=inv_el_sum, *t4=inv_el_m1; /* the outputs are reused as temporaries */
-    
-    gf_add(t1,p->x,p->t);
-    gf_sub(t2,p->x,p->t);
-    gf_mul(t3,t1,t2); /* t3 = num */
-    gf_sqr(t2,p->x);
-    gf_mul(t1,t2,t3);
-    gf_mulw(t2,t1,-1-TWISTED_D); /* -x^2 * (a-d) * num */
-    gf_isr(t1,t2);    /* t1 = isr */
-    gf_mul(t2,t1,t3); /* t2 = ratio */
-    gf_mul(t4,t2,RISTRETTO_FACTOR);
-    mask_t negx = gf_lobit(t4) ^ toggle_altx;
-    gf_cond_neg(t2, negx);
-    gf_mul(t3,t2,p->z);
-    gf_sub(t3,t3,p->t); /* last write to t3, i.e. the value left in inv_el_sum */
-    gf_mul(t2,t3,p->x);
-    gf_mulw(t4,t2,-1-TWISTED_D);
-    gf_mul(s,t4,t1);
-    mask_t lobs = gf_lobit(s);
-    gf_cond_neg(s,lobs);
-    gf_copy(inv_el_m1,p->x); /* t4 is no longer needed, so inv_el_m1 can take its real value */
-    gf_cond_neg(inv_el_m1,~lobs^negx^toggle_s);
-    gf_add(inv_el_m1,inv_el_m1,p->t);
-    
-#elif COFACTOR == 8 && IMAGINE_TWIST
-    /* More complicated because of rotation */
-    gf t1,t2,t3,t4,t5;
-    gf_add(t1,p->z,p->y);
-    gf_sub(t2,p->z,p->y);
-    gf_mul(t3,t1,t2);      /* t3 = num */
-    gf_mul(t2,p->x,p->y);  /* t2 = den */
-    gf_sqr(t1,t2);
-    gf_mul(t4,t1,t3);
-    gf_mulw(t1,t4,-1-TWISTED_D);
-    gf_isr(t4,t1);         /* isqrt(num*(a-d)*den^2) */
-    gf_mul(t1,t2,t4);
-    gf_mul(t2,t1,RISTRETTO_FACTOR); /* t2 = "iden" in ristretto.sage */
-    gf_mul(t1,t3,t4);                 /* t1 = "inum" in ristretto.sage */
-
-    /* Calculate altxy = iden*inum*i*t^2*(d-a) */
-    gf_mul(t3,t1,t2);
-    gf_mul_i(t4,t3);
-    gf_mul(t3,t4,p->t);
-    gf_mul(t4,t3,p->t);
-    gf_mulw(t3,t4,TWISTED_D+1);      /* iden*inum*i*t^2*(d-1) */
-    mask_t rotate = toggle_rotation ^ gf_lobit(t3);
-    
-    /* Rotate if altxy is negative */
-    gf_cond_swap(t1,t2,rotate);
-    gf_mul_i(t4,p->x);
-    gf_cond_sel(t4,p->y,t4,rotate);  /* t4 = "fac" = ix if rotate, else y */
-    
-    gf_mul_i(t5,RISTRETTO_FACTOR); /* t5 = imi */
-    gf_mul(t3,t5,t2);                /* iden * imi */
-    gf_mul(t2,t5,t1);
-    gf_mul(t5,t2,p->t);              /* "altx" = iden*imi*t */
-    mask_t negx = gf_lobit(t5) ^ toggle_altx;
-    
-    gf_cond_neg(t1,negx^rotate);
-    gf_mul(t2,t1,p->z);
-    gf_add(t2,t2,ONE);
-    gf_mul(inv_el_sum,t2,t4);
-    gf_mul(s,inv_el_sum,t3);
-    
-    mask_t negs = gf_lobit(s);
-    gf_cond_neg(s,negs);
-    
-    mask_t negz = ~negs ^ toggle_s ^ negx;
-    gf_copy(inv_el_m1,p->z);
-    gf_cond_neg(inv_el_m1,negz);
-    gf_sub(inv_el_m1,inv_el_m1,t4);
-#else
-#error "Cofactor must be 4 (with no IMAGINE_TWIST) or 8 (with IMAGINE_TWIST)"
-#endif
-}
-/**
- * Serialise point p into ser.
- *
- * Runs deisogenize with all three toggle masks clear and writes the
- * resulting s with gf_serialize; the two helper outputs are discarded.
- *
- * @param ser Output buffer of SER_BYTES bytes.
- * @param p   Point to encode.
- */
-void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
-    gf s,ie1,ie2; /* ie1/ie2 only receive deisogenize's unused helper outputs */
-    API_NS(deisogenize)(s,ie1,ie2,p,0,0,0);
-    gf_serialize(ser,s,1);
-}
-/**
- * Deserialise ser into point p.
- *
- * Success requires all of: gf_deserialize accepts the bytes; s is nonzero
- * unless allow_identity is set; gf_lobit(s) is clear; and gf_isr reports
- * success.  (For cofactor 8 two further checks are compiled in.)
- *
- * p is written on every path, including failure; p->z, p->x and p->t are
- * used as scratch before receiving their final values.
- *
- * @param p              Output point.
- * @param ser            Input encoding, SER_BYTES bytes.
- * @param allow_identity If nonzero, an encoding with s == 0 is accepted.
- * @return decaf_succeed_if() applied to the accumulated success mask.
- */
-decaf_error_t API_NS(point_decode) (
-    point_t p,
-    const unsigned char ser[SER_BYTES],
-    decaf_bool_t allow_identity
-) {
-    gf s, s2, num, tmp;
-    gf_s *tmp2=s2, *ynum=p->z, *isr=p->x, *den=p->t; /* output coordinates borrowed as temporaries */
-    
-    mask_t succ = gf_deserialize(s, ser, 1, 0);
-    succ &= bool_to_mask(allow_identity) | ~gf_eq(s, ZERO);
-    succ &= ~gf_lobit(s);
-    
-    gf_sqr(s2,s);                  /* s^2 = -as^2 */
-#if IMAGINE_TWIST
-    gf_sub(s2,ZERO,s2);            /* -as^2 */
-#endif
-    gf_sub(den,ONE,s2);            /* 1+as^2 */
-    gf_add(ynum,ONE,s2);           /* 1-as^2 */
-    gf_mulw(num,s2,-4*TWISTED_D);
-    gf_sqr(tmp,den);               /* tmp = den^2 */
-    gf_add(num,tmp,num);           /* num = den^2 - 4*d*s^2 */
-    gf_mul(tmp2,num,tmp);          /* tmp2 = num*den^2 */
-    succ &= gf_isr(isr,tmp2);      /* isr = 1/sqrt(num*den^2) */
-    gf_mul(tmp,isr,den);           /* isr*den */
-    gf_mul(p->y,tmp,ynum);         /* isr*den*(1-as^2) */
-    gf_mul(tmp2,tmp,s);            /* s*isr*den */
-    gf_add(tmp2,tmp2,tmp2);        /* 2*s*isr*den */
-    gf_mul(tmp,tmp2,isr);          /* 2*s*isr^2*den */
-    gf_mul(p->x,tmp,num);          /* 2*s*isr^2*den*num */
-    gf_mul(tmp,tmp2,RISTRETTO_FACTOR); /* 2*s*isr*den*magic */
-    gf_cond_neg(p->x,gf_lobit(tmp)); /* flip x */
-    
-#if COFACTOR==8
-    /* Additionally check y != 0 and x*y*isomagic nonnegative */
-    succ &= ~gf_eq(p->y,ZERO);
-    gf_mul(tmp,p->x,p->y);
-    gf_mul(tmp2,tmp,RISTRETTO_FACTOR);
-    succ &= ~gf_lobit(tmp2);
-#endif
-
-#if IMAGINE_TWIST
-    gf_copy(tmp,p->x);
-    gf_mul_i(p->x,tmp);
-#endif
-
-    /* Fill in z and t */
-    gf_copy(p->z,ONE);
-    gf_mul(p->t,p->x,p->y);
-    
-    assert(API_NS(point_valid)(p) | ~succ); /* debug check: a successful decode must give a valid point */
-    return decaf_succeed_if(mask_to_bool(succ));
-}
-/**
- * Point subtraction: p = q - r.
- *
- * Same operation sequence as point_add, with the uses of (r->y - r->x) and
- * (r->y + r->x) exchanged and the two NEG_D branches swapped -- the effect
- * of negating r->x and r->t, which is what point_negate does.
- *
- * Every coordinate of q and r is read before the matching coordinate of p
- * is written, so p may alias q (as callers in this file do).
- *
- * The trailing "n+e" comments are the generator's running bound notes;
- * presumably they track headroom for the non-reducing _nr operations.
- */
-void API_NS(point_sub) (
-    point_t p,
-    const point_t q,
-    const point_t r
-) {
-    gf a, b, c, d;
-    gf_sub_nr ( b, q->y, q->x ); /* 3+e */
-    gf_sub_nr ( d, r->y, r->x ); /* 3+e */
-    gf_add_nr ( c, r->y, r->x ); /* 2+e */
-    gf_mul ( a, c, b );
-    gf_add_nr ( b, q->y, q->x ); /* 2+e */
-    gf_mul ( p->y, d, b );
-    gf_mul ( b, r->t, q->t );
-    gf_mulw ( p->x, b, 2*EFF_D );
-    gf_add_nr ( b, a, p->y );    /* 2+e */
-    gf_sub_nr ( c, p->y, a );    /* 3+e */
-    gf_mul ( a, q->z, r->z );
-    gf_add_nr ( a, a, a );       /* 2+e */
-    if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
-#if NEG_D
-    gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
-    gf_add_nr ( a, a, p->x );    /* 3+e or 2+e */
-#else
-    gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
-    gf_sub_nr ( a, a, p->x );    /* 4+e or 3+e */
-#endif
-    gf_mul ( p->z, a, p->y );
-    gf_mul ( p->x, p->y, c );
-    gf_mul ( p->y, a, b );
-    gf_mul ( p->t, b, c );
-}
-/**
- * Point addition: p = q + r, producing all four coordinates x, y, z, t.
- *
- * EFF_D is the magnitude of TWISTED_D; the NEG_D branches below apply the
- * sign that was stripped from it.
- *
- * Every coordinate of q and r is read before the matching coordinate of p
- * is written, so p may alias q (as callers in this file do).
- *
- * The trailing "n+e" comments are the generator's running bound notes;
- * presumably they track headroom for the non-reducing _nr operations.
- */
-void API_NS(point_add) (
-    point_t p,
-    const point_t q,
-    const point_t r
-) {
-    gf a, b, c, d;
-    gf_sub_nr ( b, q->y, q->x ); /* 3+e */
-    gf_sub_nr ( c, r->y, r->x ); /* 3+e */
-    gf_add_nr ( d, r->y, r->x ); /* 2+e */
-    gf_mul ( a, c, b );
-    gf_add_nr ( b, q->y, q->x ); /* 2+e */
-    gf_mul ( p->y, d, b );
-    gf_mul ( b, r->t, q->t );
-    gf_mulw ( p->x, b, 2*EFF_D );
-    gf_add_nr ( b, a, p->y );    /* 2+e */
-    gf_sub_nr ( c, p->y, a );    /* 3+e */
-    gf_mul ( a, q->z, r->z );
-    gf_add_nr ( a, a, a );       /* 2+e */
-    if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
-#if NEG_D
-    gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
-    gf_sub_nr ( a, a, p->x );    /* 4+e or 3+e */
-#else
-    gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
-    gf_add_nr ( a, a, p->x );    /* 3+e or 2+e */
-#endif
-    gf_mul ( p->z, a, p->y );
-    gf_mul ( p->x, p->y, c );
-    gf_mul ( p->y, a, b );
-    gf_mul ( p->t, b, c );
-}
-/**
- * Point doubling: p = 2*q.  p may alias q; callers in this file pass the
- * same point for both.
- *
- * @param p             Output point.
- * @param q             Point to double.
- * @param before_double If nonzero, the final multiplication that produces
- *                      p->t is skipped.  p->t is used as scratch earlier in
- *                      the routine, so in that case it is left holding an
- *                      intermediate value, not a valid t coordinate.
- *                      Callers pass nonzero when another doubling follows.
- */
-static DECAF_NOINLINE void
-point_double_internal (
-    point_t p,
-    const point_t q,
-    int before_double
-) {
-    gf a, b, c, d;
-    gf_sqr ( c, q->x );
-    gf_sqr ( a, q->y );
-    gf_add_nr ( d, c, a );             /* 2+e */
-    gf_add_nr ( p->t, q->y, q->x );    /* 2+e */
-    gf_sqr ( b, p->t );
-    gf_subx_nr ( b, b, d, 3 );         /* 4+e */
-    gf_sub_nr ( p->t, a, c );          /* 3+e */
-    gf_sqr ( p->x, q->z );
-    gf_add_nr ( p->z, p->x, p->x );    /* 2+e */
-    gf_subx_nr ( a, p->z, p->t, 4 );   /* 6+e */
-    if (GF_HEADROOM == 5) gf_weak_reduce(a); /* or 1+e */
-    gf_mul ( p->x, a, b );
-    gf_mul ( p->z, p->t, a );
-    gf_mul ( p->y, p->t, d );
-    if (!before_double) gf_mul ( p->t, b, d );
-}
-/**
- * Public doubling entry point: p = 2*q with all four coordinates computed
- * (point_double_internal is called with before_double = 0).
- */
-void API_NS(point_double)(point_t p, const point_t q) {
-    point_double_internal(p,q,0);
-}
-/**
- * Negate a point: nega gets a with x and t replaced by 0 - x and 0 - t;
- * y and z are copied unchanged.
- *
- * @param nega Output point.
- * @param a    Point to negate.
- */
-void API_NS(point_negate) (
-   point_t nega,
-   const point_t a
-) {
-    gf_sub(nega->x, ZERO, a->x);
-    gf_copy(nega->y, a->y);
-    gf_copy(nega->z, a->z);
-    gf_sub(nega->t, ZERO, a->t);
-}
-
-/* Operations on [p]niels
- *
- * cond_neg_niels: conditionally negate a Niels point in place.  Under the
- * mask neg, a and b are swapped and c is negated.  With a = y - x,
- * b = x + y and c proportional to t (see pt_to_pniels), that matches
- * negating x and t.  The same operations run whatever the mask value.
- */
-static DECAF_INLINE void
-cond_neg_niels (
-    niels_t n,
-    mask_t neg
-) {
-    gf_cond_swap(n->a, n->b, neg);
-    gf_cond_neg(n->c, neg);
-}
-/**
- * Convert extended point a to projective Niels form b:
- * a = y - x, b = x + y, c = 2*TWISTED_D*t, z = 2*z.
- */
-static DECAF_NOINLINE void pt_to_pniels (
-    pniels_t b,
-    const point_t a
-) {
-    gf_sub ( b->n->a, a->y, a->x );
-    gf_add ( b->n->b, a->x, a->y );
-    gf_mulw ( b->n->c, a->t, 2*TWISTED_D );
-    gf_add ( b->z, a->z, a->z );
-}
-/**
- * Convert projective Niels point d to extended point e.
- *
- * Only d's a, b and z are read; d's c is not used.  e->y is used as a
- * temporary for (b - a) before it receives its final value.
- */
-static DECAF_NOINLINE void pniels_to_pt (
-    point_t e,
-    const pniels_t d
-) {
-    gf eu;
-    gf_add ( eu, d->n->b, d->n->a );   /* eu = b + a */
-    gf_sub ( e->y, d->n->b, d->n->a ); /* temporary: b - a */
-    gf_mul ( e->t, e->y, eu);          /* t = (b - a)(b + a) */
-    gf_mul ( e->x, d->z, e->y );       /* x = z(b - a) */
-    gf_mul ( e->y, d->z, eu );         /* y = z(b + a) */
-    gf_sqr ( e->z, d->z );             /* z = z^2 */
-}
-/**
- * Convert affine Niels point n to extended point e:
- * y = b + a, x = b - a, t = x*y, z = ONE.  n's c is not read.
- */
-static DECAF_NOINLINE void
-niels_to_pt (
-    point_t e,
-    const niels_t n
-) {
-    gf_add ( e->y, n->b, n->a );
-    gf_sub ( e->x, n->b, n->a );
-    gf_mul ( e->t, e->y, e->x );
-    gf_copy ( e->z, ONE );
-}
-/**
- * In-place mixed addition: d = d + e, where e is in Niels form.
- *
- * @param d             Point updated in place.  d->t is read; d->z is
- *                      used as found (add_pniels_to_pt rescales it first
- *                      for projective inputs).
- * @param e             Niels point to add.
- * @param before_double If nonzero, d->t is not recomputed and keeps its
- *                      old, now stale, value.
- */
-static DECAF_NOINLINE void
-add_niels_to_pt (
-    point_t d,
-    const niels_t e,
-    int before_double
-) {
-    gf a, b, c;
-    gf_sub_nr ( b, d->y, d->x ); /* 3+e */
-    gf_mul ( a, e->a, b );
-    gf_add_nr ( b, d->x, d->y ); /* 2+e */
-    gf_mul ( d->y, e->b, b );
-    gf_mul ( d->x, e->c, d->t );
-    gf_add_nr ( c, a, d->y );    /* 2+e */
-    gf_sub_nr ( b, d->y, a );    /* 3+e */
-    gf_sub_nr ( d->y, d->z, d->x ); /* 3+e */
-    gf_add_nr ( a, d->x, d->z ); /* 2+e */
-    gf_mul ( d->z, a, d->y );
-    gf_mul ( d->x, d->y, b );
-    gf_mul ( d->y, a, c );
-    if (!before_double) gf_mul ( d->t, b, c );
-}
-/**
- * In-place mixed subtraction: d = d - e, where e is in Niels form.
- *
- * Mirror image of add_niels_to_pt: e->a and e->b trade places and the
- * add/sub pair that combines d->z with d->x is exchanged.
- *
- * @param d             Point updated in place.
- * @param e             Niels point to subtract.
- * @param before_double If nonzero, d->t is not recomputed and keeps its
- *                      old, now stale, value.
- */
-static DECAF_NOINLINE void
-sub_niels_from_pt (
-    point_t d,
-    const niels_t e,
-    int before_double
-) {
-    gf a, b, c;
-    gf_sub_nr ( b, d->y, d->x ); /* 3+e */
-    gf_mul ( a, e->b, b );
-    gf_add_nr ( b, d->x, d->y ); /* 2+e */
-    gf_mul ( d->y, e->a, b );
-    gf_mul ( d->x, e->c, d->t );
-    gf_add_nr ( c, a, d->y );    /* 2+e */
-    gf_sub_nr ( b, d->y, a );    /* 3+e */
-    gf_add_nr ( d->y, d->z, d->x ); /* 2+e */
-    gf_sub_nr ( a, d->z, d->x ); /* 3+e */
-    gf_mul ( d->z, a, d->y );
-    gf_mul ( d->x, d->y, b );
-    gf_mul ( d->y, a, c );
-    if (!before_double) gf_mul ( d->t, b, c );
-}
-/**
- * In-place addition of a projective Niels point: p = p + pn.
- *
- * First multiplies p->z by pn->z, then hands the Niels part to
- * add_niels_to_pt.  before_double is passed through unchanged.
- */
-static void
-add_pniels_to_pt (
-    point_t p,
-    const pniels_t pn,
-    int before_double
-) {
-    gf L0;
-    gf_mul ( L0, p->z, pn->z ); /* product goes through a temporary, then back into p->z */
-    gf_copy ( p->z, L0 );
-    add_niels_to_pt( p, pn->n, before_double );
-}
-/**
- * In-place subtraction of a projective Niels point: p = p - pn.
- *
- * First multiplies p->z by pn->z, then hands the Niels part to
- * sub_niels_from_pt.  before_double is passed through unchanged.
- */
-static void
-sub_pniels_from_pt (
-    point_t p,
-    const pniels_t pn,
-    int before_double
-) {
-    gf L0;
-    gf_mul ( L0, p->z, pn->z ); /* product goes through a temporary, then back into p->z */
-    gf_copy ( p->z, L0 );
-    sub_niels_from_pt( p, pn->n, before_double );
-}
-/**
- * Fill multiples[0..ntable-1] with the odd multiples b, 3b, 5b, ... of b
- * in projective Niels form.
- *
- * @param multiples Output table; must have room for ntable entries.
- * @param b         Base point.
- * @param ntable    Number of entries to produce.
- *
- * Local temporaries are wiped with decaf_bzero before returning.
- */
-static DECAF_NOINLINE void
-prepare_fixed_window(
-    pniels_t *multiples,
-    const point_t b,
-    int ntable
-) {
-    point_t tmp;
-    pniels_t pn;
-    int i;
-    
-    point_double_internal(tmp, b, 0);
-    pt_to_pniels(pn, tmp);            /* pn = 2b, the step between table entries */
-    pt_to_pniels(multiples[0], b);
-    API_NS(point_copy)(tmp, b);
-    for (i=1; i<ntable; i++) {
-        add_pniels_to_pt(tmp, pn, 0); /* tmp += 2b; t is needed by pt_to_pniels */
-        pt_to_pniels(multiples[i], tmp);
-    }
-    
-    decaf_bzero(pn,sizeof(pn));
-    decaf_bzero(tmp,sizeof(tmp));
-}
-/**
- * Variable-base scalar multiplication: a = scalar * b.
- *
- * Uses a signed fixed window of DECAF_WINDOW_BITS bits over a table of odd
- * multiples of b.  The scalar is first recoded as
- * (scalar + point_scalarmul_adjustment) / 2.  Table entries are fetched
- * with constant_time_lookup and negated with a mask.
- *
- * a is written only at the end, so a may alias b (direct_scalarmul relies
- * on this).  All temporaries are wiped before returning.
- *
- * NOTE(review): NTABLE is a const int rather than a constant expression,
- * so multiples[] is formally a variable-length array.
- */
-void API_NS(point_scalarmul) (
-    point_t a,
-    const point_t b,
-    const scalar_t scalar
-) {
-    const int WINDOW = DECAF_WINDOW_BITS,
-        WINDOW_MASK = (1<<WINDOW)-1,
-        WINDOW_T_MASK = WINDOW_MASK >> 1,
-        NTABLE = 1<<(WINDOW-1);
-        
-    scalar_t scalar1x;
-    API_NS(scalar_add)(scalar1x, scalar, point_scalarmul_adjustment);
-    API_NS(scalar_halve)(scalar1x,scalar1x);
-    
-    /* Set up a precomputed table with odd multiples of b. */
-    pniels_t pn, multiples[NTABLE];
-    point_t tmp;
-    prepare_fixed_window(multiples, b, NTABLE);
-
-    /* Initialize. */
-    int i,j,first=1;
-    i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1; /* bit offset of the topmost window */
-
-    for (; i>=0; i-=WINDOW) {
-        /* Fetch another block of bits */
-        word_t bits = scalar1x->limb[i/WBITS] >> (i%WBITS);
-        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
-            bits ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS)); /* window straddles two limbs */
-        }
-        bits &= WINDOW_MASK;
-        mask_t inv = (bits>>(WINDOW-1))-1; /* all ones when the window's top bit is clear */
-        bits ^= inv;
-    
-        /* Add in from table.  Compute t only on last iteration. */
-        constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK);
-        cond_neg_niels(pn->n, inv);
-        if (first) {
-            pniels_to_pt(tmp, pn);
-            first = 0;
-        } else {
-           /* Using Hisil et al's lookahead method instead of extensible here
-            * for no particular reason.  Double WINDOW times, but only compute t on
-            * the last one.
-            */
-            for (j=0; j<WINDOW-1; j++)
-                point_double_internal(tmp, tmp, -1);
-            point_double_internal(tmp, tmp, 0);
-            add_pniels_to_pt(tmp, pn, i ? -1 : 0); /* skip t unless this is the final window */
-        }
-    }
-    
-    /* Write out the answer */
-    API_NS(point_copy)(a,tmp);
-    
-    decaf_bzero(scalar1x,sizeof(scalar1x));
-    decaf_bzero(pn,sizeof(pn));
-    decaf_bzero(multiples,sizeof(multiples));
-    decaf_bzero(tmp,sizeof(tmp));
-}
-/**
- * Double-base scalar multiplication: a = scalarb * b + scalarc * c.
- *
- * Same signed fixed-window scheme as point_scalarmul, run over two tables
- * in lock step so the doublings are shared.  Both scalars are recoded as
- * (scalar + point_scalarmul_adjustment) / 2.
- *
- * a is written only at the end.  All temporaries are wiped before
- * returning.
- */
-void API_NS(point_double_scalarmul) (
-    point_t a,
-    const point_t b,
-    const scalar_t scalarb,
-    const point_t c,
-    const scalar_t scalarc
-) {
-    const int WINDOW = DECAF_WINDOW_BITS,
-        WINDOW_MASK = (1<<WINDOW)-1,
-        WINDOW_T_MASK = WINDOW_MASK >> 1,
-        NTABLE = 1<<(WINDOW-1);
-        
-    scalar_t scalar1x, scalar2x;
-    API_NS(scalar_add)(scalar1x, scalarb, point_scalarmul_adjustment);
-    API_NS(scalar_halve)(scalar1x,scalar1x);
-    API_NS(scalar_add)(scalar2x, scalarc, point_scalarmul_adjustment);
-    API_NS(scalar_halve)(scalar2x,scalar2x);
-    
-    /* Set up precomputed tables with odd multiples of b and of c. */
-    pniels_t pn, multiples1[NTABLE], multiples2[NTABLE];
-    point_t tmp;
-    prepare_fixed_window(multiples1, b, NTABLE);
-    prepare_fixed_window(multiples2, c, NTABLE);
-
-    /* Initialize. */
-    int i,j,first=1;
-    i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1; /* bit offset of the topmost window */
-
-    for (; i>=0; i-=WINDOW) {
-        /* Fetch another block of bits */
-        word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
-                     bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
-        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
-            bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
-            bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
-        }
-        bits1 &= WINDOW_MASK;
-        bits2 &= WINDOW_MASK;
-        mask_t inv1 = (bits1>>(WINDOW-1))-1; /* all ones when the window's top bit is clear */
-        mask_t inv2 = (bits2>>(WINDOW-1))-1;
-        bits1 ^= inv1;
-        bits2 ^= inv2;
-    
-        /* Add in from table.  Compute t only on last iteration. */
-        constant_time_lookup(pn, multiples1, sizeof(pn), NTABLE, bits1 & WINDOW_T_MASK);
-        cond_neg_niels(pn->n, inv1);
-        if (first) {
-            pniels_to_pt(tmp, pn);
-            first = 0;
-        } else {
-           /* Using Hisil et al's lookahead method instead of extensible here
-            * for no particular reason.  Double WINDOW times, but only compute t on
-            * the last one.
-            */
-            for (j=0; j<WINDOW-1; j++)
-                point_double_internal(tmp, tmp, -1);
-            point_double_internal(tmp, tmp, 0);
-            add_pniels_to_pt(tmp, pn, 0); /* t is kept: the second addition below reads it */
-        }
-        constant_time_lookup(pn, multiples2, sizeof(pn), NTABLE, bits2 & WINDOW_T_MASK);
-        cond_neg_niels(pn->n, inv2);
-        add_pniels_to_pt(tmp, pn, i?-1:0); /* skip t unless this is the final window */
-    }
-    
-    /* Write out the answer */
-    API_NS(point_copy)(a,tmp);
-    
-
-    decaf_bzero(scalar1x,sizeof(scalar1x));
-    decaf_bzero(scalar2x,sizeof(scalar2x));
-    decaf_bzero(pn,sizeof(pn));
-    decaf_bzero(multiples1,sizeof(multiples1));
-    decaf_bzero(multiples2,sizeof(multiples2));
-    decaf_bzero(tmp,sizeof(tmp));
-}
-/**
- * Dual scalar multiplication of one base: a1 = scalar1 * b and
- * a2 = scalar2 * b.
- *
- * Works bucket-style.  Two tables of NTABLE accumulators start at the
- * identity.  The scalars are scanned from the low bits upward while
- * "working" is doubled WINDOW times between steps; at each step
- * +-working is added into the accumulator selected by each scalar's
- * window.  The accumulators are then combined into the two results.
- *
- * Both scalars are recoded as (scalar + point_scalarmul_adjustment) / 2.
- * Accumulators are read and written through constant_time_lookup and
- * constant_time_insert.  All temporaries are wiped before returning.
- */
-void API_NS(point_dual_scalarmul) (
-    point_t a1,
-    point_t a2,
-    const point_t b,
-    const scalar_t scalar1,
-    const scalar_t scalar2
-) {
-    const int WINDOW = DECAF_WINDOW_BITS,
-        WINDOW_MASK = (1<<WINDOW)-1,
-        WINDOW_T_MASK = WINDOW_MASK >> 1,
-        NTABLE = 1<<(WINDOW-1);
-        
-    scalar_t scalar1x, scalar2x;
-    API_NS(scalar_add)(scalar1x, scalar1, point_scalarmul_adjustment);
-    API_NS(scalar_halve)(scalar1x,scalar1x);
-    API_NS(scalar_add)(scalar2x, scalar2, point_scalarmul_adjustment);
-    API_NS(scalar_halve)(scalar2x,scalar2x);
-    
-    /* Accumulator tables (not multiples of b, despite the names), plus the running point. */
-    point_t multiples1[NTABLE], multiples2[NTABLE], working, tmp;
-    pniels_t pn;
-    
-    API_NS(point_copy)(working, b);
-
-    /* Initialize. */
-    int i,j;
-    
-    for (i=0; i<NTABLE; i++) {
-        API_NS(point_copy)(multiples1[i], API_NS(point_identity));
-        API_NS(point_copy)(multiples2[i], API_NS(point_identity));
-    }
-
-    for (i=0; i<SCALAR_BITS; i+=WINDOW) {   
-        if (i) {
-            for (j=0; j<WINDOW-1; j++)
-                point_double_internal(working, working, -1);
-            point_double_internal(working, working, 0); /* last doubling computes t for pt_to_pniels */
-        }
-        
-        /* Fetch another block of bits */
-        word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
-               bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
-        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
-            bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
-            bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
-        }
-        bits1 &= WINDOW_MASK;
-        bits2 &= WINDOW_MASK;
-        mask_t inv1 = (bits1>>(WINDOW-1))-1;
-        mask_t inv2 = (bits2>>(WINDOW-1))-1;
-        bits1 ^= inv1;
-        bits2 ^= inv2;
-        
-        pt_to_pniels(pn, working);
-
-        constant_time_lookup(tmp, multiples1, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
-        cond_neg_niels(pn->n, inv1);
-        /* add_pniels_to_pt(multiples1[bits1 & WINDOW_T_MASK], pn, 0); */
-        add_pniels_to_pt(tmp, pn, 0);
-        constant_time_insert(multiples1, tmp, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
-        
-        
-        constant_time_lookup(tmp, multiples2, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
-        cond_neg_niels(pn->n, inv1^inv2); /* pn already carries inv1; XOR leaves it negated by inv2 only */
-        /* add_pniels_to_pt(multiples2[bits2 & WINDOW_T_MASK], pn, 0); */
-        add_pniels_to_pt(tmp, pn, 0);
-        constant_time_insert(multiples2, tmp, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
-    }
-    
-    /* Combine the accumulators, highest index first, into a1 and a2. */
-    if (NTABLE > 1) {
-        API_NS(point_copy)(working, multiples1[NTABLE-1]);
-        API_NS(point_copy)(tmp    , multiples2[NTABLE-1]);
-    
-        for (i=NTABLE-1; i>1; i--) {
-            API_NS(point_add)(multiples1[i-1], multiples1[i-1], multiples1[i]);
-            API_NS(point_add)(multiples2[i-1], multiples2[i-1], multiples2[i]);
-            API_NS(point_add)(working, working, multiples1[i-1]);
-            API_NS(point_add)(tmp,     tmp,     multiples2[i-1]);
-        }
-    
-        API_NS(point_add)(multiples1[0], multiples1[0], multiples1[1]);
-        API_NS(point_add)(multiples2[0], multiples2[0], multiples2[1]);
-        point_double_internal(working, working, 0);
-        point_double_internal(tmp,         tmp, 0);
-        API_NS(point_add)(a1, working, multiples1[0]);
-        API_NS(point_add)(a2, tmp,     multiples2[0]);
-    } else {
-        API_NS(point_copy)(a1, multiples1[0]);
-        API_NS(point_copy)(a2, multiples2[0]);
-    }
-
-    decaf_bzero(scalar1x,sizeof(scalar1x));
-    decaf_bzero(scalar2x,sizeof(scalar2x));
-    decaf_bzero(pn,sizeof(pn));
-    decaf_bzero(multiples1,sizeof(multiples1));
-    decaf_bzero(multiples2,sizeof(multiples2));
-    decaf_bzero(tmp,sizeof(tmp));
-    decaf_bzero(working,sizeof(working));
-}
-/**
- * Compare two points, returning a decaf_bool_t.
- *
- * The test is p->y * q->x == q->y * p->x, i.e. equality of x/y, which per
- * the original note is equality modulo 2-torsion.  The second comparison
- * is compiled only for cofactor 8 with IMAGINE_TWIST, so it is absent from
- * this build.
- */
-decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) {
-    /* equality mod 2-torsion compares x/y */
-    gf a, b;
-    gf_mul ( a, p->y, q->x );
-    gf_mul ( b, q->y, p->x );
-    mask_t succ = gf_eq(a,b);
-    
-    #if (COFACTOR == 8) && IMAGINE_TWIST
-        gf_mul ( a, p->y, q->y );
-        gf_mul ( b, q->x, p->x );
-        #if !(IMAGINE_TWIST)
-            gf_sub ( a, ZERO, a );
-        #else
-           /* Interesting note: the 4tor would normally be rotation.
-            * But because of the *i twist, it's actually
-            * (x,y) <-> (iy,ix)
-            */
-    
-           /* No code, just a comment. */
-        #endif
-        succ |= gf_eq(a,b);
-    #endif
-    
-    return mask_to_bool(succ);
-}
-/**
- * Check that p is a consistent extended point.  All three conditions must
- * hold:
- *   - x*y == z*t
- *   - y^2 - x^2 == z^2 + TWISTED_D * t^2
- *   - z != 0
- *
- * @return The combined mask converted with mask_to_bool.
- */
-decaf_bool_t API_NS(point_valid) (
-    const point_t p
-) {
-    gf a,b,c;
-    gf_mul(a,p->x,p->y);
-    gf_mul(b,p->z,p->t);
-    mask_t out = gf_eq(a,b);
-    gf_sqr(a,p->x);
-    gf_sqr(b,p->y);
-    gf_sub(a,b,a);          /* a = y^2 - x^2 */
-    gf_sqr(b,p->t);
-    gf_mulw(c,b,TWISTED_D); /* c = TWISTED_D * t^2 */
-    gf_sqr(b,p->z);
-    gf_add(b,b,c);          /* b = z^2 + TWISTED_D * t^2 */
-    out &= gf_eq(a,b);
-    out &= ~gf_eq(p->z,ZERO);
-    return mask_to_bool(out);
-}
-/**
- * Debugging helper: write a transformed copy of p to q.
- *
- * In this build (the #else branch) q gets x and y negated with z and t
- * copied: (x, y, z, t) -> (-x, -y, z, t).  The cofactor-8 IMAGINE_TWIST
- * variant instead swaps x and y, each multiplied by SQRT_MINUS_ONE, and
- * negates t.
- */
-void API_NS(point_debugging_torque) (
-    point_t q,
-    const point_t p
-) {
-#if COFACTOR == 8 && IMAGINE_TWIST
-    gf tmp;
-    gf_mul(tmp,p->x,SQRT_MINUS_ONE);
-    gf_mul(q->x,p->y,SQRT_MINUS_ONE);
-    gf_copy(q->y,tmp);
-    gf_copy(q->z,p->z);
-    gf_sub(q->t,ZERO,p->t);
-#else
-    gf_sub(q->x,ZERO,p->x);
-    gf_sub(q->y,ZERO,p->y);
-    gf_copy(q->z,p->z);
-    gf_copy(q->t,p->t);
-#endif
-}
-/**
- * Debugging helper: multiply all four coordinates of p by one field
- * element and store the result in q.
- *
- * The factor is deserialised from the byte string factor; the status
- * returned by gf_deserialize is deliberately ignored.  A zero factor is
- * replaced by ONE.  Each product goes through a temporary, so q may
- * alias p.
- */
-void API_NS(point_debugging_pscale) (
-    point_t q,
-    const point_t p,
-    const uint8_t factor[SER_BYTES]
-) {
-    gf gfac,tmp;
-    /* NB this means you'll never pscale by negative numbers for p521 */
-    ignore_result(gf_deserialize(gfac,factor,0,0));
-    gf_cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO)); /* gfac = ONE if gfac == 0, else unchanged */
-    gf_mul(tmp,p->x,gfac);
-    gf_copy(q->x,tmp);
-    gf_mul(tmp,p->y,gfac);
-    gf_copy(q->y,tmp);
-    gf_mul(tmp,p->z,gfac);
-    gf_copy(q->z,tmp);
-    gf_mul(tmp,p->t,gfac);
-    gf_copy(q->t,tmp);
-}
-/**
- * Invert n field elements with a single field inversion:
- * out[i] = 1/in[i] for 0 <= i < n.
- *
- * Forward pass: out[k] is set to in[0] * ... * in[k-1] for k >= 1, and
- * out[0] to the product of all inputs.  That product is inverted once
- * (gf_invert is asked to assert on its result mask).  The backward pass
- * then peels one input off at a time.
- *
- * @param out Output array of n elements; must not overlap in (__restrict__).
- * @param in  Input array of n elements.
- * @param n   Element count; must be greater than 1 (asserted).
- */
-static void gf_batch_invert (
-    gf *__restrict__ out,
-    const gf *in,
-    unsigned int n
-) {
-    gf t1;
-    assert(n>1);
-  
-    gf_copy(out[1], in[0]);
-    int i;
-    for (i=1; i<(int) (n-1); i++) {
-        gf_mul(out[i+1], out[i], in[i]);
-    }
-    gf_mul(out[0], out[n-1], in[n-1]); /* out[0] = product of every input */
-
-    gf_invert(out[0], out[0], 1);
-
-    for (i=n-1; i>0; i--) {
-        gf_mul(t1, out[i], out[0]); /* prefix product times running inverse = 1/in[i] */
-        gf_copy(out[i], t1);
-        gf_mul(t1, out[0], in[i]);  /* remove in[i] from the running inverse */
-        gf_copy(out[0], t1);
-    }
-}
-/**
- * Scale every Niels entry of table by the inverse of its z value.
- *
- * Inverts zs[0..n-1] into zis with gf_batch_invert, then multiplies each
- * entry's a, b and c by zis[i], strongly reducing each product before it
- * is stored back.
- *
- * @param table Entries updated in place.
- * @param zs    Per-entry z values.
- * @param zis   Scratch/output array receiving the inverses.
- * @param n     Number of entries; gf_batch_invert asserts n > 1.
- */
-static void batch_normalize_niels (
-    niels_t *table,
-    const gf *zs,
-    gf *__restrict__ zis,
-    int n
-) {
-    int i;
-    gf product;
-    gf_batch_invert(zis, zs, n);
-
-    for (i=0; i<n; i++) {
-        gf_mul(product, table[i]->a, zis[i]);
-        gf_strong_reduce(product);
-        gf_copy(table[i]->a, product);
-        
-        gf_mul(product, table[i]->b, zis[i]);
-        gf_strong_reduce(product);
-        gf_copy(table[i]->b, product);
-        
-        gf_mul(product, table[i]->c, zis[i]);
-        gf_strong_reduce(product);
-        gf_copy(table[i]->c, product);
-    }
-    
-    decaf_bzero(product,sizeof(product));
-}
-/**
- * Build the comb tables for base into table, for use by
- * precomputed_scalarmul.
- *
- * For each of the n combs, the doubling phase accumulates a starting point
- * and records intermediate doublings in doubles[].  The Gray-code phase
- * then walks all 2^(t-1) table slots of that comb, changing the point by
- * adding or subtracting a single doubles[k] per step.  Entries are stored
- * in projective Niels form with their z values kept aside in zs, and
- * normalised in one batch at the end.
- *
- * All temporaries are wiped before returning.
- *
- * NOTE(review): n, t and s are const objects rather than constant
- * expressions, so doubles[], zs[] and zis[] are formally variable-length
- * arrays.
- */
-void API_NS(precompute) (
-    precomputed_s *table,
-    const point_t base
-) { 
-    const unsigned int n = COMBS_N, t = COMBS_T, s = COMBS_S;
-    assert(n*t*s >= SCALAR_BITS); /* the combs must cover every scalar bit */
-  
-    point_t working, start, doubles[t-1];
-    API_NS(point_copy)(working, base);
-    pniels_t pn_tmp;
-  
-    gf zs[n<<(t-1)], zis[n<<(t-1)];
-  
-    unsigned int i,j,k;
-    
-    /* Compute n tables */
-    for (i=0; i<n; i++) {
-
-        /* Doubling phase */
-        for (j=0; j<t; j++) {
-            if (j) API_NS(point_add)(start, start, working);
-            else API_NS(point_copy)(start, working);
-
-            if (j==t-1 && i==n-1) break; /* nothing after the very last tooth needs working */
-
-            point_double_internal(working, working,0);
-            if (j<t-1) API_NS(point_copy)(doubles[j], working);
-
-            for (k=0; k<s-1; k++)
-                point_double_internal(working, working, k<s-2); /* t computed only on the last doubling */
-        }
-
-        /* Gray-code phase */
-        for (j=0;; j++) {
-            int gray = j ^ (j>>1);
-            int idx = (((i+1)<<(t-1))-1) ^ gray; /* slot inside comb i's block of 2^(t-1) entries */
-
-            pt_to_pniels(pn_tmp, start);
-            memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n));
-            gf_copy(zs[idx], pn_tmp->z);
-			
-            if (j >= (1u<<(t-1)) - 1) break;
-            int delta = (j+1) ^ ((j+1)>>1) ^ gray; /* the single bit that differs in the next Gray code */
-
-            for (k=0; delta>1; k++)
-                delta >>=1;                        /* k = index of that bit */
-            
-            if (gray & (1<<k)) {
-                API_NS(point_add)(start, start, doubles[k]);
-            } else {
-                API_NS(point_sub)(start, start, doubles[k]);
-            }
-        }
-    }
-    
-    batch_normalize_niels(table->table,(const gf *)zs,zis,n<<(t-1));
-    
-    decaf_bzero(zs,sizeof(zs));
-    decaf_bzero(zis,sizeof(zis));
-    decaf_bzero(pn_tmp,sizeof(pn_tmp));
-    decaf_bzero(working,sizeof(working));
-    decaf_bzero(start,sizeof(start));
-    decaf_bzero(doubles,sizeof(doubles));
-}
-/**
- * Typed wrapper around constant_time_lookup for Niels tables: copies entry
- * idx of the nelts-entry table into ni, with sizeof(niels_s) as the
- * element size.
- */
-static DECAF_INLINE void
-constant_time_lookup_niels (
-    niels_s *__restrict__ ni,
-    const niels_t *table,
-    int nelts,
-    int idx
-) {
-    constant_time_lookup(ni, table, sizeof(niels_s), nelts, idx);
-}
-/**
- * Fixed-base scalar multiplication using a comb table:
- * out = scalar * (the base the table was built from).
- *
- * The scalar is recoded as (scalar + precomputed_scalarmul_adjustment) / 2.
- * For each of the s rounds, highest first, out is doubled once and one
- * entry from each of the n combs is added.  The entry index is assembled
- * from t scalar bits spaced s apart; its top bit selects the sign.
- * Entries are fetched with a constant-time lookup.
- *
- * @param out    Result point.
- * @param table  Comb table, e.g. as filled in by precompute.
- * @param scalar Scalar multiplier.
- */
-void API_NS(precomputed_scalarmul) (
-    point_t out,
-    const precomputed_s *table,
-    const scalar_t scalar
-) {
-    int i;
-    unsigned j,k;
-    const unsigned int n = COMBS_N, t = COMBS_T, s = COMBS_S;
-    
-    scalar_t scalar1x;
-    API_NS(scalar_add)(scalar1x, scalar, precomputed_scalarmul_adjustment);
-    API_NS(scalar_halve)(scalar1x,scalar1x);
-    
-    niels_t ni;
-    
-    for (i=s-1; i>=0; i--) {
-        if (i != (int)s-1) point_double_internal(out,out,0);
-        
-        for (j=0; j<n; j++) {
-            int tab = 0;
-         
-            for (k=0; k<t; k++) {
-                unsigned int bit = i + s*(k + j*t);
-                if (bit < SCALAR_BITS) { /* positions past the scalar length contribute 0 */
-                    tab |= (scalar1x->limb[bit/WBITS] >> (bit%WBITS) & 1) << k;
-                }
-            }
-            
-            mask_t invert = (tab>>(t-1))-1; /* all ones when the index's top bit is clear */
-            tab ^= invert;
-            tab &= (1<<(t-1)) - 1;
-
-            constant_time_lookup_niels(ni, &table->table[j<<(t-1)], 1<<(t-1), tab);
-
-            cond_neg_niels(ni, invert);
-            if ((i!=(int)s-1)||j) {
-                add_niels_to_pt(out, ni, j==n-1 && i); /* skip t when a doubling comes next */
-            } else {
-                niels_to_pt(out, ni); /* very first entry initialises out */
-            }
-        }
-    }
-    
-    decaf_bzero(ni,sizeof(ni));
-    decaf_bzero(scalar1x,sizeof(scalar1x));
-}
-/**
- * Select between two points: forwards a, b and the mask form of pick_b to
- * constant_time_select over sizeof(point_t) bytes, writing to out.  Per
- * the parameter name, b is chosen when pick_b is set, otherwise a.
- */
-void API_NS(point_cond_sel) (
-    point_t out,
-    const point_t a,
-    const point_t b,
-    decaf_bool_t pick_b
-) {
-    constant_time_select(out,a,b,sizeof(point_t),bool_to_mask(pick_b),0);
-}
-/**
- * Decode a point, multiply it by scalar, and encode the result.
- *
- * @param scaled         Output encoding, SER_BYTES bytes.
- * @param base           Input encoding of the base point.
- * @param scalar         Scalar multiplier.
- * @param allow_identity Passed through to point_decode.
- * @param short_circuit  If nonzero, return immediately when decoding
- *                       fails, leaving scaled unwritten.  Otherwise the
- *                       multiplication still runs, on point_base in place
- *                       of the undecodable input.
- * @return The status from point_decode.
- *
- * NOTE(review): succ, a decaf_error_t, is handed to point_cond_sel as its
- * decaf_bool_t argument; this relies on the success value acting as
- * "true" -- confirm against the decaf_error_t definition.
- */
-/* FUTURE: restore Curve25519 Montgomery ladder? */
-decaf_error_t API_NS(direct_scalarmul) (
-    uint8_t scaled[SER_BYTES],
-    const uint8_t base[SER_BYTES],
-    const scalar_t scalar,
-    decaf_bool_t allow_identity,
-    decaf_bool_t short_circuit
-) {
-    point_t basep;
-    decaf_error_t succ = API_NS(point_decode)(basep, base, allow_identity);
-    if (short_circuit && succ != DECAF_SUCCESS) return succ;
-    API_NS(point_cond_sel)(basep, API_NS(point_base), basep, succ);
-    API_NS(point_scalarmul)(basep, basep, scalar);
-    API_NS(point_encode)(scaled, basep);
-    API_NS(point_destroy)(basep);
-    return succ;
-}
-/**
- * Encode a point in the EdDSA wire format.
- *
- * A working copy q of p (doubled first when COFACTOR == 8) is mapped through
- * one of three preprocessor-selected formulas to projective (x, y, z), which
- * is then divided through by z.  The affine y value is serialized into enc,
- * and bit 7 of enc[DECAF_EDDSA_448_PRIVATE_BYTES-1] is set from gf_lobit()
- * of the affine x value.  With the settings defined at the top of this file
- * (EDDSA_USE_SIGMA_ISOGENY 0, IMAGINE_TWIST 0, COFACTOR 4) the final #else
- * branch below is the one compiled.
- *
- * All field temporaries and q are wiped before returning.
- */
-void API_NS(point_mul_by_ratio_and_encode_like_eddsa) (
-    uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const point_t p
-) {
-    
-    /* The point is now on the twisted curve.  Move it to untwisted. */
-    gf x, y, z, t;
-    point_t q;
-#if COFACTOR == 8
-    API_NS(point_double)(q,p);
-#else
-    API_NS(point_copy)(q,p);
-#endif
-    
-#if EDDSA_USE_SIGMA_ISOGENY
-    {
-        /* Use 4-isogeny like ed25519:
-         *   2*x*y*sqrt(d/a-1)/(ax^2 + y^2 - 2)
-         *   (y^2 - ax^2)/(y^2 + ax^2)
-         * with a = -1, d = -EDWARDS_D:
-         *   -2xysqrt(EDWARDS_D-1)/(2z^2-y^2+x^2)
-         *   (y^2+x^2)/(y^2-x^2)
-         */
-        gf u;
-        gf_sqr ( x, q->x ); // x^2
-        gf_sqr ( t, q->y ); // y^2
-        gf_add( u, x, t ); // x^2 + y^2
-        gf_add( z, q->y, q->x );
-        gf_sqr ( y, z);
-        gf_sub ( y, u, y ); // -2xy
-        gf_sub ( z, t, x ); // y^2 - x^2
-        gf_sqr ( x, q->z );
-        gf_add ( t, x, x);
-        gf_sub ( t, t, z);  // 2z^2 - y^2 + x^2
-        gf_mul ( x, y, z ); // 2xy(y^2-x^2)
-        gf_mul ( y, u, t ); // (x^2+y^2)(2z^2-y^2+x^2)
-        gf_mul ( u, z, t );
-        gf_copy( z, u );
-        gf_mul ( u, x, RISTRETTO_FACTOR );
-#if IMAGINE_TWIST
-        gf_mul_i( x, u );
-#else
-#error "... probably wrong"
-        gf_copy( x, u );
-#endif
-        decaf_bzero(u,sizeof(u));
-    }
-#elif IMAGINE_TWIST
-    {
-        API_NS(point_double)(q,q);
-        API_NS(point_double)(q,q);
-        gf_mul_i(x, q->x);
-        gf_copy(y, q->y);
-        gf_copy(z, q->z);
-    }
-#else
-    {
-        /* 4-isogeny: 2xy/(y^2+x^2), (y^2-x^2)/(2z^2-y^2+x^2) */
-        gf u;
-        gf_sqr ( x, q->x ); /* x^2 */
-        gf_sqr ( t, q->y ); /* y^2 */
-        gf_add( u, x, t ); /* x^2 + y^2 */
-        gf_add( z, q->y, q->x );
-        gf_sqr ( y, z); /* (x+y)^2 */
-        gf_sub ( y, y, u ); /* 2xy */
-        gf_sub ( z, t, x ); /* y^2 - x^2 */
-        gf_sqr ( x, q->z ); /* z^2 */
-        gf_add ( t, x, x); /* 2z^2 */
-        gf_sub ( t, t, z); /* 2z^2 - y^2 + x^2 */
-        gf_mul ( x, t, y ); /* numerator of the new x, over the common denominator */
-        gf_mul ( y, z, u ); /* numerator of the new y, over the common denominator */
-        gf_mul ( z, u, t ); /* common denominator (x^2+y^2)(2z^2-y^2+x^2) */
-        decaf_bzero(u,sizeof(u));
-    }
-#endif
-    /* Affinize */
-    gf_invert(z,z,1);
-    gf_mul(t,x,z); /* t = x/z: affine x, used only for its low bit */
-    gf_mul(x,y,z); /* x = y/z: affine y, the value that gets serialized */
-    
-    /* Encode */
-    enc[DECAF_EDDSA_448_PRIVATE_BYTES-1] = 0;
-    gf_serialize(enc, x, 1);
-    enc[DECAF_EDDSA_448_PRIVATE_BYTES-1] |= 0x80 & gf_lobit(t);
-
-    decaf_bzero(x,sizeof(x));
-    decaf_bzero(y,sizeof(y));
-    decaf_bzero(z,sizeof(z));
-    decaf_bzero(t,sizeof(t));
-    API_NS(point_destroy)(q);
-}
-
-/**
- * Decode an EdDSA-format encoding into p.
- *
- * Bit 7 of the last byte is taken as the sign of x and cleared; the buffer
- * is then deserialized as y, and the cleared last byte must be zero.  In the
- * non-sigma configuration x is recovered as sqrt((1-y^2)/(1-d*y^2)), negated
- * when gf_lobit(x) disagrees with the transmitted bit, and the affine point
- * (x, y, 1) is pushed through the preprocessor-selected formulas below to
- * fill p->x, p->y, p->z and p->t.
- *
- * The local copy of the encoding is wiped before returning.
- *
- * @return decaf_succeed_if() applied to the accumulated success mask, which
- * combines the results of gf_deserialize(), the zero-last-byte check and
- * gf_isr().
- */
-decaf_error_t API_NS(point_decode_like_eddsa_and_mul_by_ratio) (
-    point_t p,
-    const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES]
-) {
-    uint8_t enc2[DECAF_EDDSA_448_PUBLIC_BYTES];
-    memcpy(enc2,enc,sizeof(enc2));
-
-    mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] & 0x80);
-    enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] &= ~0x80;
-    
-    mask_t succ = gf_deserialize(p->y, enc2, 1, 0);
-#if 0 == 0 /* always true; presumably residue of the generator template */
-    succ &= word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]);
-#endif
-
-    gf_sqr(p->x,p->y);
-    gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
-    #if EDDSA_USE_SIGMA_ISOGENY
-        gf_mulw(p->t,p->z,EDWARDS_D); /* d-dy^2 */
-        gf_mulw(p->x,p->z,EDWARDS_D-1); /* num = (1-y^2)(d-1) */
-        gf_copy(p->z,p->x);
-    #else
-        gf_mulw(p->t,p->x,EDWARDS_D); /* dy^2 */
-    #endif
-    gf_sub(p->t,ONE,p->t); /* denom = 1-dy^2 or 1-d + dy^2 */
-    
-    gf_mul(p->x,p->z,p->t);
-    succ &= gf_isr(p->t,p->x); /* 1/sqrt(num * denom) */
-    
-    gf_mul(p->x,p->t,p->z); /* sqrt(num / denom) */
-    gf_cond_neg(p->x,gf_lobit(p->x)^low);
-    gf_copy(p->z,ONE);
-  
-    #if EDDSA_USE_SIGMA_ISOGENY
-    {
-       /* Use 4-isogeny like ed25519:
-        *   2*x*y/sqrt(1-d/a)/(ax^2 + y^2 - 2)
-        *   (y^2 - ax^2)/(y^2 + ax^2)
-        * (MAGIC: above formula may be off by a factor of -a
-        * or something somewhere; check it for other a)
-        *
-        * with a = -1, d = -EDWARDS_D:
-        *   -2xy/sqrt(1-EDWARDS_D)/(2z^2-y^2+x^2)
-        *   (y^2+x^2)/(y^2-x^2)
-        */
-        gf a, b, c, d;
-        gf_sqr ( c, p->x );
-        gf_sqr ( a, p->y );
-        gf_add ( d, c, a ); // x^2 + y^2
-        gf_add ( p->t, p->y, p->x );
-        gf_sqr ( b, p->t );
-        gf_sub ( b, b, d ); // 2xy
-        gf_sub ( p->t, a, c ); // y^2 - x^2
-        gf_sqr ( p->x, p->z );
-        gf_add ( p->z, p->x, p->x );
-        gf_sub ( c, p->z, p->t ); // 2z^2 - y^2 + x^2
-        gf_div_i ( a, c );
-        gf_mul ( c, a, RISTRETTO_FACTOR );
-        gf_mul ( p->x, b, p->t); // (2xy)(y^2-x^2)
-        gf_mul ( p->z, p->t, c ); // (y^2-x^2)sd(2z^2 - y^2 + x^2)
-        gf_mul ( p->y, d, c ); // (y^2+x^2)sd(2z^2 - y^2 + x^2)
-        gf_mul ( p->t, d, b );
-        decaf_bzero(a,sizeof(a));
-        decaf_bzero(b,sizeof(b));
-        decaf_bzero(c,sizeof(c));
-        decaf_bzero(d,sizeof(d));
-    } 
-    #elif IMAGINE_TWIST
-    {
-        gf_mul(p->t,p->x,SQRT_MINUS_ONE);
-        gf_copy(p->x,p->t);
-        gf_mul(p->t,p->x,p->y);
-    }
-    #else
-    {
-        /* 4-isogeny 2xy/(y^2-ax^2), (y^2+ax^2)/(2-y^2-ax^2) */
-        gf a, b, c, d;
-        gf_sqr ( c, p->x ); /* x^2 */
-        gf_sqr ( a, p->y ); /* y^2 */
-        gf_add ( d, c, a ); /* x^2 + y^2 */
-        gf_add ( p->t, p->y, p->x );
-        gf_sqr ( b, p->t ); /* (x+y)^2 */
-        gf_sub ( b, b, d ); /* 2xy */
-        gf_sub ( p->t, a, c ); /* y^2 - x^2 */
-        gf_sqr ( p->x, p->z ); /* z^2 */
-        gf_add ( p->z, p->x, p->x ); /* 2z^2 */
-        gf_sub ( a, p->z, d ); /* 2z^2 - x^2 - y^2 */
-        gf_mul ( p->x, a, b ); /* X = (2z^2-x^2-y^2)(2xy) */
-        gf_mul ( p->z, p->t, a ); /* Z = (y^2-x^2)(2z^2-x^2-y^2) */
-        gf_mul ( p->y, p->t, d ); /* Y = (y^2-x^2)(x^2+y^2) */
-        gf_mul ( p->t, b, d ); /* T = (2xy)(x^2+y^2) */
-        decaf_bzero(a,sizeof(a));
-        decaf_bzero(b,sizeof(b));
-        decaf_bzero(c,sizeof(c));
-        decaf_bzero(d,sizeof(d));
-    }
-    #endif
-    
-    decaf_bzero(enc2,sizeof(enc2));
-    assert(API_NS(point_valid)(p) || ~succ);
-    
-    return decaf_succeed_if(mask_to_bool(succ));
-}
-/**
- * X448 scalar multiplication on serialized field elements.
- *
- * base is deserialized (the deserialization status is deliberately
- * discarded via ignore_result()) and run through an x-only ladder over the
- * bits of scalar, from bit X_PRIVATE_BITS-1 down to bit 0.  The scalar is
- * conditioned on the fly rather than copied.  Each bit is expanded to an
- * all-zeros/all-ones mask and handed to gf_cond_swap().
- *
- * @return success unless the resulting field element compares equal to
- * ZERO.  out is written in both cases, and all field temporaries are wiped
- * before returning.
- */
-decaf_error_t decaf_x448 (
-    uint8_t out[X_PUBLIC_BYTES],
-    const uint8_t base[X_PUBLIC_BYTES],
-    const uint8_t scalar[X_PRIVATE_BYTES]
-) {
-    gf x1, x2, z2, x3, z3, t1, t2;
-    ignore_result(gf_deserialize(x1,base,1,0));
-    gf_copy(x2,ONE);
-    gf_copy(z2,ZERO);
-    gf_copy(x3,x1);
-    gf_copy(z3,ONE);
-    
-    int t;
-    mask_t swap = 0;
-    
-    for (t = X_PRIVATE_BITS-1; t>=0; t--) {
-        uint8_t sb = scalar[t/8];
-        
-        /* Scalar conditioning: byte 0 is masked with -(uint8_t)COFACTOR,
-         * and for the topmost bit the byte is replaced by all ones so that
-         * bit is always read as 1. */
-        if (t/8==0) sb &= -(uint8_t)COFACTOR;
-        else if (t == X_PRIVATE_BITS-1) sb = -1;
-        
-        mask_t k_t = (sb>>(t%8)) & 1;
-        k_t = -k_t; /* set to all 0s or all 1s */
-        
-        swap ^= k_t;
-        gf_cond_swap(x2,x3,swap);
-        gf_cond_swap(z2,z3,swap);
-        swap = k_t;
-        
-        gf_add_nr(t1,x2,z2); /* A = x2 + z2 */        /* 2+e */
-        gf_sub_nr(t2,x2,z2); /* B = x2 - z2 */        /* 3+e */
-        gf_sub_nr(z2,x3,z3); /* D = x3 - z3 */        /* 3+e */
-        gf_mul(x2,t1,z2);    /* DA */
-        gf_add_nr(z2,z3,x3); /* C = x3 + z3 */        /* 2+e */
-        gf_mul(x3,t2,z2);    /* CB */
-        gf_sub_nr(z3,x2,x3); /* DA-CB */              /* 3+e */
-        gf_sqr(z2,z3);       /* (DA-CB)^2 */
-        gf_mul(z3,x1,z2);    /* z3 = x1(DA-CB)^2 */
-        gf_add_nr(z2,x2,x3); /* (DA+CB) */            /* 2+e */
-        gf_sqr(x3,z2);       /* x3 = (DA+CB)^2 */
-        
-        gf_sqr(z2,t1);       /* AA = A^2 */
-        gf_sqr(t1,t2);       /* BB = B^2 */
-        gf_mul(x2,z2,t1);    /* x2 = AA*BB */
-        gf_sub_nr(t2,z2,t1); /* E = AA-BB */          /* 3+e */
-        
-        gf_mulw(t1,t2,-EDWARDS_D); /* E*-d = a24*E */
-        gf_add_nr(t1,t1,z2); /* AA + a24*E */         /* 2+e */
-        gf_mul(z2,t2,t1); /* z2 = E(AA+a24*E) */
-    }
-    
-    /* Finish: undo the last pending swap, then convert x2/z2 to a single
-     * field element and serialize it. */
-    gf_cond_swap(x2,x3,swap);
-    gf_cond_swap(z2,z3,swap);
-    gf_invert(z2,z2,0);
-    gf_mul(x1,x2,z2);
-    gf_serialize(out,x1,1);
-    mask_t nz = ~gf_eq(x1,ZERO);
-    
-    decaf_bzero(x1,sizeof(x1));
-    decaf_bzero(x2,sizeof(x2));
-    decaf_bzero(z2,sizeof(z2));
-    decaf_bzero(x3,sizeof(x3));
-    decaf_bzero(z3,sizeof(z3));
-    decaf_bzero(t1,sizeof(t1));
-    decaf_bzero(t2,sizeof(t2));
-    
-    return decaf_succeed_if(mask_to_bool(nz));
-}
-
-/* Thanks Johan Pascal
- *
- * Convert an EdDSA public key encoding into an X448 public key.
- *
- * ed is deserialized as a field element y (the deserialization status is
- * discarded), the preprocessor-selected rational function of y is
- * evaluated, and the result is serialized into x.  With
- * EDDSA_USE_SIGMA_ISOGENY defined as 0 at the top of this file, the #else
- * branch (u = y^2 * (1-dy^2) / (1-y^2)) is the one compiled.
- *
- * All field temporaries are wiped before returning.
- */
-void decaf_ed448_convert_public_key_to_x448 (
-    uint8_t x[DECAF_X448_PUBLIC_BYTES],
-    const uint8_t ed[DECAF_EDDSA_448_PUBLIC_BYTES]
-) {
-    gf y;
-    /* 0xFE<<7 is 0x7F00, so the cast to uint8_t leaves mask == 0 here
-     * (presumably the mask only matters for other field sizes -- confirm
-     * against gf_deserialize()'s fourth parameter). */
-    const uint8_t mask = (uint8_t)(0xFE<<(7));
-    ignore_result(gf_deserialize(y, ed, 1, mask));
-    
-    {
-        gf n,d;
-        
-#if EDDSA_USE_SIGMA_ISOGENY
-        /* u = (1+y)/(1-y)*/
-        gf_add(n, y, ONE); /* n = y+1 */
-        gf_sub(d, ONE, y); /* d = 1-y */
-        gf_invert(d, d, 0); /* d = 1/(1-y) */
-        gf_mul(y, n, d); /* u = (y+1)/(1-y) */
-        gf_serialize(x,y,1);
-#else /* EDDSA_USE_SIGMA_ISOGENY */
-        /* u = y^2 * (1-dy^2) / (1-y^2) */
-        gf_sqr(n,y); /* y^2*/
-        gf_sub(d,ONE,n); /* 1-y^2*/
-        gf_invert(d,d,0); /* 1/(1-y^2)*/
-        gf_mul(y,n,d); /* y^2 / (1-y^2) */
-        gf_mulw(d,n,EDWARDS_D); /* dy^2*/
-        gf_sub(d, ONE, d); /* 1-dy^2*/
-        gf_mul(n, y, d); /* y^2 * (1-dy^2) / (1-y^2) */
-        gf_serialize(x,n,1);
-#endif /* EDDSA_USE_SIGMA_ISOGENY */
-        
-        decaf_bzero(y,sizeof(y));
-        decaf_bzero(n,sizeof(n));
-        decaf_bzero(d,sizeof(d));
-    }
-}
-/**
- * Thin wrapper: forwards both arguments unchanged to
- * decaf_x448_derive_public_key().
- */
-void decaf_x448_generate_key (
-    uint8_t out[X_PUBLIC_BYTES],
-    const uint8_t scalar[X_PRIVATE_BYTES]
-) {
-    decaf_x448_derive_public_key(out,scalar);
-}
-
-/**
- * Encode a point in X448 form.
- *
- * A working copy q of p is made (doubled via point_double_internal() when
- * COFACTOR == 8), (y/x)^2 is computed from its x and y coordinates, negated
- * when IMAGINE_TWIST is set, and serialized into out.  The working copy is
- * wiped before returning.
- */
-void API_NS(point_mul_by_ratio_and_encode_like_x448) (
-    uint8_t out[X_PUBLIC_BYTES],
-    const point_t p
-) {
-    point_t q;
-#if COFACTOR == 8
-    point_double_internal(q,p,1);
-#else
-    API_NS(point_copy)(q,p);
-#endif
-    gf_invert(q->t,q->x,0); /* 1/x */
-    gf_mul(q->z,q->t,q->y); /* y/x */
-    gf_sqr(q->y,q->z); /* (y/x)^2 */
-#if IMAGINE_TWIST
-    gf_sub(q->y,ZERO,q->y);
-#endif
-    gf_serialize(out,q->y,1);
-    /* Name the function through API_NS and then call it, as everywhere
-     * else in this file; passing the whole call expression as the macro
-     * argument only worked by accident of token pasting. */
-    API_NS(point_destroy)(q);
-}
-
-/**
- * Derive the X448 public key for a private scalar.
- *
- * The private bytes are copied and conditioned (byte 0 masked with
- * -(uint8_t)COFACTOR; in the last byte the bits above position
- * (X_PRIVATE_BITS+7)%8 are cleared and that bit is set), decoded into a
- * scalar, halved once per factor of two in DECAF_X448_ENCODE_RATIO, and
- * multiplied into the precomputed base table.  The product is written to
- * out by point_mul_by_ratio_and_encode_like_x448().
- *
- * Every secret-dependent temporary (the conditioned byte copy, the decoded
- * scalar and the product point) is wiped before returning.
- */
-void decaf_x448_derive_public_key (
-    uint8_t out[X_PUBLIC_BYTES],
-    const uint8_t scalar[X_PRIVATE_BYTES]
-) {
-    /* Scalar conditioning */
-    uint8_t scalar2[X_PRIVATE_BYTES];
-    memcpy(scalar2,scalar,sizeof(scalar2));
-    scalar2[0] &= -(uint8_t)COFACTOR;
-    
-    scalar2[X_PRIVATE_BYTES-1] &= ~(-1u<<((X_PRIVATE_BITS+7)%8));
-    scalar2[X_PRIVATE_BYTES-1] |= 1<<((X_PRIVATE_BITS+7)%8);
-    
-    scalar_t the_scalar;
-    API_NS(scalar_decode_long)(the_scalar,scalar2,sizeof(scalar2));
-    
-    /* Compensate for the encoding ratio */
-    for (unsigned i=1; i<DECAF_X448_ENCODE_RATIO; i<<=1) {
-        API_NS(scalar_halve)(the_scalar,the_scalar);
-    }
-    point_t p;
-    API_NS(precomputed_scalarmul)(p,API_NS(precomputed_base),the_scalar);
-    API_NS(point_mul_by_ratio_and_encode_like_x448)(out,p);
-    API_NS(point_destroy)(p);
-
-    /* Both locals hold private-key material; wipe them like the other
-     * secret temporaries in this file instead of leaving them on the stack. */
-    decaf_bzero(the_scalar,sizeof(the_scalar));
-    decaf_bzero(scalar2,sizeof(scalar2));
-}
-
-/**
- * @cond internal
- * Control for variable-time scalar multiply algorithms.
- *
- * recode_wnaf() emits one entry per nonzero signed digit:
- *   power  - bit position of the digit; -1 in the end-of-list marker.
- *   addend - the signed digit itself; 0 in the end-of-list marker.
- */
-struct smvt_control {
-  int power, addend;
-};
-
-/**
- * Recode a scalar into a list of signed digits for the variable-time
- * multiplier.
- *
- * The scalar is consumed 16 bits at a time from the least significant end.
- * Each nonzero digit is stored as a {power, addend} pair, filling control
- * backwards from its last slot (which holds the {-1, 0} end marker).  The
- * used tail is then moved to the front of the array, so control[0] carries
- * the highest power and the marker terminates the list.
- *
- * @param control    Output array with SCALAR_BITS/(table_bits+1) + 3 slots.
- * @param scalar     Scalar to recode.
- * @param table_bits Window parameter; digits are reduced modulo
- *                   2^(table_bits+1) and made negative when the next higher
- *                   bit is set.
- * @return Number of digits written, not counting the end marker.
- */
-static int recode_wnaf (
-    struct smvt_control *control, /* [nbits/(table_bits+1) + 3] */
-    const scalar_t scalar,
-    unsigned int table_bits
-) {
-    unsigned int table_size = SCALAR_BITS/(table_bits+1) + 3;
-    int position = table_size - 1; /* at the end */
-    
-    /* place the end marker */
-    control[position].power = -1;
-    control[position].addend = 0;
-    position--;
-
-    /* PERF: Could negate scalar if it's large.  But then would need more cases
-     * in the actual code that uses it, all for an expected reduction of like 1/5 op.
-     * Probably not worth it.
-     */
-    
-    uint64_t current = scalar->limb[0] & 0xFFFF;
-    uint32_t mask = (1<<(table_bits+1))-1;
-
-    unsigned int w;
-    const unsigned int B_OVER_16 = sizeof(scalar->limb[0]) / 2;
-    for (w = 1; w<(SCALAR_BITS-1)/16+3; w++) {
-        if (w < (SCALAR_BITS-1)/16+1) {
-            /* Refill the 16 high bits of current */
-            current += (uint32_t)((scalar->limb[w/B_OVER_16]>>(16*(w%B_OVER_16)))<<16);
-        }
-        
-        while (current & 0xFFFF) {
-            assert(position >= 0);
-            uint32_t pos = __builtin_ctz((uint32_t)current), odd = (uint32_t)current >> pos;
-            int32_t delta = odd & mask;
-            if (odd & 1<<(table_bits+1)) delta -= (1<<(table_bits+1));
-            /* delta may be negative, and left-shifting a negative value is
-             * undefined behaviour; multiply by the power of two instead.
-             * pos is at most 15 here because the low 16 bits are nonzero. */
-            current -= delta * (1 << pos);
-            control[position].power = pos + 16*(w-1);
-            control[position].addend = delta;
-            position--;
-        }
-        current >>= 16;
-    }
-    assert(current==0);
-    
-    /* Move the entries (and the end marker) down to the start of control. */
-    position++;
-    unsigned int n = table_size - position;
-    unsigned int i;
-    for (i=0; i<n; i++) {
-        control[i] = control[i+position];
-    }
-    return n-1;
-}
-
-/**
- * Build the table of odd multiples used by the variable-base multiplier.
- *
- * output[0] receives working itself; when tbits is nonzero, each further
- * entry output[k], for k up to (1 << tbits) - 1, is obtained by adding
- * twice working to the running point, starting from 2*working + output[0].
- * The scratch point and the doubled point are wiped before returning
- * (except on the tbits == 0 early exit, where neither has been written).
- */
-static void
-prepare_wnaf_table(
-    pniels_t *output,
-    const point_t working,
-    unsigned int tbits
-) {
-    point_t acc;
-    pniels_t step;
-    int idx;
-    const int entries = 1 << tbits;
-
-    pt_to_pniels(output[0], working);
-    if (tbits == 0) {
-        return;
-    }
-
-    /* step = 2*working, kept in pniels form for the repeated additions. */
-    API_NS(point_double)(acc, working);
-    pt_to_pniels(step, acc);
-
-    /* acc = 2*working + working */
-    add_pniels_to_pt(acc, output[0], 0);
-    pt_to_pniels(output[1], acc);
-
-    for (idx = 2; idx < entries; idx++) {
-        add_pniels_to_pt(acc, step, 0);
-        pt_to_pniels(output[idx], acc);
-    }
-
-    API_NS(point_destroy)(acc);
-    decaf_bzero(step, sizeof(step));
-}
-/*
- * Fixed-base wNAF table.  The data is declared here as an array of gf
- * defined in another translation unit, and is read in this file through
- * API_NS(wnaf_base), a niels_t view of the same storage.
- * API_NS(sizeof_precomputed_wnafs) is the size in bytes of
- * 1 << DECAF_WNAF_FIXED_TABLE_BITS niels_t entries.
- */
-extern const gf API_NS(precomputed_wnaf_as_fe)[];
-static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_fe);
-const size_t API_NS(sizeof_precomputed_wnafs) __attribute((visibility("hidden")))
-    = sizeof(niels_t)<<DECAF_WNAF_FIXED_TABLE_BITS;
-
-/* Hidden-visibility prototype; the definition follows immediately. */
-void API_NS(precompute_wnafs) (
-    niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
-    const point_t base
-) __attribute__ ((visibility ("hidden")));
-
-/**
- * Fill out with the fixed-size wNAF table for base.
- *
- * The table is first built in pniels form by prepare_wnaf_table(); the
- * niels part of each entry is copied to out and its z value collected, and
- * batch_normalize_niels() is then run over the whole table.  All scratch
- * arrays are wiped before returning.
- */
-void API_NS(precompute_wnafs) (
-    niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
-    const point_t base
-) {
-    enum { ENTRIES = 1 << DECAF_WNAF_FIXED_TABLE_BITS };
-    pniels_t projective[ENTRIES];
-    gf z_vals[ENTRIES], z_scratch[ENTRIES];
-    int idx = 0;
-
-    prepare_wnaf_table(projective, base, DECAF_WNAF_FIXED_TABLE_BITS);
-
-    while (idx < ENTRIES) {
-        memcpy(out[idx], projective[idx]->n, sizeof(niels_t));
-        gf_copy(z_vals[idx], projective[idx]->z);
-        idx++;
-    }
-    batch_normalize_niels(out, (const gf *)z_vals, z_scratch, ENTRIES);
-
-    decaf_bzero(projective, sizeof(projective));
-    decaf_bzero(z_vals, sizeof(z_vals));
-    decaf_bzero(z_scratch, sizeof(z_scratch));
-}
-
-/**
- * Variable-time double scalar multiplication for non-secret inputs.
- *
- * scalar1 is recoded with DECAF_WNAF_FIXED_TABLE_BITS and applied to the
- * precomputed table API_NS(wnaf_base); scalar2 is recoded with
- * DECAF_WNAF_VAR_TABLE_BITS and applied to a table built from base2.  The
- * two digit lists are merged in a single double-and-add pass from the
- * highest power down to 0, and the result is left in combo.
- *
- * combo is set to the identity only when both recoded lists are empty.  If
- * just one of them is empty, the other list alone drives the computation,
- * because the empty list's end marker (power -1) never matches a loop index.
- */
-void API_NS(base_double_scalarmul_non_secret) (
-    point_t combo,
-    const scalar_t scalar1,
-    const point_t base2,
-    const scalar_t scalar2
-) {
-    const int table_bits_var = DECAF_WNAF_VAR_TABLE_BITS,
-        table_bits_pre = DECAF_WNAF_FIXED_TABLE_BITS;
-    struct smvt_control control_var[SCALAR_BITS/(table_bits_var+1)+3];
-    struct smvt_control control_pre[SCALAR_BITS/(table_bits_pre+1)+3];
-    
-    int ncb_pre = recode_wnaf(control_pre, scalar1, table_bits_pre);
-    int ncb_var = recode_wnaf(control_var, scalar2, table_bits_var);
-  
-    pniels_t precmp_var[1<<table_bits_var];
-    prepare_wnaf_table(precmp_var, base2, table_bits_var);
-  
-    int contp=0, contv=0, i = control_var[0].power;
-
-    if (i < 0 && control_pre[0].power < 0) {
-        /* Both lists are empty.  Do not return here: i stays negative, so
-         * the loop below is skipped and the common cleanup still runs. */
-        API_NS(point_copy)(combo, API_NS(point_identity));
-    } else if (i > control_pre[0].power) {
-        /* The variable-base list has the highest digit. */
-        pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
-        contv++;
-    } else if (i == control_pre[0].power) {
-        /* Both lists start at the same (non-negative) power. */
-        pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
-        add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1], i);
-        contv++; contp++;
-    } else {
-        /* The fixed-base list has the highest digit; this also covers an
-         * empty variable-base list. */
-        i = control_pre[0].power;
-        niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1]);
-        contp++;
-    }
-    
-    for (i--; i >= 0; i--) {
-        int cv = (i==control_var[contv].power), cp = (i==control_pre[contp].power);
-        point_double_internal(combo,combo,i && !(cv||cp));
-
-        if (cv) {
-            assert(control_var[contv].addend);
-
-            if (control_var[contv].addend > 0) {
-                add_pniels_to_pt(combo, precmp_var[control_var[contv].addend >> 1], i&&!cp);
-            } else {
-                sub_pniels_from_pt(combo, precmp_var[(-control_var[contv].addend) >> 1], i&&!cp);
-            }
-            contv++;
-        }
-
-        if (cp) {
-            assert(control_pre[contp].addend);
-
-            if (control_pre[contp].addend > 0) {
-                add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[contp].addend >> 1], i);
-            } else {
-                sub_niels_from_pt(combo, API_NS(wnaf_base)[(-control_pre[contp].addend) >> 1], i);
-            }
-            contp++;
-        }
-    }
-    
-    /* This function is non-secret, but whatever this is cheap. */
-    decaf_bzero(control_var,sizeof(control_var));
-    decaf_bzero(control_pre,sizeof(control_pre));
-    decaf_bzero(precmp_var,sizeof(precmp_var));
-
-    assert(contv == ncb_var); (void)ncb_var;
-    assert(contp == ncb_pre); (void)ncb_pre;
-}
-/**
- * Wipe a point: hands sizeof(point_t) bytes at point to decaf_bzero().
- */
-void API_NS(point_destroy) (
-    point_t point
-) {
-    decaf_bzero(point, sizeof(point_t));
-}
-/**
- * Wipe a precomputed table: hands API_NS(sizeof_precomputed_s) bytes at pre
- * to decaf_bzero().
- */
-void API_NS(precomputed_destroy) (
-    precomputed_s *pre
-) {
-    decaf_bzero(pre, API_NS(sizeof_precomputed_s));
-}
diff --git a/crypto/ec/curve448/GENERATED/c/ed448goldilocks/decaf_tables.c b/crypto/ec/curve448/GENERATED/c/ed448goldilocks/decaf_tables.c
deleted file mode 100644
index ab4e6d79c4..0000000000
--- a/crypto/ec/curve448/GENERATED/c/ed448goldilocks/decaf_tables.c
+++ /dev/null
@@ -1,354 +0,0 @@
-/** @warning: this file was automatically generated. */
-#include "field.h"
-
-#include <decaf.h>
-
-#define API_NS(_id) decaf_448_##_id
-const API_NS(point_t) API_NS(point_base) = {{ /* Constant point built from four field-element literals; presumably the x, y, z, t members of point_t in that order - TODO confirm against the struct definition */
-{FIELD_LITERAL(0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0080000000000000,0x00fffffffffffffe,0x00ffffffffffffff,0x00ffffffffffffff,0x007fffffffffffff)},
-  {FIELD_LITERAL(0x006079b4dfdd4a64,0x000c1e3ab470a1c8,0x0044d73f48e5199b,0x0050452714141818,0x004c74c393d5242c,0x0024080526437050,0x00d48d06c13078ca,0x008508de14f04286)},
-  {FIELD_LITERAL(0x0000000000000001,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000)}, /* first limb argument is 1, all remaining limb arguments are 0 */
-  {FIELD_LITERAL(0x00e3c816dc198105,0x0062071833f4e093,0x004dde98e3421403,0x00a319b57519c985,0x00794be956382384,0x00e1ddc2b86da60f,0x0050e23d5682a9ff,0x006d3669e173c6a4)}
-}};
-const gf API_NS(precomputed_base_as_fe)[240]
-VECTOR_ALIGNED __attribute__((visibility("hidden"))) = {
-  {FIELD_LITERAL(0x00cc3b062366f4cc,0x003d6e34e314aa3c,0x00d51c0a7521774d,0x0094e060eec6ab8b,0x00d21291b4d80082,0x00befed12b55ef1e,0x00c3dd2df5c94518,0x00e0a7b112b8d4e6)},
-  {FIELD_LITERAL(0x0019eb5608d8723a,0x00d1bab52fb3aedb,0x00270a7311ebc90c,0x0037c12b91be7f13,0x005be16cd8b5c704,0x003e181acda888e1,0x00bc1f00fc3fc6d0,0x00d3839bfa319e20)},
-  {FIELD_LITERAL(0x003caeb88611909f,0x00ea8b378c4df3d4,0x00b3295b95a5a19a,0x00a65f97514bdfb5,0x00b39efba743cab1,0x0016ba98b862fd2d,0x0001508812ee71d7,0x000a75740eea114a)},
-  {FIELD_LITERAL(0x00ebcf0eb649f823,0x00166d332e98ea03,0x0059ddf64f5cd5f6,0x0047763123d9471b,0x00a64065c53ef62f,0x00978e44c480153d,0x000b5b2a0265f194,0x0046a24b9f32965a)},
-  {FIELD_LITERAL(0x00b9eef787034df0,0x0020bc24de3390cd,0x000022160bae99bb,0x00ae66e886e97946,0x0048d4bbe02cbb8b,0x0072ba97b34e38d4,0x00eae7ec8f03e85a,0x005ba92ecf808b2c)},
-  {FIELD_LITERAL(0x00c9cfbbe74258fd,0x00843a979ea9eaa7,0x000cbb4371cfbe90,0x0059bac8f7f0a628,0x004b3dff882ff530,0x0011869df4d90733,0x00595aa71f4abfc2,0x0070e2d38990c2e6)},
-  {FIELD_LITERAL(0x00de2010c0a01733,0x00c739a612e24297,0x00a7212643141d7c,0x00f88444f6b67c11,0x00484b7b16ec28f2,0x009c1b8856af9c68,0x00ff4669591fe9d6,0x0054974be08a32c8)},
-  {FIELD_LITERAL(0x0010de3fd682ceed,0x008c07642d83ca4e,0x0013bb064e00a1cc,0x009411ae27870e11,0x00ea8e5b4d531223,0x0032fe7d2aaece2e,0x00d989e243e7bb41,0x000fe79a508e9b8b)},
-  {FIELD_LITERAL(0x005e0426b9bfc5b1,0x0041a5b1d29ee4fa,0x0015b0def7774391,0x00bc164f1f51af01,0x00d543b0942797b9,0x003c129b6398099c,0x002b114c6e5adf18,0x00b4e630e4018a7b)},
-  {FIELD_LITERAL(0x00d490afc95f8420,0x00b096bf50c1d9b9,0x00799fd707679866,0x007c74d9334afbea,0x00efaa8be80ff4ed,0x0075c4943bb81694,0x00c21c2fca161f36,0x00e77035d492bfee)},
-  {FIELD_LITERAL(0x006658a190dd6661,0x00e0e9bab38609a6,0x0028895c802237ed,0x006a0229c494f587,0x002dcde96c9916b7,0x00d158822de16218,0x00173b917a06856f,0x00ca78a79ae07326)},
-  {FIELD_LITERAL(0x00e35bfc79caced4,0x0087238a3e1fe3bb,0x00bcbf0ff4ceff5b,0x00a19c1c94099b91,0x0071e102b49db976,0x0059e3d004eada1e,0x008da78afa58a47e,0x00579c8ebf269187)},
-  {FIELD_LITERAL(0x00a16c2905eee75f,0x009d4bcaea2c7e1d,0x00d3bd79bfad19df,0x0050da745193342c,0x006abdb8f6b29ab1,0x00a24fe0a4fef7ef,0x0063730da1057dfb,0x00a08c312c8eb108)},
-  {FIELD_LITERAL(0x00b583be005375be,0x00a40c8f8a4e3df4,0x003fac4a8f5bdbf7,0x00d4481d872cd718,0x004dc8749cdbaefe,0x00cce740d5e5c975,0x000b1c1f4241fd21,0x00a76de1b4e1cd07)},
-  {FIELD_LITERAL(0x007a076500d30b62,0x000a6e117b7f090f,0x00c8712ae7eebd9a,0x000fbd6c1d5f6ff7,0x003a7977246ebf11,0x00166ed969c6600e,0x00aa42e469c98bec,0x00dc58f307cf0666)},
-  {FIELD_LITERAL(0x004b491f65a9a28b,0x006a10309e8a55b7,0x00b67210185187ef,0x00cf6497b12d9b8f,0x0085778c56e2b1ba,0x0015b4c07a814d85,0x00686479e62da561,0x008de5d88f114916)},
-  {FIELD_LITERAL(0x00e37c88d6bba7b1,0x003e4577e1b8d433,0x0050d8ea5f510ec0,0x0042fc9f2da9ef59,0x003bd074c1141420,0x00561b8b7b68774e,0x00232e5e5d1013a3,0x006b7f2cb3d7e73f)},
-  {FIELD_LITERAL(0x004bdd0f0b41e6a0,0x001773057c405d24,0x006029f99915bd97,0x006a5ba70a17fe2f,0x0046111977df7e08,0x004d8124c89fb6b7,0x00580983b2bb2724,0x00207bf330d6f3fe)},
-  {FIELD_LITERAL(0x007efdc93972a48b,0x002f5e50e78d5fee,0x0080dc11d61c7fe5,0x0065aa598707245b,0x009abba2300641be,0x000c68787656543a,0x00ffe0fef2dc0a17,0x00007ffbd6cb4f3a)},
-  {FIELD_LITERAL(0x0036012f2b836efc,0x00458c126d6b5fbc,0x00a34436d719ad1e,0x0097be6167117dea,0x0009c219c879cff3,0x0065564493e60755,0x00993ac94a8cdec0,0x002d4885a4d0dbaf)},
-  {FIELD_LITERAL(0x00598b60b4c068ba,0x00c547a0be7f1afd,0x009582164acf12af,0x00af4acac4fbbe40,0x005f6ca7c539121a,0x003b6e752ebf9d66,0x00f08a30d5cac5d4,0x00e399bb5f97c5a9)},
-  {FIELD_LITERAL(0x007445a0409c0a66,0x00a65c369f3829c0,0x0031d248a4f74826,0x006817f34defbe8e,0x00649741d95ebf2e,0x00d46466ab16b397,0x00fdc35703bee414,0x00343b43334525f8)},
-  {FIELD_LITERAL(0x001796bea93f6401,0x00090c5a42e85269,0x00672412ba1252ed,0x001201d47b6de7de,0x006877bccfe66497,0x00b554fd97a4c161,0x009753f42dbac3cf,0x00e983e3e378270a)},
-  {FIELD_LITERAL(0x00ac3eff18849872,0x00f0eea3bff05690,0x00a6d72c21dd505d,0x001b832642424169,0x00a6813017b540e5,0x00a744bd71b385cd,0x0022a7d089130a7b,0x004edeec9a133486)},
-  {FIELD_LITERAL(0x00b2d6729196e8a9,0x0088a9bb2031cef4,0x00579e7787dc1567,0x0030f49feb059190,0x00a0b1d69c7f7d8f,0x0040bdcc6d9d806f,0x00d76c4037edd095,0x00bbf24376415dd7)},
-  {FIELD_LITERAL(0x00240465ff5a7197,0x00bb97e76caf27d0,0x004b4edbf8116d39,0x001d8586f708cbaa,0x000f8ee8ff8e4a50,0x00dde5a1945dd622,0x00e6fc1c0957e07c,0x0041c9cdabfd88a0)},
-  {FIELD_LITERAL(0x005344b0bf5b548c,0x002957d0b705cc99,0x00f586a70390553d,0x0075b3229f583cc3,0x00a1aa78227490e4,0x001bf09cf7957717,0x00cf6bf344325f52,0x0065bd1c23ca3ecf)},
-  {FIELD_LITERAL(0x009bff3b3239363c,0x00e17368796ef7c0,0x00528b0fe0971f3a,0x0008014fc8d4a095,0x00d09f2e8a521ec4,0x006713ab5dde5987,0x0003015758e0dbb1,0x00215999f1ba212d)},
-  {FIELD_LITERAL(0x002c88e93527da0e,0x0077c78f3456aad5,0x0071087a0a389d1c,0x00934dac1fb96dbd,0x008470e801162697,0x005bc2196cd4ad49,0x00e535601d5087c3,0x00769888700f497f)},
-  {FIELD_LITERAL(0x00da7a4b557298ad,0x0019d2589ea5df76,0x00ef3e38be0c6497,0x00a9644e1312609a,0x004592f61b2558da,0x0082c1df510d7e46,0x0042809a535c0023,0x00215bcb5afd7757)},
-  {FIELD_LITERAL(0x002b9df55a1a4213,0x00dcfc3b464a26be,0x00c4f9e07a8144d5,0x00c8e0617a92b602,0x008e3c93accafae0,0x00bf1bcb95b2ca60,0x004ce2426a613bf3,0x00266cac58e40921)},
-  {FIELD_LITERAL(0x008456d5db76e8f0,0x0032ca9cab2ce163,0x0059f2b8bf91abcf,0x0063c2a021712788,0x00f86155af22f72d,0x00db98b2a6c005a0,0x00ac6e416a693ac4,0x007a93572af53226)},
-  {FIELD_LITERAL(0x0087767520f0de22,0x0091f64012279fb5,0x001050f1f0644999,0x004f097a2477ad3c,0x006b37913a9947bd,0x001a3d78645af241,0x0057832bbb3008a7,0x002c1d902b80dc20)},
-  {FIELD_LITERAL(0x001a6002bf178877,0x009bce168aa5af50,0x005fc318ff04a7f5,0x0052818f55c36461,0x008768f5d4b24afb,0x0037ffbae7b69c85,0x0018195a4b61edc0,0x001e12ea088434b2)},
-  {FIELD_LITERAL(0x0047d3f804e7ab07,0x00a809ab5f905260,0x00b3ffc7cdaf306d,0x00746e8ec2d6e509,0x00d0dade8887a645,0x00acceeebde0dd37,0x009bc2579054686b,0x0023804f97f1c2bf)},
-  {FIELD_LITERAL(0x0043e2e2e50b80d7,0x00143aafe4427e0f,0x005594aaecab855b,0x008b12ccaaecbc01,0x002deeb091082bc3,0x009cca4be2ae7514,0x00142b96e696d047,0x00ad2a2b1c05256a)},
-  {FIELD_LITERAL(0x003914f2f144b78b,0x007a95dd8bee6f68,0x00c7f4384d61c8e6,0x004e51eb60f1bdb2,0x00f64be7aa4621d8,0x006797bfec2f0ac0,0x007d17aab3c75900,0x001893e73cac8bc5)},
-  {FIELD_LITERAL(0x00140360b768665b,0x00b68aca4967f977,0x0001089b66195ae4,0x00fe71122185e725,0x000bca2618d49637,0x00a54f0557d7e98a,0x00cdcd2f91d6f417,0x00ab8c13741fd793)},
-  {FIELD_LITERAL(0x00725ee6b1e549e0,0x007124a0769777fa,0x000b68fdad07ae42,0x0085b909cd4952df,0x0092d2e3c81606f4,0x009f22f6cac099a0,0x00f59da57f2799a8,0x00f06c090122f777)},
-  {FIELD_LITERAL(0x00ce0bed0a3532bc,0x001a5048a22df16b,0x00e31db4cbad8bf1,0x00e89292120cf00e,0x007d1dd1a9b00034,0x00e2a9041ff8f680,0x006a4c837ae596e7,0x00713af1068070b3)},
-  {FIELD_LITERAL(0x00c4fe64ce66d04b,0x00b095d52e09b3d7,0x00758bbecb1a3a8e,0x00f35cce8d0650c0,0x002b878aa5984473,0x0062e0a3b7544ddc,0x00b25b290ed116fe,0x007b0f6abe0bebf2)},
-  {FIELD_LITERAL(0x0081d4e3addae0a8,0x003410c836c7ffcc,0x00c8129ad89e4314,0x000e3d5a23922dcd,0x00d91e46f29c31f3,0x006c728cde8c5947,0x002bc655ba2566c0,0x002ca94721533108)},
-  {FIELD_LITERAL(0x0051e4b3f764d8a9,0x0019792d46e904a0,0x00853bc13dbc8227,0x000840208179f12d,0x0068243474879235,0x0013856fbfe374d0,0x00bda12fe8676424,0x00bbb43635926eb2)},
-  {FIELD_LITERAL(0x0012cdc880a93982,0x003c495b21cd1b58,0x00b7e5c93f22a26e,0x0044aa82dfb99458,0x009ba092cdffe9c0,0x00a14b3ab2083b73,0x000271c2f70e1c4b,0x00eea9cac0f66eb8)},
-  {FIELD_LITERAL(0x001a1847c4ac5480,0x00b1b412935bb03a,0x00f74285983bf2b2,0x00624138b5b5d0f1,0x008820c0b03d38bf,0x00b94e50a18c1572,0x0060f6934841798f,0x00c52f5d66d6ebe2)},
-  {FIELD_LITERAL(0x00da23d59f9bcea6,0x00e0f27007a06a4b,0x00128b5b43a6758c,0x000cf50190fa8b56,0x00fc877aba2b2d72,0x00623bef52edf53f,0x00e6af6b819669e2,0x00e314dc34fcaa4f)},
-  {FIELD_LITERAL(0x0066e5eddd164d1e,0x00418a7c6fe28238,0x0002e2f37e962c25,0x00f01f56b5975306,0x0048842fa503875c,0x0057b0e968078143,0x00ff683024f3d134,0x0082ae28fcad12e4)},
-  {FIELD_LITERAL(0x0011ddfd21260e42,0x00d05b0319a76892,0x00183ea4368e9b8f,0x00b0815662affc96,0x00b466a5e7ce7c88,0x00db93b07506e6ee,0x0033885f82f62401,0x0086f9090ec9b419)},
-  {FIELD_LITERAL(0x00d95d1c5fcb435a,0x0016d1ed6b5086f9,0x00792aa0b7e54d71,0x0067b65715f1925d,0x00a219755ec6176b,0x00bc3f026b12c28f,0x00700c897ffeb93e,0x0089b83f6ec50b46)},
-  {FIELD_LITERAL(0x003c97e6384da36e,0x00423d53eac81a09,0x00b70d68f3cdce35,0x00ee7959b354b92c,0x00f4e9718819c8ca,0x009349f12acbffe9,0x005aee7b62cb7da6,0x00d97764154ffc86)},
-  {FIELD_LITERAL(0x00526324babb46dc,0x002ee99b38d7bf9e,0x007ea51794706ef4,0x00abeb04da6e3c39,0x006b457c1d281060,0x00fe243e9a66c793,0x00378de0fb6c6ee4,0x003e4194b9c3cb93)},
-  {FIELD_LITERAL(0x00fed3cd80ca2292,0x0015b043a73ca613,0x000a9fd7bf9be227,0x003b5e03de2db983,0x005af72d46904ef7,0x00c0f1b5c49faa99,0x00dc86fc3bd305e1,0x00c92f08c1cb1797)},
-  {FIELD_LITERAL(0x0079680ce111ed3b,0x001a1ed82806122c,0x000c2e7466d15df3,0x002c407f6f7150fd,0x00c5e7c96b1b0ce3,0x009aa44626863ff9,0x00887b8b5b80be42,0x00b6023cec964825)},
-  {FIELD_LITERAL(0x00e4a8e1048970c8,0x0062887b7830a302,0x00bcf1c8cd81402b,0x0056dbb81a68f5be,0x0014eced83f12452,0x00139e1a510150df,0x00bb81140a82d1a3,0x000febcc1aaf1aa7)},
-  {FIELD_LITERAL(0x00a7527958238159,0x0013ec9537a84cd6,0x001d7fee7d562525,0x00b9eefa6191d5e5,0x00dbc97db70bcb8a,0x00481affc7a4d395,0x006f73d3e70c31bb,0x00183f324ed96a61)},
-  {FIELD_LITERAL(0x0039dd7ce7fc6860,0x00d64f6425653da1,0x003e037c7f57d0af,0x0063477a06e2bcf2,0x001727dbb7ac67e6,0x0049589f5efafe2e,0x00fc0fef2e813d54,0x008baa5d087fb50d)},
-  {FIELD_LITERAL(0x0024fb59d9b457c7,0x00a7d4e060223e4c,0x00c118d1b555fd80,0x0082e216c732f22a,0x00cd2a2993089504,0x003638e836a3e13d,0x000d855ee89b4729,0x008ec5b7d4810c91)},
-  {FIELD_LITERAL(0x001bf51f7d65cdfd,0x00d14cdafa16a97d,0x002c38e60fcd10e7,0x00a27446e393efbd,0x000b5d8946a71fdd,0x0063df2cde128f2f,0x006c8679569b1888,0x0059ffc4925d732d)},
-  {FIELD_LITERAL(0x00ece96f95f2b66f,0x00ece7952813a27b,0x0026fc36592e489e,0x007157d1a2de0f66,0x00759dc111d86ddf,0x0012881e5780bb0f,0x00c8ccc83ad29496,0x0012b9bd1929eb71)},
-  {FIELD_LITERAL(0x000fa15a20da5df0,0x00349ddb1a46cd31,0x002c512ad1d8e726,0x00047611f669318d,0x009e68fba591e17e,0x004320dffa803906,0x00a640874951a3d3,0x00b6353478baa24f)},
-  {FIELD_LITERAL(0x009696510000d333,0x00ec2f788bc04826,0x000e4d02b1f67ba5,0x00659aa8dace08b6,0x00d7a38a3a3ae533,0x008856defa8c746b,0x004d7a4402d3da1a,0x00ea82e06229260f)},
-  {FIELD_LITERAL(0x006a15bb20f75c0c,0x0079a144027a5d0c,0x00d19116ce0b4d70,0x0059b83bcb0b268e,0x005f58f63f16c127,0x0079958318ee2c37,0x00defbb063d07f82,0x00f1f0b931d2d446)},
-  {FIELD_LITERAL(0x00cb5e4c3c35d422,0x008df885ca43577f,0x00fa50b16ca3e471,0x005a0e58e17488c8,0x00b2ceccd6d34d19,0x00f01d5d235e36e9,0x00db2e7e4be6ca44,0x00260ab77f35fccd)},
-  {FIELD_LITERAL(0x006f6fd9baac61d5,0x002a7710a020a895,0x009de0db7fc03d4d,0x00cdedcb1875f40b,0x00050caf9b6b1e22,0x005e3a6654456ab0,0x00775fdf8c4423d4,0x0028701ea5738b5d)},
-  {FIELD_LITERAL(0x009ffd90abfeae96,0x00cba3c2b624a516,0x005ef08bcee46c91,0x00e6fde30afb6185,0x00f0b4db4f818ce4,0x006c54f45d2127f5,0x00040125035854c7,0x00372658a3287e13)},
-  {FIELD_LITERAL(0x00d7070fb1beb2ab,0x0078fc845a93896b,0x006894a4b2f224a6,0x005bdd8192b9dbde,0x00b38839874b3a9e,0x00f93618b04b7a57,0x003e3ec75fd2c67e,0x00bf5e6bfc29494a)},
-  {FIELD_LITERAL(0x00f19224ebba2aa5,0x0074f89d358e694d,0x00eea486597135ad,0x0081579a4555c7e1,0x0010b9b872930a9d,0x00f002e87a30ecc0,0x009b9d66b6de56e2,0x00a3c4f45e8004eb)},
-  {FIELD_LITERAL(0x0045e8dda9400888,0x002ff12e5fc05db7,0x00a7098d54afe69c,0x00cdbe846a500585,0x00879c1593ca1882,0x003f7a7fea76c8b0,0x002cd73dd0c8e0a1,0x00645d6ce96f51fe)},
-  {FIELD_LITERAL(0x002b7e83e123d6d6,0x00398346f7419c80,0x0042922e55940163,0x005e7fc5601886a3,0x00e88f2cee1d3103,0x00e7fab135f2e377,0x00b059984dbf0ded,0x0009ce080faa5bb8)},
-  {FIELD_LITERAL(0x0085e78af7758979,0x00275a4ee1631a3a,0x00d26bc0ed78b683,0x004f8355ea21064f,0x00d618e1a32696e5,0x008d8d7b150e5680,0x00a74cd854b278d2,0x001dd62702203ea0)},
-  {FIELD_LITERAL(0x00f89335c2a59286,0x00a0f5c905d55141,0x00b41fb836ee9382,0x00e235d51730ca43,0x00a5cb37b5c0a69a,0x009b966ffe136c45,0x00cb2ea10bf80ed1,0x00fb2b370b40dc35)},
-  {FIELD_LITERAL(0x00d687d16d4ee8ba,0x0071520bdd069dff,0x00de85c60d32355d,0x0087d2e3565102f4,0x00cde391b8dfc9aa,0x00e18d69efdfefe5,0x004a9d0591954e91,0x00fa36dd8b50eee5)},
-  {FIELD_LITERAL(0x002e788749a865f7,0x006e4dc3116861ea,0x009f1428c37276e6,0x00e7d2e0fc1e1226,0x003aeebc6b6c45f6,0x0071a8073bf500c9,0x004b22ad986b530c,0x00f439e63c0d79d4)},
-  {FIELD_LITERAL(0x006bc3d53011f470,0x00032d6e692b83e8,0x00059722f497cd0b,0x0009b4e6f0c497cc,0x0058a804b7cce6c0,0x002b71d3302bbd5d,0x00e2f82a36765fce,0x008dded99524c703)},
-  {FIELD_LITERAL(0x004d058953747d64,0x00701940fe79aa6f,0x00a620ac71c760bf,0x009532b611158b75,0x00547ed7f466f300,0x003cb5ab53a8401a,0x00c7763168ce3120,0x007e48e33e4b9ab2)},
-  {FIELD_LITERAL(0x001b2fc57bf3c738,0x006a3f918993fb80,0x0026f7a14fdec288,0x0075a2cdccef08db,0x00d3ecbc9eecdbf1,0x0048c40f06e5bf7f,0x00d63e423009896b,0x000598bc99c056a8)},
-  {FIELD_LITERAL(0x002f194eaafa46dc,0x008e38f57fe87613,0x00dc8e5ae25f4ab2,0x000a17809575e6bd,0x00d3ec7923ba366a,0x003a7e72e0ad75e3,0x0010024b88436e0a,0x00ed3c5444b64051)},
-  {FIELD_LITERAL(0x00831fc1340af342,0x00c9645669466d35,0x007692b4cc5a080f,0x009fd4a47ac9259f,0x001eeddf7d45928b,0x003c0446fc45f28b,0x002c0713aa3e2507,0x0095706935f0f41e)},
-  {FIELD_LITERAL(0x00766ae4190ec6d8,0x0065768cabc71380,0x00b902598416cdc2,0x00380021ad38df52,0x008f0b89d6551134,0x004254d4cc62c5a5,0x000d79f4484b9b94,0x00b516732ae3c50e)},
-  {FIELD_LITERAL(0x001fb73475c45509,0x00d2b2e5ea43345a,0x00cb3c3842077bd1,0x0029f90ad820946e,0x007c11b2380778aa,0x009e54ece62c1704,0x004bc60c41ca01c3,0x004525679a5a0b03)},
-  {FIELD_LITERAL(0x00c64fbddbed87b3,0x0040601d11731faa,0x009c22475b6f9d67,0x0024b79dae875f15,0x00616fed3f02c3b0,0x0000cf39f6af2d3b,0x00c46bac0aa9a688,0x00ab23e2800da204)},
-  {FIELD_LITERAL(0x000b3a37617632b0,0x00597199fe1cfb6c,0x0042a7ccdfeafdd6,0x004cc9f15ebcea17,0x00f436e596a6b4a4,0x00168861142df0d8,0x000753edfec26af5,0x000c495d7e388116)},
-  {FIELD_LITERAL(0x0017085f4a346148,0x00c7cf7a37f62272,0x001776e129bc5c30,0x009955134c9eef2a,0x001ba5bdf1df07be,0x00ec39497103a55c,0x006578354fda6cfb,0x005f02719d4f15ee)},
-  {FIELD_LITERAL(0x0052b9d9b5d9655d,0x00d4ec7ba1b461c3,0x00f95df4974f280b,0x003d8e5ca11aeb51,0x00d4981eb5a70b26,0x000af9a4f6659f29,0x004598c846faeb43,0x0049d9a183a47670)},
-  {FIELD_LITERAL(0x000a72d23dcb3f1f,0x00a3737f84011727,0x00f870c0fbbf4a47,0x00a7aadd04b5c9ca,0x000c7715c67bd072,0x00015a136afcd74e,0x0080d5caea499634,0x0026b448ec7514b7)},
-  {FIELD_LITERAL(0x00b60167d9e7d065,0x00e60ba0d07381e8,0x003a4f17b725c2d4,0x006c19fe176b64fa,0x003b57b31af86ccb,0x0021047c286180fd,0x00bdc8fb00c6dbb6,0x00fe4a9f4bab4f3f)},
-  {FIELD_LITERAL(0x0088ffc3a16111f7,0x009155e4245d0bc8,0x00851d68220572d5,0x00557ace1e514d29,0x0031d7c339d91022,0x00101d0ae2eaceea,0x00246ab3f837b66a,0x00d5216d381ff530)},
-  {FIELD_LITERAL(0x0057e7ea35f36dae,0x00f47d7ad15de22e,0x00d757ea4b105115,0x008311457d579d7e,0x00b49b75b1edd4eb,0x0081c7ff742fd63a,0x00ddda3187433df6,0x00475727d55f9c66)},
-  {FIELD_LITERAL(0x00a6295218dc136a,0x00563b3af0e9c012,0x00d3753b0145db1b,0x004550389c043dc1,0x00ea94ae27401bdf,0x002b0b949f2b7956,0x00c63f780ad8e23c,0x00e591c47d6bab15)},
-  {FIELD_LITERAL(0x00416c582b058eb6,0x004107da5b2cc695,0x00b3cd2556aeec64,0x00c0b418267e57a1,0x001799293579bd2e,0x0046ed44590e4d07,0x001d7459b3630a1e,0x00c6afba8b6696aa)},
-  {FIELD_LITERAL(0x008d6009b26da3f8,0x00898e88ca06b1ca,0x00edb22b2ed7fe62,0x00fbc93516aabe80,0x008b4b470c42ce0d,0x00e0032ba7d0dcbb,0x00d76da3a956ecc8,0x007f20fe74e3852a)},
-  {FIELD_LITERAL(0x002419222c607674,0x00a7f23af89188b3,0x00ad127284e73d1c,0x008bba582fae1c51,0x00fc6aa7ca9ecab1,0x003df5319eb6c2ba,0x002a05af8a8b199a,0x004bf8354558407c)},
-  {FIELD_LITERAL(0x00ce7d4a30f0fcbf,0x00d02c272629f03d,0x0048c001f7400bc2,0x002c21368011958d,0x0098a550391e96b5,0x002d80b66390f379,0x001fa878760cc785,0x001adfce54b613d5)},
-  {FIELD_LITERAL(0x001ed4dc71fa2523,0x005d0bff19bf9b5c,0x00c3801cee065a64,0x001ed0b504323fbf,0x0003ab9fdcbbc593,0x00df82070178b8d2,0x00a2bcaa9c251f85,0x00c628a3674bd02e)},
-  {FIELD_LITERAL(0x006b7a0674f9f8de,0x00a742414e5c7cff,0x0041cbf3c6e13221,0x00e3a64fd207af24,0x0087c05f15fbe8d1,0x004c50936d9e8a33,0x001306ec21042b6d,0x00a4f4137d1141c2)},
-  {FIELD_LITERAL(0x0009e6fb921568b0,0x00b3c60120219118,0x002a6c3460dd503a,0x009db1ef11654b54,0x0063e4bf0be79601,0x00670d34bb2592b9,0x00dcee2f6c4130ce,0x00b2682e88e77f54)},
-  {FIELD_LITERAL(0x000d5b4b3da135ab,0x00838f3e5064d81d,0x00d44eb50f6d94ed,0x0008931ab502ac6d,0x00debe01ca3d3586,0x0025c206775f0641,0x005ad4b6ae912763,0x007e2c318ad8f247)},
-  {FIELD_LITERAL(0x00ddbe0750dd1add,0x004b3c7b885844b8,0x00363e7ecf12f1ae,0x0062e953e6438f9d,0x0023cc73b076afe9,0x00b09fa083b4da32,0x00c7c3d2456c541d,0x005b591ec6b694d4)},
-  {FIELD_LITERAL(0x0028656e19d62fcf,0x0052a4af03df148d,0x00122765ddd14e42,0x00f2252904f67157,0x004741965b636f3a,0x006441d296132cb9,0x005e2106f956a5b7,0x00247029592d335c)},
-  {FIELD_LITERAL(0x003fe038eb92f894,0x000e6da1b72e8e32,0x003a1411bfcbe0fa,0x00b55d473164a9e4,0x00b9a775ac2df48d,0x0002ddf350659e21,0x00a279a69eb19cb3,0x00f844eab25cba44)},
-  {FIELD_LITERAL(0x00c41d1f9c1f1ac1,0x007b2df4e9f19146,0x00b469355fd5ba7a,0x00b5e1965afc852a,0x00388d5f1e2d8217,0x0022079e4c09ae93,0x0014268acd4ef518,0x00c1dd8d9640464c)},
-  {FIELD_LITERAL(0x0038526adeed0c55,0x00dd68c607e3fe85,0x00f746ddd48a5d57,0x0042f2952b963b7c,0x001cbbd6876d5ec2,0x005e341470bca5c2,0x00871d41e085f413,0x00e53ab098f45732)},
-  {FIELD_LITERAL(0x004d51124797c831,0x008f5ae3750347ad,0x0070ced94c1a0c8e,0x00f6db2043898e64,0x000d00c9a5750cd0,0x000741ec59bad712,0x003c9d11aab37b7f,0x00a67ba169807714)},
-  {FIELD_LITERAL(0x00adb2c1566e8b8f,0x0096c68a35771a9a,0x00869933356f334a,0x00ba9c93459f5962,0x009ec73fb6e8ca4b,0x003c3802c27202e1,0x0031f5b733e0c008,0x00f9058c19611fa9)},
-  {FIELD_LITERAL(0x00238f01814a3421,0x00c325a44b6cce28,0x002136f97aeb0e73,0x000cac8268a4afe2,0x0022fd218da471b3,0x009dcd8dfff8def9,0x00cb9f8181d999bb,0x00143ae56edea349)},
-  {FIELD_LITERAL(0x0000623bf87622c5,0x00a1966fdd069496,0x00c315b7b812f9fc,0x00bdf5efcd128b97,0x001d464f532e3e16,0x003cd94f081bfd7e,0x00ed9dae12ce4009,0x002756f5736eee70)},
-  {FIELD_LITERAL(0x00a5187e6ee7341b,0x00e6d52e82d83b6e,0x00df3c41323094a7,0x00b3324f444e9de9,0x00689eb21a35bfe5,0x00f16363becd548d,0x00e187cc98e7f60f,0x00127d9062f0ccab)},
-  {FIELD_LITERAL(0x004ad71b31c29e40,0x00a5fcace12fae29,0x004425b5597280ed,0x00e7ef5d716c3346,0x0010b53ada410ac8,0x0092310226060c9b,0x0091c26128729c7e,0x0088b42900f8ec3b)},
-  {FIELD_LITERAL(0x00f1e26e9762d4a8,0x00d9d74082183414,0x00ffec9bd57a0282,0x000919e128fd497a,0x00ab7ae7d00fe5f8,0x0054dc442851ff68,0x00c9ebeb3b861687,0x00507f7cab8b698f)},
-  {FIELD_LITERAL(0x00c13c5aae3ae341,0x009c6c9ed98373e7,0x00098f26864577a8,0x0015b886e9488b45,0x0037692c42aadba5,0x00b83170b8e7791c,0x001670952ece1b44,0x00fd932a39276da2)},
-  {FIELD_LITERAL(0x0081a3259bef3398,0x005480fff416107b,0x00ce4f607d21be98,0x003ffc084b41df9b,0x0043d0bb100502d1,0x00ec35f575ba3261,0x00ca18f677300ef3,0x00e8bb0a827d8548)},
-  {FIELD_LITERAL(0x00df76b3328ada72,0x002e20621604a7c2,0x00f910638a105b09,0x00ef4724d96ef2cd,0x00377d83d6b8a2f7,0x00b4f48805ade324,0x001cd5da8b152018,0x0045af671a20ca7f)},
-  {FIELD_LITERAL(0x009ae3b93a56c404,0x004a410b7a456699,0x00023a619355e6b2,0x009cdc7297387257,0x0055b94d4ae70d04,0x002cbd607f65b005,0x003208b489697166,0x00ea2aa058867370)},
-  {FIELD_LITERAL(0x00f29d2598ee3f32,0x00b4ac5385d82adc,0x007633eaf04df19b,0x00aa2d3d77ceab01,0x004a2302fcbb778a,0x00927f225d5afa34,0x004a8e9d5047f237,0x008224ae9dbce530)},
-  {FIELD_LITERAL(0x001cf640859b02f8,0x00758d1d5d5ce427,0x00763c784ef4604c,0x005fa81aee205270,0x00ac537bfdfc44cb,0x004b919bd342d670,0x00238508d9bf4b7a,0x00154888795644f3)},
-  {FIELD_LITERAL(0x00c845923c084294,0x00072419a201bc25,0x0045f408b5f8e669,0x00e9d6a186b74dfe,0x00e19108c68fa075,0x0017b91d874177b7,0x002f0ca2c7912c5a,0x009400aa385a90a2)},
-  {FIELD_LITERAL(0x0071110b01482184,0x00cfed0044f2bef8,0x0034f2901cf4662e,0x003b4ae2a67f9834,0x00cca9b96fe94810,0x00522507ae77abd0,0x00bac7422721e73e,0x0066622b0f3a62b0)},
-  {FIELD_LITERAL(0x00f8ac5cf4705b6a,0x00867d82dcb457e3,0x007e13ab2ccc2ce9,0x009ee9a018d3930e,0x008370f8ecb42df8,0x002d9f019add263e,0x003302385b92d196,0x00a15654536e2c0c)},
-  {FIELD_LITERAL(0x0026ef1614e160af,0x00c023f9edfc9c76,0x00cff090da5f57ba,0x0076db7a66643ae9,0x0019462f8c646999,0x008fec00b3854b22,0x00d55041692a0a1c,0x0065db894215ca00)},
-  {FIELD_LITERAL(0x00a925036e0a451c,0x002a0390c36b6cc1,0x00f27020d90894f4,0x008d90d52cbd3d7f,0x00e1d0137392f3b8,0x00f017c158b51a8f,0x00cac313d3ed7dbc,0x00b99a81e3eb42d3)},
-  {FIELD_LITERAL(0x00b54850275fe626,0x0053a3fd1ec71140,0x00e3d2d7dbe096fa,0x00e4ac7b595cce4c,0x0077bad449c0a494,0x00b7c98814afd5b3,0x0057226f58486cf9,0x00b1557154f0cc57)},
-  {FIELD_LITERAL(0x008cc9cd236315c0,0x0031d9c5b39fda54,0x00a5713ef37e1171,0x00293d5ae2886325,0x00c4aba3e05015e1,0x0003f35ef78e4fc6,0x0039d6bd3ac1527b,0x0019d7c3afb77106)},
-  {FIELD_LITERAL(0x007b162931a985af,0x00ad40a2e0daa713,0x006df27c4009f118,0x00503e9f4e2e8bec,0x00751a77c82c182d,0x000298937769245b,0x00ffb1e8fabf9ee5,0x0008334706e09abe)},
-  {FIELD_LITERAL(0x00dbca4e98a7dcd9,0x00ee29cfc78bde99,0x00e4a3b6995f52e9,0x0045d70189ae8096,0x00fd2a8a3b9b0d1b,0x00af1793b107d8e1,0x00dbf92cbe4afa20,0x00da60f798e3681d)},
-  {FIELD_LITERAL(0x004246bfcecc627a,0x004ba431246c03a4,0x00bd1d101872d497,0x003b73d3f185ee16,0x001feb2e2678c0e3,0x00ff13c5a89dec76,0x00ed06042e771d8f,0x00a4fd2a897a83dd)},
-  {FIELD_LITERAL(0x009a4a3be50d6597,0x00de3165fc5a1096,0x004f3f56e345b0c7,0x00f7bf721d5ab8bc,0x004313e47b098c50,0x00e4c7d5c0e1adbb,0x002e3e3db365051e,0x00a480c2cd6a96fb)},
-  {FIELD_LITERAL(0x00417fa30a7119ed,0x00af257758419751,0x00d358a487b463d4,0x0089703cc720b00d,0x00ce56314ff7f271,0x0064db171ade62c1,0x00640b36d4a22fed,0x00424eb88696d23f)},
-  {FIELD_LITERAL(0x004ede34af2813f3,0x00d4a8e11c9e8216,0x004796d5041de8a5,0x00c4c6b4d21cc987,0x00e8a433ee07fa1e,0x0055720b5abcc5a1,0x008873ea9c74b080,0x005b3fec1ab65d48)},
-  {FIELD_LITERAL(0x0047e5277db70ec5,0x000a096c66db7d6b,0x00b4164cc1730159,0x004a9f783fe720fe,0x00a8177b94449dbc,0x0095a24ff49a599f,0x0069c1c578250cbc,0x00452019213debf4)},
-  {FIELD_LITERAL(0x0021ce99e09ebda3,0x00fcbd9f91875ad0,0x009bbf6b7b7a0b5f,0x00388886a69b1940,0x00926a56d0f81f12,0x00e12903c3358d46,0x005dfce4e8e1ce9d,0x0044cfa94e2f7e23)},
-  {FIELD_LITERAL(0x001bd59c09e982ea,0x00f72daeb937b289,0x0018b76dca908e0e,0x00edb498512384ad,0x00ce0243b6cc9538,0x00f96ff690cb4e70,0x007c77bf9f673c8d,0x005bf704c088a528)},
-  {FIELD_LITERAL(0x0093d4628dcb33be,0x0095263d51d42582,0x0049b3222458fe06,0x00e7fce73b653a7f,0x003ca2ebce60b369,0x00c5de239a32bea4,0x0063b8b3d71fb6bf,0x0039aeeb78a1a839)},
-  {FIELD_LITERAL(0x007dc52da400336c,0x001fded1e15b9457,0x00902e00f5568e3a,0x00219bef40456d2d,0x005684161fb3dbc9,0x004a4e9be49a76ea,0x006e685ae88b78ff,0x0021c42f13042d3c)},
-  {FIELD_LITERAL(0x00fb22bb5fd3ce50,0x0017b48aada7ae54,0x00fd5c44ad19a536,0x000ccc4e4e55e45c,0x00fd637d45b4c3f5,0x0038914e023c37cf,0x00ac1881d6a8d898,0x00611ed8d3d943a8)},
-  {FIELD_LITERAL(0x0056e2259d113d2b,0x00594819b284ec16,0x00c7bf794bb36696,0x00721ee75097cdc6,0x00f71be9047a2892,0x00df6ba142564edf,0x0069580b7a184e8d,0x00f056e38fca0fee)},
-  {FIELD_LITERAL(0x009df98566a18c6d,0x00cf3a200968f219,0x0044ba60da6d9086,0x00dbc9c0e344da03,0x000f9401c4466855,0x00d46a57c5b0a8d1,0x00875a635d7ac7c6,0x00ef4a933b7e0ae6)},
-  {FIELD_LITERAL(0x005e8694077a1535,0x008bef75f71c8f1d,0x000a7c1316423511,0x00906e1d70604320,0x003fc46c1a2ffbd6,0x00d1d5022e68f360,0x002515fba37bbf46,0x00ca16234e023b44)},
-  {FIELD_LITERAL(0x00787c99561f4690,0x00a857a8c1561f27,0x00a10df9223c09fe,0x00b98a9562e3b154,0x004330b8744c3ed2,0x00e06812807ec5c4,0x00e4cf6a7db9f1e3,0x00d95b089f132a34)},
-  {FIELD_LITERAL(0x002922b39ca33eec,0x0090d12a5f3ab194,0x00ab60c02fb5f8ed,0x00188d292abba1cf,0x00e10edec9698f6e,0x0069a4d9934133c8,0x0024aac40e6d3d06,0x001702c2177661b0)},
-  {FIELD_LITERAL(0x00139078397030bd,0x000e3c447e859a00,0x0064a5b334c82393,0x00b8aabeb7358093,0x00020778bb9ae73b,0x0032ee94c7892a18,0x008215253cb41bda,0x005e2797593517ae)},
-  {FIELD_LITERAL(0x0083765a5f855d4a,0x0051b6d1351b8ee2,0x00116de548b0f7bb,0x0087bd88703affa0,0x0095b2cc34d7fdd2,0x0084cd81b53f0bc8,0x008562fc995350ed,0x00a39abb193651e3)},
-  {FIELD_LITERAL(0x0019e23f0474b114,0x00eb94c2ad3b437e,0x006ddb34683b75ac,0x00391f9209b564c6,0x00083b3bb3bff7aa,0x00eedcd0f6dceefc,0x00b50817f794fe01,0x0036474deaaa75c9)},
-  {FIELD_LITERAL(0x0091868594265aa2,0x00797accae98ca6d,0x0008d8c5f0f8a184,0x00d1f4f1c2b2fe6e,0x0036783dfb48a006,0x008c165120503527,0x0025fd780058ce9b,0x0068beb007be7d27)},
-  {FIELD_LITERAL(0x00d0ff88aa7c90c2,0x00b2c60dacf53394,0x0094a7284d9666d6,0x00bed9022ce7a19d,0x00c51553f0cd7682,0x00c3fb870b124992,0x008d0bc539956c9b,0x00fc8cf258bb8885)},
-  {FIELD_LITERAL(0x003667bf998406f8,0x0000115c43a12975,0x001e662f3b20e8fd,0x0019ffa534cb24eb,0x00016be0dc8efb45,0x00ff76a8b26243f5,0x00ae20d241a541e3,0x0069bd6af13cd430)},
-  {FIELD_LITERAL(0x0045fdc16487cda3,0x00b2d8e844cf2ed7,0x00612c50e88c1607,0x00a08aabc66c1672,0x006031fdcbb24d97,0x001b639525744b93,0x004409d62639ab17,0x00a1853d0347ab1d)},
-  {FIELD_LITERAL(0x0075a1a56ebf5c21,0x00a3e72be9ac53ed,0x00efcde1629170c2,0x0004225fe91ef535,0x0088049fc73dfda7,0x004abc74857e1288,0x0024e2434657317c,0x00d98cb3d3e5543c)},
-  {FIELD_LITERAL(0x00b4b53eab6bdb19,0x009b22d8b43711d0,0x00d948b9d961785d,0x00cb167b6f279ead,0x00191de3a678e1c9,0x00d9dd9511095c2e,0x00f284324cd43067,0x00ed74fa535151dd)},
-  {FIELD_LITERAL(0x007e32c049b5c477,0x009d2bfdbd9bcfd8,0x00636e93045938c6,0x007fde4af7687298,0x0046a5184fafa5d3,0x0079b1e7f13a359b,0x00875adf1fb927d6,0x00333e21c61bcad2)},
-  {FIELD_LITERAL(0x00048014f73d8b8d,0x0075684aa0966388,0x0092be7df06dc47c,0x0097cebcd0f5568a,0x005a7004d9c4c6a9,0x00b0ecbb659924c7,0x00d90332dd492a7c,0x0057fc14df11493d)},
-  {FIELD_LITERAL(0x0008ed8ea0ad95be,0x0041d324b9709645,0x00e25412257a19b4,0x0058df9f3423d8d2,0x00a9ab20def71304,0x009ae0dbf8ac4a81,0x00c9565977e4392a,0x003c9269444baf55)},
-  {FIELD_LITERAL(0x007df6cbb926830b,0x00d336058ae37865,0x007af47dac696423,0x0048d3011ec64ac8,0x006b87666e40049f,0x0036a2e0e51303d7,0x00ba319bd79dbc55,0x003e2737ecc94f53)},
-  {FIELD_LITERAL(0x00d296ff726272d9,0x00f6d097928fcf57,0x00e0e616a55d7013,0x00deaf454ed9eac7,0x0073a56bedef4d92,0x006ccfdf6fc92e19,0x009d1ee1371a7218,0x00ee3c2ee4462d80)},
-  {FIELD_LITERAL(0x00437bce9bccdf9d,0x00e0c8e2f85dc0a3,0x00c91a7073995a19,0x00856ec9fe294559,0x009e4b33394b156e,0x00e245b0dc497e5c,0x006a54e687eeaeff,0x00f1cd1cd00fdb7c)},
-  {FIELD_LITERAL(0x008132ae5c5d8cd1,0x00121d68324a1d9f,0x00d6be9dafcb8c76,0x00684d9070edf745,0x00519fbc96d7448e,0x00388182fdc1f27e,0x000235baed41f158,0x00bf6cf6f1a1796a)},
-  {FIELD_LITERAL(0x002adc4b4d148219,0x003084ada0d3a90a,0x0046de8aab0f2e4e,0x00452d342a67b5fd,0x00d4b50f01d4de21,0x00db6d9fc0cefb79,0x008c184c86a462cd,0x00e17c83764d42da)},
-  {FIELD_LITERAL(0x007b2743b9a1e01a,0x007847ffd42688c4,0x006c7844d610a316,0x00f0cb8b250aa4b0,0x00a19060143b3ae6,0x0014eb10b77cfd80,0x000170905729dd06,0x00063b5b9cd72477)},
-  {FIELD_LITERAL(0x00ce382dc7993d92,0x00021153e938b4c8,0x00096f7567f48f51,0x0058f81ddfe4b0d5,0x00cc379a56b355c7,0x002c760770d3e819,0x00ee22d1d26e5a40,0x00de6d93d5b082d7)},
-  {FIELD_LITERAL(0x000a91a42c52e056,0x00185f6b77fce7ea,0x000803c51962f6b5,0x0022528582ba563d,0x0043f8040e9856d6,0x0085a29ec81fb860,0x005f9a611549f5ff,0x00c1f974ecbd4b06)},
-  {FIELD_LITERAL(0x005b64c6fd65ec97,0x00c1fdd7f877bc7f,0x000d9cc6c89f841c,0x005c97b7f1aff9ad,0x0075e3c61475d47e,0x001ecb1ba8153011,0x00fe7f1c8d71d40d,0x003fa9757a229832)},
-  {FIELD_LITERAL(0x00ffc5c89d2b0cba,0x00d363d42e3e6fc3,0x0019a1a0118e2e8a,0x00f7baeff48882e1,0x001bd5af28c6b514,0x0055476ca2253cb2,0x00d8eb1977e2ddf3,0x00b173b1adb228a1)},
-  {FIELD_LITERAL(0x00f2cb99dd0ad707,0x00e1e08b6859ddd8,0x000008f2d0650bcc,0x00d7ed392f8615c3,0x00976750a94da27f,0x003e83bb0ecb69ba,0x00df8e8d15c14ac6,0x00f9f7174295d9c2)},
-  {FIELD_LITERAL(0x00f11cc8e0e70bcb,0x00e5dc689974e7dd,0x0014e409f9ee5870,0x00826e6689acbd63,0x008a6f4e3d895d88,0x00b26a8da41fd4ad,0x000fb7723f83efd7,0x009c749db0a5f6c3)},
-  {FIELD_LITERAL(0x002389319450f9ba,0x003677f31aa1250a,0x0092c3db642f38cb,0x00f8b64c0dfc9773,0x00cd49fe3505b795,0x0068105a4090a510,0x00df0ba2072a8bb6,0x00eb396143afd8be)},
-  {FIELD_LITERAL(0x00a0d4ecfb24cdff,0x00ddaf8008ba6479,0x00f0b3e36d4b0f44,0x003734bd3af1f146,0x00b87e2efc75527e,0x00d230df55ddab50,0x002613257ae56c1d,0x00bc0946d135934d)},
-  {FIELD_LITERAL(0x00468711bd994651,0x0033108fa67561bf,0x0089d760192a54b4,0x00adc433de9f1871,0x000467d05f36e050,0x007847e0f0579f7f,0x00a2314ad320052d,0x00b3a93649f0b243)},
-  {FIELD_LITERAL(0x0067f8f0c4fe26c9,0x0079c4a3cc8f67b9,0x0082b1e62f23550d,0x00f2d409caefd7f5,0x0080e67dcdb26e81,0x0087ae993ea1f98a,0x00aa108becf61d03,0x001acf11efb608a3)},
-  {FIELD_LITERAL(0x008225febbab50d9,0x00f3b605e4dd2083,0x00a32b28189e23d2,0x00d507e5e5eb4c97,0x005a1a84e302821f,0x0006f54c1c5f08c7,0x00a347c8cb2843f0,0x0009f73e9544bfa5)},
-  {FIELD_LITERAL(0x006c59c9ae744185,0x009fc32f1b4282cd,0x004d6348ca59b1ac,0x00105376881be067,0x00af4096013147dc,0x004abfb5a5cb3124,0x000d2a7f8626c354,0x009c6ed568e07431)},
-  {FIELD_LITERAL(0x00e828333c297f8b,0x009ef3cf8c3f7e1f,0x00ab45f8fff31cb9,0x00c8b4178cb0b013,0x00d0c50dd3260a3f,0x0097126ac257f5bc,0x0042376cc90c705a,0x001d96fdb4a1071e)},
-  {FIELD_LITERAL(0x00542d44d89ee1a8,0x00306642e0442d98,0x0090853872b87338,0x002362cbf22dc044,0x002c222adff663b8,0x0067c924495fcb79,0x000e621d983c977c,0x00df77a9eccb66fb)},
-  {FIELD_LITERAL(0x002809e4bbf1814a,0x00b9e854f9fafb32,0x00d35e67c10f7a67,0x008f1bcb76e748cf,0x004224d9515687d2,0x005ba0b774e620c4,0x00b5e57db5d54119,0x00e15babe5683282)},
-  {FIELD_LITERAL(0x00832d02369b482c,0x00cba52ff0d93450,0x003fa9c908d554db,0x008d1e357b54122f,0x00abd91c2dc950c6,0x007eff1df4c0ec69,0x003f6aeb13fb2d31,0x00002d6179fc5b2c)},
-  {FIELD_LITERAL(0x0046c9eda81c9c89,0x00b60cb71c8f62fc,0x0022f5a683baa558,0x00f87319fccdf997,0x009ca09b51ce6a22,0x005b12baf4af7d77,0x008a46524a1e33e2,0x00035a77e988be0d)},
-  {FIELD_LITERAL(0x00a7efe46a7dbe2f,0x002f66fd55014fe7,0x006a428afa1ff026,0x0056caaa9604ab72,0x0033f3bcd7fac8ae,0x00ccb1aa01c86764,0x00158d1edf13bf40,0x009848ee76fcf3b4)},
-  {FIELD_LITERAL(0x00a9e7730a819691,0x00d9cc73c4992b70,0x00e299bde067de5a,0x008c314eb705192a,0x00e7226f17e8a3cc,0x0029dfd956e65a47,0x0053a8e839073b12,0x006f942b2ab1597e)},
-  {FIELD_LITERAL(0x001c3d780ecd5e39,0x0094f247fbdcc5fe,0x00d5c786fd527764,0x00b6f4da74f0db2a,0x0080f1f8badcd5fc,0x00f36a373ad2e23b,0x00f804f9f4343bf2,0x00d1af40ec623982)},
-  {FIELD_LITERAL(0x0082aeace5f1b144,0x00f68b3108cf4dd3,0x00634af01dde3020,0x000beab5df5c2355,0x00e8b790d1b49b0b,0x00e48d15854e36f4,0x0040ab2d95f3db9f,0x002711c4ed9e899a)},
-  {FIELD_LITERAL(0x0039343746531ebe,0x00c8509d835d429d,0x00e79eceff6b0018,0x004abfd31e8efce5,0x007bbfaaa1e20210,0x00e3be89c193e179,0x001c420f4c31d585,0x00f414a315bef5ae)},
-  {FIELD_LITERAL(0x007c296a24990df8,0x00d5d07525a75588,0x00dd8e113e94b7e7,0x007bbc58febe0cc8,0x0029f51af9bfcad3,0x007e9311ec7ab6f3,0x009a884de1676343,0x0050d5f2dce84be9)},
-  {FIELD_LITERAL(0x005fa020cca2450a,0x00491c29db6416d8,0x0037cefe3f9f9a85,0x003d405230647066,0x0049e835f0fdbe89,0x00feb78ac1a0815c,0x00828e4b32dc9724,0x00db84f2dc8d6fd4)},
-  {FIELD_LITERAL(0x0098cddc8b39549a,0x006da37e3b05d22c,0x00ce633cfd4eb3cb,0x00fda288ef526acd,0x0025338878c5d30a,0x00f34438c4e5a1b4,0x00584efea7c310f1,0x0041a551f1b660ad)},
-  {FIELD_LITERAL(0x00d7f7a8fbd6437a,0x0062872413bf3753,0x00ad4bbcb43c584b,0x007fe49be601d7e3,0x0077c659789babf4,0x00eb45fcb06a741b,0x005ce244913f9708,0x0088426401736326)},
-  {FIELD_LITERAL(0x007bf562ca768d7c,0x006c1f3a174e387c,0x00f024b447fee939,0x007e7af75f01143f,0x003adb70b4eed89d,0x00e43544021ad79a,0x0091f7f7042011f6,0x0093c1a1ee3a0ddc)},
-  {FIELD_LITERAL(0x00a0b68ec1eb72d2,0x002c03235c0d45a0,0x00553627323fe8c5,0x006186e94b17af94,0x00a9906196e29f14,0x0025b3aee6567733,0x007e0dd840080517,0x0018eb5801a4ba93)},
-  {FIELD_LITERAL(0x00d7fe7017bf6a40,0x006e3f0624be0c42,0x00ffbba205358245,0x00f9fc2cf8194239,0x008d93b37bf15b4e,0x006ddf2e38be8e95,0x002b6e79bf5fcff9,0x00ab355da425e2de)},
-  {FIELD_LITERAL(0x00938f97e20be973,0x0099141a36aaf306,0x0057b0ca29e545a1,0x0085db571f9fbc13,0x008b333c554b4693,0x0043ab6ef3e241cb,0x0054fb20aa1e5c70,0x00be0ff852760adf)},
-  {FIELD_LITERAL(0x003973d8938971d6,0x002aca26fa80c1f5,0x00108af1faa6b513,0x00daae275d7924e6,0x0053634ced721308,0x00d2355fe0bbd443,0x00357612b2d22095,0x00f9bb9dd4136cf3)},
-  {FIELD_LITERAL(0x002bff12cf5e03a5,0x001bdb1fa8a19cf8,0x00c91c6793f84d39,0x00f869f1b2eba9af,0x0059bc547dc3236b,0x00d91611d6d38689,0x00e062daaa2c0214,0x00ed3c047cc2bc82)},
-  {FIELD_LITERAL(0x000050d70c32b31a,0x001939d576d437b3,0x00d709e598bf9fe6,0x00a885b34bd2ee9e,0x00dd4b5c08ab1a50,0x0091bebd50b55639,0x00cf79ff64acdbc6,0x006067a39d826336)},
-  {FIELD_LITERAL(0x0062dd0fb31be374,0x00fcc96b84c8e727,0x003f64f1375e6ae3,0x0057d9b6dd1af004,0x00d6a167b1103c7b,0x00dd28f3180fb537,0x004ff27ad7167128,0x008934c33461f2ac)},
-  {FIELD_LITERAL(0x0065b472b7900043,0x00ba7efd2ff1064b,0x000b67d6c4c3020f,0x0012d28469f4e46d,0x0031c32939703ec7,0x00b49f0bce133066,0x00f7e10416181d47,0x005c90f51867eecc)},
-  {FIELD_LITERAL(0x0051207abd179101,0x00fc2a5c20d9c5da,0x00fb9d5f2701b6df,0x002dd040fdea82b8,0x00f163b0738442ff,0x00d9736bd68855b8,0x00e0d8e93005e61c,0x00df5a40b3988570)},
-  {FIELD_LITERAL(0x0006918f5dfce6dc,0x00d4bf1c793c57fb,0x0069a3f649435364,0x00e89a50e5b0cd6e,0x00b9f6a237e973af,0x006d4ed8b104e41d,0x00498946a3924cd2,0x00c136ec5ac9d4f7)},
-  {FIELD_LITERAL(0x0011a9c290ac5336,0x002b9a2d4a6a6533,0x009a8a68c445d937,0x00361b27b07e5e5c,0x003c043b1755b974,0x00b7eb66cf1155ee,0x0077af5909eefff2,0x0098f609877cc806)},
-  {FIELD_LITERAL(0x00ab13af436bf8f4,0x000bcf0a0dac8574,0x00d50c864f705045,0x00c40e611debc842,0x0085010489bd5caa,0x007c5050acec026f,0x00f67d943c8da6d1,0x00de1da0278074c6)},
-  {FIELD_LITERAL(0x00b373076597455f,0x00e83f1af53ac0f5,0x0041f63c01dc6840,0x0097dea19b0c6f4b,0x007f9d63b4c1572c,0x00e692d492d0f5f0,0x00cbcb392e83b4ad,0x0069c0f39ed9b1a8)},
-  {FIELD_LITERAL(0x00861030012707c9,0x009fbbdc7fd4aafb,0x008f591d6b554822,0x00df08a41ea18ade,0x009d7d83e642abea,0x0098c71bda3b78ff,0x0022c89e7021f005,0x0044d29a3fe1e3c4)},
-  {FIELD_LITERAL(0x00e748cd7b5c52f2,0x00ea9df883f89cc3,0x0018970df156b6c7,0x00c5a46c2a33a847,0x00cbde395e32aa09,0x0072474ebb423140,0x00fb00053086a23d,0x001dafcfe22d4e1f)},
-  {FIELD_LITERAL(0x00c903ee6d825540,0x00add6c4cf98473e,0x007636efed4227f1,0x00905124ae55e772,0x00e6b38fab12ed53,0x0045e132b863fe55,0x003974662edb366a,0x00b1787052be8208)},
-  {FIELD_LITERAL(0x00a614b00d775c7c,0x00d7c78941cc7754,0x00422dd68b5dabc4,0x00a6110f0167d28b,0x00685a309c252886,0x00b439ffd5143660,0x003656e29ee7396f,0x00c7c9b9ed5ad854)},
-  {FIELD_LITERAL(0x0040f7e7c5b37bf2,0x0064e4dc81181bba,0x00a8767ae2a366b6,0x001496b4f90546f2,0x002a28493f860441,0x0021f59513049a3a,0x00852d369a8b7ee3,0x00dd2e7d8b7d30a9)},
-  {FIELD_LITERAL(0x00006e34a35d9fbc,0x00eee4e48b2f019a,0x006b344743003a5f,0x00541d514f04a7e3,0x00e81f9ee7647455,0x005e2b916c438f81,0x00116f8137b7eff0,0x009bd3decc7039d1)},
-  {FIELD_LITERAL(0x0005d226f434110d,0x00af8288b8ef21d5,0x004a7a52ef181c8c,0x00be0b781b4b06de,0x00e6e3627ded07e1,0x00e43aa342272b8b,0x00e86ab424577d84,0x00fb292c566e35bb)},
-  {FIELD_LITERAL(0x00334f5303ea1222,0x00dfb3dbeb0a5d3e,0x002940d9592335c1,0x00706a7a63e8938a,0x005a533558bc4caf,0x00558e33192022a9,0x00970d9faf74c133,0x002979fcb63493ca)},
-  {FIELD_LITERAL(0x00e38abece3c82ab,0x005a51f18a2c7a86,0x009dafa2e86d592e,0x00495a62eb688678,0x00b79df74c0eb212,0x0023e8cc78b75982,0x005998cb91075e13,0x00735aa9ba61bc76)},
-  {FIELD_LITERAL(0x00d9f7a82ddbe628,0x00a1fc782889ae0f,0x0071ffda12d14b66,0x0037cf4eca7fb3d5,0x00c80bc242c58808,0x0075bf8c2d08c863,0x008d41f31afc52a7,0x00197962ecf38741)},
-  {FIELD_LITERAL(0x006e9f475cccf2ee,0x00454b9cd506430c,0x00224a4fb79ee479,0x0062e3347ef0b5e2,0x0034fd2a3512232a,0x00b8b3cb0f457046,0x00eb20165daa38ec,0x00128eebc2d9c0f7)},
-  {FIELD_LITERAL(0x00bfc5fa1e4ea21f,0x00c21d7b6bb892e6,0x00cf043f3acf0291,0x00c13f2f849b3c90,0x00d1a97ebef10891,0x0061e130a445e7fe,0x0019513fdedbf22b,0x001d60c813bff841)},
-  {FIELD_LITERAL(0x0019561c7fcf0213,0x00e3dca6843ebd77,0x0068ea95b9ca920e,0x009bdfb70f253595,0x00c68f59186aa02a,0x005aee1cca1c3039,0x00ab79a8a937a1ce,0x00b9a0e549959e6f)},
-  {FIELD_LITERAL(0x00c79e0b6d97dfbd,0x00917c71fd2bc6e8,0x00db7529ccfb63d8,0x00be5be957f17866,0x00a9e11fdc2cdac1,0x007b91a8e1f44443,0x00a3065e4057d80f,0x004825f5b8d5f6d4)},
-  {FIELD_LITERAL(0x003e4964fa8a8fc8,0x00f6a1cdbcf41689,0x00943cb18fe7fda7,0x00606dafbf34440a,0x005d37a86399c789,0x00e79a2a69417403,0x00fe34f7e68b8866,0x0011f448ed2df10e)},
-  {FIELD_LITERAL(0x00f1f57efcc1fcc4,0x00513679117de154,0x002e5b5b7c86d8c3,0x009f6486561f9cfb,0x00169e74b0170cf7,0x00900205af4af696,0x006acfddb77853f3,0x00df184c90f31068)},
-  {FIELD_LITERAL(0x00b37396c3320791,0x00fc7b67175c5783,0x00c36d2cd73ecc38,0x0080ebcc0b328fc5,0x0043a5b22b35d35d,0x00466c9f1713c9da,0x0026ad346dcaa8da,0x007c684e701183a6)},
-  {FIELD_LITERAL(0x00fd579ffb691713,0x00b76af4f81c412d,0x00f239de96110f82,0x00e965fb437f0306,0x00ca7e9436900921,0x00e487f1325fa24a,0x00633907de476380,0x00721c62ac5b8ea0)},
-  {FIELD_LITERAL(0x00c0d54e542eb4f9,0x004ed657171c8dcf,0x00b743a4f7c2a39b,0x00fd9f93ed6cc567,0x00307fae3113e58b,0x0058aa577c93c319,0x00d254556f35b346,0x00491aada2203f0d)},
-  {FIELD_LITERAL(0x00dff3103786ff34,0x000144553b1f20c3,0x0095613baeb930e4,0x00098058275ea5d4,0x007cd1402b046756,0x0074d74e4d58aee3,0x005f93fc343ff69b,0x00873df17296b3b0)},
-  {FIELD_LITERAL(0x00c4a1fb48635413,0x00b5dd54423ad59f,0x009ff5d53fd24a88,0x003c98d267fc06a7,0x002db7cb20013641,0x00bd1d6716e191f2,0x006dbc8b29094241,0x0044bbf233dafa2c)},
-  {FIELD_LITERAL(0x0055838d41f531e6,0x00bf6a2dd03c81b2,0x005827a061c4839e,0x0000de2cbb36aac3,0x002efa29d9717478,0x00f9e928cc8a77ba,0x00c134b458def9ef,0x00958a182223fc48)},
-  {FIELD_LITERAL(0x000a9ee23c06881f,0x002c727d3d871945,0x00f47d971512d24a,0x00671e816f9ef31a,0x00883af2cfaad673,0x00601f98583d6c9a,0x00b435f5adc79655,0x00ad87b71c04bff2)},
-  {FIELD_LITERAL(0x007860d99db787cf,0x00fda8983018f4a8,0x008c8866bac4743c,0x00ef471f84c82a3f,0x00abea5976d3b8e7,0x00714882896cd015,0x00b49fae584ddac5,0x008e33a1a0b69c81)},
-  {FIELD_LITERAL(0x007b6ee2c9e8a9ec,0x002455dbbd89d622,0x006490cf4eaab038,0x00d925f6c3081561,0x00153b3047de7382,0x003b421f8bdceb6f,0x00761a4a5049da78,0x00980348c5202433)},
-  {FIELD_LITERAL(0x007f8a43da97dd5c,0x00058539c800fc7b,0x0040f3cf5a28414a,0x00d68dd0d95283d6,0x004adce9da90146e,0x00befa41c7d4f908,0x007603bc2e3c3060,0x00bdf360ab3545db)},
-  {FIELD_LITERAL(0x00eebfd4e2312cc3,0x00474b2564e4fc8c,0x003303ef14b1da9b,0x003c93e0e66beb1d,0x0013619b0566925a,0x008817c24d901bf3,0x00b62bd8898d218b,0x0075a7716f1e88a2)},
-  {FIELD_LITERAL(0x0009218da1e6890f,0x0026907f5fd02575,0x004dabed5f19d605,0x003abf181870249d,0x00b52fd048cc92c4,0x00b6dd51e415a5c5,0x00d9eb82bd2b4014,0x002c865a43b46b43)},
-  {FIELD_LITERAL(0x0070047189452f4c,0x00f7ad12e1ce78d5,0x00af1ba51ec44a8b,0x005f39f63e667cd6,0x00058eac4648425e,0x00d7fdab42bea03b,0x0028576a5688de15,0x00af973209e77c10)},
-  {FIELD_LITERAL(0x00c338b915d8fef0,0x00a893292045c39a,0x0028ab4f2eba6887,0x0060743cb519fd61,0x0006213964093ac0,0x007c0b7a43f6266d,0x008e3557c4fa5bda,0x002da976de7b8d9d)},
-  {FIELD_LITERAL(0x0048729f8a8b6dcd,0x00fe23b85cc4d323,0x00e7384d16e4db0e,0x004a423970678942,0x00ec0b763345d4ba,0x00c477b9f99ed721,0x00c29dad3777b230,0x001c517b466f7df6)},
-  {FIELD_LITERAL(0x006366c380f7b574,0x001c7d1f09ff0438,0x003e20a7301f5b22,0x00d3efb1916d28f6,0x0049f4f81060ce83,0x00c69d91ea43ced1,0x002b6f3e5cd269ed,0x005b0fb22ce9ec65)},
-  {FIELD_LITERAL(0x00aa2261022d883f,0x00ebcca4548010ac,0x002528512e28a437,0x0070ca7676b66082,0x0084bda170f7c6d3,0x00581b4747c9b8bb,0x005c96a01061c7e2,0x00fb7c4a362b5273)},
-  {FIELD_LITERAL(0x00c30020eb512d02,0x0060f288283a4d26,0x00b7ed13becde260,0x0075ebb74220f6e9,0x00701079fcfe8a1f,0x001c28fcdff58938,0x002e4544b8f4df6b,0x0060c5bc4f1a7d73)},
-  {FIELD_LITERAL(0x00ae307cf069f701,0x005859f222dd618b,0x00212d6c46ec0b0d,0x00a0fe4642afb62d,0x00420d8e4a0a8903,0x00a80ff639bdf7b0,0x0019bee1490b5d8e,0x007439e4b9c27a86)},
-  {FIELD_LITERAL(0x00a94700032a093f,0x0076e96c225216e7,0x00a63a4316e45f91,0x007d8bbb4645d3b2,0x00340a6ff22793eb,0x006f935d4572aeb7,0x00b1fb69f00afa28,0x009e8f3423161ed3)},
-  {FIELD_LITERAL(0x009ef49c6b5ced17,0x00a555e6269e9f0a,0x007e6f1d79ec73b5,0x009ac78695a32ac4,0x0001d77fbbcd5682,0x008cea1fee0aaeed,0x00f42bea82a53462,0x002e46ab96cafcc9)},
-  {FIELD_LITERAL(0x0051cfcc5885377a,0x00dce566cb1803ca,0x00430c7643f2c7d4,0x00dce1a1337bdcc0,0x0010d5bd7283c128,0x003b1b547f9b46fe,0x000f245e37e770ab,0x007b72511f022b37)},
-  {FIELD_LITERAL(0x0060db815bc4786c,0x006fab25beedc434,0x00c610d06084797c,0x000c48f08537bec0,0x0031aba51c5b93da,0x007968fa6e01f347,0x0030070da52840c6,0x00c043c225a4837f)},
-  {FIELD_LITERAL(0x001bcfd00649ee93,0x006dceb47e2a0fd5,0x00f2cebda0cf8fd0,0x00b6b9d9d1fbdec3,0x00815262e6490611,0x00ef7f5ce3176760,0x00e49cd0c998d58b,0x005fc6cc269ba57c)},
-  {FIELD_LITERAL(0x008940211aa0d633,0x00addae28136571d,0x00d68fdbba20d673,0x003bc6129bc9e21a,0x000346cf184ebe9a,0x0068774d741ebc7f,0x0019d5e9e6966557,0x0003cbd7f981b651)},
-  {FIELD_LITERAL(0x004a2902926f8d3f,0x00ad79b42637ab75,0x0088f60b90f2d4e8,0x0030f54ef0e398c4,0x00021dc9bf99681e,0x007ebf66fde74ee3,0x004ade654386e9a4,0x00e7485066be4c27)},
-  {FIELD_LITERAL(0x00445f1263983be0,0x004cf371dda45e6a,0x00744a89d5a310e7,0x001f20ce4f904833,0x00e746edebe66e29,0x000912ab1f6c153d,0x00f61d77d9b2444c,0x0001499cd6647610)}
-};
-const gf API_NS(precomputed_wnaf_as_fe)[96]
-VECTOR_ALIGNED __attribute__((visibility("hidden"))) = {
-  {FIELD_LITERAL(0x00303cda6feea532,0x00860f1d5a3850e4,0x00226b9fa4728ccd,0x00e822938a0a0c0c,0x00263a61c9ea9216,0x001204029321b828,0x006a468360983c65,0x0002846f0a782143)},
-  {FIELD_LITERAL(0x00303cda6feea532,0x00860f1d5a3850e4,0x00226b9fa4728ccd,0x006822938a0a0c0c,0x00263a61c9ea9215,0x001204029321b828,0x006a468360983c65,0x0082846f0a782143)},
-  {FIELD_LITERAL(0x00ef8e22b275198d,0x00b0eb141a0b0e8b,0x001f6789da3cb38c,0x006d2ff8ed39073e,0x00610bdb69a167f3,0x00571f306c9689b4,0x00f557e6f84b2df8,0x002affd38b2c86db)},
-  {FIELD_LITERAL(0x00cea0fc8d2e88b5,0x00821612d69f1862,0x0074c283b3e67522,0x005a195ba05a876d,0x000cddfe557feea4,0x008046c795bcc5e5,0x00540969f4d6e119,0x00d27f96d6b143d5)},
-  {FIELD_LITERAL(0x000c3b1019d474e8,0x00e19533e4952284,0x00cc9810ba7c920a,0x00f103d2785945ac,0x00bfa5696cc69b34,0x00a8d3d51e9ca839,0x005623cb459586b9,0x00eae7ce1cd52e9e)},
-  {FIELD_LITERAL(0x0005a178751dd7d8,0x002cc3844c69c42f,0x00acbfe5efe10539,0x009c20f43431a65a,0x008435d96374a7b3,0x009ee57566877bd3,0x0044691725ed4757,0x001e87bb2fe2c6b2)},
-  {FIELD_LITERAL(0x000cedc4debf7a04,0x002ffa45000470ac,0x002e9f9678201915,0x0017da1208c4fe72,0x007d558cc7d656cb,0x0037a827287cf289,0x00142472d3441819,0x009c21f166cf8dd1)},
-  {FIELD_LITERAL(0x003ef83af164b2f2,0x000949a5a0525d0d,0x00f4498186cac051,0x00e77ac09ef126d2,0x0073ae0b2c9296e9,0x001c163f6922e3ed,0x0062946159321bea,0x00cfb79b22990b39)},
-  {FIELD_LITERAL(0x00b001431ca9e654,0x002d7e5eabcc9a3a,0x0052e8114c2f6747,0x0079ac4f94487f92,0x00bffd919b5d749c,0x00261f92ad15e620,0x00718397b7a97895,0x00c1443e6ebbc0c4)},
-  {FIELD_LITERAL(0x00eacd90c1e0a049,0x008977935b149fbe,0x0004cb9ba11c93dc,0x009fbd5b3470844d,0x004bc18c9bfc22cf,0x0057679a991839f3,0x00ef15b76fb4092e,0x0074a5173a225041)},
-  {FIELD_LITERAL(0x003f5f9d7ec4777b,0x00ab2e733c919c94,0x001bb6c035245ae5,0x00a325a49a883630,0x0033e9a9ea3cea2f,0x00e442a1eaa0e844,0x00b2116d5b0e71b8,0x00c16abed6d64047)},
-  {FIELD_LITERAL(0x00c560b5ed051165,0x001945adc5d65094,0x00e221865710f910,0x00cc12bc9e9b8ceb,0x004faa9518914e35,0x0017476d89d42f6d,0x00b8f637c8fa1c8b,0x0088c7d2790864b8)},
-  {FIELD_LITERAL(0x00ef7eafc1c69be6,0x0085d3855778fbea,0x002c8d5b450cb6f5,0x004e77de5e1e7fec,0x0047c057893abded,0x001b430b85d51e16,0x00965c7b45640c3c,0x00487b2bb1162b97)},
-  {FIELD_LITERAL(0x0099c73a311beec2,0x00a3eff38d8912ad,0x002efa9d1d7e8972,0x00f717ae1e14d126,0x002833f795850c8b,0x0066c12ad71486bd,0x00ae9889da4820eb,0x00d6044309555c08)},
-  {FIELD_LITERAL(0x004b1c5283d15e41,0x00669d8ea308ff75,0x0004390233f762a1,0x00e1d67b83cb6cec,0x003eebaa964c78b1,0x006b0aff965eb664,0x00b313d4470bdc37,0x008814ffcb3cb9d8)},
-  {FIELD_LITERAL(0x009724b8ce68db70,0x007678b5ed006f3d,0x00bdf4b89c0abd73,0x00299748e04c7c6d,0x00ddd86492c3c977,0x00c5a7febfa30a99,0x00ed84715b4b02bb,0x00319568adf70486)},
-  {FIELD_LITERAL(0x0070ff2d864de5bb,0x005a37eeb637ee95,0x0033741c258de160,0x00e6ca5cb1988f46,0x001ceabd92a24661,0x0030957bd500fe40,0x001c3362afe912c5,0x005187889f678bd2)},
-  {FIELD_LITERAL(0x0086835fc62bbdc7,0x009c3516ca4910a1,0x00956c71f8d00783,0x0095c78fcf63235f,0x00fc7ff6ba05c222,0x00cdd8b3f8d74a52,0x00ac5ae16de8256e,0x00e9d4be8ed48624)},
-  {FIELD_LITERAL(0x00c0ce11405df2d8,0x004e3f37b293d7b6,0x002410172e1ac6db,0x00b8dbff4bf8143d,0x003a7b409d56eb66,0x003e0f6a0dfef9af,0x0081c4e4d3645be1,0x00ce76076b127623)},
-  {FIELD_LITERAL(0x00f6ee0f98974239,0x0042d89af07d3a4f,0x00846b7fe84346b5,0x006a21fc6a8d39a1,0x00ac8bc2541ff2d9,0x006d4e2a77732732,0x009a39b694cc3f2f,0x0085c0aa2a404c8f)},
-  {FIELD_LITERAL(0x00b261101a218548,0x00c1cae96424277b,0x00869da0a77dd268,0x00bc0b09f8ec83ea,0x00d61027f8e82ba9,0x00aa4c85999dce67,0x00eac3132b9f3fe1,0x00fb9b0cf1c695d2)},
-  {FIELD_LITERAL(0x0043079295512f0d,0x0046a009861758e0,0x003ee2842a807378,0x0034cc9d1298e4fa,0x009744eb4d31b3ee,0x00afacec96650cd0,0x00ac891b313761ae,0x00e864d6d26e708a)},
-  {FIELD_LITERAL(0x00a84d7c8a23b491,0x0088e19aa868b27f,0x0005986d43e78ce9,0x00f28012f0606d28,0x0017ded7e10249b3,0x005ed4084b23af9b,0x00b9b0a940564472,0x00ad9056cceeb1f4)},
-  {FIELD_LITERAL(0x00db91b357fe755e,0x00a1aa544b15359c,0x00af4931a0195574,0x007686124fe11aef,0x00d1ead3c7b9ef7e,0x00aaf5fc580f8c15,0x00e727be147ee1ec,0x003c61c1e1577b86)},
-  {FIELD_LITERAL(0x009d3fca983220cf,0x00cd11acbc853dc4,0x0017590409d27f1d,0x00d2176698082802,0x00fa01251b2838c8,0x00dd297a0d9b51c6,0x00d76c92c045820a,0x00534bc7c46c9033)},
-  {FIELD_LITERAL(0x0080ed9bc9b07338,0x00fceac7745d2652,0x008a9d55f5f2cc69,0x0096ce72df301ac5,0x00f53232e7974d87,0x0071728c7ae73947,0x0090507602570778,0x00cb81cfd883b1b2)},
-  {FIELD_LITERAL(0x005011aadea373da,0x003a8578ec896034,0x00f20a6535fa6d71,0x005152d31e5a87cf,0x002bac1c8e68ca31,0x00b0e323db4c1381,0x00f1d596b7d5ae25,0x00eae458097cb4e0)},
-  {FIELD_LITERAL(0x00920ac80f9b0d21,0x00f80f7f73401246,0x0086d37849b557d6,0x0002bd4b317b752e,0x00b26463993a42bb,0x002070422a73b129,0x00341acaa0380cb3,0x00541914dd66a1b2)},
-  {FIELD_LITERAL(0x00c1513cd66abe8c,0x000139e01118944d,0x0064abbcb8080bbb,0x00b3b08202473142,0x00c629ef25da2403,0x00f0aec3310d9b7f,0x0050b2227472d8cd,0x00f6c8a922d41fb4)},
-  {FIELD_LITERAL(0x001075ccf26b7b1f,0x00bb6bb213170433,0x00e9491ad262da79,0x009ef4f48d2d384c,0x008992770766f09d,0x001584396b6b1101,0x00af3f8676c9feef,0x0024603c40269118)},
-  {FIELD_LITERAL(0x009dd7b31319527c,0x001e7ac948d873a9,0x00fa54b46ef9673a,0x0066efb8d5b02fe6,0x00754b1d3928aeae,0x0004262ac72a6f6b,0x0079b7d49a6eb026,0x003126a753540102)},
-  {FIELD_LITERAL(0x009666e24f693947,0x00f714311269d45f,0x0010ffac1d0c851c,0x0066e80c37363497,0x00f1f4ad010c60b0,0x0015c87408470ff7,0x00651d5e9c7766a4,0x008138819d7116de)},
-  {FIELD_LITERAL(0x003934b11c57253b,0x00ef308edf21f46e,0x00e54e99c7a16198,0x0080d57135764e63,0x00751c27b946bc24,0x00dd389ce4e9e129,0x00a1a2bfd1cd84dc,0x002fae73e5149b32)},
-  {FIELD_LITERAL(0x00911657dffb4cdd,0x00c100b7cc553d06,0x00449d075ec467cc,0x007062100bc64e70,0x0043cf86f7bd21e7,0x00f401dc4b797dea,0x005224afb2f62e65,0x00d1ede3fb5a42be)},
-  {FIELD_LITERAL(0x00f2ba36a41aa144,0x00a0c22d946ee18f,0x008aae8ef9a14f99,0x00eef4d79b19bb36,0x008e75ce3d27b1fc,0x00a65daa03b29a27,0x00d9cc83684eb145,0x009e1ed80cc2ed74)},
-  {FIELD_LITERAL(0x00bed953d1997988,0x00b93ed175a24128,0x00871c5963fb6365,0x00ca2df20014a787,0x00f5d9c1d0b34322,0x00f6f5942818db0a,0x004cc091f49c9906,0x00e8a188a60bff9f)},
-  {FIELD_LITERAL(0x0032c7762032fae8,0x00e4087232e0bc21,0x00f767344b6e8d85,0x00bbf369b76c2aa2,0x008a1f46c6e1570c,0x001368cd9780369f,0x007359a39d079430,0x0003646512921434)},
-  {FIELD_LITERAL(0x007c4b47ca7c73e7,0x005396221039734b,0x008b64ddf0e45d7e,0x00bfad5af285e6c2,0x008ec711c5b1a1a8,0x00cf663301237f98,0x00917ee3f1655126,0x004152f337efedd8)},
-  {FIELD_LITERAL(0x0007c7edc9305daa,0x000a6664f273701c,0x00f6e78795e200b1,0x005d05b9ecd2473e,0x0014f5f17c865786,0x00c7fd2d166fa995,0x004939a2d8eb80e0,0x002244ba0942c199)},
-  {FIELD_LITERAL(0x00321e767f0262cf,0x002e57d776caf68e,0x00bf2c94814f0437,0x00c339196acd622f,0x001db4cce71e2770,0x001ded5ddba6eee2,0x0078608ab1554c8d,0x00067fe0ab76365b)},
-  {FIELD_LITERAL(0x00f09758e11e3985,0x00169efdbd64fad3,0x00e8889b7d6dacd6,0x0035cdd58ea88209,0x00bcda47586d7f49,0x003cdddcb2879088,0x0016da70187e954b,0x009556ea2e92aacd)},
-  {FIELD_LITERAL(0x008cab16bd1ff897,0x00b389972cdf753f,0x00ea8ed1e46dfdc0,0x004fe7ef94c589f4,0x002b8ae9b805ecf3,0x0025c08d892874a5,0x0023938e98d44c4c,0x00f759134cabf69c)},
-  {FIELD_LITERAL(0x006c2a84678e4b3b,0x007a194aacd1868f,0x00ed0225af424761,0x00da0a6f293c64b8,0x001062ac5c6a7a18,0x0030f5775a8aeef4,0x0002acaad76b7af0,0x00410b8fd63a579f)},
-  {FIELD_LITERAL(0x001ec59db3d9590e,0x001e9e3f1c3f182d,0x0045a9c3ec2cab14,0x0008198572aeb673,0x00773b74068bd167,0x0012535eaa395434,0x0044dba9e3bbb74a,0x002fba4d3c74bd0e)},
-  {FIELD_LITERAL(0x0042bf08fe66922c,0x003318b8fbb49e8c,0x00d75946004aa14c,0x00f601586b42bf1c,0x00c74cf1d912fe66,0x00abcb36974b30ad,0x007eb78720c9d2b8,0x009f54ab7bd4df85)},
-  {FIELD_LITERAL(0x00db9fc948f73826,0x00fa8b3746ed8ee9,0x00132cb65aafbeb2,0x00c36ff3fe7925b8,0x00837daed353d2fe,0x00ec661be0667cf4,0x005beb8ed2e90204,0x00d77dd69e564967)},
-  {FIELD_LITERAL(0x0042e6268b861751,0x0008dd0469500c16,0x00b51b57c338a3fd,0x00cc4497d85cff6b,0x002f13d6b57c34a4,0x0083652eaf301105,0x00cc344294cc93a8,0x0060f4d02810e270)},
-  {FIELD_LITERAL(0x00a8954363cd518b,0x00ad171124bccb7b,0x0065f46a4adaae00,0x001b1a5b2a96e500,0x0043fe24f8233285,0x0066996d8ae1f2c3,0x00c530f3264169f9,0x00c0f92d07cf6a57)},
-  {FIELD_LITERAL(0x0036a55c6815d943,0x008c8d1def993db3,0x002e0e1e8ff7318f,0x00d883a4b92db00a,0x002f5e781ae33906,0x001a72adb235c06d,0x00f2e59e736e9caa,0x001a4b58e3031914)},
-  {FIELD_LITERAL(0x00d73bfae5e00844,0x00bf459766fb5f52,0x0061b4f5a5313cde,0x004392d4c3b95514,0x000d3551b1077523,0x0000998840ee5d71,0x006de6e340448b7b,0x00251aa504875d6e)},
-  {FIELD_LITERAL(0x003bf343427ac342,0x00adc0a78642b8c5,0x0003b893175a8314,0x0061a34ade5703bc,0x00ea3ea8bb71d632,0x00be0df9a1f198c2,0x0046dd8e7c1635fb,0x00f1523fdd25d5e5)},
-  {FIELD_LITERAL(0x00633f63fc9dd406,0x00e713ff80e04a43,0x0060c6e970f2d621,0x00a57cd7f0df1891,0x00f2406a550650bb,0x00b064290efdc684,0x001eab0144d17916,0x00cd15f863c293ab)},
-  {FIELD_LITERAL(0x0029cec55273f70d,0x007044ee275c6340,0x0040f637a93015e2,0x00338bb78db5aae9,0x001491b2a6132147,0x00a125d6cfe6bde3,0x005f7ac561ba8669,0x001d5eaea3fbaacf)},
-  {FIELD_LITERAL(0x00054e9635e3be31,0x000e43f31e2872be,0x00d05b1c9e339841,0x006fac50bd81fd98,0x00cdc7852eaebb09,0x004ff519b061991b,0x009099e8107d4c85,0x00273e24c36a4a61)},
-  {FIELD_LITERAL(0x00070b4441ef2c46,0x00efa5b02801a109,0x00bf0b8c3ee64adf,0x008a67e0b3452e98,0x001916b1f2fa7a74,0x00d781a78ff6cdc3,0x008682ce57e5c919,0x00cc1109dd210da3)},
-  {FIELD_LITERAL(0x00cae8aaff388663,0x005e983a35dda1c7,0x007ab1030d8e37f4,0x00e48940f5d032fe,0x006a36f9ef30b331,0x009be6f03958c757,0x0086231ceba91400,0x008bd0f7b823e7aa)},
-  {FIELD_LITERAL(0x00cf881ebef5a45a,0x004ebea78e7c6f2c,0x0090da9209cf26a0,0x00de2b2e4c775b84,0x0071d6031c3c15ae,0x00d9e927ef177d70,0x00894ee8c23896fd,0x00e3b3b401e41aad)},
-  {FIELD_LITERAL(0x00204fef26864170,0x00819269c5dee0f8,0x00bfb4713ec97966,0x0026339a6f34df78,0x001f26e64c761dc2,0x00effe3af313cb60,0x00e17b70138f601b,0x00f16e1ccd9ede5e)},
-  {FIELD_LITERAL(0x005d9a8353fdb2db,0x0055cc2048c698f0,0x00f6c4ac89657218,0x00525034d73faeb2,0x00435776fbda3c7d,0x0070ea5312323cbc,0x007a105d44d069fb,0x006dbc8d6dc786aa)},
-  {FIELD_LITERAL(0x0017cff19cd394ec,0x00fef7b810922587,0x00e6483970dff548,0x00ddf36ad6874264,0x00e61778523fcce2,0x0093a66c0c93b24a,0x00fd367114db7f86,0x007652d7ddce26dd)},
-  {FIELD_LITERAL(0x00d92ced7ba12843,0x00aea9c7771e86e7,0x0046639693354f7b,0x00a628dbb6a80c47,0x003a0b0507372953,0x00421113ab45c0d9,0x00e545f08362ab7a,0x0028ce087b4d6d96)},
-  {FIELD_LITERAL(0x00a67ee7cf9f99eb,0x005713b275f2ff68,0x00f1d536a841513d,0x00823b59b024712e,0x009c46b9d0d38cec,0x00cdb1595aa2d7d4,0x008375b3423d9af8,0x000ab0b516d978f7)},
-  {FIELD_LITERAL(0x00428dcb3c510b0f,0x00585607ea24bb4e,0x003736bf1603687a,0x00c47e568c4fe3c7,0x003cd00282848605,0x0043a487c3b91939,0x004ffc04e1095a06,0x00a4c989a3d4b918)},
-  {FIELD_LITERAL(0x00a8778d0e429f7a,0x004c02b059105a68,0x0016653b609da3ff,0x00d5107bd1a12d27,0x00b4708f9a771cab,0x00bb63b662033f69,0x0072f322240e7215,0x0019445b59c69222)},
-  {FIELD_LITERAL(0x00cf4f6069a658e6,0x0053ca52859436a6,0x0064b994d7e3e117,0x00cb469b9a07f534,0x00cfb68f399e9d47,0x00f0dcb8dac1c6e7,0x00f2ab67f538b3a5,0x0055544f178ab975)},
-  {FIELD_LITERAL(0x0099b7a2685d538c,0x00e2f1897b7c0018,0x003adac8ce48dae3,0x00089276d5c50c0c,0x00172fca07ad6717,0x00cb1a72f54069e5,0x004ee42f133545b3,0x00785f8651362f16)},
-  {FIELD_LITERAL(0x0049cbac38509e11,0x0015234505d42cdf,0x00794fb0b5840f1c,0x00496437344045a5,0x0031b6d944e4f9b0,0x00b207318ac1f5d8,0x0000c840da7f5c5d,0x00526f373a5c8814)},
-  {FIELD_LITERAL(0x002c7b7742d1dfd9,0x002cabeb18623c01,0x00055f5e3e044446,0x006c20f3b4ef54ba,0x00c600141ec6b35f,0x00354f437f1a32a3,0x00bac4624a3520f9,0x00c483f734a90691)},
-  {FIELD_LITERAL(0x0053a737d422918d,0x00f7fca1d8758625,0x00c360336dadb04c,0x00f38e3d9158a1b8,0x0069ce3b418e84c6,0x005d1697eca16ead,0x00f8bd6a35ece13d,0x007885dfc2b5afea)},
-  {FIELD_LITERAL(0x00c3617ae260776c,0x00b20dc3e96922d7,0x00a1a7802246706a,0x00ca6505a5240244,0x002246b62d919782,0x001439102d7aa9b3,0x00e8af1139e6422c,0x00c888d1b52f2b05)},
-  {FIELD_LITERAL(0x005b67690ffd41d9,0x005294f28df516f9,0x00a879272412fcb9,0x00098b629a6d1c8d,0x00fabd3c8050865a,0x00cd7e5b0a3879c5,0x00153238210f3423,0x00357cac101e9f42)},
-  {FIELD_LITERAL(0x008917b454444fb7,0x00f59247c97e441b,0x00a6200a6815152d,0x0009a4228601d254,0x001c0360559bd374,0x007563362039cb36,0x00bd75b48d74e32b,0x0017f515ac3499e8)},
-  {FIELD_LITERAL(0x001532a7ffe41c5a,0x00eb1edce358d6bf,0x00ddbacc7b678a7b,0x008a7b70f3c841a3,0x00f1923bf27d3f4c,0x000b2713ed8f7873,0x00aaf67e29047902,0x0044994a70b3976d)},
-  {FIELD_LITERAL(0x00d54e802082d42c,0x00a55aa0dce7cc6c,0x006477b96073f146,0x0082efe4ceb43594,0x00a922bcba026845,0x0077f19d1ab75182,0x00c2bb2737846e59,0x0004d7eec791dd33)},
-  {FIELD_LITERAL(0x0044588d1a81d680,0x00b0a9097208e4f8,0x00212605350dc57e,0x0028717cd2871123,0x00fb083c100fd979,0x0045a056ce063fdf,0x00a5d604b4dd6a41,0x001dabc08ba4e236)},
-  {FIELD_LITERAL(0x00c4887198d7a7fa,0x00244f98fb45784a,0x0045911e15a15d01,0x001d323d374c0966,0x00967c3915196562,0x0039373abd2f3c67,0x000d2c5614312423,0x0041cf2215442ce3)},
-  {FIELD_LITERAL(0x008ede889ada7f06,0x001611e91de2e135,0x00fdb9a458a471b9,0x00563484e03710d1,0x0031cc81925e3070,0x0062c97b3af80005,0x00fa733eea28edeb,0x00e82457e1ebbc88)},
-  {FIELD_LITERAL(0x006a0df5fe9b6f59,0x00a0d4ff46040d92,0x004a7cedb6f93250,0x00d1df8855b8c357,0x00e73a46086fd058,0x0048fb0add6dfe59,0x001e03a28f1b4e3d,0x00a871c993308d76)},
-  {FIELD_LITERAL(0x0030dbb2d1766ec8,0x00586c0ad138555e,0x00d1a34f9e91c77c,0x0063408ad0e89014,0x00d61231b05f6f5b,0x0009abf569f5fd8a,0x00aec67a110f1c43,0x0031d1a790938dd7)},
-  {FIELD_LITERAL(0x006cded841e2a862,0x00198d60af0ab6fb,0x0018f09db809e750,0x004e6ac676016263,0x00eafcd1620969cb,0x002c9784ca34917d,0x0054f00079796de7,0x00d9fab5c5972204)},
-  {FIELD_LITERAL(0x004bd0fee2438a83,0x00b571e62b0f83bd,0x0059287d7ce74800,0x00fb3631b645c3f0,0x00a018e977f78494,0x0091e27065c27b12,0x007696c1817165e0,0x008c40be7c45ba3a)},
-  {FIELD_LITERAL(0x00a0f326327cb684,0x001c7d0f672680ff,0x008c1c81ffb112d1,0x00f8f801674eddc8,0x00e926d5d48c2a9d,0x005bd6d954c6fe9a,0x004c6b24b4e33703,0x00d05eb5c09105cc)},
-  {FIELD_LITERAL(0x00d61731caacf2cf,0x002df0c7609e01c5,0x00306172208b1e2b,0x00b413fe4fb2b686,0x00826d360902a221,0x003f8d056e67e7f7,0x0065025b0175e989,0x00369add117865eb)},
-  {FIELD_LITERAL(0x00aaf895aec2fa11,0x000f892bc313eb52,0x005b1c794dad050b,0x003f8ec4864cec14,0x00af81058d0b90e5,0x00ebe43e183997bb,0x00a9d610f9f3e615,0x007acd8eec2e88d3)},
-  {FIELD_LITERAL(0x0049b2fab13812a3,0x00846db32cd60431,0x000177fa578c8d6c,0x00047d0e2ad4bc51,0x00b158ba38d1e588,0x006a45daad79e3f3,0x000997b93cab887b,0x00c47ea42fa23dc3)},
-  {FIELD_LITERAL(0x0012b6fef7aeb1ca,0x009412768194b6a7,0x00ff0d351f23ab93,0x007e8a14c1aff71b,0x006c1c0170c512bc,0x0016243ea02ab2e5,0x007bb6865b303f3e,0x0015ce6b29b159f4)},
-  {FIELD_LITERAL(0x009961cd02e68108,0x00e2035d3a1d0836,0x005d51f69b5e1a1d,0x004bccb4ea36edcd,0x0069be6a7aeef268,0x0063f4dd9de8d5a7,0x006283783092ca35,0x0075a31af2c35409)},
-  {FIELD_LITERAL(0x00c412365162e8cf,0x00012283fb34388a,0x003e6543babf39e2,0x00eead6b3a804978,0x0099c0314e8b326f,0x00e98e0a8d477a4f,0x00d2eb96b127a687,0x00ed8d7df87571bb)},
-  {FIELD_LITERAL(0x00777463e308cacf,0x00c8acb93950132d,0x00ebddbf4ca48b2c,0x0026ad7ca0795a0a,0x00f99a3d9a715064,0x000d60bcf9d4dfcc,0x005e65a73a437a06,0x0019d536a8db56c8)},
-  {FIELD_LITERAL(0x00192d7dd558d135,0x0027cd6a8323ffa7,0x00239f1a412dc1e7,0x0046b4b3be74fc5c,0x0020c47a2bef5bce,0x00aa17e48f43862b,0x00f7e26c96342e5f,0x0008011c530f39a9)},
-  {FIELD_LITERAL(0x00aad4ac569bf0f1,0x00a67adc90b27740,0x0048551369a5751a,0x0031252584a3306a,0x0084e15df770e6fc,0x00d7bba1c74b5805,0x00a80ef223af1012,0x0089c85ceb843a34)},
-  {FIELD_LITERAL(0x00c4545be4a54004,0x0099e11f60357e6c,0x001f3936d19515a6,0x007793df84341a6e,0x0051061886717ffa,0x00e9b0a660b28f85,0x0044ea685892de0d,0x000257d2a1fda9d9)},
-  {FIELD_LITERAL(0x007e8b01b24ac8a8,0x006cf3b0b5ca1337,0x00f1607d3e36a570,0x0039b7fab82991a1,0x00231777065840c5,0x00998e5afdd346f9,0x00b7dc3e64acc85f,0x00baacc748013ad6)},
-  {FIELD_LITERAL(0x008ea6a4177580bf,0x005fa1953e3f0378,0x005fe409ac74d614,0x00452327f477e047,0x00a4018507fb6073,0x007b6e71951caac8,0x0012b42ab8a6ce91,0x0080eca677294ab7)},
-  {FIELD_LITERAL(0x00a53edc023ba69b,0x00c6afa83ddde2e8,0x00c3f638b307b14e,0x004a357a64414062,0x00e4d94d8b582dc9,0x001739caf71695b7,0x0012431b2ae28de1,0x003b6bc98682907c)},
-  {FIELD_LITERAL(0x008a9a93be1f99d6,0x0079fa627cc699c8,0x00b0cfb134ba84c8,0x001c4b778249419a,0x00df4ab3d9c44f40,0x009f596e6c1a9e3c,0x001979c0df237316,0x00501e953a919b87)}
-};
diff --git a/crypto/ec/curve448/GENERATED/c/ed448goldilocks/eddsa.c b/crypto/ec/curve448/GENERATED/c/ed448goldilocks/eddsa.c
deleted file mode 100644
index f6c1836658..0000000000
--- a/crypto/ec/curve448/GENERATED/c/ed448goldilocks/eddsa.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/**
- * @file ed448goldilocks/eddsa.c
- * @author Mike Hamburg
- *
- * @copyright
- *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- *
- * @cond internal
- * @brief EdDSA routines.
- *
- * @warning This file was automatically generated in Python.
- * Please do not edit it.
- */
-#include "word.h"
-#include <decaf/ed448.h>
-#include <decaf/shake.h>
-#include <decaf/sha512.h>
-#include <string.h>
-
-#define API_NAME "decaf_448"
-#define API_NS(_id) decaf_448_##_id
-
-#define hash_ctx_t   decaf_shake256_ctx_t
-#define hash_init    decaf_shake256_init
-#define hash_update  decaf_shake256_update
-#define hash_final   decaf_shake256_final
-#define hash_destroy decaf_shake256_destroy
-#define hash_hash    decaf_shake256_hash
-
-#define NO_CONTEXT DECAF_EDDSA_448_SUPPORTS_CONTEXTLESS_SIGS
-#define EDDSA_USE_SIGMA_ISOGENY 0
-#define COFACTOR 4
-#define EDDSA_PREHASH_BYTES 64
-
-#if NO_CONTEXT
-const uint8_t NO_CONTEXT_POINTS_HERE = 0;
-const uint8_t * const DECAF_ED448_NO_CONTEXT = &NO_CONTEXT_POINTS_HERE;
-#endif
-
-/* EDDSA_BASE_POINT_RATIO = 1 or 2
- * Because EdDSA25519 is not on E_d but on the isogenous E_sigma_d,
- * its base point is twice ours.
- */
-#define EDDSA_BASE_POINT_RATIO (1+EDDSA_USE_SIGMA_ISOGENY) /* TODO: remove */
-
-static void clamp (
-    uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES]
-) {
-    /* Blarg */
-    secret_scalar_ser[0] &= -COFACTOR;
-    uint8_t hibit = (1<<0)>>1;
-    if (hibit == 0) {
-        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 1] = 0;
-        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 2] |= 0x80;
-    } else {
-        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 1] &= hibit-1;
-        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 1] |= hibit;
-    }
-}
-
-static void hash_init_with_dom(
-    hash_ctx_t hash,
-    uint8_t prehashed,
-    uint8_t for_prehash,
-    const uint8_t *context,
-    uint8_t context_len
-) {
-    hash_init(hash);
-
-#if NO_CONTEXT
-    if (context_len == 0 && context == DECAF_ED448_NO_CONTEXT) {
-        (void)prehashed;
-        (void)for_prehash;
-        (void)context;
-        (void)context_len;
-        return;
-    }
-#endif
-    const char *dom_s = "SigEd448";
-    const uint8_t dom[2] = {2+word_is_zero(prehashed)+word_is_zero(for_prehash), context_len};
-    hash_update(hash,(const unsigned char *)dom_s, strlen(dom_s));
-    hash_update(hash,dom,2);
-    hash_update(hash,context,context_len);
-}
-
-void decaf_ed448_prehash_init (
-    hash_ctx_t hash
-) {
-    hash_init(hash);
-}
-
-/* In this file because it uses the hash */
-void decaf_ed448_convert_private_key_to_x448 (
-    uint8_t x[DECAF_X448_PRIVATE_BYTES],
-    const uint8_t ed[DECAF_EDDSA_448_PRIVATE_BYTES]
-) {
-    /* pass the private key through hash_hash function */
-    /* and keep the first DECAF_X448_PRIVATE_BYTES bytes */
-    hash_hash(
-        x,
-        DECAF_X448_PRIVATE_BYTES,
-        ed,
-        DECAF_EDDSA_448_PRIVATE_BYTES
-    );
-}
-    
-void decaf_ed448_derive_public_key (
-    uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES]
-) {
-    /* only this much used for keygen */
-    uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES];
-    
-    hash_hash(
-        secret_scalar_ser,
-        sizeof(secret_scalar_ser),
-        privkey,
-        DECAF_EDDSA_448_PRIVATE_BYTES
-    );
-    clamp(secret_scalar_ser);
-        
-    API_NS(scalar_t) secret_scalar;
-    API_NS(scalar_decode_long)(secret_scalar, secret_scalar_ser, sizeof(secret_scalar_ser));
-    
-    /* Since we are going to mul_by_cofactor during encoding, divide by it here.
-     * However, the EdDSA base point is not the same as the decaf base point if
-     * the sigma isogeny is in use: the EdDSA base point is on Etwist_d/(1-d) and
-     * the decaf base point is on Etwist_d, and when converted it effectively
-     * picks up a factor of 2 from the isogenies.  So we might start at 2 instead of 1. 
-     */
-    for (unsigned int c=1; c<DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1) {
-        API_NS(scalar_halve)(secret_scalar,secret_scalar);
-    }
-    
-    API_NS(point_t) p;
-    API_NS(precomputed_scalarmul)(p,API_NS(precomputed_base),secret_scalar);
-    
-    API_NS(point_mul_by_ratio_and_encode_like_eddsa)(pubkey, p);
-        
-    /* Cleanup */
-    API_NS(scalar_destroy)(secret_scalar);
-    API_NS(point_destroy)(p);
-    decaf_bzero(secret_scalar_ser, sizeof(secret_scalar_ser));
-}
-
-void decaf_ed448_sign (
-    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const uint8_t *message,
-    size_t message_len,
-    uint8_t prehashed,
-    const uint8_t *context,
-    uint8_t context_len
-) {
-    API_NS(scalar_t) secret_scalar;
-    hash_ctx_t hash;
-    {
-        /* Schedule the secret key */
-        struct {
-            uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES];
-            uint8_t seed[DECAF_EDDSA_448_PRIVATE_BYTES];
-        } __attribute__((packed)) expanded;
-        hash_hash(
-            (uint8_t *)&expanded,
-            sizeof(expanded),
-            privkey,
-            DECAF_EDDSA_448_PRIVATE_BYTES
-        );
-        clamp(expanded.secret_scalar_ser);   
-        API_NS(scalar_decode_long)(secret_scalar, expanded.secret_scalar_ser, sizeof(expanded.secret_scalar_ser));
-    
-        /* Hash to create the nonce */
-        hash_init_with_dom(hash,prehashed,0,context,context_len);
-        hash_update(hash,expanded.seed,sizeof(expanded.seed));
-        hash_update(hash,message,message_len);
-        decaf_bzero(&expanded, sizeof(expanded));
-    }
-    
-    /* Decode the nonce */
-    API_NS(scalar_t) nonce_scalar;
-    {
-        uint8_t nonce[2*DECAF_EDDSA_448_PRIVATE_BYTES];
-        hash_final(hash,nonce,sizeof(nonce));
-        API_NS(scalar_decode_long)(nonce_scalar, nonce, sizeof(nonce));
-        decaf_bzero(nonce, sizeof(nonce));
-    }
-    
-    uint8_t nonce_point[DECAF_EDDSA_448_PUBLIC_BYTES] = {0};
-    {
-        /* Scalarmul to create the nonce-point */
-        API_NS(scalar_t) nonce_scalar_2;
-        API_NS(scalar_halve)(nonce_scalar_2,nonce_scalar);
-        for (unsigned int c = 2; c < DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1) {
-            API_NS(scalar_halve)(nonce_scalar_2,nonce_scalar_2);
-        }
-        
-        API_NS(point_t) p;
-        API_NS(precomputed_scalarmul)(p,API_NS(precomputed_base),nonce_scalar_2);
-        API_NS(point_mul_by_ratio_and_encode_like_eddsa)(nonce_point, p);
-        API_NS(point_destroy)(p);
-        API_NS(scalar_destroy)(nonce_scalar_2);
-    }
-    
-    API_NS(scalar_t) challenge_scalar;
-    {
-        /* Compute the challenge */
-        hash_init_with_dom(hash,prehashed,0,context,context_len);
-        hash_update(hash,nonce_point,sizeof(nonce_point));
-        hash_update(hash,pubkey,DECAF_EDDSA_448_PUBLIC_BYTES);
-        hash_update(hash,message,message_len);
-        uint8_t challenge[2*DECAF_EDDSA_448_PRIVATE_BYTES];
-        hash_final(hash,challenge,sizeof(challenge));
-        hash_destroy(hash);
-        API_NS(scalar_decode_long)(challenge_scalar,challenge,sizeof(challenge));
-        decaf_bzero(challenge,sizeof(challenge));
-    }
-    
-    API_NS(scalar_mul)(challenge_scalar,challenge_scalar,secret_scalar);
-    API_NS(scalar_add)(challenge_scalar,challenge_scalar,nonce_scalar);
-    
-    decaf_bzero(signature,DECAF_EDDSA_448_SIGNATURE_BYTES);
-    memcpy(signature,nonce_point,sizeof(nonce_point));
-    API_NS(scalar_encode)(&signature[DECAF_EDDSA_448_PUBLIC_BYTES],challenge_scalar);
-    
-    API_NS(scalar_destroy)(secret_scalar);
-    API_NS(scalar_destroy)(nonce_scalar);
-    API_NS(scalar_destroy)(challenge_scalar);
-}
-
-
-void decaf_ed448_sign_prehash (
-    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const decaf_ed448_prehash_ctx_t hash,
-    const uint8_t *context,
-    uint8_t context_len
-) {
-    uint8_t hash_output[EDDSA_PREHASH_BYTES];
-    {
-        decaf_ed448_prehash_ctx_t hash_too;
-        memcpy(hash_too,hash,sizeof(hash_too));
-        hash_final(hash_too,hash_output,sizeof(hash_output));
-        hash_destroy(hash_too);
-    }
-
-    decaf_ed448_sign(signature,privkey,pubkey,hash_output,sizeof(hash_output),1,context,context_len);
-    decaf_bzero(hash_output,sizeof(hash_output));
-}
-
-decaf_error_t decaf_ed448_verify (
-    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const uint8_t *message,
-    size_t message_len,
-    uint8_t prehashed,
-    const uint8_t *context,
-    uint8_t context_len
-) { 
-    API_NS(point_t) pk_point, r_point;
-    decaf_error_t error = API_NS(point_decode_like_eddsa_and_mul_by_ratio)(pk_point,pubkey);
-    if (DECAF_SUCCESS != error) { return error; }
-    
-    error = API_NS(point_decode_like_eddsa_and_mul_by_ratio)(r_point,signature);
-    if (DECAF_SUCCESS != error) { return error; }
-    
-    API_NS(scalar_t) challenge_scalar;
-    {
-        /* Compute the challenge */
-        hash_ctx_t hash;
-        hash_init_with_dom(hash,prehashed,0,context,context_len);
-        hash_update(hash,signature,DECAF_EDDSA_448_PUBLIC_BYTES);
-        hash_update(hash,pubkey,DECAF_EDDSA_448_PUBLIC_BYTES);
-        hash_update(hash,message,message_len);
-        uint8_t challenge[2*DECAF_EDDSA_448_PRIVATE_BYTES];
-        hash_final(hash,challenge,sizeof(challenge));
-        hash_destroy(hash);
-        API_NS(scalar_decode_long)(challenge_scalar,challenge,sizeof(challenge));
-        decaf_bzero(challenge,sizeof(challenge));
-    }
-    API_NS(scalar_sub)(challenge_scalar, API_NS(scalar_zero), challenge_scalar);
-    
-    API_NS(scalar_t) response_scalar;
-    API_NS(scalar_decode_long)(
-        response_scalar,
-        &signature[DECAF_EDDSA_448_PUBLIC_BYTES],
-        DECAF_EDDSA_448_PRIVATE_BYTES
-    );
-    
-    for (unsigned c=1; c<DECAF_448_EDDSA_DECODE_RATIO; c<<=1) {
-        API_NS(scalar_add)(response_scalar,response_scalar,response_scalar);
-    }
-    
-    
-    /* pk_point = -c(x(P)) + (cx + k)G = kG */
-    API_NS(base_double_scalarmul_non_secret)(
-        pk_point,
-        response_scalar,
-        pk_point,
-        challenge_scalar
-    );
-    return decaf_succeed_if(API_NS(point_eq(pk_point,r_point)));
-}
-
-
-decaf_error_t decaf_ed448_verify_prehash (
-    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const decaf_ed448_prehash_ctx_t hash,
-    const uint8_t *context,
-    uint8_t context_len
-) {
-    decaf_error_t ret;
-    
-    uint8_t hash_output[EDDSA_PREHASH_BYTES];
-    {
-        decaf_ed448_prehash_ctx_t hash_too;
-        memcpy(hash_too,hash,sizeof(hash_too));
-        hash_final(hash_too,hash_output,sizeof(hash_output));
-        hash_destroy(hash_too);
-    }
-    
-    ret = decaf_ed448_verify(signature,pubkey,hash_output,sizeof(hash_output),1,context,context_len);
-    
-    return ret;
-}
diff --git a/crypto/ec/curve448/GENERATED/c/ed448goldilocks/scalar.c b/crypto/ec/curve448/GENERATED/c/ed448goldilocks/scalar.c
deleted file mode 100644
index 1c98ac91d4..0000000000
--- a/crypto/ec/curve448/GENERATED/c/ed448goldilocks/scalar.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/**
- * @file ed448goldilocks/scalar.c
- * @author Mike Hamburg
- *
- * @copyright
- *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- *
- * @brief Decaf high-level functions.
- *
- * @warning This file was automatically generated in Python.
- * Please do not edit it.
- */
-#include "word.h"
-#include "constant_time.h"
-#include <decaf.h>
-
-/* Template stuff */
-#define API_NS(_id) decaf_448_##_id
-#define SCALAR_BITS DECAF_448_SCALAR_BITS
-#define SCALAR_SER_BYTES DECAF_448_SCALAR_BYTES
-#define SCALAR_LIMBS DECAF_448_SCALAR_LIMBS
-#define scalar_t API_NS(scalar_t)
-
-static const decaf_word_t MONTGOMERY_FACTOR = (decaf_word_t)0x3bd440fae918bc5ull;
-static const scalar_t sc_p = {{{
-    SC_LIMB(0x2378c292ab5844f3), SC_LIMB(0x216cc2728dc58f55), SC_LIMB(0xc44edb49aed63690), SC_LIMB(0xffffffff7cca23e9), SC_LIMB(0xffffffffffffffff), SC_LIMB(0xffffffffffffffff), SC_LIMB(0x3fffffffffffffff)
-}}}, sc_r2 = {{{
-    SC_LIMB(0xe3539257049b9b60), SC_LIMB(0x7af32c4bc1b195d9), SC_LIMB(0x0d66de2388ea1859), SC_LIMB(0xae17cf725ee4d838), SC_LIMB(0x1a9cc14ba3c47c44), SC_LIMB(0x2052bcb7e4d070af), SC_LIMB(0x3402a939f823b729)
-}}};
-/* End of template stuff */
-
-#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */
-
-const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}};
-
-/** {extra,accum} - sub +? p
- * Must have extra <= 1
- */
-static DECAF_NOINLINE void sc_subx(
-    scalar_t out,
-    const decaf_word_t accum[SCALAR_LIMBS],
-    const scalar_t sub,
-    const scalar_t p,
-    decaf_word_t extra
-) {
-    decaf_dsword_t chain = 0;
-    unsigned int i;
-    for (i=0; i<SCALAR_LIMBS; i++) {
-        chain = (chain + accum[i]) - sub->limb[i];
-        out->limb[i] = chain;
-        chain >>= WBITS;
-    }
-    decaf_word_t borrow = chain+extra; /* = 0 or -1 */
-    
-    chain = 0;
-    for (i=0; i<SCALAR_LIMBS; i++) {
-        chain = (chain + out->limb[i]) + (p->limb[i] & borrow);
-        out->limb[i] = chain;
-        chain >>= WBITS;
-    }
-}
-
-static DECAF_NOINLINE void sc_montmul (
-    scalar_t out,
-    const scalar_t a,
-    const scalar_t b
-) {
-    unsigned int i,j;
-    decaf_word_t accum[SCALAR_LIMBS+1] = {0};
-    decaf_word_t hi_carry = 0;
-    
-    for (i=0; i<SCALAR_LIMBS; i++) {
-        decaf_word_t mand = a->limb[i];
-        const decaf_word_t *mier = b->limb;
-        
-        decaf_dword_t chain = 0;
-        for (j=0; j<SCALAR_LIMBS; j++) {
-            chain += ((decaf_dword_t)mand)*mier[j] + accum[j];
-            accum[j] = chain;
-            chain >>= WBITS;
-        }
-        accum[j] = chain;
-        
-        mand = accum[0] * MONTGOMERY_FACTOR;
-        chain = 0;
-        mier = sc_p->limb;
-        for (j=0; j<SCALAR_LIMBS; j++) {
-            chain += (decaf_dword_t)mand*mier[j] + accum[j];
-            if (j) accum[j-1] = chain;
-            chain >>= WBITS;
-        }
-        chain += accum[j];
-        chain += hi_carry;
-        accum[j-1] = chain;
-        hi_carry = chain >> WBITS;
-    }
-    
-    sc_subx(out, accum, sc_p, sc_p, hi_carry);
-}
-
-void API_NS(scalar_mul) (
-    scalar_t out,
-    const scalar_t a,
-    const scalar_t b
-) {
-    sc_montmul(out,a,b);
-    sc_montmul(out,out,sc_r2);
-}
-
-/* PERF: could implement this */
-static DECAF_INLINE void sc_montsqr (scalar_t out, const scalar_t a) {
-    sc_montmul(out,a,a);
-}
-
-decaf_error_t API_NS(scalar_invert) (
-    scalar_t out,
-    const scalar_t a
-) {
-    /* Fermat's little theorem, sliding window.
-     * Sliding window is fine here because the modulus isn't secret.
-     */
-    const int SCALAR_WINDOW_BITS = 3;
-    scalar_t precmp[1<<SCALAR_WINDOW_BITS];
-    const int LAST = (1<<SCALAR_WINDOW_BITS)-1;
-
-    /* Precompute precmp = [a^1,a^3,...] */
-    sc_montmul(precmp[0],a,sc_r2);
-    if (LAST > 0) sc_montmul(precmp[LAST],precmp[0],precmp[0]);
-
-    int i;
-    for (i=1; i<=LAST; i++) {
-        sc_montmul(precmp[i],precmp[i-1],precmp[LAST]);
-    }
-    
-    /* Sliding window */
-    unsigned residue = 0, trailing = 0, started = 0;
-    for (i=SCALAR_BITS-1; i>=-SCALAR_WINDOW_BITS; i--) {
-        
-        if (started) sc_montsqr(out,out);
-        
-        decaf_word_t w = (i>=0) ? sc_p->limb[i/WBITS] : 0;
-        if (i >= 0 && i<WBITS) {
-            assert(w >= 2);
-            w-=2;
-        }
-        
-        residue = (residue<<1) | ((w>>(i%WBITS))&1);
-        if (residue>>SCALAR_WINDOW_BITS != 0) {
-            assert(trailing == 0);
-            trailing = residue;
-            residue = 0;
-        }
-        
-        if (trailing > 0 && (trailing & ((1<<SCALAR_WINDOW_BITS)-1)) == 0) {
-            if (started) {
-                sc_montmul(out,out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
-            } else {
-                API_NS(scalar_copy)(out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
-                started = 1;
-            }
-            trailing = 0;
-        }
-        trailing <<= 1;
-        
-    }
-    assert(residue==0);
-    assert(trailing==0);
-    
-    /* Demontgomerize */
-    sc_montmul(out,out,API_NS(scalar_one));
-    decaf_bzero(precmp, sizeof(precmp));
-    return decaf_succeed_if(~API_NS(scalar_eq)(out,API_NS(scalar_zero)));
-}
-
-void API_NS(scalar_sub) (
-    scalar_t out,
-    const scalar_t a,
-    const scalar_t b
-) {
-    sc_subx(out, a->limb, b, sc_p, 0);
-}
-
-void API_NS(scalar_add) (
-    scalar_t out,
-    const scalar_t a,
-    const scalar_t b
-) {
-    decaf_dword_t chain = 0;
-    unsigned int i;
-    for (i=0; i<SCALAR_LIMBS; i++) {
-        chain = (chain + a->limb[i]) + b->limb[i];
-        out->limb[i] = chain;
-        chain >>= WBITS;
-    }
-    sc_subx(out, out->limb, sc_p, sc_p, chain);
-}
-
-void
-API_NS(scalar_set_unsigned) (
-    scalar_t out,
-    uint64_t w
-) {
-    memset(out,0,sizeof(scalar_t));
-    unsigned int i = 0;
-    for (; i<sizeof(uint64_t)/sizeof(decaf_word_t); i++) {
-        out->limb[i] = w;
-#if DECAF_WORD_BITS < 64
-        w >>= 8*sizeof(decaf_word_t);
-#endif
-    }
-}
-
-decaf_bool_t
-API_NS(scalar_eq) (
-    const scalar_t a,
-    const scalar_t b
-) {
-    decaf_word_t diff = 0;
-    unsigned int i;
-    for (i=0; i<SCALAR_LIMBS; i++) {
-        diff |= a->limb[i] ^ b->limb[i];
-    }
-    return mask_to_bool(word_is_zero(diff));
-}
-
-static DECAF_INLINE void scalar_decode_short (
-    scalar_t s,
-    const unsigned char *ser,
-    unsigned int nbytes
-) {
-    unsigned int i,j,k=0;
-    for (i=0; i<SCALAR_LIMBS; i++) {
-        decaf_word_t out = 0;
-        for (j=0; j<sizeof(decaf_word_t) && k<nbytes; j++,k++) {
-            out |= ((decaf_word_t)ser[k])<<(8*j);
-        }
-        s->limb[i] = out;
-    }
-}
-
-decaf_error_t API_NS(scalar_decode)(
-    scalar_t s,
-    const unsigned char ser[SCALAR_SER_BYTES]
-) {
-    unsigned int i;
-    scalar_decode_short(s, ser, SCALAR_SER_BYTES);
-    decaf_dsword_t accum = 0;
-    for (i=0; i<SCALAR_LIMBS; i++) {
-        accum = (accum + s->limb[i] - sc_p->limb[i]) >> WBITS;
-    }
-    /* Here accum == 0 or -1 */
-    
-    API_NS(scalar_mul)(s,s,API_NS(scalar_one)); /* ham-handed reduce */
-    
-    return decaf_succeed_if(~word_is_zero(accum));
-}
-
-void API_NS(scalar_destroy) (
-    scalar_t scalar
-) {
-    decaf_bzero(scalar, sizeof(scalar_t));
-}
-
-void API_NS(scalar_decode_long)(
-    scalar_t s,
-    const unsigned char *ser,
-    size_t ser_len
-) {
-    if (ser_len == 0) {
-        API_NS(scalar_copy)(s, API_NS(scalar_zero));
-        return;
-    }
-    
-    size_t i;
-    scalar_t t1, t2;
-
-    i = ser_len - (ser_len%SCALAR_SER_BYTES);
-    if (i==ser_len) i -= SCALAR_SER_BYTES;
-    
-    scalar_decode_short(t1, &ser[i], ser_len-i);
-
-    if (ser_len == sizeof(scalar_t)) {
-        assert(i==0);
-        /* ham-handed reduce */
-        API_NS(scalar_mul)(s,t1,API_NS(scalar_one));
-        API_NS(scalar_destroy)(t1);
-        return;
-    }
-
-    while (i) {
-        i -= SCALAR_SER_BYTES;
-        sc_montmul(t1,t1,sc_r2);
-        ignore_result( API_NS(scalar_decode)(t2, ser+i) );
-        API_NS(scalar_add)(t1, t1, t2);
-    }
-
-    API_NS(scalar_copy)(s, t1);
-    API_NS(scalar_destroy)(t1);
-    API_NS(scalar_destroy)(t2);
-}
-
-void API_NS(scalar_encode)(
-    unsigned char ser[SCALAR_SER_BYTES],
-    const scalar_t s
-) {
-    unsigned int i,j,k=0;
-    for (i=0; i<SCALAR_LIMBS; i++) {
-        for (j=0; j<sizeof(decaf_word_t); j++,k++) {
-            ser[k] = s->limb[i] >> (8*j);
-        }
-    }
-}
-
-void API_NS(scalar_cond_sel) (
-    scalar_t out,
-    const scalar_t a,
-    const scalar_t b,
-    decaf_bool_t pick_b
-) {
-    constant_time_select(out,a,b,sizeof(scalar_t),bool_to_mask(pick_b),sizeof(out->limb[0]));
-}
-
-void API_NS(scalar_halve) (
-    scalar_t out,
-    const scalar_t a
-) {
-    decaf_word_t mask = -(a->limb[0] & 1);
-    decaf_dword_t chain = 0;
-    unsigned int i;
-    for (i=0; i<SCALAR_LIMBS; i++) {
-        chain = (chain + a->limb[i]) + (sc_p->limb[i] & mask);
-        out->limb[i] = chain;
-        chain >>= DECAF_WORD_BITS;
-    }
-    for (i=0; i<SCALAR_LIMBS-1; i++) {
-        out->limb[i] = out->limb[i]>>1 | out->limb[i+1]<<(WBITS-1);
-    }
-    out->limb[i] = out->limb[i]>>1 | chain<<(WBITS-1);
-}
-
diff --git a/crypto/ec/curve448/GENERATED/c/p448/f_field.h b/crypto/ec/curve448/GENERATED/c/p448/f_field.h
deleted file mode 100644
index 4eef7186d3..0000000000
--- a/crypto/ec/curve448/GENERATED/c/p448/f_field.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/**
- * @file p448/f_field.h
- * @author Mike Hamburg
- *
- * @copyright
- *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- *
- * @brief Field-specific code for 2^448 - 2^224 - 1.
- *
- * @warning This file was automatically generated in Python.
- * Please do not edit it.
- */
-
-#ifndef __P448_F_FIELD_H__
-#define __P448_F_FIELD_H__ 1
-
-#include "constant_time.h"
-#include <string.h>
-#include <assert.h>
-
-#include "word.h"
-
-#define __DECAF_448_GF_DEFINED__ 1
-#define NLIMBS (64/sizeof(word_t))
-#define X_SER_BYTES 56
-#define SER_BYTES 56
-typedef struct gf_448_s {
-    word_t limb[NLIMBS];
-} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];
-
-#define GF_LIT_LIMB_BITS  56
-#define GF_BITS           448
-#define ZERO              gf_448_ZERO
-#define ONE               gf_448_ONE
-#define MODULUS           gf_448_MODULUS
-#define gf                gf_448_t
-#define gf_s              gf_448_s
-#define gf_eq             gf_448_eq
-#define gf_hibit          gf_448_hibit
-#define gf_lobit          gf_448_lobit
-#define gf_copy           gf_448_copy
-#define gf_add            gf_448_add
-#define gf_sub            gf_448_sub
-#define gf_add_RAW        gf_448_add_RAW
-#define gf_sub_RAW        gf_448_sub_RAW
-#define gf_bias           gf_448_bias
-#define gf_weak_reduce    gf_448_weak_reduce
-#define gf_strong_reduce  gf_448_strong_reduce
-#define gf_mul            gf_448_mul
-#define gf_sqr            gf_448_sqr
-#define gf_mulw_unsigned  gf_448_mulw_unsigned
-#define gf_isr            gf_448_isr
-#define gf_serialize      gf_448_serialize
-#define gf_deserialize    gf_448_deserialize
-
-/* RFC 7748 support */
-#define X_PUBLIC_BYTES  X_SER_BYTES
-#define X_PRIVATE_BYTES X_PUBLIC_BYTES
-#define X_PRIVATE_BITS  448
-
-#define SQRT_MINUS_ONE    P448_SQRT_MINUS_ONE /* might not be defined */
-
-#define INLINE_UNUSED __inline__ __attribute__((unused,always_inline))
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Defined below in f_impl.h */
-static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; }
-static INLINE_UNUSED void gf_add_RAW (gf out, const gf a, const gf b);
-static INLINE_UNUSED void gf_sub_RAW (gf out, const gf a, const gf b);
-static INLINE_UNUSED void gf_bias (gf inout, int amount);
-static INLINE_UNUSED void gf_weak_reduce (gf inout);
-
-void gf_strong_reduce (gf inout);   
-void gf_add (gf out, const gf a, const gf b);
-void gf_sub (gf out, const gf a, const gf b);
-void gf_mul (gf_s *__restrict__ out, const gf a, const gf b);
-void gf_mulw_unsigned (gf_s *__restrict__ out, const gf a, uint32_t b);
-void gf_sqr (gf_s *__restrict__ out, const gf a);
-mask_t gf_isr(gf a, const gf x); /** a^2 x = 1, QNR, or 0 if x=0.  Return true if successful */
-mask_t gf_eq (const gf x, const gf y);
-mask_t gf_lobit (const gf x);
-mask_t gf_hibit (const gf x);
-
-void gf_serialize (uint8_t *serial, const gf x,int with_highbit);
-mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES],int with_hibit,uint8_t hi_nmask);
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#include "f_impl.h" /* Bring in the inline implementations */
-
-#define P_MOD_8 7
-#if P_MOD_8 == 5
-    extern const gf SQRT_MINUS_ONE;
-#endif
-
-#ifndef LIMBPERM
-  #define LIMBPERM(i) (i)
-#endif
-#define LIMB_MASK(i) (((1ull)<<LIMB_PLACE_VALUE(i))-1)
-
-static const gf ZERO = {{{0}}}, ONE = {{{ [LIMBPERM(0)] = 1 }}};
-
-#endif /* __P448_F_FIELD_H__ */
diff --git a/crypto/ec/curve448/GENERATED/c/p448/f_generic.c b/crypto/ec/curve448/GENERATED/c/p448/f_generic.c
deleted file mode 100644
index d09a989f67..0000000000
--- a/crypto/ec/curve448/GENERATED/c/p448/f_generic.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/**
- * @file p448/f_generic.c
- * @author Mike Hamburg
- *
- * @copyright
- *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- *
- * @brief Generic arithmetic which has to be compiled per field.
- *
- * @warning This file was automatically generated in Python.
- * Please do not edit it.
- */
-#include "field.h"
-
-static const gf MODULUS = {FIELD_LITERAL(
-    0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff, 0xfffffffffffffe, 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff
-)};
-    
-#if P_MOD_8 == 5
-    const gf SQRT_MINUS_ONE = {FIELD_LITERAL(
-        /* NOPE */
-    )};
-#endif
-
-/** Serialize to wire format. */
-void gf_serialize (uint8_t serial[SER_BYTES], const gf x, int with_hibit) {
-    gf red;
-    gf_copy(red, x);
-    gf_strong_reduce(red);
-    if (!with_hibit) { assert(gf_hibit(red) == 0); }
-    
-    unsigned int j=0, fill=0;
-    dword_t buffer = 0;
-    UNROLL for (unsigned int i=0; i<(with_hibit ? X_SER_BYTES : SER_BYTES); i++) {
-        if (fill < 8 && j < NLIMBS) {
-            buffer |= ((dword_t)red->limb[LIMBPERM(j)]) << fill;
-            fill += LIMB_PLACE_VALUE(LIMBPERM(j));
-            j++;
-        }
-        serial[i] = buffer;
-        fill -= 8;
-        buffer >>= 8;
-    }
-}
-
-/** Return high bit of x = low bit of 2x mod p */
-mask_t gf_hibit(const gf x) {
-    gf y;
-    gf_add(y,x,x);
-    gf_strong_reduce(y);
-    return -(y->limb[0]&1);
-}
-
-/** Return high bit of x = low bit of 2x mod p */
-mask_t gf_lobit(const gf x) {
-    gf y;
-    gf_copy(y,x);
-    gf_strong_reduce(y);
-    return -(y->limb[0]&1);
-}
-
-/** Deserialize from wire format; return -1 on success and 0 on failure. */
-mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES], int with_hibit, uint8_t hi_nmask) {
-    unsigned int j=0, fill=0;
-    dword_t buffer = 0;
-    dsword_t scarry = 0;
-    const unsigned nbytes = with_hibit ? X_SER_BYTES : SER_BYTES;
-    UNROLL for (unsigned int i=0; i<NLIMBS; i++) {
-        UNROLL while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < nbytes) {
-            uint8_t sj = serial[j];
-            if (j==nbytes-1) sj &= ~hi_nmask;
-            buffer |= ((dword_t)sj) << fill;
-            fill += 8;
-            j++;
-        }
-        x->limb[LIMBPERM(i)] = (i<NLIMBS-1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer;
-        fill -= LIMB_PLACE_VALUE(LIMBPERM(i));
-        buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i));
-        scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8*sizeof(word_t));
-    }
-    mask_t succ = with_hibit ? -(mask_t)1 : ~gf_hibit(x);
-    return succ & word_is_zero(buffer) & ~word_is_zero(scarry);
-}
-
-/** Reduce to canonical form. */
-void gf_strong_reduce (gf a) {
-    /* first, clear high */
-    gf_weak_reduce(a); /* Determined to have negligible perf impact. */
-
-    /* now the total is less than 2p */
-
-    /* compute total_value - p.  No need to reduce mod p. */
-    dsword_t scarry = 0;
-    for (unsigned int i=0; i<NLIMBS; i++) {
-        scarry = scarry + a->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)];
-        a->limb[LIMBPERM(i)] = scarry & LIMB_MASK(LIMBPERM(i));
-        scarry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
-    }
-
-    /* uncommon case: it was >= p, so now scarry = 0 and this = x
-     * common case: it was < p, so now scarry = -1 and this = x - p + 2^255
-     * so let's add back in p.  will carry back off the top for 2^255.
-     */
-    assert(word_is_zero(scarry) | word_is_zero(scarry+1));
-
-    word_t scarry_0 = scarry;
-    dword_t carry = 0;
-
-    /* add it back */
-    for (unsigned int i=0; i<NLIMBS; i++) {
-        carry = carry + a->limb[LIMBPERM(i)] + (scarry_0 & MODULUS->limb[LIMBPERM(i)]);
-        a->limb[LIMBPERM(i)] = carry & LIMB_MASK(LIMBPERM(i));
-        carry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
-    }
-
-    assert(word_is_zero(carry + scarry_0));
-}
-
-/** Subtract two gf elements d=a-b */
-void gf_sub (gf d, const gf a, const gf b) {
-    gf_sub_RAW ( d, a, b );
-    gf_bias( d, 2 );
-    gf_weak_reduce ( d );
-}
-
-/** Add two field elements d = a+b */
-void gf_add (gf d, const gf a, const gf b) {
-    gf_add_RAW ( d, a, b );
-    gf_weak_reduce ( d );
-}
-
-/** Compare a==b */
-mask_t gf_eq(const gf a, const gf b) {
-    gf c;
-    gf_sub(c,a,b);
-    gf_strong_reduce(c);
-    mask_t ret=0;
-    for (unsigned int i=0; i<NLIMBS; i++) {
-        ret |= c->limb[LIMBPERM(i)];
-    }
-
-    return word_is_zero(ret);
-}
diff --git a/crypto/ec/curve448/GENERATED/include/decaf.h b/crypto/ec/curve448/GENERATED/include/decaf.h
deleted file mode 100644
index d3cb60ce3d..0000000000
--- a/crypto/ec/curve448/GENERATED/include/decaf.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * @file decaf.h
- * @author Mike Hamburg
- *
- * @copyright
- *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- *
- * Master header for Decaf library.
- *
- * The Decaf library implements cryptographic operations on a elliptic curve
- * groups of prime order p.  It accomplishes this by using a twisted Edwards
- * curve (isogenous to Ed448-Goldilocks or Ed25519) and wiping out the cofactor.
- *
- * The formulas are all complete and have no special cases.  However, some
- * functions can fail.  For example, decoding functions can fail because not
- * every string is the encoding of a valid group element.
- *
- * The formulas contain no data-dependent branches, timing or memory accesses,
- * except for decaf_XXX_base_double_scalarmul_non_secret.
- *
- * @warning This file was automatically generated in Python.
- * Please do not edit it.
- */
-
-#ifndef __DECAF_H__
-#define __DECAF_H__ 1
-
-#include <decaf/point_255.h>
-#include <decaf/point_448.h>
-
-#endif /* __DECAF_H__ */
diff --git a/crypto/ec/curve448/GENERATED/include/decaf/common.h b/crypto/ec/curve448/GENERATED/include/decaf/common.h
deleted file mode 100644
index 64719ad971..0000000000
--- a/crypto/ec/curve448/GENERATED/include/decaf/common.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/**
- * @file decaf/common.h
- * @author Mike Hamburg
- *
- * @copyright
- *   Copyright (c) 2015 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- *
- * @brief Common utility headers for Decaf library.
- */
-
-#ifndef __DECAF_COMMON_H__
-#define __DECAF_COMMON_H__ 1
-
-#include <stdint.h>
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Goldilocks' build flags default to hidden and stripping executables. */
-/** @cond internal */
-#if defined(DOXYGEN) && !defined(__attribute__)
-#define __attribute__((x))
-#endif
-#define DECAF_API_VIS __attribute__((visibility("default")))
-#define DECAF_NOINLINE  __attribute__((noinline))
-#define DECAF_WARN_UNUSED __attribute__((warn_unused_result))
-#define DECAF_NONNULL __attribute__((nonnull))
-#define DECAF_INLINE inline __attribute__((always_inline,unused))
-// Cribbed from libnotmuch
-#if defined (__clang_major__) && __clang_major__ >= 3 \
-    || defined (__GNUC__) && __GNUC__ >= 5 \
-    || defined (__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ >= 5
-#define DECAF_DEPRECATED(msg) __attribute__ ((deprecated(msg)))
-#else
-#define DECAF_DEPRECATED(msg) __attribute__ ((deprecated))
-#endif
-/** @endcond */
-
-/* Internal word types.
- *
- * Somewhat tricky.  This could be decided separately per platform.  However,
- * the structs do need to be all the same size and alignment on a given
- * platform to support dynamic linking, since even if you header was built
- * with eg arch_neon, you might end up linking a library built with arch_arm32.
- */
-#ifndef DECAF_WORD_BITS
-    #if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) || (((__UINT_FAST32_MAX__)>>30)>>30))
-        #define DECAF_WORD_BITS 64 /**< The number of bits in a word */
-    #else
-        #define DECAF_WORD_BITS 32 /**< The number of bits in a word */
-    #endif
-#endif
-    
-#if DECAF_WORD_BITS == 64
-typedef uint64_t decaf_word_t;      /**< Word size for internal computations */
-typedef int64_t decaf_sword_t;      /**< Signed word size for internal computations */
-typedef uint64_t decaf_bool_t;      /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
-typedef __uint128_t decaf_dword_t;  /**< Double-word size for internal computations */
-typedef __int128_t decaf_dsword_t;  /**< Signed double-word size for internal computations */
-#elif DECAF_WORD_BITS == 32         /**< The number of bits in a word */
-typedef uint32_t decaf_word_t;      /**< Word size for internal computations */
-typedef int32_t decaf_sword_t;      /**< Signed word size for internal computations */
-typedef uint32_t decaf_bool_t;      /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
-typedef uint64_t decaf_dword_t;     /**< Double-word size for internal computations */
-typedef int64_t decaf_dsword_t;     /**< Signed double-word size for internal computations */
-#else
-#error "Only supporting DECAF_WORD_BITS = 32 or 64 for now"
-#endif
-    
-/** DECAF_TRUE = -1 so that DECAF_TRUE & x = x */
-static const decaf_bool_t DECAF_TRUE = -(decaf_bool_t)1;
-
-/** DECAF_FALSE = 0 so that DECAF_FALSE & x = 0 */
-static const decaf_bool_t DECAF_FALSE = 0;
-
-/** Another boolean type used to indicate success or failure. */
-typedef enum {
-    DECAF_SUCCESS = -1, /**< The operation succeeded. */
-    DECAF_FAILURE = 0   /**< The operation failed. */
-} decaf_error_t;
-
-
-/** Return success if x is true */
-static DECAF_INLINE decaf_error_t
-decaf_succeed_if(decaf_bool_t x) {
-    return (decaf_error_t)x;
-}
-
-/** Return DECAF_TRUE iff x == DECAF_SUCCESS */
-static DECAF_INLINE decaf_bool_t
-decaf_successful(decaf_error_t e) {
-    decaf_dword_t w = ((decaf_word_t)e) ^  ((decaf_word_t)DECAF_SUCCESS);
-    return (w-1)>>DECAF_WORD_BITS;
-}
-    
-/** Overwrite data with zeros.  Uses memset_s if available. */
-void decaf_bzero (
-    void *data,
-    size_t size
-) DECAF_NONNULL DECAF_API_VIS;
-
-/** Compare two buffers, returning DECAF_TRUE if they are equal. */
-decaf_bool_t decaf_memeq (
-    const void *data1,
-    const void *data2,
-    size_t size
-) DECAF_NONNULL DECAF_WARN_UNUSED DECAF_API_VIS;
-    
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-    
-#endif /* __DECAF_COMMON_H__ */
diff --git a/crypto/ec/curve448/GENERATED/include/decaf/ed448.h b/crypto/ec/curve448/GENERATED/include/decaf/ed448.h
deleted file mode 100644
index eeed619adf..0000000000
--- a/crypto/ec/curve448/GENERATED/include/decaf/ed448.h
+++ /dev/null
@@ -1,251 +0,0 @@
-/**
- * @file decaf/ed448.h
- * @author Mike Hamburg
- *
- * @copyright
- *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- *
- * @brief A group of prime order p, based on Ed448-Goldilocks.
- *
- * @warning This file was automatically generated in Python.
- * Please do not edit it.
- */
-
-#ifndef __DECAF_ED448_H__
-#define __DECAF_ED448_H__ 1
-
-#include <decaf/point_448.h>
-#include <decaf/shake.h>
-#include <decaf/sha512.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/** Number of bytes in an EdDSA public key. */
-#define DECAF_EDDSA_448_PUBLIC_BYTES 57
-
-/** Number of bytes in an EdDSA private key. */
-#define DECAF_EDDSA_448_PRIVATE_BYTES DECAF_EDDSA_448_PUBLIC_BYTES
-
-/** Number of bytes in an EdDSA private key. */
-#define DECAF_EDDSA_448_SIGNATURE_BYTES (DECAF_EDDSA_448_PUBLIC_BYTES + DECAF_EDDSA_448_PRIVATE_BYTES)
-
-/** Does EdDSA support non-contextual signatures? */
-#define DECAF_EDDSA_448_SUPPORTS_CONTEXTLESS_SIGS 0
-
-/** Prehash context renaming macros. */
-#define decaf_ed448_prehash_ctx_s   decaf_shake256_ctx_s
-#define decaf_ed448_prehash_ctx_t   decaf_shake256_ctx_t
-#define decaf_ed448_prehash_update  decaf_shake256_update
-#define decaf_ed448_prehash_destroy decaf_shake256_destroy
-
-/** EdDSA encoding ratio. */
-#define DECAF_448_EDDSA_ENCODE_RATIO 4
-
-/** EdDSA decoding ratio. */
-#define DECAF_448_EDDSA_DECODE_RATIO (4 / 4)
-
-/**
- * @brief EdDSA key generation.  This function uses a different (non-Decaf)
- * encoding.
- *
- * @param [out] pubkey The public key.
- * @param [in] privkey The private key.
- */    
-void decaf_ed448_derive_public_key (
-    uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief EdDSA signing.
- *
- * @param [out] signature The signature.
- * @param [in] privkey The private key.
- * @param [in] pubkey The public key.
- * @param [in] message The message to sign.
- * @param [in] message_len The length of the message.
- * @param [in] prehashed Nonzero if the message is actually the hash of something you want to sign.
- * @param [in] context A "context" for this signature of up to 255 bytes.
- * @param [in] context_len Length of the context.
- *
- * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
- * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
- * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
- * you no seat belt.
- */  
-void decaf_ed448_sign (
-    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const uint8_t *message,
-    size_t message_len,
-    uint8_t prehashed,
-    const uint8_t *context,
-    uint8_t context_len
-) DECAF_API_VIS __attribute__((nonnull(1,2,3))) DECAF_NOINLINE;
-
-/**
- * @brief EdDSA signing with prehash.
- *
- * @param [out] signature The signature.
- * @param [in] privkey The private key.
- * @param [in] pubkey The public key.
- * @param [in] hash The hash of the message.  This object will not be modified by the call.
- * @param [in] context A "context" for this signature of up to 255 bytes.  Must be the same as what was used for the prehash.
- * @param [in] context_len Length of the context.
- *
- * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
- * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
- * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
- * you no seat belt.
- */  
-void decaf_ed448_sign_prehash (
-    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const decaf_ed448_prehash_ctx_t hash,
-    const uint8_t *context,
-    uint8_t context_len
-) DECAF_API_VIS __attribute__((nonnull(1,2,3,4))) DECAF_NOINLINE;
-    
-/**
- * @brief Prehash initialization, with contexts if supported.
- *
- * @param [out] hash The hash object to be initialized.
- */
-void decaf_ed448_prehash_init (
-    decaf_ed448_prehash_ctx_t hash
-) DECAF_API_VIS __attribute__((nonnull(1))) DECAF_NOINLINE;
-
-/**
- * @brief EdDSA signature verification.
- *
- * Uses the standard (i.e. less-strict) verification formula.
- *
- * @param [in] signature The signature.
- * @param [in] pubkey The public key.
- * @param [in] message The message to verify.
- * @param [in] message_len The length of the message.
- * @param [in] prehashed Nonzero if the message is actually the hash of something you want to verify.
- * @param [in] context A "context" for this signature of up to 255 bytes.
- * @param [in] context_len Length of the context.
- *
- * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
- * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
- * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
- * you no seat belt.
- */
-decaf_error_t decaf_ed448_verify (
-    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const uint8_t *message,
-    size_t message_len,
-    uint8_t prehashed,
-    const uint8_t *context,
-    uint8_t context_len
-) DECAF_API_VIS __attribute__((nonnull(1,2))) DECAF_NOINLINE;
-
-/**
- * @brief EdDSA signature verification.
- *
- * Uses the standard (i.e. less-strict) verification formula.
- *
- * @param [in] signature The signature.
- * @param [in] pubkey The public key.
- * @param [in] hash The hash of the message.  This object will not be modified by the call.
- * @param [in] context A "context" for this signature of up to 255 bytes.  Must be the same as what was used for the prehash.
- * @param [in] context_len Length of the context.
- *
- * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
- * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
- * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
- * you no seat belt.
- */
-decaf_error_t decaf_ed448_verify_prehash (
-    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const decaf_ed448_prehash_ctx_t hash,
-    const uint8_t *context,
-    uint8_t context_len
-) DECAF_API_VIS __attribute__((nonnull(1,2))) DECAF_NOINLINE;
-
-/**
- * @brief EdDSA point encoding.  Used internally, exposed externally.
- * Multiplies by DECAF_448_EDDSA_ENCODE_RATIO first.
- *
- * The multiplication is required because the EdDSA encoding represents
- * the cofactor information, but the Decaf encoding ignores it (which
- * is the whole point).  So if you decode from EdDSA and re-encode to
- * EdDSA, the cofactor info must get cleared, because the intermediate
- * representation doesn't track it.
- *
- * The way libdecaf handles this is to multiply by
- * DECAF_448_EDDSA_DECODE_RATIO when decoding, and by
- * DECAF_448_EDDSA_ENCODE_RATIO when encoding.  The product of these
- * ratios is always exactly the cofactor 4, so the cofactor
- * ends up cleared one way or another.  But exactly how that shakes
- * out depends on the base points specified in RFC 8032.
- *
- * The upshot is that if you pass the Decaf/Ristretto base point to
- * this function, you will get DECAF_448_EDDSA_ENCODE_RATIO times the
- * EdDSA base point.
- *
- * @param [out] enc The encoded point.
- * @param [in] p The point.
- */       
-void decaf_448_point_mul_by_ratio_and_encode_like_eddsa (
-    uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES],
-    const decaf_448_point_t p
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief EdDSA point decoding.  Multiplies by DECAF_448_EDDSA_DECODE_RATIO,
- * and ignores cofactor information.
- *
- * See notes on decaf_448_point_mul_by_ratio_and_encode_like_eddsa
- *
- * @param [out] enc The encoded point.
- * @param [in] p The point.
- */       
-decaf_error_t decaf_448_point_decode_like_eddsa_and_mul_by_ratio (
-    decaf_448_point_t p,
-    const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief EdDSA to ECDH public key conversion
- * Deserialize the point to get y on Edwards curve,
- * Convert it to u coordinate on Montgomery curve.
- *
- * @warning This function does not check that the public key being converted
- * is a valid EdDSA public key (FUTURE?)
- *
- * @param[out] x The ECDH public key as in RFC7748(point on Montgomery curve)
- * @param[in] ed The EdDSA public key(point on Edwards curve)
- */
-void decaf_ed448_convert_public_key_to_x448 (
-    uint8_t x[DECAF_X448_PUBLIC_BYTES],
-    const uint8_t ed[DECAF_EDDSA_448_PUBLIC_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief EdDSA to ECDH private key conversion
- * Using the appropriate hash function, hash the EdDSA private key
- * and keep only the lower bytes to get the ECDH private key
- *
- * @param[out] x The ECDH private key as in RFC7748
- * @param[in] ed The EdDSA private key
- */
-void decaf_ed448_convert_private_key_to_x448 (
-    uint8_t x[DECAF_X448_PRIVATE_BYTES],
-    const uint8_t ed[DECAF_EDDSA_448_PRIVATE_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* __DECAF_ED448_H__ */
diff --git a/crypto/ec/curve448/GENERATED/include/decaf/point_255.h b/crypto/ec/curve448/GENERATED/include/decaf/point_255.h
deleted file mode 100644
index 94e30a5b89..0000000000
--- a/crypto/ec/curve448/GENERATED/include/decaf/point_255.h
+++ /dev/null
@@ -1,765 +0,0 @@
-/**
- * @file decaf/point_255.h
- * @author Mike Hamburg
- *
- * @copyright
- *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- *
- * @brief A group of prime order p, based on Curve25519.
- *
- * @warning This file was automatically generated in Python.
- * Please do not edit it.
- */
-
-#ifndef __DECAF_POINT_255_H__
-#define __DECAF_POINT_255_H__ 1
-
-#include <decaf/common.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/** @cond internal */
-#define DECAF_255_SCALAR_LIMBS ((253-1)/DECAF_WORD_BITS+1)
-/** @endcond */
-
-/** The number of bits in a scalar */
-#define DECAF_255_SCALAR_BITS 253
-
-/** @cond internal */
-#ifndef __DECAF_25519_GF_DEFINED__
-#define __DECAF_25519_GF_DEFINED__ 1
-/** @brief Galois field element internal structure */
-typedef struct gf_25519_s {
-    decaf_word_t limb[320/DECAF_WORD_BITS];
-} __attribute__((aligned(32))) gf_25519_s, gf_25519_t[1];
-#endif /* __DECAF_25519_GF_DEFINED__ */
-/** @endcond */
-
-/** Number of bytes in a serialized point. */
-#define DECAF_255_SER_BYTES 32
-
-/** Number of bytes in an elligated point.  For now set the same as SER_BYTES
- * but could be different for other curves.
- */
-#define DECAF_255_HASH_BYTES 32
-
-/** Number of bytes in a serialized scalar. */
-#define DECAF_255_SCALAR_BYTES 32
-
-/** Number of bits in the "which" field of an elligator inverse */
-#define DECAF_255_INVERT_ELLIGATOR_WHICH_BITS 5
-
-/** The cofactor the curve would have, if we hadn't removed it */
-#define DECAF_255_REMOVED_COFACTOR 8
-
-/** X25519 encoding ratio. */
-#define DECAF_X25519_ENCODE_RATIO 4
-
-/** Number of bytes in an x25519 public key */
-#define DECAF_X25519_PUBLIC_BYTES 32
-
-/** Number of bytes in an x25519 private key */
-#define DECAF_X25519_PRIVATE_BYTES 32
-
-/** Twisted Edwards extended homogeneous coordinates */
-typedef struct decaf_255_point_s {
-    /** @cond internal */
-    gf_25519_t x,y,z,t;
-    /** @endcond */
-} decaf_255_point_t[1];
-
-/** Precomputed table based on a point.  Can be trivial implementation. */
-struct decaf_255_precomputed_s;
-
-/** Precomputed table based on a point.  Can be trivial implementation. */
-typedef struct decaf_255_precomputed_s decaf_255_precomputed_s; 
-
-/** Size and alignment of precomputed point tables. */
-extern const size_t decaf_255_sizeof_precomputed_s DECAF_API_VIS, decaf_255_alignof_precomputed_s DECAF_API_VIS;
-
-/** Scalar is stored packed, because we don't need the speed. */
-typedef struct decaf_255_scalar_s {
-    /** @cond internal */
-    decaf_word_t limb[DECAF_255_SCALAR_LIMBS];
-    /** @endcond */
-} decaf_255_scalar_t[1];
-
-/** A scalar equal to 1. */
-extern const decaf_255_scalar_t decaf_255_scalar_one DECAF_API_VIS;
-
-/** A scalar equal to 0. */
-extern const decaf_255_scalar_t decaf_255_scalar_zero DECAF_API_VIS;
-
-/** The identity point on the curve. */
-extern const decaf_255_point_t decaf_255_point_identity DECAF_API_VIS;
-
-/** An arbitrarily chosen base point on the curve. */
-extern const decaf_255_point_t decaf_255_point_base DECAF_API_VIS;
-
-/** Precomputed table for the base point on the curve. */
-extern const struct decaf_255_precomputed_s *decaf_255_precomputed_base DECAF_API_VIS;
-
-/**
- * @brief Read a scalar from wire format or from bytes.
- *
- * @param [in] ser Serialized form of a scalar.
- * @param [out] out Deserialized form.
- *
- * @retval DECAF_SUCCESS The scalar was correctly encoded.
- * @retval DECAF_FAILURE The scalar was greater than the modulus,
- * and has been reduced modulo that modulus.
- */
-decaf_error_t decaf_255_scalar_decode (
-    decaf_255_scalar_t out,
-    const unsigned char ser[DECAF_255_SCALAR_BYTES]
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Read a scalar from wire format or from bytes.  Reduces mod
- * scalar prime.
- *
- * @param [in] ser Serialized form of a scalar.
- * @param [in] ser_len Length of serialized form.
- * @param [out] out Deserialized form.
- */
-void decaf_255_scalar_decode_long (
-    decaf_255_scalar_t out,
-    const unsigned char *ser,
-    size_t ser_len
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-    
-/**
- * @brief Serialize a scalar to wire format.
- *
- * @param [out] ser Serialized form of a scalar.
- * @param [in] s Deserialized scalar.
- */
-void decaf_255_scalar_encode (
-    unsigned char ser[DECAF_255_SCALAR_BYTES],
-    const decaf_255_scalar_t s
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_NOINLINE;
-        
-/**
- * @brief Add two scalars.  The scalars may use the same memory.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @param [out] out a+b.
- */
-void decaf_255_scalar_add (
-    decaf_255_scalar_t out,
-    const decaf_255_scalar_t a,
-    const decaf_255_scalar_t b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Compare two scalars.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @retval DECAF_TRUE The scalars are equal.
- * @retval DECAF_FALSE The scalars are not equal.
- */    
-decaf_bool_t decaf_255_scalar_eq (
-    const decaf_255_scalar_t a,
-    const decaf_255_scalar_t b
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Subtract two scalars.  The scalars may use the same memory.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @param [out] out a-b.
- */  
-void decaf_255_scalar_sub (
-    decaf_255_scalar_t out,
-    const decaf_255_scalar_t a,
-    const decaf_255_scalar_t b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Multiply two scalars.  The scalars may use the same memory.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @param [out] out a*b.
- */  
-void decaf_255_scalar_mul (
-    decaf_255_scalar_t out,
-    const decaf_255_scalar_t a,
-    const decaf_255_scalar_t b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-        
-/**
-* @brief Halve a scalar.  The scalars may use the same memory.
-* @param [in] a A scalar.
-* @param [out] out a/2.
-*/
-void decaf_255_scalar_halve (
-   decaf_255_scalar_t out,
-   const decaf_255_scalar_t a
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Invert a scalar.  When passed zero, return 0.  The input and output may alias.
- * @param [in] a A scalar.
- * @param [out] out 1/a.
- * @return DECAF_SUCCESS The input is nonzero.
- */  
-decaf_error_t decaf_255_scalar_invert (
-    decaf_255_scalar_t out,
-    const decaf_255_scalar_t a
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Copy a scalar.  The scalars may use the same memory, in which
- * case this function does nothing.
- * @param [in] a A scalar.
- * @param [out] out Will become a copy of a.
- */
-static inline void DECAF_NONNULL decaf_255_scalar_copy (
-    decaf_255_scalar_t out,
-    const decaf_255_scalar_t a
-) {
-    *out = *a;
-}
-
-/**
- * @brief Set a scalar to an unsigned 64-bit integer.
- * @param [in] a An integer.
- * @param [out] out Will become equal to a.
- */  
-void decaf_255_scalar_set_unsigned (
-    decaf_255_scalar_t out,
-    uint64_t a
-) DECAF_API_VIS DECAF_NONNULL;
-
-/**
- * @brief Encode a point as a sequence of bytes.
- *
- * @param [out] ser The byte representation of the point.
- * @param [in] pt The point to encode.
- */
-void decaf_255_point_encode (
-    uint8_t ser[DECAF_255_SER_BYTES],
-    const decaf_255_point_t pt
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Decode a point from a sequence of bytes.
- *
- * Every point has a unique encoding, so not every
- * sequence of bytes is a valid encoding.  If an invalid
- * encoding is given, the output is undefined.
- *
- * @param [out] pt The decoded point.
- * @param [in] ser The serialized version of the point.
- * @param [in] allow_identity DECAF_TRUE if the identity is a legal input.
- * @retval DECAF_SUCCESS The decoding succeeded.
- * @retval DECAF_FAILURE The decoding didn't succeed, because
- * ser does not represent a point.
- */
-decaf_error_t decaf_255_point_decode (
-    decaf_255_point_t pt,
-    const uint8_t ser[DECAF_255_SER_BYTES],
-    decaf_bool_t allow_identity
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Copy a point.  The input and output may alias,
- * in which case this function does nothing.
- *
- * @param [out] a A copy of the point.
- * @param [in] b Any point.
- */
-static inline void DECAF_NONNULL decaf_255_point_copy (
-    decaf_255_point_t a,
-    const decaf_255_point_t b
-) {
-    *a=*b;
-}
-
-/**
- * @brief Test whether two points are equal.  If yes, return
- * DECAF_TRUE, else return DECAF_FALSE.
- *
- * @param [in] a A point.
- * @param [in] b Another point.
- * @retval DECAF_TRUE The points are equal.
- * @retval DECAF_FALSE The points are not equal.
- */
-decaf_bool_t decaf_255_point_eq (
-    const decaf_255_point_t a,
-    const decaf_255_point_t b
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Add two points to produce a third point.  The
- * input points and output point can be pointers to the same
- * memory.
- *
- * @param [out] sum The sum a+b.
- * @param [in] a An addend.
- * @param [in] b An addend.
- */
-void decaf_255_point_add (
-    decaf_255_point_t sum,
-    const decaf_255_point_t a,
-    const decaf_255_point_t b
-) DECAF_API_VIS DECAF_NONNULL;
-
-/**
- * @brief Double a point.  Equivalent to
- * decaf_255_point_add(two_a,a,a), but potentially faster.
- *
- * @param [out] two_a The sum a+a.
- * @param [in] a A point.
- */
-void decaf_255_point_double (
-    decaf_255_point_t two_a,
-    const decaf_255_point_t a
-) DECAF_API_VIS DECAF_NONNULL;
-
-/**
- * @brief Subtract two points to produce a third point.  The
- * input points and output point can be pointers to the same
- * memory.
- *
- * @param [out] diff The difference a-b.
- * @param [in] a The minuend.
- * @param [in] b The subtrahend.
- */
-void decaf_255_point_sub (
-    decaf_255_point_t diff,
-    const decaf_255_point_t a,
-    const decaf_255_point_t b
-) DECAF_API_VIS DECAF_NONNULL;
-    
-/**
- * @brief Negate a point to produce another point.  The input
- * and output points can use the same memory.
- *
- * @param [out] nega The negated input point
- * @param [in] a The input point.
- */
-void decaf_255_point_negate (
-   decaf_255_point_t nega,
-   const decaf_255_point_t a
-) DECAF_API_VIS DECAF_NONNULL;
-
-/**
- * @brief Multiply a base point by a scalar: scaled = scalar*base.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] base The point to be scaled.
- * @param [in] scalar The scalar to multiply by.
- */
-void decaf_255_point_scalarmul (
-    decaf_255_point_t scaled,
-    const decaf_255_point_t base,
-    const decaf_255_scalar_t scalar
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Multiply a base point by a scalar: scaled = scalar*base.
- * This function operates directly on serialized forms.
- *
- * @warning This function is experimental.  It may not be supported
- * long-term.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] base The point to be scaled.
- * @param [in] scalar The scalar to multiply by.
- * @param [in] allow_identity Allow the input to be the identity.
- * @param [in] short_circuit Allow a fast return if the input is illegal.
- *
- * @retval DECAF_SUCCESS The scalarmul succeeded.
- * @retval DECAF_FAILURE The scalarmul didn't succeed, because
- * base does not represent a point.
- */
-decaf_error_t decaf_255_direct_scalarmul (
-    uint8_t scaled[DECAF_255_SER_BYTES],
-    const uint8_t base[DECAF_255_SER_BYTES],
-    const decaf_255_scalar_t scalar,
-    decaf_bool_t allow_identity,
-    decaf_bool_t short_circuit
-) DECAF_API_VIS DECAF_NONNULL DECAF_WARN_UNUSED DECAF_NOINLINE;
-
-/**
- * @brief RFC 7748 Diffie-Hellman scalarmul.  This function uses a different
- * (non-Decaf) encoding.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] base The point to be scaled.
- * @param [in] scalar The scalar to multiply by.
- *
- * @retval DECAF_SUCCESS The scalarmul succeeded.
- * @retval DECAF_FAILURE The scalarmul didn't succeed, because the base
- * point is in a small subgroup.
- */
-decaf_error_t decaf_x25519 (
-    uint8_t out[DECAF_X25519_PUBLIC_BYTES],
-    const uint8_t base[DECAF_X25519_PUBLIC_BYTES],
-    const uint8_t scalar[DECAF_X25519_PRIVATE_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_WARN_UNUSED DECAF_NOINLINE;
-
-/**
- * @brief Multiply a point by DECAF_X25519_ENCODE_RATIO,
- * then encode it like RFC 7748.
- *
- * This function is mainly used internally, but is exported in case
- * it will be useful.
- *
- * The ratio is necessary because the internal representation doesn't
- * track the cofactor information, so on output we must clear the cofactor.
- * This would multiply by the cofactor, but in fact internally libdecaf's
- * points are always even, so it multiplies by half the cofactor instead.
- *
- * As it happens, this aligns with the base point definitions; that is,
- * if you pass the Decaf/Ristretto base point to this function, the result
- * will be DECAF_X25519_ENCODE_RATIO times the X25519
- * base point.
- *
- * @param [out] out The scaled and encoded point.
- * @param [in] p The point to be scaled and encoded.
- */
-void decaf_255_point_mul_by_ratio_and_encode_like_x25519 (
-    uint8_t out[DECAF_X25519_PUBLIC_BYTES],
-    const decaf_255_point_t p
-) DECAF_API_VIS DECAF_NONNULL;
-
-/** The base point for X25519 Diffie-Hellman */
-extern const uint8_t decaf_x25519_base_point[DECAF_X25519_PUBLIC_BYTES] DECAF_API_VIS;
-
-/**
- * @brief RFC 7748 Diffie-Hellman base point scalarmul.  This function uses
- * a different (non-Decaf) encoding.
- *
- * @deprecated Renamed to decaf_x25519_derive_public_key.
- * I have no particular timeline for removing this name.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] scalar The scalar to multiply by.
- */
-void decaf_x25519_generate_key (
-    uint8_t out[DECAF_X25519_PUBLIC_BYTES],
-    const uint8_t scalar[DECAF_X25519_PRIVATE_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_DEPRECATED("Renamed to decaf_x25519_derive_public_key");
-    
-/**
- * @brief RFC 7748 Diffie-Hellman base point scalarmul.  This function uses
- * a different (non-Decaf) encoding.
- *
- * Does exactly the same thing as decaf_x25519_generate_key,
- * but has a better name.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] scalar The scalar to multiply by.
- */
-void decaf_x25519_derive_public_key (
-    uint8_t out[DECAF_X25519_PUBLIC_BYTES],
-    const uint8_t scalar[DECAF_X25519_PRIVATE_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/* FUTURE: uint8_t decaf_255_encode_like_curve25519) */
-
-/**
- * @brief Precompute a table for fast scalar multiplication.
- * Some implementations do not include precomputed points; for
- * those implementations, this implementation simply copies the
- * point.
- *
- * @param [out] a A precomputed table of multiples of the point.
- * @param [in] b Any point.
- */
-void decaf_255_precompute (
-    decaf_255_precomputed_s *a,
-    const decaf_255_point_t b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Multiply a precomputed base point by a scalar:
- * scaled = scalar*base.
- * Some implementations do not include precomputed points; for
- * those implementations, this function is the same as
- * decaf_255_point_scalarmul
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] base The point to be scaled.
- * @param [in] scalar The scalar to multiply by.
- */
-void decaf_255_precomputed_scalarmul (
-    decaf_255_point_t scaled,
-    const decaf_255_precomputed_s *base,
-    const decaf_255_scalar_t scalar
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Multiply two base points by two scalars:
- * scaled = scalar1*base1 + scalar2*base2.
- *
- * Equivalent to two calls to decaf_255_point_scalarmul, but may be
- * faster.
- *
- * @param [out] combo The linear combination scalar1*base1 + scalar2*base2.
- * @param [in] base1 A first point to be scaled.
- * @param [in] scalar1 A first scalar to multiply by.
- * @param [in] base2 A second point to be scaled.
- * @param [in] scalar2 A second scalar to multiply by.
- */
-void decaf_255_point_double_scalarmul (
-    decaf_255_point_t combo,
-    const decaf_255_point_t base1,
-    const decaf_255_scalar_t scalar1,
-    const decaf_255_point_t base2,
-    const decaf_255_scalar_t scalar2
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-    
-/**
- * Multiply one base point by two scalars:
- *
- * a1 = scalar1 * base
- * a2 = scalar2 * base
- *
- * Equivalent to two calls to decaf_255_point_scalarmul, but may be
- * faster.
- *
- * @param [out] a1 The first multiple.  It may be the same as the input point.
- * @param [out] a2 The second multiple.  It may be the same as the input point.
- * @param [in] base1 A point to be scaled.
- * @param [in] scalar1 A first scalar to multiply by.
- * @param [in] scalar2 A second scalar to multiply by.
- */
-void decaf_255_point_dual_scalarmul (
-    decaf_255_point_t a1,
-    decaf_255_point_t a2,
-    const decaf_255_point_t base1,
-    const decaf_255_scalar_t scalar1,
-    const decaf_255_scalar_t scalar2
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Multiply two base points by two scalars:
- * scaled = scalar1*decaf_255_point_base + scalar2*base2.
- *
- * Otherwise equivalent to decaf_255_point_double_scalarmul, but may be
- * faster at the expense of being variable time.
- *
- * @param [out] combo The linear combination scalar1*base + scalar2*base2.
- * @param [in] scalar1 A first scalar to multiply by.
- * @param [in] base2 A second point to be scaled.
- * @param [in] scalar2 A second scalar to multiply by.
- *
- * @warning: This function takes variable time, and may leak the scalars
- * used.  It is designed for signature verification.
- */
-void decaf_255_base_double_scalarmul_non_secret (
-    decaf_255_point_t combo,
-    const decaf_255_scalar_t scalar1,
-    const decaf_255_point_t base2,
-    const decaf_255_scalar_t scalar2
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Constant-time decision between two points.  If pick_b
- * is zero, out = a; else out = b.
- *
- * @param [out] out The output.  It may be the same as either input.
- * @param [in] a Any point.
- * @param [in] b Any point.
- * @param [in] pick_b If nonzero, choose point b.
- */
-void decaf_255_point_cond_sel (
-    decaf_255_point_t out,
-    const decaf_255_point_t a,
-    const decaf_255_point_t b,
-    decaf_word_t pick_b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Constant-time decision between two scalars.  If pick_b
- * is zero, out = a; else out = b.
- *
- * @param [out] out The output.  It may be the same as either input.
- * @param [in] a Any scalar.
- * @param [in] b Any scalar.
- * @param [in] pick_b If nonzero, choose scalar b.
- */
-void decaf_255_scalar_cond_sel (
-    decaf_255_scalar_t out,
-    const decaf_255_scalar_t a,
-    const decaf_255_scalar_t b,
-    decaf_word_t pick_b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Test that a point is valid, for debugging purposes.
- *
- * @param [in] to_test The point to test.
- * @retval DECAF_TRUE The point is valid.
- * @retval DECAF_FALSE The point is invalid.
- */
-decaf_bool_t decaf_255_point_valid (
-    const decaf_255_point_t to_test
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Torque a point, for debugging purposes.  The output
- * will be equal to the input.
- *
- * @param [out] q The point to torque.
- * @param [in] p The point to torque.
- */
-void decaf_255_point_debugging_torque (
-    decaf_255_point_t q,
-    const decaf_255_point_t p
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Projectively scale a point, for debugging purposes.
- * The output will be equal to the input, and will be valid
- * even if the factor is zero.
- *
- * @param [out] q The point to scale.
- * @param [in] p The point to scale.
- * @param [in] factor Serialized GF factor to scale.
- */
-void decaf_255_point_debugging_pscale (
-    decaf_255_point_t q,
-    const decaf_255_point_t p,
-    const unsigned char factor[DECAF_255_SER_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Almost-Elligator-like hash to curve.
- *
- * Call this function with the output of a hash to make a hash to the curve.
- *
- * This function runs Elligator2 on the decaf_255 Jacobi quartic model.  It then
- * uses the isogeny to put the result in twisted Edwards form.  As a result,
- * it is safe (cannot produce points of order 4), and would be compatible with
- * hypothetical other implementations of Decaf using a Montgomery or untwisted
- * Edwards model.
- *
- * Unlike Elligator, this function may be up to 4:1 on [0,(p-1)/2]:
- *   A factor of 2 due to the isogeny.
- *   A factor of 2 because we quotient out the 2-torsion.
- *
- * This makes it about 8:1 overall, or 16:1 overall on curves with cofactor 8.
- *
- * Negating the input (mod q) results in the same point.  Inverting the input
- * (mod q) results in the negative point.  This is the same as Elligator.
- *
- * This function isn't quite indifferentiable from a random oracle.
- * However, it is suitable for many protocols, including SPEKE and SPAKE2 EE. 
- * Furthermore, calling it twice with independent seeds and adding the results
- * is indifferentiable from a random oracle.
- *
- * @param [in] hashed_data Output of some hash function.
- * @param [out] pt The data hashed to the curve.
- */
-void
-decaf_255_point_from_hash_nonuniform (
-    decaf_255_point_t pt,
-    const unsigned char hashed_data[DECAF_255_HASH_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Indifferentiable hash function encoding to curve.
- *
- * Equivalent to calling decaf_255_point_from_hash_nonuniform twice and adding.
- *
- * @param [in] hashed_data Output of some hash function.
- * @param [out] pt The data hashed to the curve.
- */ 
-void decaf_255_point_from_hash_uniform (
-    decaf_255_point_t pt,
-    const unsigned char hashed_data[2*DECAF_255_HASH_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Inverse of elligator-like hash to curve.
- *
- * This function writes to the buffer, to make it so that
- * decaf_255_point_from_hash_nonuniform(buffer) = pt if
- * possible.  Since there may be multiple preimages, the
- * "which" parameter chooses between them.  To ensure uniform
- * inverse sampling, this function succeeds or fails
- * independently for different "which" values.
- *
- * This function isn't guaranteed to find every possible
- * preimage, but it finds all except a small finite number.
- * In particular, when the number of bits in the modulus isn't
- * a multiple of 8 (i.e. for curve25519), it sets the high bits
- * independently, which enables the generated data to be uniform.
- * But it doesn't add p, so you'll never get exactly p from this
- * function.  This might change in the future, especially if
- * we ever support eg Brainpool curves, where this could cause
- * real nonuniformity.
- *
- * @param [out] recovered_hash Encoded data.
- * @param [in] pt The point to encode.
- * @param [in] which A value determining which inverse point
- * to return.
- *
- * @retval DECAF_SUCCESS The inverse succeeded.
- * @retval DECAF_FAILURE The inverse failed.
- */
-decaf_error_t
-decaf_255_invert_elligator_nonuniform (
-    unsigned char recovered_hash[DECAF_255_HASH_BYTES],
-    const decaf_255_point_t pt,
-    uint32_t which
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_WARN_UNUSED;
-
-/**
- * @brief Inverse of elligator-like hash to curve.
- *
- * This function writes to the buffer, to make it so that
- * decaf_255_point_from_hash_uniform(buffer) = pt if
- * possible.  Since there may be multiple preimages, the
- * "which" parameter chooses between them.  To ensure uniform
- * inverse sampling, this function succeeds or fails
- * independently for different "which" values.
- *
- * @param [out] recovered_hash Encoded data.
- * @param [in] pt The point to encode.
- * @param [in] which A value determining which inverse point
- * to return.
- *
- * @retval DECAF_SUCCESS The inverse succeeded.
- * @retval DECAF_FAILURE The inverse failed.
- */
-decaf_error_t
-decaf_255_invert_elligator_uniform (
-    unsigned char recovered_hash[2*DECAF_255_HASH_BYTES],
-    const decaf_255_point_t pt,
-    uint32_t which
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_WARN_UNUSED;
-
-/**
- * @brief Overwrite scalar with zeros.
- */
-void decaf_255_scalar_destroy (
-    decaf_255_scalar_t scalar
-) DECAF_NONNULL DECAF_API_VIS;
-
-/**
- * @brief Overwrite point with zeros.
- */
-void decaf_255_point_destroy (
-    decaf_255_point_t point
-) DECAF_NONNULL DECAF_API_VIS;
-
-/**
- * @brief Overwrite precomputed table with zeros.
- */
-void decaf_255_precomputed_destroy (
-    decaf_255_precomputed_s *pre
-) DECAF_NONNULL DECAF_API_VIS;
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* __DECAF_POINT_255_H__ */
diff --git a/crypto/ec/curve448/GENERATED/include/decaf/point_448.h b/crypto/ec/curve448/GENERATED/include/decaf/point_448.h
deleted file mode 100644
index bc1cb43a00..0000000000
--- a/crypto/ec/curve448/GENERATED/include/decaf/point_448.h
+++ /dev/null
@@ -1,765 +0,0 @@
-/**
- * @file decaf/point_448.h
- * @author Mike Hamburg
- *
- * @copyright
- *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- *
- * @brief A group of prime order p, based on Ed448-Goldilocks.
- *
- * @warning This file was automatically generated in Python.
- * Please do not edit it.
- */
-
-#ifndef __DECAF_POINT_448_H__
-#define __DECAF_POINT_448_H__ 1
-
-#include <decaf/common.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/** @cond internal */
-#define DECAF_448_SCALAR_LIMBS ((446-1)/DECAF_WORD_BITS+1)
-/** @endcond */
-
-/** The number of bits in a scalar */
-#define DECAF_448_SCALAR_BITS 446
-
-/** @cond internal */
-#ifndef __DECAF_448_GF_DEFINED__
-#define __DECAF_448_GF_DEFINED__ 1
-/** @brief Galois field element internal structure */
-typedef struct gf_448_s {
-    decaf_word_t limb[512/DECAF_WORD_BITS];
-} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];
-#endif /* __DECAF_448_GF_DEFINED__ */
-/** @endcond */
-
-/** Number of bytes in a serialized point. */
-#define DECAF_448_SER_BYTES 56
-
-/** Number of bytes in an elligated point.  For now set the same as SER_BYTES
- * but could be different for other curves.
- */
-#define DECAF_448_HASH_BYTES 56
-
-/** Number of bytes in a serialized scalar. */
-#define DECAF_448_SCALAR_BYTES 56
-
-/** Number of bits in the "which" field of an elligator inverse */
-#define DECAF_448_INVERT_ELLIGATOR_WHICH_BITS 3
-
-/** The cofactor the curve would have, if we hadn't removed it */
-#define DECAF_448_REMOVED_COFACTOR 4
-
-/** X448 encoding ratio. */
-#define DECAF_X448_ENCODE_RATIO 2
-
-/** Number of bytes in an x448 public key */
-#define DECAF_X448_PUBLIC_BYTES 56
-
-/** Number of bytes in an x448 private key */
-#define DECAF_X448_PRIVATE_BYTES 56
-
-/** Twisted Edwards extended homogeneous coordinates */
-typedef struct decaf_448_point_s {
-    /** @cond internal */
-    gf_448_t x,y,z,t;
-    /** @endcond */
-} decaf_448_point_t[1];
-
-/** Precomputed table based on a point.  Can be trivial implementation. */
-struct decaf_448_precomputed_s;
-
-/** Precomputed table based on a point.  Can be trivial implementation. */
-typedef struct decaf_448_precomputed_s decaf_448_precomputed_s; 
-
-/** Size and alignment of precomputed point tables. */
-extern const size_t decaf_448_sizeof_precomputed_s DECAF_API_VIS, decaf_448_alignof_precomputed_s DECAF_API_VIS;
-
-/** Scalar is stored packed, because we don't need the speed. */
-typedef struct decaf_448_scalar_s {
-    /** @cond internal */
-    decaf_word_t limb[DECAF_448_SCALAR_LIMBS];
-    /** @endcond */
-} decaf_448_scalar_t[1];
-
-/** A scalar equal to 1. */
-extern const decaf_448_scalar_t decaf_448_scalar_one DECAF_API_VIS;
-
-/** A scalar equal to 0. */
-extern const decaf_448_scalar_t decaf_448_scalar_zero DECAF_API_VIS;
-
-/** The identity point on the curve. */
-extern const decaf_448_point_t decaf_448_point_identity DECAF_API_VIS;
-
-/** An arbitrarily chosen base point on the curve. */
-extern const decaf_448_point_t decaf_448_point_base DECAF_API_VIS;
-
-/** Precomputed table for the base point on the curve. */
-extern const struct decaf_448_precomputed_s *decaf_448_precomputed_base DECAF_API_VIS;
-
-/**
- * @brief Read a scalar from wire format or from bytes.
- *
- * @param [in] ser Serialized form of a scalar.
- * @param [out] out Deserialized form.
- *
- * @retval DECAF_SUCCESS The scalar was correctly encoded.
- * @retval DECAF_FAILURE The scalar was greater than the modulus,
- * and has been reduced modulo that modulus.
- */
-decaf_error_t decaf_448_scalar_decode (
-    decaf_448_scalar_t out,
-    const unsigned char ser[DECAF_448_SCALAR_BYTES]
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Read a scalar from wire format or from bytes.  Reduces mod
- * scalar prime.
- *
- * @param [in] ser Serialized form of a scalar.
- * @param [in] ser_len Length of serialized form.
- * @param [out] out Deserialized form.
- */
-void decaf_448_scalar_decode_long (
-    decaf_448_scalar_t out,
-    const unsigned char *ser,
-    size_t ser_len
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-    
-/**
- * @brief Serialize a scalar to wire format.
- *
- * @param [out] ser Serialized form of a scalar.
- * @param [in] s Deserialized scalar.
- */
-void decaf_448_scalar_encode (
-    unsigned char ser[DECAF_448_SCALAR_BYTES],
-    const decaf_448_scalar_t s
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_NOINLINE;
-        
-/**
- * @brief Add two scalars.  The scalars may use the same memory.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @param [out] out a+b.
- */
-void decaf_448_scalar_add (
-    decaf_448_scalar_t out,
-    const decaf_448_scalar_t a,
-    const decaf_448_scalar_t b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Compare two scalars.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @retval DECAF_TRUE The scalars are equal.
- * @retval DECAF_FALSE The scalars are not equal.
- */    
-decaf_bool_t decaf_448_scalar_eq (
-    const decaf_448_scalar_t a,
-    const decaf_448_scalar_t b
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Subtract two scalars.  The scalars may use the same memory.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @param [out] out a-b.
- */  
-void decaf_448_scalar_sub (
-    decaf_448_scalar_t out,
-    const decaf_448_scalar_t a,
-    const decaf_448_scalar_t b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Multiply two scalars.  The scalars may use the same memory.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @param [out] out a*b.
- */  
-void decaf_448_scalar_mul (
-    decaf_448_scalar_t out,
-    const decaf_448_scalar_t a,
-    const decaf_448_scalar_t b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-        
-/**
-* @brief Halve a scalar.  The scalars may use the same memory.
-* @param [in] a A scalar.
-* @param [out] out a/2.
-*/
-void decaf_448_scalar_halve (
-   decaf_448_scalar_t out,
-   const decaf_448_scalar_t a
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Invert a scalar.  When passed zero, return 0.  The input and output may alias.
- * @param [in] a A scalar.
- * @param [out] out 1/a.
- * @return DECAF_SUCCESS The input is nonzero.
- */  
-decaf_error_t decaf_448_scalar_invert (
-    decaf_448_scalar_t out,
-    const decaf_448_scalar_t a
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Copy a scalar.  The scalars may use the same memory, in which
- * case this function does nothing.
- * @param [in] a A scalar.
- * @param [out] out Will become a copy of a.
- */
-static inline void DECAF_NONNULL decaf_448_scalar_copy (
-    decaf_448_scalar_t out,
-    const decaf_448_scalar_t a
-) {
-    *out = *a;
-}
-
-/**
- * @brief Set a scalar to an unsigned 64-bit integer.
- * @param [in] a An integer.
- * @param [out] out Will become equal to a.
- */  
-void decaf_448_scalar_set_unsigned (
-    decaf_448_scalar_t out,
-    uint64_t a
-) DECAF_API_VIS DECAF_NONNULL;
-
-/**
- * @brief Encode a point as a sequence of bytes.
- *
- * @param [out] ser The byte representation of the point.
- * @param [in] pt The point to encode.
- */
-void decaf_448_point_encode (
-    uint8_t ser[DECAF_448_SER_BYTES],
-    const decaf_448_point_t pt
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Decode a point from a sequence of bytes.
- *
- * Every point has a unique encoding, so not every
- * sequence of bytes is a valid encoding.  If an invalid
- * encoding is given, the output is undefined.
- *
- * @param [out] pt The decoded point.
- * @param [in] ser The serialized version of the point.
- * @param [in] allow_identity DECAF_TRUE if the identity is a legal input.
- * @retval DECAF_SUCCESS The decoding succeeded.
- * @retval DECAF_FAILURE The decoding didn't succeed, because
- * ser does not represent a point.
- */
-decaf_error_t decaf_448_point_decode (
-    decaf_448_point_t pt,
-    const uint8_t ser[DECAF_448_SER_BYTES],
-    decaf_bool_t allow_identity
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Copy a point.  The input and output may alias,
- * in which case this function does nothing.
- *
- * @param [out] a A copy of the point.
- * @param [in] b Any point.
- */
-static inline void DECAF_NONNULL decaf_448_point_copy (
-    decaf_448_point_t a,
-    const decaf_448_point_t b
-) {
-    *a=*b;
-}
-
-/**
- * @brief Test whether two points are equal.  If yes, return
- * DECAF_TRUE, else return DECAF_FALSE.
- *
- * @param [in] a A point.
- * @param [in] b Another point.
- * @retval DECAF_TRUE The points are equal.
- * @retval DECAF_FALSE The points are not equal.
- */
-decaf_bool_t decaf_448_point_eq (
-    const decaf_448_point_t a,
-    const decaf_448_point_t b
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Add two points to produce a third point.  The
- * input points and output point can be pointers to the same
- * memory.
- *
- * @param [out] sum The sum a+b.
- * @param [in] a An addend.
- * @param [in] b An addend.
- */
-void decaf_448_point_add (
-    decaf_448_point_t sum,
-    const decaf_448_point_t a,
-    const decaf_448_point_t b
-) DECAF_API_VIS DECAF_NONNULL;
-
-/**
- * @brief Double a point.  Equivalent to
- * decaf_448_point_add(two_a,a,a), but potentially faster.
- *
- * @param [out] two_a The sum a+a.
- * @param [in] a A point.
- */
-void decaf_448_point_double (
-    decaf_448_point_t two_a,
-    const decaf_448_point_t a
-) DECAF_API_VIS DECAF_NONNULL;
-
-/**
- * @brief Subtract two points to produce a third point.  The
- * input points and output point can be pointers to the same
- * memory.
- *
- * @param [out] diff The difference a-b.
- * @param [in] a The minuend.
- * @param [in] b The subtrahend.
- */
-void decaf_448_point_sub (
-    decaf_448_point_t diff,
-    const decaf_448_point_t a,
-    const decaf_448_point_t b
-) DECAF_API_VIS DECAF_NONNULL;
-    
-/**
- * @brief Negate a point to produce another point.  The input
- * and output points can use the same memory.
- *
- * @param [out] nega The negated input point
- * @param [in] a The input point.
- */
-void decaf_448_point_negate (
-   decaf_448_point_t nega,
-   const decaf_448_point_t a
-) DECAF_API_VIS DECAF_NONNULL;
-
-/**
- * @brief Multiply a base point by a scalar: scaled = scalar*base.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] base The point to be scaled.
- * @param [in] scalar The scalar to multiply by.
- */
-void decaf_448_point_scalarmul (
-    decaf_448_point_t scaled,
-    const decaf_448_point_t base,
-    const decaf_448_scalar_t scalar
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Multiply a base point by a scalar: scaled = scalar*base.
- * This function operates directly on serialized forms.
- *
- * @warning This function is experimental.  It may not be supported
- * long-term.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] base The point to be scaled.
- * @param [in] scalar The scalar to multiply by.
- * @param [in] allow_identity Allow the input to be the identity.
- * @param [in] short_circuit Allow a fast return if the input is illegal.
- *
- * @retval DECAF_SUCCESS The scalarmul succeeded.
- * @retval DECAF_FAILURE The scalarmul didn't succeed, because
- * base does not represent a point.
- */
-decaf_error_t decaf_448_direct_scalarmul (
-    uint8_t scaled[DECAF_448_SER_BYTES],
-    const uint8_t base[DECAF_448_SER_BYTES],
-    const decaf_448_scalar_t scalar,
-    decaf_bool_t allow_identity,
-    decaf_bool_t short_circuit
-) DECAF_API_VIS DECAF_NONNULL DECAF_WARN_UNUSED DECAF_NOINLINE;
-
-/**
- * @brief RFC 7748 Diffie-Hellman scalarmul.  This function uses a different
- * (non-Decaf) encoding.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] base The point to be scaled.
- * @param [in] scalar The scalar to multiply by.
- *
- * @retval DECAF_SUCCESS The scalarmul succeeded.
- * @retval DECAF_FAILURE The scalarmul didn't succeed, because the base
- * point is in a small subgroup.
- */
-decaf_error_t decaf_x448 (
-    uint8_t out[DECAF_X448_PUBLIC_BYTES],
-    const uint8_t base[DECAF_X448_PUBLIC_BYTES],
-    const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_WARN_UNUSED DECAF_NOINLINE;
-
-/**
- * @brief Multiply a point by DECAF_X448_ENCODE_RATIO,
- * then encode it like RFC 7748.
- *
- * This function is mainly used internally, but is exported in case
- * it will be useful.
- *
- * The ratio is necessary because the internal representation doesn't
- * track the cofactor information, so on output we must clear the cofactor.
- * This would multiply by the cofactor, but in fact internally libdecaf's
- * points are always even, so it multiplies by half the cofactor instead.
- *
- * As it happens, this aligns with the base point definitions; that is,
- * if you pass the Decaf/Ristretto base point to this function, the result
- * will be DECAF_X448_ENCODE_RATIO times the X448
- * base point.
- *
- * @param [out] out The scaled and encoded point.
- * @param [in] p The point to be scaled and encoded.
- */
-void decaf_448_point_mul_by_ratio_and_encode_like_x448 (
-    uint8_t out[DECAF_X448_PUBLIC_BYTES],
-    const decaf_448_point_t p
-) DECAF_API_VIS DECAF_NONNULL;
-
-/** The base point for X448 Diffie-Hellman */
-extern const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES] DECAF_API_VIS;
-
-/**
- * @brief RFC 7748 Diffie-Hellman base point scalarmul.  This function uses
- * a different (non-Decaf) encoding.
- *
- * @deprecated Renamed to decaf_x448_derive_public_key.
- * I have no particular timeline for removing this name.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] scalar The scalar to multiply by.
- */
-void decaf_x448_generate_key (
-    uint8_t out[DECAF_X448_PUBLIC_BYTES],
-    const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_DEPRECATED("Renamed to decaf_x448_derive_public_key");
-    
-/**
- * @brief RFC 7748 Diffie-Hellman base point scalarmul.  This function uses
- * a different (non-Decaf) encoding.
- *
- * Does exactly the same thing as decaf_x448_generate_key,
- * but has a better name.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] scalar The scalar to multiply by.
- */
-void decaf_x448_derive_public_key (
-    uint8_t out[DECAF_X448_PUBLIC_BYTES],
-    const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/* FUTURE: uint8_t decaf_448_encode_like_curve448) */
-
-/**
- * @brief Precompute a table for fast scalar multiplication.
- * Some implementations do not include precomputed points; for
- * those implementations, this implementation simply copies the
- * point.
- *
- * @param [out] a A precomputed table of multiples of the point.
- * @param [in] b Any point.
- */
-void decaf_448_precompute (
-    decaf_448_precomputed_s *a,
-    const decaf_448_point_t b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Multiply a precomputed base point by a scalar:
- * scaled = scalar*base.
- * Some implementations do not include precomputed points; for
- * those implementations, this function is the same as
- * decaf_448_point_scalarmul
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] base The point to be scaled.
- * @param [in] scalar The scalar to multiply by.
- */
-void decaf_448_precomputed_scalarmul (
-    decaf_448_point_t scaled,
-    const decaf_448_precomputed_s *base,
-    const decaf_448_scalar_t scalar
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Multiply two base points by two scalars:
- * scaled = scalar1*base1 + scalar2*base2.
- *
- * Equivalent to two calls to decaf_448_point_scalarmul, but may be
- * faster.
- *
- * @param [out] combo The linear combination scalar1*base1 + scalar2*base2.
- * @param [in] base1 A first point to be scaled.
- * @param [in] scalar1 A first scalar to multiply by.
- * @param [in] base2 A second point to be scaled.
- * @param [in] scalar2 A second scalar to multiply by.
- */
-void decaf_448_point_double_scalarmul (
-    decaf_448_point_t combo,
-    const decaf_448_point_t base1,
-    const decaf_448_scalar_t scalar1,
-    const decaf_448_point_t base2,
-    const decaf_448_scalar_t scalar2
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-    
-/**
- * Multiply one base point by two scalars:
- *
- * a1 = scalar1 * base
- * a2 = scalar2 * base
- *
- * Equivalent to two calls to decaf_448_point_scalarmul, but may be
- * faster.
- *
- * @param [out] a1 The first multiple.  It may be the same as the input point.
- * @param [out] a2 The second multiple.  It may be the same as the input point.
- * @param [in] base1 A point to be scaled.
- * @param [in] scalar1 A first scalar to multiply by.
- * @param [in] scalar2 A second scalar to multiply by.
- */
-void decaf_448_point_dual_scalarmul (
-    decaf_448_point_t a1,
-    decaf_448_point_t a2,
-    const decaf_448_point_t base1,
-    const decaf_448_scalar_t scalar1,
-    const decaf_448_scalar_t scalar2
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Multiply two base points by two scalars:
- * scaled = scalar1*decaf_448_point_base + scalar2*base2.
- *
- * Otherwise equivalent to decaf_448_point_double_scalarmul, but may be
- * faster at the expense of being variable time.
- *
- * @param [out] combo The linear combination scalar1*base + scalar2*base2.
- * @param [in] scalar1 A first scalar to multiply by.
- * @param [in] base2 A second point to be scaled.
- * @param [in] scalar2 A second scalar to multiply by.
- *
- * @warning: This function takes variable time, and may leak the scalars
- * used.  It is designed for signature verification.
- */
-void decaf_448_base_double_scalarmul_non_secret (
-    decaf_448_point_t combo,
-    const decaf_448_scalar_t scalar1,
-    const decaf_448_point_t base2,
-    const decaf_448_scalar_t scalar2
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Constant-time decision between two points.  If pick_b
- * is zero, out = a; else out = b.
- *
- * @param [out] out The output.  It may be the same as either input.
- * @param [in] a Any point.
- * @param [in] b Any point.
- * @param [in] pick_b If nonzero, choose point b.
- */
-void decaf_448_point_cond_sel (
-    decaf_448_point_t out,
-    const decaf_448_point_t a,
-    const decaf_448_point_t b,
-    decaf_word_t pick_b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Constant-time decision between two scalars.  If pick_b
- * is zero, out = a; else out = b.
- *
- * @param [out] out The output.  It may be the same as either input.
- * @param [in] a Any scalar.
- * @param [in] b Any scalar.
- * @param [in] pick_b If nonzero, choose scalar b.
- */
-void decaf_448_scalar_cond_sel (
-    decaf_448_scalar_t out,
-    const decaf_448_scalar_t a,
-    const decaf_448_scalar_t b,
-    decaf_word_t pick_b
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Test that a point is valid, for debugging purposes.
- *
- * @param [in] to_test The point to test.
- * @retval DECAF_TRUE The point is valid.
- * @retval DECAF_FALSE The point is invalid.
- */
-decaf_bool_t decaf_448_point_valid (
-    const decaf_448_point_t to_test
-) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Torque a point, for debugging purposes.  The output
- * will be equal to the input.
- *
- * @param [out] q The point to torque.
- * @param [in] p The point to torque.
- */
-void decaf_448_point_debugging_torque (
-    decaf_448_point_t q,
-    const decaf_448_point_t p
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Projectively scale a point, for debugging purposes.
- * The output will be equal to the input, and will be valid
- * even if the factor is zero.
- *
- * @param [out] q The point to scale.
- * @param [in] p The point to scale.
- * @param [in] factor Serialized GF factor to scale.
- */
-void decaf_448_point_debugging_pscale (
-    decaf_448_point_t q,
-    const decaf_448_point_t p,
-    const unsigned char factor[DECAF_448_SER_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Almost-Elligator-like hash to curve.
- *
- * Call this function with the output of a hash to make a hash to the curve.
- *
- * This function runs Elligator2 on the decaf_448 Jacobi quartic model.  It then
- * uses the isogeny to put the result in twisted Edwards form.  As a result,
- * it is safe (cannot produce points of order 4), and would be compatible with
- * hypothetical other implementations of Decaf using a Montgomery or untwisted
- * Edwards model.
- *
- * Unlike Elligator, this function may be up to 4:1 on [0,(p-1)/2]:
- *   A factor of 2 due to the isogeny.
- *   A factor of 2 because we quotient out the 2-torsion.
- *
- * This makes it about 8:1 overall, or 16:1 overall on curves with cofactor 8.
- *
- * Negating the input (mod q) results in the same point.  Inverting the input
- * (mod q) results in the negative point.  This is the same as Elligator.
- *
- * This function isn't quite indifferentiable from a random oracle.
- * However, it is suitable for many protocols, including SPEKE and SPAKE2 EE. 
- * Furthermore, calling it twice with independent seeds and adding the results
- * is indifferentiable from a random oracle.
- *
- * @param [in] hashed_data Output of some hash function.
- * @param [out] pt The data hashed to the curve.
- */
-void
-decaf_448_point_from_hash_nonuniform (
-    decaf_448_point_t pt,
-    const unsigned char hashed_data[DECAF_448_HASH_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Indifferentiable hash function encoding to curve.
- *
- * Equivalent to calling decaf_448_point_from_hash_nonuniform twice and adding.
- *
- * @param [in] hashed_data Output of some hash function.
- * @param [out] pt The data hashed to the curve.
- */ 
-void decaf_448_point_from_hash_uniform (
-    decaf_448_point_t pt,
-    const unsigned char hashed_data[2*DECAF_448_HASH_BYTES]
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
-
-/**
- * @brief Inverse of elligator-like hash to curve.
- *
- * This function writes to the buffer, to make it so that
- * decaf_448_point_from_hash_nonuniform(buffer) = pt if
- * possible.  Since there may be multiple preimages, the
- * "which" parameter chooses between them.  To ensure uniform
- * inverse sampling, this function succeeds or fails
- * independently for different "which" values.
- *
- * This function isn't guaranteed to find every possible
- * preimage, but it finds all except a small finite number.
- * In particular, when the number of bits in the modulus isn't
- * a multiple of 8 (i.e. for curve25519), it sets the high bits
- * independently, which enables the generated data to be uniform.
- * But it doesn't add p, so you'll never get exactly p from this
- * function.  This might change in the future, especially if
- * we ever support eg Brainpool curves, where this could cause
- * real nonuniformity.
- *
- * @param [out] recovered_hash Encoded data.
- * @param [in] pt The point to encode.
- * @param [in] which A value determining which inverse point
- * to return.
- *
- * @retval DECAF_SUCCESS The inverse succeeded.
- * @retval DECAF_FAILURE The inverse failed.
- */
-decaf_error_t
-decaf_448_invert_elligator_nonuniform (
-    unsigned char recovered_hash[DECAF_448_HASH_BYTES],
-    const decaf_448_point_t pt,
-    uint32_t which
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_WARN_UNUSED;
-
-/**
- * @brief Inverse of elligator-like hash to curve.
- *
- * This function writes to the buffer, to make it so that
- * decaf_448_point_from_hash_uniform(buffer) = pt if
- * possible.  Since there may be multiple preimages, the
- * "which" parameter chooses between them.  To ensure uniform
- * inverse sampling, this function succeeds or fails
- * independently for different "which" values.
- *
- * @param [out] recovered_hash Encoded data.
- * @param [in] pt The point to encode.
- * @param [in] which A value determining which inverse point
- * to return.
- *
- * @retval DECAF_SUCCESS The inverse succeeded.
- * @retval DECAF_FAILURE The inverse failed.
- */
-decaf_error_t
-decaf_448_invert_elligator_uniform (
-    unsigned char recovered_hash[2*DECAF_448_HASH_BYTES],
-    const decaf_448_point_t pt,
-    uint32_t which
-) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_WARN_UNUSED;
-
-/**
- * @brief Overwrite scalar with zeros.
- */
-void decaf_448_scalar_destroy (
-    decaf_448_scalar_t scalar
-) DECAF_NONNULL DECAF_API_VIS;
-
-/**
- * @brief Overwrite point with zeros.
- */
-void decaf_448_point_destroy (
-    decaf_448_point_t point
-) DECAF_NONNULL DECAF_API_VIS;
-
-/**
- * @brief Overwrite precomputed table with zeros.
- */
-void decaf_448_precomputed_destroy (
-    decaf_448_precomputed_s *pre
-) DECAF_NONNULL DECAF_API_VIS;
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* __DECAF_POINT_448_H__ */
diff --git a/crypto/ec/curve448/GENERATED/include/decaf/sha512.h b/crypto/ec/curve448/GENERATED/include/decaf/sha512.h
deleted file mode 100644
index 3c8ec70e93..0000000000
--- a/crypto/ec/curve448/GENERATED/include/decaf/sha512.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * @file decaf/shake.h
- * @copyright Public domain.
- * @author Mike Hamburg
- * @brief SHA2-512
- */
-
-#ifndef __DECAF_SHA512_H__
-#define __DECAF_SHA512_H__
-
-#include <stdint.h>
-#include <sys/types.h>
-#include <stdlib.h> /* for NULL */
-
-#include <decaf/common.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-    
-
-typedef struct decaf_sha512_ctx_s {
-    uint64_t state[8];
-    uint8_t block[128];
-    uint64_t bytes_processed;
-} decaf_sha512_ctx_s, decaf_sha512_ctx_t[1];
-
-void decaf_sha512_init(decaf_sha512_ctx_t ctx) DECAF_NONNULL DECAF_API_VIS;
-void decaf_sha512_update(decaf_sha512_ctx_t ctx, const uint8_t *message, size_t length) DECAF_NONNULL DECAF_API_VIS;
-void decaf_sha512_final(decaf_sha512_ctx_t ctx, uint8_t *out, size_t length) DECAF_NONNULL DECAF_API_VIS;
-
-static inline void decaf_sha512_destroy(decaf_sha512_ctx_t ctx) {
-    decaf_bzero(ctx,sizeof(*ctx));
-}
-
-static inline void decaf_sha512_hash(
-    uint8_t *output,
-    size_t output_len,
-    const uint8_t *message,
-    size_t message_len
-) {
-    decaf_sha512_ctx_t ctx;
-    decaf_sha512_init(ctx);
-    decaf_sha512_update(ctx,message,message_len);
-    decaf_sha512_final(ctx,output,output_len);
-    decaf_sha512_destroy(ctx);
-}
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-    
-#endif /* __DECAF_SHA512_H__ */
diff --git a/crypto/ec/curve448/GENERATED/include/decaf/shake.h b/crypto/ec/curve448/GENERATED/include/decaf/shake.h
deleted file mode 100644
index ae125b923a..0000000000
--- a/crypto/ec/curve448/GENERATED/include/decaf/shake.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/**
- * @file decaf/shake.h
- * @copyright
- *   Based on CC0 code by David Leon Gil, 2015 \n
- *   Copyright (c) 2015 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- * @author Mike Hamburg
- * @brief SHA-3-n and DECAF_SHAKE-n instances.
- */
-
-#ifndef __DECAF_SHAKE_H__
-#define __DECAF_SHAKE_H__
-
-#include <stdint.h>
-#include <sys/types.h>
-#include <stdlib.h> /* for NULL */
-
-#include <decaf/common.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef INTERNAL_SPONGE_STRUCT
-    /** Sponge container object for the various primitives. */
-    typedef struct decaf_keccak_sponge_s {
-        /** @cond internal */
-        uint64_t opaque[26];
-        /** @endcond */
-    } decaf_keccak_sponge_s;
-
-    /** Convenience GMP-style one-element array version */
-    typedef struct decaf_keccak_sponge_s decaf_keccak_sponge_t[1];
-
-    /** Parameters for sponge construction, distinguishing DECAF_SHA3 and
-     * DECAF_SHAKE instances.
-     */
-    struct decaf_kparams_s;
-#endif
-
-/**
- * @brief Initialize a sponge context object.
- * @param [out] sponge The object to initialize.
- * @param [in] params The sponge's parameter description.
- */
-void decaf_sha3_init (
-    decaf_keccak_sponge_t sponge,
-    const struct decaf_kparams_s *params
-) DECAF_API_VIS;
-
-/**
- * @brief Absorb data into a DECAF_SHA3 or DECAF_SHAKE hash context.
- * @param [inout] sponge The context.
- * @param [in] in The input data.
- * @param [in] len The input data's length in bytes.
- * @return DECAF_FAILURE if the sponge has already been used for output.
- * @return DECAF_SUCCESS otherwise.
- */
-decaf_error_t decaf_sha3_update (
-    struct decaf_keccak_sponge_s * __restrict__ sponge,
-    const uint8_t *in,
-    size_t len
-) DECAF_API_VIS;
-
-/**
- * @brief Squeeze output data from a DECAF_SHA3 or DECAF_SHAKE hash context.
- * This does not destroy or re-initialize the hash context, and
- * decaf_sha3 output can be called more times.
- *
- * @param [inout] sponge The context.
- * @param [out] out The output data.
- * @param [in] len The requested output data length in bytes.
- * @return DECAF_FAILURE if the sponge has exhausted its output capacity.
- * @return DECAF_SUCCESS otherwise.
- */  
-decaf_error_t decaf_sha3_output (
-    decaf_keccak_sponge_t sponge,
-    uint8_t * __restrict__ out,
-    size_t len
-) DECAF_API_VIS;
-
-/**
- * @brief Squeeze output data from a DECAF_SHA3 or DECAF_SHAKE hash context.
- * This re-initializes the context to its starting parameters.
- *
- * @param [inout] sponge The context.
- * @param [out] out The output data.
- * @param [in] len The requested output data length in bytes.
- */  
-decaf_error_t decaf_sha3_final (
-    decaf_keccak_sponge_t sponge,
-    uint8_t * __restrict__ out,
-    size_t len
-) DECAF_API_VIS;
-
-/**
- * @brief Reset the sponge to the empty string.
- *
- * @param [inout] sponge The context.
- */  
-void decaf_sha3_reset (
-    decaf_keccak_sponge_t sponge
-) DECAF_API_VIS;
-
-/**
- * @brief Return the default output length of the sponge construction,
- * for the purpose of C++ default operators.
- *
- * Returns n/8 for DECAF_SHA3-n and 2n/8 for DECAF_SHAKE-n.
- */  
-size_t decaf_sha3_default_output_bytes (
-    const decaf_keccak_sponge_t sponge /**< [inout] The context. */
-) DECAF_API_VIS;
-
-/**
- * @brief Return the default output length of the sponge construction,
- * for the purpose of C++ default operators.
- *
- * Returns n/8 for DECAF_SHA3-n and SIZE_MAX for DECAF_SHAKE-n.
- */  
-size_t decaf_sha3_max_output_bytes (
-    const decaf_keccak_sponge_t sponge /**< [inout] The context. */
-) DECAF_API_VIS;
-
-/**
- * @brief Destroy a DECAF_SHA3 or DECAF_SHAKE sponge context by overwriting it with 0.
- * @param [out] sponge The context.
- */  
-void decaf_sha3_destroy (
-    decaf_keccak_sponge_t sponge
-) DECAF_API_VIS;
-
-/**
- * @brief Hash (in) to (out)
- * @param [in] in The input data.
- * @param [in] inlen The length of the input data.
- * @param [out] out A buffer for the output data.
- * @param [in] outlen The length of the output data.
- * @param [in] params The parameters of the sponge hash.
- */  
-decaf_error_t decaf_sha3_hash (
-    uint8_t *out,
-    size_t outlen,
-    const uint8_t *in,
-    size_t inlen,
-    const struct decaf_kparams_s *params
-) DECAF_API_VIS;
-
-/* FUTURE: expand/doxygenate individual DECAF_SHAKE/DECAF_SHA3 instances? */
-
-/** @cond internal */
-#define DECAF_DEC_SHAKE(n) \
-    extern const struct decaf_kparams_s DECAF_SHAKE##n##_params_s DECAF_API_VIS; \
-    typedef struct decaf_shake##n##_ctx_s { decaf_keccak_sponge_t s; } decaf_shake##n##_ctx_t[1]; \
-    static inline void DECAF_NONNULL decaf_shake##n##_init(decaf_shake##n##_ctx_t sponge) { \
-        decaf_sha3_init(sponge->s, &DECAF_SHAKE##n##_params_s); \
-    } \
-    static inline void DECAF_NONNULL decaf_shake##n##_gen_init(decaf_keccak_sponge_t sponge) { \
-        decaf_sha3_init(sponge, &DECAF_SHAKE##n##_params_s); \
-    } \
-    static inline decaf_error_t DECAF_NONNULL decaf_shake##n##_update(decaf_shake##n##_ctx_t sponge, const uint8_t *in, size_t inlen ) { \
-        return decaf_sha3_update(sponge->s, in, inlen); \
-    } \
-    static inline void  DECAF_NONNULL decaf_shake##n##_final(decaf_shake##n##_ctx_t sponge, uint8_t *out, size_t outlen ) { \
-        decaf_sha3_output(sponge->s, out, outlen); \
-        decaf_sha3_init(sponge->s, &DECAF_SHAKE##n##_params_s); \
-    } \
-    static inline void  DECAF_NONNULL decaf_shake##n##_output(decaf_shake##n##_ctx_t sponge, uint8_t *out, size_t outlen ) { \
-        decaf_sha3_output(sponge->s, out, outlen); \
-    } \
-    static inline void  DECAF_NONNULL decaf_shake##n##_hash(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen) { \
-        decaf_sha3_hash(out,outlen,in,inlen,&DECAF_SHAKE##n##_params_s); \
-    } \
-    static inline void  DECAF_NONNULL decaf_shake##n##_destroy( decaf_shake##n##_ctx_t sponge ) { \
-        decaf_sha3_destroy(sponge->s); \
-    }
-
-#define DECAF_DEC_SHA3(n) \
-    extern const struct decaf_kparams_s DECAF_SHA3_##n##_params_s DECAF_API_VIS; \
-    typedef struct decaf_sha3_##n##_ctx_s { decaf_keccak_sponge_t s; } decaf_sha3_##n##_ctx_t[1]; \
-    static inline void DECAF_NONNULL decaf_sha3_##n##_init(decaf_sha3_##n##_ctx_t sponge) { \
-        decaf_sha3_init(sponge->s, &DECAF_SHA3_##n##_params_s); \
-    } \
-    static inline void DECAF_NONNULL decaf_sha3_##n##_gen_init(decaf_keccak_sponge_t sponge) { \
-        decaf_sha3_init(sponge, &DECAF_SHA3_##n##_params_s); \
-    } \
-    static inline decaf_error_t DECAF_NONNULL decaf_sha3_##n##_update(decaf_sha3_##n##_ctx_t sponge, const uint8_t *in, size_t inlen ) { \
-        return decaf_sha3_update(sponge->s, in, inlen); \
-    } \
-    static inline decaf_error_t DECAF_NONNULL decaf_sha3_##n##_final(decaf_sha3_##n##_ctx_t sponge, uint8_t *out, size_t outlen ) { \
-        decaf_error_t ret = decaf_sha3_output(sponge->s, out, outlen); \
-        decaf_sha3_init(sponge->s, &DECAF_SHA3_##n##_params_s); \
-        return ret; \
-    } \
-    static inline decaf_error_t DECAF_NONNULL decaf_sha3_##n##_output(decaf_sha3_##n##_ctx_t sponge, uint8_t *out, size_t outlen ) { \
-        return decaf_sha3_output(sponge->s, out, outlen); \
-    } \
-    static inline decaf_error_t DECAF_NONNULL decaf_sha3_##n##_hash(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen) { \
-        return decaf_sha3_hash(out,outlen,in,inlen,&DECAF_SHA3_##n##_params_s); \
-    } \
-    static inline void DECAF_NONNULL decaf_sha3_##n##_destroy(decaf_sha3_##n##_ctx_t sponge) { \
-        decaf_sha3_destroy(sponge->s); \
-    }
-/** @endcond */
-
-DECAF_DEC_SHAKE(128)
-DECAF_DEC_SHAKE(256)
-DECAF_DEC_SHA3(224)
-DECAF_DEC_SHA3(256)
-DECAF_DEC_SHA3(384)
-DECAF_DEC_SHA3(512)
-#undef DECAF_DEC_SHAKE
-#undef DECAF_DEC_SHA3
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-    
-#endif /* __DECAF_SHAKE_H__ */
diff --git a/crypto/ec/curve448/arch_32/arch_intrinsics.h b/crypto/ec/curve448/arch_32/arch_intrinsics.h
new file mode 100644
index 0000000000..f3908a2589
--- /dev/null
+++ b/crypto/ec/curve448/arch_32/arch_intrinsics.h
@@ -0,0 +1,22 @@
+/* Copyright (c) 2016 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#ifndef __ARCH_ARCH_32_ARCH_INTRINSICS_H__
+#define __ARCH_ARCH_32_ARCH_INTRINSICS_H__
+
+#define ARCH_WORD_BITS 32
+
+static __inline__ __attribute((always_inline,unused))
+uint32_t word_is_zero(uint32_t a) { /* returns 0xFFFFFFFF when a == 0, and 0 for any other a */
+    /* Branch-free: a-1 is computed in 64 bits, so it wraps to all-ones only for a == 0 and the high word is then 0xFFFFFFFF; let's hope the compiler isn't clever enough to optimize this. */
+    return (((uint64_t)a)-1)>>32;
+}
+
+static __inline__ __attribute((always_inline,unused))
+uint64_t widemul(uint32_t a, uint32_t b) { /* full 32x32 -> 64 bit unsigned product */
+    return ((uint64_t)a) * b; /* a is widened first, so the multiplication itself is done in 64 bits */
+}
+
+#endif /* __ARCH_ARCH_32_ARCH_INTRINSICS_H__ */
+
diff --git a/crypto/ec/curve448/arch_32/f_impl.c b/crypto/ec/curve448/arch_32/f_impl.c
new file mode 100644
index 0000000000..0770bd9962
--- /dev/null
+++ b/crypto/ec/curve448/arch_32/f_impl.c
@@ -0,0 +1,101 @@
+/* Copyright (c) 2014 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#include "f_field.h"
+
+#if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) && !I_HATE_UNROLLED_LOOPS) \
+     || defined(DECAF_FORCE_UNROLL) /* pick the unrolled FOR_LIMB when optimizing for speed, or when forced */
+#define REPEAT8(_x) _x _x _x _x _x _x _x _x /* pastes _x eight times */
+#define FOR_LIMB(_i,_start,_end,_x) do { _i=_start; REPEAT8( if (_i<_end) { _x; } _i++;) } while (0) /* unrolled: runs _x for at most 8 consecutive values of _i, each guarded by _i<_end */
+#else /* plain loop variant */
+#define FOR_LIMB(_i,_start,_end,_x) do { for (_i=_start; _i<_end; _i++) _x; } while (0) /* runs _x for _i = _start .. _end-1 */
+#endif /* unrolled vs. plain FOR_LIMB */
+
+void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { /* cs = as * bs over 16 limbs of 28 bits; NOTE(review): presumably reduced modulo the field prime - confirm */
+    const uint32_t *a = as->limb, *b = bs->limb;
+    uint32_t *c = cs->limb;
+
+    uint64_t accum0 = 0, accum1 = 0, accum2 = 0; /* 64-bit running sums of 32x32 products */
+    uint32_t mask = (1<<28) - 1;  /* selects the low 28 bits kept in each output limb */
+
+    uint32_t aa[8], bb[8]; /* per-index sums of the low and high halves of each operand */
+    
+    int i,j;
+    for (i=0; i<8; i++) {
+        aa[i] = a[i] + a[i+8]; /* low half + high half of a */
+        bb[i] = b[i] + b[i+8]; /* low half + high half of b */
+    }
+    
+    FOR_LIMB(j,0,8,{ /* each pass produces the output pair c[j] and c[j+8] */
+        accum2 = 0;
+    
+        FOR_LIMB (i,0,j+1,{ /* products whose half-indices sum to j */
+            accum2 += widemul(a[j-i],b[i]); /* low * low */
+            accum1 += widemul(aa[j-i],bb[i]); /* (low+high) * (low+high) */
+            accum0 += widemul(a[8+j-i], b[8+i]); /* high * high */
+        });
+        
+        accum1 -= accum2; /* remove the low*low part from the combined product */
+        accum0 += accum2;
+        accum2 = 0;
+    
+        FOR_LIMB (i,j+1,8,{ /* products whose half-indices sum to j+8 */
+            accum0 -= widemul(a[8+j-i], b[i]);
+            accum2 += widemul(aa[8+j-i], bb[i]);
+            accum1 += widemul(a[16+j-i], b[8+i]);
+        });
+
+        accum1 += accum2;
+        accum0 += accum2;
+
+        c[j] = ((uint32_t)(accum0)) & mask; /* store the low 28 bits */
+        c[j+8] = ((uint32_t)(accum1)) & mask;
+
+        accum0 >>= 28; /* the remaining bits carry into the next pass */
+        accum1 >>= 28;
+    });
+    
+    accum0 += accum1; /* fold the carries left after the last pass back into c[8] and c[0] */
+    accum0 += c[8];
+    accum1 += c[0];
+    c[8] = ((uint32_t)(accum0)) & mask;
+    c[0] = ((uint32_t)(accum1)) & mask;
+    
+    accum0 >>= 28;
+    accum1 >>= 28;
+    c[9] += ((uint32_t)(accum0)); /* final carry is added without re-masking c[9] */
+    c[1] += ((uint32_t)(accum1)); /* final carry is added without re-masking c[1] */
+}
+
+void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) { /* cs = as * b for a small unsigned word b */
+    assert(b<1<<28); /* parses as b < (1<<28): the multiplier must fit in 28 bits */
+    
+    const uint32_t *a = as->limb;
+    uint32_t *c = cs->limb;
+
+    uint64_t accum0 = 0, accum8 = 0; /* carry chains for limbs 0..7 and 8..15 respectively */
+    uint32_t mask = (1ull<<28)-1;  /* selects the low 28 bits kept in each output limb */
+
+    int i;
+    FOR_LIMB(i,0,8,{ /* both halves are processed in lock-step */
+        accum0 += widemul(b, a[i]);
+        accum8 += widemul(b, a[i+8]);
+
+        c[i] = accum0 & mask; accum0 >>= 28; /* keep 28 bits, carry the rest to limb i+1 */
+        c[i+8] = accum8 & mask; accum8 >>= 28; /* keep 28 bits, carry the rest to limb i+9 */
+    });
+
+    accum0 += accum8 + c[8]; /* carry out of limb 7 plus carry out of limb 15 both land on limb 8 */
+    c[8] = accum0 & mask;
+    c[9] += accum0 >> 28; /* added without re-masking c[9] */
+
+    accum8 += c[0]; /* carry out of limb 15 also wraps around to limb 0 */
+    c[0] = accum8 & mask;
+    c[1] += accum8 >> 28; /* added without re-masking c[1] */
+}
+
+void gf_sqr (gf_s *__restrict__ cs, const gf as) { /* cs = as * as */
+    gf_mul(cs,as,as); /* Squares via the general multiply. Original note: "Performs better with a dedicated square" (presumably: a dedicated squaring routine would be faster). */
+}
+
diff --git a/crypto/ec/curve448/arch_32/f_impl.h b/crypto/ec/curve448/arch_32/f_impl.h
new file mode 100644
index 0000000000..c3687888c0
--- /dev/null
+++ b/crypto/ec/curve448/arch_32/f_impl.h
@@ -0,0 +1,40 @@
+/* Copyright (c) 2014-2016 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#define GF_HEADROOM 2
+#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 /* expands to TWO comma-separated values: the low 28 bits of x, then x>>28 */
+#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
+    {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} /* 8 arguments become 16 initializer values, two per LIMB() */
+    
+#define LIMB_PLACE_VALUE(i) 28 /* the argument is ignored: every limb position reports 28 */
+
+void gf_add_RAW (gf out, const gf a, const gf b) { /* out = a + b limb by limb, with no carry propagation or reduction */
+    for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) { /* limb count derived from the struct size */
+        out->limb[i] = a->limb[i] + b->limb[i];
+    }
+}
+
+void gf_sub_RAW (gf out, const gf a, const gf b) { /* out = a - b limb by limb, with no borrow propagation or reduction */
+    for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) { /* limb count derived from the struct size */
+        out->limb[i] = a->limb[i] - b->limb[i]; /* NOTE(review): limbs look unsigned (see gf_weak_reduce), so this may wrap; presumably callers add a bias with gf_bias - confirm */
+    }
+}
+
+void gf_bias (gf a, int amt) {    /* adds a per-limb constant scaled by amt to every limb of a, in place */
+    uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; /* co1 = (2^28-1)*amt for ordinary limbs, co2 = co1-amt for the middle limb */
+    for (unsigned int i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) {
+        a->limb[i] += (i==sizeof(*a)/sizeof(a->limb[0])/2) ? co2 : co1; /* NOTE(review): assuming 16 limbs of radix 2^28 this totals amt*(2^448-2^224-1) - confirm */
+    }
+}
+
+void gf_weak_reduce (gf a) { /* one in-place carry pass: each limb keeps its low 28 bits plus the carry from the limb below */
+    uint32_t mask = (1ull<<28) - 1; /* low 28 bits of a limb */
+    uint32_t tmp = a->limb[15] >> 28; /* overflow of the top limb */
+    a->limb[8] += tmp; /* the top overflow is re-injected at limb 8 ... */
+    for (unsigned int i=15; i>0; i--) { /* walking downward reads limb[i-1] before it is itself masked */
+        a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>28);
+    }
+    a->limb[0] = (a->limb[0] & mask) + tmp; /* ... and at limb 0 */
+}
+
diff --git a/crypto/ec/curve448/arch_arm_32/arch_intrinsics.h b/crypto/ec/curve448/arch_arm_32/arch_intrinsics.h
new file mode 100644
index 0000000000..7451c6fe7d
--- /dev/null
+++ b/crypto/ec/curve448/arch_arm_32/arch_intrinsics.h
@@ -0,0 +1,24 @@
+/* Copyright (c) 2016 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#ifndef __ARCH_ARM_32_ARCH_INTRINSICS_H__
+#define __ARCH_ARM_32_ARCH_INTRINSICS_H__
+
+#define ARCH_WORD_BITS 32
+
+static __inline__ __attribute((always_inline,unused))
+uint32_t word_is_zero(uint32_t a) { /* asm counterpart of the portable arch_32 word_is_zero */
+    uint32_t ret;
+    asm("subs %0, %1, #1;\n\tsbc %0, %0, %0" : "=r"(ret) : "r"(a) : "cc"); /* NOTE(review): SUBS should leave carry clear only when a-1 borrows (a == 0), and SBC r,r,r then gives all-ones in that case and 0 otherwise - confirm against the ARM reference */
+    return ret;
+}
+
+static __inline__ __attribute((always_inline,unused))
+uint64_t widemul(uint32_t a, uint32_t b) { /* full 32x32 -> 64 bit unsigned product */
+    /* Could be UMULL, but it's hard to express to CC that the registers must be different */
+    return ((uint64_t)a) * b; /* a is widened first, so the multiplication itself is done in 64 bits */
+}
+
+#endif /* __ARCH_ARM_32_ARCH_INTRINSICS_H__ */
+
diff --git a/crypto/ec/curve448/arch_arm_32/f_impl.c b/crypto/ec/curve448/arch_arm_32/f_impl.c
new file mode 100644
index 0000000000..0454bd6f29
--- /dev/null
+++ b/crypto/ec/curve448/arch_arm_32/f_impl.c
@@ -0,0 +1,819 @@
+/* Copyright (c) 2014 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#include "f_field.h"
+
+static inline void __attribute__((gnu_inline,always_inline))
+smlal ( /* *acc += a * b, with a and b interpreted as signed 32-bit values */
+    uint64_t *acc,
+    const uint32_t a,
+    const uint32_t b
+) {
+
+#ifdef  __ARMEL__
+    uint32_t lo = *acc, hi = (*acc)>>32; /* split the accumulator into its two 32-bit halves for the instruction */
+    
+    __asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
+        : [lo]"+&r"(lo), [hi]"+&r"(hi)
+        : [a]"r"(a), [b]"r"(b));
+    
+    *acc = lo + (((uint64_t)hi)<<32); /* reassemble the 64-bit accumulator */
+#else
+    *acc += (int64_t)(int32_t)a * (int64_t)(int32_t)b; /* portable fallback used when __ARMEL__ is not defined */
+#endif
+}
+
+static inline void __attribute__((gnu_inline,always_inline))
+smlal2 ( /* *acc += a * (2*b), signed; the doubling of b is done in 32 bits before the multiply */
+    uint64_t *acc,
+    const uint32_t a,
+    const uint32_t b
+) {
+#ifdef __ARMEL__
+    uint32_t lo = *acc, hi = (*acc)>>32; /* split the accumulator into its two 32-bit halves for the instruction */
+    
+    __asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
+        : [lo]"+&r"(lo), [hi]"+&r"(hi)
+        : [a]"r"(a), [b]"r"(2*b));
+    
+    *acc = lo + (((uint64_t)hi)<<32); /* reassemble the 64-bit accumulator */
+#else
+    *acc += (int64_t)(int32_t)a * (int64_t)(int32_t)(b * 2); /* portable fallback used when __ARMEL__ is not defined */
+#endif
+}
+
+static inline void __attribute__((gnu_inline,always_inline))
+smull ( /* *acc = a * b (signed 32x32 -> 64); overwrites the accumulator instead of adding to it */
+    uint64_t *acc,
+    const uint32_t a,
+    const uint32_t b
+) {
+#ifdef __ARMEL__
+    uint32_t lo, hi; /* written by the instruction, never read beforehand */
+    
+    __asm__ __volatile__ ("smull %[lo], %[hi], %[a], %[b]"
+        : [lo]"=&r"(lo), [hi]"=&r"(hi)
+        : [a]"r"(a), [b]"r"(b));
+    
+    *acc = lo + (((uint64_t)hi)<<32); /* assemble the 64-bit result */
+#else
+    *acc = (int64_t)(int32_t)a * (int64_t)(int32_t)b; /* portable fallback used when __ARMEL__ is not defined */
+#endif
+}
+
+static inline void __attribute__((gnu_inline,always_inline))
+smull2 ( /* *acc = a * (2*b), signed; overwrites the accumulator, b is doubled in 32 bits first */
+    uint64_t *acc,
+    const uint32_t a,
+    const uint32_t b
+) {
+#ifdef __ARMEL__
+    uint32_t lo, hi; /* written by the instruction, never read beforehand */
+    
+    __asm__ /*__volatile__*/ ("smull %[lo], %[hi], %[a], %[b]"
+        : [lo]"=&r"(lo), [hi]"=&r"(hi)
+        : [a]"r"(a), [b]"r"(2*b));
+    
+    *acc = lo + (((uint64_t)hi)<<32); /* assemble the 64-bit result */
+#else
+    *acc = (int64_t)(int32_t)a * (int64_t)(int32_t)(b * 2); /* portable fallback used when __ARMEL__ is not defined */
+#endif
+}
+
+void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { /* cs = as * bs over 16 limbs of 28 bits, hand-scheduled around smull/smlal; NOTE(review): presumably reduced modulo the field prime - confirm */
+    
+    const uint32_t *a = as->limb, *b = bs->limb;
+    uint32_t *c = cs->limb;
+
+    uint64_t accum0 = 0, accum1 = 0, accum2, accum3, accumC0, accumC1; /* accum2/accum3 are first written by smull or assignment; accumC0/accumC1 carry between the groups below */
+    uint32_t mask = (1<<28) - 1;  /* selects the low 28 bits kept in each output limb */
+
+    uint32_t aa[8], bm[8];
+
+    int i;
+    for (i=0; i<8; i++) {
+        aa[i] = a[i] + a[i+8]; /* low half + high half of a */
+        bm[i] = b[i] - b[i+8]; /* low half - high half of b; may wrap, the helpers treat operands as signed */
+    }
+
+    uint32_t ax,bx; /* operand temporaries: each call reuses the value assigned in the previous call */
+    { /* this group produces c[0], c[1], c[8], c[9] */
+        /* t^3 terms */
+        smull(&accum1, ax = aa[1], bx = b[15]);
+        smull(&accum3, ax = aa[2], bx);
+        smlal(&accum1, ax, bx = b[14]);
+        smlal(&accum3, ax = aa[3], bx);
+        smlal(&accum1, ax, bx = b[13]);
+        smlal(&accum3, ax = aa[4], bx);
+        smlal(&accum1, ax, bx = b[12]);
+        smlal(&accum3, ax = aa[5], bx);
+        smlal(&accum1, ax, bx = b[11]);
+        smlal(&accum3, ax = aa[6], bx);
+        smlal(&accum1, ax, bx = b[10]);
+        smlal(&accum3, ax = aa[7], bx);
+        smlal(&accum1, ax, bx = b[9]);
+        
+        accum0 = accum1;
+        accum2 = accum3;
+        
+        /* t^2 terms */
+        smlal(&accum2, ax = aa[0], bx);
+        smlal(&accum0, ax, bx = b[8]);
+        smlal(&accum2, ax = aa[1], bx);
+        
+        smlal(&accum0, ax = a[9], bx = b[7]);
+        smlal(&accum2, ax = a[10], bx);
+        smlal(&accum0, ax, bx = b[6]);
+        smlal(&accum2, ax = a[11], bx);
+        smlal(&accum0, ax, bx = b[5]);
+        smlal(&accum2, ax = a[12], bx);
+        smlal(&accum0, ax, bx = b[4]);
+        smlal(&accum2, ax = a[13], bx);
+        smlal(&accum0, ax, bx = b[3]);
+        smlal(&accum2, ax = a[14], bx);
+        smlal(&accum0, ax, bx = b[2]);
+        smlal(&accum2, ax = a[15], bx);
+        smlal(&accum0, ax, bx = b[1]);
+        
+        /* t terms */
+        accum1 += accum0;
+        accum3 += accum2;
+        smlal(&accum3, ax = a[8], bx);
+        smlal(&accum1, ax, bx = b[0]);
+        smlal(&accum3, ax = a[9], bx);
+        
+        smlal(&accum1, ax = a[1], bx = bm[7]);
+        smlal(&accum3, ax = a[2], bx);
+        smlal(&accum1, ax, bx = bm[6]);
+        smlal(&accum3, ax = a[3], bx);
+        smlal(&accum1, ax, bx = bm[5]);
+        smlal(&accum3, ax = a[4], bx);
+        smlal(&accum1, ax, bx = bm[4]);
+        smlal(&accum3, ax = a[5], bx);
+        smlal(&accum1, ax, bx = bm[3]);
+        smlal(&accum3, ax = a[6], bx);
+        smlal(&accum1, ax, bx = bm[2]);
+        smlal(&accum3, ax = a[7], bx);
+        smlal(&accum1, ax, bx = bm[1]);
+        
+        /* 1 terms */
+        smlal(&accum2, ax = a[0], bx);
+        smlal(&accum0, ax, bx = bm[0]);
+        smlal(&accum2, ax = a[1], bx);
+        
+        accum2 += accum0 >> 28; /* carry from the even limb into the odd limb of each pair */
+        accum3 += accum1 >> 28;
+        
+        c[0] = ((uint32_t)(accum0)) & mask;
+        c[1] = ((uint32_t)(accum2)) & mask;
+        c[8] = ((uint32_t)(accum1)) & mask;
+        c[9] = ((uint32_t)(accum3)) & mask;
+        
+        accumC0 = accum2 >> 28; /* carries handed to the next group */
+        accumC1 = accum3 >> 28;
+    }
+    { /* this group produces c[2], c[3], c[10], c[11] */
+        /* t^3 terms */
+        smull(&accum1, ax = aa[3], bx = b[15]);
+        smull(&accum3, ax = aa[4], bx);
+        smlal(&accum1, ax, bx = b[14]);
+        smlal(&accum3, ax = aa[5], bx);
+        smlal(&accum1, ax, bx = b[13]);
+        smlal(&accum3, ax = aa[6], bx);
+        smlal(&accum1, ax, bx = b[12]);
+        smlal(&accum3, ax = aa[7], bx);
+        smlal(&accum1, ax, bx = b[11]);
+        
+        accum0 = accum1;
+        accum2 = accum3;
+        
+        /* t^2 terms */
+        smlal(&accum2, ax = aa[0], bx);
+        smlal(&accum0, ax, bx = b[10]);
+        smlal(&accum2, ax = aa[1], bx);
+        smlal(&accum0, ax, bx = b[9]);
+        smlal(&accum2, ax = aa[2], bx);
+        smlal(&accum0, ax, bx = b[8]);
+        smlal(&accum2, ax = aa[3], bx);
+        
+        smlal(&accum0, ax = a[11], bx = b[7]);
+        smlal(&accum2, ax = a[12], bx);
+        smlal(&accum0, ax, bx = b[6]);
+        smlal(&accum2, ax = a[13], bx);
+        smlal(&accum0, ax, bx = b[5]);
+        smlal(&accum2, ax = a[14], bx);
+        smlal(&accum0, ax, bx = b[4]);
+        smlal(&accum2, ax = a[15], bx);
+        smlal(&accum0, ax, bx = b[3]);
+        
+        /* t terms */
+        accum1 += accum0;
+        accum3 += accum2;
+        smlal(&accum3, ax = a[8], bx);
+        smlal(&accum1, ax, bx = b[2]);
+        smlal(&accum3, ax = a[9], bx);
+        smlal(&accum1, ax, bx = b[1]);
+        smlal(&accum3, ax = a[10], bx);
+        smlal(&accum1, ax, bx = b[0]);
+        smlal(&accum3, ax = a[11], bx);
+        
+        smlal(&accum1, ax = a[3], bx = bm[7]);
+        smlal(&accum3, ax = a[4], bx);
+        smlal(&accum1, ax, bx = bm[6]);
+        smlal(&accum3, ax = a[5], bx);
+        smlal(&accum1, ax, bx = bm[5]);
+        smlal(&accum3, ax = a[6], bx);
+        smlal(&accum1, ax, bx = bm[4]);
+        smlal(&accum3, ax = a[7], bx);
+        smlal(&accum1, ax, bx = bm[3]);
+        
+        /* 1 terms */
+        smlal(&accum2, ax = a[0], bx);
+        smlal(&accum0, ax, bx = bm[2]);
+        smlal(&accum2, ax = a[1], bx);
+        smlal(&accum0, ax, bx = bm[1]);
+        smlal(&accum2, ax = a[2], bx);
+        smlal(&accum0, ax, bx = bm[0]);
+        smlal(&accum2, ax = a[3], bx);
+        
+        accum0 += accumC0; /* absorb the carries from the previous group */
+        accum1 += accumC1;
+        accum2 += accum0 >> 28;
+        accum3 += accum1 >> 28;
+        
+        c[2] = ((uint32_t)(accum0)) & mask;
+        c[3] = ((uint32_t)(accum2)) & mask;
+        c[10] = ((uint32_t)(accum1)) & mask;
+        c[11] = ((uint32_t)(accum3)) & mask;
+        
+        accumC0 = accum2 >> 28; /* carries handed to the next group */
+        accumC1 = accum3 >> 28;
+    }
+    { /* this group produces c[4], c[5], c[12], c[13] */
+        
+        /* t^3 terms */
+        smull(&accum1, ax = aa[5], bx = b[15]);
+        smull(&accum3, ax = aa[6], bx);
+        smlal(&accum1, ax, bx = b[14]);
+        smlal(&accum3, ax = aa[7], bx);
+        smlal(&accum1, ax, bx = b[13]);
+        
+        accum0 = accum1;
+        accum2 = accum3;
+        
+        /* t^2 terms */
+        
+        smlal(&accum2, ax = aa[0], bx);
+        smlal(&accum0, ax, bx = b[12]);
+        smlal(&accum2, ax = aa[1], bx);
+        smlal(&accum0, ax, bx = b[11]);
+        smlal(&accum2, ax = aa[2], bx);
+        smlal(&accum0, ax, bx = b[10]);
+        smlal(&accum2, ax = aa[3], bx);
+        smlal(&accum0, ax, bx = b[9]);
+        smlal(&accum2, ax = aa[4], bx);
+        smlal(&accum0, ax, bx = b[8]);
+        smlal(&accum2, ax = aa[5], bx);
+        
+        
+        smlal(&accum0, ax = a[13], bx = b[7]);
+        smlal(&accum2, ax = a[14], bx);
+        smlal(&accum0, ax, bx = b[6]);
+        smlal(&accum2, ax = a[15], bx);
+        smlal(&accum0, ax, bx = b[5]);
+        
+        /* t terms */
+        accum1 += accum0;
+        accum3 += accum2;
+        
+        smlal(&accum3, ax = a[8], bx);
+        smlal(&accum1, ax, bx = b[4]);
+        smlal(&accum3, ax = a[9], bx);
+        smlal(&accum1, ax, bx = b[3]);
+        smlal(&accum3, ax = a[10], bx);
+        smlal(&accum1, ax, bx = b[2]);
+        smlal(&accum3, ax = a[11], bx);
+        smlal(&accum1, ax, bx = b[1]);
+        smlal(&accum3, ax = a[12], bx);
+        smlal(&accum1, ax, bx = b[0]);
+        smlal(&accum3, ax = a[13], bx);
+        
+        
+        smlal(&accum1, ax = a[5], bx = bm[7]);
+        smlal(&accum3, ax = a[6], bx);
+        smlal(&accum1, ax, bx = bm[6]);
+        smlal(&accum3, ax = a[7], bx);
+        smlal(&accum1, ax, bx = bm[5]);
+        
+        /* 1 terms */
+        
+        smlal(&accum2, ax = a[0], bx);
+        smlal(&accum0, ax, bx = bm[4]);
+        smlal(&accum2, ax = a[1], bx);
+        smlal(&accum0, ax, bx = bm[3]);
+        smlal(&accum2, ax = a[2], bx);
+        smlal(&accum0, ax, bx = bm[2]);
+        smlal(&accum2, ax = a[3], bx);
+        smlal(&accum0, ax, bx = bm[1]);
+        smlal(&accum2, ax = a[4], bx);
+        smlal(&accum0, ax, bx = bm[0]);
+        smlal(&accum2, ax = a[5], bx);
+        
+        accum0 += accumC0; /* absorb the carries from the previous group */
+        accum1 += accumC1;
+        accum2 += accum0 >> 28;
+        accum3 += accum1 >> 28;
+        
+        c[4] = ((uint32_t)(accum0)) & mask;
+        c[5] = ((uint32_t)(accum2)) & mask;
+        c[12] = ((uint32_t)(accum1)) & mask;
+        c[13] = ((uint32_t)(accum3)) & mask;
+        
+        accumC0 = accum2 >> 28; /* carries handed to the next group */
+        accumC1 = accum3 >> 28;
+    }
+    { /* this group produces c[6], c[7], c[14], c[15] */
+        
+        /* t^3 terms */
+        smull(&accum1, ax = aa[7], bx = b[15]);
+        accum0 = accum1;
+        
+        /* t^2 terms */
+        
+        smull(&accum2, ax = aa[0], bx);
+        smlal(&accum0, ax, bx = b[14]);
+        smlal(&accum2, ax = aa[1], bx);
+        smlal(&accum0, ax, bx = b[13]);
+        smlal(&accum2, ax = aa[2], bx);
+        smlal(&accum0, ax, bx = b[12]);
+        smlal(&accum2, ax = aa[3], bx);
+        smlal(&accum0, ax, bx = b[11]);
+        smlal(&accum2, ax = aa[4], bx);
+        smlal(&accum0, ax, bx = b[10]);
+        smlal(&accum2, ax = aa[5], bx);
+        smlal(&accum0, ax, bx = b[9]);
+        smlal(&accum2, ax = aa[6], bx);
+        smlal(&accum0, ax, bx = b[8]);
+        smlal(&accum2, ax = aa[7], bx);
+        
+        
+        smlal(&accum0, ax = a[15], bx = b[7]);
+        
+        /* t terms */
+        accum1 += accum0;
+        accum3 = accum2; /* plain assignment here: this group has no t^3 contribution for accum3 */
+        
+        smlal(&accum3, ax = a[8], bx);
+        smlal(&accum1, ax, bx = b[6]);
+        smlal(&accum3, ax = a[9], bx);
+        smlal(&accum1, ax, bx = b[5]);
+        smlal(&accum3, ax = a[10], bx);
+        smlal(&accum1, ax, bx = b[4]);
+        smlal(&accum3, ax = a[11], bx);
+        smlal(&accum1, ax, bx = b[3]);
+        smlal(&accum3, ax = a[12], bx);
+        smlal(&accum1, ax, bx = b[2]);
+        smlal(&accum3, ax = a[13], bx);
+        smlal(&accum1, ax, bx = b[1]);
+        smlal(&accum3, ax = a[14], bx);
+        smlal(&accum1, ax, bx = b[0]);
+        smlal(&accum3, ax = a[15], bx);
+        
+        
+        smlal(&accum1, ax = a[7], bx = bm[7]);
+        
+        /* 1 terms */
+        
+        smlal(&accum2, ax = a[0], bx);
+        smlal(&accum0, ax, bx = bm[6]);
+        smlal(&accum2, ax = a[1], bx);
+        smlal(&accum0, ax, bx = bm[5]);
+        smlal(&accum2, ax = a[2], bx);
+        smlal(&accum0, ax, bx = bm[4]);
+        smlal(&accum2, ax = a[3], bx);
+        smlal(&accum0, ax, bx = bm[3]);
+        smlal(&accum2, ax = a[4], bx);
+        smlal(&accum0, ax, bx = bm[2]);
+        smlal(&accum2, ax = a[5], bx);
+        smlal(&accum0, ax, bx = bm[1]);
+        smlal(&accum2, ax = a[6], bx);
+        smlal(&accum0, ax, bx = bm[0]);
+        smlal(&accum2, ax = a[7], bx);
+        
+        accum0 += accumC0; /* absorb the carries from the previous group */
+        accum1 += accumC1;
+        accum2 += accum0 >> 28;
+        accum3 += accum1 >> 28;
+        
+        c[6] = ((uint32_t)(accum0)) & mask;
+        c[7] = ((uint32_t)(accum2)) & mask;
+        c[14] = ((uint32_t)(accum1)) & mask;
+        c[15] = ((uint32_t)(accum3)) & mask;
+        
+        accum0 = accum2 >> 28; /* carries out of c[7] and c[15], folded back in below */
+        accum1 = accum3 >> 28;
+    }
+
+    accum0 += accum1; /* fold the final carries back into c[8] and c[0] */
+    accum0 += c[8];
+    accum1 += c[0];
+    c[8] = ((uint32_t)(accum0)) & mask;
+    c[0] = ((uint32_t)(accum1)) & mask;
+    
+    accum0 >>= 28;
+    accum1 >>= 28;
+    c[9] += ((uint32_t)(accum0)); /* final carry is added without re-masking c[9] */
+    c[1] += ((uint32_t)(accum1)); /* final carry is added without re-masking c[1] */
+}
+
+void gf_sqr (gf_s *__restrict__ cs, const gf as) {
+    const uint32_t *a = as->limb;
+    uint32_t *c = cs->limb;
+
+    uint64_t accum0 = 0, accum1 = 0, accum2, accum3, accumC0, accumC1, tmp;
+    uint32_t mask = (1<<28) - 1;  
+
+    uint32_t bm[8];
+    
+    int i;
+    for (i=0; i<8; i++) {
+        bm[i] = a[i] - a[i+8];
+    }
+
+    uint32_t ax,bx;
+    {
+        /* t^3 terms */
+        smull2(&accum1, ax = a[9], bx = a[15]);
+        smull2(&accum3, ax = a[10], bx);
+        smlal2(&accum1, ax, bx = a[14]);
+        smlal2(&accum3, ax = a[11], bx);
+        smlal2(&accum1, ax, bx = a[13]);
+        smlal2(&accum3, ax = a[12], bx);
+        smlal(&accum1, ax, ax);
+        
+        accum0 = accum1;
+        accum2 = accum3;
+        
+        /* t^2 terms */
+        smlal2(&accum2, ax = a[8], a[9]);
+        smlal(&accum0, ax, ax);
+        
+        smlal2(&accum0, ax = a[1], bx = a[7]);
+        smlal2(&accum2, ax = a[2], bx);
+        smlal2(&accum0, ax, bx = a[6]);
+        smlal2(&accum2, ax = a[3], bx);
+        smlal2(&accum0, ax, bx = a[5]);
+        smlal2(&accum2, ax = a[4], bx);
+        smlal(&accum0, ax, ax);
+        
+        /* t terms */
+        accum1 += accum0;
+        accum3 += accum2;
+        smlal2(&accum3, ax = a[0], bx = a[1]);
+        smlal(&accum1, ax, ax);
+        
+        accum1 = -accum1;
+        accum3 = -accum3;
+        accum2 = -accum2;
+        accum0 = -accum0;
+        
+        smlal2(&accum1, ax = bm[1], bx = bm[7]);
+        smlal2(&accum3, ax = bm[2], bx);
+        smlal2(&accum1, ax, bx = bm[6]);
+        smlal2(&accum3, ax = bm[3], bx);
+        smlal2(&accum1, ax, bx = bm[5]);
+        smlal2(&accum3, ax = bm[4], bx);
+        smlal(&accum1, ax, ax);
+        
+        /* 1 terms */
+        smlal2(&accum2, ax = bm[0], bx = bm[1]);
+        smlal(&accum0, ax, ax);
+        
+        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
+        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
+        
+        accum2 += accum0 >> 28;
+        accum3 += accum1 >> 28;
+        
+        c[0] = ((uint32_t)(accum0)) & mask;
+        c[1] = ((uint32_t)(accum2)) & mask;
+        c[8] = ((uint32_t)(accum1)) & mask;
+        c[9] = ((uint32_t)(accum3)) & mask;
+        
+        accumC0 = accum2 >> 28;
+        accumC1 = accum3 >> 28;
+    }
+    {
+        /* t^3 terms */
+        smull2(&accum1, ax = a[11], bx = a[15]);
+        smull2(&accum3, ax = a[12], bx);
+        smlal2(&accum1, ax, bx = a[14]);
+        smlal2(&accum3, ax = a[13], bx);
+        smlal(&accum1, ax, ax);
+        
+        accum0 = accum1;
+        accum2 = accum3;
+        
+        /* t^2 terms */
+        smlal2(&accum2, ax = a[8], bx = a[11]);
+        smlal2(&accum0, ax, bx = a[10]);
+        smlal2(&accum2, ax = a[9], bx);
+        smlal(&accum0, ax, ax);
+        
+        smlal2(&accum0, ax = a[3], bx = a[7]);
+        smlal2(&accum2, ax = a[4], bx);
+        smlal2(&accum0, ax, bx = a[6]);
+        smlal2(&accum2, ax = a[5], bx);
+        smlal(&accum0, ax, ax);
+        
+        /* t terms */
+        accum1 += accum0;
+        accum3 += accum2;
+        smlal2(&accum3, ax = a[0], bx = a[3]);
+        smlal2(&accum1, ax, bx = a[2]);
+        smlal2(&accum3, ax = a[1], bx);
+        smlal(&accum1, ax, ax);
+        
+        accum1 = -accum1;
+        accum3 = -accum3;
+        accum2 = -accum2;
+        accum0 = -accum0;
+        
+        smlal2(&accum1, ax = bm[3], bx = bm[7]);
+        smlal2(&accum3, ax = bm[4], bx);
+        smlal2(&accum1, ax, bx = bm[6]);
+        smlal2(&accum3, ax = bm[5], bx);
+        smlal(&accum1, ax, ax);
+        
+        /* 1 terms */
+        smlal2(&accum2, ax = bm[0], bx = bm[3]);
+        smlal2(&accum0, ax, bx = bm[2]);
+        smlal2(&accum2, ax = bm[1], bx);
+        smlal(&accum0, ax, ax);
+        
+        
+        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
+        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
+        
+        accum0 += accumC0;
+        accum1 += accumC1;
+        accum2 += accum0 >> 28;
+        accum3 += accum1 >> 28;
+        
+        c[2] = ((uint32_t)(accum0)) & mask;
+        c[3] = ((uint32_t)(accum2)) & mask;
+        c[10] = ((uint32_t)(accum1)) & mask;
+        c[11] = ((uint32_t)(accum3)) & mask;
+        
+        accumC0 = accum2 >> 28;
+        accumC1 = accum3 >> 28;
+    }
+    {
+        
+        /* t^3 terms */
+        smull2(&accum1, ax = a[13], bx = a[15]);
+        smull2(&accum3, ax = a[14], bx);
+        smlal(&accum1, ax, ax);
+        
+        accum0 = accum1;
+        accum2 = accum3;
+        
+        /* t^2 terms */
+        
+        smlal2(&accum2, ax = a[8], bx = a[13]);
+        smlal2(&accum0, ax, bx = a[12]);
+        smlal2(&accum2, ax = a[9], bx);
+        smlal2(&accum0, ax, bx = a[11]);
+        smlal2(&accum2, ax = a[10], bx);
+        smlal(&accum0, ax, ax);
+        
+        
+        smlal2(&accum0, ax = a[5], bx = a[7]);
+        smlal2(&accum2, ax = a[6], bx);
+        smlal(&accum0, ax, ax);
+        
+        /* t terms */
+        accum1 += accum0;
+        accum3 += accum2;
+        
+        smlal2(&accum3, ax = a[0], bx = a[5]);
+        smlal2(&accum1, ax, bx = a[4]);
+        smlal2(&accum3, ax = a[1], bx);
+        smlal2(&accum1, ax, bx = a[3]);
+        smlal2(&accum3, ax = a[2], bx);
+        smlal(&accum1, ax, ax);
+        
+        accum1 = -accum1;
+        accum3 = -accum3;
+        accum2 = -accum2;
+        accum0 = -accum0;
+        
+        smlal2(&accum1, ax = bm[5], bx = bm[7]);
+        smlal2(&accum3, ax = bm[6], bx);
+        smlal(&accum1, ax, ax);
+        
+        /* 1 terms */
+        
+        smlal2(&accum2, ax = bm[0], bx = bm[5]);
+        smlal2(&accum0, ax, bx = bm[4]);
+        smlal2(&accum2, ax = bm[1], bx);
+        smlal2(&accum0, ax, bx = bm[3]);
+        smlal2(&accum2, ax = bm[2], bx);
+        smlal(&accum0, ax, ax);
+        
+        
+        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
+        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
+        
+        accum0 += accumC0;
+        accum1 += accumC1;
+        accum2 += accum0 >> 28;
+        accum3 += accum1 >> 28;
+        
+        c[4] = ((uint32_t)(accum0)) & mask;
+        c[5] = ((uint32_t)(accum2)) & mask;
+        c[12] = ((uint32_t)(accum1)) & mask;
+        c[13] = ((uint32_t)(accum3)) & mask;
+        
+        accumC0 = accum2 >> 28;
+        accumC1 = accum3 >> 28;
+    }
+    {
+        
+        /* t^3 terms */
+        smull(&accum1, ax = a[15], bx = a[15]);
+        accum0 = accum1;
+        
+        /* t^2 terms */
+        
+        smull2(&accum2, ax = a[8], bx);
+        smlal2(&accum0, ax, bx = a[14]);
+        smlal2(&accum2, ax = a[9], bx);
+        smlal2(&accum0, ax, bx = a[13]);
+        smlal2(&accum2, ax = a[10], bx);
+        smlal2(&accum0, ax, bx = a[12]);
+        smlal2(&accum2, ax = a[11], bx);
+        smlal(&accum0, ax, ax);
+        
+        
+        smlal(&accum0, ax = a[7], bx = a[7]);
+        
+        /* t terms */
+        accum1 += accum0;
+        accum3 = accum2;
+        
+        smlal2(&accum3, ax = a[0], bx);
+        smlal2(&accum1, ax, bx = a[6]);
+        smlal2(&accum3, ax = a[1], bx);
+        smlal2(&accum1, ax, bx = a[5]);
+        smlal2(&accum3, ax = a[2], bx);
+        smlal2(&accum1, ax, bx = a[4]);
+        smlal2(&accum3, ax = a[3], bx);
+        smlal(&accum1, ax, ax);
+        
+        accum1 = -accum1;
+        accum3 = -accum3;
+        accum2 = -accum2;
+        accum0 = -accum0;
+        
+        bx = bm[7];
+        smlal(&accum1, bx, bx);
+        
+        /* 1 terms */
+        
+        smlal2(&accum2, ax = bm[0], bx);
+        smlal2(&accum0, ax, bx = bm[6]);
+        smlal2(&accum2, ax = bm[1], bx);
+        smlal2(&accum0, ax, bx = bm[5]);
+        smlal2(&accum2, ax = bm[2], bx);
+        smlal2(&accum0, ax, bx = bm[4]);
+        smlal2(&accum2, ax = bm[3], bx);
+        smlal(&accum0, ax, ax);
+        
+        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
+        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
+        
+        
+        accum0 += accumC0;
+        accum1 += accumC1;
+        accum2 += accum0 >> 28;
+        accum3 += accum1 >> 28;
+        
+        c[6] = ((uint32_t)(accum0)) & mask;
+        c[7] = ((uint32_t)(accum2)) & mask;
+        c[14] = ((uint32_t)(accum1)) & mask;
+        c[15] = ((uint32_t)(accum3)) & mask;
+        
+        accum0 = accum2 >> 28;
+        accum1 = accum3 >> 28;
+    }
+
+    accum0 += accum1;
+    accum0 += c[8];
+    accum1 += c[0];
+    c[8] = ((uint32_t)(accum0)) & mask;
+    c[0] = ((uint32_t)(accum1)) & mask;
+    
+    accum0 >>= 28;
+    accum1 >>= 28;
+    c[9] += ((uint32_t)(accum0));
+    c[1] += ((uint32_t)(accum1));
+}
+
+void gf_mulw_unsigned (
+    gf_s *__restrict__ cs, /* result limbs; declared restrict, so it must not alias as */
+    const gf as,           /* field element to be scaled */
+    uint32_t b             /* multiplier; asserted below to fit in 28 bits */
+) {
+    uint32_t mask = (1ull<<28)-1;  /* low 28 bits, the part kept in each output limb */
+    assert(b <= mask);
+    /* Scale every limb of as by b along two carry chains: limbs 0..7 and limbs 8..15. */
+    const uint32_t *a = as->limb;
+    uint32_t *c = cs->limb;
+
+    uint64_t accum0, accum8; /* running accumulator/carry of the low chain and of the high chain */
+
+    int i;
+
+    uint32_t c0, c8, n0, n8; /* input limbs of the current round; the c and n pairs alternate */
+    c0 = a[0]; c8 = a[8];
+    accum0 = widemul(b, c0);
+    accum8 = widemul(b, c8);
+
+    c[0] = accum0 & mask; accum0 >>= 28;
+    c[8] = accum8 & mask; accum8 >>= 28;
+    /* Rounds 1..7, unrolled by hand.  smlal is defined earlier in this file (not visible here); presumably acc += b * limb. */
+    i=1;
+    {
+        n0 = a[i]; n8 = a[i+8];
+        smlal(&accum0, b, n0);
+        smlal(&accum8, b, n8);
+        
+        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i+8] = accum8 & mask; accum8 >>= 28;
+        i++;
+    }
+    {
+        c0 = a[i]; c8 = a[i+8];
+        smlal(&accum0, b, c0);
+        smlal(&accum8, b, c8);
+
+        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i+8] = accum8 & mask; accum8 >>= 28;
+        i++;
+    }
+    {
+        n0 = a[i]; n8 = a[i+8];
+        smlal(&accum0, b, n0);
+        smlal(&accum8, b, n8);
+
+        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i+8] = accum8 & mask; accum8 >>= 28;
+        i++;
+    }
+    {
+        c0 = a[i]; c8 = a[i+8];
+        smlal(&accum0, b, c0);
+        smlal(&accum8, b, c8);
+
+        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i+8] = accum8 & mask; accum8 >>= 28;
+        i++;
+    }
+    {
+        n0 = a[i]; n8 = a[i+8];
+        smlal(&accum0, b, n0);
+        smlal(&accum8, b, n8);
+
+        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i+8] = accum8 & mask; accum8 >>= 28;
+        i++;
+    }
+    {
+        c0 = a[i]; c8 = a[i+8];
+        smlal(&accum0, b, c0);
+        smlal(&accum8, b, c8);
+        
+        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i+8] = accum8 & mask; accum8 >>= 28;
+        i++;
+    }
+    {
+        n0 = a[i]; n8 = a[i+8];
+        smlal(&accum0, b, n0);
+        smlal(&accum8, b, n8);
+
+        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i+8] = accum8 & mask; accum8 >>= 28;
+        i++;
+    }
+    /* Wrap-around: the carry out of limb 15 is added at limb 0 and at limb 8, the carry out of limb 7 at limb 8. */
+    accum0 += accum8 + c[8];
+    c[8] = accum0 & mask;
+    c[9] += accum0 >> 28; /* added without re-masking, so limb 9 may end up above 28 bits */
+
+    accum8 += c[0];
+    c[0] = accum8 & mask;
+    c[1] += accum8 >> 28; /* likewise for limb 1 */
+}
diff --git a/crypto/ec/curve448/arch_arm_32/f_impl.h b/crypto/ec/curve448/arch_arm_32/f_impl.h
new file mode 100644
index 0000000000..09d77aafdd
--- /dev/null
+++ b/crypto/ec/curve448/arch_arm_32/f_impl.h
@@ -0,0 +1,53 @@
+/* Copyright (c) 2014-2016 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#define GF_HEADROOM 2
+#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 /* two initializers per literal: its low 28 bits, then x >> 28 */
+#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
+    {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}
+/* FIELD_LITERAL expands eight literals into sixteen limb initializers; LIMB_PLACE_VALUE is 28 for every index. */
+#define LIMB_PLACE_VALUE(i) 28
+
+void gf_add_RAW (gf out, const gf a, const gf b) {
+    /* Limb-wise out = a + b (out->limb[i] = a->limb[i] + b->limb[i]),
+     * done one uint32xn_t vector at a time; no carry or reduction here.
+     */
+    const uint32xn_t *lhs = (const uint32xn_t*)a, *rhs = (const uint32xn_t*)b;
+    uint32xn_t *sum = (uint32xn_t*)out;
+    for (unsigned int v=0; v<sizeof(*out)/sizeof(uint32xn_t); v++) {
+        sum[v] = lhs[v] + rhs[v];
+    }
+}
+
+void gf_sub_RAW (gf out, const gf a, const gf b) {
+    /* Limb-wise out = a - b (out->limb[i] = a->limb[i] - b->limb[i]),
+     * done one uint32xn_t vector at a time; no borrow or reduction here.
+     */
+    const uint32xn_t *lhs = (const uint32xn_t*)a, *rhs = (const uint32xn_t*)b;
+    uint32xn_t *diff = (uint32xn_t*)out;
+    for (unsigned int v=0; v<sizeof(*out)/sizeof(uint32xn_t); v++) {
+        diff[v] = lhs[v] - rhs[v];
+    }
+}
+
+void gf_bias (gf a, int amt) {
+    /* Every limb gains amt*(2^28-1), except limb 8 which gains amt*(2^28-2). */
+    const uint32_t every = ((1ull<<28)-1)*amt, eighth = every-amt;
+    const uint32x4_t plain = {every,every,every,every}, with8 = {eighth,every,every,every};
+    uint32x4_t *quad = (uint32x4_t*) a;
+    for (unsigned int q=0; q<4; q++) {
+        if (q == 2) quad[q] += with8; else quad[q] += plain;
+    }
+}
+
+void gf_weak_reduce (gf a) {
+    /* One carry pass over the limbs; the carry out of limb 15 wraps into limbs 0 and 8. */
+    const uint64_t low28 = (1ull<<28) - 1, top = a->limb[15] >> 28;
+    a->limb[8] += top;
+    for (unsigned int k=15; k>=1; k--) {
+        a->limb[k] = (a->limb[k] & low28) + (a->limb[k-1]>>28);
+    }
+    a->limb[0] = (a->limb[0] & low28) + top;
+}
+
diff --git a/crypto/ec/curve448/arch_neon/arch_intrinsics.h b/crypto/ec/curve448/arch_neon/arch_intrinsics.h
new file mode 100644
index 0000000000..1a1e14b36c
--- /dev/null
+++ b/crypto/ec/curve448/arch_neon/arch_intrinsics.h
@@ -0,0 +1,24 @@
+/* Copyright (c) 2016 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#ifndef __ARCH_NEON_ARCH_INTRINSICS_H__
+#define __ARCH_NEON_ARCH_INTRINSICS_H__
+
+#define ARCH_WORD_BITS 32
+
+static __inline__ __attribute((always_inline,unused))
+uint32_t word_is_zero(uint32_t a) {
+    uint32_t ret; /* becomes 0xFFFFFFFF when a == 0 and 0 otherwise, with no branch */
+    __asm__("subs %0, %1, #1;\n\tsbc %0, %0, %0" : "=r"(ret) : "r"(a) : "cc"); /* a-1 borrows only for a == 0; sbc of a register with itself leaves minus that borrow */
+    return ret;
+}
+
+static __inline__ __attribute((always_inline,unused))
+uint64_t widemul(uint32_t a, uint32_t b) {
+    /* Full 64-bit product of two 32-bit words.  UMULL would do, but telling the compiler its registers must differ is awkward. */
+    return a * (uint64_t)b;
+}
+
+#endif /* __ARCH_NEON_ARCH_INTRINSICS_H__ */
+
diff --git a/crypto/ec/curve448/arch_neon/f_impl.c b/crypto/ec/curve448/arch_neon/f_impl.c
new file mode 100644
index 0000000000..5e998f9f37
--- /dev/null
+++ b/crypto/ec/curve448/arch_neon/f_impl.c
@@ -0,0 +1,592 @@
+/* Copyright (c) 2014 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#include "f_field.h"
+
+static __inline__ uint64x2_t __attribute__((gnu_inline,always_inline,unused))
+xx_vaddup_u64(uint64x2_t x) {
+    __asm__ ("vadd.s64 %f0, %e0" : "+w"(x)); /* %e0/%f0 are the low/high D halves of x: high lane += low lane, low lane unchanged */
+    return x;
+}
+
+static __inline__ int64x2_t __attribute__((gnu_inline,always_inline,unused))
+vrev128_s64(int64x2_t x) {
+    __asm__ ("vswp.s64 %e0, %f0" : "+w"(x)); /* exchange the low and high 64-bit lanes of x */
+    return x;
+}
+
+static __inline__ uint64x2_t __attribute__((gnu_inline,always_inline))
+vrev128_u64(uint64x2_t x) {
+    __asm__ ("vswp.s64 %e0, %f0" : "+w"(x)); /* exchange the low and high 64-bit lanes of x (unsigned twin of vrev128_s64) */
+    return x;
+}
+
+/* Signed multiply-accumulate: *acc += (int32_t)a * (int32_t)b, widened to 64 bits. */
+static inline void __attribute__((gnu_inline,always_inline,unused))
+smlal (uint64_t *acc, const uint32_t a, const uint32_t b) {
+    const int64_t lhs = (int32_t)a;
+    const int64_t rhs = (int32_t)b;
+
+    *acc += lhs * rhs;
+}
+
+/* Doubled signed multiply-accumulate: *acc += 2 * (int32_t)a * (int32_t)b in 64 bits. */
+static inline void __attribute__((gnu_inline,always_inline,unused))
+smlal2 (uint64_t *acc, const uint32_t a, const uint32_t b) {
+    const int64_t lhs = (int32_t)a;
+    const int64_t rhs = (int32_t)b;
+
+    *acc += lhs * rhs * 2;
+}
+
+/* Signed widening multiply: *acc = (int32_t)a * (int32_t)b, overwriting the old value. */
+static inline void __attribute__((gnu_inline,always_inline,unused))
+smull (uint64_t *acc, const uint32_t a, const uint32_t b) {
+    const int64_t lhs = (int32_t)a;
+    const int64_t rhs = (int32_t)b;
+
+    *acc = lhs * rhs;
+}
+
+/* Doubled signed widening multiply: *acc = 2 * (int32_t)a * (int32_t)b, overwriting. */
+static inline void __attribute__((gnu_inline,always_inline,unused))
+smull2 (uint64_t *acc, const uint32_t a, const uint32_t b) {
+    const int64_t lhs = (int32_t)a;
+    const int64_t rhs = (int32_t)b;
+
+    *acc = lhs * rhs * 2;
+}
+
+void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { /* field product cs = as * bs, as one NEON inline-assembly block */
+    #define _bl0 "q0" /* operand b: bl/bh are the two de-interleaved halves of each vld2 load, bs their sum or difference */
+    #define _bl0_0 "d0"
+    #define _bl0_1 "d1"
+    #define _bh0 "q1"
+    #define _bh0_0 "d2"
+    #define _bh0_1 "d3"
+    #define _bs0 "q2"
+    #define _bs0_0 "d4"
+    #define _bs0_1 "d5"
+    #define _bl2 "q3"
+    #define _bl2_0 "d6"
+    #define _bl2_1 "d7"
+    #define _bh2 "q4"
+    #define _bh2_0 "d8"
+    #define _bh2_1 "d9"
+    #define _bs2 "q5"
+    #define _bs2_0 "d10"
+    #define _bs2_1 "d11"
+/* operand a: al/ah are the two de-interleaved halves of each load, as = al + ah */
+    #define _as0 "q6"
+    #define _as0_0 "d12"
+    #define _as0_1 "d13"
+    #define _as2 "q7"
+    #define _as2_0 "d14"
+    #define _as2_1 "d15"
+    #define _al0 "q8"
+    #define _al0_0 "d16"
+    #define _al0_1 "d17"
+    #define _ah0 "q9"
+    #define _ah0_0 "d18"
+    #define _ah0_1 "d19"
+    #define _al2 "q10"
+    #define _al2_0 "d20"
+    #define _al2_1 "d21"
+    #define _ah2 "q11"
+    #define _ah2_0 "d22"
+    #define _ah2_1 "d23"
+/* four accumulators of two 64-bit lanes each; the _0/_1 names are their D halves */
+    #define _a0a "q12"
+    #define _a0a_0 "d24"
+    #define _a0a_1 "d25"
+    #define _a0b "q13"
+    #define _a0b_0 "d26"
+    #define _a0b_1 "d27"
+    #define _a1a "q14"
+    #define _a1a_0 "d28"
+    #define _a1a_1 "d29"
+    #define _a1b "q15"
+    #define _a1b_0 "d30"
+    #define _a1b_1 "d31"
+    #define VMAC(op,result,a,b,n) #op" "result", "a", "b"[" #n "]\n\t" /* emits "op result, a, b[n]": vector a times lane n of b */
+    #define VOP3(op,result,a,b)   #op" "result", "a", "b"\n\t" /* emits "op result, a, b" */
+    #define VOP2(op,result,a)     #op" "result", "a"\n\t" /* emits "op result, a" */
+
+    int32x2_t *vc = (int32x2_t*) cs->limb; /* output cursor; the vstmia stores below advance it */
+
+    __asm__ __volatile__(
+        /* Loads: vld2.32 splits even and odd 32-bit words into separate registers; :128 requires 16-byte alignment, ! post-increments. */
+        "vld2.32 {"_al0_0","_al0_1","_ah0_0","_ah0_1"}, [%[a],:128]!" "\n\t"
+        VOP3(vadd.i32,_as0,_al0,_ah0)
+        
+        "vld2.32 {"_bl0_0","_bl0_1","_bh0_0","_bh0_1"}, [%[b],:128]!" "\n\t"
+        VOP3(vadd.i32,_bs0_1,_bl0_1,_bh0_1)
+        VOP3(vsub.i32,_bs0_0,_bl0_0,_bh0_0)
+            
+        "vld2.32 {"_bl2_0","_bl2_1","_bh2_0","_bh2_1"}, [%[b],:128]!" "\n\t"
+        VOP3(vadd.i32,_bs2,_bl2,_bh2)
+            
+        "vld2.32 {"_al2_0","_al2_1","_ah2_0","_ah2_1"}, [%[a],:128]!" "\n\t"
+        VOP3(vadd.i32,_as2,_al2,_ah2)
+        /* Store group 1 of 4: the _a0* accumulators use lane [0] of the b registers, the _a1* accumulators lane [1]. */
+        VMAC(vmull.s32,_a0b,_as0_1,_bs2_1,0)
+        VMAC(vmlal.s32,_a0b,_as2_0,_bs2_0,0)
+        VMAC(vmlal.s32,_a0b,_as2_1,_bs0_1,0)
+        VMAC(vmlal.s32,_a0b,_as0_0,_bh0_0,0)
+            
+        VMAC(vmull.s32,_a1b,_as0_1,_bs2_1,1)
+        VMAC(vmlal.s32,_a1b,_as2_0,_bs2_0,1)
+        VMAC(vmlal.s32,_a1b,_as2_1,_bs0_1,1)
+        VMAC(vmlal.s32,_a1b,_as0_0,_bh0_0,1)
+            
+        VOP2(vmov,_a0a,_a0b)
+        VMAC(vmlal.s32,_a0a,_ah0_1,_bh2_1,0)
+        VMAC(vmlal.s32,_a0a,_ah2_0,_bh2_0,0)
+        VMAC(vmlal.s32,_a0a,_ah2_1,_bh0_1,0)
+        VMAC(vmlal.s32,_a0a,_ah0_0,_bl0_0,0)
+            
+        VMAC(vmlsl.s32,_a0b,_al0_1,_bl2_1,0)
+        VMAC(vmlsl.s32,_a0b,_al2_0,_bl2_0,0)
+        VMAC(vmlsl.s32,_a0b,_al2_1,_bl0_1,0)
+        VMAC(vmlal.s32,_a0b,_al0_0,_bs0_0,0)
+            
+        VOP2(vmov,_a1a,_a1b)
+        VMAC(vmlal.s32,_a1a,_ah0_1,_bh2_1,1)
+        VMAC(vmlal.s32,_a1a,_ah2_0,_bh2_0,1)
+        VMAC(vmlal.s32,_a1a,_ah2_1,_bh0_1,1)
+        VMAC(vmlal.s32,_a1a,_ah0_0,_bl0_0,1)
+            
+            VOP2(vswp,_a0b_1,_a0a_0)
+            
+        VMAC(vmlsl.s32,_a1b,_al0_1,_bl2_1,1)
+        VMAC(vmlsl.s32,_a1b,_al2_0,_bl2_0,1)
+        VMAC(vmlsl.s32,_a1b,_al2_1,_bl0_1,1)
+        VMAC(vmlal.s32,_a1b,_al0_0,_bs0_0,1)
+            /* Carry step (repeated per group): vsra adds acc >> 28 to the next accumulator, vmovn narrows to 32-bit words, vbic keeps 28 bits. */
+            VOP3(vsra.u64,_a0a,_a0b,"#28")
+            VOP3(vsub.i32,_bs0_1,_bl0_1,_bh0_1) /* _bs0_1 held the sum until here; from now on it is the difference */
+            VOP2(vmovn.i64,_a0b_0,_a0b)
+                
+            VOP2(vswp,_a1b_1,_a1a_0)
+            VOP3(vadd.i64,_a1b,_a0a,_a1b)
+                    
+        /* Store group 2 of 4; the tail of the previous carry step is interleaved with its first multiplies. */
+        VMAC(vmull.s32,_a0a,_as2_0,_bs2_1,0)
+            VOP2(vmovn.i64,_a0b_1,_a1b)
+        VMAC(vmlal.s32,_a0a,_as2_1,_bs2_0,0)
+            VOP3(vsra.u64,_a1a,_a1b,"#28")
+        VMAC(vmlal.s32,_a0a,_as0_0,_bh0_1,0)
+            VOP2(vbic.i32,_a0b,"#0xf0000000")
+        VMAC(vmlal.s32,_a0a,_as0_1,_bh0_0,0)
+            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
+                    
+        VMAC(vmull.s32,_a1b,_as2_0,_bs2_1,1)
+        VMAC(vmlal.s32,_a1b,_as2_1,_bs2_0,1)
+        VMAC(vmlal.s32,_a1b,_as0_0,_bh0_1,1)
+        VMAC(vmlal.s32,_a1b,_as0_1,_bh0_0,1)
+
+        VOP2(vmov,_a0b_1,_a0a_1)
+        VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0)
+        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
+        VMAC(vmlal.s32,_a0a,_ah2_0,_bh2_1,0)
+        VMAC(vmlal.s32,_a0a,_ah2_1,_bh2_0,0)
+        VMAC(vmlal.s32,_a0a,_ah0_0,_bl0_1,0)
+        VMAC(vmlal.s32,_a0a,_ah0_1,_bl0_0,0)
+
+        VMAC(vmlsl.s32,_a0b,_al2_0,_bl2_1,0)
+        VMAC(vmlsl.s32,_a0b,_al2_1,_bl2_0,0)
+        VMAC(vmlal.s32,_a0b,_al0_0,_bs0_1,0)
+        VMAC(vmlal.s32,_a0b,_al0_1,_bs0_0,0)
+
+        VOP2(vmov,_a1a,_a1b)
+        VMAC(vmlal.s32,_a1a,_ah2_0,_bh2_1,1)
+        VMAC(vmlal.s32,_a1a,_ah2_1,_bh2_0,1)
+        VMAC(vmlal.s32,_a1a,_ah0_0,_bl0_1,1)
+        VMAC(vmlal.s32,_a1a,_ah0_1,_bl0_0,1)
+
+            VOP2(vswp,_a0b_1,_a0a_0)
+
+        VMAC(vmlsl.s32,_a1b,_al2_0,_bl2_1,1)
+        VMAC(vmlsl.s32,_a1b,_al2_1,_bl2_0,1)
+        VMAC(vmlal.s32,_a1b,_al0_0,_bs0_1,1)
+        VMAC(vmlal.s32,_a1b,_al0_1,_bs0_0,1)
+                                        
+            VOP3(vsra.u64,_a0a,_a0b,"#28")
+            VOP3(vsub.i32,_bs2_0,_bl2_0,_bh2_0) /* _bs2_0 switches from sum to difference */
+            VOP2(vmovn.i64,_a0b_0,_a0b)
+                        
+            VOP2(vswp,_a1b_1,_a1a_0)
+            VOP3(vadd.i64,_a1b,_a0a,_a1b)
+        /* Store group 3 of 4. */
+        VMAC(vmull.s32,_a0a,_as2_1,_bs2_1,0)
+            VOP2(vmovn.i64,_a0b_1,_a1b)
+        VMAC(vmlal.s32,_a0a,_as0_0,_bh2_0,0)
+            VOP3(vsra.u64,_a1a,_a1b,"#28")
+        VMAC(vmlal.s32,_a0a,_as0_1,_bh0_1,0)
+            VOP2(vbic.i32,_a0b,"#0xf0000000")
+        VMAC(vmlal.s32,_a0a,_as2_0,_bh0_0,0)
+            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
+
+        VMAC(vmull.s32,_a1b,_as2_1,_bs2_1,1)
+        VMAC(vmlal.s32,_a1b,_as0_0,_bh2_0,1)
+        VMAC(vmlal.s32,_a1b,_as0_1,_bh0_1,1)
+        VMAC(vmlal.s32,_a1b,_as2_0,_bh0_0,1)
+
+        VOP2(vmov,_a0b_1,_a0a_1)
+        VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0)
+        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
+        VMAC(vmlal.s32,_a0a,_ah2_1,_bh2_1,0)
+        VMAC(vmlal.s32,_a0a,_ah0_0,_bl2_0,0)
+        VMAC(vmlal.s32,_a0a,_ah0_1,_bl0_1,0)
+        VMAC(vmlal.s32,_a0a,_ah2_0,_bl0_0,0)
+
+        VMAC(vmlsl.s32,_a0b,_al2_1,_bl2_1,0)
+        VMAC(vmlal.s32,_a0b,_al0_0,_bs2_0,0)
+        VMAC(vmlal.s32,_a0b,_al0_1,_bs0_1,0)
+        VMAC(vmlal.s32,_a0b,_al2_0,_bs0_0,0)
+
+        VOP2(vmov,_a1a,_a1b)
+        VMAC(vmlal.s32,_a1a,_ah2_1,_bh2_1,1)
+        VMAC(vmlal.s32,_a1a,_ah0_0,_bl2_0,1)
+        VMAC(vmlal.s32,_a1a,_ah0_1,_bl0_1,1)
+        VMAC(vmlal.s32,_a1a,_ah2_0,_bl0_0,1)
+
+            VOP2(vswp,_a0b_1,_a0a_0)
+
+        VMAC(vmlsl.s32,_a1b,_al2_1,_bl2_1,1)
+        VMAC(vmlal.s32,_a1b,_al0_0,_bs2_0,1)
+        VMAC(vmlal.s32,_a1b,_al0_1,_bs0_1,1)
+        VMAC(vmlal.s32,_a1b,_al2_0,_bs0_0,1)
+                                                                
+            VOP3(vsub.i32,_bs2_1,_bl2_1,_bh2_1) /* _bs2_1 switches from sum to difference */
+            VOP3(vsra.u64,_a0a,_a0b,"#28")
+            VOP2(vmovn.i64,_a0b_0,_a0b)
+                        
+            VOP2(vswp,_a1b_1,_a1a_0)
+            VOP3(vadd.i64,_a1b,_a0a,_a1b)
+        /* Store group 4 of 4. */
+        VMAC(vmull.s32,_a0a,_as0_0,_bh2_1,0)
+            VOP2(vmovn.i64,_a0b_1,_a1b)
+        VMAC(vmlal.s32,_a0a,_as0_1,_bh2_0,0)
+            VOP3(vsra.u64,_a1a,_a1b,"#28")
+        VMAC(vmlal.s32,_a0a,_as2_0,_bh0_1,0)
+            VOP2(vbic.i32,_a0b,"#0xf0000000")
+        VMAC(vmlal.s32,_a0a,_as2_1,_bh0_0,0)
+            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
+
+        VMAC(vmull.s32,_a1b,_as0_0,_bh2_1,1)
+        VMAC(vmlal.s32,_a1b,_as0_1,_bh2_0,1)
+        VMAC(vmlal.s32,_a1b,_as2_0,_bh0_1,1)
+        VMAC(vmlal.s32,_a1b,_as2_1,_bh0_0,1)
+
+        VOP2(vmov,_a0b_1,_a0a_1)
+        VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0)
+        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
+        VMAC(vmlal.s32,_a0a,_ah0_0,_bl2_1,0)
+        VMAC(vmlal.s32,_a0a,_ah0_1,_bl2_0,0)
+        VMAC(vmlal.s32,_a0a,_ah2_0,_bl0_1,0)
+        VMAC(vmlal.s32,_a0a,_ah2_1,_bl0_0,0)
+
+        VMAC(vmlal.s32,_a0b,_al0_0,_bs2_1,0)
+        VMAC(vmlal.s32,_a0b,_al0_1,_bs2_0,0)
+        VMAC(vmlal.s32,_a0b,_al2_0,_bs0_1,0)
+        VMAC(vmlal.s32,_a0b,_al2_1,_bs0_0,0)
+
+        VOP2(vmov,_a1a,_a1b)
+        VMAC(vmlal.s32,_a1a,_ah0_0,_bl2_1,1)
+        VMAC(vmlal.s32,_a1a,_ah0_1,_bl2_0,1)
+        VMAC(vmlal.s32,_a1a,_ah2_0,_bl0_1,1)
+        VMAC(vmlal.s32,_a1a,_ah2_1,_bl0_0,1)
+
+            VOP2(vswp,_a0b_1,_a0a_0)
+
+        VMAC(vmlal.s32,_a1b,_al0_0,_bs2_1,1)
+        VMAC(vmlal.s32,_a1b,_al0_1,_bs2_0,1)
+        VMAC(vmlal.s32,_a1b,_al2_0,_bs0_1,1)
+        VMAC(vmlal.s32,_a1b,_al2_1,_bs0_0,1)
+                        
+            VOP3(vsra.u64,_a0a,_a0b,"#28")
+            VOP2(vmovn.i64,_a0b_0,_a0b)
+                                                                                            
+            VOP2(vswp,_a1b_1,_a1a_0)
+            VOP3(vadd.i64,_a0a,_a0a,_a1b)
+
+            VOP2(vmovn.i64,_a0b_1,_a0a)
+            VOP3(vsra.u64,_a1a,_a0a,"#28")
+                                                                                            
+            VOP2(vbic.i32,_a0b,"#0xf0000000") 
+                                                                                            
+        VOP2(vswp,_a1a_0,_a1a_1)
+                                                                                            
+            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"  
+            "sub %[c], #64" "\n\t" /* rewind c to the first output word: four stores of 16 bytes each were made */
+                                                                                                
+        VOP3(vadd.i64,_a1a_1,_a1a_1,_a1a_0)
+        /* Final carries: reload the first six output words, add what is left in _a1a, renormalise the first four and store all six back. */
+            "vldmia %[c], {"_a0a_0", "_a0a_1", "_a0b_0"}" "\n\t"
+            VOP2(vaddw.s32,_a1a,_a0a_0)
+            VOP2(vmovn.i64,_a0a_0,_a1a)
+            VOP2(vshr.s64,_a1a,"#28")
+                                                
+            VOP2(vaddw.s32,_a1a,_a0a_1)
+            VOP2(vmovn.i64,_a0a_1,_a1a)
+            VOP2(vshr.s64,_a1a,"#28")
+                                                                                                    
+            VOP2(vbic.i32,_a0a,"#0xf0000000")
+                                                
+            VOP2(vaddw.s32,_a1a,_a0b_0) 
+            VOP2(vmovn.i64,_a0b_0,_a1a)
+            
+            "vstmia %[c], {"_a0a_0", "_a0a_1", "_a0b_0"}" "\n\t"
+        /* All three pointers are read-write operands because the post-increment addressing modes modify them. */
+        : [a]"+r"(as)
+        , [b]"+r"(bs)
+        , [c]"+r"(vc)
+        /* No input-only operands; every NEON q register and memory are declared clobbered. */
+        :: "q0","q1","q2","q3",
+            "q4","q5","q6","q7",
+            "q8","q9","q10","q11",
+            "q12","q13","q14","q15",
+            "memory"
+    );
+}
+
+void gf_sqr (gf_s *__restrict__ cs, const gf bs) { /* field square cs = bs^2; reuses the register names and VMAC/VOP macros defined inside gf_mul */
+    int32x2_t *vc = (int32x2_t*) cs->limb; /* output cursor; the vstmia stores below advance it */
+
+    __asm__ __volatile__ (
+        "vld2.32 {"_bl0_0","_bl0_1","_bh0_0","_bh0_1"}, [%[b],:128]!" "\n\t"
+        VOP3(vadd.i32,_bs0_1,_bl0_1,_bh0_1) /* 0 .. 2^30 */
+        VOP3(vsub.i32,_bs0_0,_bl0_0,_bh0_0) /* +- 2^29 */
+        VOP3(vadd.i32,_as0,_bl0,_bh0)       /* 0 .. 2^30 */
+            
+        "vld2.32 {"_bl2_0","_bl2_1","_bh2_0","_bh2_1"}, [%[b],:128]!" "\n\t"
+        VOP3(vadd.i32,_bs2,_bl2,_bh2)       /* 0 .. 2^30 */
+        VOP2(vmov,_as2,_bs2)
+        /* The vqd* forms double each product (and saturate).  NOTE(review): the range notes below look like bounds in units of 2^58 - confirm. */
+        VMAC(vqdmull.s32,_a0b,_as0_1,_bs2_1,0) /* 0 .. 8 * 2^58.  danger for vqdmlal is 32 */
+        VMAC(vmlal.s32,_a0b,_as2_0,_bs2_0,0)   /* 0 .. 12 */
+        VMAC(vmlal.s32,_a0b,_as0_0,_bh0_0,0)   /* 0 .. 14 */
+            
+        VMAC(vqdmull.s32,_a1b,_as0_1,_bs2_1,1) /* 0 .. 8 */
+        VMAC(vmlal.s32,_a1b,_as2_0,_bs2_0,1)   /* 0 .. 14 */
+        VMAC(vmlal.s32,_a1b,_as0_0,_bh0_0,1)   /* 0 .. 16 */
+            
+        VOP2(vmov,_a0a,_a0b)                   /* 0 .. 14 */
+        VMAC(vqdmlal.s32,_a0a,_bh0_1,_bh2_1,0) /* 0 .. 16 */
+        VMAC(vmlal.s32,_a0a,_bh2_0,_bh2_0,0)   /* 0 .. 17 */
+        VMAC(vmlal.s32,_a0a,_bh0_0,_bl0_0,0)   /* 0 .. 18 */
+            
+        VMAC(vqdmlsl.s32,_a0b,_bl0_1,_bl2_1,0) /*-2 .. 14 */
+        VMAC(vmlsl.s32,_a0b,_bl2_0,_bl2_0,0)   /*-3 .. 14 */
+        VMAC(vmlal.s32,_a0b,_bl0_0,_bs0_0,0)   /*-4 .. 15 */
+            
+        VOP2(vmov,_a1a,_a1b)
+        VMAC(vqdmlal.s32,_a1a,_bh0_1,_bh2_1,1) /* 0 .. 18 */
+        VMAC(vmlal.s32,_a1a,_bh2_0,_bh2_0,1)   /* 0 .. 19 */
+        VMAC(vmlal.s32,_a1a,_bh0_0,_bl0_0,1)   /* 0 .. 20 */
+            
+            VOP2(vswp,_a0b_1,_a0a_0)
+            
+        VMAC(vqdmlsl.s32,_a1b,_bl0_1,_bl2_1,1) /*-2 .. 16 */
+        VMAC(vmlsl.s32,_a1b,_bl2_0,_bl2_0,1)   /*-3 .. 16 */
+        VMAC(vmlal.s32,_a1b,_bl0_0,_bs0_0,1)   /*-4 .. 17 */
+            /* Carry step (repeated per store group): vsra adds acc >> 28 onward, vmovn narrows to 32-bit words, vbic keeps 28 bits. */
+            VOP3(vsra.u64,_a0a,_a0b,"#28")
+            VOP3(vsub.i32,_bs0_1,_bl0_1,_bh0_1)
+            VOP2(vmovn.i64,_a0b_0,_a0b)
+                
+            VOP2(vswp,_a1b_1,_a1a_0)
+            VOP3(vadd.i64,_a1b,_a0a,_a1b)
+                    
+                    
+        VMAC(vqdmull.s32,_a0a,_as2_0,_bs2_1,0) /* 0 .. 8 */
+            VOP2(vmovn.i64,_a0b_1,_a1b)
+            VOP3(vsra.u64,_a1a,_a1b,"#28")
+        VMAC(vqdmlal.s32,_a0a,_as0_0,_bh0_1,0) /* 0 .. 12 */
+            VOP2(vbic.i32,_a0b,"#0xf0000000")
+            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
+                    
+        VMAC(vqdmull.s32,_a1b,_as2_0,_bs2_1,1) /* 0 .. 8 */
+        VMAC(vqdmlal.s32,_a1b,_as0_0,_bh0_1,1) /* 0 .. 12 */
+
+        VOP2(vmov,_a0b,_a0a)               /* 0 .. 12 */
+        VMAC(vqdmlal.s32,_a0a,_bh2_0,_bh2_1,0) /* 0 .. 14 */
+        VMAC(vqdmlal.s32,_a0a,_bh0_0,_bl0_1,0) /* 0 .. 16 */
+
+        VMAC(vqdmlsl.s32,_a0b,_bl2_0,_bl2_1,0) /*-2 .. 12 */
+        VMAC(vqdmlal.s32,_a0b,_bl0_0,_bs0_1,0) /*-4 .. 14 */
+        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
+        VOP3(vadd.i64,_a0b_0,_a0b_0,_a1a_0)
+
+        VOP2(vmov,_a1a,_a1b)                   /* 0 .. 12 */
+        VMAC(vqdmlal.s32,_a1a,_bh2_0,_bh2_1,1) /* 0 .. 14 */
+        VMAC(vqdmlal.s32,_a1a,_bh0_0,_bl0_1,1) /* 0 .. 16 */
+
+            VOP2(vswp,_a0b_1,_a0a_0)
+
+        VMAC(vqdmlsl.s32,_a1b,_bl2_0,_bl2_1,1) /*-2 .. 12 */
+        VMAC(vqdmlal.s32,_a1b,_bl0_0,_bs0_1,1) /*-4 .. 14 */
+                                        
+            VOP3(vsra.u64,_a0a,_a0b,"#28")
+            VOP3(vsub.i32,_bs2_0,_bl2_0,_bh2_0)
+            VOP2(vmovn.i64,_a0b_0,_a0b)
+                        
+            VOP2(vswp,_a1b_1,_a1a_0)
+            VOP3(vadd.i64,_a1b,_a0a,_a1b)
+
+        VMAC(vmull.s32,_a0a,_as2_1,_bs2_1,0)
+            VOP2(vmovn.i64,_a0b_1,_a1b)
+        VMAC(vqdmlal.s32,_a0a,_as0_0,_bh2_0,0)
+            VOP3(vsra.u64,_a1a,_a1b,"#28")
+        VMAC(vmlal.s32,_a0a,_as0_1,_bh0_1,0)
+            VOP2(vbic.i32,_a0b,"#0xf0000000")
+            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
+
+        VMAC(vmull.s32,_a1b,_as2_1,_bs2_1,1)
+        VMAC(vqdmlal.s32,_a1b,_as0_0,_bh2_0,1)
+        VMAC(vmlal.s32,_a1b,_as0_1,_bh0_1,1)
+
+        VOP2(vmov,_a0b_1,_a0a_1)
+        VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0)
+        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
+        VMAC(vmlal.s32,_a0a,_bh2_1,_bh2_1,0)
+        VMAC(vqdmlal.s32,_a0a,_bh0_0,_bl2_0,0)
+        VMAC(vmlal.s32,_a0a,_bh0_1,_bl0_1,0)
+
+        VMAC(vmlsl.s32,_a0b,_bl2_1,_bl2_1,0)
+        VMAC(vqdmlal.s32,_a0b,_bl0_0,_bs2_0,0)
+        VMAC(vmlal.s32,_a0b,_bl0_1,_bs0_1,0)
+
+        VOP2(vmov,_a1a,_a1b)
+        VMAC(vmlal.s32,_a1a,_bh2_1,_bh2_1,1)
+        VMAC(vqdmlal.s32,_a1a,_bh0_0,_bl2_0,1)
+        VMAC(vmlal.s32,_a1a,_bh0_1,_bl0_1,1)
+
+            VOP2(vswp,_a0b_1,_a0a_0)
+
+        VMAC(vmlsl.s32,_a1b,_bl2_1,_bl2_1,1)
+        VMAC(vqdmlal.s32,_a1b,_bl0_0,_bs2_0,1)
+        VMAC(vmlal.s32,_a1b,_bl0_1,_bs0_1,1)
+                                                                
+            VOP3(vsub.i32,_bs2_1,_bl2_1,_bh2_1)
+            VOP3(vsra.u64,_a0a,_a0b,"#28")
+            VOP2(vmovn.i64,_a0b_0,_a0b)
+                        
+            VOP2(vswp,_a1b_1,_a1a_0)
+            VOP3(vadd.i64,_a1b,_a0a,_a1b)
+
+        VMAC(vqdmull.s32,_a0a,_as0_0,_bh2_1,0)
+            VOP2(vmovn.i64,_a0b_1,_a1b)
+            VOP3(vsra.u64,_a1a,_a1b,"#28")
+        VMAC(vqdmlal.s32,_a0a,_as2_0,_bh0_1,0)
+            VOP2(vbic.i32,_a0b,"#0xf0000000")
+            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
+
+        VMAC(vqdmull.s32,_a1b,_as0_0,_bh2_1,1)
+        VMAC(vqdmlal.s32,_a1b,_as2_0,_bh0_1,1)
+
+        VOP2(vmov,_a0b_1,_a0a_1)
+        VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0)
+        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
+        VMAC(vqdmlal.s32,_a0a,_bh0_0,_bl2_1,0)
+        VMAC(vqdmlal.s32,_a0a,_bh2_0,_bl0_1,0)
+
+        VMAC(vqdmlal.s32,_a0b,_bl0_0,_bs2_1,0)
+        VMAC(vqdmlal.s32,_a0b,_bl2_0,_bs0_1,0)
+
+        VOP2(vmov,_a1a,_a1b)
+        VMAC(vqdmlal.s32,_a1a,_bh0_0,_bl2_1,1)
+        VMAC(vqdmlal.s32,_a1a,_bh2_0,_bl0_1,1)
+
+            VOP2(vswp,_a0b_1,_a0a_0)
+
+        VMAC(vqdmlal.s32,_a1b,_bl0_0,_bs2_1,1)
+        VMAC(vqdmlal.s32,_a1b,_bl2_0,_bs0_1,1)
+                        
+            VOP3(vsra.u64,_a0a,_a0b,"#28")
+            VOP2(vmovn.i64,_a0b_0,_a0b)
+                                                                                            
+            VOP2(vswp,_a1b_1,_a1a_0)
+            VOP3(vadd.i64,_a0a,_a0a,_a1b)
+
+            VOP2(vmovn.i64,_a0b_1,_a0a)
+            VOP3(vsra.u64,_a1a,_a0a,"#28")
+                                                                                            
+            VOP2(vbic.i32,_a0b,"#0xf0000000") 
+                                                                                            
+        VOP2(vswp,_a1a_0,_a1a_1)
+                                                                                            
+            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"  
+            "sub %[c], #64" "\n\t" /* rewind c to the first output word: four stores of 16 bytes each were made */
+                                                                                                
+        VOP3(vadd.i64,_a1a_1,_a1a_1,_a1a_0)
+        /* Final carries: reload the first six output words, add what is left in _a1a, renormalise the first four and store all six back. */
+            "vldmia %[c], {"_a0a_0", "_a0a_1", "_a0b_0"}" "\n\t"
+            VOP2(vaddw.s32,_a1a,_a0a_0)
+            VOP2(vmovn.i64,_a0a_0,_a1a)
+            VOP2(vshr.s64,_a1a,"#28")
+                                                
+            VOP2(vaddw.s32,_a1a,_a0a_1)
+            VOP2(vmovn.i64,_a0a_1,_a1a)
+            VOP2(vshr.s64,_a1a,"#28")
+                                                                                                    
+            VOP2(vbic.i32,_a0a,"#0xf0000000")
+                                                
+            VOP2(vaddw.s32,_a1a,_a0b_0) 
+            VOP2(vmovn.i64,_a0b_0,_a1a)
+            
+            "vstmia %[c], {"_a0a_0", "_a0a_1", "_a0b_0"}" "\n\t"
+        /* Both pointers are read-write operands because the post-increment addressing modes modify them. */
+        : [b]"+r"(bs)
+        , [c]"+r"(vc)
+        /* q8-q11 are absent from the clobber list: this routine never uses the _al and _ah registers. */
+        :: "q0","q1","q2","q3",
+            "q4","q5","q6","q7",
+            "q12","q13","q14","q15",
+            "memory"
+    );
+}
+
+void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
+    /* Multiply the field element as by the word b (asserted below 2^28).
+     * Each uint32x2_t holds two limbs, so the two lanes of the 64-bit
+     * accumulator run as two separate carry chains over the eight pairs.
+     */
+    const uint32x2_t low28 = {(1<<28) - 1, (1<<28)-1};
+    assert(b<(1<<28));
+
+    const uint32x2_t *src = (const uint32x2_t *) as->limb;
+    uint32x2_t *dst = (uint32x2_t *) cs->limb;
+    const uint32x2_t factor = {b, 0};
+    uint64x2_t carry;
+    int k = 1;
+
+    /* Pair 0 opens both chains with a plain widening multiply. */
+    carry = vmull_lane_u32(src[0], factor, 0);
+    dst[0] = vmovn_u64(carry) & low28;
+    carry = vshrq_n_u64(carry,28);
+
+    /* PERF: the right way to do this is to reduce behind, i.e. run the
+     * vmull + vmlal of rounds 0, 1, 2, then the vsraq of rounds 0, 1, then
+     * the vmull + vmlal of round 3, then the vsraq of rounds 1, 2, ...
+     */
+    while (k < 8) {
+        carry = vmlal_lane_u32(carry, src[k], factor, 0);
+        dst[k] = vmovn_u64(carry) & low28;
+        carry = vshrq_n_u64(carry,28);
+        k++;
+    }
+
+    /* Fold the two final carries back in: swap the lanes, then add the low
+     * lane into the high one, so lane 0 holds the old lane-1 carry and
+     * lane 1 holds the sum of both.
+     */
+    carry = xx_vaddup_u64(vrev128_u64(carry));
+    carry = vaddw_u32(carry, dst[0]);
+    dst[0] = vmovn_u64(carry) & low28;
+
+    /* Whatever is left goes into pair 1 without re-masking. */
+    dst[1] += vmovn_u64(vshrq_n_u64(carry,28));
+}
diff --git a/crypto/ec/curve448/arch_neon/f_impl.h b/crypto/ec/curve448/arch_neon/f_impl.h
new file mode 100644
index 0000000000..ba48d8cee2
--- /dev/null
+++ b/crypto/ec/curve448/arch_neon/f_impl.h
@@ -0,0 +1,56 @@
+/* Copyright (c) 2014-2016 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#define GF_HEADROOM 2
+#define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15)
+#define USE_NEON_PERM 1
+#define LIMBHI(x) ((x##ull)>>28)
+#define LIMBLO(x) ((x##ull)&((1ull<<28)-1))
+#  define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
+    {{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \
+      LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \
+      LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \
+      LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}}
+    
+#define LIMB_PLACE_VALUE(i) 28
+
+void gf_add_RAW (gf out, const gf a, const gf b) { /* limb-wise out = a + b in uint32xn_t chunks; carries are not propagated */
+    const uint32xn_t *va = (const uint32xn_t*)a, *vb = (const uint32xn_t*)b;
+    uint32xn_t *vout = (uint32xn_t*)out;
+    for (unsigned int k=0; k<sizeof(*out)/sizeof(uint32xn_t); k++) { vout[k] = va[k] + vb[k]; }
+}
+
+void gf_sub_RAW (gf out, const gf a, const gf b) {
+    /* Limb-wise out = a - b with no borrow handling, done in uint32xn_t-sized
+     * vector chunks.  Scalar equivalent: out->limb[i] = a->limb[i] - b->limb[i]. */
+    const uint32xn_t *va = (const uint32xn_t*)a;
+    const uint32xn_t *vb = (const uint32xn_t*)b;
+    uint32xn_t *vout = (uint32xn_t*)out;
+    unsigned int k;
+    for (k=0; k<sizeof(*out)/sizeof(uint32xn_t); k++) {
+        vout[k] = va[k] - vb[k];
+    }
+}
+
+void gf_bias (gf a, int amt) { /* adds a bias scaled by amt to every lane; one lane of the first vector gets amt less */
+    const uint32_t bias = ((1ull<<28)-1)*amt, bias_less = bias-amt;
+    const uint32x4_t first = {bias,bias_less,bias,bias}, rest = {bias,bias,bias,bias};
+    uint32x4_t *vec = (uint32x4_t*) a;
+    /* NOTE(review): lane 1 presumably holds the limb needing the smaller bias under LIMBPERM -- confirm */
+    vec[0] += first;
+    for (unsigned int k=1; k<4; k++) {
+        vec[k] += rest;
+    }
+}
+
+void gf_weak_reduce (gf a) { /* one carry pass: every lane is cut to 28 bits and its excess moves one vector up */
+    uint32x2_t *pairs = (uint32x2_t*) a;
+    const uint32x2_t low28 = {(1ull<<28)-1, (1ull<<28)-1}, hi_lane = {0,-1};
+    const uint32x2_t top = vshr_n_u32(pairs[7],28); /* carry out of the last pair, taken before it is overwritten */
+    unsigned int k = 7;
+    do { /* walk from high to low so pairs[k-1] is still unreduced when it is read */
+        pairs[k] = vsra_n_u32(pairs[k] & low28, pairs[k-1], 28);
+    } while (--k >= 1);
+    pairs[0] = (pairs[0] & low28) + vrev64_u32(top) + (top&hi_lane);
+}
+
diff --git a/crypto/ec/curve448/arch_ref64/arch_intrinsics.h b/crypto/ec/curve448/arch_ref64/arch_intrinsics.h
new file mode 100644
index 0000000000..4b34ea5520
--- /dev/null
+++ b/crypto/ec/curve448/arch_ref64/arch_intrinsics.h
@@ -0,0 +1,22 @@
+/* Copyright (c) 2016 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#ifndef __ARCH_REF64_ARCH_INTRINSICS_H__
+#define __ARCH_REF64_ARCH_INTRINSICS_H__
+
+#define ARCH_WORD_BITS 64
+
+static __inline__ __attribute((always_inline,unused))
+uint64_t word_is_zero(uint64_t a) { /* all-ones mask when a == 0, otherwise 0; written without a branch */
+    const __uint128_t borrowed = ((__uint128_t)a)-1; /* wraps to 2^128-1 only when a == 0 */
+    /* bits 64 and up are set exactly when the subtraction borrowed; let's hope the compiler keeps it branch-free */
+    return (uint64_t)(borrowed>>64);
+}
+
+static __inline__ __attribute((always_inline,unused))
+__uint128_t widemul(uint64_t a, uint64_t b) { /* full 64x64 -> 128-bit unsigned product */
+    const __uint128_t wide_a = a; return wide_a * b;
+}
+
+#endif /* __ARCH_REF64_ARCH_INTRINSICS_H__ */
+
diff --git a/crypto/ec/curve448/arch_ref64/f_impl.c b/crypto/ec/curve448/arch_ref64/f_impl.c
new file mode 100644
index 0000000000..526810012a
--- /dev/null
+++ b/crypto/ec/curve448/arch_ref64/f_impl.c
@@ -0,0 +1,302 @@
+/* Copyright (c) 2014 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#include "f_field.h"
+
+void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { /* cs = as * bs on eight 56-bit limbs */
+    const uint64_t *a = as->limb, *b = bs->limb;
+    uint64_t *c = cs->limb;
+
+    __uint128_t accum0 = 0, accum1 = 0, accum2;
+    uint64_t mask = (1ull<<56) - 1;  /* low 56 bits of a limb */
+
+    uint64_t aa[4], bb[4], bbb[4]; /* aa = low+high half of a, bb = low+high half of b, bbb = bb + high half of b */
+
+    unsigned int i;
+    for (i=0; i<4; i++) {
+        aa[i]  = a[i] + a[i+4];
+        bb[i]  = b[i] + b[i+4];
+        bbb[i] = bb[i] + b[i+4];
+    }
+
+    int I_HATE_UNROLLED_LOOPS = 0; /* always 0 here, so the unrolled branch below is the one that runs */
+
+    if (I_HATE_UNROLLED_LOOPS) {
+        /* The compiler probably won't unroll this,
+         * so it's like 80% slower.
+         */
+        for (i=0; i<4; i++) {
+            accum2 = 0;
+
+            unsigned int j;
+            for (j=0; j<=i; j++) {
+                accum2 += widemul(a[j],   b[i-j]);
+                accum1 += widemul(aa[j], bb[i-j]);
+                accum0 += widemul(a[j+4], b[i-j+4]);
+            }
+            for (; j<4; j++) { /* j > i: the unsigned index i-j+8 (or +4) wraps around to 8-(j-i) (or 4-(j-i)) */
+                accum2 += widemul(a[j],   b[i-j+8]);
+                accum1 += widemul(aa[j], bbb[i-j+4]);
+                accum0 += widemul(a[j+4], bb[i-j+4]);
+            }
+
+            accum1 -= accum2;
+            accum0 += accum2;
+
+            c[i]   = ((uint64_t)(accum0)) & mask;
+            c[i+4] = ((uint64_t)(accum1)) & mask;
+
+            accum0 >>= 56;
+            accum1 >>= 56;
+        }
+    } else {
+        /* Output limbs 0 and 4: the same sums as loop iteration i = 0 above. */
+        accum2  = widemul(a[0],  b[0]);
+        accum1 += widemul(aa[0], bb[0]);
+        accum0 += widemul(a[4],  b[4]);
+
+        accum2 += widemul(a[1],  b[7]);
+        accum1 += widemul(aa[1], bbb[3]);
+        accum0 += widemul(a[5],  bb[3]);
+
+        accum2 += widemul(a[2],  b[6]);
+        accum1 += widemul(aa[2], bbb[2]);
+        accum0 += widemul(a[6],  bb[2]);
+
+        accum2 += widemul(a[3],  b[5]);
+        accum1 += widemul(aa[3], bbb[1]);
+        accum0 += widemul(a[7],  bb[1]);
+
+        accum1 -= accum2;
+        accum0 += accum2;
+
+        c[0] = ((uint64_t)(accum0)) & mask;
+        c[4] = ((uint64_t)(accum1)) & mask;
+
+        accum0 >>= 56; /* what is left is the carry into the next pair of limbs */
+        accum1 >>= 56;
+
+        accum2  = widemul(a[0],  b[1]);
+        accum1 += widemul(aa[0], bb[1]);
+        accum0 += widemul(a[4],  b[5]);
+
+        accum2 += widemul(a[1],  b[0]);
+        accum1 += widemul(aa[1], bb[0]);
+        accum0 += widemul(a[5],  b[4]);
+
+        accum2 += widemul(a[2],  b[7]);
+        accum1 += widemul(aa[2], bbb[3]);
+        accum0 += widemul(a[6],  bb[3]);
+
+        accum2 += widemul(a[3],  b[6]);
+        accum1 += widemul(aa[3], bbb[2]);
+        accum0 += widemul(a[7],  bb[2]);
+
+        accum1 -= accum2;
+        accum0 += accum2;
+
+        c[1] = ((uint64_t)(accum0)) & mask;
+        c[5] = ((uint64_t)(accum1)) & mask;
+
+        accum0 >>= 56;
+        accum1 >>= 56;
+
+        accum2  = widemul(a[0],  b[2]);
+        accum1 += widemul(aa[0], bb[2]);
+        accum0 += widemul(a[4],  b[6]);
+
+        accum2 += widemul(a[1],  b[1]);
+        accum1 += widemul(aa[1], bb[1]);
+        accum0 += widemul(a[5],  b[5]);
+
+        accum2 += widemul(a[2],  b[0]);
+        accum1 += widemul(aa[2], bb[0]);
+        accum0 += widemul(a[6],  b[4]);
+
+        accum2 += widemul(a[3],  b[7]);
+        accum1 += widemul(aa[3], bbb[3]);
+        accum0 += widemul(a[7],  bb[3]);
+
+        accum1 -= accum2;
+        accum0 += accum2;
+
+        c[2] = ((uint64_t)(accum0)) & mask;
+        c[6] = ((uint64_t)(accum1)) & mask;
+
+        accum0 >>= 56;
+        accum1 >>= 56;
+
+        accum2  = widemul(a[0],  b[3]);
+        accum1 += widemul(aa[0], bb[3]);
+        accum0 += widemul(a[4],  b[7]);
+
+        accum2 += widemul(a[1],  b[2]);
+        accum1 += widemul(aa[1], bb[2]);
+        accum0 += widemul(a[5],  b[6]);
+
+        accum2 += widemul(a[2],  b[1]);
+        accum1 += widemul(aa[2], bb[1]);
+        accum0 += widemul(a[6],  b[5]);
+
+        accum2 += widemul(a[3],  b[0]);
+        accum1 += widemul(aa[3], bb[0]);
+        accum0 += widemul(a[7],  b[4]);
+
+        accum1 -= accum2;
+        accum0 += accum2;
+
+        c[3] = ((uint64_t)(accum0)) & mask;
+        c[7] = ((uint64_t)(accum1)) & mask;
+
+        accum0 >>= 56;
+        accum1 >>= 56;
+    } /* !I_HATE_UNROLLED_LOOPS */
+
+    accum0 += accum1; /* limb 4 receives the carry out of limb 3 plus the carry out of limb 7 */
+    accum0 += c[4];
+    accum1 += c[0]; /* limb 0 receives the carry out of limb 7 */
+    c[4] = ((uint64_t)(accum0)) & mask;
+    c[0] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    c[5] += ((uint64_t)(accum0)); /* last carries are added unmasked, so limbs 5 and 1 may exceed 56 bits */
+    c[1] += ((uint64_t)(accum1));
+}
+
+void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
+    /* cs = as * b: limbs 0..3 and limbs 4..7 run as two carry chains side by side */
+    const uint64_t low56 = (1ull<<56) - 1;
+    const uint64_t *src = as->limb;
+    uint64_t *dst = cs->limb;
+    __uint128_t lo_chain = 0, hi_chain = 0;
+    unsigned int k;
+
+    for (k=0; k<4; k++) {
+        lo_chain += widemul(b, src[k]);
+        hi_chain += widemul(b, src[k+4]);
+        dst[k]   = lo_chain & low56;
+        dst[k+4] = hi_chain & low56;
+        lo_chain >>= 56;
+        hi_chain >>= 56;
+    }
+    /* the carry out of limb 3 and the carry out of limb 7 both land on limb 4 */
+    lo_chain += hi_chain + dst[4];
+    dst[4] = lo_chain & low56;
+    dst[5] += lo_chain >> 56;
+    /* the carry out of limb 7 also lands on limb 0 */
+    hi_chain += dst[0]; dst[0] = hi_chain & low56; dst[1] += hi_chain >> 56;
+}
+
+void gf_sqr (gf_s *__restrict__ cs, const gf as) { /* cs = as * as on eight 56-bit limbs */
+    const uint64_t *a = as->limb;
+    uint64_t *c = cs->limb;
+
+    __uint128_t accum0 = 0, accum1 = 0, accum2;
+    uint64_t mask = (1ull<<56) - 1;  /* low 56 bits of a limb */
+
+    uint64_t aa[4]; /* aa[i] = a[i] + a[i+4]: low half plus high half */
+
+    /* For some reason clang doesn't vectorize this without prompting? */
+    unsigned int i;
+    for (i=0; i<4; i++) {
+        aa[i] = a[i] + a[i+4];
+    }
+
+    accum2  = widemul(a[0],a[3]);
+    accum0  = widemul(aa[0],aa[3]);
+    accum1  = widemul(a[4],a[7]);
+
+    accum2 += widemul(a[1], a[2]);
+    accum0 += widemul(aa[1], aa[2]);
+    accum1 += widemul(a[5], a[6]);
+
+    accum0 -= accum2;
+    accum1 += accum2;
+
+    c[3] = ((uint64_t)(accum1))<<1 & mask; /* each cross term occurs twice: store the doubled sum ... */
+    c[7] = ((uint64_t)(accum0))<<1 & mask;
+
+    accum0 >>= 55; /* ... and shift by 55 instead of 56 to carry the doubled value */
+    accum1 >>= 55;
+
+    accum0 += widemul(2*aa[1],aa[3]);
+    accum1 += widemul(2*a[5], a[7]);
+    accum0 += widemul(aa[2], aa[2]);
+    accum1 += accum0;
+
+    accum0 -= widemul(2*a[1], a[3]);
+    accum1 += widemul(a[6], a[6]);
+    
+    accum2 = widemul(a[0],a[0]);
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    accum0 -= widemul(a[2], a[2]);
+    accum1 += widemul(aa[0], aa[0]);
+    accum0 += widemul(a[4], a[4]);
+
+    c[0] = ((uint64_t)(accum0)) & mask;
+    c[4] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum2  = widemul(2*aa[2],aa[3]);
+    accum0 -= widemul(2*a[2], a[3]);
+    accum1 += widemul(2*a[6], a[7]);
+
+    accum1 += accum2;
+    accum0 += accum2;
+
+    accum2  = widemul(2*a[0],a[1]);
+    accum1 += widemul(2*aa[0], aa[1]);
+    accum0 += widemul(2*a[4], a[5]);
+
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    c[1] = ((uint64_t)(accum0)) & mask;
+    c[5] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum2  = widemul(aa[3],aa[3]);
+    accum0 -= widemul(a[3], a[3]);
+    accum1 += widemul(a[7], a[7]);
+
+    accum1 += accum2;
+    accum0 += accum2;
+
+    accum2  = widemul(2*a[0],a[2]);
+    accum1 += widemul(2*aa[0], aa[2]);
+    accum0 += widemul(2*a[4], a[6]);
+
+    accum2 += widemul(a[1], a[1]);
+    accum1 += widemul(aa[1], aa[1]);
+    accum0 += widemul(a[5], a[5]);
+
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    c[2] = ((uint64_t)(accum0)) & mask;
+    c[6] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum0 += c[3]; /* limbs 3 and 7 were stored first; now add the carries that reach them */
+    accum1 += c[7];
+    c[3] = ((uint64_t)(accum0)) & mask;
+    c[7] = ((uint64_t)(accum1)) & mask;
+
+    /* we could almost stop here, but it wouldn't be stable, so... */
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+    c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); /* carry out of limb 3 plus carry out of limb 7 */
+    c[0] += ((uint64_t)(accum1)); /* carry out of limb 7 also goes to limb 0 */
+}
+
diff --git a/crypto/ec/curve448/arch_ref64/f_impl.h b/crypto/ec/curve448/arch_ref64/f_impl.h
new file mode 100644
index 0000000000..05206bf988
--- /dev/null
+++ b/crypto/ec/curve448/arch_ref64/f_impl.h
@@ -0,0 +1,38 @@
+/* Copyright (c) 2014-2016 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#define GF_HEADROOM 9999 /* Everything is reduced anyway */
+#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
+    
+#define LIMB_PLACE_VALUE(i) 56
+
+void gf_add_RAW (gf out, const gf a, const gf b) {
+    /* out = a + b limb by limb, followed by one weak-reduction pass on the result */
+    unsigned int k;
+    for (k=0; k<8; k++) { out->limb[k] = a->limb[k] + b->limb[k]; }
+    gf_weak_reduce(out);
+}
+
+void gf_sub_RAW (gf out, const gf a, const gf b) { /* out = a - b + bias, then one weak-reduction pass */
+    const uint64_t bias = ((1ull<<56)-1)*2, bias4 = bias-2; /* limb-wise 2*p for p = 2^448 - 2^224 - 1; limb 4 gets 2 less */
+    for (unsigned int k=0; k<8; k++) {
+        out->limb[k] = a->limb[k] - b->limb[k] + ((k!=4) ? bias : bias4);
+    }
+    gf_weak_reduce(out);
+}
+
+void gf_bias (gf a, int amt) { /* deliberately does nothing in this backend (GF_HEADROOM notes everything is reduced anyway) */
+    /* the casts to void only mark both parameters as intentionally unused */
+    (void) amt, (void) a;
+}
+
+void gf_weak_reduce (gf a) { /* one carry-propagation pass over the eight 56-bit limbs */
+    const uint64_t low56 = (1ull<<56) - 1;
+    const uint64_t top_carry = a->limb[7] >> 56; /* 2^448 = 2^224 + 1 (mod p): feeds limb 4 and limb 0 */
+    a->limb[4] += top_carry;
+    for (unsigned int k=8; --k>0; ) { /* k = 7..1, downwards so limb[k-1] is read before it is masked */
+        a->limb[k] = (a->limb[k] & low56) + (a->limb[k-1]>>56);
+    }
+    a->limb[0] = (a->limb[0] & low56) + top_carry;
+}
diff --git a/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h b/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h
new file mode 100644
index 0000000000..8fcf2c8dd4
--- /dev/null
+++ b/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h
@@ -0,0 +1,305 @@
+/* Copyright (c) 2014-2016 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__
+#define __ARCH_X86_64_ARCH_INTRINSICS_H__
+
+#define ARCH_WORD_BITS 64
+
+#include <stdint.h>
+
+/* FUTURE: autogenerate */
+static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) { /* returns the full 128-bit product (*a) * (*b) */
+  uint64_t c,d; /* c = low 64 bits of the product, d = high 64 bits */
+  #ifndef __BMI2__ /* mulq: rax * operand -> rdx:rax */
+      __asm__ volatile
+          ("movq %[a], %%rax;"
+           "mulq %[b];"
+           : [c]"=&a"(c), [d]"=d"(d)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "cc");
+  #else /* mulx: rdx * operand -> two chosen registers, flags untouched */
+      __asm__ volatile
+          ("movq %[a], %%rdx;"
+           "mulx %[b], %[c], %[d];"
+           : [c]"=r"(c), [d]"=r"(d)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx");
+  #endif
+  return (((__uint128_t)(d))<<64) | c;
+}
+
+static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) { /* 128-bit product a * (*b); a in a register, b in memory */
+  uint64_t c,d; /* c = low 64 bits of the product, d = high 64 bits */
+  #ifndef __BMI2__
+      __asm__ volatile
+          ("movq %[a], %%rax;"
+           "mulq %[b];"
+           : [c]"=&a"(c), [d]"=d"(d)
+           : [b]"m"(*b), [a]"r"(a)
+           : "cc");
+  #else /* the "d" constraint already places a in rdx, mulx's implicit source */
+      __asm__ volatile
+          ("mulx %[b], %[c], %[d];"
+           : [c]"=r"(c), [d]"=r"(d)
+           : [b]"m"(*b), [a]"d"(a));
+  #endif
+  return (((__uint128_t)(d))<<64) | c;
+}
+
+static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b) { /* 128-bit product a * b; both operands in registers */
+  uint64_t c,d; /* c = low 64 bits of the product, d = high 64 bits */
+  #ifndef __BMI2__ /* a is placed in rax by the "a" constraint, so no movq is needed */
+      __asm__ volatile
+          ("mulq %[b];"
+           : [c]"=a"(c), [d]"=d"(d)
+           : [b]"r"(b), "a"(a)
+           : "cc");
+  #else
+      __asm__ volatile
+          ("mulx %[b], %[c], %[d];"
+           : [c]"=r"(c), [d]"=r"(d)
+           : [b]"r"(b), [a]"d"(a));
+  #endif
+  return (((__uint128_t)(d))<<64) | c;
+}
+
+static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) { /* returns (2 * *a) * (*b); the doubling is done in 64 bits */
+  uint64_t c,d; /* c = low 64 bits of the product, d = high 64 bits */
+  #ifndef __BMI2__
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "addq %%rax, %%rax; "
+           "mulq %[b];"
+           : [c]"=&a"(c), [d]"=d"(d)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "cc");
+  #else /* leaq doubles rdx without touching flags, so no "cc" clobber on this path */
+      __asm__ volatile
+          ("movq %[a], %%rdx;"
+           "leaq (,%%rdx,2), %%rdx;"
+           "mulx %[b], %[c], %[d];"
+           : [c]"=r"(c), [d]"=r"(d)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx");
+  #endif
+  return (((__uint128_t)(d))<<64) | c;
+}
+
+static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { /* *acc += (*a) * (*b), modulo 2^128 */
+  uint64_t lo = *acc, hi = *acc>>64; /* the accumulator is split into two 64-bit halves for add/adc */
+  
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("movq %[a], %%rdx; "
+           "mulx %[b], %[c], %[d]; "
+           "addq %[c], %[lo]; "
+           "adcq %[d], %[hi]; "
+           : [c]"=&r"(c), [d]"=&r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx", "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "mulq %[b]; "
+           "addq %%rax, %[lo]; "
+           "adcq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rax", "rdx", "cc");
+  #endif
+  
+  *acc = (((__uint128_t)(hi))<<64) | lo;
+}
+
+static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2, const uint64_t *a, const uint64_t *b) { /* adds (*a) * (*b) to both *acc and *acc2 */
+  uint64_t lo = *acc, hi = *acc>>64;
+  uint64_t lo2 = *acc2, hi2 = *acc2>>64;
+  
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("movq %[a], %%rdx; "
+           "mulx %[b], %[c], %[d]; "
+           "addq %[c], %[lo]; "
+           "adcq %[d], %[hi]; "
+           "addq %[c], %[lo2]; "
+           "adcq %[d], %[hi2]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx", "cc");
+  #else /* the product is computed once (rdx:rax) and added to each accumulator in turn */
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "mulq %[b]; "
+           "addq %%rax, %[lo]; "
+           "adcq %%rdx, %[hi]; "
+           "addq %%rax, %[lo2]; "
+           "adcq %%rdx, %[hi2]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rax", "rdx", "cc");
+  #endif
+  
+  *acc = (((__uint128_t)(hi))<<64) | lo;
+  *acc2 = (((__uint128_t)(hi2))<<64) | lo2;
+}
+
+static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) { /* *acc += a * (*b); a in a register, b in memory */
+  uint64_t lo = *acc, hi = *acc>>64;
+  
+  #ifdef __BMI2__ /* the "d" constraint places a in rdx, mulx's implicit source */
+      uint64_t c,d;
+      __asm__ volatile
+          ("mulx %[b], %[c], %[d]; "
+           "addq %[c], %[lo]; "
+           "adcq %[d], %[hi]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"d"(a)
+           : "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "mulq %[b]; "
+           "addq %%rax, %[lo]; "
+           "adcq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"r"(a)
+           : "rax", "rdx", "cc");
+  #endif
+  
+  *acc = (((__uint128_t)(hi))<<64) | lo;
+}
+
+static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b) { /* *acc += a * b; both operands in registers */
+  uint64_t lo = *acc, hi = *acc>>64;
+  
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("mulx %[b], %[c], %[d]; "
+           "addq %[c], %[lo]; "
+           "adcq %[d], %[hi]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"r"(b), [a]"d"(a)
+           : "cc");
+  #else /* a is tied to rax as an in/out operand because mulq overwrites rax with the low half */
+      __asm__ volatile
+          ("mulq %[b]; "
+           "addq %%rax, %[lo]; "
+           "adcq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi), "+a"(a)
+           : [b]"r"(b)
+           : "rdx", "cc");
+  #endif
+  
+  *acc = (((__uint128_t)(hi))<<64) | lo;
+}
+
+static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { /* *acc += (2 * *a) * (*b); the doubling is done in 64 bits */
+  uint64_t lo = *acc, hi = *acc>>64;
+  
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("movq %[a], %%rdx; "
+           "addq %%rdx, %%rdx; "
+           "mulx %[b], %[c], %[d]; "
+           "addq %[c], %[lo]; "
+           "adcq %[d], %[hi]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx", "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "addq %%rax, %%rax; "
+           "mulq %[b]; "
+           "addq %%rax, %[lo]; "
+           "adcq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rax", "rdx", "cc");
+  #endif
+  
+  *acc = (((__uint128_t)(hi))<<64) | lo;
+}
+
+static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { /* *acc -= (*a) * (*b), modulo 2^128 */
+  uint64_t lo = *acc, hi = *acc>>64; /* the accumulator is split into two 64-bit halves for sub/sbb */
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("movq %[a], %%rdx; "
+           "mulx %[b], %[c], %[d]; "
+           "subq %[c], %[lo]; "
+           "sbbq %[d], %[hi]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx", "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "mulq %[b]; "
+           "subq %%rax, %[lo]; "
+           "sbbq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rax", "rdx", "cc");
+  #endif
+  *acc = (((__uint128_t)(hi))<<64) | lo;
+}
+
+static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { /* *acc -= (2 * *a) * (*b); the doubling is done in 64 bits */
+  uint64_t lo = *acc, hi = *acc>>64;
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("movq %[a], %%rdx; "
+           "addq %%rdx, %%rdx; "
+           "mulx %[b], %[c], %[d]; "
+           "subq %[c], %[lo]; "
+           "sbbq %[d], %[hi]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx", "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "addq %%rax, %%rax; "
+           "mulq %[b]; "
+           "subq %%rax, %[lo]; "
+           "sbbq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rax", "rdx", "cc");
+  #endif
+  *acc = (((__uint128_t)(hi))<<64) | lo;
+  
+}
+
+static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
+  uint64_t c,d, lo = *acc, hi = *acc>>64;
+  __asm__ volatile
+      ("movq %[a], %%rdx; "
+       "mulx %[b], %[c], %[d]; "
+       "subq %[lo], %[c]; "
+       "sbbq %[hi], %[d]; "
+       : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+       : [b]"m"(*b), [a]"m"(*a)
+       : "rdx", "cc");
+  *acc = (((__uint128_t)(d))<<64) | c;
+}
+
+static __inline__ uint64_t word_is_zero(uint64_t x) { /* all-ones mask when x == 0, otherwise 0; no branch */
+  __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x) : : "cc"); /* neg sets CF iff x != 0, sbb spreads CF over the word; both write flags */
+  return ~x;
+}
+
+static inline uint64_t shrld(__uint128_t x, int n) { /* low 64 bits of x >> n; n must lie in 0..127 */
+    const __uint128_t shifted = x>>n; return (uint64_t)shifted;
+}
+
+#endif /* __ARCH_X86_64_ARCH_INTRINSICS_H__ */
diff --git a/crypto/ec/curve448/arch_x86_64/f_impl.c b/crypto/ec/curve448/arch_x86_64/f_impl.c
new file mode 100644
index 0000000000..1e1d76d617
--- /dev/null
+++ b/crypto/ec/curve448/arch_x86_64/f_impl.c
@@ -0,0 +1,291 @@
+/* Copyright (c) 2014 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#include "f_field.h"
+
+void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { /* cs = as * bs on eight 56-bit limbs */
+    const uint64_t *a = as->limb, *b = bs->limb;
+    uint64_t *c = cs->limb;
+
+    __uint128_t accum0 = 0, accum1 = 0, accum2;
+    uint64_t mask = (1ull<<56) - 1;  /* low 56 bits of a limb */
+
+    uint64_t aa[4] VECTOR_ALIGNED, bb[4] VECTOR_ALIGNED, bbb[4] VECTOR_ALIGNED;
+
+    /* For some reason clang doesn't vectorize this without prompting? */
+    unsigned int i;
+    for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) {
+        ((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i];
+        ((uint64xn_t*)bb)[i] = ((const uint64xn_t*)b)[i] + ((const uint64xn_t*)(&b[4]))[i]; 
+        ((uint64xn_t*)bbb)[i] = ((const uint64xn_t*)bb)[i] + ((const uint64xn_t*)(&b[4]))[i];     
+    }
+    /*
+     * Scalar equivalent of the loop above, for i = 0..3:
+     *   aa[i]  = a[i]  + a[i+4];
+     *   bb[i]  = b[i]  + b[i+4];
+     *   bbb[i] = bb[i] + b[i+4];
+     */
+
+    accum2  = widemul(&a[0],&b[3]);
+    accum0  = widemul(&aa[0],&bb[3]);
+    accum1  = widemul(&a[4],&b[7]);
+
+    mac(&accum2, &a[1], &b[2]);
+    mac(&accum0, &aa[1], &bb[2]);
+    mac(&accum1, &a[5], &b[6]);
+
+    mac(&accum2, &a[2], &b[1]);
+    mac(&accum0, &aa[2], &bb[1]);
+    mac(&accum1, &a[6], &b[5]);
+
+    mac(&accum2, &a[3], &b[0]);
+    mac(&accum0, &aa[3], &bb[0]);
+    mac(&accum1, &a[7], &b[4]);
+
+    accum0 -= accum2;
+    accum1 += accum2;
+
+    c[3] = ((uint64_t)(accum1)) & mask; /* limbs 3 and 7 are produced first; their incoming carries are added near the end */
+    c[7] = ((uint64_t)(accum0)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+    
+    mac(&accum0, &aa[1],&bb[3]);
+    mac(&accum1, &a[5], &b[7]);
+    mac(&accum0, &aa[2], &bb[2]);
+    mac(&accum1, &a[6], &b[6]);
+    mac(&accum0, &aa[3], &bb[1]);
+    accum1 += accum0;
+
+    accum2 = widemul(&a[0],&b[0]);
+    accum1 -= accum2;
+    accum0 += accum2;
+    
+    msb(&accum0, &a[1], &b[3]);
+    msb(&accum0, &a[2], &b[2]);
+    mac(&accum1, &a[7], &b[5]);
+    msb(&accum0, &a[3], &b[1]);
+    mac(&accum1, &aa[0], &bb[0]);
+    mac(&accum0, &a[4], &b[4]);
+
+    c[0] = ((uint64_t)(accum0)) & mask;
+    c[4] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum2  = widemul(&a[2],&b[7]);
+    mac(&accum0, &a[6], &bb[3]);
+    mac(&accum1, &aa[2], &bbb[3]);
+
+    mac(&accum2, &a[3], &b[6]);
+    mac(&accum0, &a[7], &bb[2]);
+    mac(&accum1, &aa[3], &bbb[2]);
+
+    mac(&accum2, &a[0],&b[1]);
+    mac(&accum1, &aa[0], &bb[1]);
+    mac(&accum0, &a[4], &b[5]);
+
+    mac(&accum2, &a[1], &b[0]);
+    mac(&accum1, &aa[1], &bb[0]);
+    mac(&accum0, &a[5], &b[4]);
+
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    c[1] = ((uint64_t)(accum0)) & mask;
+    c[5] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum2  = widemul(&a[3],&b[7]);
+    mac(&accum0, &a[7], &bb[3]);
+    mac(&accum1, &aa[3], &bbb[3]);
+
+    mac(&accum2, &a[0],&b[2]);
+    mac(&accum1, &aa[0], &bb[2]);
+    mac(&accum0, &a[4], &b[6]);
+
+    mac(&accum2, &a[1], &b[1]);
+    mac(&accum1, &aa[1], &bb[1]);
+    mac(&accum0, &a[5], &b[5]);
+
+    mac(&accum2, &a[2], &b[0]);
+    mac(&accum1, &aa[2], &bb[0]);
+    mac(&accum0, &a[6], &b[4]);
+
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    c[2] = ((uint64_t)(accum0)) & mask;
+    c[6] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum0 += c[3]; /* add the carries that reach the limbs stored at the start */
+    accum1 += c[7];
+    c[3] = ((uint64_t)(accum0)) & mask;
+    c[7] = ((uint64_t)(accum1)) & mask;
+
+    /* we could almost stop here, but it wouldn't be stable, so... */
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+    c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); /* carry out of limb 3 plus carry out of limb 7 */
+    c[0] += ((uint64_t)(accum1)); /* carry out of limb 7 also goes to limb 0 */
+}
+
+void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
+    /*
+     * cs = as * b for a single-word multiplier b.
+     *
+     * Limbs 0..3 and limbs 4..7 are handled as two carry chains that
+     * advance together; each step stores the low 56 bits of its running
+     * sum and keeps the rest as the carry into the next limb.
+     */
+    const uint64_t low56 = (1ull<<56) - 1;
+    const uint64_t *src = as->limb;
+    uint64_t *dst = cs->limb;
+    __uint128_t lo_chain, hi_chain;
+    unsigned int k;
+    /* Limbs 0 and 4 start the chains, so they are plain products. */
+    lo_chain = widemul_rm(b, &src[0]);
+    hi_chain = widemul_rm(b, &src[4]);
+    dst[0] = lo_chain & low56;
+    dst[4] = hi_chain & low56;
+    lo_chain >>= 56;
+    hi_chain >>= 56;
+
+    /* Limbs 1..3 and 5..7 add their product on top of the carry. */
+    for (k=1; k<4; k++) {
+        mac_rm(&lo_chain, b, &src[k]);
+        mac_rm(&hi_chain, b, &src[k+4]);
+        dst[k]   = lo_chain & low56;
+        dst[k+4] = hi_chain & low56;
+        lo_chain >>= 56;
+        hi_chain >>= 56;
+    }
+
+    /* The carry out of limb 3 and the carry out of limb 7 both go to limb 4. */
+    lo_chain += hi_chain + dst[4];
+    dst[4] = lo_chain & low56;
+    dst[5] += lo_chain >> 56;
+
+    /* The carry out of limb 7 goes to limb 0 as well. */
+    hi_chain += dst[0]; dst[0] = hi_chain & low56; dst[1] += hi_chain >> 56;
+}
+
+void gf_sqr (gf_s *__restrict__ cs, const gf as) { /* cs = as * as on eight 56-bit limbs */
+    const uint64_t *a = as->limb;
+    uint64_t *c = cs->limb;
+
+    __uint128_t accum0 = 0, accum1 = 0, accum2;
+    uint64_t mask = (1ull<<56) - 1;  /* low 56 bits of a limb */
+
+    uint64_t aa[4] VECTOR_ALIGNED; /* aa[i] = a[i] + a[i+4], filled by the vector loop below */
+
+    /* For some reason clang doesn't vectorize this without prompting? */
+    unsigned int i;
+    for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) {
+      ((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i];
+    }
+
+    accum2  = widemul(&a[0],&a[3]);
+    accum0  = widemul(&aa[0],&aa[3]);
+    accum1  = widemul(&a[4],&a[7]);
+
+    mac(&accum2, &a[1], &a[2]);
+    mac(&accum0, &aa[1], &aa[2]);
+    mac(&accum1, &a[5], &a[6]);
+
+    accum0 -= accum2;
+    accum1 += accum2;
+
+    c[3] = ((uint64_t)(accum1))<<1 & mask; /* each cross term occurs twice: store the doubled sum ... */
+    c[7] = ((uint64_t)(accum0))<<1 & mask;
+
+    accum0 >>= 55; /* ... and shift by 55 instead of 56 to carry the doubled value */
+    accum1 >>= 55;
+
+    mac2(&accum0, &aa[1],&aa[3]);
+    mac2(&accum1, &a[5], &a[7]);
+    mac(&accum0, &aa[2], &aa[2]);
+    accum1 += accum0;
+
+    msb2(&accum0, &a[1], &a[3]);
+    mac(&accum1, &a[6], &a[6]);
+    
+    accum2 = widemul(&a[0],&a[0]);
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    msb(&accum0, &a[2], &a[2]);
+    mac(&accum1, &aa[0], &aa[0]);
+    mac(&accum0, &a[4], &a[4]);
+
+    c[0] = ((uint64_t)(accum0)) & mask;
+    c[4] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum2  = widemul2(&aa[2],&aa[3]);
+    msb2(&accum0, &a[2], &a[3]);
+    mac2(&accum1, &a[6], &a[7]);
+
+    accum1 += accum2;
+    accum0 += accum2;
+
+    accum2  = widemul2(&a[0],&a[1]);
+    mac2(&accum1, &aa[0], &aa[1]);
+    mac2(&accum0, &a[4], &a[5]);
+
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    c[1] = ((uint64_t)(accum0)) & mask;
+    c[5] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum2  = widemul(&aa[3],&aa[3]);
+    msb(&accum0, &a[3], &a[3]);
+    mac(&accum1, &a[7], &a[7]);
+
+    accum1 += accum2;
+    accum0 += accum2;
+
+    accum2  = widemul2(&a[0],&a[2]);
+    mac2(&accum1, &aa[0], &aa[2]);
+    mac2(&accum0, &a[4], &a[6]);
+
+    mac(&accum2, &a[1], &a[1]);
+    mac(&accum1, &aa[1], &aa[1]);
+    mac(&accum0, &a[5], &a[5]);
+
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    c[2] = ((uint64_t)(accum0)) & mask;
+    c[6] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum0 += c[3]; /* limbs 3 and 7 were stored first; now add the carries that reach them */
+    accum1 += c[7];
+    c[3] = ((uint64_t)(accum0)) & mask;
+    c[7] = ((uint64_t)(accum1)) & mask;
+
+    /* we could almost stop here, but it wouldn't be stable, so... */
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+    c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); /* carry out of limb 3 plus carry out of limb 7 */
+    c[0] += ((uint64_t)(accum1)); /* carry out of limb 7 also goes to limb 0 */
+}
diff --git a/crypto/ec/curve448/arch_x86_64/f_impl.h b/crypto/ec/curve448/arch_x86_64/f_impl.h
new file mode 100644
index 0000000000..a85044a7f4
--- /dev/null
+++ b/crypto/ec/curve448/arch_x86_64/f_impl.h
@@ -0,0 +1,65 @@
+/* Copyright (c) 2014-2016 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#define GF_HEADROOM 60
+#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
+#define LIMB_PLACE_VALUE(i) 56
+
+void gf_add_RAW (gf out, const gf a, const gf b) {
+    /* Limb-wise out = a + b with no carry handling, done in uint64xn_t-sized
+     * vector chunks.  Scalar equivalent: out->limb[i] = a->limb[i] + b->limb[i]. */
+    const uint64xn_t *va = (const uint64xn_t*)a;
+    const uint64xn_t *vb = (const uint64xn_t*)b;
+    uint64xn_t *vout = (uint64xn_t*)out;
+    unsigned int k;
+    for (k=0; k<sizeof(*out)/sizeof(uint64xn_t); k++) {
+        vout[k] = va[k] + vb[k];
+    }
+}
+
+void gf_sub_RAW (gf out, const gf a, const gf b) {
+    /* Limb-wise out = a - b with no borrow handling, done in uint64xn_t-sized
+     * vector chunks.  Scalar equivalent: out->limb[i] = a->limb[i] - b->limb[i]. */
+    const uint64xn_t *va = (const uint64xn_t*)a;
+    const uint64xn_t *vb = (const uint64xn_t*)b;
+    uint64xn_t *vout = (uint64xn_t*)out;
+    unsigned int k;
+    for (k=0; k<sizeof(*out)/sizeof(uint64xn_t); k++) {
+        vout[k] = va[k] - vb[k];
+    }
+}
+
+void gf_bias (gf a, int amt) {
+    /* Adds amt*(2^56-1) to every limb except limb 4, which gets amt less: limb-wise amt*p for p = 2^448 - 2^224 - 1. */
+    const uint64_t every = ((1ull<<56)-1)*amt, fourth = every-amt;
+#if __AVX2__
+    const uint64x4_t plain = {every,every,every,every}, with4 = {fourth,every,every,every};
+    uint64x4_t *quad = (uint64x4_t*) a;
+    quad[0] += plain;          /* limbs 0..3 */
+    quad[1] += with4;          /* limbs 4..7 */
+#elif __SSE2__
+    const uint64x2_t plain = {every,every}, with4 = {fourth,every};
+    uint64x2_t *duo = (uint64x2_t*) a;
+    duo[0] += plain;           /* limbs 0,1 */
+    duo[1] += plain;           /* limbs 2,3 */
+    duo[2] += with4;           /* limbs 4,5 */
+    duo[3] += plain;           /* limbs 6,7 */
+#else
+    for (unsigned int k=0; k<sizeof(*a)/sizeof(uint64_t); k++) {
+        a->limb[k] += (k!=4) ? every : fourth;
+    }
+#endif
+}
+
+void gf_weak_reduce (gf a) { /* one carry-propagation pass over the eight 56-bit limbs */
+    /* PERF: use pshufb/palignr if anyone cares about speed of this */
+    const uint64_t low56 = (1ull<<56) - 1;
+    const uint64_t top_carry = a->limb[7] >> 56; /* 2^448 = 2^224 + 1 (mod p): feeds limb 4 and limb 0 */
+    a->limb[4] += top_carry;
+    for (unsigned int k=8; --k>0; ) { /* k = 7..1, downwards so limb[k-1] is read before it is masked */
+        a->limb[k] = (a->limb[k] & low56) + (a->limb[k-1]>>56);
+    }
+    a->limb[0] = (a->limb[0] & low56) + top_carry;
+}
+
diff --git a/crypto/ec/curve448/constant_time.h b/crypto/ec/curve448/constant_time.h
new file mode 100644
index 0000000000..025ffe1729
--- /dev/null
+++ b/crypto/ec/curve448/constant_time.h
@@ -0,0 +1,362 @@
+/**
+ * @file constant_time.h
+ * @copyright
+ *   Copyright (c) 2014 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ * @author Mike Hamburg
+ *
+ * @brief Constant-time routines.
+ */
+
+#ifndef __CONSTANT_TIME_H__
+#define __CONSTANT_TIME_H__ 1
+
+#include "word.h"
+#include <string.h>
+
+/*
+ * Constant-time operations on hopefully-compile-time-sized memory
+ * regions.  Needed for flexibility / demagication: not all fields
+ * have sizes which are multiples of the vector width, necessitating
+ * a change from the Ed448 versions.
+ *
+ * These routines would be much simpler to define at the byte level,
+ * but if not vectorized they would be a significant fraction of the
+ * runtime.  Eg on NEON-less ARM, constant_time_lookup is like 15% of
+ * signing time, vs 6% on Haswell with its fancy AVX2 vectors.
+ *
+ * If the compiler could do a good job of autovectorizing the code,
+ * we could just leave it with the byte definition.  But that's unlikely
+ * on most deployed compilers, especially if you consider that pcmpeq[size]
+ * is much faster than moving a scalar to the vector unit (which is what
+ * a naive autovectorizer will do with constant_time_lookup on Intel).
+ *
+ * Instead, we're putting our trust in the loop unroller and unswitcher.
+ */
+
+
+/**
+ * big_register_t wrapped in a packed struct, used for accesses at addresses that may not be register-aligned.
+ */
+typedef struct {
+    big_register_t unaligned;
+} __attribute__((packed)) unaligned_br_t;
+
+/**
+ * word_t wrapped in a packed struct, used for accesses at addresses that may not be word-aligned.
+ */
+typedef struct {
+    word_t unaligned;
+} __attribute__((packed)) unaligned_word_t;
+
+/**
+ * @brief Constant-time conditional swap.
+ *
+ * If doswap, then swap elem_bytes between *a and *b.  doswap is applied as a
+ * bit mask to the XOR of the operands, so it must be all-ones or zero.
+ * *a and *b must not alias.  Also, they must be at least as aligned
+ * as their sizes, if the CPU cares about that sort of thing.
+ */
+static __inline__ void
+__attribute__((unused,always_inline))
+constant_time_cond_swap (
+    void *__restrict__ a_,
+    void *__restrict__ b_,
+    word_t elem_bytes,
+    mask_t doswap
+) {
+    word_t k;
+    unsigned char *a = (unsigned char *)a_;
+    unsigned char *b = (unsigned char *)b_;
+    /* Whole big registers first.  Bound is k+width<=elem_bytes so a small elem_bytes cannot wrap. */
+    big_register_t br_mask = br_set_to_mask(doswap);
+    for (k=0; k+sizeof(big_register_t)<=elem_bytes; k+=sizeof(big_register_t)) {
+        if (elem_bytes % sizeof(big_register_t)) {
+            /* unaligned */
+            big_register_t xor =
+                ((unaligned_br_t*)(&a[k]))->unaligned
+              ^ ((unaligned_br_t*)(&b[k]))->unaligned;
+            xor &= br_mask;
+            ((unaligned_br_t*)(&a[k]))->unaligned ^= xor;
+            ((unaligned_br_t*)(&b[k]))->unaligned ^= xor;
+        } else {
+            /* aligned */
+            big_register_t xor =
+                *((big_register_t*)(&a[k]))
+              ^ *((big_register_t*)(&b[k]));
+            xor &= br_mask;
+            *((big_register_t*)(&a[k])) ^= xor;
+            *((big_register_t*)(&b[k])) ^= xor;
+        }
+    }
+    /* Then whole words, when the remainder after the big registers holds at least one. */
+    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
+        for (; k+sizeof(word_t)<=elem_bytes; k+=sizeof(word_t)) {
+            if (elem_bytes % sizeof(word_t)) {
+                /* unaligned */
+                word_t xor =
+                    ((unaligned_word_t*)(&a[k]))->unaligned
+                  ^ ((unaligned_word_t*)(&b[k]))->unaligned;
+                xor &= doswap;
+                ((unaligned_word_t*)(&a[k]))->unaligned ^= xor;
+                ((unaligned_word_t*)(&b[k]))->unaligned ^= xor;
+            } else {
+                /* aligned */
+                word_t xor =
+                    *((word_t*)(&a[k]))
+                  ^ *((word_t*)(&b[k]));
+                xor &= doswap;
+                *((word_t*)(&a[k])) ^= xor;
+                *((word_t*)(&b[k])) ^= xor;
+            }
+        }
+    }
+    /* Finally any trailing bytes. */
+    if (elem_bytes % sizeof(word_t)) {
+        for (; k<elem_bytes; k+=1) {
+            unsigned char xor = a[k] ^ b[k];
+            xor &= doswap;
+            a[k] ^= xor;
+            b[k] ^= xor;
+        }
+    }
+}
+
+/**
+ * @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
+ *
+ * All n_table entries are read and masked, so the access pattern does not
+ * depend on idx; out is left all-zero if idx >= n_table.  The table must be at
+ * least as aligned as elem_bytes.  The output must be word aligned, and if the
+ * input size is vector aligned it must also be vector aligned.  No aliasing.
+ */
+static __inline__ void
+__attribute__((unused,always_inline))
+constant_time_lookup (
+    void *__restrict__ out_,
+    const void *table_,
+    word_t elem_bytes,
+    word_t n_table,
+    word_t idx
+) {
+    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
+    
+    /* Can't do pointer arithmetic on void* */
+    unsigned char *out = (unsigned char *)out_;
+    const unsigned char *table = (const unsigned char *)table_;
+    word_t j,k;
+    /* Start from zero and OR in each entry under a mask that is set only for j == idx. */
+    memset(out, 0, elem_bytes);
+    for (j=0; j<n_table; j++, big_i-=big_one) {
+        big_register_t br_mask = br_is_zero(big_i);
+        for (k=0; k+sizeof(big_register_t)<=elem_bytes; k+=sizeof(big_register_t)) { /* cannot wrap for small elem_bytes */
+            if (elem_bytes % sizeof(big_register_t)) {
+                /* unaligned */
+                ((unaligned_br_t *)(out+k))->unaligned
+                    |= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned;
+            } else {
+                /* aligned */
+                *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]);
+            }
+        }
+        /* Word-sized and byte-sized remainders use a word mask instead of the register mask. */
+        word_t mask = word_is_zero(idx^j);
+        if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
+            for (; k+sizeof(word_t)<=elem_bytes; k+=sizeof(word_t)) {
+                if (elem_bytes % sizeof(word_t)) {
+                    /* input unaligned, output aligned */
+                    *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned;
+                } else {
+                    /* aligned */
+                    *(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]);
+                }
+            }
+        }
+        
+        if (elem_bytes % sizeof(word_t)) {
+            for (; k<elem_bytes; k+=1) {
+                out[k] |= mask & table[k+j*elem_bytes];
+            }
+        }
+    }
+}
+
+/**
+ * @brief Constant-time equivalent of memcpy(table + elem_bytes*idx, in, elem_bytes);
+ *
+ * All n_table entries are rewritten (unchanged unless j == idx), so the access
+ * pattern does not depend on idx.  The table must be at least as aligned as
+ * elem_bytes.  The input must be word aligned, and if the output size is
+ * vector aligned it must also be vector aligned.  Table and input must not alias.
+ */
+static __inline__ void
+__attribute__((unused,always_inline))
+constant_time_insert (
+    void *__restrict__ table_,
+    const void *in_,
+    word_t elem_bytes,
+    word_t n_table,
+    word_t idx
+) {
+    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
+    
+    /* Can't do pointer arithmetic on void* */
+    const unsigned char *in = (const unsigned char *)in_;
+    unsigned char *table = (unsigned char *)table_;
+    word_t j,k;
+    /* Each entry becomes (entry & ~mask) | (in & mask), with mask set only for j == idx. */
+    for (j=0; j<n_table; j++, big_i-=big_one) {
+        big_register_t br_mask = br_is_zero(big_i);
+        for (k=0; k+sizeof(big_register_t)<=elem_bytes; k+=sizeof(big_register_t)) { /* cannot wrap for small elem_bytes */
+            if (elem_bytes % sizeof(big_register_t)) {
+                /* unaligned */
+                ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned
+                    = ( ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned & ~br_mask )
+                    | ( ((const unaligned_br_t *)(in+k))->unaligned & br_mask );
+            } else {
+                /* aligned */
+                *(big_register_t*)(&table[k+j*elem_bytes])
+                    = ( *(big_register_t*)(&table[k+j*elem_bytes]) & ~br_mask )
+                    | ( *(const big_register_t *)(in+k) & br_mask );
+            }
+        }
+        /* Word-sized and byte-sized remainders use a word mask instead of the register mask. */
+        word_t mask = word_is_zero(idx^j);
+        if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
+            for (; k+sizeof(word_t)<=elem_bytes; k+=sizeof(word_t)) {
+                if (elem_bytes % sizeof(word_t)) {
+                    /* output unaligned, input aligned */
+                    ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned
+                        = ( ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned & ~mask )
+                        | ( *(const word_t *)(in+k) & mask );
+                } else {
+                    /* aligned */
+                    *(word_t*)(&table[k+j*elem_bytes])
+                        = ( *(word_t*)(&table[k+j*elem_bytes]) & ~mask )
+                        | ( *(const word_t *)(in+k) & mask );
+                }
+            }
+        }
+        
+        if (elem_bytes % sizeof(word_t)) {
+            for (; k<elem_bytes; k+=1) {
+                table[k+j*elem_bytes]
+                    = ( table[k+j*elem_bytes] & ~mask )
+                    | ( in[k] & mask );
+            }
+        }
+    }
+}
+
+/**
+ * @brief Constant-time a = b&mask.
+ *
+ * The input and output must be at least as aligned as elem_bytes.  mask is ANDed into every chunk.
+ */
+static __inline__ void
+__attribute__((unused,always_inline))
+constant_time_mask (
+    void * a_,
+    const void *b_,
+    word_t elem_bytes,
+    mask_t mask
+) {
+    unsigned char *a = (unsigned char *)a_;
+    const unsigned char *b = (const unsigned char *)b_;
+    /* Whole big registers first.  Bound is k+width<=elem_bytes so a small elem_bytes cannot wrap. */
+    word_t k;
+    big_register_t br_mask = br_set_to_mask(mask);
+    for (k=0; k+sizeof(big_register_t)<=elem_bytes; k+=sizeof(big_register_t)) {
+        if (elem_bytes % sizeof(big_register_t)) {
+            /* unaligned */
+            ((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned;
+        } else {
+            /* aligned */
+            *(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]);
+        }
+    }
+    /* Then whole words, when the remainder after the big registers holds at least one. */
+    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
+        for (; k+sizeof(word_t)<=elem_bytes; k+=sizeof(word_t)) {
+            if (elem_bytes % sizeof(word_t)) {
+                /* unaligned */
+                ((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned;
+            } else {
+                /* aligned */
+                *(word_t *)(a+k) = mask & *(const word_t*)(&b[k]);
+            }
+        }
+    }
+    /* Finally any trailing bytes. */
+    if (elem_bytes % sizeof(word_t)) {
+        for (; k<elem_bytes; k+=1) {
+            a[k] = mask & b[k];
+        }
+    }
+}
+
+/**
+ * @brief Constant-time a = mask ? bTrue : bFalse.
+ *
+ * The input and output must be at least as aligned as alignment_bytes
+ * or their size, whichever is smaller.  Both inputs are always read.
+ *
+ * Note that the output is not __restrict__, but if it overlaps either
+ * input, it must be equal and not partially overlap.
+ */
+static __inline__ void
+__attribute__((unused,always_inline))
+constant_time_select (
+    void *a_,
+    const void *bFalse_,
+    const void *bTrue_,
+    word_t elem_bytes,
+    mask_t mask,
+    size_t alignment_bytes
+) {
+    unsigned char *a = (unsigned char *)a_;
+    const unsigned char *bTrue = (const unsigned char *)bTrue_;
+    const unsigned char *bFalse = (const unsigned char *)bFalse_;
+    /* OR-ing in the size means a low bit set in either value selects the unaligned path below. */
+    alignment_bytes |= elem_bytes;
+
+    word_t k;
+    big_register_t br_mask = br_set_to_mask(mask);
+    for (k=0; k+sizeof(big_register_t)<=elem_bytes; k+=sizeof(big_register_t)) { /* cannot wrap for small elem_bytes */
+        if (alignment_bytes % sizeof(big_register_t)) {
+            /* unaligned */
+            ((unaligned_br_t*)(&a[k]))->unaligned =
+                  ( br_mask & ((const unaligned_br_t*)(&bTrue [k]))->unaligned)
+                | (~br_mask & ((const unaligned_br_t*)(&bFalse[k]))->unaligned);
+        } else {
+            /* aligned */
+            *(big_register_t *)(a+k) =
+                  ( br_mask & *(const big_register_t*)(&bTrue [k]))
+                | (~br_mask & *(const big_register_t*)(&bFalse[k]));
+        }
+    }
+    /* Then whole words, when the remainder after the big registers holds at least one. */
+    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
+        for (; k+sizeof(word_t)<=elem_bytes; k+=sizeof(word_t)) {
+            if (alignment_bytes % sizeof(word_t)) {
+                /* unaligned */
+                ((unaligned_word_t*)(&a[k]))->unaligned =
+                    ( mask & ((const unaligned_word_t*)(&bTrue [k]))->unaligned)
+                  | (~mask & ((const unaligned_word_t*)(&bFalse[k]))->unaligned);
+            } else {
+                /* aligned */
+                *(word_t *)(a+k) =
+                    ( mask & *(const word_t*)(&bTrue [k]))
+                  | (~mask & *(const word_t*)(&bFalse[k]));
+            }
+        }
+    }
+    /* Finally any trailing bytes. */
+    if (elem_bytes % sizeof(word_t)) {
+        for (; k<elem_bytes; k+=1) {
+            a[k] = ( mask & bTrue[k]) | (~mask & bFalse[k]);
+        }
+    }
+}
+
+#endif /* __CONSTANT_TIME_H__ */
diff --git a/crypto/ec/curve448/decaf.c b/crypto/ec/curve448/decaf.c
new file mode 100644
index 0000000000..3fdc491db6
--- /dev/null
+++ b/crypto/ec/curve448/decaf.c
@@ -0,0 +1,1598 @@
+/**
+ * @file ed448goldilocks/decaf.c
+ * @author Mike Hamburg
+ *
+ * @copyright
+ *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ *
+ * @brief Decaf high-level functions.
+ *
+ * @warning This file was automatically generated in Python.
+ * Please do not edit it.
+ */
+#define _XOPEN_SOURCE 600 /* for posix_memalign */
+#include "word.h"
+#include "field.h"
+
+#include <decaf.h>
+#include <decaf/ed448.h>
+
+/* Template stuff */
+#define API_NS(_id) decaf_448_##_id
+#define SCALAR_BITS DECAF_448_SCALAR_BITS
+#define SCALAR_SER_BYTES DECAF_448_SCALAR_BYTES
+#define SCALAR_LIMBS DECAF_448_SCALAR_LIMBS
+#define scalar_t API_NS(scalar_t)
+#define point_t API_NS(point_t)
+#define precomputed_s API_NS(precomputed_s)
+#define IMAGINE_TWIST 0
+#define COFACTOR 4
+
+/* Comb config: number of combs, n, t, s. */
+#define COMBS_N 5
+#define COMBS_T 5
+#define COMBS_S 18
+#define DECAF_WINDOW_BITS 5
+#define DECAF_WNAF_FIXED_TABLE_BITS 5
+#define DECAF_WNAF_VAR_TABLE_BITS 3
+
+#define EDDSA_USE_SIGMA_ISOGENY 0
+
+static const int EDWARDS_D = -39081;
+static const scalar_t point_scalarmul_adjustment = {{{
+    SC_LIMB(0xc873d6d54a7bb0cf), SC_LIMB(0xe933d8d723a70aad), SC_LIMB(0xbb124b65129c96fd), SC_LIMB(0x00000008335dc163)
+}}}, precomputed_scalarmul_adjustment = {{{
+    SC_LIMB(0xc873d6d54a7bb0cf), SC_LIMB(0xe933d8d723a70aad), SC_LIMB(0xbb124b65129c96fd), SC_LIMB(0x00000008335dc163)
+}}};
+
+const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES] = { 0x05 };
+
+#define RISTRETTO_FACTOR DECAF_448_RISTRETTO_FACTOR
+const gf RISTRETTO_FACTOR = {{{
+    0x42ef0f45572736, 0x7bf6aa20ce5296, 0xf4fd6eded26033, 0x968c14ba839a66, 0xb8d54b64a2d780, 0x6aa0a1f1a7b8a5, 0x683bf68d722fa2, 0x22d962fbeb24f7
+}}};
+
+#if IMAGINE_TWIST
+#define TWISTED_D (-(EDWARDS_D))
+#else
+#define TWISTED_D ((EDWARDS_D)-1)
+#endif
+
+#if TWISTED_D < 0 /* NB: EDWARDS_D is a C constant, not a macro, so the preprocessor evaluates it as 0 in this test */
+#define EFF_D (-(TWISTED_D))
+#define NEG_D 1
+#else
+#define EFF_D TWISTED_D
+#define NEG_D 0
+#endif
+
+/* End of template stuff */
+
+/* Sanity */
+#if (COFACTOR == 8) && !IMAGINE_TWIST && !UNSAFE_CURVE_HAS_POINTS_AT_INFINITY
+/* FUTURE MAGIC: Curve41417 doesn't have these properties. */
+#error "Currently require IMAGINE_TWIST (and thus p=5 mod 8) for cofactor 8"
+        /* OK, but why?
+         * Two reasons: #1: There are bugs when COFACTOR == 8 && !IMAGINE_TWIST
+         * #2: (left unstated in the original source)
+         */
+#endif
+
+#if IMAGINE_TWIST && (P_MOD_8 != 5)
+    #error "Cannot use IMAGINE_TWIST except for p == 5 mod 8"
+#endif
+
+#if (COFACTOR != 8) && (COFACTOR != 4)
+    #error "COFACTOR must be 4 or 8"
+#endif
+ 
+#if IMAGINE_TWIST
+    extern const gf SQRT_MINUS_ONE;
+#endif
+
+#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */
+
+extern const point_t API_NS(point_base);
+
+/* Projective Niels coordinates: a = y-x, b = y+x, c = 2*TWISTED_D*t; pniels adds z = 2*z (as filled in by pt_to_pniels) */
+typedef struct { gf a, b, c; } niels_s, niels_t[1];
+typedef struct { niels_t n; gf z; } VECTOR_ALIGNED pniels_s, pniels_t[1];
+
+/* Precomputed base: a flat table of COMBS_N * 2^(COMBS_T-1) niels entries */
+struct precomputed_s { niels_t table [COMBS_N<<(COMBS_T-1)]; };
+
+extern const gf API_NS(precomputed_base_as_fe)[];
+const precomputed_s *API_NS(precomputed_base) =
+    (const precomputed_s *) &API_NS(precomputed_base_as_fe);
+
+const size_t API_NS(sizeof_precomputed_s) = sizeof(precomputed_s);
+const size_t API_NS(alignof_precomputed_s) = sizeof(big_register_t);
+
+/** Inverse: y = 1/x, computed as x * isr(x^2)^2.  y may alias x.  If assert_nonzero, asserts gf_isr's result mask. */
+static void
+gf_invert(gf y, const gf x, int assert_nonzero) {
+    gf t1, t2;
+    gf_sqr(t1, x); /* t1 = x^2 */
+    mask_t ret = gf_isr(t2, t1); /* t2 = +-1/sqrt(x^2) = +-1/x */
+    (void)ret; /* keeps ret referenced when assert() compiles to nothing */
+    if (assert_nonzero) assert(ret);
+    gf_sqr(t1, t2); /* t1 = 1/x^2: the sign ambiguity squares away */
+    gf_mul(t2, t1, x); /* t2 = 1/x; not direct to y in case of alias. */
+    gf_copy(y, t2);
+}
+
+/** identity = (0,1) */
+const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};
+
+/* Predeclare because not static: called by elligator */
+void API_NS(deisogenize) (
+    gf_s *__restrict__ s,
+    gf_s *__restrict__ inv_el_sum,
+    gf_s *__restrict__ inv_el_m1,
+    const point_t p,
+    mask_t toggle_s,
+    mask_t toggle_altx,
+    mask_t toggle_rotation
+);
+
+void API_NS(deisogenize) ( /* writes s, inv_el_sum and inv_el_m1 from point p; the toggle_* masks flip sign choices */
+    gf_s *__restrict__ s,
+    gf_s *__restrict__ inv_el_sum,
+    gf_s *__restrict__ inv_el_m1,
+    const point_t p,
+    mask_t toggle_s,
+    mask_t toggle_altx,
+    mask_t toggle_rotation
+) {
+#if COFACTOR == 4 && !IMAGINE_TWIST
+    (void)toggle_rotation; /* Only applies to cofactor 8 */
+    gf t1;
+    gf_s *t2 = s, *t3=inv_el_sum, *t4=inv_el_m1;
+    /* t2, t3, t4 alias the three outputs: they serve as scratch until their final values are written. */
+    gf_add(t1,p->x,p->t);
+    gf_sub(t2,p->x,p->t);
+    gf_mul(t3,t1,t2); /* t3 = num */
+    gf_sqr(t2,p->x);
+    gf_mul(t1,t2,t3);
+    gf_mulw(t2,t1,-1-TWISTED_D); /* -x^2 * (a-d) * num */
+    gf_isr(t1,t2);    /* t1 = isr */
+    gf_mul(t2,t1,t3); /* t2 = ratio */
+    gf_mul(t4,t2,RISTRETTO_FACTOR);
+    mask_t negx = gf_lobit(t4) ^ toggle_altx;
+    gf_cond_neg(t2, negx);
+    gf_mul(t3,t2,p->z);
+    gf_sub(t3,t3,p->t); /* last write to t3, i.e. the value left in inv_el_sum */
+    gf_mul(t2,t3,p->x);
+    gf_mulw(t4,t2,-1-TWISTED_D);
+    gf_mul(s,t4,t1);
+    mask_t lobs = gf_lobit(s);
+    gf_cond_neg(s,lobs); /* negate s when its low bit is set */
+    gf_copy(inv_el_m1,p->x);
+    gf_cond_neg(inv_el_m1,~lobs^negx^toggle_s);
+    gf_add(inv_el_m1,inv_el_m1,p->t);
+    
+#elif COFACTOR == 8 && IMAGINE_TWIST
+    /* More complicated because of rotation */
+    gf t1,t2,t3,t4,t5;
+    gf_add(t1,p->z,p->y);
+    gf_sub(t2,p->z,p->y);
+    gf_mul(t3,t1,t2);      /* t3 = num */
+    gf_mul(t2,p->x,p->y);  /* t2 = den */
+    gf_sqr(t1,t2);
+    gf_mul(t4,t1,t3);
+    gf_mulw(t1,t4,-1-TWISTED_D);
+    gf_isr(t4,t1);         /* isqrt(num*(a-d)*den^2) */
+    gf_mul(t1,t2,t4);
+    gf_mul(t2,t1,RISTRETTO_FACTOR); /* t2 = "iden" in ristretto.sage */
+    gf_mul(t1,t3,t4);                 /* t1 = "inum" in ristretto.sage */
+
+    /* Calculate altxy = iden*inum*i*t^2*(d-a) */
+    gf_mul(t3,t1,t2);
+    gf_mul_i(t4,t3);
+    gf_mul(t3,t4,p->t);
+    gf_mul(t4,t3,p->t);
+    gf_mulw(t3,t4,TWISTED_D+1);      /* iden*inum*i*t^2*(d-1) */
+    mask_t rotate = toggle_rotation ^ gf_lobit(t3);
+    
+    /* Rotate if altxy is negative */
+    gf_cond_swap(t1,t2,rotate);
+    gf_mul_i(t4,p->x);
+    gf_cond_sel(t4,p->y,t4,rotate);  /* t4 = "fac" = ix if rotate, else y */
+    
+    gf_mul_i(t5,RISTRETTO_FACTOR); /* t5 = imi */
+    gf_mul(t3,t5,t2);                /* iden * imi */
+    gf_mul(t2,t5,t1);
+    gf_mul(t5,t2,p->t);              /* "altx" = iden*imi*t */
+    mask_t negx = gf_lobit(t5) ^ toggle_altx;
+    
+    gf_cond_neg(t1,negx^rotate);
+    gf_mul(t2,t1,p->z);
+    gf_add(t2,t2,ONE);
+    gf_mul(inv_el_sum,t2,t4);
+    gf_mul(s,inv_el_sum,t3);
+    
+    mask_t negs = gf_lobit(s);
+    gf_cond_neg(s,negs);
+    
+    mask_t negz = ~negs ^ toggle_s ^ negx;
+    gf_copy(inv_el_m1,p->z);
+    gf_cond_neg(inv_el_m1,negz);
+    gf_sub(inv_el_m1,inv_el_m1,t4);
+#else
+#error "Cofactor must be 4 (with no IMAGINE_TWIST) or 8 (with IMAGINE_TWIST)"
+#endif
+}
+
+void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) { /* ser = serialization of deisogenize's s output for p */
+    gf s,ie1,ie2; /* ie1, ie2 receive deisogenize's other two outputs and are not used further */
+    API_NS(deisogenize)(s,ie1,ie2,p,0,0,0); /* all three toggle masks off */
+    gf_serialize(ser,s,1);
+}
+
+decaf_error_t API_NS(point_decode) ( /* deserialize ser into p; p is written even when the result is failure */
+    point_t p,
+    const unsigned char ser[SER_BYTES],
+    decaf_bool_t allow_identity
+) {
+    gf s, s2, num, tmp;
+    gf_s *tmp2=s2, *ynum=p->z, *isr=p->x, *den=p->t;
+    /* tmp2, ynum, isr and den alias s2 and fields of p, which serve as scratch until the final values are stored. */
+    mask_t succ = gf_deserialize(s, ser, 1, 0);
+    succ &= bool_to_mask(allow_identity) | ~gf_eq(s, ZERO); /* s == 0 is accepted only with allow_identity */
+    succ &= ~gf_lobit(s); /* reject encodings whose s has its low bit set */
+    
+    gf_sqr(s2,s);                  /* s^2 = -as^2 */
+#if IMAGINE_TWIST
+    gf_sub(s2,ZERO,s2);            /* -as^2 */
+#endif
+    gf_sub(den,ONE,s2);            /* 1+as^2 */
+    gf_add(ynum,ONE,s2);           /* 1-as^2 */
+    gf_mulw(num,s2,-4*TWISTED_D);
+    gf_sqr(tmp,den);               /* tmp = den^2 */
+    gf_add(num,tmp,num);           /* num = den^2 - 4*d*s^2 */
+    gf_mul(tmp2,num,tmp);          /* tmp2 = num*den^2 */
+    succ &= gf_isr(isr,tmp2);      /* isr = 1/sqrt(num*den^2) */
+    gf_mul(tmp,isr,den);           /* isr*den */
+    gf_mul(p->y,tmp,ynum);         /* isr*den*(1-as^2) */
+    gf_mul(tmp2,tmp,s);            /* s*isr*den */
+    gf_add(tmp2,tmp2,tmp2);        /* 2*s*isr*den */
+    gf_mul(tmp,tmp2,isr);          /* 2*s*isr^2*den */
+    gf_mul(p->x,tmp,num);          /* 2*s*isr^2*den*num */
+    gf_mul(tmp,tmp2,RISTRETTO_FACTOR); /* 2*s*isr*den*magic */
+    gf_cond_neg(p->x,gf_lobit(tmp)); /* flip x */
+    
+#if COFACTOR==8
+    /* Additionally check y != 0 and x*y*isomagic nonegative */
+    succ &= ~gf_eq(p->y,ZERO);
+    gf_mul(tmp,p->x,p->y);
+    gf_mul(tmp2,tmp,RISTRETTO_FACTOR);
+    succ &= ~gf_lobit(tmp2);
+#endif
+
+#if IMAGINE_TWIST
+    gf_copy(tmp,p->x);
+    gf_mul_i(p->x,tmp);
+#endif
+
+    /* Fill in z and t */
+    gf_copy(p->z,ONE);
+    gf_mul(p->t,p->x,p->y);
+    
+    assert(API_NS(point_valid)(p) | ~succ); /* debug check: whenever succ is set, p must be valid */
+    return decaf_succeed_if(mask_to_bool(succ));
+}
+
+void API_NS(point_sub) ( /* p = q - r: point_add's formula with r's (y-x)/(y+x) roles and the sign of the 2*d*t term swapped */
+    point_t p,
+    const point_t q,
+    const point_t r
+) {
+    gf a, b, c, d; /* the N+e notes below presumably track headroom of the unreduced (_nr) results */
+    gf_sub_nr ( b, q->y, q->x ); /* 3+e */
+    gf_sub_nr ( d, r->y, r->x ); /* 3+e */
+    gf_add_nr ( c, r->y, r->x ); /* 2+e */
+    gf_mul ( a, c, b );
+    gf_add_nr ( b, q->y, q->x ); /* 2+e */
+    gf_mul ( p->y, d, b );
+    gf_mul ( b, r->t, q->t );
+    gf_mulw ( p->x, b, 2*EFF_D );
+    gf_add_nr ( b, a, p->y );    /* 2+e */
+    gf_sub_nr ( c, p->y, a );    /* 3+e */
+    gf_mul ( a, q->z, r->z );
+    gf_add_nr ( a, a, a );       /* 2+e */
+    if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
+#if NEG_D
+    gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
+    gf_add_nr ( a, a, p->x );    /* 3+e or 2+e */
+#else
+    gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
+    gf_sub_nr ( a, a, p->x );    /* 4+e or 3+e */
+#endif
+    gf_mul ( p->z, a, p->y );
+    gf_mul ( p->x, p->y, c );
+    gf_mul ( p->y, a, b );
+    gf_mul ( p->t, b, c );
+}
+    
+void API_NS(point_add) ( /* p = q + r on (x, y, z, t) coordinates; all four fields of p are written */
+    point_t p,
+    const point_t q,
+    const point_t r
+) {
+    gf a, b, c, d; /* the N+e notes below presumably track headroom of the unreduced (_nr) results */
+    gf_sub_nr ( b, q->y, q->x ); /* 3+e */
+    gf_sub_nr ( c, r->y, r->x ); /* 3+e */
+    gf_add_nr ( d, r->y, r->x ); /* 2+e */
+    gf_mul ( a, c, b );
+    gf_add_nr ( b, q->y, q->x ); /* 2+e */
+    gf_mul ( p->y, d, b );
+    gf_mul ( b, r->t, q->t );
+    gf_mulw ( p->x, b, 2*EFF_D );
+    gf_add_nr ( b, a, p->y );    /* 2+e */
+    gf_sub_nr ( c, p->y, a );    /* 3+e */
+    gf_mul ( a, q->z, r->z );
+    gf_add_nr ( a, a, a );       /* 2+e */
+    if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
+#if NEG_D
+    gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
+    gf_sub_nr ( a, a, p->x );    /* 4+e or 3+e */
+#else
+    gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
+    gf_add_nr ( a, a, p->x );    /* 3+e or 2+e */
+#endif
+    gf_mul ( p->z, a, p->y );
+    gf_mul ( p->x, p->y, c );
+    gf_mul ( p->y, a, b );
+    gf_mul ( p->t, b, c );
+}
+
+static DECAF_NOINLINE void
+point_double_internal ( /* p = 2*q; callers in this file pass p == q */
+    point_t p,
+    const point_t q,
+    int before_double /* nonzero: skip computing p->t */
+) {
+    gf a, b, c, d;
+    gf_sqr ( c, q->x );
+    gf_sqr ( a, q->y );
+    gf_add_nr ( d, c, a );             /* 2+e */
+    gf_add_nr ( p->t, q->y, q->x );    /* 2+e */
+    gf_sqr ( b, p->t );
+    gf_subx_nr ( b, b, d, 3 );         /* 4+e */
+    gf_sub_nr ( p->t, a, c );          /* 3+e */
+    gf_sqr ( p->x, q->z );
+    gf_add_nr ( p->z, p->x, p->x );    /* 2+e */
+    gf_subx_nr ( a, p->z, p->t, 4 );   /* 6+e */
+    if (GF_HEADROOM == 5) gf_weak_reduce(a); /* or 1+e */
+    gf_mul ( p->x, a, b );
+    gf_mul ( p->z, p->t, a );
+    gf_mul ( p->y, p->t, d );
+    if (!before_double) gf_mul ( p->t, b, d ); /* otherwise p->t is left holding the intermediate a - c */
+}
+
+void API_NS(point_double)(point_t p, const point_t q) { /* p = 2*q, with all four coordinates computed */
+    point_double_internal(p,q,0); /* before_double = 0, so t is computed too */
+}
+
+void API_NS(point_negate) (
+   point_t nega,
+   const point_t a
+) { /* (x, y, z, t) -> (-x, y, z, -t); each output field depends only on the matching input field */
+    gf_copy(nega->y, a->y);
+    gf_copy(nega->z, a->z);
+    gf_sub(nega->x, ZERO, a->x);
+    gf_sub(nega->t, ZERO, a->t);
+}
+
+/* Operations on [p]niels */
+static DECAF_INLINE void
+cond_neg_niels ( /* conditionally negate n under mask neg */
+    niels_t n,
+    mask_t neg
+) {
+    gf_cond_neg(n->c, neg);         /* c -> -c when neg is set */
+    gf_cond_swap(n->a, n->b, neg);  /* a <-> b when neg is set */
+}
+
+static DECAF_NOINLINE void pt_to_pniels ( /* convert point a to projective Niels form b */
+    pniels_t b,
+    const point_t a
+) {
+    gf_add ( b->z, a->z, a->z );             /* z = 2*z */
+    gf_mulw ( b->n->c, a->t, 2*TWISTED_D );  /* c = 2*d*t */
+    gf_add ( b->n->b, a->x, a->y );          /* b = x + y */
+    gf_sub ( b->n->a, a->y, a->x );          /* a = y - x */
+}
+
+static DECAF_NOINLINE void pniels_to_pt ( /* convert projective Niels d back to a point e, all four coordinates */
+    point_t e,
+    const pniels_t d
+) {
+    gf eu;
+    gf_add ( eu, d->n->b, d->n->a );   /* eu = b + a */
+    gf_sub ( e->y, d->n->b, d->n->a ); /* e->y temporarily holds b - a */
+    gf_mul ( e->t, e->y, eu);          /* t = (b-a)*(b+a) */
+    gf_mul ( e->x, d->z, e->y );       /* x = z*(b-a) */
+    gf_mul ( e->y, d->z, eu );         /* y = z*(b+a), overwriting the temporary */
+    gf_sqr ( e->z, d->z );             /* z = z^2 */
+}
+
+static DECAF_NOINLINE void
+niels_to_pt ( /* convert affine Niels n to a point e with z = 1 */
+    point_t e,
+    const niels_t n
+) {
+    gf_copy ( e->z, ONE );
+    gf_sub ( e->x, n->b, n->a );  /* x = b - a */
+    gf_add ( e->y, n->b, n->a );  /* y = b + a */
+    gf_mul ( e->t, e->y, e->x );  /* t = y*x */
+}
+
+static DECAF_NOINLINE void
+add_niels_to_pt ( /* d += e, updating d in place */
+    point_t d,
+    const niels_t e,
+    int before_double /* nonzero: skip recomputing d->t */
+) {
+    gf a, b, c;
+    gf_sub_nr ( b, d->y, d->x ); /* 3+e */
+    gf_mul ( a, e->a, b );       /* e->a pairs with (y-x) */
+    gf_add_nr ( b, d->x, d->y ); /* 2+e */
+    gf_mul ( d->y, e->b, b );    /* e->b pairs with (x+y) */
+    gf_mul ( d->x, e->c, d->t );
+    gf_add_nr ( c, a, d->y );    /* 2+e */
+    gf_sub_nr ( b, d->y, a );    /* 3+e */
+    gf_sub_nr ( d->y, d->z, d->x ); /* 3+e */
+    gf_add_nr ( a, d->x, d->z ); /* 2+e */
+    gf_mul ( d->z, a, d->y );
+    gf_mul ( d->x, d->y, b );
+    gf_mul ( d->y, a, c );
+    if (!before_double) gf_mul ( d->t, b, c );
+}
+
+static DECAF_NOINLINE void
+sub_niels_from_pt ( /* d -= e in place: add_niels_to_pt with e->a/e->b exchanged and the sign of the e->c*t term flipped */
+    point_t d,
+    const niels_t e,
+    int before_double /* nonzero: skip recomputing d->t */
+) {
+    gf a, b, c;
+    gf_sub_nr ( b, d->y, d->x ); /* 3+e */
+    gf_mul ( a, e->b, b );       /* e->b pairs with (y-x) here */
+    gf_add_nr ( b, d->x, d->y ); /* 2+e */
+    gf_mul ( d->y, e->a, b );    /* e->a pairs with (x+y) here */
+    gf_mul ( d->x, e->c, d->t );
+    gf_add_nr ( c, a, d->y );    /* 2+e */
+    gf_sub_nr ( b, d->y, a );    /* 3+e */
+    gf_add_nr ( d->y, d->z, d->x ); /* 2+e */
+    gf_sub_nr ( a, d->z, d->x ); /* 3+e */
+    gf_mul ( d->z, a, d->y );
+    gf_mul ( d->x, d->y, b );
+    gf_mul ( d->y, a, c );
+    if (!before_double) gf_mul ( d->t, b, c );
+}
+
+static void
+add_pniels_to_pt ( /* p += pn: scale p->z by pn->z, then add the Niels part */
+    point_t p,
+    const pniels_t pn,
+    int before_double
+) {
+    gf zz; /* product is built in a temporary before it replaces p->z */
+    gf_mul ( zz, p->z, pn->z );
+    gf_copy ( p->z, zz );
+    add_niels_to_pt( p, pn->n, before_double );
+}
+
+static void
+sub_pniels_from_pt ( /* p -= pn: scale p->z by pn->z, then subtract the Niels part */
+    point_t p,
+    const pniels_t pn,
+    int before_double
+) {
+    gf zz; /* product is built in a temporary before it replaces p->z */
+    gf_mul ( zz, p->z, pn->z );
+    gf_copy ( p->z, zz );
+    sub_niels_from_pt( p, pn->n, before_double );
+}
+
+static DECAF_NOINLINE void
+prepare_fixed_window(
+    pniels_t *multiples,
+    const point_t b,
+    int ntable
+) {
+    point_t acc;        /* running odd multiple of b */
+    pniels_t twice_b;   /* 2*b, the step between consecutive table entries */
+    int slot = 0;
+    /* Fill multiples[k] with (2k+1)*b for k = 0 .. ntable-1. */
+    point_double_internal(acc, b, 0);
+    pt_to_pniels(twice_b, acc);
+    pt_to_pniels(multiples[slot], b);
+    API_NS(point_copy)(acc, b);
+    while (++slot < ntable) {
+        add_pniels_to_pt(acc, twice_b, 0);
+        pt_to_pniels(multiples[slot], acc);
+    }
+    /* Wipe the temporaries before returning. */
+    decaf_bzero(twice_b,sizeof(twice_b));
+    decaf_bzero(acc,sizeof(acc));
+}
+
+void API_NS(point_scalarmul) ( /* a = scalar * b, using a fixed signed window and masked table lookups */
+    point_t a,
+    const point_t b,
+    const scalar_t scalar
+) {
+    const int WINDOW = DECAF_WINDOW_BITS,
+        WINDOW_MASK = (1<<WINDOW)-1,
+        WINDOW_T_MASK = WINDOW_MASK >> 1,
+        NTABLE = 1<<(WINDOW-1);
+    /* scalar1x = (scalar + point_scalarmul_adjustment) / 2, the recoded form consumed below */
+    scalar_t scalar1x;
+    API_NS(scalar_add)(scalar1x, scalar, point_scalarmul_adjustment);
+    API_NS(scalar_halve)(scalar1x,scalar1x);
+    
+    /* Set up a precomputed table with odd multiples of b. */
+    pniels_t pn, multiples[NTABLE];
+    point_t tmp;
+    prepare_fixed_window(multiples, b, NTABLE);
+
+    /* Initialize.  i starts at the largest multiple of WINDOW not exceeding SCALAR_BITS-1. */
+    int i,j,first=1;
+    i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1;
+
+    for (; i>=0; i-=WINDOW) {
+        /* Fetch another block of bits */
+        word_t bits = scalar1x->limb[i/WBITS] >> (i%WBITS);
+        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) { /* window straddles a limb boundary */
+            bits ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
+        }
+        bits &= WINDOW_MASK;
+        mask_t inv = (bits>>(WINDOW-1))-1; /* all-ones when the window's top bit is clear */
+        bits ^= inv;                       /* in that case complement the window; the entry is negated below */
+    
+        /* Add in from table.  Compute t only on last iteration. */
+        constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK);
+        cond_neg_niels(pn->n, inv);
+        if (first) {
+            pniels_to_pt(tmp, pn);
+            first = 0;
+        } else {
+           /* Using Hisil et al's lookahead method instead of extensible here
+            * for no particular reason.  Double WINDOW times, but only compute t on
+            * the last one.
+            */
+            for (j=0; j<WINDOW-1; j++)
+                point_double_internal(tmp, tmp, -1);
+            point_double_internal(tmp, tmp, 0);
+            add_pniels_to_pt(tmp, pn, i ? -1 : 0); /* t is skipped except when i == 0 */
+        }
+    }
+    
+    /* Write out the answer */
+    API_NS(point_copy)(a,tmp);
+    /* Wipe every scalar-derived temporary. */
+    decaf_bzero(scalar1x,sizeof(scalar1x));
+    decaf_bzero(pn,sizeof(pn));
+    decaf_bzero(multiples,sizeof(multiples));
+    decaf_bzero(tmp,sizeof(tmp));
+}
+
+void API_NS(point_double_scalarmul) ( /* a = scalarb*b + scalarc*c, sharing one chain of doublings */
+    point_t a,
+    const point_t b,
+    const scalar_t scalarb,
+    const point_t c,
+    const scalar_t scalarc
+) {
+    const int WINDOW = DECAF_WINDOW_BITS,
+        WINDOW_MASK = (1<<WINDOW)-1,
+        WINDOW_T_MASK = WINDOW_MASK >> 1,
+        NTABLE = 1<<(WINDOW-1);
+    /* Both scalars are recoded as (scalar + point_scalarmul_adjustment) / 2. */
+    scalar_t scalar1x, scalar2x;
+    API_NS(scalar_add)(scalar1x, scalarb, point_scalarmul_adjustment);
+    API_NS(scalar_halve)(scalar1x,scalar1x);
+    API_NS(scalar_add)(scalar2x, scalarc, point_scalarmul_adjustment);
+    API_NS(scalar_halve)(scalar2x,scalar2x);
+    
+    /* Set up precomputed tables with odd multiples of b and of c. */
+    pniels_t pn, multiples1[NTABLE], multiples2[NTABLE];
+    point_t tmp;
+    prepare_fixed_window(multiples1, b, NTABLE);
+    prepare_fixed_window(multiples2, c, NTABLE);
+
+    /* Initialize.  i starts at the largest multiple of WINDOW not exceeding SCALAR_BITS-1. */
+    int i,j,first=1;
+    i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1;
+
+    for (; i>=0; i-=WINDOW) {
+        /* Fetch another block of bits */
+        word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
+                     bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
+        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) { /* window straddles a limb boundary */
+            bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
+            bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
+        }
+        bits1 &= WINDOW_MASK;
+        bits2 &= WINDOW_MASK;
+        mask_t inv1 = (bits1>>(WINDOW-1))-1; /* all-ones when the window's top bit is clear */
+        mask_t inv2 = (bits2>>(WINDOW-1))-1;
+        bits1 ^= inv1;                       /* in that case complement the window; the entry is negated below */
+        bits2 ^= inv2;
+    
+        /* Add in from table.  Compute t only on last iteration. */
+        constant_time_lookup(pn, multiples1, sizeof(pn), NTABLE, bits1 & WINDOW_T_MASK);
+        cond_neg_niels(pn->n, inv1);
+        if (first) {
+            pniels_to_pt(tmp, pn);
+            first = 0;
+        } else {
+           /* Using Hisil et al's lookahead method instead of extensible here
+            * for no particular reason.  Double WINDOW times, but only compute t on
+            * the last one.
+            */
+            for (j=0; j<WINDOW-1; j++)
+                point_double_internal(tmp, tmp, -1);
+            point_double_internal(tmp, tmp, 0);
+            add_pniels_to_pt(tmp, pn, 0); /* t is kept here because a second addition follows */
+        }
+        constant_time_lookup(pn, multiples2, sizeof(pn), NTABLE, bits2 & WINDOW_T_MASK);
+        cond_neg_niels(pn->n, inv2);
+        add_pniels_to_pt(tmp, pn, i?-1:0); /* t is skipped except when i == 0 */
+    }
+    
+    /* Write out the answer */
+    API_NS(point_copy)(a,tmp);
+    
+    /* Wipe every scalar-derived temporary. */
+    decaf_bzero(scalar1x,sizeof(scalar1x));
+    decaf_bzero(scalar2x,sizeof(scalar2x));
+    decaf_bzero(pn,sizeof(pn));
+    decaf_bzero(multiples1,sizeof(multiples1));
+    decaf_bzero(multiples2,sizeof(multiples2));
+    decaf_bzero(tmp,sizeof(tmp));
+}
+
+/**
+ * Dual scalar multiplication: one base point, two scalars, two results.
+ *
+ * Walks both scalars window by window from the least significant end while
+ * repeatedly doubling a working copy of b.  For each window the current
+ * working point is added into a bucket selected by the window bits (one set
+ * of buckets per scalar); the buckets are folded together at the end.
+ * Judging by the name this yields a1 = scalar1*b and a2 = scalar2*b --
+ * TODO confirm against the public header.
+ *
+ * Bucket reads and writes go through constant_time_lookup and
+ * constant_time_insert; all temporaries are wiped before returning.
+ *
+ * @param a1      [out] Result for scalar1.
+ * @param a2      [out] Result for scalar2.
+ * @param b       Base point.
+ * @param scalar1 First scalar.
+ * @param scalar2 Second scalar.
+ */
+void API_NS(point_dual_scalarmul) (
+    point_t a1,
+    point_t a2,
+    const point_t b,
+    const scalar_t scalar1,
+    const scalar_t scalar2
+) {
+    const int WINDOW = DECAF_WINDOW_BITS,
+        WINDOW_MASK = (1<<WINDOW)-1,
+        WINDOW_T_MASK = WINDOW_MASK >> 1,
+        NTABLE = 1<<(WINDOW-1);
+        
+    /* Recode both scalars: add the adjustment constant, then halve. */
+    scalar_t scalar1x, scalar2x;
+    API_NS(scalar_add)(scalar1x, scalar1, point_scalarmul_adjustment);
+    API_NS(scalar_halve)(scalar1x,scalar1x);
+    API_NS(scalar_add)(scalar2x, scalar2, point_scalarmul_adjustment);
+    API_NS(scalar_halve)(scalar2x,scalar2x);
+    
+    /* Bucket accumulators (one set per scalar) plus scratch points. */
+    point_t multiples1[NTABLE], multiples2[NTABLE], working, tmp;
+    pniels_t pn;
+    
+    API_NS(point_copy)(working, b);
+
+    /* Initialize. */
+    int i,j;
+    
+    /* Every bucket starts out as the identity. */
+    for (i=0; i<NTABLE; i++) {
+        API_NS(point_copy)(multiples1[i], API_NS(point_identity));
+        API_NS(point_copy)(multiples2[i], API_NS(point_identity));
+    }
+
+    for (i=0; i<SCALAR_BITS; i+=WINDOW) {   
+        /* Advance working by WINDOW doublings for every window after the first. */
+        if (i) {
+            for (j=0; j<WINDOW-1; j++)
+                point_double_internal(working, working, -1);
+            point_double_internal(working, working, 0);
+        }
+        
+        /* Fetch another block of bits */
+        word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
+               bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
+        /* The window may reach into the next limb: merge in its low bits. */
+        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
+            bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
+            bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
+        }
+        bits1 &= WINDOW_MASK;
+        bits2 &= WINDOW_MASK;
+        /* invN is all-ones when the window's top bit is clear, 0 otherwise. */
+        mask_t inv1 = (bits1>>(WINDOW-1))-1;
+        mask_t inv2 = (bits2>>(WINDOW-1))-1;
+        bits1 ^= inv1;
+        bits2 ^= inv2;
+        
+        pt_to_pniels(pn, working);
+
+        /* Read bucket, add the (conditionally negated) working point, write back. */
+        constant_time_lookup(tmp, multiples1, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
+        cond_neg_niels(pn->n, inv1);
+        /* add_pniels_to_pt(multiples1[bits1 & WINDOW_T_MASK], pn, 0); */
+        add_pniels_to_pt(tmp, pn, 0);
+        constant_time_insert(multiples1, tmp, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
+        
+        
+        constant_time_lookup(tmp, multiples2, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
+        /* pn has already been conditioned on inv1; conditioning again on
+         * inv1^inv2 leaves it conditioned on inv2 alone.
+         */
+        cond_neg_niels(pn->n, inv1^inv2);
+        /* add_pniels_to_pt(multiples2[bits2 & WINDOW_T_MASK], pn, 0); */
+        add_pniels_to_pt(tmp, pn, 0);
+        constant_time_insert(multiples2, tmp, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
+    }
+    
+    /* Fold the buckets of each set into a single result using running sums,
+     * working down from the highest-index bucket.
+     */
+    if (NTABLE > 1) {
+        API_NS(point_copy)(working, multiples1[NTABLE-1]);
+        API_NS(point_copy)(tmp    , multiples2[NTABLE-1]);
+    
+        for (i=NTABLE-1; i>1; i--) {
+            API_NS(point_add)(multiples1[i-1], multiples1[i-1], multiples1[i]);
+            API_NS(point_add)(multiples2[i-1], multiples2[i-1], multiples2[i]);
+            API_NS(point_add)(working, working, multiples1[i-1]);
+            API_NS(point_add)(tmp,     tmp,     multiples2[i-1]);
+        }
+    
+        API_NS(point_add)(multiples1[0], multiples1[0], multiples1[1]);
+        API_NS(point_add)(multiples2[0], multiples2[0], multiples2[1]);
+        point_double_internal(working, working, 0);
+        point_double_internal(tmp,         tmp, 0);
+        API_NS(point_add)(a1, working, multiples1[0]);
+        API_NS(point_add)(a2, tmp,     multiples2[0]);
+    } else {
+        /* Single bucket: it already holds the result. */
+        API_NS(point_copy)(a1, multiples1[0]);
+        API_NS(point_copy)(a2, multiples2[0]);
+    }
+
+    /* Wipe everything that was derived from the scalars. */
+    decaf_bzero(scalar1x,sizeof(scalar1x));
+    decaf_bzero(scalar2x,sizeof(scalar2x));
+    decaf_bzero(pn,sizeof(pn));
+    decaf_bzero(multiples1,sizeof(multiples1));
+    decaf_bzero(multiples2,sizeof(multiples2));
+    decaf_bzero(tmp,sizeof(tmp));
+    decaf_bzero(working,sizeof(working));
+}
+
+/**
+ * Test two points for equality.
+ *
+ * Equality is taken modulo 2-torsion by comparing x/y of both points, which
+ * is done without division through the cross products p.y*q.x and q.y*p.x.
+ *
+ * @param p First point.
+ * @param q Second point.
+ * @return The comparison mask converted with mask_to_bool.
+ */
+decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) {
+    gf cross_pq, cross_qp;
+    mask_t equal;
+
+    gf_mul ( cross_pq, p->y, q->x );
+    gf_mul ( cross_qp, q->y, p->x );
+    equal = gf_eq(cross_pq,cross_qp);
+
+#if (COFACTOR == 8) && IMAGINE_TWIST
+    /* Interesting note: the 4tor would normally be rotation.
+     * But because of the *i twist, it's actually
+     * (x,y) <-> (iy,ix)
+     * so the products are compared as they are, with no negation.
+     */
+    gf_mul ( cross_pq, p->y, q->y );
+    gf_mul ( cross_qp, q->x, p->x );
+    equal |= gf_eq(cross_pq,cross_qp);
+#endif
+
+    return mask_to_bool(equal);
+}
+
+/**
+ * Check the internal consistency of a point's coordinates.
+ *
+ * Three conditions must all hold:
+ *   - x*y == z*t
+ *   - y^2 - x^2 == z^2 + TWISTED_D * t^2
+ *   - z != 0
+ *
+ * @param p Point to check.
+ * @return The combined mask converted with mask_to_bool.
+ */
+decaf_bool_t API_NS(point_valid) (
+    const point_t p
+) {
+    gf lhs, rhs, dt2;
+    mask_t ok;
+
+    /* x*y == z*t */
+    gf_mul(lhs,p->x,p->y);
+    gf_mul(rhs,p->z,p->t);
+    ok = gf_eq(lhs,rhs);
+
+    /* lhs = y^2 - x^2 */
+    gf_sqr(lhs,p->x);
+    gf_sqr(rhs,p->y);
+    gf_sub(lhs,rhs,lhs);
+
+    /* rhs = z^2 + TWISTED_D * t^2 */
+    gf_sqr(rhs,p->t);
+    gf_mulw(dt2,rhs,TWISTED_D);
+    gf_sqr(rhs,p->z);
+    gf_add(rhs,rhs,dt2);
+    ok &= gf_eq(lhs,rhs);
+
+    /* z must not be zero */
+    ok &= ~gf_eq(p->z,ZERO);
+
+    return mask_to_bool(ok);
+}
+
+/**
+ * Debugging helper: write to q a different coordinate representation
+ * derived from p.
+ *
+ * In the configuration without the imaginary twist this negates x and y and
+ * keeps z and t.  With COFACTOR == 8 and IMAGINE_TWIST it swaps x and y,
+ * scaling both by SQRT_MINUS_ONE, keeps z and negates t.
+ *
+ * @param q [out] Torqued point.
+ * @param p Input point.
+ */
+void API_NS(point_debugging_torque) (
+    point_t q,
+    const point_t p
+) {
+#if COFACTOR == 8 && IMAGINE_TWIST
+    gf ix;
+    gf_copy(q->z,p->z);
+    gf_sub(q->t,ZERO,p->t);
+    /* ix holds i*x so that q->x can be overwritten before q->y is set */
+    gf_mul(ix,p->x,SQRT_MINUS_ONE);
+    gf_mul(q->x,p->y,SQRT_MINUS_ONE);
+    gf_copy(q->y,ix);
+#else
+    gf_copy(q->z,p->z);
+    gf_copy(q->t,p->t);
+    gf_sub(q->x,ZERO,p->x);
+    gf_sub(q->y,ZERO,p->y);
+#endif
+}
+
+/**
+ * Debugging helper: multiply all four coordinates of p by one field
+ * element and store the result in q.
+ *
+ * The factor is deserialized from the given bytes; the result of the
+ * deserialization is deliberately ignored.  A factor that compares equal to
+ * ZERO is replaced through gf_cond_sel with ONE as the alternative.
+ *
+ * @param q      [out] Rescaled point.
+ * @param p      Input point.
+ * @param factor Serialized field element to scale by.
+ */
+void API_NS(point_debugging_pscale) (
+    point_t q,
+    const point_t p,
+    const uint8_t factor[SER_BYTES]
+) {
+    gf scale, prod;
+
+    /* NB this means you'll never pscale by negative numbers for p521 */
+    ignore_result(gf_deserialize(scale,factor,0,0));
+    gf_cond_sel(scale,scale,ONE,gf_eq(scale,ZERO));
+
+    /* Each product goes through prod before being copied into q. */
+    gf_mul(prod,p->x,scale);
+    gf_copy(q->x,prod);
+
+    gf_mul(prod,p->y,scale);
+    gf_copy(q->y,prod);
+
+    gf_mul(prod,p->z,scale);
+    gf_copy(q->z,prod);
+
+    gf_mul(prod,p->t,scale);
+    gf_copy(q->t,prod);
+}
+
+/**
+ * Invert n field elements at the cost of a single gf_invert call.
+ *
+ * A forward pass stores the running products of the inputs in out, the
+ * product of all inputs is inverted once, and a backward pass peels off one
+ * input at a time to leave out[k] = 1/in[k].
+ *
+ * @param out [out] Receives the n inverses; must not alias in.
+ * @param in  The n elements to invert.
+ * @param n   Number of elements; must be greater than 1.
+ */
+static void gf_batch_invert (
+    gf *__restrict__ out,
+    const gf *in,
+    unsigned int n
+) {
+    gf scratch;
+    int k;
+    const int last = (int) (n-1);
+    assert(n>1);
+
+    /* Forward pass: out[k+1] = in[0] * ... * in[k] */
+    gf_copy(out[1], in[0]);
+    for (k=1; k<last; k++) {
+        gf_mul(out[k+1], out[k], in[k]);
+    }
+
+    /* out[0] = 1 / (in[0] * ... * in[n-1]) */
+    gf_mul(out[0], out[last], in[last]);
+    gf_invert(out[0], out[0], 1);
+
+    /* Backward pass: entering step k, out[0] is the inverse of
+     * in[0]*...*in[k] and out[k] is in[0]*...*in[k-1].
+     */
+    k = last;
+    while (k > 0) {
+        gf_mul(scratch, out[k], out[0]);   /* 1/in[k] */
+        gf_copy(out[k], scratch);
+        gf_mul(scratch, out[0], in[k]);    /* drop in[k] from the inverse */
+        gf_copy(out[0], scratch);
+        k--;
+    }
+}
+
+/**
+ * Scale every entry of a niels table by the inverse of its z value.
+ *
+ * The inverses of zs are computed in one batch into zis; each of the a, b
+ * and c fields of table[k] is then multiplied by zis[k] and strongly
+ * reduced in place.
+ *
+ * @param table [in,out] n table entries to scale.
+ * @param zs    The n z values.
+ * @param zis   [out] Scratch space for n elements; receives the inverses.
+ * @param n     Number of entries.
+ */
+static void batch_normalize_niels (
+    niels_t *table,
+    const gf *zs,
+    gf *__restrict__ zis,
+    int n
+) {
+    gf scaled;
+    int k;
+
+    gf_batch_invert(zis, zs, n);
+
+    for (k=0; k<n; k++) {
+        niels_s *entry = table[k];
+
+        gf_mul(scaled, entry->a, zis[k]);
+        gf_strong_reduce(scaled);
+        gf_copy(entry->a, scaled);
+
+        gf_mul(scaled, entry->b, zis[k]);
+        gf_strong_reduce(scaled);
+        gf_copy(entry->b, scaled);
+
+        gf_mul(scaled, entry->c, zis[k]);
+        gf_strong_reduce(scaled);
+        gf_copy(entry->c, scaled);
+    }
+
+    /* Wipe the scratch element. */
+    decaf_bzero(scaled,sizeof(scaled));
+}
+
+/**
+ * Fill a precomputed comb table for a base point.
+ *
+ * The table consists of COMBS_N sub-tables ("combs") of 2^(COMBS_T-1)
+ * entries each.  Within a comb, the COMBS_T teeth are COMBS_S doublings
+ * apart.  Entries are generated in Gray-code order so that consecutive
+ * entries differ by adding or subtracting a single doubled tooth, then all
+ * entries are scaled by the inverse of their z value in one batch.
+ *
+ * @param table [out] Table to fill.
+ * @param base  Point the table is built from.
+ */
+void API_NS(precompute) (
+    precomputed_s *table,
+    const point_t base
+) { 
+    const unsigned int n = COMBS_N, t = COMBS_T, s = COMBS_S;
+    assert(n*t*s >= SCALAR_BITS);
+  
+    point_t working, start, doubles[t-1];
+    API_NS(point_copy)(working, base);
+    pniels_t pn_tmp;
+  
+    /* One z value (and one inverse) per table entry. */
+    gf zs[n<<(t-1)], zis[n<<(t-1)];
+  
+    unsigned int i,j,k;
+    
+    /* Compute n tables */
+    for (i=0; i<n; i++) {
+
+        /* Doubling phase */
+        for (j=0; j<t; j++) {
+            /* start accumulates the sum of all teeth of this comb. */
+            if (j) API_NS(point_add)(start, start, working);
+            else API_NS(point_copy)(start, working);
+
+            /* Nothing left to prepare after the last tooth of the last comb. */
+            if (j==t-1 && i==n-1) break;
+
+            /* doubles[j] keeps twice tooth j for the Gray-code steps below. */
+            point_double_internal(working, working,0);
+            if (j<t-1) API_NS(point_copy)(doubles[j], working);
+
+            /* Remaining s-1 doublings bring working to the next tooth. */
+            for (k=0; k<s-1; k++)
+                point_double_internal(working, working, k<s-2);
+        }
+
+        /* Gray-code phase */
+        for (j=0;; j++) {
+            int gray = j ^ (j>>1);
+            /* Index within comb i, counted down from its last slot. */
+            int idx = (((i+1)<<(t-1))-1) ^ gray;
+
+            /* Store the niels part now; z is kept aside for batch inversion. */
+            pt_to_pniels(pn_tmp, start);
+            memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n));
+            gf_copy(zs[idx], pn_tmp->z);
+			
+            if (j >= (1u<<(t-1)) - 1) break;
+            /* delta has exactly the bit that differs in the next Gray code. */
+            int delta = (j+1) ^ ((j+1)>>1) ^ gray;
+
+            /* k = index of that bit */
+            for (k=0; delta>1; k++)
+                delta >>=1;
+            
+            if (gray & (1<<k)) {
+                API_NS(point_add)(start, start, doubles[k]);
+            } else {
+                API_NS(point_sub)(start, start, doubles[k]);
+            }
+        }
+    }
+    
+    batch_normalize_niels(table->table,(const gf *)zs,zis,n<<(t-1));
+    
+    /* Wipe all temporaries. */
+    decaf_bzero(zs,sizeof(zs));
+    decaf_bzero(zis,sizeof(zis));
+    decaf_bzero(pn_tmp,sizeof(pn_tmp));
+    decaf_bzero(working,sizeof(working));
+    decaf_bzero(start,sizeof(start));
+    decaf_bzero(doubles,sizeof(doubles));
+}
+
+/**
+ * Fetch one niels entry from a table via constant_time_lookup.
+ *
+ * @param ni    [out] Receives the selected entry.
+ * @param table Table to read from.
+ * @param nelts Number of entries in the table.
+ * @param idx   Index of the entry to fetch.
+ */
+static DECAF_INLINE void
+constant_time_lookup_niels (
+    niels_s *__restrict__ ni,
+    const niels_t *table,
+    int nelts,
+    int idx
+) {
+    constant_time_lookup(ni, table, sizeof(niels_s), nelts, idx);
+}
+
+/**
+ * Scalar multiplication driven by a precomputed comb table.
+ *
+ * For each of the COMBS_S bit offsets (highest first) the accumulator is
+ * doubled once and one entry from each of the COMBS_N combs is added.  The
+ * entry index for a comb is assembled from COMBS_T scalar bits spaced
+ * COMBS_S apart.  Entries are fetched with a constant-time lookup and the
+ * scalar-derived temporaries are wiped before returning.
+ *
+ * @param out    [out] Result point.
+ * @param table  Comb table, presumably as produced by the matching
+ *               precompute function -- TODO confirm.
+ * @param scalar Scalar to multiply by.
+ */
+void API_NS(precomputed_scalarmul) (
+    point_t out,
+    const precomputed_s *table,
+    const scalar_t scalar
+) {
+    int i;
+    unsigned j,k;
+    const unsigned int n = COMBS_N, t = COMBS_T, s = COMBS_S;
+    
+    /* Recode the scalar: add the adjustment constant, then halve. */
+    scalar_t scalar1x;
+    API_NS(scalar_add)(scalar1x, scalar, precomputed_scalarmul_adjustment);
+    API_NS(scalar_halve)(scalar1x,scalar1x);
+    
+    niels_t ni;
+    
+    for (i=s-1; i>=0; i--) {
+        /* No doubling before the very first offset: out is not initialised yet. */
+        if (i != (int)s-1) point_double_internal(out,out,0);
+        
+        for (j=0; j<n; j++) {
+            int tab = 0;
+         
+            /* Gather tooth k of comb j at offset i; bits past the scalar read as 0. */
+            for (k=0; k<t; k++) {
+                unsigned int bit = i + s*(k + j*t);
+                if (bit < SCALAR_BITS) {
+                    tab |= (scalar1x->limb[bit/WBITS] >> (bit%WBITS) & 1) << k;
+                }
+            }
+            
+            /* invert is all-ones when the top tooth is clear, 0 when it is
+             * set; the remaining t-1 bits (flipped if invert) index the comb.
+             */
+            mask_t invert = (tab>>(t-1))-1;
+            tab ^= invert;
+            tab &= (1<<(t-1)) - 1;
+
+            constant_time_lookup_niels(ni, &table->table[j<<(t-1)], 1<<(t-1), tab);
+
+            cond_neg_niels(ni, invert);
+            if ((i!=(int)s-1)||j) {
+                add_niels_to_pt(out, ni, j==n-1 && i);
+            } else {
+                /* Very first entry initialises the accumulator. */
+                niels_to_pt(out, ni);
+            }
+        }
+    }
+    
+    /* Wipe scalar-derived temporaries. */
+    decaf_bzero(ni,sizeof(ni));
+    decaf_bzero(scalar1x,sizeof(scalar1x));
+}
+
+/**
+ * Select one of two points via constant_time_select.
+ *
+ * @param out    [out] Receives the selected point.
+ * @param a      Candidate, presumably chosen when pick_b is false.
+ * @param b      Candidate, presumably chosen when pick_b is true.
+ * @param pick_b Selector; converted to a mask with bool_to_mask.
+ */
+void API_NS(point_cond_sel) (
+    point_t out,
+    const point_t a,
+    const point_t b,
+    decaf_bool_t pick_b
+) {
+    constant_time_select(out,a,b,sizeof(point_t),bool_to_mask(pick_b),0);
+}
+
+/* FUTURE: restore Curve25519 Montgomery ladder? */
+/**
+ * Decode a point, multiply it by a scalar and encode the result.
+ *
+ * If decoding fails and short_circuit is set, the function returns at once
+ * without writing to scaled.  Otherwise the multiplication always runs; the
+ * decode status is passed to point_cond_sel to choose between point_base
+ * and the decoded point, and that status is still returned.
+ *
+ * @param scaled         [out] Encoded result.
+ * @param base           Encoded input point.
+ * @param scalar         Scalar to multiply by.
+ * @param allow_identity Passed through to point_decode.
+ * @param short_circuit  Return immediately on a decoding failure.
+ * @return The status reported by point_decode.
+ */
+decaf_error_t API_NS(direct_scalarmul) (
+    uint8_t scaled[SER_BYTES],
+    const uint8_t base[SER_BYTES],
+    const scalar_t scalar,
+    decaf_bool_t allow_identity,
+    decaf_bool_t short_circuit
+) {
+    point_t pt;
+    decaf_error_t status = API_NS(point_decode)(pt, base, allow_identity);
+
+    if (status != DECAF_SUCCESS && short_circuit) {
+        return status;
+    }
+
+    API_NS(point_cond_sel)(pt, API_NS(point_base), pt, status);
+    API_NS(point_scalarmul)(pt, pt, scalar);
+    API_NS(point_encode)(scaled, pt);
+    API_NS(point_destroy)(pt);
+
+    return status;
+}
+
+/**
+ * Encode a point in EdDSA wire format.
+ *
+ * The point is mapped from the internal twisted curve to the untwisted one
+ * (which map is used depends on EDDSA_USE_SIGMA_ISOGENY and IMAGINE_TWIST),
+ * brought to affine form, and serialized as the affine y coordinate with the
+ * low bit of the affine x coordinate stored in bit 7 of the last byte.
+ * All temporaries are wiped before returning.
+ *
+ * @param enc [out] Encoded point.
+ * @param p   Point to encode.
+ */
+void API_NS(point_mul_by_ratio_and_encode_like_eddsa) (
+    uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const point_t p
+) {
+    
+    /* The point is now on the twisted curve.  Move it to untwisted. */
+    gf x, y, z, t;
+    point_t q;
+#if COFACTOR == 8
+    API_NS(point_double)(q,p);
+#else
+    API_NS(point_copy)(q,p);
+#endif
+    
+#if EDDSA_USE_SIGMA_ISOGENY
+    {
+        /* Use 4-isogeny like ed25519:
+         *   2*x*y*sqrt(d/a-1)/(ax^2 + y^2 - 2)
+         *   (y^2 - ax^2)/(y^2 + ax^2)
+         * with a = -1, d = -EDWARDS_D:
+         *   -2xysqrt(EDWARDS_D-1)/(2z^2-y^2+x^2)
+         *   (y^2+x^2)/(y^2-x^2)
+         */
+        gf u;
+        gf_sqr ( x, q->x ); // x^2
+        gf_sqr ( t, q->y ); // y^2
+        gf_add( u, x, t ); // x^2 + y^2
+        gf_add( z, q->y, q->x );
+        gf_sqr ( y, z);
+        gf_sub ( y, u, y ); // -2xy
+        gf_sub ( z, t, x ); // y^2 - x^2
+        gf_sqr ( x, q->z );
+        gf_add ( t, x, x);
+        gf_sub ( t, t, z);  // 2z^2 - y^2 + x^2
+        gf_mul ( x, y, z ); // 2xy(y^2-x^2)
+        gf_mul ( y, u, t ); // (x^2+y^2)(2z^2-y^2+x^2)
+        gf_mul ( u, z, t );
+        gf_copy( z, u );
+        gf_mul ( u, x, RISTRETTO_FACTOR );
+#if IMAGINE_TWIST
+        gf_mul_i( x, u );
+#else
+#error "... probably wrong"
+        gf_copy( x, u );
+#endif
+        decaf_bzero(u,sizeof(u));
+    }
+#elif IMAGINE_TWIST
+    {
+        API_NS(point_double)(q,q);
+        API_NS(point_double)(q,q);
+        gf_mul_i(x, q->x);
+        gf_copy(y, q->y);
+        gf_copy(z, q->z);
+    }
+#else
+    {
+        /* 4-isogeny: 2xy/(y^2+x^2), (y^2-x^2)/(2z^2-y^2+x^2) */
+        gf u;
+        gf_sqr ( x, q->x );     /* x^2 */
+        gf_sqr ( t, q->y );     /* y^2 */
+        gf_add( u, x, t );      /* u = x^2 + y^2 */
+        gf_add( z, q->y, q->x );
+        gf_sqr ( y, z);         /* (x+y)^2 */
+        gf_sub ( y, y, u );     /* y = 2xy */
+        gf_sub ( z, t, x );     /* z = y^2 - x^2 */
+        gf_sqr ( x, q->z );
+        gf_add ( t, x, x); 
+        gf_sub ( t, t, z);      /* t = 2z^2 - y^2 + x^2 */
+        gf_mul ( x, t, y );     /* x numerator over the common denominator */
+        gf_mul ( y, z, u );     /* y numerator over the common denominator */
+        gf_mul ( z, u, t );     /* common denominator */
+        decaf_bzero(u,sizeof(u));
+    }
+#endif
+    /* Affinize: afterwards t holds affine x and x holds affine y. */
+    gf_invert(z,z,1);
+    gf_mul(t,x,z);
+    gf_mul(x,y,z);
+    
+    /* Encode: clear the last byte, serialize affine y, then store the low
+     * bit of affine x in bit 7 of the last byte.
+     * NOTE(review): the index uses DECAF_EDDSA_448_PRIVATE_BYTES while the
+     * array is declared with DECAF_EDDSA_448_PUBLIC_BYTES -- presumably the
+     * two are equal; confirm.
+     */
+    enc[DECAF_EDDSA_448_PRIVATE_BYTES-1] = 0;
+    gf_serialize(enc, x, 1);
+    enc[DECAF_EDDSA_448_PRIVATE_BYTES-1] |= 0x80 & gf_lobit(t);
+
+    /* Wipe temporaries. */
+    decaf_bzero(x,sizeof(x));
+    decaf_bzero(y,sizeof(y));
+    decaf_bzero(z,sizeof(z));
+    decaf_bzero(t,sizeof(t));
+    API_NS(point_destroy)(q);
+}
+
+
+/**
+ * Decode a point from EdDSA wire format.
+ *
+ * Bit 7 of the last byte carries the sign of x and is stripped before y is
+ * deserialized; the rest of that byte must be zero.  x is recovered as
+ * sqrt((1-y^2)/(1-d*y^2)) with the sign adjusted to match, and the point is
+ * then mapped to the internal representation (which map is used depends on
+ * EDDSA_USE_SIGMA_ISOGENY and IMAGINE_TWIST).
+ *
+ * The computation always runs to completion; failures are accumulated in a
+ * mask and only reported through the return value.
+ *
+ * @param p   [out] Decoded point.
+ * @param enc Encoded point.
+ * @return decaf_succeed_if applied to the accumulated success mask.
+ */
+decaf_error_t API_NS(point_decode_like_eddsa_and_mul_by_ratio) (
+    point_t p,
+    const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES]
+) {
+    /* Work on a copy so the sign bit can be stripped. */
+    uint8_t enc2[DECAF_EDDSA_448_PUBLIC_BYTES];
+    memcpy(enc2,enc,sizeof(enc2));
+
+    /* low = all-ones iff the sign bit (bit 7 of the last byte) is set. */
+    mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] & 0x80);
+    enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] &= ~0x80;
+    
+    mask_t succ = gf_deserialize(p->y, enc2, 1, 0);
+#if 0 == 0
+    /* After stripping the sign bit the last byte must be zero. */
+    succ &= word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]);
+#endif
+
+    gf_sqr(p->x,p->y);
+    gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
+    #if EDDSA_USE_SIGMA_ISOGENY
+        gf_mulw(p->t,p->z,EDWARDS_D); /* d-dy^2 */
+        gf_mulw(p->x,p->z,EDWARDS_D-1); /* num = (1-y^2)(d-1) */
+        gf_copy(p->z,p->x);
+    #else
+        gf_mulw(p->t,p->x,EDWARDS_D); /* dy^2 */
+    #endif
+    gf_sub(p->t,ONE,p->t); /* denom = 1-dy^2 or 1-d + dy^2 */
+    
+    gf_mul(p->x,p->z,p->t);
+    succ &= gf_isr(p->t,p->x); /* 1/sqrt(num * denom) */
+    
+    gf_mul(p->x,p->t,p->z); /* sqrt(num / denom) */
+    /* Negate x when its low bit disagrees with the encoded sign bit. */
+    gf_cond_neg(p->x,gf_lobit(p->x)^low);
+    gf_copy(p->z,ONE);
+  
+    #if EDDSA_USE_SIGMA_ISOGENY
+    {
+       /* Use 4-isogeny like ed25519:
+        *   2*x*y/sqrt(1-d/a)/(ax^2 + y^2 - 2)
+        *   (y^2 - ax^2)/(y^2 + ax^2)
+        * (MAGIC: above formula may be off by a factor of -a
+        * or something somewhere; check it for other a)
+        *
+        * with a = -1, d = -EDWARDS_D:
+        *   -2xy/sqrt(1-EDWARDS_D)/(2z^2-y^2+x^2)
+        *   (y^2+x^2)/(y^2-x^2)
+        */
+        gf a, b, c, d;
+        gf_sqr ( c, p->x );
+        gf_sqr ( a, p->y );
+        gf_add ( d, c, a ); // x^2 + y^2
+        gf_add ( p->t, p->y, p->x );
+        gf_sqr ( b, p->t );
+        gf_sub ( b, b, d ); // 2xy
+        gf_sub ( p->t, a, c ); // y^2 - x^2
+        gf_sqr ( p->x, p->z );
+        gf_add ( p->z, p->x, p->x );
+        gf_sub ( c, p->z, p->t ); // 2z^2 - y^2 + x^2
+        gf_div_i ( a, c );
+        gf_mul ( c, a, RISTRETTO_FACTOR );
+        gf_mul ( p->x, b, p->t); // (2xy)(y^2-x^2)
+        gf_mul ( p->z, p->t, c ); // (y^2-x^2)sd(2z^2 - y^2 + x^2)
+        gf_mul ( p->y, d, c ); // (y^2+x^2)sd(2z^2 - y^2 + x^2)
+        gf_mul ( p->t, d, b );
+        decaf_bzero(a,sizeof(a));
+        decaf_bzero(b,sizeof(b));
+        decaf_bzero(c,sizeof(c));
+        decaf_bzero(d,sizeof(d));
+    } 
+    #elif IMAGINE_TWIST
+    {
+        gf_mul(p->t,p->x,SQRT_MINUS_ONE);
+        gf_copy(p->x,p->t);
+        gf_mul(p->t,p->x,p->y);
+    }
+    #else
+    {
+        /* 4-isogeny 2xy/(y^2-ax^2), (y^2+ax^2)/(2-y^2-ax^2) */
+        gf a, b, c, d;
+        gf_sqr ( c, p->x );            /* c = x^2 */
+        gf_sqr ( a, p->y );            /* a = y^2 */
+        gf_add ( d, c, a );            /* d = x^2 + y^2 */
+        gf_add ( p->t, p->y, p->x );
+        gf_sqr ( b, p->t );
+        gf_sub ( b, b, d );            /* b = 2xy */
+        gf_sub ( p->t, a, c );         /* t = y^2 - x^2 */
+        gf_sqr ( p->x, p->z );
+        gf_add ( p->z, p->x, p->x );   /* 2z^2 */
+        gf_sub ( a, p->z, d );         /* a = 2z^2 - x^2 - y^2 */
+        gf_mul ( p->x, a, b );
+        gf_mul ( p->z, p->t, a );
+        gf_mul ( p->y, p->t, d );
+        gf_mul ( p->t, b, d );
+        decaf_bzero(a,sizeof(a));
+        decaf_bzero(b,sizeof(b));
+        decaf_bzero(c,sizeof(c));
+        decaf_bzero(d,sizeof(d));
+    }
+    #endif
+    
+    decaf_bzero(enc2,sizeof(enc2));
+    /* A successful decode must yield a valid point. */
+    assert(API_NS(point_valid)(p) || ~succ);
+    
+    return decaf_succeed_if(mask_to_bool(succ));
+}
+
+/**
+ * X448 scalar multiplication on the u-coordinate (Montgomery ladder).
+ *
+ * Scalar clamping is applied on the fly while the bits are read, so the
+ * caller's scalar buffer is not modified.  The ladder uses conditional swaps
+ * rather than branches on scalar bits, and all field temporaries are wiped
+ * before returning.
+ *
+ * @param out    [out] Serialized result.
+ * @param base   Serialized input u-coordinate.
+ * @param scalar Private scalar bytes.
+ * @return decaf_succeed_if applied to "result is non-zero".
+ */
+decaf_error_t decaf_x448 (
+    uint8_t out[X_PUBLIC_BYTES],
+    const uint8_t base[X_PUBLIC_BYTES],
+    const uint8_t scalar[X_PRIVATE_BYTES]
+) {
+    gf x1, x2, z2, x3, z3, t1, t2;
+    /* The result of deserializing the base is deliberately ignored. */
+    ignore_result(gf_deserialize(x1,base,1,0));
+    /* Ladder state: (x2:z2) = (1:0), (x3:z3) = (x1:1). */
+    gf_copy(x2,ONE);
+    gf_copy(z2,ZERO);
+    gf_copy(x3,x1);
+    gf_copy(z3,ONE);
+    
+    int t;
+    mask_t swap = 0;
+    
+    for (t = X_PRIVATE_BITS-1; t>=0; t--) {
+        uint8_t sb = scalar[t/8];
+        
+        /* Scalar conditioning: mask byte 0 with -COFACTOR (clears its low
+         * bits) and force the top bit of the scalar to read as 1.
+         */
+        if (t/8==0) sb &= -(uint8_t)COFACTOR;
+        else if (t == X_PRIVATE_BITS-1) sb = -1;
+        
+        mask_t k_t = (sb>>(t%8)) & 1;
+        k_t = -k_t; /* set to all 0s or all 1s */
+        
+        /* Swap only when this bit differs from the previous one. */
+        swap ^= k_t;
+        gf_cond_swap(x2,x3,swap);
+        gf_cond_swap(z2,z3,swap);
+        swap = k_t;
+        
+        gf_add_nr(t1,x2,z2); /* A = x2 + z2 */        /* 2+e */
+        gf_sub_nr(t2,x2,z2); /* B = x2 - z2 */        /* 3+e */
+        gf_sub_nr(z2,x3,z3); /* D = x3 - z3 */        /* 3+e */
+        gf_mul(x2,t1,z2);    /* DA */
+        gf_add_nr(z2,z3,x3); /* C = x3 + z3 */        /* 2+e */
+        gf_mul(x3,t2,z2);    /* CB */
+        gf_sub_nr(z3,x2,x3); /* DA-CB */              /* 3+e */
+        gf_sqr(z2,z3);       /* (DA-CB)^2 */
+        gf_mul(z3,x1,z2);    /* z3 = x1(DA-CB)^2 */
+        gf_add_nr(z2,x2,x3); /* (DA+CB) */            /* 2+e */
+        gf_sqr(x3,z2);       /* x3 = (DA+CB)^2 */
+        
+        gf_sqr(z2,t1);       /* AA = A^2 */
+        gf_sqr(t1,t2);       /* BB = B^2 */
+        gf_mul(x2,z2,t1);    /* x2 = AA*BB */
+        gf_sub_nr(t2,z2,t1); /* E = AA-BB */          /* 3+e */
+        
+        gf_mulw(t1,t2,-EDWARDS_D); /* E*-d = a24*E */
+        gf_add_nr(t1,t1,z2); /* AA + a24*E */         /* 2+e */
+        gf_mul(z2,t2,t1); /* z2 = E(AA+a24*E) */
+    }
+    
+    /* Finish: undo the pending swap, then out = x2/z2. */
+    gf_cond_swap(x2,x3,swap);
+    gf_cond_swap(z2,z3,swap);
+    gf_invert(z2,z2,0);
+    gf_mul(x1,x2,z2);
+    gf_serialize(out,x1,1);
+    /* nz = all-ones iff the result is non-zero. */
+    mask_t nz = ~gf_eq(x1,ZERO);
+    
+    /* Wipe temporaries. */
+    decaf_bzero(x1,sizeof(x1));
+    decaf_bzero(x2,sizeof(x2));
+    decaf_bzero(z2,sizeof(z2));
+    decaf_bzero(x3,sizeof(x3));
+    decaf_bzero(z3,sizeof(z3));
+    decaf_bzero(t1,sizeof(t1));
+    decaf_bzero(t2,sizeof(t2));
+    
+    return decaf_succeed_if(mask_to_bool(nz));
+}
+
+/* Thanks Johan Pascal */
+/**
+ * Convert an Ed448 public key into the corresponding X448 public key.
+ *
+ * Only the y coordinate of the Edwards key is used; it is mapped to the
+ * Montgomery u coordinate by the formula selected through
+ * EDDSA_USE_SIGMA_ISOGENY.  The result of deserializing the input is
+ * deliberately ignored.
+ *
+ * @param x  [out] X448 public key.
+ * @param ed Ed448 public key.
+ */
+void decaf_ed448_convert_public_key_to_x448 (
+    uint8_t x[DECAF_X448_PUBLIC_BYTES],
+    const uint8_t ed[DECAF_EDDSA_448_PUBLIC_BYTES]
+) {
+    gf y;
+    /* 0xFE<<7 is 0x7F00, so after the cast to uint8_t this mask is 0. */
+    const uint8_t mask = (uint8_t)(0xFE<<(7));
+    ignore_result(gf_deserialize(y, ed, 1, mask));
+    
+    {
+        gf n,d;
+        
+#if EDDSA_USE_SIGMA_ISOGENY
+        /* u = (1+y)/(1-y)*/
+        gf_add(n, y, ONE); /* n = y+1 */
+        gf_sub(d, ONE, y); /* d = 1-y */
+        gf_invert(d, d, 0); /* d = 1/(1-y) */
+        gf_mul(y, n, d); /* u = (y+1)/(1-y) */
+        gf_serialize(x,y,1);
+#else /* EDDSA_USE_SIGMA_ISOGENY */
+        /* u = y^2 * (1-dy^2) / (1-y^2) */
+        gf_sqr(n,y); /* y^2*/
+        gf_sub(d,ONE,n); /* 1-y^2*/
+        gf_invert(d,d,0); /* 1/(1-y^2)*/
+        gf_mul(y,n,d); /* y^2 / (1-y^2) */
+        gf_mulw(d,n,EDWARDS_D); /* dy^2*/
+        gf_sub(d, ONE, d); /* 1-dy^2*/
+        gf_mul(n, y, d); /* y^2 * (1-dy^2) / (1-y^2) */
+        gf_serialize(x,n,1);
+#endif /* EDDSA_USE_SIGMA_ISOGENY */
+        
+        /* Wipe temporaries. */
+        decaf_bzero(y,sizeof(y));
+        decaf_bzero(n,sizeof(n));
+        decaf_bzero(d,sizeof(d));
+    }
+}
+
+/**
+ * Derive an X448 public key from a private scalar.
+ *
+ * Thin wrapper that forwards to decaf_x448_derive_public_key.
+ *
+ * @param out    [out] Public key.
+ * @param scalar Private scalar bytes.
+ */
+void decaf_x448_generate_key (
+    uint8_t out[X_PUBLIC_BYTES],
+    const uint8_t scalar[X_PRIVATE_BYTES]
+) {
+    decaf_x448_derive_public_key(out,scalar);
+}
+
+/**
+ * Encode a point as an X448 u-coordinate.
+ *
+ * Works on a private copy of p (doubled first when COFACTOR == 8), computes
+ * (y/x)^2 -- negated when IMAGINE_TWIST is set -- and serializes it.  The
+ * copy is destroyed before returning.
+ *
+ * @param out [out] Serialized u-coordinate.
+ * @param p   Point to encode.
+ */
+void API_NS(point_mul_by_ratio_and_encode_like_x448) (
+    uint8_t out[X_PUBLIC_BYTES],
+    const point_t p
+) {
+    point_t q;
+#if COFACTOR == 8
+    point_double_internal(q,p,1);
+#else
+    API_NS(point_copy)(q,p);
+#endif
+    gf_invert(q->t,q->x,0); /* 1/x */
+    gf_mul(q->z,q->t,q->y); /* y/x */
+    gf_sqr(q->y,q->z); /* (y/x)^2 */
+#if IMAGINE_TWIST
+    gf_sub(q->y,ZERO,q->y);
+#endif
+    gf_serialize(out,q->y,1);
+    /* Call through the namespaced name, like every other call site. */
+    API_NS(point_destroy)(q);
+}
+
+/**
+ * Derive an X448 public key from a private scalar.
+ *
+ * The scalar is clamped on a local copy, decoded, halved once per factor of
+ * two in DECAF_X448_ENCODE_RATIO, multiplied into the precomputed base
+ * table, and the resulting point is encoded as an X448 u-coordinate.
+ *
+ * Every local that holds private-key material is wiped before returning.
+ *
+ * @param out    [out] Public key.
+ * @param scalar Private scalar bytes; not modified.
+ */
+void decaf_x448_derive_public_key (
+    uint8_t out[X_PUBLIC_BYTES],
+    const uint8_t scalar[X_PRIVATE_BYTES]
+) {
+    /* Scalar conditioning */
+    uint8_t scalar2[X_PRIVATE_BYTES];
+    memcpy(scalar2,scalar,sizeof(scalar2));
+    /* Mask byte 0 with -COFACTOR, which clears its low bits. */
+    scalar2[0] &= -(uint8_t)COFACTOR;
+    
+    /* Clear everything above the top scalar bit, then set that bit. */
+    scalar2[X_PRIVATE_BYTES-1] &= ~(-1u<<((X_PRIVATE_BITS+7)%8));
+    scalar2[X_PRIVATE_BYTES-1] |= 1<<((X_PRIVATE_BITS+7)%8);
+    
+    scalar_t the_scalar;
+    API_NS(scalar_decode_long)(the_scalar,scalar2,sizeof(scalar2));
+    
+    /* Compensate for the encoding ratio */
+    for (unsigned i=1; i<DECAF_X448_ENCODE_RATIO; i<<=1) {
+        API_NS(scalar_halve)(the_scalar,the_scalar);
+    }
+    point_t p;
+    API_NS(precomputed_scalarmul)(p,API_NS(precomputed_base),the_scalar);
+    API_NS(point_mul_by_ratio_and_encode_like_x448)(out,p);
+    API_NS(point_destroy)(p);
+
+    /* Do not leave copies of the private scalar on the stack. */
+    decaf_bzero(scalar2,sizeof(scalar2));
+    decaf_bzero(the_scalar,sizeof(the_scalar));
+}
+
+/**
+ * @cond internal
+ * Control for variable-time scalar multiply algorithms.
+ *
+ * One entry of a recoded scalar: at bit position `power`, the table entry
+ * selected by the signed digit `addend` is added.  A list of entries is
+ * terminated by an entry with power == -1 and addend == 0.
+ */
+struct smvt_control {
+  int power, addend; /* bit position; signed digit to apply there */
+};
+
+/**
+ * Recode a scalar into a list of signed odd digits for a variable-time
+ * windowed scalar multiplication.
+ *
+ * Entries are stored most significant first.  Each entry records a bit
+ * position (power) and a signed odd digit (addend) whose magnitude is below
+ * 2^(table_bits+1).  The list is terminated by an entry with power == -1
+ * and addend == 0.
+ *
+ * Not constant time: the amount of work depends on the scalar.
+ *
+ * @param control    [out] Array with room for
+ *                   SCALAR_BITS/(table_bits+1) + 3 entries.
+ * @param scalar     Scalar to recode.
+ * @param table_bits Window parameter.
+ * @return Number of entries written, not counting the end marker.
+ */
+static int recode_wnaf (
+    struct smvt_control *control, /* [nbits/(table_bits+1) + 3] */
+    const scalar_t scalar,
+    unsigned int table_bits
+) {
+    unsigned int table_size = SCALAR_BITS/(table_bits+1) + 3;
+    int position = table_size - 1; /* at the end */
+    
+    /* place the end marker */
+    control[position].power = -1;
+    control[position].addend = 0;
+    position--;
+
+    /* PERF: Could negate scalar if it's large.  But then would need more cases
+     * in the actual code that uses it, all for an expected reduction of like 1/5 op.
+     * Probably not worth it.
+     */
+    
+    /* The scalar is consumed 16 bits at a time; current holds the chunk
+     * being processed in its low 16 bits plus lookahead above it.
+     */
+    uint64_t current = scalar->limb[0] & 0xFFFF;
+    uint32_t mask = (1<<(table_bits+1))-1;
+
+    unsigned int w;
+    const unsigned int B_OVER_16 = sizeof(scalar->limb[0]) / 2;
+    for (w = 1; w<(SCALAR_BITS-1)/16+3; w++) {
+        if (w < (SCALAR_BITS-1)/16+1) {
+            /* Refill the 16 high bits of current */
+            current += (uint32_t)((scalar->limb[w/B_OVER_16]>>(16*(w%B_OVER_16)))<<16);
+        }
+        
+        while (current & 0xFFFF) {
+            assert(position >= 0);
+            /* pos = lowest set bit; odd = current with trailing zeros removed */
+            uint32_t pos = __builtin_ctz((uint32_t)current), odd = (uint32_t)current >> pos;
+            int32_t delta = odd & mask;
+            /* If the bit just above the window is set, use the negative digit. */
+            if (odd & (1<<(table_bits+1))) delta -= (1<<(table_bits+1));
+            /* Multiply instead of shifting: delta may be negative, and
+             * left-shifting a negative value is undefined behaviour.
+             */
+            current -= delta * (1 << pos);
+            control[position].power = pos + 16*(w-1);
+            control[position].addend = delta;
+            position--;
+        }
+        current >>= 16;
+    }
+    assert(current==0);
+    
+    /* Entries were filled from the end of the array; slide them (and the end
+     * marker) down to index 0.
+     */
+    position++;
+    unsigned int n = table_size - position;
+    unsigned int i;
+    for (i=0; i<n; i++) {
+        control[i] = control[i+position];
+    }
+    return n-1;
+}
+
+/**
+ * Build a table of odd multiples of a point.
+ *
+ * Writes 2^tbits entries: output[k] holds (2k+1) times the input point,
+ * obtained by repeatedly adding twice the point.
+ *
+ * @param output  [out] Table with room for 2^tbits entries.
+ * @param working Point to take multiples of.
+ * @param tbits   Log2 of the number of table entries.
+ */
+static void
+prepare_wnaf_table(
+    pniels_t *output,
+    const point_t working,
+    unsigned int tbits
+) {
+    point_t acc;
+    pniels_t twice;
+    int k;
+
+    /* Entry 0 is the point itself. */
+    pt_to_pniels(output[0], working);
+    if (tbits == 0) {
+        return;
+    }
+
+    /* twice = 2P, kept in pniels form as the fixed step. */
+    API_NS(point_double)(acc,working);
+    pt_to_pniels(twice, acc);
+
+    /* acc = 2P + P = 3P */
+    add_pniels_to_pt(acc, output[0],0);
+    pt_to_pniels(output[1], acc);
+
+    /* Every further entry is the previous one plus 2P. */
+    k = 2;
+    while (k < 1<<tbits) {
+        add_pniels_to_pt(acc, twice,0);
+        pt_to_pniels(output[k], acc);
+        k++;
+    }
+
+    API_NS(point_destroy)(acc);
+    decaf_bzero(twice,sizeof(twice));
+}
+
+/* Precomputed fixed-base table, defined in another translation unit as an
+ * array of field elements and viewed here as an array of niels_t entries.
+ */
+extern const gf API_NS(precomputed_wnaf_as_fe)[];
+static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_fe);
+/* Size in bytes of that table: 2^DECAF_WNAF_FIXED_TABLE_BITS entries. */
+const size_t API_NS(sizeof_precomputed_wnafs) __attribute((visibility("hidden")))
+    = sizeof(niels_t)<<DECAF_WNAF_FIXED_TABLE_BITS;
+
+/* Prototype carrying the hidden-visibility attribute; the definition follows. */
+void API_NS(precompute_wnafs) (
+    niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
+    const point_t base
+) __attribute__ ((visibility ("hidden")));
+
+/**
+ * Build the fixed-base table of 2^DECAF_WNAF_FIXED_TABLE_BITS niels entries.
+ *
+ * The multiples are first produced by prepare_wnaf_table, then their niels
+ * parts are copied out and scaled by the inverse of the matching z value in
+ * a single batch.  All temporaries are wiped before returning.
+ *
+ * @param out  [out] Table to fill.
+ * @param base Point the table is built from.
+ */
+void API_NS(precompute_wnafs) (
+    niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
+    const point_t base
+) {
+    enum { TABLE_ENTRIES = 1<<DECAF_WNAF_FIXED_TABLE_BITS };
+    pniels_t multiples[TABLE_ENTRIES];
+    gf zs[TABLE_ENTRIES], zis[TABLE_ENTRIES];
+    int k;
+
+    prepare_wnaf_table(multiples,base,DECAF_WNAF_FIXED_TABLE_BITS);
+
+    /* Split each entry into its niels part (to out) and its z value. */
+    for (k=0; k<TABLE_ENTRIES; k++) {
+        memcpy(out[k], multiples[k]->n, sizeof(niels_t));
+        gf_copy(zs[k], multiples[k]->z);
+    }
+
+    batch_normalize_niels(out, (const gf *)zs, zis, TABLE_ENTRIES);
+
+    decaf_bzero(multiples,sizeof(multiples));
+    decaf_bzero(zs,sizeof(zs));
+    decaf_bzero(zis,sizeof(zis));
+}
+
+/**
+ * Variable-time double scalar multiplication for non-secret inputs.
+ *
+ * Combines scalar1 applied to the fixed precomputed table
+ * (API_NS(wnaf_base), presumably built from the standard base point --
+ * TODO confirm) with scalar2 applied to base2, and writes the sum to combo.
+ *
+ * Both scalars are recoded into signed-digit lists; the main loop doubles
+ * once per bit position and adds or subtracts a table entry wherever either
+ * list has a digit.  The running time depends on the scalars, so this must
+ * not be used with secret values.
+ *
+ * A zero scalar recodes to an empty list.  The result is the identity only
+ * when both lists are empty; if just scalar2 is zero the scalar1 term is
+ * still computed.
+ *
+ * @param combo   [out] Result point.
+ * @param scalar1 Scalar for the fixed table.
+ * @param base2   Variable base point.
+ * @param scalar2 Scalar for base2.
+ */
+void API_NS(base_double_scalarmul_non_secret) (
+    point_t combo,
+    const scalar_t scalar1,
+    const point_t base2,
+    const scalar_t scalar2
+) {
+    const int table_bits_var = DECAF_WNAF_VAR_TABLE_BITS,
+        table_bits_pre = DECAF_WNAF_FIXED_TABLE_BITS;
+    struct smvt_control control_var[SCALAR_BITS/(table_bits_var+1)+3];
+    struct smvt_control control_pre[SCALAR_BITS/(table_bits_pre+1)+3];
+    
+    int ncb_pre = recode_wnaf(control_pre, scalar1, table_bits_pre);
+    int ncb_var = recode_wnaf(control_var, scalar2, table_bits_var);
+  
+    pniels_t precmp_var[1<<table_bits_var];
+    prepare_wnaf_table(precmp_var, base2, table_bits_var);
+  
+    int contp=0, contv=0, i = control_var[0].power;
+
+    /* Initialise combo from whichever list has the highest leading power. */
+    if (i < 0 && control_pre[0].power < 0) {
+        /* Both lists are empty (both scalars are zero).  i is negative, so
+         * the main loop below does not run and we fall through to cleanup.
+         */
+        API_NS(point_copy)(combo, API_NS(point_identity));
+    } else if (i > control_pre[0].power) {
+        pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
+        contv++;
+    } else if (i == control_pre[0].power && i >=0 ) {
+        pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
+        add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1], i);
+        contv++; contp++;
+    } else {
+        /* The fixed-table list leads; this also covers an empty variable
+         * list (scalar2 == 0) paired with a non-zero scalar1.
+         */
+        i = control_pre[0].power;
+        niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1]);
+        contp++;
+    }
+    
+    for (i--; i >= 0; i--) {
+        /* cv / cp: does the variable / fixed list have a digit at this bit? */
+        int cv = (i==control_var[contv].power), cp = (i==control_pre[contp].power);
+        point_double_internal(combo,combo,i && !(cv||cp));
+
+        if (cv) {
+            assert(control_var[contv].addend);
+
+            /* Digits are odd, so |addend| >> 1 is the table index. */
+            if (control_var[contv].addend > 0) {
+                add_pniels_to_pt(combo, precmp_var[control_var[contv].addend >> 1], i&&!cp);
+            } else {
+                sub_pniels_from_pt(combo, precmp_var[(-control_var[contv].addend) >> 1], i&&!cp);
+            }
+            contv++;
+        }
+
+        if (cp) {
+            assert(control_pre[contp].addend);
+
+            if (control_pre[contp].addend > 0) {
+                add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[contp].addend >> 1], i);
+            } else {
+                sub_niels_from_pt(combo, API_NS(wnaf_base)[(-control_pre[contp].addend) >> 1], i);
+            }
+            contp++;
+        }
+    }
+    
+    /* This function is non-secret, but whatever this is cheap. */
+    decaf_bzero(control_var,sizeof(control_var));
+    decaf_bzero(control_pre,sizeof(control_pre));
+    decaf_bzero(precmp_var,sizeof(precmp_var));
+
+    /* Every digit of both lists must have been consumed. */
+    assert(contv == ncb_var); (void)ncb_var;
+    assert(contp == ncb_pre); (void)ncb_pre;
+}
+
+/**
+ * Overwrite a point with zeros using decaf_bzero.
+ *
+ * @param point [in,out] Point to wipe.
+ */
+void API_NS(point_destroy) (
+    point_t point
+) {
+    decaf_bzero(point, sizeof(point_t));
+}
+
+/**
+ * Overwrite a precomputed table with zeros using decaf_bzero.
+ *
+ * The number of bytes wiped is taken from API_NS(sizeof_precomputed_s).
+ *
+ * @param pre [in,out] Table to wipe.
+ */
+void API_NS(precomputed_destroy) (
+    precomputed_s *pre
+) {
+    decaf_bzero(pre, API_NS(sizeof_precomputed_s));
+}
diff --git a/crypto/ec/curve448/decaf.h b/crypto/ec/curve448/decaf.h
new file mode 100644
index 0000000000..d3cb60ce3d
--- /dev/null
+++ b/crypto/ec/curve448/decaf.h
@@ -0,0 +1,32 @@
+/**
+ * @file decaf.h
+ * @author Mike Hamburg
+ *
+ * @copyright
+ *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ *
+ * Master header for Decaf library.
+ *
+ * The Decaf library implements cryptographic operations on elliptic curve
+ * groups of prime order p.  It accomplishes this by using a twisted Edwards
+ * curve (isogenous to Ed448-Goldilocks or Ed25519) and wiping out the cofactor.
+ *
+ * The formulas are all complete and have no special cases.  However, some
+ * functions can fail.  For example, decoding functions can fail because not
+ * every string is the encoding of a valid group element.
+ *
+ * The formulas contain no data-dependent branches, timing or memory accesses,
+ * except for decaf_XXX_base_double_scalarmul_non_secret.
+ *
+ * @warning This file was automatically generated in Python.
+ * Please do not edit it.
+ */
+
+#ifndef __DECAF_H__
+#define __DECAF_H__ 1
+
+#include <decaf/point_255.h>
+#include <decaf/point_448.h>
+
+#endif /* __DECAF_H__ */
diff --git a/crypto/ec/curve448/decaf/common.h b/crypto/ec/curve448/decaf/common.h
new file mode 100644
index 0000000000..64719ad971
--- /dev/null
+++ b/crypto/ec/curve448/decaf/common.h
@@ -0,0 +1,116 @@
+/**
+ * @file decaf/common.h
+ * @author Mike Hamburg
+ *
+ * @copyright
+ *   Copyright (c) 2015 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ *
+ * @brief Common utility headers for Decaf library.
+ */
+
+#ifndef __DECAF_COMMON_H__
+#define __DECAF_COMMON_H__ 1
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Goldilocks' build flags default to hidden and stripping executables. */
+/** @cond internal */
+#if defined(DOXYGEN) && !defined(__attribute__)
+#define __attribute__(x) /* swallow GCC attribute lists when only generating docs */
+#endif
+#define DECAF_API_VIS __attribute__((visibility("default")))
+#define DECAF_NOINLINE  __attribute__((noinline))
+#define DECAF_WARN_UNUSED __attribute__((warn_unused_result))
+#define DECAF_NONNULL __attribute__((nonnull))
+#define DECAF_INLINE inline __attribute__((always_inline,unused))
+// Cribbed from libnotmuch
+#if defined (__clang_major__) && __clang_major__ >= 3 \
+    || defined (__GNUC__) && __GNUC__ >= 5 \
+    || defined (__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ >= 5
+#define DECAF_DEPRECATED(msg) __attribute__ ((deprecated(msg)))
+#else
+#define DECAF_DEPRECATED(msg) __attribute__ ((deprecated))
+#endif
+/** @endcond */
+
+/* Internal word types.
+ *
+ * Somewhat tricky.  This could be decided separately per platform.  However,
+ * the structs do need to be all the same size and alignment on a given
+ * platform to support dynamic linking, since even if your header was built
+ * with eg arch_neon, you might end up linking a library built with arch_arm32.
+ */
+#ifndef DECAF_WORD_BITS
+    #if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) || (((__UINT_FAST32_MAX__)>>30)>>30))
+        #define DECAF_WORD_BITS 64 /**< The number of bits in a word */
+    #else
+        #define DECAF_WORD_BITS 32 /**< The number of bits in a word */
+    #endif
+#endif
+    
+#if DECAF_WORD_BITS == 64
+typedef uint64_t decaf_word_t;      /**< Word size for internal computations */
+typedef int64_t decaf_sword_t;      /**< Signed word size for internal computations */
+typedef uint64_t decaf_bool_t;      /**< "Boolean" type, will be set to all-zero or all-one (i.e. every bit set) */
+typedef __uint128_t decaf_dword_t;  /**< Double-word size for internal computations */
+typedef __int128_t decaf_dsword_t;  /**< Signed double-word size for internal computations */
+#elif DECAF_WORD_BITS == 32         /* 32-bit words: double words use the standard 64-bit types */
+typedef uint32_t decaf_word_t;      /**< Word size for internal computations */
+typedef int32_t decaf_sword_t;      /**< Signed word size for internal computations */
+typedef uint32_t decaf_bool_t;      /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
+typedef uint64_t decaf_dword_t;     /**< Double-word size for internal computations */
+typedef int64_t decaf_dsword_t;     /**< Signed double-word size for internal computations */
+#else
+#error "Only supporting DECAF_WORD_BITS = 32 or 64 for now"
+#endif
+    
+/** DECAF_TRUE = -1 so that DECAF_TRUE & x = x */
+static const decaf_bool_t DECAF_TRUE = -(decaf_bool_t)1;
+
+/** DECAF_FALSE = 0 so that DECAF_FALSE & x = 0 */
+static const decaf_bool_t DECAF_FALSE = 0;
+
+/** Another boolean type used to indicate success or failure. */
+typedef enum {
+    DECAF_SUCCESS = -1, /**< The operation succeeded. */
+    DECAF_FAILURE = 0   /**< The operation failed. */
+} decaf_error_t;
+
+
+/** Convert a boolean mask to an error code by a plain cast (DECAF_FALSE gives DECAF_FAILURE) */
+static DECAF_INLINE decaf_error_t
+decaf_succeed_if(decaf_bool_t x) {
+    return (decaf_error_t)x; /* NOTE(review): assumes an all-ones x converts to -1 (DECAF_SUCCESS) — confirm */
+}
+
+/** Return DECAF_TRUE iff e == DECAF_SUCCESS, computed without a conditional */
+static DECAF_INLINE decaf_bool_t
+decaf_successful(decaf_error_t e) {
+    decaf_dword_t w = ((decaf_word_t)e) ^  ((decaf_word_t)DECAF_SUCCESS); /* w == 0 only when e matches */
+    return (w-1)>>DECAF_WORD_BITS; /* w-1 borrows into the upper word only when w == 0, giving all ones */
+}
+    
+/** Overwrite data with zeros.  Uses memset_s if available. */
+void decaf_bzero (
+    void *data,
+    size_t size
+) DECAF_NONNULL DECAF_API_VIS;
+
+/** Compare two buffers, returning DECAF_TRUE if they are equal. */
+decaf_bool_t decaf_memeq (
+    const void *data1,
+    const void *data2,
+    size_t size
+) DECAF_NONNULL DECAF_WARN_UNUSED DECAF_API_VIS;
+    
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+    
+#endif /* __DECAF_COMMON_H__ */
diff --git a/crypto/ec/curve448/decaf/ed448.h b/crypto/ec/curve448/decaf/ed448.h
new file mode 100644
index 0000000000..eeed619adf
--- /dev/null
+++ b/crypto/ec/curve448/decaf/ed448.h
@@ -0,0 +1,251 @@
+/**
+ * @file decaf/ed448.h
+ * @author Mike Hamburg
+ *
+ * @copyright
+ *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ *
+ * @brief A group of prime order p, based on Ed448-Goldilocks.
+ *
+ * @warning This file was automatically generated in Python.
+ * Please do not edit it.
+ */
+
+#ifndef __DECAF_ED448_H__
+#define __DECAF_ED448_H__ 1
+
+#include <decaf/point_448.h>
+#include <decaf/shake.h>
+#include <decaf/sha512.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Number of bytes in an EdDSA public key. */
+#define DECAF_EDDSA_448_PUBLIC_BYTES 57
+
+/** Number of bytes in an EdDSA private key. */
+#define DECAF_EDDSA_448_PRIVATE_BYTES DECAF_EDDSA_448_PUBLIC_BYTES
+
+/** Number of bytes in an EdDSA signature. */
+#define DECAF_EDDSA_448_SIGNATURE_BYTES (DECAF_EDDSA_448_PUBLIC_BYTES + DECAF_EDDSA_448_PRIVATE_BYTES)
+
+/** Does EdDSA support non-contextual signatures? */
+#define DECAF_EDDSA_448_SUPPORTS_CONTEXTLESS_SIGS 0
+
+/** Prehash context renaming macros. */
+#define decaf_ed448_prehash_ctx_s   decaf_shake256_ctx_s
+#define decaf_ed448_prehash_ctx_t   decaf_shake256_ctx_t
+#define decaf_ed448_prehash_update  decaf_shake256_update
+#define decaf_ed448_prehash_destroy decaf_shake256_destroy
+
+/** EdDSA encoding ratio. */
+#define DECAF_448_EDDSA_ENCODE_RATIO 4
+
+/** EdDSA decoding ratio. */
+#define DECAF_448_EDDSA_DECODE_RATIO (4 / 4)
+
+/**
+ * @brief EdDSA key generation.  This function uses a different (non-Decaf)
+ * encoding.
+ *
+ * @param [out] pubkey The public key.
+ * @param [in] privkey The private key.
+ */    
+void decaf_ed448_derive_public_key (
+    uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief EdDSA signing.
+ *
+ * @param [out] signature The signature.
+ * @param [in] privkey The private key.
+ * @param [in] pubkey The public key.
+ * @param [in] message The message to sign.
+ * @param [in] message_len The length of the message.
+ * @param [in] prehashed Nonzero if the message is actually the hash of something you want to sign.
+ * @param [in] context A "context" for this signature of up to 255 bytes.
+ * @param [in] context_len Length of the context.
+ *
+ * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
+ * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
+ * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
+ * you no seat belt.
+ */  
+void decaf_ed448_sign (
+    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const uint8_t *message,
+    size_t message_len,
+    uint8_t prehashed,
+    const uint8_t *context,
+    uint8_t context_len
+) DECAF_API_VIS __attribute__((nonnull(1,2,3))) DECAF_NOINLINE;
+
+/**
+ * @brief EdDSA signing with prehash.
+ *
+ * @param [out] signature The signature.
+ * @param [in] privkey The private key.
+ * @param [in] pubkey The public key.
+ * @param [in] hash The hash of the message.  This object will not be modified by the call.
+ * @param [in] context A "context" for this signature of up to 255 bytes.  Must be the same as what was used for the prehash.
+ * @param [in] context_len Length of the context.
+ *
+ * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
+ * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
+ * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
+ * you no seat belt.
+ */  
+void decaf_ed448_sign_prehash (
+    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const decaf_ed448_prehash_ctx_t hash,
+    const uint8_t *context,
+    uint8_t context_len
+) DECAF_API_VIS __attribute__((nonnull(1,2,3,4))) DECAF_NOINLINE;
+    
+/**
+ * @brief Prehash initialization, with contexts if supported.
+ *
+ * @param [out] hash The hash object to be initialized.
+ */
+void decaf_ed448_prehash_init (
+    decaf_ed448_prehash_ctx_t hash
+) DECAF_API_VIS __attribute__((nonnull(1))) DECAF_NOINLINE;
+
+/**
+ * @brief EdDSA signature verification.
+ *
+ * Uses the standard (i.e. less-strict) verification formula.
+ *
+ * @param [in] signature The signature.
+ * @param [in] pubkey The public key.
+ * @param [in] message The message to verify.
+ * @param [in] message_len The length of the message.
+ * @param [in] prehashed Nonzero if the message is actually the hash of something you want to verify.
+ * @param [in] context A "context" for this signature of up to 255 bytes.
+ * @param [in] context_len Length of the context.
+ *
+ * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
+ * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
+ * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
+ * you no seat belt.
+ */
+decaf_error_t decaf_ed448_verify (
+    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const uint8_t *message,
+    size_t message_len,
+    uint8_t prehashed,
+    const uint8_t *context,
+    uint8_t context_len
+) DECAF_API_VIS __attribute__((nonnull(1,2))) DECAF_NOINLINE;
+
+/**
+ * @brief EdDSA signature verification with prehash.
+ *
+ * Uses the standard (i.e. less-strict) verification formula.
+ *
+ * @param [in] signature The signature.
+ * @param [in] pubkey The public key.
+ * @param [in] hash The hash of the message.  This object will not be modified by the call.
+ * @param [in] context A "context" for this signature of up to 255 bytes.  Must be the same as what was used for the prehash.
+ * @param [in] context_len Length of the context.
+ *
+ * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
+ * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
+ * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
+ * you no seat belt.
+ */
+decaf_error_t decaf_ed448_verify_prehash (
+    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const decaf_ed448_prehash_ctx_t hash,
+    const uint8_t *context,
+    uint8_t context_len
+) DECAF_API_VIS __attribute__((nonnull(1,2))) DECAF_NOINLINE;
+
+/**
+ * @brief EdDSA point encoding.  Used internally, exposed externally.
+ * Multiplies by DECAF_448_EDDSA_ENCODE_RATIO first.
+ *
+ * The multiplication is required because the EdDSA encoding represents
+ * the cofactor information, but the Decaf encoding ignores it (which
+ * is the whole point).  So if you decode from EdDSA and re-encode to
+ * EdDSA, the cofactor info must get cleared, because the intermediate
+ * representation doesn't track it.
+ *
+ * The way libdecaf handles this is to multiply by
+ * DECAF_448_EDDSA_DECODE_RATIO when decoding, and by
+ * DECAF_448_EDDSA_ENCODE_RATIO when encoding.  The product of these
+ * ratios is always exactly the cofactor 4, so the cofactor
+ * ends up cleared one way or another.  But exactly how that shakes
+ * out depends on the base points specified in RFC 8032.
+ *
+ * The upshot is that if you pass the Decaf/Ristretto base point to
+ * this function, you will get DECAF_448_EDDSA_ENCODE_RATIO times the
+ * EdDSA base point.
+ *
+ * @param [out] enc The encoded point.
+ * @param [in] p The point.
+ */       
+void decaf_448_point_mul_by_ratio_and_encode_like_eddsa (
+    uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const decaf_448_point_t p
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief EdDSA point decoding.  Multiplies by DECAF_448_EDDSA_DECODE_RATIO,
+ * and ignores cofactor information.
+ *
+ * See notes on decaf_448_point_mul_by_ratio_and_encode_like_eddsa
+ *
+ * @param [in] enc The encoded point.
+ * @param [out] p The point.
+ */       
+decaf_error_t decaf_448_point_decode_like_eddsa_and_mul_by_ratio (
+    decaf_448_point_t p,
+    const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief EdDSA to ECDH public key conversion
+ * Deserialize the point to get y on Edwards curve,
+ * Convert it to u coordinate on Montgomery curve.
+ *
+ * @warning This function does not check that the public key being converted
+ * is a valid EdDSA public key (FUTURE?)
+ *
+ * @param[out] x The ECDH public key as in RFC7748(point on Montgomery curve)
+ * @param[in] ed The EdDSA public key(point on Edwards curve)
+ */
+void decaf_ed448_convert_public_key_to_x448 (
+    uint8_t x[DECAF_X448_PUBLIC_BYTES],
+    const uint8_t ed[DECAF_EDDSA_448_PUBLIC_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief EdDSA to ECDH private key conversion
+ * Using the appropriate hash function, hash the EdDSA private key
+ * and keep only the lower bytes to get the ECDH private key
+ *
+ * @param[out] x The ECDH private key as in RFC7748
+ * @param[in] ed The EdDSA private key
+ */
+void decaf_ed448_convert_private_key_to_x448 (
+    uint8_t x[DECAF_X448_PRIVATE_BYTES],
+    const uint8_t ed[DECAF_EDDSA_448_PRIVATE_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __DECAF_ED448_H__ */
diff --git a/crypto/ec/curve448/decaf/point_255.h b/crypto/ec/curve448/decaf/point_255.h
new file mode 100644
index 0000000000..94e30a5b89
--- /dev/null
+++ b/crypto/ec/curve448/decaf/point_255.h
@@ -0,0 +1,765 @@
+/**
+ * @file decaf/point_255.h
+ * @author Mike Hamburg
+ *
+ * @copyright
+ *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ *
+ * @brief A group of prime order p, based on Curve25519.
+ *
+ * @warning This file was automatically generated in Python.
+ * Please do not edit it.
+ */
+
+#ifndef __DECAF_POINT_255_H__
+#define __DECAF_POINT_255_H__ 1
+
+#include <decaf/common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @cond internal */
+#define DECAF_255_SCALAR_LIMBS ((253-1)/DECAF_WORD_BITS+1)
+/** @endcond */
+
+/** The number of bits in a scalar */
+#define DECAF_255_SCALAR_BITS 253
+
+/** @cond internal */
+#ifndef __DECAF_25519_GF_DEFINED__
+#define __DECAF_25519_GF_DEFINED__ 1
+/** @brief Galois field element internal structure: 320 bits of limb storage */
+typedef struct gf_25519_s {
+    decaf_word_t limb[320/DECAF_WORD_BITS]; /* 5 limbs with 64-bit words, 10 with 32-bit words */
+} __attribute__((aligned(32))) gf_25519_s, gf_25519_t[1]; /* gf_25519_t is a one-element array type */
+#endif /* __DECAF_25519_GF_DEFINED__ */
+/** @endcond */
+
+/** Number of bytes in a serialized point. */
+#define DECAF_255_SER_BYTES 32
+
+/** Number of bytes in an elligated point.  For now set the same as SER_BYTES
+ * but could be different for other curves.
+ */
+#define DECAF_255_HASH_BYTES 32
+
+/** Number of bytes in a serialized scalar. */
+#define DECAF_255_SCALAR_BYTES 32
+
+/** Number of bits in the "which" field of an elligator inverse */
+#define DECAF_255_INVERT_ELLIGATOR_WHICH_BITS 5
+
+/** The cofactor the curve would have, if we hadn't removed it */
+#define DECAF_255_REMOVED_COFACTOR 8
+
+/** X25519 encoding ratio. */
+#define DECAF_X25519_ENCODE_RATIO 4
+
+/** Number of bytes in an x25519 public key */
+#define DECAF_X25519_PUBLIC_BYTES 32
+
+/** Number of bytes in an x25519 private key */
+#define DECAF_X25519_PRIVATE_BYTES 32
+
+/** Twisted Edwards extended homogeneous coordinates */
+typedef struct decaf_255_point_s {
+    /** @cond internal */
+    gf_25519_t x,y,z,t;
+    /** @endcond */
+} decaf_255_point_t[1];
+
+/** Precomputed table based on a point.  Can be trivial implementation. */
+struct decaf_255_precomputed_s;
+
+/** Precomputed table based on a point.  Can be trivial implementation. */
+typedef struct decaf_255_precomputed_s decaf_255_precomputed_s; 
+
+/** Size and alignment of precomputed point tables. */
+extern const size_t decaf_255_sizeof_precomputed_s DECAF_API_VIS, decaf_255_alignof_precomputed_s DECAF_API_VIS;
+
+/** Scalar is stored packed, because we don't need the speed. */
+typedef struct decaf_255_scalar_s {
+    /** @cond internal */
+    decaf_word_t limb[DECAF_255_SCALAR_LIMBS];
+    /** @endcond */
+} decaf_255_scalar_t[1];
+
+/** A scalar equal to 1. */
+extern const decaf_255_scalar_t decaf_255_scalar_one DECAF_API_VIS;
+
+/** A scalar equal to 0. */
+extern const decaf_255_scalar_t decaf_255_scalar_zero DECAF_API_VIS;
+
+/** The identity point on the curve. */
+extern const decaf_255_point_t decaf_255_point_identity DECAF_API_VIS;
+
+/** An arbitrarily chosen base point on the curve. */
+extern const decaf_255_point_t decaf_255_point_base DECAF_API_VIS;
+
+/** Precomputed table for the base point on the curve. */
+extern const struct decaf_255_precomputed_s *decaf_255_precomputed_base DECAF_API_VIS;
+
+/**
+ * @brief Read a scalar from wire format or from bytes.
+ *
+ * @param [in] ser Serialized form of a scalar.
+ * @param [out] out Deserialized form.
+ *
+ * @retval DECAF_SUCCESS The scalar was correctly encoded.
+ * @retval DECAF_FAILURE The scalar was greater than the modulus,
+ * and has been reduced modulo that modulus.
+ */
+decaf_error_t decaf_255_scalar_decode (
+    decaf_255_scalar_t out,
+    const unsigned char ser[DECAF_255_SCALAR_BYTES]
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Read a scalar from wire format or from bytes.  Reduces mod
+ * scalar prime.
+ *
+ * @param [in] ser Serialized form of a scalar.
+ * @param [in] ser_len Length of serialized form.
+ * @param [out] out Deserialized form.
+ */
+void decaf_255_scalar_decode_long (
+    decaf_255_scalar_t out,
+    const unsigned char *ser,
+    size_t ser_len
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+    
+/**
+ * @brief Serialize a scalar to wire format.
+ *
+ * @param [out] ser Receives the DECAF_255_SCALAR_BYTES-byte serialized form.
+ * @param [in] s Deserialized scalar.
+ */
+void decaf_255_scalar_encode (
+    unsigned char ser[DECAF_255_SCALAR_BYTES],
+    const decaf_255_scalar_t s
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+        
+/**
+ * @brief Add two scalars.  The scalars may use the same memory.
+ * @param [in] a One scalar.
+ * @param [in] b Another scalar.
+ * @param [out] out a+b.
+ */
+void decaf_255_scalar_add (
+    decaf_255_scalar_t out,
+    const decaf_255_scalar_t a,
+    const decaf_255_scalar_t b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Compare two scalars.
+ * @param [in] a One scalar.
+ * @param [in] b Another scalar.
+ * @retval DECAF_TRUE The scalars are equal.
+ * @retval DECAF_FALSE The scalars are not equal.
+ */    
+decaf_bool_t decaf_255_scalar_eq (
+    const decaf_255_scalar_t a,
+    const decaf_255_scalar_t b
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Subtract two scalars.  The scalars may use the same memory.
+ * @param [in] a One scalar.
+ * @param [in] b Another scalar.
+ * @param [out] out a-b.
+ */  
+void decaf_255_scalar_sub (
+    decaf_255_scalar_t out,
+    const decaf_255_scalar_t a,
+    const decaf_255_scalar_t b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Multiply two scalars.  The scalars may use the same memory.
+ * @param [in] a One scalar.
+ * @param [in] b Another scalar.
+ * @param [out] out a*b.
+ */  
+void decaf_255_scalar_mul (
+    decaf_255_scalar_t out,
+    const decaf_255_scalar_t a,
+    const decaf_255_scalar_t b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+        
+/**
+* @brief Halve a scalar.  The scalars may use the same memory.
+* @param [in] a A scalar.
+* @param [out] out a/2.
+*/
+void decaf_255_scalar_halve (
+   decaf_255_scalar_t out,
+   const decaf_255_scalar_t a
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Invert a scalar.  When passed zero, return 0.  The input and output may alias.
+ * @param [in] a A scalar.
+ * @param [out] out 1/a.
+ * @retval DECAF_SUCCESS The input is nonzero.
+ */  
+decaf_error_t decaf_255_scalar_invert (
+    decaf_255_scalar_t out,
+    const decaf_255_scalar_t a
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Copy a scalar by assigning its whole limb structure at once.
+ * Source and destination may be the same object; the call then has no effect.
+ * @param [out] out Will become a copy of a.
+ * @param [in] a A scalar.
+ */
+static inline void DECAF_NONNULL decaf_255_scalar_copy (
+    decaf_255_scalar_t out,
+    const decaf_255_scalar_t a
+) {
+    out[0] = a[0];
+}
+
+/**
+ * @brief Set a scalar to an unsigned 64-bit integer.
+ * @param [in] a An integer.
+ * @param [out] out Will become equal to a.
+ */  
+void decaf_255_scalar_set_unsigned (
+    decaf_255_scalar_t out,
+    uint64_t a
+) DECAF_API_VIS DECAF_NONNULL;
+
+/**
+ * @brief Encode a point as a sequence of bytes.
+ *
+ * @param [out] ser The byte representation of the point.
+ * @param [in] pt The point to encode.
+ */
+void decaf_255_point_encode (
+    uint8_t ser[DECAF_255_SER_BYTES],
+    const decaf_255_point_t pt
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Decode a point from a sequence of bytes.
+ *
+ * Every point has a unique encoding, so not every
+ * sequence of bytes is a valid encoding.  If an invalid
+ * encoding is given, the output is undefined.
+ *
+ * @param [out] pt The decoded point.
+ * @param [in] ser The serialized version of the point.
+ * @param [in] allow_identity DECAF_TRUE if the identity is a legal input.
+ * @retval DECAF_SUCCESS The decoding succeeded.
+ * @retval DECAF_FAILURE The decoding didn't succeed, because
+ * ser does not represent a point.
+ */
+decaf_error_t decaf_255_point_decode (
+    decaf_255_point_t pt,
+    const uint8_t ser[DECAF_255_SER_BYTES],
+    decaf_bool_t allow_identity
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Duplicate a point by structure assignment.
+ * Aliased arguments are allowed; the call then leaves the point unchanged.
+ *
+ * @param [out] a Receives a copy of b.
+ * @param [in] b The point to copy.
+ */
+static inline void DECAF_NONNULL decaf_255_point_copy (
+    decaf_255_point_t a,
+    const decaf_255_point_t b
+) {
+    a[0] = b[0];
+}
+
+/**
+ * @brief Test whether two points are equal.  If yes, return
+ * DECAF_TRUE, else return DECAF_FALSE.
+ *
+ * @param [in] a A point.
+ * @param [in] b Another point.
+ * @retval DECAF_TRUE The points are equal.
+ * @retval DECAF_FALSE The points are not equal.
+ */
+decaf_bool_t decaf_255_point_eq (
+    const decaf_255_point_t a,
+    const decaf_255_point_t b
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Add two points to produce a third point.  The
+ * input points and output point can be pointers to the same
+ * memory.
+ *
+ * @param [out] sum The sum a+b.
+ * @param [in] a An addend.
+ * @param [in] b An addend.
+ */
+void decaf_255_point_add (
+    decaf_255_point_t sum,
+    const decaf_255_point_t a,
+    const decaf_255_point_t b
+) DECAF_API_VIS DECAF_NONNULL;
+
+/**
+ * @brief Double a point.  Equivalent to
+ * decaf_255_point_add(two_a,a,a), but potentially faster.
+ *
+ * @param [out] two_a The sum a+a.
+ * @param [in] a A point.
+ */
+void decaf_255_point_double (
+    decaf_255_point_t two_a,
+    const decaf_255_point_t a
+) DECAF_API_VIS DECAF_NONNULL;
+
+/**
+ * @brief Subtract two points to produce a third point.  The
+ * input points and output point can be pointers to the same
+ * memory.
+ *
+ * @param [out] diff The difference a-b.
+ * @param [in] a The minuend.
+ * @param [in] b The subtrahend.
+ */
+void decaf_255_point_sub (
+    decaf_255_point_t diff,
+    const decaf_255_point_t a,
+    const decaf_255_point_t b
+) DECAF_API_VIS DECAF_NONNULL;
+    
+/**
+ * @brief Negate a point to produce another point.  The input
+ * and output points can use the same memory.
+ *
+ * @param [out] nega The negated input point
+ * @param [in] a The input point.
+ */
+void decaf_255_point_negate (
+   decaf_255_point_t nega,
+   const decaf_255_point_t a
+) DECAF_API_VIS DECAF_NONNULL;
+
+/**
+ * @brief Multiply a base point by a scalar: scaled = scalar*base.
+ *
+ * @param [out] scaled The scaled point base*scalar
+ * @param [in] base The point to be scaled.
+ * @param [in] scalar The scalar to multiply by.
+ */
+void decaf_255_point_scalarmul (
+    decaf_255_point_t scaled,
+    const decaf_255_point_t base,
+    const decaf_255_scalar_t scalar
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Multiply a base point by a scalar: scaled = scalar*base.
+ * This function operates directly on serialized forms.
+ *
+ * @warning This function is experimental.  It may not be supported
+ * long-term.
+ *
+ * @param [out] scaled The scaled point base*scalar
+ * @param [in] base The point to be scaled.
+ * @param [in] scalar The scalar to multiply by.
+ * @param [in] allow_identity Allow the input to be the identity.
+ * @param [in] short_circuit Allow a fast return if the input is illegal.
+ *
+ * @retval DECAF_SUCCESS The scalarmul succeeded.
+ * @retval DECAF_FAILURE The scalarmul didn't succeed, because
+ * base does not represent a point.
+ */
+decaf_error_t decaf_255_direct_scalarmul (
+    uint8_t scaled[DECAF_255_SER_BYTES],
+    const uint8_t base[DECAF_255_SER_BYTES],
+    const decaf_255_scalar_t scalar,
+    decaf_bool_t allow_identity,
+    decaf_bool_t short_circuit
+) DECAF_API_VIS DECAF_NONNULL DECAF_WARN_UNUSED DECAF_NOINLINE;
+
+/**
+ * @brief RFC 7748 Diffie-Hellman scalarmul.  This function uses a different
+ * (non-Decaf) encoding.
+ *
+ * @param [out] out The scaled point base*scalar
+ * @param [in] base The point to be scaled.
+ * @param [in] scalar The scalar to multiply by.
+ *
+ * @retval DECAF_SUCCESS The scalarmul succeeded.
+ * @retval DECAF_FAILURE The scalarmul didn't succeed, because the base
+ * point is in a small subgroup.
+ */
+decaf_error_t decaf_x25519 (
+    uint8_t out[DECAF_X25519_PUBLIC_BYTES],
+    const uint8_t base[DECAF_X25519_PUBLIC_BYTES],
+    const uint8_t scalar[DECAF_X25519_PRIVATE_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_WARN_UNUSED DECAF_NOINLINE;
+
+/**
+ * @brief Multiply a point by DECAF_X25519_ENCODE_RATIO,
+ * then encode it like RFC 7748.
+ *
+ * This function is mainly used internally, but is exported in case
+ * it will be useful.
+ *
+ * The ratio is necessary because the internal representation doesn't
+ * track the cofactor information, so on output we must clear the cofactor.
+ * This would multiply by the cofactor, but in fact internally libdecaf's
+ * points are always even, so it multiplies by half the cofactor instead.
+ *
+ * As it happens, this aligns with the base point definitions; that is,
+ * if you pass the Decaf/Ristretto base point to this function, the result
+ * will be DECAF_X25519_ENCODE_RATIO times the X25519
+ * base point.
+ *
+ * @param [out] out The scaled and encoded point.
+ * @param [in] p The point to be scaled and encoded.
+ */
+void decaf_255_point_mul_by_ratio_and_encode_like_x25519 (
+    uint8_t out[DECAF_X25519_PUBLIC_BYTES],
+    const decaf_255_point_t p
+) DECAF_API_VIS DECAF_NONNULL;
+
+/** The base point for X25519 Diffie-Hellman */
+extern const uint8_t decaf_x25519_base_point[DECAF_X25519_PUBLIC_BYTES] DECAF_API_VIS;
+
+/**
+ * @brief RFC 7748 Diffie-Hellman base point scalarmul.  This function uses
+ * a different (non-Decaf) encoding.
+ *
+ * @deprecated Renamed to decaf_x25519_derive_public_key.
+ * I have no particular timeline for removing this name.
+ *
+ * @param [out] out The scaled point base*scalar
+ * @param [in] scalar The scalar to multiply by.
+ */
+void decaf_x25519_generate_key (
+    uint8_t out[DECAF_X25519_PUBLIC_BYTES],
+    const uint8_t scalar[DECAF_X25519_PRIVATE_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_DEPRECATED("Renamed to decaf_x25519_derive_public_key");
+    
+/**
+ * @brief RFC 7748 Diffie-Hellman base point scalarmul.  This function uses
+ * a different (non-Decaf) encoding.
+ *
+ * Does exactly the same thing as decaf_x25519_generate_key,
+ * but has a better name.
+ *
+ * @param [out] out The scaled point base*scalar
+ * @param [in] scalar The scalar to multiply by.
+ */
+void decaf_x25519_derive_public_key (
+    uint8_t out[DECAF_X25519_PUBLIC_BYTES],
+    const uint8_t scalar[DECAF_X25519_PRIVATE_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/* FUTURE: uint8_t decaf_255_encode_like_curve25519) */
+
+/**
+ * @brief Precompute a table for fast scalar multiplication.
+ * Some implementations do not include precomputed points; for
+ * those implementations, this implementation simply copies the
+ * point.
+ *
+ * @param [out] a A precomputed table of multiples of the point.
+ * @param [in] b Any point.
+ */
+void decaf_255_precompute (
+    decaf_255_precomputed_s *a,
+    const decaf_255_point_t b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Multiply a precomputed base point by a scalar:
+ * scaled = scalar*base.
+ * Some implementations do not include precomputed points; for
+ * those implementations, this function is the same as
+ * decaf_255_point_scalarmul
+ *
+ * @param [out] scaled The scaled point base*scalar
+ * @param [in] base The point to be scaled.
+ * @param [in] scalar The scalar to multiply by.
+ */
+void decaf_255_precomputed_scalarmul (
+    decaf_255_point_t scaled,
+    const decaf_255_precomputed_s *base,
+    const decaf_255_scalar_t scalar
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Multiply two base points by two scalars:
+ * scaled = scalar1*base1 + scalar2*base2.
+ *
+ * Equivalent to two calls to decaf_255_point_scalarmul, but may be
+ * faster.
+ *
+ * @param [out] combo The linear combination scalar1*base1 + scalar2*base2.
+ * @param [in] base1 A first point to be scaled.
+ * @param [in] scalar1 A first scalar to multiply by.
+ * @param [in] base2 A second point to be scaled.
+ * @param [in] scalar2 A second scalar to multiply by.
+ */
+void decaf_255_point_double_scalarmul (
+    decaf_255_point_t combo,
+    const decaf_255_point_t base1,
+    const decaf_255_scalar_t scalar1,
+    const decaf_255_point_t base2,
+    const decaf_255_scalar_t scalar2
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+    
+/**
+ * @brief Multiply one base point by two scalars:
+ *
+ * a1 = scalar1 * base
+ * a2 = scalar2 * base
+ *
+ * Equivalent to two calls to decaf_255_point_scalarmul, but may be
+ * faster.
+ *
+ * @param [out] a1 The first multiple.  It may be the same as the input point.
+ * @param [out] a2 The second multiple.  It may be the same as the input point.
+ * @param [in] base1 A point to be scaled.
+ * @param [in] scalar1 A first scalar to multiply by.
+ * @param [in] scalar2 A second scalar to multiply by.
+ */
+void decaf_255_point_dual_scalarmul (
+    decaf_255_point_t a1,
+    decaf_255_point_t a2,
+    const decaf_255_point_t base1,
+    const decaf_255_scalar_t scalar1,
+    const decaf_255_scalar_t scalar2
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Multiply the fixed base point and a second point by two scalars:
+ * combo = scalar1*decaf_255_point_base + scalar2*base2.
+ *
+ * Otherwise equivalent to decaf_255_point_double_scalarmul, but may be
+ * faster at the expense of being variable time.
+ *
+ * @param [out] combo The linear combination scalar1*decaf_255_point_base + scalar2*base2.
+ * @param [in] scalar1 A first scalar to multiply by.
+ * @param [in] base2 A second point to be scaled.
+ * @param [in] scalar2 A second scalar to multiply by.
+ *
+ * @warning This function takes variable time, and may leak the scalars
+ * used.  It is designed for signature verification.
+ */
+void decaf_255_base_double_scalarmul_non_secret (
+    decaf_255_point_t combo,
+    const decaf_255_scalar_t scalar1,
+    const decaf_255_point_t base2,
+    const decaf_255_scalar_t scalar2
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Constant-time decision between two points.  If pick_b
+ * is zero, out = a; else out = b.
+ *
+ * @param [out] out The output.  It may be the same as either input.
+ * @param [in] a Any point.
+ * @param [in] b Any point.
+ * @param [in] pick_b If nonzero, choose point b.
+ */
+void decaf_255_point_cond_sel (
+    decaf_255_point_t out,
+    const decaf_255_point_t a,
+    const decaf_255_point_t b,
+    decaf_word_t pick_b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Constant-time decision between two scalars.  If pick_b
+ * is zero, out = a; else out = b.
+ *
+ * @param [out] out The output.  It may be the same as either input.
+ * @param [in] a Any scalar.
+ * @param [in] b Any scalar.
+ * @param [in] pick_b If nonzero, choose scalar b.
+ */
+void decaf_255_scalar_cond_sel (
+    decaf_255_scalar_t out,
+    const decaf_255_scalar_t a,
+    const decaf_255_scalar_t b,
+    decaf_word_t pick_b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Test that a point is valid, for debugging purposes.
+ *
+ * @param [in] to_test The point to test.
+ * @retval DECAF_TRUE The point is valid.
+ * @retval DECAF_FALSE The point is invalid.
+ */
+decaf_bool_t decaf_255_point_valid (
+    const decaf_255_point_t to_test
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Torque a point, for debugging purposes.  The output
+ * will be equal to the input.
+ *
+ * @param [out] q The point to torque.
+ * @param [in] p The point to torque.
+ */
+void decaf_255_point_debugging_torque (
+    decaf_255_point_t q,
+    const decaf_255_point_t p
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Projectively scale a point, for debugging purposes.
+ * The output will be equal to the input, and will be valid
+ * even if the factor is zero.
+ *
+ * @param [out] q The point to scale.
+ * @param [in] p The point to scale.
+ * @param [in] factor Serialized GF factor to scale.
+ */
+void decaf_255_point_debugging_pscale (
+    decaf_255_point_t q,
+    const decaf_255_point_t p,
+    const unsigned char factor[DECAF_255_SER_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Almost-Elligator-like hash to curve.
+ *
+ * Call this function with the output of a hash to make a hash to the curve.
+ *
+ * This function runs Elligator2 on the decaf_255 Jacobi quartic model.  It then
+ * uses the isogeny to put the result in twisted Edwards form.  As a result,
+ * it is safe (cannot produce points of order 4), and would be compatible with
+ * hypothetical other implementations of Decaf using a Montgomery or untwisted
+ * Edwards model.
+ *
+ * Unlike Elligator, this function may be up to 4:1 on [0,(p-1)/2]:
+ *   A factor of 2 due to the isogeny.
+ *   A factor of 2 because we quotient out the 2-torsion.
+ *
+ * This makes it about 8:1 overall, or 16:1 overall on curves with cofactor 8.
+ *
+ * Negating the input (mod q) results in the same point.  Inverting the input
+ * (mod q) results in the negative point.  This is the same as Elligator.
+ *
+ * This function isn't quite indifferentiable from a random oracle.
+ * However, it is suitable for many protocols, including SPEKE and SPAKE2 EE. 
+ * Furthermore, calling it twice with independent seeds and adding the results
+ * is indifferentiable from a random oracle.
+ *
+ * @param [in] hashed_data Output of some hash function.
+ * @param [out] pt The data hashed to the curve.
+ */
+void
+decaf_255_point_from_hash_nonuniform (
+    decaf_255_point_t pt,
+    const unsigned char hashed_data[DECAF_255_HASH_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Indifferentiable hash function encoding to curve.
+ *
+ * Equivalent to calling decaf_255_point_from_hash_nonuniform twice and adding.
+ *
+ * @param [in] hashed_data Output of some hash function.
+ * @param [out] pt The data hashed to the curve.
+ */ 
+void decaf_255_point_from_hash_uniform (
+    decaf_255_point_t pt,
+    const unsigned char hashed_data[2*DECAF_255_HASH_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Inverse of elligator-like hash to curve.
+ *
+ * This function writes to the buffer, to make it so that
+ * decaf_255_point_from_hash_nonuniform(buffer) = pt if
+ * possible.  Since there may be multiple preimages, the
+ * "which" parameter chooses between them.  To ensure uniform
+ * inverse sampling, this function succeeds or fails
+ * independently for different "which" values.
+ *
+ * This function isn't guaranteed to find every possible
+ * preimage, but it finds all except a small finite number.
+ * In particular, when the number of bits in the modulus isn't
+ * a multiple of 8 (i.e. for curve25519), it sets the high bits
+ * independently, which enables the generated data to be uniform.
+ * But it doesn't add p, so you'll never get exactly p from this
+ * function.  This might change in the future, especially if
+ * we ever support eg Brainpool curves, where this could cause
+ * real nonuniformity.
+ *
+ * @param [out] recovered_hash Encoded data.
+ * @param [in] pt The point to encode.
+ * @param [in] which A value determining which inverse point
+ * to return.
+ *
+ * @retval DECAF_SUCCESS The inverse succeeded.
+ * @retval DECAF_FAILURE The inverse failed.
+ */
+decaf_error_t
+decaf_255_invert_elligator_nonuniform (
+    unsigned char recovered_hash[DECAF_255_HASH_BYTES],
+    const decaf_255_point_t pt,
+    uint32_t which
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_WARN_UNUSED;
+
+/**
+ * @brief Inverse of elligator-like hash to curve.
+ *
+ * This function writes to the buffer, to make it so that
+ * decaf_255_point_from_hash_uniform(buffer) = pt if
+ * possible.  Since there may be multiple preimages, the
+ * "which" parameter chooses between them.  To ensure uniform
+ * inverse sampling, this function succeeds or fails
+ * independently for different "which" values.
+ *
+ * @param [out] recovered_hash Encoded data.
+ * @param [in] pt The point to encode.
+ * @param [in] which A value determining which inverse point
+ * to return.
+ *
+ * @retval DECAF_SUCCESS The inverse succeeded.
+ * @retval DECAF_FAILURE The inverse failed.
+ */
+decaf_error_t
+decaf_255_invert_elligator_uniform (
+    unsigned char recovered_hash[2*DECAF_255_HASH_BYTES],
+    const decaf_255_point_t pt,
+    uint32_t which
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_WARN_UNUSED;
+
+/**
+ * @brief Overwrite scalar with zeros.
+ */
+void decaf_255_scalar_destroy (
+    decaf_255_scalar_t scalar
+) DECAF_NONNULL DECAF_API_VIS;
+
+/**
+ * @brief Overwrite point with zeros.
+ */
+void decaf_255_point_destroy (
+    decaf_255_point_t point
+) DECAF_NONNULL DECAF_API_VIS;
+
+/**
+ * @brief Overwrite precomputed table with zeros.
+ */
+void decaf_255_precomputed_destroy (
+    decaf_255_precomputed_s *pre
+) DECAF_NONNULL DECAF_API_VIS;
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __DECAF_POINT_255_H__ */
diff --git a/crypto/ec/curve448/decaf/point_448.h b/crypto/ec/curve448/decaf/point_448.h
new file mode 100644
index 0000000000..bc1cb43a00
--- /dev/null
+++ b/crypto/ec/curve448/decaf/point_448.h
@@ -0,0 +1,765 @@
+/**
+ * @file decaf/point_448.h
+ * @author Mike Hamburg
+ *
+ * @copyright
+ *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ *
+ * @brief A group of prime order p, based on Ed448-Goldilocks.
+ *
+ * @warning This file was automatically generated in Python.
+ * Please do not edit it.
+ */
+
+#ifndef __DECAF_POINT_448_H__
+#define __DECAF_POINT_448_H__ 1
+
+#include <decaf/common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @cond internal */
+#define DECAF_448_SCALAR_LIMBS ((446-1)/DECAF_WORD_BITS+1)
+/** @endcond */
+
+/** The number of bits in a scalar */
+#define DECAF_448_SCALAR_BITS 446
+
+/** @cond internal */
+#ifndef __DECAF_448_GF_DEFINED__
+#define __DECAF_448_GF_DEFINED__ 1
+/** @brief Galois field element internal structure */
+typedef struct gf_448_s {
+    decaf_word_t limb[512/DECAF_WORD_BITS];
+} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];
+#endif /* __DECAF_448_GF_DEFINED__ */
+/** @endcond */
+
+/** Number of bytes in a serialized point. */
+#define DECAF_448_SER_BYTES 56
+
+/** Number of bytes in an elligated point.  For now set the same as SER_BYTES
+ * but could be different for other curves.
+ */
+#define DECAF_448_HASH_BYTES 56
+
+/** Number of bytes in a serialized scalar. */
+#define DECAF_448_SCALAR_BYTES 56
+
+/** Number of bits in the "which" field of an elligator inverse */
+#define DECAF_448_INVERT_ELLIGATOR_WHICH_BITS 3
+
+/** The cofactor the curve would have, if we hadn't removed it */
+#define DECAF_448_REMOVED_COFACTOR 4
+
+/** X448 encoding ratio. */
+#define DECAF_X448_ENCODE_RATIO 2
+
+/** Number of bytes in an x448 public key */
+#define DECAF_X448_PUBLIC_BYTES 56
+
+/** Number of bytes in an x448 private key */
+#define DECAF_X448_PRIVATE_BYTES 56
+
+/** Twisted Edwards extended homogeneous coordinates */
+typedef struct decaf_448_point_s {
+    /** @cond internal */
+    gf_448_t x,y,z,t;
+    /** @endcond */
+} decaf_448_point_t[1];
+
+/** Precomputed table based on a point.  Can be trivial implementation. */
+struct decaf_448_precomputed_s;
+
+/** Precomputed table based on a point.  Can be trivial implementation. */
+typedef struct decaf_448_precomputed_s decaf_448_precomputed_s; 
+
+/** Size and alignment of precomputed point tables. */
+extern const size_t decaf_448_sizeof_precomputed_s DECAF_API_VIS, decaf_448_alignof_precomputed_s DECAF_API_VIS;
+
+/** Scalar is stored packed, because we don't need the speed. */
+typedef struct decaf_448_scalar_s {
+    /** @cond internal */
+    decaf_word_t limb[DECAF_448_SCALAR_LIMBS];
+    /** @endcond */
+} decaf_448_scalar_t[1];
+
+/** A scalar equal to 1. */
+extern const decaf_448_scalar_t decaf_448_scalar_one DECAF_API_VIS;
+
+/** A scalar equal to 0. */
+extern const decaf_448_scalar_t decaf_448_scalar_zero DECAF_API_VIS;
+
+/** The identity point on the curve. */
+extern const decaf_448_point_t decaf_448_point_identity DECAF_API_VIS;
+
+/** An arbitrarily chosen base point on the curve. */
+extern const decaf_448_point_t decaf_448_point_base DECAF_API_VIS;
+
+/** Precomputed table for the base point on the curve. */
+extern const struct decaf_448_precomputed_s *decaf_448_precomputed_base DECAF_API_VIS;
+
+/**
+ * @brief Read a scalar from wire format or from bytes.
+ *
+ * @param [in] ser Serialized form of a scalar.
+ * @param [out] out Deserialized form.
+ *
+ * @retval DECAF_SUCCESS The scalar was correctly encoded.
+ * @retval DECAF_FAILURE The scalar was greater than the modulus,
+ * and has been reduced modulo that modulus.
+ */
+decaf_error_t decaf_448_scalar_decode (
+    decaf_448_scalar_t out,
+    const unsigned char ser[DECAF_448_SCALAR_BYTES]
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Read a scalar from wire format or from bytes.  Reduces mod
+ * scalar prime.
+ *
+ * @param [in] ser Serialized form of a scalar.
+ * @param [in] ser_len Length of serialized form.
+ * @param [out] out Deserialized form.
+ */
+void decaf_448_scalar_decode_long (
+    decaf_448_scalar_t out,
+    const unsigned char *ser,
+    size_t ser_len
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+    
+/**
+ * @brief Serialize a scalar to wire format.
+ *
+ * @param [out] ser Serialized form of the scalar (DECAF_448_SCALAR_BYTES bytes).
+ * @param [in] s Deserialized scalar.
+ */
+void decaf_448_scalar_encode (
+    unsigned char ser[DECAF_448_SCALAR_BYTES],
+    const decaf_448_scalar_t s
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+        
+/**
+ * @brief Add two scalars.  The scalars may use the same memory.
+ * @param [in] a One scalar.
+ * @param [in] b Another scalar.
+ * @param [out] out a+b.
+ */
+void decaf_448_scalar_add (
+    decaf_448_scalar_t out,
+    const decaf_448_scalar_t a,
+    const decaf_448_scalar_t b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Compare two scalars.
+ * @param [in] a One scalar.
+ * @param [in] b Another scalar.
+ * @retval DECAF_TRUE The scalars are equal.
+ * @retval DECAF_FALSE The scalars are not equal.
+ */    
+decaf_bool_t decaf_448_scalar_eq (
+    const decaf_448_scalar_t a,
+    const decaf_448_scalar_t b
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Subtract two scalars.  The scalars may use the same memory.
+ * @param [in] a One scalar.
+ * @param [in] b Another scalar.
+ * @param [out] out a-b.
+ */  
+void decaf_448_scalar_sub (
+    decaf_448_scalar_t out,
+    const decaf_448_scalar_t a,
+    const decaf_448_scalar_t b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Multiply two scalars.  The scalars may use the same memory.
+ * @param [in] a One scalar.
+ * @param [in] b Another scalar.
+ * @param [out] out a*b.
+ */  
+void decaf_448_scalar_mul (
+    decaf_448_scalar_t out,
+    const decaf_448_scalar_t a,
+    const decaf_448_scalar_t b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+        
+/**
+ * @brief Halve a scalar.  The input and output may use the same memory.
+ * @param [in] a A scalar.
+ * @param [out] out a/2.
+ */
+void decaf_448_scalar_halve (
+   decaf_448_scalar_t out,
+   const decaf_448_scalar_t a
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Invert a scalar.  When passed zero, return 0.  The input and output may alias.
+ * @param [in] a A scalar.
+ * @param [out] out 1/a.
+ * @retval DECAF_SUCCESS The input is nonzero.
+ */  
+decaf_error_t decaf_448_scalar_invert (
+    decaf_448_scalar_t out,
+    const decaf_448_scalar_t a
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Duplicate a scalar.  out and a are allowed to be the same
+ * object; the assignment then leaves the value unchanged.
+ * @param [in] a The scalar to read.
+ * @param [out] out Receives a copy of a.
+ */
+static inline void DECAF_NONNULL decaf_448_scalar_copy (
+    decaf_448_scalar_t out,
+    const decaf_448_scalar_t a
+) {
+    out[0] = a[0];
+}
+
+/**
+ * @brief Set a scalar to an unsigned 64-bit integer.
+ * @param [in] a An integer.
+ * @param [out] out Will become equal to a.
+ */  
+void decaf_448_scalar_set_unsigned (
+    decaf_448_scalar_t out,
+    uint64_t a
+) DECAF_API_VIS DECAF_NONNULL;
+
+/**
+ * @brief Encode a point as a sequence of bytes.
+ *
+ * @param [out] ser The byte representation of the point.
+ * @param [in] pt The point to encode.
+ */
+void decaf_448_point_encode (
+    uint8_t ser[DECAF_448_SER_BYTES],
+    const decaf_448_point_t pt
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Decode a point from a sequence of bytes.
+ *
+ * Every point has a unique encoding, so not every
+ * sequence of bytes is a valid encoding.  If an invalid
+ * encoding is given, the output is undefined.
+ *
+ * @param [out] pt The decoded point.
+ * @param [in] ser The serialized version of the point.
+ * @param [in] allow_identity DECAF_TRUE if the identity is a legal input.
+ * @retval DECAF_SUCCESS The decoding succeeded.
+ * @retval DECAF_FAILURE The decoding didn't succeed, because
+ * ser does not represent a point.
+ */
+decaf_error_t decaf_448_point_decode (
+    decaf_448_point_t pt,
+    const uint8_t ser[DECAF_448_SER_BYTES],
+    decaf_bool_t allow_identity
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Duplicate a point.  a and b are allowed to be the same
+ * object; the assignment then leaves the value unchanged.
+ *
+ * @param [out] a Receives a copy of b.
+ * @param [in] b The point to read.
+ */
+static inline void DECAF_NONNULL decaf_448_point_copy (
+    decaf_448_point_t a,
+    const decaf_448_point_t b
+) {
+    a[0] = b[0];
+}
+
+/**
+ * @brief Test whether two points are equal.  If yes, return
+ * DECAF_TRUE, else return DECAF_FALSE.
+ *
+ * @param [in] a A point.
+ * @param [in] b Another point.
+ * @retval DECAF_TRUE The points are equal.
+ * @retval DECAF_FALSE The points are not equal.
+ */
+decaf_bool_t decaf_448_point_eq (
+    const decaf_448_point_t a,
+    const decaf_448_point_t b
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Add two points to produce a third point.  The
+ * input points and output point can be pointers to the same
+ * memory.
+ *
+ * @param [out] sum The sum a+b.
+ * @param [in] a An addend.
+ * @param [in] b An addend.
+ */
+void decaf_448_point_add (
+    decaf_448_point_t sum,
+    const decaf_448_point_t a,
+    const decaf_448_point_t b
+) DECAF_API_VIS DECAF_NONNULL;
+
+/**
+ * @brief Double a point.  Equivalent to
+ * decaf_448_point_add(two_a,a,a), but potentially faster.
+ *
+ * @param [out] two_a The sum a+a.
+ * @param [in] a A point.
+ */
+void decaf_448_point_double (
+    decaf_448_point_t two_a,
+    const decaf_448_point_t a
+) DECAF_API_VIS DECAF_NONNULL;
+
+/**
+ * @brief Subtract two points to produce a third point.  The
+ * input points and output point can be pointers to the same
+ * memory.
+ *
+ * @param [out] diff The difference a-b.
+ * @param [in] a The minuend.
+ * @param [in] b The subtrahend.
+ */
+void decaf_448_point_sub (
+    decaf_448_point_t diff,
+    const decaf_448_point_t a,
+    const decaf_448_point_t b
+) DECAF_API_VIS DECAF_NONNULL;
+    
+/**
+ * @brief Negate a point to produce another point.  The input
+ * and output points can use the same memory.
+ *
+ * @param [out] nega The negated input point
+ * @param [in] a The input point.
+ */
+void decaf_448_point_negate (
+   decaf_448_point_t nega,
+   const decaf_448_point_t a
+) DECAF_API_VIS DECAF_NONNULL;
+
+/**
+ * @brief Multiply a base point by a scalar: scaled = scalar*base.
+ *
+ * @param [out] scaled The scaled point base*scalar
+ * @param [in] base The point to be scaled.
+ * @param [in] scalar The scalar to multiply by.
+ */
+void decaf_448_point_scalarmul (
+    decaf_448_point_t scaled,
+    const decaf_448_point_t base,
+    const decaf_448_scalar_t scalar
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Multiply a base point by a scalar: scaled = scalar*base.
+ * This function operates directly on serialized forms.
+ *
+ * @warning This function is experimental.  It may not be supported
+ * long-term.
+ *
+ * @param [out] scaled The scaled point base*scalar
+ * @param [in] base The point to be scaled.
+ * @param [in] scalar The scalar to multiply by.
+ * @param [in] allow_identity Allow the input to be the identity.
+ * @param [in] short_circuit Allow a fast return if the input is illegal.
+ *
+ * @retval DECAF_SUCCESS The scalarmul succeeded.
+ * @retval DECAF_FAILURE The scalarmul didn't succeed, because
+ * base does not represent a point.
+ */
+decaf_error_t decaf_448_direct_scalarmul (
+    uint8_t scaled[DECAF_448_SER_BYTES],
+    const uint8_t base[DECAF_448_SER_BYTES],
+    const decaf_448_scalar_t scalar,
+    decaf_bool_t allow_identity,
+    decaf_bool_t short_circuit
+) DECAF_API_VIS DECAF_NONNULL DECAF_WARN_UNUSED DECAF_NOINLINE;
+
+/**
+ * @brief RFC 7748 Diffie-Hellman scalarmul.  This function uses a different
+ * (non-Decaf) encoding.
+ *
+ * @param [out] out The scaled point base*scalar
+ * @param [in] base The point to be scaled.
+ * @param [in] scalar The scalar to multiply by.
+ *
+ * @retval DECAF_SUCCESS The scalarmul succeeded.
+ * @retval DECAF_FAILURE The scalarmul didn't succeed, because the base
+ * point is in a small subgroup.
+ */
+decaf_error_t decaf_x448 (
+    uint8_t out[DECAF_X448_PUBLIC_BYTES],
+    const uint8_t base[DECAF_X448_PUBLIC_BYTES],
+    const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_WARN_UNUSED DECAF_NOINLINE;
+
+/**
+ * @brief Multiply a point by DECAF_X448_ENCODE_RATIO,
+ * then encode it like RFC 7748.
+ *
+ * This function is mainly used internally, but is exported in case
+ * it will be useful.
+ *
+ * The ratio is necessary because the internal representation doesn't
+ * track the cofactor information, so on output we must clear the cofactor.
+ * This would multiply by the cofactor, but in fact internally libdecaf's
+ * points are always even, so it multiplies by half the cofactor instead.
+ *
+ * As it happens, this aligns with the base point definitions; that is,
+ * if you pass the Decaf/Ristretto base point to this function, the result
+ * will be DECAF_X448_ENCODE_RATIO times the X448
+ * base point.
+ *
+ * @param [out] out The scaled and encoded point.
+ * @param [in] p The point to be scaled and encoded.
+ */
+void decaf_448_point_mul_by_ratio_and_encode_like_x448 (
+    uint8_t out[DECAF_X448_PUBLIC_BYTES],
+    const decaf_448_point_t p
+) DECAF_API_VIS DECAF_NONNULL;
+
+/** The base point for X448 Diffie-Hellman */
+extern const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES] DECAF_API_VIS;
+
+/**
+ * @brief RFC 7748 Diffie-Hellman base point scalarmul.  This function uses
+ * a different (non-Decaf) encoding.
+ *
+ * @deprecated Renamed to decaf_x448_derive_public_key.
+ * I have no particular timeline for removing this name.
+ *
+ * @param [out] out The scaled point base*scalar
+ * @param [in] scalar The scalar to multiply by.
+ */
+void decaf_x448_generate_key (
+    uint8_t out[DECAF_X448_PUBLIC_BYTES],
+    const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_DEPRECATED("Renamed to decaf_x448_derive_public_key");
+    
+/**
+ * @brief RFC 7748 Diffie-Hellman base point scalarmul.  This function uses
+ * a different (non-Decaf) encoding.
+ *
+ * Does exactly the same thing as decaf_x448_generate_key,
+ * but has a better name.
+ *
+ * @param [out] out The scaled point base*scalar
+ * @param [in] scalar The scalar to multiply by.
+ */
+void decaf_x448_derive_public_key (
+    uint8_t out[DECAF_X448_PUBLIC_BYTES],
+    const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/* FUTURE: uint8_t decaf_448_encode_like_curve448() */
+
+/**
+ * @brief Precompute a table for fast scalar multiplication.
+ * Some implementations do not include precomputed points; for
+ * those implementations, this implementation simply copies the
+ * point.
+ *
+ * @param [out] a A precomputed table of multiples of the point.
+ * @param [in] b Any point.
+ */
+void decaf_448_precompute (
+    decaf_448_precomputed_s *a,
+    const decaf_448_point_t b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Multiply a precomputed base point by a scalar:
+ * scaled = scalar*base.
+ * Some implementations do not include precomputed points; for
+ * those implementations, this function is the same as
+ * decaf_448_point_scalarmul
+ *
+ * @param [out] scaled The scaled point base*scalar
+ * @param [in] base The point to be scaled.
+ * @param [in] scalar The scalar to multiply by.
+ */
+void decaf_448_precomputed_scalarmul (
+    decaf_448_point_t scaled,
+    const decaf_448_precomputed_s *base,
+    const decaf_448_scalar_t scalar
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Multiply two base points by two scalars:
+ * combo = scalar1*base1 + scalar2*base2.
+ *
+ * Equivalent to two calls to decaf_448_point_scalarmul followed by an
+ * addition, but may be faster.
+ *
+ * @param [out] combo The linear combination scalar1*base1 + scalar2*base2.
+ * @param [in] base1 A first point to be scaled.
+ * @param [in] scalar1 A first scalar to multiply by.
+ * @param [in] base2 A second point to be scaled.
+ * @param [in] scalar2 A second scalar to multiply by.
+ */
+void decaf_448_point_double_scalarmul (
+    decaf_448_point_t combo,
+    const decaf_448_point_t base1,
+    const decaf_448_scalar_t scalar1,
+    const decaf_448_point_t base2,
+    const decaf_448_scalar_t scalar2
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+    
+/**
+ * @brief Multiply one base point by two scalars:
+ *
+ * a1 = scalar1 * base1
+ * a2 = scalar2 * base1
+ *
+ * Equivalent to two calls to decaf_448_point_scalarmul, but may be
+ * faster.
+ *
+ * @param [out] a1 The first multiple.  It may be the same as the input point.
+ * @param [out] a2 The second multiple.  It may be the same as the input point.
+ * @param [in] base1 A point to be scaled.
+ * @param [in] scalar1 A first scalar to multiply by.
+ * @param [in] scalar2 A second scalar to multiply by.
+ */
+void decaf_448_point_dual_scalarmul (
+    decaf_448_point_t a1,
+    decaf_448_point_t a2,
+    const decaf_448_point_t base1,
+    const decaf_448_scalar_t scalar1,
+    const decaf_448_scalar_t scalar2
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Multiply the fixed base point and a second point by two scalars:
+ * combo = scalar1*decaf_448_point_base + scalar2*base2.
+ *
+ * Otherwise equivalent to decaf_448_point_double_scalarmul, but may be
+ * faster at the expense of being variable time.
+ *
+ * @param [out] combo The linear combination scalar1*decaf_448_point_base + scalar2*base2.
+ * @param [in] scalar1 A first scalar to multiply by.
+ * @param [in] base2 A second point to be scaled.
+ * @param [in] scalar2 A second scalar to multiply by.
+ *
+ * @warning This function takes variable time, and may leak the scalars
+ * used.  It is designed for signature verification.
+ */
+void decaf_448_base_double_scalarmul_non_secret (
+    decaf_448_point_t combo,
+    const decaf_448_scalar_t scalar1,
+    const decaf_448_point_t base2,
+    const decaf_448_scalar_t scalar2
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Constant-time decision between two points.  If pick_b
+ * is zero, out = a; else out = b.
+ *
+ * @param [out] out The output.  It may be the same as either input.
+ * @param [in] a Any point.
+ * @param [in] b Any point.
+ * @param [in] pick_b If nonzero, choose point b.
+ */
+void decaf_448_point_cond_sel (
+    decaf_448_point_t out,
+    const decaf_448_point_t a,
+    const decaf_448_point_t b,
+    decaf_word_t pick_b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Constant-time decision between two scalars.  If pick_b
+ * is zero, out = a; else out = b.
+ *
+ * @param [out] out The output.  It may be the same as either input.
+ * @param [in] a Any scalar.
+ * @param [in] b Any scalar.
+ * @param [in] pick_b If nonzero, choose scalar b.
+ */
+void decaf_448_scalar_cond_sel (
+    decaf_448_scalar_t out,
+    const decaf_448_scalar_t a,
+    const decaf_448_scalar_t b,
+    decaf_word_t pick_b
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Test that a point is valid, for debugging purposes.
+ *
+ * @param [in] to_test The point to test.
+ * @retval DECAF_TRUE The point is valid.
+ * @retval DECAF_FALSE The point is invalid.
+ */
+decaf_bool_t decaf_448_point_valid (
+    const decaf_448_point_t to_test
+) DECAF_API_VIS DECAF_WARN_UNUSED DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Torque a point, for debugging purposes.  The output
+ * will be equal to the input.
+ *
+ * @param [out] q The point to torque.
+ * @param [in] p The point to torque.
+ */
+void decaf_448_point_debugging_torque (
+    decaf_448_point_t q,
+    const decaf_448_point_t p
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Projectively scale a point, for debugging purposes.
+ * The output will be equal to the input, and will be valid
+ * even if the factor is zero.
+ *
+ * @param [out] q The point to scale.
+ * @param [in] p The point to scale.
+ * @param [in] factor Serialized GF factor to scale.
+ */
+void decaf_448_point_debugging_pscale (
+    decaf_448_point_t q,
+    const decaf_448_point_t p,
+    const unsigned char factor[DECAF_448_SER_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Almost-Elligator-like hash to curve.
+ *
+ * Call this function with the output of a hash to make a hash to the curve.
+ *
+ * This function runs Elligator2 on the decaf_448 Jacobi quartic model.  It then
+ * uses the isogeny to put the result in twisted Edwards form.  As a result,
+ * it is safe (cannot produce points of order 4), and would be compatible with
+ * hypothetical other implementations of Decaf using a Montgomery or untwisted
+ * Edwards model.
+ *
+ * Unlike Elligator, this function may be up to 4:1 on [0,(p-1)/2]:
+ *   A factor of 2 due to the isogeny.
+ *   A factor of 2 because we quotient out the 2-torsion.
+ *
+ * This makes it about 8:1 overall, or 16:1 overall on curves with cofactor 8.
+ *
+ * Negating the input (mod q) results in the same point.  Inverting the input
+ * (mod q) results in the negative point.  This is the same as Elligator.
+ *
+ * This function isn't quite indifferentiable from a random oracle.
+ * However, it is suitable for many protocols, including SPEKE and SPAKE2 EE. 
+ * Furthermore, calling it twice with independent seeds and adding the results
+ * is indifferentiable from a random oracle.
+ *
+ * @param [in] hashed_data Output of some hash function.
+ * @param [out] pt The data hashed to the curve.
+ */
+void
+decaf_448_point_from_hash_nonuniform (
+    decaf_448_point_t pt,
+    const unsigned char hashed_data[DECAF_448_HASH_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Indifferentiable hash function encoding to curve.
+ *
+ * Equivalent to calling decaf_448_point_from_hash_nonuniform twice and adding.
+ *
+ * @param [in] hashed_data Output of some hash function.
+ * @param [out] pt The data hashed to the curve.
+ */ 
+void decaf_448_point_from_hash_uniform (
+    decaf_448_point_t pt,
+    const unsigned char hashed_data[2*DECAF_448_HASH_BYTES]
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE;
+
+/**
+ * @brief Inverse of elligator-like hash to curve.
+ *
+ * This function writes to the buffer, to make it so that
+ * decaf_448_point_from_hash_nonuniform(buffer) = pt if
+ * possible.  Since there may be multiple preimages, the
+ * "which" parameter chooses between them.  To ensure uniform
+ * inverse sampling, this function succeeds or fails
+ * independently for different "which" values.
+ *
+ * This function isn't guaranteed to find every possible
+ * preimage, but it finds all except a small finite number.
+ * In particular, when the number of bits in the modulus isn't
+ * a multiple of 8 (i.e. for curve25519), it sets the high bits
+ * independently, which enables the generated data to be uniform.
+ * But it doesn't add p, so you'll never get exactly p from this
+ * function.  This might change in the future, especially if
+ * we ever support eg Brainpool curves, where this could cause
+ * real nonuniformity.
+ *
+ * @param [out] recovered_hash Encoded data.
+ * @param [in] pt The point to encode.
+ * @param [in] which A value determining which inverse point
+ * to return.
+ *
+ * @retval DECAF_SUCCESS The inverse succeeded.
+ * @retval DECAF_FAILURE The inverse failed.
+ */
+decaf_error_t
+decaf_448_invert_elligator_nonuniform (
+    unsigned char recovered_hash[DECAF_448_HASH_BYTES],
+    const decaf_448_point_t pt,
+    uint32_t which
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_WARN_UNUSED;
+
+/**
+ * @brief Inverse of elligator-like hash to curve.
+ *
+ * This function writes to the buffer, to make it so that
+ * decaf_448_point_from_hash_uniform(buffer) = pt if
+ * possible.  Since there may be multiple preimages, the
+ * "which" parameter chooses between them.  To ensure uniform
+ * inverse sampling, this function succeeds or fails
+ * independently for different "which" values.
+ *
+ * @param [out] recovered_hash Encoded data.
+ * @param [in] pt The point to encode.
+ * @param [in] which A value determining which inverse point
+ * to return.
+ *
+ * @retval DECAF_SUCCESS The inverse succeeded.
+ * @retval DECAF_FAILURE The inverse failed.
+ */
+decaf_error_t
+decaf_448_invert_elligator_uniform (
+    unsigned char recovered_hash[2*DECAF_448_HASH_BYTES],
+    const decaf_448_point_t pt,
+    uint32_t which
+) DECAF_API_VIS DECAF_NONNULL DECAF_NOINLINE DECAF_WARN_UNUSED;
+
+/**
+ * @brief Overwrite scalar with zeros.
+ */
+void decaf_448_scalar_destroy (
+    decaf_448_scalar_t scalar
+) DECAF_NONNULL DECAF_API_VIS;
+
+/**
+ * @brief Overwrite point with zeros.
+ */
+void decaf_448_point_destroy (
+    decaf_448_point_t point
+) DECAF_NONNULL DECAF_API_VIS;
+
+/**
+ * @brief Overwrite precomputed table with zeros.
+ */
+void decaf_448_precomputed_destroy (
+    decaf_448_precomputed_s *pre
+) DECAF_NONNULL DECAF_API_VIS;
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __DECAF_POINT_448_H__ */
diff --git a/crypto/ec/curve448/decaf/sha512.h b/crypto/ec/curve448/decaf/sha512.h
new file mode 100644
index 0000000000..3c8ec70e93
--- /dev/null
+++ b/crypto/ec/curve448/decaf/sha512.h
@@ -0,0 +1,53 @@
+/**
+ * @file decaf/sha512.h
+ * @copyright Public domain.
+ * @author Mike Hamburg
+ * @brief SHA2-512
+ */
+
+#ifndef __DECAF_SHA512_H__
+#define __DECAF_SHA512_H__
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <stdlib.h> /* for NULL */
+
+#include <decaf/common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+    
+
+typedef struct decaf_sha512_ctx_s { /* SHA2-512 hashing context */
+    uint64_t state[8]; /* eight 64-bit state words */
+    uint8_t block[128]; /* 128-byte buffer; presumably input not yet compressed -- confirm in the implementation */
+    uint64_t bytes_processed; /* presumably a running count of message bytes -- confirm in the implementation */
+} decaf_sha512_ctx_s, decaf_sha512_ctx_t[1]; /* _t is a one-element array, so it is passed by reference */
+
+void decaf_sha512_init(decaf_sha512_ctx_t ctx) DECAF_NONNULL DECAF_API_VIS;
+void decaf_sha512_update(decaf_sha512_ctx_t ctx, const uint8_t *message, size_t length) DECAF_NONNULL DECAF_API_VIS;
+void decaf_sha512_final(decaf_sha512_ctx_t ctx, uint8_t *out, size_t length) DECAF_NONNULL DECAF_API_VIS;
+
+static inline void decaf_sha512_destroy(decaf_sha512_ctx_t ctx) { /* wipe a context via decaf_bzero */
+    decaf_bzero(ctx,sizeof(*ctx)); /* ctx has decayed to a pointer, so sizeof(*ctx) is the full struct size */
+}
+
+static inline void decaf_sha512_hash( /* one-shot: hash a single buffer with a temporary context */
+    uint8_t *output, /* receives the digest bytes */
+    size_t output_len, /* passed through to decaf_sha512_final as its length */
+    const uint8_t *message, /* data to hash */
+    size_t message_len /* length of message in bytes */
+) {
+    decaf_sha512_ctx_t ctx; /* local context, set up by decaf_sha512_init below */
+    decaf_sha512_init(ctx);
+    decaf_sha512_update(ctx,message,message_len);
+    decaf_sha512_final(ctx,output,output_len);
+    decaf_sha512_destroy(ctx); /* do not leave hash state behind in the local */
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+    
+#endif /* __DECAF_SHA512_H__ */
diff --git a/crypto/ec/curve448/decaf/shake.h b/crypto/ec/curve448/decaf/shake.h
new file mode 100644
index 0000000000..ae125b923a
--- /dev/null
+++ b/crypto/ec/curve448/decaf/shake.h
@@ -0,0 +1,219 @@
+/**
+ * @file decaf/shake.h
+ * @copyright
+ *   Based on CC0 code by David Leon Gil, 2015 \n
+ *   Copyright (c) 2015 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ * @author Mike Hamburg
+ * @brief SHA-3-n and DECAF_SHAKE-n instances.
+ */
+
+#ifndef __DECAF_SHAKE_H__
+#define __DECAF_SHAKE_H__
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <stdlib.h> /* for NULL */
+
+#include <decaf/common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef INTERNAL_SPONGE_STRUCT
+    /** Sponge container object for the various primitives. */
+    typedef struct decaf_keccak_sponge_s {
+        /** @cond internal */
+        uint64_t opaque[26];
+        /** @endcond */
+    } decaf_keccak_sponge_s;
+
+    /** Convenience GMP-style one-element array version */
+    typedef struct decaf_keccak_sponge_s decaf_keccak_sponge_t[1];
+
+    /** Parameters for sponge construction, distinguishing DECAF_SHA3 and
+     * DECAF_SHAKE instances.
+     */
+    struct decaf_kparams_s;
+#endif
+
+/**
+ * @brief Initialize a sponge context object.
+ * @param [out] sponge The object to initialize.
+ * @param [in] params The sponge's parameter description.
+ */
+void decaf_sha3_init (
+    decaf_keccak_sponge_t sponge,
+    const struct decaf_kparams_s *params
+) DECAF_API_VIS;
+
+/**
+ * @brief Absorb data into a DECAF_SHA3 or DECAF_SHAKE hash context.
+ * @param [inout] sponge The context.
+ * @param [in] in The input data.
+ * @param [in] len The input data's length in bytes.
+ * @return DECAF_FAILURE if the sponge has already been used for output.
+ * @return DECAF_SUCCESS otherwise.
+ */
+decaf_error_t decaf_sha3_update (
+    struct decaf_keccak_sponge_s * __restrict__ sponge,
+    const uint8_t *in,
+    size_t len
+) DECAF_API_VIS;
+
+/**
+ * @brief Squeeze output data from a DECAF_SHA3 or DECAF_SHAKE hash context.
+ * This does not destroy or re-initialize the hash context, and
+ * decaf_sha3_output() can be called more times.
+ *
+ * @param [inout] sponge The context.
+ * @param [out] out The output data.
+ * @param [in] len The requested output data length in bytes.
+ * @return DECAF_FAILURE if the sponge has exhausted its output capacity.
+ * @return DECAF_SUCCESS otherwise.
+ */  
+decaf_error_t decaf_sha3_output (
+    decaf_keccak_sponge_t sponge,
+    uint8_t * __restrict__ out,
+    size_t len
+) DECAF_API_VIS;
+
+/**
+ * @brief Squeeze output data from a DECAF_SHA3 or DECAF_SHAKE hash context.
+ * This re-initializes the context to its starting parameters.
+ *
+ * @param [inout] sponge The context.
+ * @param [out] out The output data.
+ * @param [in] len The requested output data length in bytes.
+ */  
+decaf_error_t decaf_sha3_final (
+    decaf_keccak_sponge_t sponge,
+    uint8_t * __restrict__ out,
+    size_t len
+) DECAF_API_VIS;
+
+/**
+ * @brief Reset the sponge to the empty string.
+ *
+ * @param [inout] sponge The context.
+ */  
+void decaf_sha3_reset (
+    decaf_keccak_sponge_t sponge
+) DECAF_API_VIS;
+
+/**
+ * @brief Return the default output length of the sponge construction,
+ * for the purpose of C++ default operators.
+ *
+ * Returns n/8 for DECAF_SHA3-n and 2n/8 for DECAF_SHAKE-n.
+ */  
+size_t decaf_sha3_default_output_bytes (
+    const decaf_keccak_sponge_t sponge /**< [in] The context. */
+) DECAF_API_VIS;
+
+/**
+ * @brief Return the maximum output length of the sponge construction,
+ * for the purpose of C++ default operators.
+ *
+ * Returns n/8 for DECAF_SHA3-n and SIZE_MAX for DECAF_SHAKE-n.
+ */  
+size_t decaf_sha3_max_output_bytes (
+    const decaf_keccak_sponge_t sponge /**< [in] The context. */
+) DECAF_API_VIS;
+
+/**
+ * @brief Destroy a DECAF_SHA3 or DECAF_SHAKE sponge context by overwriting it with 0.
+ * @param [out] sponge The context.
+ */  
+void decaf_sha3_destroy (
+    decaf_keccak_sponge_t sponge
+) DECAF_API_VIS;
+
+/**
+ * @brief Hash (in) to (out)
+ * @param [in] in The input data.
+ * @param [in] inlen The length of the input data.
+ * @param [out] out A buffer for the output data.
+ * @param [in] outlen The length of the output data.
+ * @param [in] params The parameters of the sponge hash.
+ */  
+decaf_error_t decaf_sha3_hash (
+    uint8_t *out,
+    size_t outlen,
+    const uint8_t *in,
+    size_t inlen,
+    const struct decaf_kparams_s *params
+) DECAF_API_VIS;
+
+/* FUTURE: expand/doxygenate individual DECAF_SHAKE/DECAF_SHA3 instances? */
+
+/** @cond internal */
+#define DECAF_DEC_SHAKE(n) /* declares the DECAF_SHAKE-n parameter object, context type and inline wrappers */ \
+    extern const struct decaf_kparams_s DECAF_SHAKE##n##_params_s DECAF_API_VIS; \
+    typedef struct decaf_shake##n##_ctx_s { decaf_keccak_sponge_t s; } decaf_shake##n##_ctx_t[1]; \
+    static inline void DECAF_NONNULL decaf_shake##n##_init(decaf_shake##n##_ctx_t sponge) { \
+        decaf_sha3_init(sponge->s, &DECAF_SHAKE##n##_params_s); \
+    } \
+    static inline void DECAF_NONNULL decaf_shake##n##_gen_init(decaf_keccak_sponge_t sponge) { /* same as _init, but on a bare sponge */ \
+        decaf_sha3_init(sponge, &DECAF_SHAKE##n##_params_s); \
+    } \
+    static inline decaf_error_t DECAF_NONNULL decaf_shake##n##_update(decaf_shake##n##_ctx_t sponge, const uint8_t *in, size_t inlen ) { \
+        return decaf_sha3_update(sponge->s, in, inlen); \
+    } \
+    static inline void  DECAF_NONNULL decaf_shake##n##_final(decaf_shake##n##_ctx_t sponge, uint8_t *out, size_t outlen ) { \
+        decaf_sha3_output(sponge->s, out, outlen); /* status is dropped: this wrapper returns void */ \
+        decaf_sha3_init(sponge->s, &DECAF_SHAKE##n##_params_s); /* re-initialize with the same parameters */ \
+    } \
+    static inline void  DECAF_NONNULL decaf_shake##n##_output(decaf_shake##n##_ctx_t sponge, uint8_t *out, size_t outlen ) { \
+        decaf_sha3_output(sponge->s, out, outlen); /* status is dropped: this wrapper returns void */ \
+    } \
+    static inline void  DECAF_NONNULL decaf_shake##n##_hash(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen) { \
+        decaf_sha3_hash(out,outlen,in,inlen,&DECAF_SHAKE##n##_params_s); /* status is dropped: this wrapper returns void */ \
+    } \
+    static inline void  DECAF_NONNULL decaf_shake##n##_destroy( decaf_shake##n##_ctx_t sponge ) { \
+        decaf_sha3_destroy(sponge->s); \
+    }
+
+#define DECAF_DEC_SHA3(n) /* declares the DECAF_SHA3-n parameter object, context type and inline wrappers */ \
+    extern const struct decaf_kparams_s DECAF_SHA3_##n##_params_s DECAF_API_VIS; \
+    typedef struct decaf_sha3_##n##_ctx_s { decaf_keccak_sponge_t s; } decaf_sha3_##n##_ctx_t[1]; \
+    static inline void DECAF_NONNULL decaf_sha3_##n##_init(decaf_sha3_##n##_ctx_t sponge) { \
+        decaf_sha3_init(sponge->s, &DECAF_SHA3_##n##_params_s); \
+    } \
+    static inline void DECAF_NONNULL decaf_sha3_##n##_gen_init(decaf_keccak_sponge_t sponge) { /* same as _init, but on a bare sponge */ \
+        decaf_sha3_init(sponge, &DECAF_SHA3_##n##_params_s); \
+    } \
+    static inline decaf_error_t DECAF_NONNULL decaf_sha3_##n##_update(decaf_sha3_##n##_ctx_t sponge, const uint8_t *in, size_t inlen ) { \
+        return decaf_sha3_update(sponge->s, in, inlen); \
+    } \
+    static inline decaf_error_t DECAF_NONNULL decaf_sha3_##n##_final(decaf_sha3_##n##_ctx_t sponge, uint8_t *out, size_t outlen ) { \
+        decaf_error_t ret = decaf_sha3_output(sponge->s, out, outlen); /* save the status before the sponge is re-initialized */ \
+        decaf_sha3_init(sponge->s, &DECAF_SHA3_##n##_params_s); /* re-initialize with the same parameters */ \
+        return ret; \
+    } \
+    static inline decaf_error_t DECAF_NONNULL decaf_sha3_##n##_output(decaf_sha3_##n##_ctx_t sponge, uint8_t *out, size_t outlen ) { \
+        return decaf_sha3_output(sponge->s, out, outlen); \
+    } \
+    static inline decaf_error_t DECAF_NONNULL decaf_sha3_##n##_hash(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen) { \
+        return decaf_sha3_hash(out,outlen,in,inlen,&DECAF_SHA3_##n##_params_s); \
+    } \
+    static inline void DECAF_NONNULL decaf_sha3_##n##_destroy(decaf_sha3_##n##_ctx_t sponge) { \
+        decaf_sha3_destroy(sponge->s); \
+    }
+/** @endcond */
+
+DECAF_DEC_SHAKE(128)
+DECAF_DEC_SHAKE(256)
+DECAF_DEC_SHA3(224)
+DECAF_DEC_SHA3(256)
+DECAF_DEC_SHA3(384)
+DECAF_DEC_SHA3(512)
+#undef DECAF_DEC_SHAKE
+#undef DECAF_DEC_SHA3
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+    
+#endif /* __DECAF_SHAKE_H__ */
diff --git a/crypto/ec/curve448/decaf_tables.c b/crypto/ec/curve448/decaf_tables.c
new file mode 100644
index 0000000000..ab4e6d79c4
--- /dev/null
+++ b/crypto/ec/curve448/decaf_tables.c
@@ -0,0 +1,354 @@
+/** @warning: this file was automatically generated. */
+#include "field.h"
+
+#include <decaf.h>
+
+#define API_NS(_id) decaf_448_##_id
+const API_NS(point_t) API_NS(point_base) = {{ /* Defines the constant decaf_448_point_base (API_NS prepends decaf_448_): four field elements, each given as eight hex words. */
+{FIELD_LITERAL(0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0080000000000000,0x00fffffffffffffe,0x00ffffffffffffff,0x00ffffffffffffff,0x007fffffffffffff)}, /* 1st element -- NOTE(review): presumably the x coordinate; confirm against point_t's member order. */
+  {FIELD_LITERAL(0x006079b4dfdd4a64,0x000c1e3ab470a1c8,0x0044d73f48e5199b,0x0050452714141818,0x004c74c393d5242c,0x0024080526437050,0x00d48d06c13078ca,0x008508de14f04286)}, /* 2nd element -- NOTE(review): presumably y; confirm. */
+  {FIELD_LITERAL(0x0000000000000001,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000)}, /* 3rd element: only the first word is nonzero (1) -- NOTE(review): presumably z = 1; confirm. */
+  {FIELD_LITERAL(0x00e3c816dc198105,0x0062071833f4e093,0x004dde98e3421403,0x00a319b57519c985,0x00794be956382384,0x00e1ddc2b86da60f,0x0050e23d5682a9ff,0x006d3669e173c6a4)} /* 4th element -- NOTE(review): presumably t; confirm. */
+}}; /* Generated data (see the file's @warning); regenerate rather than hand-editing the values. */
+const gf API_NS(precomputed_base_as_fe)[240]
+VECTOR_ALIGNED __attribute__((visibility("hidden"))) = {
+  {FIELD_LITERAL(0x00cc3b062366f4cc,0x003d6e34e314aa3c,0x00d51c0a7521774d,0x0094e060eec6ab8b,0x00d21291b4d80082,0x00befed12b55ef1e,0x00c3dd2df5c94518,0x00e0a7b112b8d4e6)},
+  {FIELD_LITERAL(0x0019eb5608d8723a,0x00d1bab52fb3aedb,0x00270a7311ebc90c,0x0037c12b91be7f13,0x005be16cd8b5c704,0x003e181acda888e1,0x00bc1f00fc3fc6d0,0x00d3839bfa319e20)},
+  {FIELD_LITERAL(0x003caeb88611909f,0x00ea8b378c4df3d4,0x00b3295b95a5a19a,0x00a65f97514bdfb5,0x00b39efba743cab1,0x0016ba98b862fd2d,0x0001508812ee71d7,0x000a75740eea114a)},
+  {FIELD_LITERAL(0x00ebcf0eb649f823,0x00166d332e98ea03,0x0059ddf64f5cd5f6,0x0047763123d9471b,0x00a64065c53ef62f,0x00978e44c480153d,0x000b5b2a0265f194,0x0046a24b9f32965a)},
+  {FIELD_LITERAL(0x00b9eef787034df0,0x0020bc24de3390cd,0x000022160bae99bb,0x00ae66e886e97946,0x0048d4bbe02cbb8b,0x0072ba97b34e38d4,0x00eae7ec8f03e85a,0x005ba92ecf808b2c)},
+  {FIELD_LITERAL(0x00c9cfbbe74258fd,0x00843a979ea9eaa7,0x000cbb4371cfbe90,0x0059bac8f7f0a628,0x004b3dff882ff530,0x0011869df4d90733,0x00595aa71f4abfc2,0x0070e2d38990c2e6)},
+  {FIELD_LITERAL(0x00de2010c0a01733,0x00c739a612e24297,0x00a7212643141d7c,0x00f88444f6b67c11,0x00484b7b16ec28f2,0x009c1b8856af9c68,0x00ff4669591fe9d6,0x0054974be08a32c8)},
+  {FIELD_LITERAL(0x0010de3fd682ceed,0x008c07642d83ca4e,0x0013bb064e00a1cc,0x009411ae27870e11,0x00ea8e5b4d531223,0x0032fe7d2aaece2e,0x00d989e243e7bb41,0x000fe79a508e9b8b)},
+  {FIELD_LITERAL(0x005e0426b9bfc5b1,0x0041a5b1d29ee4fa,0x0015b0def7774391,0x00bc164f1f51af01,0x00d543b0942797b9,0x003c129b6398099c,0x002b114c6e5adf18,0x00b4e630e4018a7b)},
+  {FIELD_LITERAL(0x00d490afc95f8420,0x00b096bf50c1d9b9,0x00799fd707679866,0x007c74d9334afbea,0x00efaa8be80ff4ed,0x0075c4943bb81694,0x00c21c2fca161f36,0x00e77035d492bfee)},
+  {FIELD_LITERAL(0x006658a190dd6661,0x00e0e9bab38609a6,0x0028895c802237ed,0x006a0229c494f587,0x002dcde96c9916b7,0x00d158822de16218,0x00173b917a06856f,0x00ca78a79ae07326)},
+  {FIELD_LITERAL(0x00e35bfc79caced4,0x0087238a3e1fe3bb,0x00bcbf0ff4ceff5b,0x00a19c1c94099b91,0x0071e102b49db976,0x0059e3d004eada1e,0x008da78afa58a47e,0x00579c8ebf269187)},
+  {FIELD_LITERAL(0x00a16c2905eee75f,0x009d4bcaea2c7e1d,0x00d3bd79bfad19df,0x0050da745193342c,0x006abdb8f6b29ab1,0x00a24fe0a4fef7ef,0x0063730da1057dfb,0x00a08c312c8eb108)},
+  {FIELD_LITERAL(0x00b583be005375be,0x00a40c8f8a4e3df4,0x003fac4a8f5bdbf7,0x00d4481d872cd718,0x004dc8749cdbaefe,0x00cce740d5e5c975,0x000b1c1f4241fd21,0x00a76de1b4e1cd07)},
+  {FIELD_LITERAL(0x007a076500d30b62,0x000a6e117b7f090f,0x00c8712ae7eebd9a,0x000fbd6c1d5f6ff7,0x003a7977246ebf11,0x00166ed969c6600e,0x00aa42e469c98bec,0x00dc58f307cf0666)},
+  {FIELD_LITERAL(0x004b491f65a9a28b,0x006a10309e8a55b7,0x00b67210185187ef,0x00cf6497b12d9b8f,0x0085778c56e2b1ba,0x0015b4c07a814d85,0x00686479e62da561,0x008de5d88f114916)},
+  {FIELD_LITERAL(0x00e37c88d6bba7b1,0x003e4577e1b8d433,0x0050d8ea5f510ec0,0x0042fc9f2da9ef59,0x003bd074c1141420,0x00561b8b7b68774e,0x00232e5e5d1013a3,0x006b7f2cb3d7e73f)},
+  {FIELD_LITERAL(0x004bdd0f0b41e6a0,0x001773057c405d24,0x006029f99915bd97,0x006a5ba70a17fe2f,0x0046111977df7e08,0x004d8124c89fb6b7,0x00580983b2bb2724,0x00207bf330d6f3fe)},
+  {FIELD_LITERAL(0x007efdc93972a48b,0x002f5e50e78d5fee,0x0080dc11d61c7fe5,0x0065aa598707245b,0x009abba2300641be,0x000c68787656543a,0x00ffe0fef2dc0a17,0x00007ffbd6cb4f3a)},
+  {FIELD_LITERAL(0x0036012f2b836efc,0x00458c126d6b5fbc,0x00a34436d719ad1e,0x0097be6167117dea,0x0009c219c879cff3,0x0065564493e60755,0x00993ac94a8cdec0,0x002d4885a4d0dbaf)},
+  {FIELD_LITERAL(0x00598b60b4c068ba,0x00c547a0be7f1afd,0x009582164acf12af,0x00af4acac4fbbe40,0x005f6ca7c539121a,0x003b6e752ebf9d66,0x00f08a30d5cac5d4,0x00e399bb5f97c5a9)},
+  {FIELD_LITERAL(0x007445a0409c0a66,0x00a65c369f3829c0,0x0031d248a4f74826,0x006817f34defbe8e,0x00649741d95ebf2e,0x00d46466ab16b397,0x00fdc35703bee414,0x00343b43334525f8)},
+  {FIELD_LITERAL(0x001796bea93f6401,0x00090c5a42e85269,0x00672412ba1252ed,0x001201d47b6de7de,0x006877bccfe66497,0x00b554fd97a4c161,0x009753f42dbac3cf,0x00e983e3e378270a)},
+  {FIELD_LITERAL(0x00ac3eff18849872,0x00f0eea3bff05690,0x00a6d72c21dd505d,0x001b832642424169,0x00a6813017b540e5,0x00a744bd71b385cd,0x0022a7d089130a7b,0x004edeec9a133486)},
+  {FIELD_LITERAL(0x00b2d6729196e8a9,0x0088a9bb2031cef4,0x00579e7787dc1567,0x0030f49feb059190,0x00a0b1d69c7f7d8f,0x0040bdcc6d9d806f,0x00d76c4037edd095,0x00bbf24376415dd7)},
+  {FIELD_LITERAL(0x00240465ff5a7197,0x00bb97e76caf27d0,0x004b4edbf8116d39,0x001d8586f708cbaa,0x000f8ee8ff8e4a50,0x00dde5a1945dd622,0x00e6fc1c0957e07c,0x0041c9cdabfd88a0)},
+  {FIELD_LITERAL(0x005344b0bf5b548c,0x002957d0b705cc99,0x00f586a70390553d,0x0075b3229f583cc3,0x00a1aa78227490e4,0x001bf09cf7957717,0x00cf6bf344325f52,0x0065bd1c23ca3ecf)},
+  {FIELD_LITERAL(0x009bff3b3239363c,0x00e17368796ef7c0,0x00528b0fe0971f3a,0x0008014fc8d4a095,0x00d09f2e8a521ec4,0x006713ab5dde5987,0x0003015758e0dbb1,0x00215999f1ba212d)},
+  {FIELD_LITERAL(0x002c88e93527da0e,0x0077c78f3456aad5,0x0071087a0a389d1c,0x00934dac1fb96dbd,0x008470e801162697,0x005bc2196cd4ad49,0x00e535601d5087c3,0x00769888700f497f)},
+  {FIELD_LITERAL(0x00da7a4b557298ad,0x0019d2589ea5df76,0x00ef3e38be0c6497,0x00a9644e1312609a,0x004592f61b2558da,0x0082c1df510d7e46,0x0042809a535c0023,0x00215bcb5afd7757)},
+  {FIELD_LITERAL(0x002b9df55a1a4213,0x00dcfc3b464a26be,0x00c4f9e07a8144d5,0x00c8e0617a92b602,0x008e3c93accafae0,0x00bf1bcb95b2ca60,0x004ce2426a613bf3,0x00266cac58e40921)},
+  {FIELD_LITERAL(0x008456d5db76e8f0,0x0032ca9cab2ce163,0x0059f2b8bf91abcf,0x0063c2a021712788,0x00f86155af22f72d,0x00db98b2a6c005a0,0x00ac6e416a693ac4,0x007a93572af53226)},
+  {FIELD_LITERAL(0x0087767520f0de22,0x0091f64012279fb5,0x001050f1f0644999,0x004f097a2477ad3c,0x006b37913a9947bd,0x001a3d78645af241,0x0057832bbb3008a7,0x002c1d902b80dc20)},
+  {FIELD_LITERAL(0x001a6002bf178877,0x009bce168aa5af50,0x005fc318ff04a7f5,0x0052818f55c36461,0x008768f5d4b24afb,0x0037ffbae7b69c85,0x0018195a4b61edc0,0x001e12ea088434b2)},
+  {FIELD_LITERAL(0x0047d3f804e7ab07,0x00a809ab5f905260,0x00b3ffc7cdaf306d,0x00746e8ec2d6e509,0x00d0dade8887a645,0x00acceeebde0dd37,0x009bc2579054686b,0x0023804f97f1c2bf)},
+  {FIELD_LITERAL(0x0043e2e2e50b80d7,0x00143aafe4427e0f,0x005594aaecab855b,0x008b12ccaaecbc01,0x002deeb091082bc3,0x009cca4be2ae7514,0x00142b96e696d047,0x00ad2a2b1c05256a)},
+  {FIELD_LITERAL(0x003914f2f144b78b,0x007a95dd8bee6f68,0x00c7f4384d61c8e6,0x004e51eb60f1bdb2,0x00f64be7aa4621d8,0x006797bfec2f0ac0,0x007d17aab3c75900,0x001893e73cac8bc5)},
+  {FIELD_LITERAL(0x00140360b768665b,0x00b68aca4967f977,0x0001089b66195ae4,0x00fe71122185e725,0x000bca2618d49637,0x00a54f0557d7e98a,0x00cdcd2f91d6f417,0x00ab8c13741fd793)},
+  {FIELD_LITERAL(0x00725ee6b1e549e0,0x007124a0769777fa,0x000b68fdad07ae42,0x0085b909cd4952df,0x0092d2e3c81606f4,0x009f22f6cac099a0,0x00f59da57f2799a8,0x00f06c090122f777)},
+  {FIELD_LITERAL(0x00ce0bed0a3532bc,0x001a5048a22df16b,0x00e31db4cbad8bf1,0x00e89292120cf00e,0x007d1dd1a9b00034,0x00e2a9041ff8f680,0x006a4c837ae596e7,0x00713af1068070b3)},
+  {FIELD_LITERAL(0x00c4fe64ce66d04b,0x00b095d52e09b3d7,0x00758bbecb1a3a8e,0x00f35cce8d0650c0,0x002b878aa5984473,0x0062e0a3b7544ddc,0x00b25b290ed116fe,0x007b0f6abe0bebf2)},
+  {FIELD_LITERAL(0x0081d4e3addae0a8,0x003410c836c7ffcc,0x00c8129ad89e4314,0x000e3d5a23922dcd,0x00d91e46f29c31f3,0x006c728cde8c5947,0x002bc655ba2566c0,0x002ca94721533108)},
+  {FIELD_LITERAL(0x0051e4b3f764d8a9,0x0019792d46e904a0,0x00853bc13dbc8227,0x000840208179f12d,0x0068243474879235,0x0013856fbfe374d0,0x00bda12fe8676424,0x00bbb43635926eb2)},
+  {FIELD_LITERAL(0x0012cdc880a93982,0x003c495b21cd1b58,0x00b7e5c93f22a26e,0x0044aa82dfb99458,0x009ba092cdffe9c0,0x00a14b3ab2083b73,0x000271c2f70e1c4b,0x00eea9cac0f66eb8)},
+  {FIELD_LITERAL(0x001a1847c4ac5480,0x00b1b412935bb03a,0x00f74285983bf2b2,0x00624138b5b5d0f1,0x008820c0b03d38bf,0x00b94e50a18c1572,0x0060f6934841798f,0x00c52f5d66d6ebe2)},
+  {FIELD_LITERAL(0x00da23d59f9bcea6,0x00e0f27007a06a4b,0x00128b5b43a6758c,0x000cf50190fa8b56,0x00fc877aba2b2d72,0x00623bef52edf53f,0x00e6af6b819669e2,0x00e314dc34fcaa4f)},
+  {FIELD_LITERAL(0x0066e5eddd164d1e,0x00418a7c6fe28238,0x0002e2f37e962c25,0x00f01f56b5975306,0x0048842fa503875c,0x0057b0e968078143,0x00ff683024f3d134,0x0082ae28fcad12e4)},
+  {FIELD_LITERAL(0x0011ddfd21260e42,0x00d05b0319a76892,0x00183ea4368e9b8f,0x00b0815662affc96,0x00b466a5e7ce7c88,0x00db93b07506e6ee,0x0033885f82f62401,0x0086f9090ec9b419)},
+  {FIELD_LITERAL(0x00d95d1c5fcb435a,0x0016d1ed6b5086f9,0x00792aa0b7e54d71,0x0067b65715f1925d,0x00a219755ec6176b,0x00bc3f026b12c28f,0x00700c897ffeb93e,0x0089b83f6ec50b46)},
+  {FIELD_LITERAL(0x003c97e6384da36e,0x00423d53eac81a09,0x00b70d68f3cdce35,0x00ee7959b354b92c,0x00f4e9718819c8ca,0x009349f12acbffe9,0x005aee7b62cb7da6,0x00d97764154ffc86)},
+  {FIELD_LITERAL(0x00526324babb46dc,0x002ee99b38d7bf9e,0x007ea51794706ef4,0x00abeb04da6e3c39,0x006b457c1d281060,0x00fe243e9a66c793,0x00378de0fb6c6ee4,0x003e4194b9c3cb93)},
+  {FIELD_LITERAL(0x00fed3cd80ca2292,0x0015b043a73ca613,0x000a9fd7bf9be227,0x003b5e03de2db983,0x005af72d46904ef7,0x00c0f1b5c49faa99,0x00dc86fc3bd305e1,0x00c92f08c1cb1797)},
+  {FIELD_LITERAL(0x0079680ce111ed3b,0x001a1ed82806122c,0x000c2e7466d15df3,0x002c407f6f7150fd,0x00c5e7c96b1b0ce3,0x009aa44626863ff9,0x00887b8b5b80be42,0x00b6023cec964825)},
+  {FIELD_LITERAL(0x00e4a8e1048970c8,0x0062887b7830a302,0x00bcf1c8cd81402b,0x0056dbb81a68f5be,0x0014eced83f12452,0x00139e1a510150df,0x00bb81140a82d1a3,0x000febcc1aaf1aa7)},
+  {FIELD_LITERAL(0x00a7527958238159,0x0013ec9537a84cd6,0x001d7fee7d562525,0x00b9eefa6191d5e5,0x00dbc97db70bcb8a,0x00481affc7a4d395,0x006f73d3e70c31bb,0x00183f324ed96a61)},
+  {FIELD_LITERAL(0x0039dd7ce7fc6860,0x00d64f6425653da1,0x003e037c7f57d0af,0x0063477a06e2bcf2,0x001727dbb7ac67e6,0x0049589f5efafe2e,0x00fc0fef2e813d54,0x008baa5d087fb50d)},
+  {FIELD_LITERAL(0x0024fb59d9b457c7,0x00a7d4e060223e4c,0x00c118d1b555fd80,0x0082e216c732f22a,0x00cd2a2993089504,0x003638e836a3e13d,0x000d855ee89b4729,0x008ec5b7d4810c91)},
+  {FIELD_LITERAL(0x001bf51f7d65cdfd,0x00d14cdafa16a97d,0x002c38e60fcd10e7,0x00a27446e393efbd,0x000b5d8946a71fdd,0x0063df2cde128f2f,0x006c8679569b1888,0x0059ffc4925d732d)},
+  {FIELD_LITERAL(0x00ece96f95f2b66f,0x00ece7952813a27b,0x0026fc36592e489e,0x007157d1a2de0f66,0x00759dc111d86ddf,0x0012881e5780bb0f,0x00c8ccc83ad29496,0x0012b9bd1929eb71)},
+  {FIELD_LITERAL(0x000fa15a20da5df0,0x00349ddb1a46cd31,0x002c512ad1d8e726,0x00047611f669318d,0x009e68fba591e17e,0x004320dffa803906,0x00a640874951a3d3,0x00b6353478baa24f)},
+  {FIELD_LITERAL(0x009696510000d333,0x00ec2f788bc04826,0x000e4d02b1f67ba5,0x00659aa8dace08b6,0x00d7a38a3a3ae533,0x008856defa8c746b,0x004d7a4402d3da1a,0x00ea82e06229260f)},
+  {FIELD_LITERAL(0x006a15bb20f75c0c,0x0079a144027a5d0c,0x00d19116ce0b4d70,0x0059b83bcb0b268e,0x005f58f63f16c127,0x0079958318ee2c37,0x00defbb063d07f82,0x00f1f0b931d2d446)},
+  {FIELD_LITERAL(0x00cb5e4c3c35d422,0x008df885ca43577f,0x00fa50b16ca3e471,0x005a0e58e17488c8,0x00b2ceccd6d34d19,0x00f01d5d235e36e9,0x00db2e7e4be6ca44,0x00260ab77f35fccd)},
+  {FIELD_LITERAL(0x006f6fd9baac61d5,0x002a7710a020a895,0x009de0db7fc03d4d,0x00cdedcb1875f40b,0x00050caf9b6b1e22,0x005e3a6654456ab0,0x00775fdf8c4423d4,0x0028701ea5738b5d)},
+  {FIELD_LITERAL(0x009ffd90abfeae96,0x00cba3c2b624a516,0x005ef08bcee46c91,0x00e6fde30afb6185,0x00f0b4db4f818ce4,0x006c54f45d2127f5,0x00040125035854c7,0x00372658a3287e13)},
+  {FIELD_LITERAL(0x00d7070fb1beb2ab,0x0078fc845a93896b,0x006894a4b2f224a6,0x005bdd8192b9dbde,0x00b38839874b3a9e,0x00f93618b04b7a57,0x003e3ec75fd2c67e,0x00bf5e6bfc29494a)},
+  {FIELD_LITERAL(0x00f19224ebba2aa5,0x0074f89d358e694d,0x00eea486597135ad,0x0081579a4555c7e1,0x0010b9b872930a9d,0x00f002e87a30ecc0,0x009b9d66b6de56e2,0x00a3c4f45e8004eb)},
+  {FIELD_LITERAL(0x0045e8dda9400888,0x002ff12e5fc05db7,0x00a7098d54afe69c,0x00cdbe846a500585,0x00879c1593ca1882,0x003f7a7fea76c8b0,0x002cd73dd0c8e0a1,0x00645d6ce96f51fe)},
+  {FIELD_LITERAL(0x002b7e83e123d6d6,0x00398346f7419c80,0x0042922e55940163,0x005e7fc5601886a3,0x00e88f2cee1d3103,0x00e7fab135f2e377,0x00b059984dbf0ded,0x0009ce080faa5bb8)},
+  {FIELD_LITERAL(0x0085e78af7758979,0x00275a4ee1631a3a,0x00d26bc0ed78b683,0x004f8355ea21064f,0x00d618e1a32696e5,0x008d8d7b150e5680,0x00a74cd854b278d2,0x001dd62702203ea0)},
+  {FIELD_LITERAL(0x00f89335c2a59286,0x00a0f5c905d55141,0x00b41fb836ee9382,0x00e235d51730ca43,0x00a5cb37b5c0a69a,0x009b966ffe136c45,0x00cb2ea10bf80ed1,0x00fb2b370b40dc35)},
+  {FIELD_LITERAL(0x00d687d16d4ee8ba,0x0071520bdd069dff,0x00de85c60d32355d,0x0087d2e3565102f4,0x00cde391b8dfc9aa,0x00e18d69efdfefe5,0x004a9d0591954e91,0x00fa36dd8b50eee5)},
+  {FIELD_LITERAL(0x002e788749a865f7,0x006e4dc3116861ea,0x009f1428c37276e6,0x00e7d2e0fc1e1226,0x003aeebc6b6c45f6,0x0071a8073bf500c9,0x004b22ad986b530c,0x00f439e63c0d79d4)},
+  {FIELD_LITERAL(0x006bc3d53011f470,0x00032d6e692b83e8,0x00059722f497cd0b,0x0009b4e6f0c497cc,0x0058a804b7cce6c0,0x002b71d3302bbd5d,0x00e2f82a36765fce,0x008dded99524c703)},
+  {FIELD_LITERAL(0x004d058953747d64,0x00701940fe79aa6f,0x00a620ac71c760bf,0x009532b611158b75,0x00547ed7f466f300,0x003cb5ab53a8401a,0x00c7763168ce3120,0x007e48e33e4b9ab2)},
+  {FIELD_LITERAL(0x001b2fc57bf3c738,0x006a3f918993fb80,0x0026f7a14fdec288,0x0075a2cdccef08db,0x00d3ecbc9eecdbf1,0x0048c40f06e5bf7f,0x00d63e423009896b,0x000598bc99c056a8)},
+  {FIELD_LITERAL(0x002f194eaafa46dc,0x008e38f57fe87613,0x00dc8e5ae25f4ab2,0x000a17809575e6bd,0x00d3ec7923ba366a,0x003a7e72e0ad75e3,0x0010024b88436e0a,0x00ed3c5444b64051)},
+  {FIELD_LITERAL(0x00831fc1340af342,0x00c9645669466d35,0x007692b4cc5a080f,0x009fd4a47ac9259f,0x001eeddf7d45928b,0x003c0446fc45f28b,0x002c0713aa3e2507,0x0095706935f0f41e)},
+  {FIELD_LITERAL(0x00766ae4190ec6d8,0x0065768cabc71380,0x00b902598416cdc2,0x00380021ad38df52,0x008f0b89d6551134,0x004254d4cc62c5a5,0x000d79f4484b9b94,0x00b516732ae3c50e)},
+  {FIELD_LITERAL(0x001fb73475c45509,0x00d2b2e5ea43345a,0x00cb3c3842077bd1,0x0029f90ad820946e,0x007c11b2380778aa,0x009e54ece62c1704,0x004bc60c41ca01c3,0x004525679a5a0b03)},
+  {FIELD_LITERAL(0x00c64fbddbed87b3,0x0040601d11731faa,0x009c22475b6f9d67,0x0024b79dae875f15,0x00616fed3f02c3b0,0x0000cf39f6af2d3b,0x00c46bac0aa9a688,0x00ab23e2800da204)},
+  {FIELD_LITERAL(0x000b3a37617632b0,0x00597199fe1cfb6c,0x0042a7ccdfeafdd6,0x004cc9f15ebcea17,0x00f436e596a6b4a4,0x00168861142df0d8,0x000753edfec26af5,0x000c495d7e388116)},
+  {FIELD_LITERAL(0x0017085f4a346148,0x00c7cf7a37f62272,0x001776e129bc5c30,0x009955134c9eef2a,0x001ba5bdf1df07be,0x00ec39497103a55c,0x006578354fda6cfb,0x005f02719d4f15ee)},
+  {FIELD_LITERAL(0x0052b9d9b5d9655d,0x00d4ec7ba1b461c3,0x00f95df4974f280b,0x003d8e5ca11aeb51,0x00d4981eb5a70b26,0x000af9a4f6659f29,0x004598c846faeb43,0x0049d9a183a47670)},
+  {FIELD_LITERAL(0x000a72d23dcb3f1f,0x00a3737f84011727,0x00f870c0fbbf4a47,0x00a7aadd04b5c9ca,0x000c7715c67bd072,0x00015a136afcd74e,0x0080d5caea499634,0x0026b448ec7514b7)},
+  {FIELD_LITERAL(0x00b60167d9e7d065,0x00e60ba0d07381e8,0x003a4f17b725c2d4,0x006c19fe176b64fa,0x003b57b31af86ccb,0x0021047c286180fd,0x00bdc8fb00c6dbb6,0x00fe4a9f4bab4f3f)},
+  {FIELD_LITERAL(0x0088ffc3a16111f7,0x009155e4245d0bc8,0x00851d68220572d5,0x00557ace1e514d29,0x0031d7c339d91022,0x00101d0ae2eaceea,0x00246ab3f837b66a,0x00d5216d381ff530)},
+  {FIELD_LITERAL(0x0057e7ea35f36dae,0x00f47d7ad15de22e,0x00d757ea4b105115,0x008311457d579d7e,0x00b49b75b1edd4eb,0x0081c7ff742fd63a,0x00ddda3187433df6,0x00475727d55f9c66)},
+  {FIELD_LITERAL(0x00a6295218dc136a,0x00563b3af0e9c012,0x00d3753b0145db1b,0x004550389c043dc1,0x00ea94ae27401bdf,0x002b0b949f2b7956,0x00c63f780ad8e23c,0x00e591c47d6bab15)},
+  {FIELD_LITERAL(0x00416c582b058eb6,0x004107da5b2cc695,0x00b3cd2556aeec64,0x00c0b418267e57a1,0x001799293579bd2e,0x0046ed44590e4d07,0x001d7459b3630a1e,0x00c6afba8b6696aa)},
+  {FIELD_LITERAL(0x008d6009b26da3f8,0x00898e88ca06b1ca,0x00edb22b2ed7fe62,0x00fbc93516aabe80,0x008b4b470c42ce0d,0x00e0032ba7d0dcbb,0x00d76da3a956ecc8,0x007f20fe74e3852a)},
+  {FIELD_LITERAL(0x002419222c607674,0x00a7f23af89188b3,0x00ad127284e73d1c,0x008bba582fae1c51,0x00fc6aa7ca9ecab1,0x003df5319eb6c2ba,0x002a05af8a8b199a,0x004bf8354558407c)},
+  {FIELD_LITERAL(0x00ce7d4a30f0fcbf,0x00d02c272629f03d,0x0048c001f7400bc2,0x002c21368011958d,0x0098a550391e96b5,0x002d80b66390f379,0x001fa878760cc785,0x001adfce54b613d5)},
+  {FIELD_LITERAL(0x001ed4dc71fa2523,0x005d0bff19bf9b5c,0x00c3801cee065a64,0x001ed0b504323fbf,0x0003ab9fdcbbc593,0x00df82070178b8d2,0x00a2bcaa9c251f85,0x00c628a3674bd02e)},
+  {FIELD_LITERAL(0x006b7a0674f9f8de,0x00a742414e5c7cff,0x0041cbf3c6e13221,0x00e3a64fd207af24,0x0087c05f15fbe8d1,0x004c50936d9e8a33,0x001306ec21042b6d,0x00a4f4137d1141c2)},
+  {FIELD_LITERAL(0x0009e6fb921568b0,0x00b3c60120219118,0x002a6c3460dd503a,0x009db1ef11654b54,0x0063e4bf0be79601,0x00670d34bb2592b9,0x00dcee2f6c4130ce,0x00b2682e88e77f54)},
+  {FIELD_LITERAL(0x000d5b4b3da135ab,0x00838f3e5064d81d,0x00d44eb50f6d94ed,0x0008931ab502ac6d,0x00debe01ca3d3586,0x0025c206775f0641,0x005ad4b6ae912763,0x007e2c318ad8f247)},
+  {FIELD_LITERAL(0x00ddbe0750dd1add,0x004b3c7b885844b8,0x00363e7ecf12f1ae,0x0062e953e6438f9d,0x0023cc73b076afe9,0x00b09fa083b4da32,0x00c7c3d2456c541d,0x005b591ec6b694d4)},
+  {FIELD_LITERAL(0x0028656e19d62fcf,0x0052a4af03df148d,0x00122765ddd14e42,0x00f2252904f67157,0x004741965b636f3a,0x006441d296132cb9,0x005e2106f956a5b7,0x00247029592d335c)},
+  {FIELD_LITERAL(0x003fe038eb92f894,0x000e6da1b72e8e32,0x003a1411bfcbe0fa,0x00b55d473164a9e4,0x00b9a775ac2df48d,0x0002ddf350659e21,0x00a279a69eb19cb3,0x00f844eab25cba44)},
+  {FIELD_LITERAL(0x00c41d1f9c1f1ac1,0x007b2df4e9f19146,0x00b469355fd5ba7a,0x00b5e1965afc852a,0x00388d5f1e2d8217,0x0022079e4c09ae93,0x0014268acd4ef518,0x00c1dd8d9640464c)},
+  {FIELD_LITERAL(0x0038526adeed0c55,0x00dd68c607e3fe85,0x00f746ddd48a5d57,0x0042f2952b963b7c,0x001cbbd6876d5ec2,0x005e341470bca5c2,0x00871d41e085f413,0x00e53ab098f45732)},
+  {FIELD_LITERAL(0x004d51124797c831,0x008f5ae3750347ad,0x0070ced94c1a0c8e,0x00f6db2043898e64,0x000d00c9a5750cd0,0x000741ec59bad712,0x003c9d11aab37b7f,0x00a67ba169807714)},
+  {FIELD_LITERAL(0x00adb2c1566e8b8f,0x0096c68a35771a9a,0x00869933356f334a,0x00ba9c93459f5962,0x009ec73fb6e8ca4b,0x003c3802c27202e1,0x0031f5b733e0c008,0x00f9058c19611fa9)},
+  {FIELD_LITERAL(0x00238f01814a3421,0x00c325a44b6cce28,0x002136f97aeb0e73,0x000cac8268a4afe2,0x0022fd218da471b3,0x009dcd8dfff8def9,0x00cb9f8181d999bb,0x00143ae56edea349)},
+  {FIELD_LITERAL(0x0000623bf87622c5,0x00a1966fdd069496,0x00c315b7b812f9fc,0x00bdf5efcd128b97,0x001d464f532e3e16,0x003cd94f081bfd7e,0x00ed9dae12ce4009,0x002756f5736eee70)},
+  {FIELD_LITERAL(0x00a5187e6ee7341b,0x00e6d52e82d83b6e,0x00df3c41323094a7,0x00b3324f444e9de9,0x00689eb21a35bfe5,0x00f16363becd548d,0x00e187cc98e7f60f,0x00127d9062f0ccab)},
+  {FIELD_LITERAL(0x004ad71b31c29e40,0x00a5fcace12fae29,0x004425b5597280ed,0x00e7ef5d716c3346,0x0010b53ada410ac8,0x0092310226060c9b,0x0091c26128729c7e,0x0088b42900f8ec3b)},
+  {FIELD_LITERAL(0x00f1e26e9762d4a8,0x00d9d74082183414,0x00ffec9bd57a0282,0x000919e128fd497a,0x00ab7ae7d00fe5f8,0x0054dc442851ff68,0x00c9ebeb3b861687,0x00507f7cab8b698f)},
+  {FIELD_LITERAL(0x00c13c5aae3ae341,0x009c6c9ed98373e7,0x00098f26864577a8,0x0015b886e9488b45,0x0037692c42aadba5,0x00b83170b8e7791c,0x001670952ece1b44,0x00fd932a39276da2)},
+  {FIELD_LITERAL(0x0081a3259bef3398,0x005480fff416107b,0x00ce4f607d21be98,0x003ffc084b41df9b,0x0043d0bb100502d1,0x00ec35f575ba3261,0x00ca18f677300ef3,0x00e8bb0a827d8548)},
+  {FIELD_LITERAL(0x00df76b3328ada72,0x002e20621604a7c2,0x00f910638a105b09,0x00ef4724d96ef2cd,0x00377d83d6b8a2f7,0x00b4f48805ade324,0x001cd5da8b152018,0x0045af671a20ca7f)},
+  {FIELD_LITERAL(0x009ae3b93a56c404,0x004a410b7a456699,0x00023a619355e6b2,0x009cdc7297387257,0x0055b94d4ae70d04,0x002cbd607f65b005,0x003208b489697166,0x00ea2aa058867370)},
+  {FIELD_LITERAL(0x00f29d2598ee3f32,0x00b4ac5385d82adc,0x007633eaf04df19b,0x00aa2d3d77ceab01,0x004a2302fcbb778a,0x00927f225d5afa34,0x004a8e9d5047f237,0x008224ae9dbce530)},
+  {FIELD_LITERAL(0x001cf640859b02f8,0x00758d1d5d5ce427,0x00763c784ef4604c,0x005fa81aee205270,0x00ac537bfdfc44cb,0x004b919bd342d670,0x00238508d9bf4b7a,0x00154888795644f3)},
+  {FIELD_LITERAL(0x00c845923c084294,0x00072419a201bc25,0x0045f408b5f8e669,0x00e9d6a186b74dfe,0x00e19108c68fa075,0x0017b91d874177b7,0x002f0ca2c7912c5a,0x009400aa385a90a2)},
+  {FIELD_LITERAL(0x0071110b01482184,0x00cfed0044f2bef8,0x0034f2901cf4662e,0x003b4ae2a67f9834,0x00cca9b96fe94810,0x00522507ae77abd0,0x00bac7422721e73e,0x0066622b0f3a62b0)},
+  {FIELD_LITERAL(0x00f8ac5cf4705b6a,0x00867d82dcb457e3,0x007e13ab2ccc2ce9,0x009ee9a018d3930e,0x008370f8ecb42df8,0x002d9f019add263e,0x003302385b92d196,0x00a15654536e2c0c)},
+  {FIELD_LITERAL(0x0026ef1614e160af,0x00c023f9edfc9c76,0x00cff090da5f57ba,0x0076db7a66643ae9,0x0019462f8c646999,0x008fec00b3854b22,0x00d55041692a0a1c,0x0065db894215ca00)},
+  {FIELD_LITERAL(0x00a925036e0a451c,0x002a0390c36b6cc1,0x00f27020d90894f4,0x008d90d52cbd3d7f,0x00e1d0137392f3b8,0x00f017c158b51a8f,0x00cac313d3ed7dbc,0x00b99a81e3eb42d3)},
+  {FIELD_LITERAL(0x00b54850275fe626,0x0053a3fd1ec71140,0x00e3d2d7dbe096fa,0x00e4ac7b595cce4c,0x0077bad449c0a494,0x00b7c98814afd5b3,0x0057226f58486cf9,0x00b1557154f0cc57)},
+  {FIELD_LITERAL(0x008cc9cd236315c0,0x0031d9c5b39fda54,0x00a5713ef37e1171,0x00293d5ae2886325,0x00c4aba3e05015e1,0x0003f35ef78e4fc6,0x0039d6bd3ac1527b,0x0019d7c3afb77106)},
+  {FIELD_LITERAL(0x007b162931a985af,0x00ad40a2e0daa713,0x006df27c4009f118,0x00503e9f4e2e8bec,0x00751a77c82c182d,0x000298937769245b,0x00ffb1e8fabf9ee5,0x0008334706e09abe)},
+  {FIELD_LITERAL(0x00dbca4e98a7dcd9,0x00ee29cfc78bde99,0x00e4a3b6995f52e9,0x0045d70189ae8096,0x00fd2a8a3b9b0d1b,0x00af1793b107d8e1,0x00dbf92cbe4afa20,0x00da60f798e3681d)},
+  {FIELD_LITERAL(0x004246bfcecc627a,0x004ba431246c03a4,0x00bd1d101872d497,0x003b73d3f185ee16,0x001feb2e2678c0e3,0x00ff13c5a89dec76,0x00ed06042e771d8f,0x00a4fd2a897a83dd)},
+  {FIELD_LITERAL(0x009a4a3be50d6597,0x00de3165fc5a1096,0x004f3f56e345b0c7,0x00f7bf721d5ab8bc,0x004313e47b098c50,0x00e4c7d5c0e1adbb,0x002e3e3db365051e,0x00a480c2cd6a96fb)},
+  {FIELD_LITERAL(0x00417fa30a7119ed,0x00af257758419751,0x00d358a487b463d4,0x0089703cc720b00d,0x00ce56314ff7f271,0x0064db171ade62c1,0x00640b36d4a22fed,0x00424eb88696d23f)},
+  {FIELD_LITERAL(0x004ede34af2813f3,0x00d4a8e11c9e8216,0x004796d5041de8a5,0x00c4c6b4d21cc987,0x00e8a433ee07fa1e,0x0055720b5abcc5a1,0x008873ea9c74b080,0x005b3fec1ab65d48)},
+  {FIELD_LITERAL(0x0047e5277db70ec5,0x000a096c66db7d6b,0x00b4164cc1730159,0x004a9f783fe720fe,0x00a8177b94449dbc,0x0095a24ff49a599f,0x0069c1c578250cbc,0x00452019213debf4)},
+  {FIELD_LITERAL(0x0021ce99e09ebda3,0x00fcbd9f91875ad0,0x009bbf6b7b7a0b5f,0x00388886a69b1940,0x00926a56d0f81f12,0x00e12903c3358d46,0x005dfce4e8e1ce9d,0x0044cfa94e2f7e23)},
+  {FIELD_LITERAL(0x001bd59c09e982ea,0x00f72daeb937b289,0x0018b76dca908e0e,0x00edb498512384ad,0x00ce0243b6cc9538,0x00f96ff690cb4e70,0x007c77bf9f673c8d,0x005bf704c088a528)},
+  {FIELD_LITERAL(0x0093d4628dcb33be,0x0095263d51d42582,0x0049b3222458fe06,0x00e7fce73b653a7f,0x003ca2ebce60b369,0x00c5de239a32bea4,0x0063b8b3d71fb6bf,0x0039aeeb78a1a839)},
+  {FIELD_LITERAL(0x007dc52da400336c,0x001fded1e15b9457,0x00902e00f5568e3a,0x00219bef40456d2d,0x005684161fb3dbc9,0x004a4e9be49a76ea,0x006e685ae88b78ff,0x0021c42f13042d3c)},
+  {FIELD_LITERAL(0x00fb22bb5fd3ce50,0x0017b48aada7ae54,0x00fd5c44ad19a536,0x000ccc4e4e55e45c,0x00fd637d45b4c3f5,0x0038914e023c37cf,0x00ac1881d6a8d898,0x00611ed8d3d943a8)},
+  {FIELD_LITERAL(0x0056e2259d113d2b,0x00594819b284ec16,0x00c7bf794bb36696,0x00721ee75097cdc6,0x00f71be9047a2892,0x00df6ba142564edf,0x0069580b7a184e8d,0x00f056e38fca0fee)},
+  {FIELD_LITERAL(0x009df98566a18c6d,0x00cf3a200968f219,0x0044ba60da6d9086,0x00dbc9c0e344da03,0x000f9401c4466855,0x00d46a57c5b0a8d1,0x00875a635d7ac7c6,0x00ef4a933b7e0ae6)},
+  {FIELD_LITERAL(0x005e8694077a1535,0x008bef75f71c8f1d,0x000a7c1316423511,0x00906e1d70604320,0x003fc46c1a2ffbd6,0x00d1d5022e68f360,0x002515fba37bbf46,0x00ca16234e023b44)},
+  {FIELD_LITERAL(0x00787c99561f4690,0x00a857a8c1561f27,0x00a10df9223c09fe,0x00b98a9562e3b154,0x004330b8744c3ed2,0x00e06812807ec5c4,0x00e4cf6a7db9f1e3,0x00d95b089f132a34)},
+  {FIELD_LITERAL(0x002922b39ca33eec,0x0090d12a5f3ab194,0x00ab60c02fb5f8ed,0x00188d292abba1cf,0x00e10edec9698f6e,0x0069a4d9934133c8,0x0024aac40e6d3d06,0x001702c2177661b0)},
+  {FIELD_LITERAL(0x00139078397030bd,0x000e3c447e859a00,0x0064a5b334c82393,0x00b8aabeb7358093,0x00020778bb9ae73b,0x0032ee94c7892a18,0x008215253cb41bda,0x005e2797593517ae)},
+  {FIELD_LITERAL(0x0083765a5f855d4a,0x0051b6d1351b8ee2,0x00116de548b0f7bb,0x0087bd88703affa0,0x0095b2cc34d7fdd2,0x0084cd81b53f0bc8,0x008562fc995350ed,0x00a39abb193651e3)},
+  {FIELD_LITERAL(0x0019e23f0474b114,0x00eb94c2ad3b437e,0x006ddb34683b75ac,0x00391f9209b564c6,0x00083b3bb3bff7aa,0x00eedcd0f6dceefc,0x00b50817f794fe01,0x0036474deaaa75c9)},
+  {FIELD_LITERAL(0x0091868594265aa2,0x00797accae98ca6d,0x0008d8c5f0f8a184,0x00d1f4f1c2b2fe6e,0x0036783dfb48a006,0x008c165120503527,0x0025fd780058ce9b,0x0068beb007be7d27)},
+  {FIELD_LITERAL(0x00d0ff88aa7c90c2,0x00b2c60dacf53394,0x0094a7284d9666d6,0x00bed9022ce7a19d,0x00c51553f0cd7682,0x00c3fb870b124992,0x008d0bc539956c9b,0x00fc8cf258bb8885)},
+  {FIELD_LITERAL(0x003667bf998406f8,0x0000115c43a12975,0x001e662f3b20e8fd,0x0019ffa534cb24eb,0x00016be0dc8efb45,0x00ff76a8b26243f5,0x00ae20d241a541e3,0x0069bd6af13cd430)},
+  {FIELD_LITERAL(0x0045fdc16487cda3,0x00b2d8e844cf2ed7,0x00612c50e88c1607,0x00a08aabc66c1672,0x006031fdcbb24d97,0x001b639525744b93,0x004409d62639ab17,0x00a1853d0347ab1d)},
+  {FIELD_LITERAL(0x0075a1a56ebf5c21,0x00a3e72be9ac53ed,0x00efcde1629170c2,0x0004225fe91ef535,0x0088049fc73dfda7,0x004abc74857e1288,0x0024e2434657317c,0x00d98cb3d3e5543c)},
+  {FIELD_LITERAL(0x00b4b53eab6bdb19,0x009b22d8b43711d0,0x00d948b9d961785d,0x00cb167b6f279ead,0x00191de3a678e1c9,0x00d9dd9511095c2e,0x00f284324cd43067,0x00ed74fa535151dd)},
+  {FIELD_LITERAL(0x007e32c049b5c477,0x009d2bfdbd9bcfd8,0x00636e93045938c6,0x007fde4af7687298,0x0046a5184fafa5d3,0x0079b1e7f13a359b,0x00875adf1fb927d6,0x00333e21c61bcad2)},
+  {FIELD_LITERAL(0x00048014f73d8b8d,0x0075684aa0966388,0x0092be7df06dc47c,0x0097cebcd0f5568a,0x005a7004d9c4c6a9,0x00b0ecbb659924c7,0x00d90332dd492a7c,0x0057fc14df11493d)},
+  {FIELD_LITERAL(0x0008ed8ea0ad95be,0x0041d324b9709645,0x00e25412257a19b4,0x0058df9f3423d8d2,0x00a9ab20def71304,0x009ae0dbf8ac4a81,0x00c9565977e4392a,0x003c9269444baf55)},
+  {FIELD_LITERAL(0x007df6cbb926830b,0x00d336058ae37865,0x007af47dac696423,0x0048d3011ec64ac8,0x006b87666e40049f,0x0036a2e0e51303d7,0x00ba319bd79dbc55,0x003e2737ecc94f53)},
+  {FIELD_LITERAL(0x00d296ff726272d9,0x00f6d097928fcf57,0x00e0e616a55d7013,0x00deaf454ed9eac7,0x0073a56bedef4d92,0x006ccfdf6fc92e19,0x009d1ee1371a7218,0x00ee3c2ee4462d80)},
+  {FIELD_LITERAL(0x00437bce9bccdf9d,0x00e0c8e2f85dc0a3,0x00c91a7073995a19,0x00856ec9fe294559,0x009e4b33394b156e,0x00e245b0dc497e5c,0x006a54e687eeaeff,0x00f1cd1cd00fdb7c)},
+  {FIELD_LITERAL(0x008132ae5c5d8cd1,0x00121d68324a1d9f,0x00d6be9dafcb8c76,0x00684d9070edf745,0x00519fbc96d7448e,0x00388182fdc1f27e,0x000235baed41f158,0x00bf6cf6f1a1796a)},
+  {FIELD_LITERAL(0x002adc4b4d148219,0x003084ada0d3a90a,0x0046de8aab0f2e4e,0x00452d342a67b5fd,0x00d4b50f01d4de21,0x00db6d9fc0cefb79,0x008c184c86a462cd,0x00e17c83764d42da)},
+  {FIELD_LITERAL(0x007b2743b9a1e01a,0x007847ffd42688c4,0x006c7844d610a316,0x00f0cb8b250aa4b0,0x00a19060143b3ae6,0x0014eb10b77cfd80,0x000170905729dd06,0x00063b5b9cd72477)},
+  {FIELD_LITERAL(0x00ce382dc7993d92,0x00021153e938b4c8,0x00096f7567f48f51,0x0058f81ddfe4b0d5,0x00cc379a56b355c7,0x002c760770d3e819,0x00ee22d1d26e5a40,0x00de6d93d5b082d7)},
+  {FIELD_LITERAL(0x000a91a42c52e056,0x00185f6b77fce7ea,0x000803c51962f6b5,0x0022528582ba563d,0x0043f8040e9856d6,0x0085a29ec81fb860,0x005f9a611549f5ff,0x00c1f974ecbd4b06)},
+  {FIELD_LITERAL(0x005b64c6fd65ec97,0x00c1fdd7f877bc7f,0x000d9cc6c89f841c,0x005c97b7f1aff9ad,0x0075e3c61475d47e,0x001ecb1ba8153011,0x00fe7f1c8d71d40d,0x003fa9757a229832)},
+  {FIELD_LITERAL(0x00ffc5c89d2b0cba,0x00d363d42e3e6fc3,0x0019a1a0118e2e8a,0x00f7baeff48882e1,0x001bd5af28c6b514,0x0055476ca2253cb2,0x00d8eb1977e2ddf3,0x00b173b1adb228a1)},
+  {FIELD_LITERAL(0x00f2cb99dd0ad707,0x00e1e08b6859ddd8,0x000008f2d0650bcc,0x00d7ed392f8615c3,0x00976750a94da27f,0x003e83bb0ecb69ba,0x00df8e8d15c14ac6,0x00f9f7174295d9c2)},
+  {FIELD_LITERAL(0x00f11cc8e0e70bcb,0x00e5dc689974e7dd,0x0014e409f9ee5870,0x00826e6689acbd63,0x008a6f4e3d895d88,0x00b26a8da41fd4ad,0x000fb7723f83efd7,0x009c749db0a5f6c3)},
+  {FIELD_LITERAL(0x002389319450f9ba,0x003677f31aa1250a,0x0092c3db642f38cb,0x00f8b64c0dfc9773,0x00cd49fe3505b795,0x0068105a4090a510,0x00df0ba2072a8bb6,0x00eb396143afd8be)},
+  {FIELD_LITERAL(0x00a0d4ecfb24cdff,0x00ddaf8008ba6479,0x00f0b3e36d4b0f44,0x003734bd3af1f146,0x00b87e2efc75527e,0x00d230df55ddab50,0x002613257ae56c1d,0x00bc0946d135934d)},
+  {FIELD_LITERAL(0x00468711bd994651,0x0033108fa67561bf,0x0089d760192a54b4,0x00adc433de9f1871,0x000467d05f36e050,0x007847e0f0579f7f,0x00a2314ad320052d,0x00b3a93649f0b243)},
+  {FIELD_LITERAL(0x0067f8f0c4fe26c9,0x0079c4a3cc8f67b9,0x0082b1e62f23550d,0x00f2d409caefd7f5,0x0080e67dcdb26e81,0x0087ae993ea1f98a,0x00aa108becf61d03,0x001acf11efb608a3)},
+  {FIELD_LITERAL(0x008225febbab50d9,0x00f3b605e4dd2083,0x00a32b28189e23d2,0x00d507e5e5eb4c97,0x005a1a84e302821f,0x0006f54c1c5f08c7,0x00a347c8cb2843f0,0x0009f73e9544bfa5)},
+  {FIELD_LITERAL(0x006c59c9ae744185,0x009fc32f1b4282cd,0x004d6348ca59b1ac,0x00105376881be067,0x00af4096013147dc,0x004abfb5a5cb3124,0x000d2a7f8626c354,0x009c6ed568e07431)},
+  {FIELD_LITERAL(0x00e828333c297f8b,0x009ef3cf8c3f7e1f,0x00ab45f8fff31cb9,0x00c8b4178cb0b013,0x00d0c50dd3260a3f,0x0097126ac257f5bc,0x0042376cc90c705a,0x001d96fdb4a1071e)},
+  {FIELD_LITERAL(0x00542d44d89ee1a8,0x00306642e0442d98,0x0090853872b87338,0x002362cbf22dc044,0x002c222adff663b8,0x0067c924495fcb79,0x000e621d983c977c,0x00df77a9eccb66fb)},
+  {FIELD_LITERAL(0x002809e4bbf1814a,0x00b9e854f9fafb32,0x00d35e67c10f7a67,0x008f1bcb76e748cf,0x004224d9515687d2,0x005ba0b774e620c4,0x00b5e57db5d54119,0x00e15babe5683282)},
+  {FIELD_LITERAL(0x00832d02369b482c,0x00cba52ff0d93450,0x003fa9c908d554db,0x008d1e357b54122f,0x00abd91c2dc950c6,0x007eff1df4c0ec69,0x003f6aeb13fb2d31,0x00002d6179fc5b2c)},
+  {FIELD_LITERAL(0x0046c9eda81c9c89,0x00b60cb71c8f62fc,0x0022f5a683baa558,0x00f87319fccdf997,0x009ca09b51ce6a22,0x005b12baf4af7d77,0x008a46524a1e33e2,0x00035a77e988be0d)},
+  {FIELD_LITERAL(0x00a7efe46a7dbe2f,0x002f66fd55014fe7,0x006a428afa1ff026,0x0056caaa9604ab72,0x0033f3bcd7fac8ae,0x00ccb1aa01c86764,0x00158d1edf13bf40,0x009848ee76fcf3b4)},
+  {FIELD_LITERAL(0x00a9e7730a819691,0x00d9cc73c4992b70,0x00e299bde067de5a,0x008c314eb705192a,0x00e7226f17e8a3cc,0x0029dfd956e65a47,0x0053a8e839073b12,0x006f942b2ab1597e)},
+  {FIELD_LITERAL(0x001c3d780ecd5e39,0x0094f247fbdcc5fe,0x00d5c786fd527764,0x00b6f4da74f0db2a,0x0080f1f8badcd5fc,0x00f36a373ad2e23b,0x00f804f9f4343bf2,0x00d1af40ec623982)},
+  {FIELD_LITERAL(0x0082aeace5f1b144,0x00f68b3108cf4dd3,0x00634af01dde3020,0x000beab5df5c2355,0x00e8b790d1b49b0b,0x00e48d15854e36f4,0x0040ab2d95f3db9f,0x002711c4ed9e899a)},
+  {FIELD_LITERAL(0x0039343746531ebe,0x00c8509d835d429d,0x00e79eceff6b0018,0x004abfd31e8efce5,0x007bbfaaa1e20210,0x00e3be89c193e179,0x001c420f4c31d585,0x00f414a315bef5ae)},
+  {FIELD_LITERAL(0x007c296a24990df8,0x00d5d07525a75588,0x00dd8e113e94b7e7,0x007bbc58febe0cc8,0x0029f51af9bfcad3,0x007e9311ec7ab6f3,0x009a884de1676343,0x0050d5f2dce84be9)},
+  {FIELD_LITERAL(0x005fa020cca2450a,0x00491c29db6416d8,0x0037cefe3f9f9a85,0x003d405230647066,0x0049e835f0fdbe89,0x00feb78ac1a0815c,0x00828e4b32dc9724,0x00db84f2dc8d6fd4)},
+  {FIELD_LITERAL(0x0098cddc8b39549a,0x006da37e3b05d22c,0x00ce633cfd4eb3cb,0x00fda288ef526acd,0x0025338878c5d30a,0x00f34438c4e5a1b4,0x00584efea7c310f1,0x0041a551f1b660ad)},
+  {FIELD_LITERAL(0x00d7f7a8fbd6437a,0x0062872413bf3753,0x00ad4bbcb43c584b,0x007fe49be601d7e3,0x0077c659789babf4,0x00eb45fcb06a741b,0x005ce244913f9708,0x0088426401736326)},
+  {FIELD_LITERAL(0x007bf562ca768d7c,0x006c1f3a174e387c,0x00f024b447fee939,0x007e7af75f01143f,0x003adb70b4eed89d,0x00e43544021ad79a,0x0091f7f7042011f6,0x0093c1a1ee3a0ddc)},
+  {FIELD_LITERAL(0x00a0b68ec1eb72d2,0x002c03235c0d45a0,0x00553627323fe8c5,0x006186e94b17af94,0x00a9906196e29f14,0x0025b3aee6567733,0x007e0dd840080517,0x0018eb5801a4ba93)},
+  {FIELD_LITERAL(0x00d7fe7017bf6a40,0x006e3f0624be0c42,0x00ffbba205358245,0x00f9fc2cf8194239,0x008d93b37bf15b4e,0x006ddf2e38be8e95,0x002b6e79bf5fcff9,0x00ab355da425e2de)},
+  {FIELD_LITERAL(0x00938f97e20be973,0x0099141a36aaf306,0x0057b0ca29e545a1,0x0085db571f9fbc13,0x008b333c554b4693,0x0043ab6ef3e241cb,0x0054fb20aa1e5c70,0x00be0ff852760adf)},
+  {FIELD_LITERAL(0x003973d8938971d6,0x002aca26fa80c1f5,0x00108af1faa6b513,0x00daae275d7924e6,0x0053634ced721308,0x00d2355fe0bbd443,0x00357612b2d22095,0x00f9bb9dd4136cf3)},
+  {FIELD_LITERAL(0x002bff12cf5e03a5,0x001bdb1fa8a19cf8,0x00c91c6793f84d39,0x00f869f1b2eba9af,0x0059bc547dc3236b,0x00d91611d6d38689,0x00e062daaa2c0214,0x00ed3c047cc2bc82)},
+  {FIELD_LITERAL(0x000050d70c32b31a,0x001939d576d437b3,0x00d709e598bf9fe6,0x00a885b34bd2ee9e,0x00dd4b5c08ab1a50,0x0091bebd50b55639,0x00cf79ff64acdbc6,0x006067a39d826336)},
+  {FIELD_LITERAL(0x0062dd0fb31be374,0x00fcc96b84c8e727,0x003f64f1375e6ae3,0x0057d9b6dd1af004,0x00d6a167b1103c7b,0x00dd28f3180fb537,0x004ff27ad7167128,0x008934c33461f2ac)},
+  {FIELD_LITERAL(0x0065b472b7900043,0x00ba7efd2ff1064b,0x000b67d6c4c3020f,0x0012d28469f4e46d,0x0031c32939703ec7,0x00b49f0bce133066,0x00f7e10416181d47,0x005c90f51867eecc)},
+  {FIELD_LITERAL(0x0051207abd179101,0x00fc2a5c20d9c5da,0x00fb9d5f2701b6df,0x002dd040fdea82b8,0x00f163b0738442ff,0x00d9736bd68855b8,0x00e0d8e93005e61c,0x00df5a40b3988570)},
+  {FIELD_LITERAL(0x0006918f5dfce6dc,0x00d4bf1c793c57fb,0x0069a3f649435364,0x00e89a50e5b0cd6e,0x00b9f6a237e973af,0x006d4ed8b104e41d,0x00498946a3924cd2,0x00c136ec5ac9d4f7)},
+  {FIELD_LITERAL(0x0011a9c290ac5336,0x002b9a2d4a6a6533,0x009a8a68c445d937,0x00361b27b07e5e5c,0x003c043b1755b974,0x00b7eb66cf1155ee,0x0077af5909eefff2,0x0098f609877cc806)},
+  {FIELD_LITERAL(0x00ab13af436bf8f4,0x000bcf0a0dac8574,0x00d50c864f705045,0x00c40e611debc842,0x0085010489bd5caa,0x007c5050acec026f,0x00f67d943c8da6d1,0x00de1da0278074c6)},
+  {FIELD_LITERAL(0x00b373076597455f,0x00e83f1af53ac0f5,0x0041f63c01dc6840,0x0097dea19b0c6f4b,0x007f9d63b4c1572c,0x00e692d492d0f5f0,0x00cbcb392e83b4ad,0x0069c0f39ed9b1a8)},
+  {FIELD_LITERAL(0x00861030012707c9,0x009fbbdc7fd4aafb,0x008f591d6b554822,0x00df08a41ea18ade,0x009d7d83e642abea,0x0098c71bda3b78ff,0x0022c89e7021f005,0x0044d29a3fe1e3c4)},
+  {FIELD_LITERAL(0x00e748cd7b5c52f2,0x00ea9df883f89cc3,0x0018970df156b6c7,0x00c5a46c2a33a847,0x00cbde395e32aa09,0x0072474ebb423140,0x00fb00053086a23d,0x001dafcfe22d4e1f)},
+  {FIELD_LITERAL(0x00c903ee6d825540,0x00add6c4cf98473e,0x007636efed4227f1,0x00905124ae55e772,0x00e6b38fab12ed53,0x0045e132b863fe55,0x003974662edb366a,0x00b1787052be8208)},
+  {FIELD_LITERAL(0x00a614b00d775c7c,0x00d7c78941cc7754,0x00422dd68b5dabc4,0x00a6110f0167d28b,0x00685a309c252886,0x00b439ffd5143660,0x003656e29ee7396f,0x00c7c9b9ed5ad854)},
+  {FIELD_LITERAL(0x0040f7e7c5b37bf2,0x0064e4dc81181bba,0x00a8767ae2a366b6,0x001496b4f90546f2,0x002a28493f860441,0x0021f59513049a3a,0x00852d369a8b7ee3,0x00dd2e7d8b7d30a9)},
+  {FIELD_LITERAL(0x00006e34a35d9fbc,0x00eee4e48b2f019a,0x006b344743003a5f,0x00541d514f04a7e3,0x00e81f9ee7647455,0x005e2b916c438f81,0x00116f8137b7eff0,0x009bd3decc7039d1)},
+  {FIELD_LITERAL(0x0005d226f434110d,0x00af8288b8ef21d5,0x004a7a52ef181c8c,0x00be0b781b4b06de,0x00e6e3627ded07e1,0x00e43aa342272b8b,0x00e86ab424577d84,0x00fb292c566e35bb)},
+  {FIELD_LITERAL(0x00334f5303ea1222,0x00dfb3dbeb0a5d3e,0x002940d9592335c1,0x00706a7a63e8938a,0x005a533558bc4caf,0x00558e33192022a9,0x00970d9faf74c133,0x002979fcb63493ca)},
+  {FIELD_LITERAL(0x00e38abece3c82ab,0x005a51f18a2c7a86,0x009dafa2e86d592e,0x00495a62eb688678,0x00b79df74c0eb212,0x0023e8cc78b75982,0x005998cb91075e13,0x00735aa9ba61bc76)},
+  {FIELD_LITERAL(0x00d9f7a82ddbe628,0x00a1fc782889ae0f,0x0071ffda12d14b66,0x0037cf4eca7fb3d5,0x00c80bc242c58808,0x0075bf8c2d08c863,0x008d41f31afc52a7,0x00197962ecf38741)},
+  {FIELD_LITERAL(0x006e9f475cccf2ee,0x00454b9cd506430c,0x00224a4fb79ee479,0x0062e3347ef0b5e2,0x0034fd2a3512232a,0x00b8b3cb0f457046,0x00eb20165daa38ec,0x00128eebc2d9c0f7)},
+  {FIELD_LITERAL(0x00bfc5fa1e4ea21f,0x00c21d7b6bb892e6,0x00cf043f3acf0291,0x00c13f2f849b3c90,0x00d1a97ebef10891,0x0061e130a445e7fe,0x0019513fdedbf22b,0x001d60c813bff841)},
+  {FIELD_LITERAL(0x0019561c7fcf0213,0x00e3dca6843ebd77,0x0068ea95b9ca920e,0x009bdfb70f253595,0x00c68f59186aa02a,0x005aee1cca1c3039,0x00ab79a8a937a1ce,0x00b9a0e549959e6f)},
+  {FIELD_LITERAL(0x00c79e0b6d97dfbd,0x00917c71fd2bc6e8,0x00db7529ccfb63d8,0x00be5be957f17866,0x00a9e11fdc2cdac1,0x007b91a8e1f44443,0x00a3065e4057d80f,0x004825f5b8d5f6d4)},
+  {FIELD_LITERAL(0x003e4964fa8a8fc8,0x00f6a1cdbcf41689,0x00943cb18fe7fda7,0x00606dafbf34440a,0x005d37a86399c789,0x00e79a2a69417403,0x00fe34f7e68b8866,0x0011f448ed2df10e)},
+  {FIELD_LITERAL(0x00f1f57efcc1fcc4,0x00513679117de154,0x002e5b5b7c86d8c3,0x009f6486561f9cfb,0x00169e74b0170cf7,0x00900205af4af696,0x006acfddb77853f3,0x00df184c90f31068)},
+  {FIELD_LITERAL(0x00b37396c3320791,0x00fc7b67175c5783,0x00c36d2cd73ecc38,0x0080ebcc0b328fc5,0x0043a5b22b35d35d,0x00466c9f1713c9da,0x0026ad346dcaa8da,0x007c684e701183a6)},
+  {FIELD_LITERAL(0x00fd579ffb691713,0x00b76af4f81c412d,0x00f239de96110f82,0x00e965fb437f0306,0x00ca7e9436900921,0x00e487f1325fa24a,0x00633907de476380,0x00721c62ac5b8ea0)},
+  {FIELD_LITERAL(0x00c0d54e542eb4f9,0x004ed657171c8dcf,0x00b743a4f7c2a39b,0x00fd9f93ed6cc567,0x00307fae3113e58b,0x0058aa577c93c319,0x00d254556f35b346,0x00491aada2203f0d)},
+  {FIELD_LITERAL(0x00dff3103786ff34,0x000144553b1f20c3,0x0095613baeb930e4,0x00098058275ea5d4,0x007cd1402b046756,0x0074d74e4d58aee3,0x005f93fc343ff69b,0x00873df17296b3b0)},
+  {FIELD_LITERAL(0x00c4a1fb48635413,0x00b5dd54423ad59f,0x009ff5d53fd24a88,0x003c98d267fc06a7,0x002db7cb20013641,0x00bd1d6716e191f2,0x006dbc8b29094241,0x0044bbf233dafa2c)},
+  {FIELD_LITERAL(0x0055838d41f531e6,0x00bf6a2dd03c81b2,0x005827a061c4839e,0x0000de2cbb36aac3,0x002efa29d9717478,0x00f9e928cc8a77ba,0x00c134b458def9ef,0x00958a182223fc48)},
+  {FIELD_LITERAL(0x000a9ee23c06881f,0x002c727d3d871945,0x00f47d971512d24a,0x00671e816f9ef31a,0x00883af2cfaad673,0x00601f98583d6c9a,0x00b435f5adc79655,0x00ad87b71c04bff2)},
+  {FIELD_LITERAL(0x007860d99db787cf,0x00fda8983018f4a8,0x008c8866bac4743c,0x00ef471f84c82a3f,0x00abea5976d3b8e7,0x00714882896cd015,0x00b49fae584ddac5,0x008e33a1a0b69c81)},
+  {FIELD_LITERAL(0x007b6ee2c9e8a9ec,0x002455dbbd89d622,0x006490cf4eaab038,0x00d925f6c3081561,0x00153b3047de7382,0x003b421f8bdceb6f,0x00761a4a5049da78,0x00980348c5202433)},
+  {FIELD_LITERAL(0x007f8a43da97dd5c,0x00058539c800fc7b,0x0040f3cf5a28414a,0x00d68dd0d95283d6,0x004adce9da90146e,0x00befa41c7d4f908,0x007603bc2e3c3060,0x00bdf360ab3545db)},
+  {FIELD_LITERAL(0x00eebfd4e2312cc3,0x00474b2564e4fc8c,0x003303ef14b1da9b,0x003c93e0e66beb1d,0x0013619b0566925a,0x008817c24d901bf3,0x00b62bd8898d218b,0x0075a7716f1e88a2)},
+  {FIELD_LITERAL(0x0009218da1e6890f,0x0026907f5fd02575,0x004dabed5f19d605,0x003abf181870249d,0x00b52fd048cc92c4,0x00b6dd51e415a5c5,0x00d9eb82bd2b4014,0x002c865a43b46b43)},
+  {FIELD_LITERAL(0x0070047189452f4c,0x00f7ad12e1ce78d5,0x00af1ba51ec44a8b,0x005f39f63e667cd6,0x00058eac4648425e,0x00d7fdab42bea03b,0x0028576a5688de15,0x00af973209e77c10)},
+  {FIELD_LITERAL(0x00c338b915d8fef0,0x00a893292045c39a,0x0028ab4f2eba6887,0x0060743cb519fd61,0x0006213964093ac0,0x007c0b7a43f6266d,0x008e3557c4fa5bda,0x002da976de7b8d9d)},
+  {FIELD_LITERAL(0x0048729f8a8b6dcd,0x00fe23b85cc4d323,0x00e7384d16e4db0e,0x004a423970678942,0x00ec0b763345d4ba,0x00c477b9f99ed721,0x00c29dad3777b230,0x001c517b466f7df6)},
+  {FIELD_LITERAL(0x006366c380f7b574,0x001c7d1f09ff0438,0x003e20a7301f5b22,0x00d3efb1916d28f6,0x0049f4f81060ce83,0x00c69d91ea43ced1,0x002b6f3e5cd269ed,0x005b0fb22ce9ec65)},
+  {FIELD_LITERAL(0x00aa2261022d883f,0x00ebcca4548010ac,0x002528512e28a437,0x0070ca7676b66082,0x0084bda170f7c6d3,0x00581b4747c9b8bb,0x005c96a01061c7e2,0x00fb7c4a362b5273)},
+  {FIELD_LITERAL(0x00c30020eb512d02,0x0060f288283a4d26,0x00b7ed13becde260,0x0075ebb74220f6e9,0x00701079fcfe8a1f,0x001c28fcdff58938,0x002e4544b8f4df6b,0x0060c5bc4f1a7d73)},
+  {FIELD_LITERAL(0x00ae307cf069f701,0x005859f222dd618b,0x00212d6c46ec0b0d,0x00a0fe4642afb62d,0x00420d8e4a0a8903,0x00a80ff639bdf7b0,0x0019bee1490b5d8e,0x007439e4b9c27a86)},
+  {FIELD_LITERAL(0x00a94700032a093f,0x0076e96c225216e7,0x00a63a4316e45f91,0x007d8bbb4645d3b2,0x00340a6ff22793eb,0x006f935d4572aeb7,0x00b1fb69f00afa28,0x009e8f3423161ed3)},
+  {FIELD_LITERAL(0x009ef49c6b5ced17,0x00a555e6269e9f0a,0x007e6f1d79ec73b5,0x009ac78695a32ac4,0x0001d77fbbcd5682,0x008cea1fee0aaeed,0x00f42bea82a53462,0x002e46ab96cafcc9)},
+  {FIELD_LITERAL(0x0051cfcc5885377a,0x00dce566cb1803ca,0x00430c7643f2c7d4,0x00dce1a1337bdcc0,0x0010d5bd7283c128,0x003b1b547f9b46fe,0x000f245e37e770ab,0x007b72511f022b37)},
+  {FIELD_LITERAL(0x0060db815bc4786c,0x006fab25beedc434,0x00c610d06084797c,0x000c48f08537bec0,0x0031aba51c5b93da,0x007968fa6e01f347,0x0030070da52840c6,0x00c043c225a4837f)},
+  {FIELD_LITERAL(0x001bcfd00649ee93,0x006dceb47e2a0fd5,0x00f2cebda0cf8fd0,0x00b6b9d9d1fbdec3,0x00815262e6490611,0x00ef7f5ce3176760,0x00e49cd0c998d58b,0x005fc6cc269ba57c)},
+  {FIELD_LITERAL(0x008940211aa0d633,0x00addae28136571d,0x00d68fdbba20d673,0x003bc6129bc9e21a,0x000346cf184ebe9a,0x0068774d741ebc7f,0x0019d5e9e6966557,0x0003cbd7f981b651)},
+  {FIELD_LITERAL(0x004a2902926f8d3f,0x00ad79b42637ab75,0x0088f60b90f2d4e8,0x0030f54ef0e398c4,0x00021dc9bf99681e,0x007ebf66fde74ee3,0x004ade654386e9a4,0x00e7485066be4c27)},
+  {FIELD_LITERAL(0x00445f1263983be0,0x004cf371dda45e6a,0x00744a89d5a310e7,0x001f20ce4f904833,0x00e746edebe66e29,0x000912ab1f6c153d,0x00f61d77d9b2444c,0x0001499cd6647610)}
+};
+const gf API_NS(precomputed_wnaf_as_fe)[96]
+VECTOR_ALIGNED __attribute__((visibility("hidden"))) = {
+  {FIELD_LITERAL(0x00303cda6feea532,0x00860f1d5a3850e4,0x00226b9fa4728ccd,0x00e822938a0a0c0c,0x00263a61c9ea9216,0x001204029321b828,0x006a468360983c65,0x0002846f0a782143)},
+  {FIELD_LITERAL(0x00303cda6feea532,0x00860f1d5a3850e4,0x00226b9fa4728ccd,0x006822938a0a0c0c,0x00263a61c9ea9215,0x001204029321b828,0x006a468360983c65,0x0082846f0a782143)},
+  {FIELD_LITERAL(0x00ef8e22b275198d,0x00b0eb141a0b0e8b,0x001f6789da3cb38c,0x006d2ff8ed39073e,0x00610bdb69a167f3,0x00571f306c9689b4,0x00f557e6f84b2df8,0x002affd38b2c86db)},
+  {FIELD_LITERAL(0x00cea0fc8d2e88b5,0x00821612d69f1862,0x0074c283b3e67522,0x005a195ba05a876d,0x000cddfe557feea4,0x008046c795bcc5e5,0x00540969f4d6e119,0x00d27f96d6b143d5)},
+  {FIELD_LITERAL(0x000c3b1019d474e8,0x00e19533e4952284,0x00cc9810ba7c920a,0x00f103d2785945ac,0x00bfa5696cc69b34,0x00a8d3d51e9ca839,0x005623cb459586b9,0x00eae7ce1cd52e9e)},
+  {FIELD_LITERAL(0x0005a178751dd7d8,0x002cc3844c69c42f,0x00acbfe5efe10539,0x009c20f43431a65a,0x008435d96374a7b3,0x009ee57566877bd3,0x0044691725ed4757,0x001e87bb2fe2c6b2)},
+  {FIELD_LITERAL(0x000cedc4debf7a04,0x002ffa45000470ac,0x002e9f9678201915,0x0017da1208c4fe72,0x007d558cc7d656cb,0x0037a827287cf289,0x00142472d3441819,0x009c21f166cf8dd1)},
+  {FIELD_LITERAL(0x003ef83af164b2f2,0x000949a5a0525d0d,0x00f4498186cac051,0x00e77ac09ef126d2,0x0073ae0b2c9296e9,0x001c163f6922e3ed,0x0062946159321bea,0x00cfb79b22990b39)},
+  {FIELD_LITERAL(0x00b001431ca9e654,0x002d7e5eabcc9a3a,0x0052e8114c2f6747,0x0079ac4f94487f92,0x00bffd919b5d749c,0x00261f92ad15e620,0x00718397b7a97895,0x00c1443e6ebbc0c4)},
+  {FIELD_LITERAL(0x00eacd90c1e0a049,0x008977935b149fbe,0x0004cb9ba11c93dc,0x009fbd5b3470844d,0x004bc18c9bfc22cf,0x0057679a991839f3,0x00ef15b76fb4092e,0x0074a5173a225041)},
+  {FIELD_LITERAL(0x003f5f9d7ec4777b,0x00ab2e733c919c94,0x001bb6c035245ae5,0x00a325a49a883630,0x0033e9a9ea3cea2f,0x00e442a1eaa0e844,0x00b2116d5b0e71b8,0x00c16abed6d64047)},
+  {FIELD_LITERAL(0x00c560b5ed051165,0x001945adc5d65094,0x00e221865710f910,0x00cc12bc9e9b8ceb,0x004faa9518914e35,0x0017476d89d42f6d,0x00b8f637c8fa1c8b,0x0088c7d2790864b8)},
+  {FIELD_LITERAL(0x00ef7eafc1c69be6,0x0085d3855778fbea,0x002c8d5b450cb6f5,0x004e77de5e1e7fec,0x0047c057893abded,0x001b430b85d51e16,0x00965c7b45640c3c,0x00487b2bb1162b97)},
+  {FIELD_LITERAL(0x0099c73a311beec2,0x00a3eff38d8912ad,0x002efa9d1d7e8972,0x00f717ae1e14d126,0x002833f795850c8b,0x0066c12ad71486bd,0x00ae9889da4820eb,0x00d6044309555c08)},
+  {FIELD_LITERAL(0x004b1c5283d15e41,0x00669d8ea308ff75,0x0004390233f762a1,0x00e1d67b83cb6cec,0x003eebaa964c78b1,0x006b0aff965eb664,0x00b313d4470bdc37,0x008814ffcb3cb9d8)},
+  {FIELD_LITERAL(0x009724b8ce68db70,0x007678b5ed006f3d,0x00bdf4b89c0abd73,0x00299748e04c7c6d,0x00ddd86492c3c977,0x00c5a7febfa30a99,0x00ed84715b4b02bb,0x00319568adf70486)},
+  {FIELD_LITERAL(0x0070ff2d864de5bb,0x005a37eeb637ee95,0x0033741c258de160,0x00e6ca5cb1988f46,0x001ceabd92a24661,0x0030957bd500fe40,0x001c3362afe912c5,0x005187889f678bd2)},
+  {FIELD_LITERAL(0x0086835fc62bbdc7,0x009c3516ca4910a1,0x00956c71f8d00783,0x0095c78fcf63235f,0x00fc7ff6ba05c222,0x00cdd8b3f8d74a52,0x00ac5ae16de8256e,0x00e9d4be8ed48624)},
+  {FIELD_LITERAL(0x00c0ce11405df2d8,0x004e3f37b293d7b6,0x002410172e1ac6db,0x00b8dbff4bf8143d,0x003a7b409d56eb66,0x003e0f6a0dfef9af,0x0081c4e4d3645be1,0x00ce76076b127623)},
+  {FIELD_LITERAL(0x00f6ee0f98974239,0x0042d89af07d3a4f,0x00846b7fe84346b5,0x006a21fc6a8d39a1,0x00ac8bc2541ff2d9,0x006d4e2a77732732,0x009a39b694cc3f2f,0x0085c0aa2a404c8f)},
+  {FIELD_LITERAL(0x00b261101a218548,0x00c1cae96424277b,0x00869da0a77dd268,0x00bc0b09f8ec83ea,0x00d61027f8e82ba9,0x00aa4c85999dce67,0x00eac3132b9f3fe1,0x00fb9b0cf1c695d2)},
+  {FIELD_LITERAL(0x0043079295512f0d,0x0046a009861758e0,0x003ee2842a807378,0x0034cc9d1298e4fa,0x009744eb4d31b3ee,0x00afacec96650cd0,0x00ac891b313761ae,0x00e864d6d26e708a)},
+  {FIELD_LITERAL(0x00a84d7c8a23b491,0x0088e19aa868b27f,0x0005986d43e78ce9,0x00f28012f0606d28,0x0017ded7e10249b3,0x005ed4084b23af9b,0x00b9b0a940564472,0x00ad9056cceeb1f4)},
+  {FIELD_LITERAL(0x00db91b357fe755e,0x00a1aa544b15359c,0x00af4931a0195574,0x007686124fe11aef,0x00d1ead3c7b9ef7e,0x00aaf5fc580f8c15,0x00e727be147ee1ec,0x003c61c1e1577b86)},
+  {FIELD_LITERAL(0x009d3fca983220cf,0x00cd11acbc853dc4,0x0017590409d27f1d,0x00d2176698082802,0x00fa01251b2838c8,0x00dd297a0d9b51c6,0x00d76c92c045820a,0x00534bc7c46c9033)},
+  {FIELD_LITERAL(0x0080ed9bc9b07338,0x00fceac7745d2652,0x008a9d55f5f2cc69,0x0096ce72df301ac5,0x00f53232e7974d87,0x0071728c7ae73947,0x0090507602570778,0x00cb81cfd883b1b2)},
+  {FIELD_LITERAL(0x005011aadea373da,0x003a8578ec896034,0x00f20a6535fa6d71,0x005152d31e5a87cf,0x002bac1c8e68ca31,0x00b0e323db4c1381,0x00f1d596b7d5ae25,0x00eae458097cb4e0)},
+  {FIELD_LITERAL(0x00920ac80f9b0d21,0x00f80f7f73401246,0x0086d37849b557d6,0x0002bd4b317b752e,0x00b26463993a42bb,0x002070422a73b129,0x00341acaa0380cb3,0x00541914dd66a1b2)},
+  {FIELD_LITERAL(0x00c1513cd66abe8c,0x000139e01118944d,0x0064abbcb8080bbb,0x00b3b08202473142,0x00c629ef25da2403,0x00f0aec3310d9b7f,0x0050b2227472d8cd,0x00f6c8a922d41fb4)},
+  {FIELD_LITERAL(0x001075ccf26b7b1f,0x00bb6bb213170433,0x00e9491ad262da79,0x009ef4f48d2d384c,0x008992770766f09d,0x001584396b6b1101,0x00af3f8676c9feef,0x0024603c40269118)},
+  {FIELD_LITERAL(0x009dd7b31319527c,0x001e7ac948d873a9,0x00fa54b46ef9673a,0x0066efb8d5b02fe6,0x00754b1d3928aeae,0x0004262ac72a6f6b,0x0079b7d49a6eb026,0x003126a753540102)},
+  {FIELD_LITERAL(0x009666e24f693947,0x00f714311269d45f,0x0010ffac1d0c851c,0x0066e80c37363497,0x00f1f4ad010c60b0,0x0015c87408470ff7,0x00651d5e9c7766a4,0x008138819d7116de)},
+  {FIELD_LITERAL(0x003934b11c57253b,0x00ef308edf21f46e,0x00e54e99c7a16198,0x0080d57135764e63,0x00751c27b946bc24,0x00dd389ce4e9e129,0x00a1a2bfd1cd84dc,0x002fae73e5149b32)},
+  {FIELD_LITERAL(0x00911657dffb4cdd,0x00c100b7cc553d06,0x00449d075ec467cc,0x007062100bc64e70,0x0043cf86f7bd21e7,0x00f401dc4b797dea,0x005224afb2f62e65,0x00d1ede3fb5a42be)},
+  {FIELD_LITERAL(0x00f2ba36a41aa144,0x00a0c22d946ee18f,0x008aae8ef9a14f99,0x00eef4d79b19bb36,0x008e75ce3d27b1fc,0x00a65daa03b29a27,0x00d9cc83684eb145,0x009e1ed80cc2ed74)},
+  {FIELD_LITERAL(0x00bed953d1997988,0x00b93ed175a24128,0x00871c5963fb6365,0x00ca2df20014a787,0x00f5d9c1d0b34322,0x00f6f5942818db0a,0x004cc091f49c9906,0x00e8a188a60bff9f)},
+  {FIELD_LITERAL(0x0032c7762032fae8,0x00e4087232e0bc21,0x00f767344b6e8d85,0x00bbf369b76c2aa2,0x008a1f46c6e1570c,0x001368cd9780369f,0x007359a39d079430,0x0003646512921434)},
+  {FIELD_LITERAL(0x007c4b47ca7c73e7,0x005396221039734b,0x008b64ddf0e45d7e,0x00bfad5af285e6c2,0x008ec711c5b1a1a8,0x00cf663301237f98,0x00917ee3f1655126,0x004152f337efedd8)},
+  {FIELD_LITERAL(0x0007c7edc9305daa,0x000a6664f273701c,0x00f6e78795e200b1,0x005d05b9ecd2473e,0x0014f5f17c865786,0x00c7fd2d166fa995,0x004939a2d8eb80e0,0x002244ba0942c199)},
+  {FIELD_LITERAL(0x00321e767f0262cf,0x002e57d776caf68e,0x00bf2c94814f0437,0x00c339196acd622f,0x001db4cce71e2770,0x001ded5ddba6eee2,0x0078608ab1554c8d,0x00067fe0ab76365b)},
+  {FIELD_LITERAL(0x00f09758e11e3985,0x00169efdbd64fad3,0x00e8889b7d6dacd6,0x0035cdd58ea88209,0x00bcda47586d7f49,0x003cdddcb2879088,0x0016da70187e954b,0x009556ea2e92aacd)},
+  {FIELD_LITERAL(0x008cab16bd1ff897,0x00b389972cdf753f,0x00ea8ed1e46dfdc0,0x004fe7ef94c589f4,0x002b8ae9b805ecf3,0x0025c08d892874a5,0x0023938e98d44c4c,0x00f759134cabf69c)},
+  {FIELD_LITERAL(0x006c2a84678e4b3b,0x007a194aacd1868f,0x00ed0225af424761,0x00da0a6f293c64b8,0x001062ac5c6a7a18,0x0030f5775a8aeef4,0x0002acaad76b7af0,0x00410b8fd63a579f)},
+  {FIELD_LITERAL(0x001ec59db3d9590e,0x001e9e3f1c3f182d,0x0045a9c3ec2cab14,0x0008198572aeb673,0x00773b74068bd167,0x0012535eaa395434,0x0044dba9e3bbb74a,0x002fba4d3c74bd0e)},
+  {FIELD_LITERAL(0x0042bf08fe66922c,0x003318b8fbb49e8c,0x00d75946004aa14c,0x00f601586b42bf1c,0x00c74cf1d912fe66,0x00abcb36974b30ad,0x007eb78720c9d2b8,0x009f54ab7bd4df85)},
+  {FIELD_LITERAL(0x00db9fc948f73826,0x00fa8b3746ed8ee9,0x00132cb65aafbeb2,0x00c36ff3fe7925b8,0x00837daed353d2fe,0x00ec661be0667cf4,0x005beb8ed2e90204,0x00d77dd69e564967)},
+  {FIELD_LITERAL(0x0042e6268b861751,0x0008dd0469500c16,0x00b51b57c338a3fd,0x00cc4497d85cff6b,0x002f13d6b57c34a4,0x0083652eaf301105,0x00cc344294cc93a8,0x0060f4d02810e270)},
+  {FIELD_LITERAL(0x00a8954363cd518b,0x00ad171124bccb7b,0x0065f46a4adaae00,0x001b1a5b2a96e500,0x0043fe24f8233285,0x0066996d8ae1f2c3,0x00c530f3264169f9,0x00c0f92d07cf6a57)},
+  {FIELD_LITERAL(0x0036a55c6815d943,0x008c8d1def993db3,0x002e0e1e8ff7318f,0x00d883a4b92db00a,0x002f5e781ae33906,0x001a72adb235c06d,0x00f2e59e736e9caa,0x001a4b58e3031914)},
+  {FIELD_LITERAL(0x00d73bfae5e00844,0x00bf459766fb5f52,0x0061b4f5a5313cde,0x004392d4c3b95514,0x000d3551b1077523,0x0000998840ee5d71,0x006de6e340448b7b,0x00251aa504875d6e)},
+  {FIELD_LITERAL(0x003bf343427ac342,0x00adc0a78642b8c5,0x0003b893175a8314,0x0061a34ade5703bc,0x00ea3ea8bb71d632,0x00be0df9a1f198c2,0x0046dd8e7c1635fb,0x00f1523fdd25d5e5)},
+  {FIELD_LITERAL(0x00633f63fc9dd406,0x00e713ff80e04a43,0x0060c6e970f2d621,0x00a57cd7f0df1891,0x00f2406a550650bb,0x00b064290efdc684,0x001eab0144d17916,0x00cd15f863c293ab)},
+  {FIELD_LITERAL(0x0029cec55273f70d,0x007044ee275c6340,0x0040f637a93015e2,0x00338bb78db5aae9,0x001491b2a6132147,0x00a125d6cfe6bde3,0x005f7ac561ba8669,0x001d5eaea3fbaacf)},
+  {FIELD_LITERAL(0x00054e9635e3be31,0x000e43f31e2872be,0x00d05b1c9e339841,0x006fac50bd81fd98,0x00cdc7852eaebb09,0x004ff519b061991b,0x009099e8107d4c85,0x00273e24c36a4a61)},
+  {FIELD_LITERAL(0x00070b4441ef2c46,0x00efa5b02801a109,0x00bf0b8c3ee64adf,0x008a67e0b3452e98,0x001916b1f2fa7a74,0x00d781a78ff6cdc3,0x008682ce57e5c919,0x00cc1109dd210da3)},
+  {FIELD_LITERAL(0x00cae8aaff388663,0x005e983a35dda1c7,0x007ab1030d8e37f4,0x00e48940f5d032fe,0x006a36f9ef30b331,0x009be6f03958c757,0x0086231ceba91400,0x008bd0f7b823e7aa)},
+  {FIELD_LITERAL(0x00cf881ebef5a45a,0x004ebea78e7c6f2c,0x0090da9209cf26a0,0x00de2b2e4c775b84,0x0071d6031c3c15ae,0x00d9e927ef177d70,0x00894ee8c23896fd,0x00e3b3b401e41aad)},
+  {FIELD_LITERAL(0x00204fef26864170,0x00819269c5dee0f8,0x00bfb4713ec97966,0x0026339a6f34df78,0x001f26e64c761dc2,0x00effe3af313cb60,0x00e17b70138f601b,0x00f16e1ccd9ede5e)},
+  {FIELD_LITERAL(0x005d9a8353fdb2db,0x0055cc2048c698f0,0x00f6c4ac89657218,0x00525034d73faeb2,0x00435776fbda3c7d,0x0070ea5312323cbc,0x007a105d44d069fb,0x006dbc8d6dc786aa)},
+  {FIELD_LITERAL(0x0017cff19cd394ec,0x00fef7b810922587,0x00e6483970dff548,0x00ddf36ad6874264,0x00e61778523fcce2,0x0093a66c0c93b24a,0x00fd367114db7f86,0x007652d7ddce26dd)},
+  {FIELD_LITERAL(0x00d92ced7ba12843,0x00aea9c7771e86e7,0x0046639693354f7b,0x00a628dbb6a80c47,0x003a0b0507372953,0x00421113ab45c0d9,0x00e545f08362ab7a,0x0028ce087b4d6d96)},
+  {FIELD_LITERAL(0x00a67ee7cf9f99eb,0x005713b275f2ff68,0x00f1d536a841513d,0x00823b59b024712e,0x009c46b9d0d38cec,0x00cdb1595aa2d7d4,0x008375b3423d9af8,0x000ab0b516d978f7)},
+  {FIELD_LITERAL(0x00428dcb3c510b0f,0x00585607ea24bb4e,0x003736bf1603687a,0x00c47e568c4fe3c7,0x003cd00282848605,0x0043a487c3b91939,0x004ffc04e1095a06,0x00a4c989a3d4b918)},
+  {FIELD_LITERAL(0x00a8778d0e429f7a,0x004c02b059105a68,0x0016653b609da3ff,0x00d5107bd1a12d27,0x00b4708f9a771cab,0x00bb63b662033f69,0x0072f322240e7215,0x0019445b59c69222)},
+  {FIELD_LITERAL(0x00cf4f6069a658e6,0x0053ca52859436a6,0x0064b994d7e3e117,0x00cb469b9a07f534,0x00cfb68f399e9d47,0x00f0dcb8dac1c6e7,0x00f2ab67f538b3a5,0x0055544f178ab975)},
+  {FIELD_LITERAL(0x0099b7a2685d538c,0x00e2f1897b7c0018,0x003adac8ce48dae3,0x00089276d5c50c0c,0x00172fca07ad6717,0x00cb1a72f54069e5,0x004ee42f133545b3,0x00785f8651362f16)},
+  {FIELD_LITERAL(0x0049cbac38509e11,0x0015234505d42cdf,0x00794fb0b5840f1c,0x00496437344045a5,0x0031b6d944e4f9b0,0x00b207318ac1f5d8,0x0000c840da7f5c5d,0x00526f373a5c8814)},
+  {FIELD_LITERAL(0x002c7b7742d1dfd9,0x002cabeb18623c01,0x00055f5e3e044446,0x006c20f3b4ef54ba,0x00c600141ec6b35f,0x00354f437f1a32a3,0x00bac4624a3520f9,0x00c483f734a90691)},
+  {FIELD_LITERAL(0x0053a737d422918d,0x00f7fca1d8758625,0x00c360336dadb04c,0x00f38e3d9158a1b8,0x0069ce3b418e84c6,0x005d1697eca16ead,0x00f8bd6a35ece13d,0x007885dfc2b5afea)},
+  {FIELD_LITERAL(0x00c3617ae260776c,0x00b20dc3e96922d7,0x00a1a7802246706a,0x00ca6505a5240244,0x002246b62d919782,0x001439102d7aa9b3,0x00e8af1139e6422c,0x00c888d1b52f2b05)},
+  {FIELD_LITERAL(0x005b67690ffd41d9,0x005294f28df516f9,0x00a879272412fcb9,0x00098b629a6d1c8d,0x00fabd3c8050865a,0x00cd7e5b0a3879c5,0x00153238210f3423,0x00357cac101e9f42)},
+  {FIELD_LITERAL(0x008917b454444fb7,0x00f59247c97e441b,0x00a6200a6815152d,0x0009a4228601d254,0x001c0360559bd374,0x007563362039cb36,0x00bd75b48d74e32b,0x0017f515ac3499e8)},
+  {FIELD_LITERAL(0x001532a7ffe41c5a,0x00eb1edce358d6bf,0x00ddbacc7b678a7b,0x008a7b70f3c841a3,0x00f1923bf27d3f4c,0x000b2713ed8f7873,0x00aaf67e29047902,0x0044994a70b3976d)},
+  {FIELD_LITERAL(0x00d54e802082d42c,0x00a55aa0dce7cc6c,0x006477b96073f146,0x0082efe4ceb43594,0x00a922bcba026845,0x0077f19d1ab75182,0x00c2bb2737846e59,0x0004d7eec791dd33)},
+  {FIELD_LITERAL(0x0044588d1a81d680,0x00b0a9097208e4f8,0x00212605350dc57e,0x0028717cd2871123,0x00fb083c100fd979,0x0045a056ce063fdf,0x00a5d604b4dd6a41,0x001dabc08ba4e236)},
+  {FIELD_LITERAL(0x00c4887198d7a7fa,0x00244f98fb45784a,0x0045911e15a15d01,0x001d323d374c0966,0x00967c3915196562,0x0039373abd2f3c67,0x000d2c5614312423,0x0041cf2215442ce3)},
+  {FIELD_LITERAL(0x008ede889ada7f06,0x001611e91de2e135,0x00fdb9a458a471b9,0x00563484e03710d1,0x0031cc81925e3070,0x0062c97b3af80005,0x00fa733eea28edeb,0x00e82457e1ebbc88)},
+  {FIELD_LITERAL(0x006a0df5fe9b6f59,0x00a0d4ff46040d92,0x004a7cedb6f93250,0x00d1df8855b8c357,0x00e73a46086fd058,0x0048fb0add6dfe59,0x001e03a28f1b4e3d,0x00a871c993308d76)},
+  {FIELD_LITERAL(0x0030dbb2d1766ec8,0x00586c0ad138555e,0x00d1a34f9e91c77c,0x0063408ad0e89014,0x00d61231b05f6f5b,0x0009abf569f5fd8a,0x00aec67a110f1c43,0x0031d1a790938dd7)},
+  {FIELD_LITERAL(0x006cded841e2a862,0x00198d60af0ab6fb,0x0018f09db809e750,0x004e6ac676016263,0x00eafcd1620969cb,0x002c9784ca34917d,0x0054f00079796de7,0x00d9fab5c5972204)},
+  {FIELD_LITERAL(0x004bd0fee2438a83,0x00b571e62b0f83bd,0x0059287d7ce74800,0x00fb3631b645c3f0,0x00a018e977f78494,0x0091e27065c27b12,0x007696c1817165e0,0x008c40be7c45ba3a)},
+  {FIELD_LITERAL(0x00a0f326327cb684,0x001c7d0f672680ff,0x008c1c81ffb112d1,0x00f8f801674eddc8,0x00e926d5d48c2a9d,0x005bd6d954c6fe9a,0x004c6b24b4e33703,0x00d05eb5c09105cc)},
+  {FIELD_LITERAL(0x00d61731caacf2cf,0x002df0c7609e01c5,0x00306172208b1e2b,0x00b413fe4fb2b686,0x00826d360902a221,0x003f8d056e67e7f7,0x0065025b0175e989,0x00369add117865eb)},
+  {FIELD_LITERAL(0x00aaf895aec2fa11,0x000f892bc313eb52,0x005b1c794dad050b,0x003f8ec4864cec14,0x00af81058d0b90e5,0x00ebe43e183997bb,0x00a9d610f9f3e615,0x007acd8eec2e88d3)},
+  {FIELD_LITERAL(0x0049b2fab13812a3,0x00846db32cd60431,0x000177fa578c8d6c,0x00047d0e2ad4bc51,0x00b158ba38d1e588,0x006a45daad79e3f3,0x000997b93cab887b,0x00c47ea42fa23dc3)},
+  {FIELD_LITERAL(0x0012b6fef7aeb1ca,0x009412768194b6a7,0x00ff0d351f23ab93,0x007e8a14c1aff71b,0x006c1c0170c512bc,0x0016243ea02ab2e5,0x007bb6865b303f3e,0x0015ce6b29b159f4)},
+  {FIELD_LITERAL(0x009961cd02e68108,0x00e2035d3a1d0836,0x005d51f69b5e1a1d,0x004bccb4ea36edcd,0x0069be6a7aeef268,0x0063f4dd9de8d5a7,0x006283783092ca35,0x0075a31af2c35409)},
+  {FIELD_LITERAL(0x00c412365162e8cf,0x00012283fb34388a,0x003e6543babf39e2,0x00eead6b3a804978,0x0099c0314e8b326f,0x00e98e0a8d477a4f,0x00d2eb96b127a687,0x00ed8d7df87571bb)},
+  {FIELD_LITERAL(0x00777463e308cacf,0x00c8acb93950132d,0x00ebddbf4ca48b2c,0x0026ad7ca0795a0a,0x00f99a3d9a715064,0x000d60bcf9d4dfcc,0x005e65a73a437a06,0x0019d536a8db56c8)},
+  {FIELD_LITERAL(0x00192d7dd558d135,0x0027cd6a8323ffa7,0x00239f1a412dc1e7,0x0046b4b3be74fc5c,0x0020c47a2bef5bce,0x00aa17e48f43862b,0x00f7e26c96342e5f,0x0008011c530f39a9)},
+  {FIELD_LITERAL(0x00aad4ac569bf0f1,0x00a67adc90b27740,0x0048551369a5751a,0x0031252584a3306a,0x0084e15df770e6fc,0x00d7bba1c74b5805,0x00a80ef223af1012,0x0089c85ceb843a34)},
+  {FIELD_LITERAL(0x00c4545be4a54004,0x0099e11f60357e6c,0x001f3936d19515a6,0x007793df84341a6e,0x0051061886717ffa,0x00e9b0a660b28f85,0x0044ea685892de0d,0x000257d2a1fda9d9)},
+  {FIELD_LITERAL(0x007e8b01b24ac8a8,0x006cf3b0b5ca1337,0x00f1607d3e36a570,0x0039b7fab82991a1,0x00231777065840c5,0x00998e5afdd346f9,0x00b7dc3e64acc85f,0x00baacc748013ad6)},
+  {FIELD_LITERAL(0x008ea6a4177580bf,0x005fa1953e3f0378,0x005fe409ac74d614,0x00452327f477e047,0x00a4018507fb6073,0x007b6e71951caac8,0x0012b42ab8a6ce91,0x0080eca677294ab7)},
+  {FIELD_LITERAL(0x00a53edc023ba69b,0x00c6afa83ddde2e8,0x00c3f638b307b14e,0x004a357a64414062,0x00e4d94d8b582dc9,0x001739caf71695b7,0x0012431b2ae28de1,0x003b6bc98682907c)},
+  {FIELD_LITERAL(0x008a9a93be1f99d6,0x0079fa627cc699c8,0x00b0cfb134ba84c8,0x001c4b778249419a,0x00df4ab3d9c44f40,0x009f596e6c1a9e3c,0x001979c0df237316,0x00501e953a919b87)}
+};
diff --git a/crypto/ec/curve448/eddsa.c b/crypto/ec/curve448/eddsa.c
new file mode 100644
index 0000000000..f6c1836658
--- /dev/null
+++ b/crypto/ec/curve448/eddsa.c
@@ -0,0 +1,328 @@
+/**
+ * @file ed448goldilocks/eddsa.c
+ * @author Mike Hamburg
+ *
+ * @copyright
+ *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ *
+ * @cond internal
+ * @brief EdDSA routines.
+ *
+ * @warning This file was automatically generated in Python.
+ * Please do not edit it.
+ */
+#include "word.h"
+#include <decaf/ed448.h>
+#include <decaf/shake.h>
+#include <decaf/sha512.h>
+#include <string.h>
+
+#define API_NAME "decaf_448"
+#define API_NS(_id) decaf_448_##_id
+
+#define hash_ctx_t   decaf_shake256_ctx_t
+#define hash_init    decaf_shake256_init
+#define hash_update  decaf_shake256_update
+#define hash_final   decaf_shake256_final
+#define hash_destroy decaf_shake256_destroy
+#define hash_hash    decaf_shake256_hash
+
+#define NO_CONTEXT DECAF_EDDSA_448_SUPPORTS_CONTEXTLESS_SIGS
+#define EDDSA_USE_SIGMA_ISOGENY 0
+#define COFACTOR 4
+#define EDDSA_PREHASH_BYTES 64
+
+#if NO_CONTEXT
+const uint8_t NO_CONTEXT_POINTS_HERE = 0;
+const uint8_t * const DECAF_ED448_NO_CONTEXT = &NO_CONTEXT_POINTS_HERE;
+#endif
+
+/* EDDSA_BASE_POINT_RATIO = 1 or 2
+ * Because EdDSA25519 is not on E_d but on the isogenous E_sigma_d,
+ * its base point is twice ours.
+ */
+#define EDDSA_BASE_POINT_RATIO (1+EDDSA_USE_SIGMA_ISOGENY) /* TODO: remove */
+
+static void clamp (
+    uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES]
+) {
+    /* Clamp the serialized secret scalar in place; -COFACTOR masks off the low bits (COFACTOR is 4). */
+    secret_scalar_ser[0] &= -COFACTOR;
+    uint8_t hibit = (1<<0)>>1; /* evaluates to 0, so only the first branch below runs */
+    if (hibit == 0) {
+        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 1] = 0; /* clear the last byte entirely */
+        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 2] |= 0x80; /* force the top bit of the byte before it */
+    } else {
+        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 1] &= hibit-1;
+        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 1] |= hibit;
+    }
+}
+
+static void hash_init_with_dom(
+    hash_ctx_t hash,
+    uint8_t prehashed,
+    uint8_t for_prehash,
+    const uint8_t *context,
+    uint8_t context_len
+) {
+    hash_init(hash);
+    /* Then absorb the prefix: "SigEd448", a flag byte from prehashed/for_prehash, context_len, context. */
+#if NO_CONTEXT
+    if (context_len == 0 && context == DECAF_ED448_NO_CONTEXT) {
+        (void)prehashed;
+        (void)for_prehash;
+        (void)context;
+        (void)context_len;
+        return; /* sentinel context pointer: leave the hash freshly initialized, with no prefix */
+    }
+#endif
+    const char *dom_s = "SigEd448";
+    const uint8_t dom[2] = {2+word_is_zero(prehashed)+word_is_zero(for_prehash), context_len};
+    hash_update(hash,(const unsigned char *)dom_s, strlen(dom_s)); /* label bytes only, no terminating NUL */
+    hash_update(hash,dom,2);
+    hash_update(hash,context,context_len);
+}
+
+void decaf_ed448_prehash_init (
+    hash_ctx_t hash
+) {
+    hash_init(hash);
+}
+
+/* In this file because it uses the hash */
+void decaf_ed448_convert_private_key_to_x448 (
+    uint8_t x[DECAF_X448_PRIVATE_BYTES],
+    const uint8_t ed[DECAF_EDDSA_448_PRIVATE_BYTES]
+) {
+    /* pass the private key through hash_hash function */
+    /* and keep the first DECAF_X448_PRIVATE_BYTES bytes */
+    hash_hash(
+        x,
+        DECAF_X448_PRIVATE_BYTES,
+        ed,
+        DECAF_EDDSA_448_PRIVATE_BYTES
+    );
+}
+    
+void decaf_ed448_derive_public_key (
+    uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES]
+) {
+    /* only this much used for keygen */
+    uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES];
+    
+    hash_hash(
+        secret_scalar_ser,
+        sizeof(secret_scalar_ser),
+        privkey,
+        DECAF_EDDSA_448_PRIVATE_BYTES
+    );
+    clamp(secret_scalar_ser);
+        
+    API_NS(scalar_t) secret_scalar;
+    API_NS(scalar_decode_long)(secret_scalar, secret_scalar_ser, sizeof(secret_scalar_ser));
+    
+    /* Since we are going to mul_by_cofactor during encoding, divide by it here.
+     * However, the EdDSA base point is not the same as the decaf base point if
+     * the sigma isogeny is in use: the EdDSA base point is on Etwist_d/(1-d) and
+     * the decaf base point is on Etwist_d, and when converted it effectively
+     * picks up a factor of 2 from the isogenies.  So we might start at 2 instead of 1. 
+     */
+    for (unsigned int c=1; c<DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1) {
+        API_NS(scalar_halve)(secret_scalar,secret_scalar);
+    }
+    
+    API_NS(point_t) p;
+    API_NS(precomputed_scalarmul)(p,API_NS(precomputed_base),secret_scalar);
+    
+    API_NS(point_mul_by_ratio_and_encode_like_eddsa)(pubkey, p);
+        
+    /* Cleanup */
+    API_NS(scalar_destroy)(secret_scalar);
+    API_NS(point_destroy)(p);
+    decaf_bzero(secret_scalar_ser, sizeof(secret_scalar_ser));
+}
+
+void decaf_ed448_sign (
+    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const uint8_t *message,
+    size_t message_len,
+    uint8_t prehashed,
+    const uint8_t *context,
+    uint8_t context_len
+) {
+    API_NS(scalar_t) secret_scalar;
+    hash_ctx_t hash;
+    {
+        /* Schedule the secret key */
+        struct {
+            uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES];
+            uint8_t seed[DECAF_EDDSA_448_PRIVATE_BYTES];
+        } __attribute__((packed)) expanded;
+        hash_hash(
+            (uint8_t *)&expanded,
+            sizeof(expanded),
+            privkey,
+            DECAF_EDDSA_448_PRIVATE_BYTES
+        );
+        clamp(expanded.secret_scalar_ser);   
+        API_NS(scalar_decode_long)(secret_scalar, expanded.secret_scalar_ser, sizeof(expanded.secret_scalar_ser));
+    
+        /* Hash to create the nonce */
+        hash_init_with_dom(hash,prehashed,0,context,context_len);
+        hash_update(hash,expanded.seed,sizeof(expanded.seed));
+        hash_update(hash,message,message_len);
+        decaf_bzero(&expanded, sizeof(expanded));
+    }
+    
+    /* Decode the nonce */
+    API_NS(scalar_t) nonce_scalar;
+    {
+        uint8_t nonce[2*DECAF_EDDSA_448_PRIVATE_BYTES];
+        hash_final(hash,nonce,sizeof(nonce));
+        API_NS(scalar_decode_long)(nonce_scalar, nonce, sizeof(nonce));
+        decaf_bzero(nonce, sizeof(nonce));
+    }
+    
+    uint8_t nonce_point[DECAF_EDDSA_448_PUBLIC_BYTES] = {0};
+    {
+        /* Scalarmul to create the nonce-point */
+        API_NS(scalar_t) nonce_scalar_2;
+        API_NS(scalar_halve)(nonce_scalar_2,nonce_scalar);
+        for (unsigned int c = 2; c < DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1) {
+            API_NS(scalar_halve)(nonce_scalar_2,nonce_scalar_2);
+        }
+        
+        API_NS(point_t) p;
+        API_NS(precomputed_scalarmul)(p,API_NS(precomputed_base),nonce_scalar_2);
+        API_NS(point_mul_by_ratio_and_encode_like_eddsa)(nonce_point, p);
+        API_NS(point_destroy)(p);
+        API_NS(scalar_destroy)(nonce_scalar_2);
+    }
+    
+    API_NS(scalar_t) challenge_scalar;
+    {
+        /* Compute the challenge */
+        hash_init_with_dom(hash,prehashed,0,context,context_len);
+        hash_update(hash,nonce_point,sizeof(nonce_point));
+        hash_update(hash,pubkey,DECAF_EDDSA_448_PUBLIC_BYTES);
+        hash_update(hash,message,message_len);
+        uint8_t challenge[2*DECAF_EDDSA_448_PRIVATE_BYTES];
+        hash_final(hash,challenge,sizeof(challenge));
+        hash_destroy(hash);
+        API_NS(scalar_decode_long)(challenge_scalar,challenge,sizeof(challenge));
+        decaf_bzero(challenge,sizeof(challenge));
+    }
+    
+    API_NS(scalar_mul)(challenge_scalar,challenge_scalar,secret_scalar);
+    API_NS(scalar_add)(challenge_scalar,challenge_scalar,nonce_scalar);
+    
+    decaf_bzero(signature,DECAF_EDDSA_448_SIGNATURE_BYTES);
+    memcpy(signature,nonce_point,sizeof(nonce_point));
+    API_NS(scalar_encode)(&signature[DECAF_EDDSA_448_PUBLIC_BYTES],challenge_scalar);
+    
+    API_NS(scalar_destroy)(secret_scalar);
+    API_NS(scalar_destroy)(nonce_scalar);
+    API_NS(scalar_destroy)(challenge_scalar);
+}
+
+
+void decaf_ed448_sign_prehash (
+    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const decaf_ed448_prehash_ctx_t hash,
+    const uint8_t *context,
+    uint8_t context_len
+) {
+    uint8_t hash_output[EDDSA_PREHASH_BYTES];
+    {
+        decaf_ed448_prehash_ctx_t hash_too;
+        memcpy(hash_too,hash,sizeof(hash_too));
+        hash_final(hash_too,hash_output,sizeof(hash_output));
+        hash_destroy(hash_too);
+    }
+
+    decaf_ed448_sign(signature,privkey,pubkey,hash_output,sizeof(hash_output),1,context,context_len);
+    decaf_bzero(hash_output,sizeof(hash_output));
+}
+
+decaf_error_t decaf_ed448_verify (
+    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const uint8_t *message,
+    size_t message_len,
+    uint8_t prehashed,
+    const uint8_t *context,
+    uint8_t context_len
+) { 
+    API_NS(point_t) pk_point, r_point;
+    decaf_error_t error = API_NS(point_decode_like_eddsa_and_mul_by_ratio)(pk_point,pubkey);
+    if (DECAF_SUCCESS != error) { return error; }
+    
+    error = API_NS(point_decode_like_eddsa_and_mul_by_ratio)(r_point,signature);
+    if (DECAF_SUCCESS != error) { return error; }
+    
+    API_NS(scalar_t) challenge_scalar;
+    {
+        /* Compute the challenge */
+        hash_ctx_t hash;
+        hash_init_with_dom(hash,prehashed,0,context,context_len);
+        hash_update(hash,signature,DECAF_EDDSA_448_PUBLIC_BYTES);
+        hash_update(hash,pubkey,DECAF_EDDSA_448_PUBLIC_BYTES);
+        hash_update(hash,message,message_len);
+        uint8_t challenge[2*DECAF_EDDSA_448_PRIVATE_BYTES];
+        hash_final(hash,challenge,sizeof(challenge));
+        hash_destroy(hash);
+        API_NS(scalar_decode_long)(challenge_scalar,challenge,sizeof(challenge));
+        decaf_bzero(challenge,sizeof(challenge));
+    }
+    API_NS(scalar_sub)(challenge_scalar, API_NS(scalar_zero), challenge_scalar); /* negate: challenge = 0 - challenge */
+    
+    API_NS(scalar_t) response_scalar;
+    API_NS(scalar_decode_long)(
+        response_scalar,
+        &signature[DECAF_EDDSA_448_PUBLIC_BYTES],
+        DECAF_EDDSA_448_PRIVATE_BYTES
+    );
+    
+    for (unsigned c=1; c<DECAF_448_EDDSA_DECODE_RATIO; c<<=1) {
+        API_NS(scalar_add)(response_scalar,response_scalar,response_scalar); /* double once per factor of 2 in the ratio */
+    }
+    
+    /* NOTE(review): no visible check that the encoded s is below the group order (RFC 8032 sec. 5.2.7) - confirm. */
+    /* pk_point = -c(x(P)) + (cx + k)G = kG */
+    API_NS(base_double_scalarmul_non_secret)(
+        pk_point,
+        response_scalar,
+        pk_point,
+        challenge_scalar
+    );
+    return decaf_succeed_if(API_NS(point_eq(pk_point,r_point)));
+}
+
+
+decaf_error_t decaf_ed448_verify_prehash (
+    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+    const decaf_ed448_prehash_ctx_t hash,
+    const uint8_t *context,
+    uint8_t context_len
+) {
+    decaf_error_t ret;
+    
+    uint8_t hash_output[EDDSA_PREHASH_BYTES];
+    {
+        decaf_ed448_prehash_ctx_t hash_too;
+        memcpy(hash_too,hash,sizeof(hash_too));
+        hash_final(hash_too,hash_output,sizeof(hash_output));
+        hash_destroy(hash_too);
+    }
+    
+    ret = decaf_ed448_verify(signature,pubkey,hash_output,sizeof(hash_output),1,context,context_len);
+    
+    return ret;
+}
diff --git a/crypto/ec/curve448/f_arithmetic.c b/crypto/ec/curve448/f_arithmetic.c
new file mode 100644
index 0000000000..cf68519686
--- /dev/null
+++ b/crypto/ec/curve448/f_arithmetic.c
@@ -0,0 +1,46 @@
+/**
+ * @cond internal
+ * @file f_arithmetic.c
+ * @copyright
+ *   Copyright (c) 2014 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ * @author Mike Hamburg
+ * @brief Field-specific arithmetic.
+ */
+
+#include "field.h"
+
+mask_t gf_isr (
+    gf a,
+    const gf x
+) {
+    gf L0, L1, L2;
+    gf_sqr  (L1,     x );
+    gf_mul  (L2,     x,   L1 );
+    gf_sqr  (L1,   L2 );
+    gf_mul  (L2,     x,   L1 );
+    gf_sqrn (L1,   L2,     3 );
+    gf_mul  (L0,   L2,   L1 );
+    gf_sqrn (L1,   L0,     3 );
+    gf_mul  (L0,   L2,   L1 );
+    gf_sqrn (L2,   L0,     9 );
+    gf_mul  (L1,   L0,   L2 );
+    gf_sqr  (L0,   L1 );
+    gf_mul  (L2,     x,   L0 );
+    gf_sqrn (L0,   L2,    18 );
+    gf_mul  (L2,   L1,   L0 );
+    gf_sqrn (L0,   L2,    37 );
+    gf_mul  (L1,   L2,   L0 );
+    gf_sqrn (L0,   L1,    37 );
+    gf_mul  (L1,   L2,   L0 );
+    gf_sqrn (L0,   L1,   111 );
+    gf_mul  (L2,   L1,   L0 );
+    gf_sqr  (L0,   L2 );
+    gf_mul  (L1,     x,   L0 );
+    gf_sqrn (L0,   L1,   223 );
+    gf_mul  (L1,   L2,   L0 );
+    gf_sqr  (L2, L1);
+    gf_mul  (L0, L2, x);
+    gf_copy(a,L1);
+    return gf_eq(L0,ONE);
+}
diff --git a/crypto/ec/curve448/f_field.h b/crypto/ec/curve448/f_field.h
new file mode 100644
index 0000000000..4eef7186d3
--- /dev/null
+++ b/crypto/ec/curve448/f_field.h
@@ -0,0 +1,110 @@
+/**
+ * @file p448/f_field.h
+ * @author Mike Hamburg
+ *
+ * @copyright
+ *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ *
+ * @brief Field-specific code for 2^448 - 2^224 - 1.
+ *
+ * @warning This file was automatically generated in Python.
+ * Please do not edit it.
+ */
+
+#ifndef __P448_F_FIELD_H__
+#define __P448_F_FIELD_H__ 1
+
+#include "constant_time.h"
+#include <string.h>
+#include <assert.h>
+
+#include "word.h"
+
+#define __DECAF_448_GF_DEFINED__ 1
+#define NLIMBS (64/sizeof(word_t))
+#define X_SER_BYTES 56
+#define SER_BYTES 56
+typedef struct gf_448_s {
+    word_t limb[NLIMBS];
+} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];
+
+#define GF_LIT_LIMB_BITS  56
+#define GF_BITS           448
+#define ZERO              gf_448_ZERO
+#define ONE               gf_448_ONE
+#define MODULUS           gf_448_MODULUS
+#define gf                gf_448_t
+#define gf_s              gf_448_s
+#define gf_eq             gf_448_eq
+#define gf_hibit          gf_448_hibit
+#define gf_lobit          gf_448_lobit
+#define gf_copy           gf_448_copy
+#define gf_add            gf_448_add
+#define gf_sub            gf_448_sub
+#define gf_add_RAW        gf_448_add_RAW
+#define gf_sub_RAW        gf_448_sub_RAW
+#define gf_bias           gf_448_bias
+#define gf_weak_reduce    gf_448_weak_reduce
+#define gf_strong_reduce  gf_448_strong_reduce
+#define gf_mul            gf_448_mul
+#define gf_sqr            gf_448_sqr
+#define gf_mulw_unsigned  gf_448_mulw_unsigned
+#define gf_isr            gf_448_isr
+#define gf_serialize      gf_448_serialize
+#define gf_deserialize    gf_448_deserialize
+
+/* RFC 7748 support */
+#define X_PUBLIC_BYTES  X_SER_BYTES
+#define X_PRIVATE_BYTES X_PUBLIC_BYTES
+#define X_PRIVATE_BITS  448
+
+#define SQRT_MINUS_ONE    P448_SQRT_MINUS_ONE /* might not be defined */
+
+#define INLINE_UNUSED __inline__ __attribute__((unused,always_inline))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Defined below in f_impl.h */
+static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; }
+static INLINE_UNUSED void gf_add_RAW (gf out, const gf a, const gf b);
+static INLINE_UNUSED void gf_sub_RAW (gf out, const gf a, const gf b);
+static INLINE_UNUSED void gf_bias (gf inout, int amount);
+static INLINE_UNUSED void gf_weak_reduce (gf inout);
+
+void gf_strong_reduce (gf inout);   
+void gf_add (gf out, const gf a, const gf b);
+void gf_sub (gf out, const gf a, const gf b);
+void gf_mul (gf_s *__restrict__ out, const gf a, const gf b);
+void gf_mulw_unsigned (gf_s *__restrict__ out, const gf a, uint32_t b);
+void gf_sqr (gf_s *__restrict__ out, const gf a);
+mask_t gf_isr(gf a, const gf x); /** a^2 x = 1, QNR, or 0 if x=0.  Return true if successful */
+mask_t gf_eq (const gf x, const gf y);
+mask_t gf_lobit (const gf x);
+mask_t gf_hibit (const gf x);
+
+void gf_serialize (uint8_t *serial, const gf x,int with_highbit);
+mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES],int with_hibit,uint8_t hi_nmask);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "f_impl.h" /* Bring in the inline implementations */
+
+#define P_MOD_8 7
+#if P_MOD_8 == 5
+    extern const gf SQRT_MINUS_ONE;
+#endif
+
+#ifndef LIMBPERM
+  #define LIMBPERM(i) (i)
+#endif
+#define LIMB_MASK(i) (((1ull)<<LIMB_PLACE_VALUE(i))-1)
+
+static const gf ZERO = {{{0}}}, ONE = {{{ [LIMBPERM(0)] = 1 }}};
+
+#endif /* __P448_F_FIELD_H__ */
diff --git a/crypto/ec/curve448/f_generic.c b/crypto/ec/curve448/f_generic.c
new file mode 100644
index 0000000000..d09a989f67
--- /dev/null
+++ b/crypto/ec/curve448/f_generic.c
@@ -0,0 +1,144 @@
+/**
+ * @file p448/f_generic.c
+ * @author Mike Hamburg
+ *
+ * @copyright
+ *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ *
+ * @brief Generic arithmetic which has to be compiled per field.
+ *
+ * @warning This file was automatically generated in Python.
+ * Please do not edit it.
+ */
+#include "field.h"
+
+static const gf MODULUS = {FIELD_LITERAL(
+    0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff, 0xfffffffffffffe, 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff
+)};
+    
+#if P_MOD_8 == 5
+    const gf SQRT_MINUS_ONE = {FIELD_LITERAL(
+        /* NOPE */
+    )};
+#endif
+
+/** Serialize to wire format. */
+void gf_serialize (uint8_t serial[SER_BYTES], const gf x, int with_hibit) {
+    gf red;
+    gf_copy(red, x);
+    gf_strong_reduce(red);
+    if (!with_hibit) { assert(gf_hibit(red) == 0); }
+    
+    unsigned int j=0, fill=0;
+    dword_t buffer = 0;
+    UNROLL for (unsigned int i=0; i<(with_hibit ? X_SER_BYTES : SER_BYTES); i++) {
+        if (fill < 8 && j < NLIMBS) {
+            buffer |= ((dword_t)red->limb[LIMBPERM(j)]) << fill;
+            fill += LIMB_PLACE_VALUE(LIMBPERM(j));
+            j++;
+        }
+        serial[i] = buffer;
+        fill -= 8;
+        buffer >>= 8;
+    }
+}
+
+/** Return high bit of x = low bit of 2x mod p */
+mask_t gf_hibit(const gf x) {
+    gf y;
+    gf_add(y,x,x);
+    gf_strong_reduce(y);
+    return -(y->limb[0]&1);
+}
+
+/** Return low bit of x after strong reduction, as a mask (0 or all-ones) */
+mask_t gf_lobit(const gf x) {
+    gf y;
+    gf_copy(y,x);
+    gf_strong_reduce(y);
+    return -(y->limb[0]&1);
+}
+
+/** Deserialize from wire format; return -1 on success and 0 on failure. */
+mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES], int with_hibit, uint8_t hi_nmask) {
+    unsigned int j=0, fill=0;
+    dword_t buffer = 0;
+    dsword_t scarry = 0;
+    const unsigned nbytes = with_hibit ? X_SER_BYTES : SER_BYTES;
+    UNROLL for (unsigned int i=0; i<NLIMBS; i++) {
+        UNROLL while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < nbytes) {
+            uint8_t sj = serial[j];
+            if (j==nbytes-1) sj &= ~hi_nmask;
+            buffer |= ((dword_t)sj) << fill;
+            fill += 8;
+            j++;
+        }
+        x->limb[LIMBPERM(i)] = (i<NLIMBS-1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer;
+        fill -= LIMB_PLACE_VALUE(LIMBPERM(i));
+        buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i));
+        scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8*sizeof(word_t));
+    }
+    mask_t succ = with_hibit ? -(mask_t)1 : ~gf_hibit(x);
+    return succ & word_is_zero(buffer) & ~word_is_zero(scarry);
+}
+
+/** Reduce a, in place, to canonical form. */
+void gf_strong_reduce (gf a) {
+    /* first, clear high */
+    gf_weak_reduce(a); /* Determined to have negligible perf impact. */
+
+    /* now the total is less than 2p */
+
+    /* compute total_value - p.  No need to reduce mod p. */
+    dsword_t scarry = 0;
+    for (unsigned int i=0; i<NLIMBS; i++) {
+        scarry = scarry + a->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)];
+        a->limb[LIMBPERM(i)] = scarry & LIMB_MASK(LIMBPERM(i));
+        scarry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
+    }
+
+    /* uncommon case: it was >= p, so now scarry = 0 and this = x - p
+     * common case: it was < p, so now scarry = -1 and this = x - p + 2^448
+     * so let's add back in p.  will carry back off the top for 2^448.
+     */
+    assert(word_is_zero(scarry) | word_is_zero(scarry+1));
+
+    word_t scarry_0 = scarry; /* mask: all-ones if the subtraction borrowed, else 0 */
+    dword_t carry = 0;
+
+    /* add it back */
+    for (unsigned int i=0; i<NLIMBS; i++) {
+        carry = carry + a->limb[LIMBPERM(i)] + (scarry_0 & MODULUS->limb[LIMBPERM(i)]);
+        a->limb[LIMBPERM(i)] = carry & LIMB_MASK(LIMBPERM(i));
+        carry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
+    }
+
+    assert(word_is_zero(carry + scarry_0));
+}
+
+/** Subtract two gf elements d=a-b */
+void gf_sub (gf d, const gf a, const gf b) {
+    gf_sub_RAW ( d, a, b );
+    gf_bias( d, 2 );
+    gf_weak_reduce ( d );
+}
+
+/** Add two field elements d = a+b */
+void gf_add (gf d, const gf a, const gf b) {
+    gf_add_RAW ( d, a, b );
+    gf_weak_reduce ( d );
+}
+
+/** Compare a==b */
+mask_t gf_eq(const gf a, const gf b) {
+    gf c;
+    gf_sub(c,a,b);
+    gf_strong_reduce(c);
+    mask_t ret=0;
+    for (unsigned int i=0; i<NLIMBS; i++) {
+        ret |= c->limb[LIMBPERM(i)];
+    }
+
+    return word_is_zero(ret);
+}
diff --git a/crypto/ec/curve448/field.h b/crypto/ec/curve448/field.h
new file mode 100644
index 0000000000..c536a51b66
--- /dev/null
+++ b/crypto/ec/curve448/field.h
@@ -0,0 +1,112 @@
+/**
+ * @file field.h
+ * @brief Generic gf header.
+ * @copyright
+ *   Copyright (c) 2014 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ * @author Mike Hamburg
+ */
+
+#ifndef __GF_H__
+#define __GF_H__
+
+#include "constant_time.h"
+#include "f_field.h"
+#include <string.h>
+    
+/** Square x, n times. */
+static DECAF_INLINE void gf_sqrn (
+    gf_s *__restrict__ y,
+    const gf x,
+    int n
+) {
+    gf tmp;
+    assert(n>0);
+    if (n&1) {
+        gf_sqr(y,x);
+        n--;
+    } else {
+        gf_sqr(tmp,x);
+        gf_sqr(y,tmp);
+        n-=2;
+    }
+    for (; n; n-=2) {
+        gf_sqr(tmp,y);
+        gf_sqr(y,tmp);
+    }
+}
+
+#define gf_add_nr gf_add_RAW
+
+/** Subtract mod p.  Bias by 2 and don't reduce  */
+static inline void gf_sub_nr ( gf c, const gf a, const gf b ) {
+    gf_sub_RAW(c,a,b);
+    gf_bias(c, 2);
+    if (GF_HEADROOM < 3) gf_weak_reduce(c);
+}
+
+/** Subtract mod p. Bias by amt but don't reduce.  */
+static inline void gf_subx_nr ( gf c, const gf a, const gf b, int amt ) {
+    gf_sub_RAW(c,a,b);
+    gf_bias(c, amt);
+    if (GF_HEADROOM < amt+1) gf_weak_reduce(c);
+}
+
+/** c = a * w for a signed word w.  Not constant-time WRT the sign of w. */
+static inline void gf_mulw(gf c, const gf a, int32_t w) {
+    if (w>0) {
+        gf_mulw_unsigned(c, a, w);
+    } else {
+        gf_mulw_unsigned(c, a, (uint32_t)0 - (uint32_t)w); /* |w|, negated as unsigned: plain -w is UB for INT32_MIN */
+        gf_sub(c,ZERO,c); /* c = -(a * |w|) */
+    }
+}
+
+/** Constant time, x = is_z ? z : y */
+static inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z) {
+    constant_time_select(x,y,z,sizeof(gf),is_z,0);
+}
+
+/** Constant time, if (neg) x=-x; */
+static inline void gf_cond_neg(gf x, mask_t neg) {
+    gf y;
+    gf_sub(y,ZERO,x);
+    gf_cond_sel(x,x,y,neg);
+}
+
+/** Constant time, if (swap) (x,y) = (y,x); */
+static inline void
+gf_cond_swap(gf x, gf_s *__restrict__ y, mask_t swap) {
+    constant_time_cond_swap(x,y,sizeof(gf_s),swap);
+}
+
+static DECAF_INLINE void gf_mul_qnr(gf_s *__restrict__ out, const gf x) {
+#if P_MOD_8 == 5
+    /* r = QNR * r0^2 */
+    gf_mul(out,x,SQRT_MINUS_ONE);
+#elif P_MOD_8 == 3 || P_MOD_8 == 7
+    gf_sub(out,ZERO,x);
+#else
+    #error "Only supporting p=3,5,7 mod 8"
+#endif
+}
+
+static DECAF_INLINE void gf_div_qnr(gf_s *__restrict__ out, const gf x) {
+#if P_MOD_8 == 5
+    /* out = -(x * SQRT_MINUS_ONE), i.e. x / sqrt(-1), since 1/i = -i */
+    gf_mul(out,x,SQRT_MINUS_ONE);
+    gf_sub(out,ZERO,out);
+#elif P_MOD_8 == 3 || P_MOD_8 == 7
+    gf_sub(out,ZERO,x); /* out = -x */
+#else
+    #error "Only supporting p=3,5,7 mod 8"
+#endif
+}
+
+#if P_MOD_8 == 5
+#define gf_mul_i gf_mul_qnr
+#define gf_div_i gf_div_qnr
+#endif
+
+
+#endif // __GF_H__
diff --git a/crypto/ec/curve448/include/arch_32/arch_intrinsics.h b/crypto/ec/curve448/include/arch_32/arch_intrinsics.h
deleted file mode 100644
index f3908a2589..0000000000
--- a/crypto/ec/curve448/include/arch_32/arch_intrinsics.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Copyright (c) 2016 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#ifndef __ARCH_ARCH_32_ARCH_INTRINSICS_H__
-#define __ARCH_ARCH_32_ARCH_INTRINSICS_H__
-
-#define ARCH_WORD_BITS 32
-
-static __inline__ __attribute((always_inline,unused))
-uint32_t word_is_zero(uint32_t a) {
-    /* let's hope the compiler isn't clever enough to optimize this. */
-    return (((uint64_t)a)-1)>>32;
-}
-
-static __inline__ __attribute((always_inline,unused))
-uint64_t widemul(uint32_t a, uint32_t b) {
-    return ((uint64_t)a) * b;
-}
-
-#endif /* __ARCH_ARM_32_ARCH_INTRINSICS_H__ */
-
diff --git a/crypto/ec/curve448/include/arch_arm_32/arch_intrinsics.h b/crypto/ec/curve448/include/arch_arm_32/arch_intrinsics.h
deleted file mode 100644
index 7451c6fe7d..0000000000
--- a/crypto/ec/curve448/include/arch_arm_32/arch_intrinsics.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Copyright (c) 2016 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#ifndef __ARCH_ARM_32_ARCH_INTRINSICS_H__
-#define __ARCH_ARM_32_ARCH_INTRINSICS_H__
-
-#define ARCH_WORD_BITS 32
-
-static __inline__ __attribute((always_inline,unused))
-uint32_t word_is_zero(uint32_t a) {
-    uint32_t ret;
-    asm("subs %0, %1, #1;\n\tsbc %0, %0, %0" : "=r"(ret) : "r"(a) : "cc");
-    return ret;
-}
-
-static __inline__ __attribute((always_inline,unused))
-uint64_t widemul(uint32_t a, uint32_t b) {
-    /* Could be UMULL, but it's hard to express to CC that the registers must be different */
-    return ((uint64_t)a) * b; 
-}
-
-#endif /* __ARCH_ARM_32_ARCH_INTRINSICS_H__ */
-
diff --git a/crypto/ec/curve448/include/arch_neon/arch_intrinsics.h b/crypto/ec/curve448/include/arch_neon/arch_intrinsics.h
deleted file mode 100644
index 1a1e14b36c..0000000000
--- a/crypto/ec/curve448/include/arch_neon/arch_intrinsics.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Copyright (c) 2016 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#ifndef __ARCH_NEON_ARCH_INTRINSICS_H__
-#define __ARCH_NEON_ARCH_INTRINSICS_H__
-
-#define ARCH_WORD_BITS 32
-
-static __inline__ __attribute((always_inline,unused))
-uint32_t word_is_zero(uint32_t a) {
-    uint32_t ret;
-    __asm__("subs %0, %1, #1;\n\tsbc %0, %0, %0" : "=r"(ret) : "r"(a) : "cc");
-    return ret;
-}
-
-static __inline__ __attribute((always_inline,unused))
-uint64_t widemul(uint32_t a, uint32_t b) {
-    /* Could be UMULL, but it's hard to express to CC that the registers must be different */
-    return ((uint64_t)a) * b; 
-}
-
-#endif /* __ARCH_NEON_ARCH_INTRINSICS_H__ */
-
diff --git a/crypto/ec/curve448/include/arch_ref64/arch_intrinsics.h b/crypto/ec/curve448/include/arch_ref64/arch_intrinsics.h
deleted file mode 100644
index 4b34ea5520..0000000000
--- a/crypto/ec/curve448/include/arch_ref64/arch_intrinsics.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Copyright (c) 2016 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#ifndef __ARCH_REF64_ARCH_INTRINSICS_H__
-#define __ARCH_REF64_ARCH_INTRINSICS_H__
-
-#define ARCH_WORD_BITS 64
-
-static __inline__ __attribute((always_inline,unused))
-uint64_t word_is_zero(uint64_t a) {
-    /* let's hope the compiler isn't clever enough to optimize this. */
-    return (((__uint128_t)a)-1)>>64;
-}
-
-static __inline__ __attribute((always_inline,unused))
-__uint128_t widemul(uint64_t a, uint64_t b) {
-    return ((__uint128_t)a) * b; 
-}
-
-#endif /* ARCH_REF64_ARCH_INTRINSICS_H__ */
-
diff --git a/crypto/ec/curve448/include/arch_x86_64/arch_intrinsics.h b/crypto/ec/curve448/include/arch_x86_64/arch_intrinsics.h
deleted file mode 100644
index 8fcf2c8dd4..0000000000
--- a/crypto/ec/curve448/include/arch_x86_64/arch_intrinsics.h
+++ /dev/null
@@ -1,305 +0,0 @@
-/* Copyright (c) 2014-2016 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__
-#define __ARCH_X86_64_ARCH_INTRINSICS_H__
-
-#define ARCH_WORD_BITS 64
-
-#include <stdint.h>
-
-/* FUTURE: autogenerate */
-static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) {
-  uint64_t c,d;
-  #ifndef __BMI2__
-      __asm__ volatile
-          ("movq %[a], %%rax;"
-           "mulq %[b];"
-           : [c]"=&a"(c), [d]"=d"(d)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "cc");
-  #else
-      __asm__ volatile
-          ("movq %[a], %%rdx;"
-           "mulx %[b], %[c], %[d];"
-           : [c]"=r"(c), [d]"=r"(d)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rdx");
-  #endif
-  return (((__uint128_t)(d))<<64) | c;
-}
-
-static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) {
-  uint64_t c,d;
-  #ifndef __BMI2__
-      __asm__ volatile
-          ("movq %[a], %%rax;"
-           "mulq %[b];"
-           : [c]"=&a"(c), [d]"=d"(d)
-           : [b]"m"(*b), [a]"r"(a)
-           : "cc");
-  #else
-      __asm__ volatile
-          ("mulx %[b], %[c], %[d];"
-           : [c]"=r"(c), [d]"=r"(d)
-           : [b]"m"(*b), [a]"d"(a));
-  #endif
-  return (((__uint128_t)(d))<<64) | c;
-}
-
-static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b) {
-  uint64_t c,d;
-  #ifndef __BMI2__
-      __asm__ volatile
-          ("mulq %[b];"
-           : [c]"=a"(c), [d]"=d"(d)
-           : [b]"r"(b), "a"(a)
-           : "cc");
-  #else
-      __asm__ volatile
-          ("mulx %[b], %[c], %[d];"
-           : [c]"=r"(c), [d]"=r"(d)
-           : [b]"r"(b), [a]"d"(a));
-  #endif
-  return (((__uint128_t)(d))<<64) | c;
-}
-
-static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) {
-  uint64_t c,d;
-  #ifndef __BMI2__
-      __asm__ volatile
-          ("movq %[a], %%rax; "
-           "addq %%rax, %%rax; "
-           "mulq %[b];"
-           : [c]"=&a"(c), [d]"=d"(d)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "cc");
-  #else
-      __asm__ volatile
-          ("movq %[a], %%rdx;"
-           "leaq (,%%rdx,2), %%rdx;"
-           "mulx %[b], %[c], %[d];"
-           : [c]"=r"(c), [d]"=r"(d)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rdx");
-  #endif
-  return (((__uint128_t)(d))<<64) | c;
-}
-
-static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
-  uint64_t lo = *acc, hi = *acc>>64;
-  
-  #ifdef __BMI2__
-      uint64_t c,d;
-      __asm__ volatile
-          ("movq %[a], %%rdx; "
-           "mulx %[b], %[c], %[d]; "
-           "addq %[c], %[lo]; "
-           "adcq %[d], %[hi]; "
-           : [c]"=&r"(c), [d]"=&r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rdx", "cc");
-  #else
-      __asm__ volatile
-          ("movq %[a], %%rax; "
-           "mulq %[b]; "
-           "addq %%rax, %[lo]; "
-           "adcq %%rdx, %[hi]; "
-           : [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rax", "rdx", "cc");
-  #endif
-  
-  *acc = (((__uint128_t)(hi))<<64) | lo;
-}
-
-static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2, const uint64_t *a, const uint64_t *b) {
-  uint64_t lo = *acc, hi = *acc>>64;
-  uint64_t lo2 = *acc2, hi2 = *acc2>>64;
-  
-  #ifdef __BMI2__
-      uint64_t c,d;
-      __asm__ volatile
-          ("movq %[a], %%rdx; "
-           "mulx %[b], %[c], %[d]; "
-           "addq %[c], %[lo]; "
-           "adcq %[d], %[hi]; "
-           "addq %[c], %[lo2]; "
-           "adcq %[d], %[hi2]; "
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rdx", "cc");
-  #else
-      __asm__ volatile
-          ("movq %[a], %%rax; "
-           "mulq %[b]; "
-           "addq %%rax, %[lo]; "
-           "adcq %%rdx, %[hi]; "
-           "addq %%rax, %[lo2]; "
-           "adcq %%rdx, %[hi2]; "
-           : [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rax", "rdx", "cc");
-  #endif
-  
-  *acc = (((__uint128_t)(hi))<<64) | lo;
-  *acc2 = (((__uint128_t)(hi2))<<64) | lo2;
-}
-
-static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) {
-  uint64_t lo = *acc, hi = *acc>>64;
-  
-  #ifdef __BMI2__
-      uint64_t c,d;
-      __asm__ volatile
-          ("mulx %[b], %[c], %[d]; "
-           "addq %[c], %[lo]; "
-           "adcq %[d], %[hi]; "
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"m"(*b), [a]"d"(a)
-           : "cc");
-  #else
-      __asm__ volatile
-          ("movq %[a], %%rax; "
-           "mulq %[b]; "
-           "addq %%rax, %[lo]; "
-           "adcq %%rdx, %[hi]; "
-           : [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"m"(*b), [a]"r"(a)
-           : "rax", "rdx", "cc");
-  #endif
-  
-  *acc = (((__uint128_t)(hi))<<64) | lo;
-}
-
-static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b) {
-  uint64_t lo = *acc, hi = *acc>>64;
-  
-  #ifdef __BMI2__
-      uint64_t c,d;
-      __asm__ volatile
-          ("mulx %[b], %[c], %[d]; "
-           "addq %[c], %[lo]; "
-           "adcq %[d], %[hi]; "
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"r"(b), [a]"d"(a)
-           : "cc");
-  #else
-      __asm__ volatile
-          ("mulq %[b]; "
-           "addq %%rax, %[lo]; "
-           "adcq %%rdx, %[hi]; "
-           : [lo]"+r"(lo), [hi]"+r"(hi), "+a"(a)
-           : [b]"r"(b)
-           : "rdx", "cc");
-  #endif
-  
-  *acc = (((__uint128_t)(hi))<<64) | lo;
-}
-
-static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
-  uint64_t lo = *acc, hi = *acc>>64;
-  
-  #ifdef __BMI2__
-      uint64_t c,d;
-      __asm__ volatile
-          ("movq %[a], %%rdx; "
-           "addq %%rdx, %%rdx; "
-           "mulx %[b], %[c], %[d]; "
-           "addq %[c], %[lo]; "
-           "adcq %[d], %[hi]; "
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rdx", "cc");
-  #else
-      __asm__ volatile
-          ("movq %[a], %%rax; "
-           "addq %%rax, %%rax; "
-           "mulq %[b]; "
-           "addq %%rax, %[lo]; "
-           "adcq %%rdx, %[hi]; "
-           : [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rax", "rdx", "cc");
-  #endif
-  
-  *acc = (((__uint128_t)(hi))<<64) | lo;
-}
-
-static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
-  uint64_t lo = *acc, hi = *acc>>64;
-  #ifdef __BMI2__
-      uint64_t c,d;
-      __asm__ volatile
-          ("movq %[a], %%rdx; "
-           "mulx %[b], %[c], %[d]; "
-           "subq %[c], %[lo]; "
-           "sbbq %[d], %[hi]; "
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rdx", "cc");
-  #else
-      __asm__ volatile
-          ("movq %[a], %%rax; "
-           "mulq %[b]; "
-           "subq %%rax, %[lo]; "
-           "sbbq %%rdx, %[hi]; "
-           : [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rax", "rdx", "cc");
-  #endif
-  *acc = (((__uint128_t)(hi))<<64) | lo;
-}
-
-static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
-  uint64_t lo = *acc, hi = *acc>>64;
-  #ifdef __BMI2__
-      uint64_t c,d;
-      __asm__ volatile
-          ("movq %[a], %%rdx; "
-           "addq %%rdx, %%rdx; "
-           "mulx %[b], %[c], %[d]; "
-           "subq %[c], %[lo]; "
-           "sbbq %[d], %[hi]; "
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rdx", "cc");
-  #else
-      __asm__ volatile
-          ("movq %[a], %%rax; "
-           "addq %%rax, %%rax; "
-           "mulq %[b]; "
-           "subq %%rax, %[lo]; "
-           "sbbq %%rdx, %[hi]; "
-           : [lo]"+r"(lo), [hi]"+r"(hi)
-           : [b]"m"(*b), [a]"m"(*a)
-           : "rax", "rdx", "cc");
-  #endif
-  *acc = (((__uint128_t)(hi))<<64) | lo;
-  
-}
-
-static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
-  uint64_t c,d, lo = *acc, hi = *acc>>64;
-  __asm__ volatile
-      ("movq %[a], %%rdx; "
-       "mulx %[b], %[c], %[d]; "
-       "subq %[lo], %[c]; "
-       "sbbq %[hi], %[d]; "
-       : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
-       : [b]"m"(*b), [a]"m"(*a)
-       : "rdx", "cc");
-  *acc = (((__uint128_t)(d))<<64) | c;
-}
-
-static __inline__ uint64_t word_is_zero(uint64_t x) {
-  __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x));
-  return ~x;
-}
-
-static inline uint64_t shrld(__uint128_t x, int n) {
-    return x>>n;
-}
-
-#endif /* __ARCH_X86_64_ARCH_INTRINSICS_H__ */
diff --git a/crypto/ec/curve448/include/constant_time.h b/crypto/ec/curve448/include/constant_time.h
deleted file mode 100644
index 025ffe1729..0000000000
--- a/crypto/ec/curve448/include/constant_time.h
+++ /dev/null
@@ -1,362 +0,0 @@
-/**
- * @file constant_time.h
- * @copyright
- *   Copyright (c) 2014 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- * @author Mike Hamburg
- *
- * @brief Constant-time routines.
- */
-
-#ifndef __CONSTANT_TIME_H__
-#define __CONSTANT_TIME_H__ 1
-
-#include "word.h"
-#include <string.h>
-
-/*
- * Constant-time operations on hopefully-compile-time-sized memory
- * regions.  Needed for flexibility / demagication: not all fields
- * have sizes which are multiples of the vector width, necessitating
- * a change from the Ed448 versions.
- *
- * These routines would be much simpler to define at the byte level,
- * but if not vectorized they would be a significant fraction of the
- * runtime.  Eg on NEON-less ARM, constant_time_lookup is like 15% of
- * signing time, vs 6% on Haswell with its fancy AVX2 vectors.
- *
- * If the compiler could do a good job of autovectorizing the code,
- * we could just leave it with the byte definition.  But that's unlikely
- * on most deployed compilers, especially if you consider that pcmpeq[size]
- * is much faster than moving a scalar to the vector unit (which is what
- * a naive autovectorizer will do with constant_time_lookup on Intel).
- *
- * Instead, we're putting our trust in the loop unroller and unswitcher.
- */
-
-
-/**
- * Unaligned big (vector?) register.
- */
-typedef struct {
-    big_register_t unaligned;
-} __attribute__((packed)) unaligned_br_t;
-
-/**
- * Unaligned word register, for architectures where that matters.
- */
-typedef struct {
-    word_t unaligned;
-} __attribute__((packed)) unaligned_word_t;
-
-/**
- * @brief Constant-time conditional swap.
- *
- * If doswap, then swap elem_bytes between *a and *b.
- *
- * *a and *b must not alias.  Also, they must be at least as aligned
- * as their sizes, if the CPU cares about that sort of thing.
- */
-static __inline__ void
-__attribute__((unused,always_inline))
-constant_time_cond_swap (
-    void *__restrict__ a_,
-    void *__restrict__ b_,
-    word_t elem_bytes,
-    mask_t doswap
-) {
-    word_t k;
-    unsigned char *a = (unsigned char *)a_;
-    unsigned char *b = (unsigned char *)b_;
-    
-    big_register_t br_mask = br_set_to_mask(doswap);
-    for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
-        if (elem_bytes % sizeof(big_register_t)) {
-            /* unaligned */
-            big_register_t xor =
-                ((unaligned_br_t*)(&a[k]))->unaligned
-              ^ ((unaligned_br_t*)(&b[k]))->unaligned;
-            xor &= br_mask;
-            ((unaligned_br_t*)(&a[k]))->unaligned ^= xor;
-            ((unaligned_br_t*)(&b[k]))->unaligned ^= xor;
-        } else {
-            /* aligned */
-            big_register_t xor =
-                *((big_register_t*)(&a[k]))
-              ^ *((big_register_t*)(&b[k]));
-            xor &= br_mask;
-            *((big_register_t*)(&a[k])) ^= xor;
-            *((big_register_t*)(&b[k])) ^= xor;
-        }
-    }
-
-    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
-        for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
-            if (elem_bytes % sizeof(word_t)) {
-                /* unaligned */
-                word_t xor =
-                    ((unaligned_word_t*)(&a[k]))->unaligned
-                  ^ ((unaligned_word_t*)(&b[k]))->unaligned;
-                xor &= doswap;
-                ((unaligned_word_t*)(&a[k]))->unaligned ^= xor;
-                ((unaligned_word_t*)(&b[k]))->unaligned ^= xor;
-            } else {
-                /* aligned */
-                word_t xor =
-                    *((word_t*)(&a[k]))
-                  ^ *((word_t*)(&b[k]));
-                xor &= doswap;
-                *((word_t*)(&a[k])) ^= xor;
-                *((word_t*)(&b[k])) ^= xor;
-            }
-        }
-    }
-    
-    if (elem_bytes % sizeof(word_t)) {
-        for (; k<elem_bytes; k+=1) {
-            unsigned char xor = a[k] ^ b[k];
-            xor &= doswap;
-            a[k] ^= xor;
-            b[k] ^= xor;
-        }
-    }
-}
-
-/**
- * @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
- *
- * The table must be at least as aligned as elem_bytes.  The output must be word aligned,
- * and if the input size is vector aligned it must also be vector aligned.
- *
- * The table and output must not alias.
- */
-static __inline__ void
-__attribute__((unused,always_inline))
-constant_time_lookup (
-    void *__restrict__ out_,
-    const void *table_,
-    word_t elem_bytes,
-    word_t n_table,
-    word_t idx
-) {
-    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
-    
-    /* Can't do pointer arithmetic on void* */
-    unsigned char *out = (unsigned char *)out_;
-    const unsigned char *table = (const unsigned char *)table_;
-    word_t j,k;
-    
-    memset(out, 0, elem_bytes);
-    for (j=0; j<n_table; j++, big_i-=big_one) {        
-        big_register_t br_mask = br_is_zero(big_i);
-        for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
-            if (elem_bytes % sizeof(big_register_t)) {
-                /* unaligned */
-                ((unaligned_br_t *)(out+k))->unaligned
-			|= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned;
-            } else {
-                /* aligned */
-                *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]);
-            }
-        }
-
-        word_t mask = word_is_zero(idx^j);
-        if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
-            for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
-                if (elem_bytes % sizeof(word_t)) {
-                    /* input unaligned, output aligned */
-                    *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned;
-                } else {
-                    /* aligned */
-                    *(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]);
-                }
-            }
-        }
-        
-        if (elem_bytes % sizeof(word_t)) {
-            for (; k<elem_bytes; k+=1) {
-                out[k] |= mask & table[k+j*elem_bytes];
-            }
-        }
-    }
-}
-
-/**
- * @brief Constant-time equivalent of memcpy(table + elem_bytes*idx, in, elem_bytes);
- *
- * The table must be at least as aligned as elem_bytes.  The input must be word aligned,
- * and if the output size is vector aligned it must also be vector aligned.
- *
- * The table and input must not alias.
- */
-static __inline__ void
-__attribute__((unused,always_inline))
-constant_time_insert (
-    void *__restrict__ table_,
-    const void *in_,
-    word_t elem_bytes,
-    word_t n_table,
-    word_t idx
-) {
-    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
-    
-    /* Can't do pointer arithmetic on void* */
-    const unsigned char *in = (const unsigned char *)in_;
-    unsigned char *table = (unsigned char *)table_;
-    word_t j,k;
-    
-    for (j=0; j<n_table; j++, big_i-=big_one) {        
-        big_register_t br_mask = br_is_zero(big_i);
-        for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
-            if (elem_bytes % sizeof(big_register_t)) {
-                /* unaligned */
-                ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned
-                    = ( ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned & ~br_mask )
-                    | ( ((const unaligned_br_t *)(in+k))->unaligned & br_mask );
-            } else {
-                /* aligned */
-                *(big_register_t*)(&table[k+j*elem_bytes])
-                    = ( *(big_register_t*)(&table[k+j*elem_bytes]) & ~br_mask )
-                    | ( *(const big_register_t *)(in+k) & br_mask );
-            }
-        }
-
-        word_t mask = word_is_zero(idx^j);
-        if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
-            for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
-                if (elem_bytes % sizeof(word_t)) {
-                    /* output unaligned, input aligned */
-                    ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned
-                        = ( ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned & ~mask )
-                        | ( *(const word_t *)(in+k) & mask );
-                } else {
-                    /* aligned */
-                    *(word_t*)(&table[k+j*elem_bytes])
-                        = ( *(word_t*)(&table[k+j*elem_bytes]) & ~mask )
-                        | ( *(const word_t *)(in+k) & mask );
-                }
-            }
-        }
-        
-        if (elem_bytes % sizeof(word_t)) {
-            for (; k<elem_bytes; k+=1) {
-                table[k+j*elem_bytes]
-                    = ( table[k+j*elem_bytes] & ~mask )
-                    | ( in[k] & mask );
-            }
-        }
-    }
-}
-
-/**
- * @brief Constant-time a = b&mask.
- *
- * The input and output must be at least as aligned as elem_bytes.
- */
-static __inline__ void
-__attribute__((unused,always_inline))
-constant_time_mask (
-    void * a_,
-    const void *b_,
-    word_t elem_bytes,
-    mask_t mask
-) {
-    unsigned char *a = (unsigned char *)a_;
-    const unsigned char *b = (const unsigned char *)b_;
-    
-    word_t k;
-    big_register_t br_mask = br_set_to_mask(mask);
-    for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
-        if (elem_bytes % sizeof(big_register_t)) {
-            /* unaligned */
-            ((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned;
-        } else {
-            /* aligned */
-            *(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]);
-        }
-    }
-
-    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
-        for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
-            if (elem_bytes % sizeof(word_t)) {
-                /* unaligned */
-                ((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned;
-            } else {
-                /* aligned */
-                *(word_t *)(a+k) = mask & *(const word_t*)(&b[k]);
-            }
-        }
-    }
-    
-    if (elem_bytes % sizeof(word_t)) {
-        for (; k<elem_bytes; k+=1) {
-            a[k] = mask & b[k];
-        }
-    }
-}
-
-/**
- * @brief Constant-time a = mask ? bTrue : bFalse.
- *
- * The input and output must be at least as aligned as alignment_bytes
- * or their size, whichever is smaller.
- *
- * Note that the output is not __restrict__, but if it overlaps either
- * input, it must be equal and not partially overlap.
- */
-static __inline__ void
-__attribute__((unused,always_inline))
-constant_time_select (
-    void *a_,
-    const void *bFalse_,
-    const void *bTrue_,
-    word_t elem_bytes,
-    mask_t mask,
-    size_t alignment_bytes
-) {
-    unsigned char *a = (unsigned char *)a_;
-    const unsigned char *bTrue = (const unsigned char *)bTrue_;
-    const unsigned char *bFalse = (const unsigned char *)bFalse_;
-    
-    alignment_bytes |= elem_bytes;
-
-    word_t k;
-    big_register_t br_mask = br_set_to_mask(mask);
-    for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
-        if (alignment_bytes % sizeof(big_register_t)) {
-            /* unaligned */
-            ((unaligned_br_t*)(&a[k]))->unaligned =
-		  ( br_mask & ((const unaligned_br_t*)(&bTrue [k]))->unaligned)
-		| (~br_mask & ((const unaligned_br_t*)(&bFalse[k]))->unaligned);
-        } else {
-            /* aligned */
-            *(big_register_t *)(a+k) =
-		  ( br_mask & *(const big_register_t*)(&bTrue [k]))
-		| (~br_mask & *(const big_register_t*)(&bFalse[k]));
-        }
-    }
-
-    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
-        for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
-            if (alignment_bytes % sizeof(word_t)) {
-                /* unaligned */
-                ((unaligned_word_t*)(&a[k]))->unaligned =
-		    ( mask & ((const unaligned_word_t*)(&bTrue [k]))->unaligned)
-		  | (~mask & ((const unaligned_word_t*)(&bFalse[k]))->unaligned);
-            } else {
-                /* aligned */
-                *(word_t *)(a+k) =
-		    ( mask & *(const word_t*)(&bTrue [k]))
-		  | (~mask & *(const word_t*)(&bFalse[k]));
-            }
-        }
-    }
-    
-    if (elem_bytes % sizeof(word_t)) {
-        for (; k<elem_bytes; k+=1) {
-            a[k] = ( mask & bTrue[k]) | (~mask & bFalse[k]);
-        }
-    }
-}
-
-#endif /* __CONSTANT_TIME_H__ */
diff --git a/crypto/ec/curve448/include/field.h b/crypto/ec/curve448/include/field.h
deleted file mode 100644
index c536a51b66..0000000000
--- a/crypto/ec/curve448/include/field.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * @file field.h
- * @brief Generic gf header.
- * @copyright
- *   Copyright (c) 2014 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- * @author Mike Hamburg
- */
-
-#ifndef __GF_H__
-#define __GF_H__
-
-#include "constant_time.h"
-#include "f_field.h"
-#include <string.h>
-    
-/** Square x, n times. */
-static DECAF_INLINE void gf_sqrn (
-    gf_s *__restrict__ y,
-    const gf x,
-    int n
-) {
-    gf tmp;
-    assert(n>0);
-    if (n&1) {
-        gf_sqr(y,x);
-        n--;
-    } else {
-        gf_sqr(tmp,x);
-        gf_sqr(y,tmp);
-        n-=2;
-    }
-    for (; n; n-=2) {
-        gf_sqr(tmp,y);
-        gf_sqr(y,tmp);
-    }
-}
-
-#define gf_add_nr gf_add_RAW
-
-/** Subtract mod p.  Bias by 2 and don't reduce  */
-static inline void gf_sub_nr ( gf c, const gf a, const gf b ) {
-    gf_sub_RAW(c,a,b);
-    gf_bias(c, 2);
-    if (GF_HEADROOM < 3) gf_weak_reduce(c);
-}
-
-/** Subtract mod p. Bias by amt but don't reduce.  */
-static inline void gf_subx_nr ( gf c, const gf a, const gf b, int amt ) {
-    gf_sub_RAW(c,a,b);
-    gf_bias(c, amt);
-    if (GF_HEADROOM < amt+1) gf_weak_reduce(c);
-}
-
-/** Mul by signed int.  Not constant-time WRT the sign of that int. */
-static inline void gf_mulw(gf c, const gf a, int32_t w) {
-    if (w>0) {
-        gf_mulw_unsigned(c, a, w);
-    } else {
-        gf_mulw_unsigned(c, a, -w);
-        gf_sub(c,ZERO,c);
-    }
-}
-
-/** Constant time, x = is_z ? z : y */
-static inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z) {
-    constant_time_select(x,y,z,sizeof(gf),is_z,0);
-}
-
-/** Constant time, if (neg) x=-x; */
-static inline void gf_cond_neg(gf x, mask_t neg) {
-    gf y;
-    gf_sub(y,ZERO,x);
-    gf_cond_sel(x,x,y,neg);
-}
-
-/** Constant time, if (swap) (x,y) = (y,x); */
-static inline void
-gf_cond_swap(gf x, gf_s *__restrict__ y, mask_t swap) {
-    constant_time_cond_swap(x,y,sizeof(gf_s),swap);
-}
-
-static DECAF_INLINE void gf_mul_qnr(gf_s *__restrict__ out, const gf x) {
-#if P_MOD_8 == 5
-    /* r = QNR * r0^2 */
-    gf_mul(out,x,SQRT_MINUS_ONE);
-#elif P_MOD_8 == 3 || P_MOD_8 == 7
-    gf_sub(out,ZERO,x);
-#else
-    #error "Only supporting p=3,5,7 mod 8"
-#endif
-}
-
-static DECAF_INLINE void gf_div_qnr(gf_s *__restrict__ out, const gf x) {
-#if P_MOD_8 == 5
-    /* r = QNR * r0^2 */
-    gf_mul(out,x,SQRT_MINUS_ONE);
-    gf_sub(out,ZERO,out);
-#elif P_MOD_8 == 3 || P_MOD_8 == 7
-    gf_sub(out,ZERO,x);
-#else
-    #error "Only supporting p=3,5,7 mod 8"
-#endif
-}
-
-#if P_MOD_8 == 5
-#define gf_mul_i gf_mul_qnr
-#define gf_div_i gf_div_qnr
-#endif
-
-
-#endif // __GF_H__
diff --git a/crypto/ec/curve448/include/keccak_internal.h b/crypto/ec/curve448/include/keccak_internal.h
deleted file mode 100644
index 15d1be4507..0000000000
--- a/crypto/ec/curve448/include/keccak_internal.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * @cond internal
- * @file keccak_internal.h
- * @copyright
- *   Copyright (c) 2016 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- * @author Mike Hamburg
- * @brief Keccak internal interfaces.  Will be used by STROBE once reintegrated.
- */
-#ifndef __DECAF_KECCAK_INTERNAL_H__
-#define __DECAF_KECCAK_INTERNAL_H__ 1
-
-#include <stdint.h>
-
-/* The internal, non-opaque definition of the decaf_sponge struct. */
-typedef union {
-    uint64_t w[25]; uint8_t b[25*8];
-} kdomain_t[1];
-
-typedef struct decaf_kparams_s {
-    uint8_t position, flags, rate, start_round, pad, rate_pad, max_out, remaining;
-} decaf_kparams_s, decaf_kparams_t[1];
-
-typedef struct decaf_keccak_sponge_s {
-    kdomain_t state;
-    decaf_kparams_t params;
-} decaf_keccak_sponge_s, decaf_keccak_sponge_t[1];
-
-#define INTERNAL_SPONGE_STRUCT 1
-
-void __attribute__((noinline)) keccakf(kdomain_t state, uint8_t start_round);
-
-static inline void dokeccak (decaf_keccak_sponge_t decaf_sponge) {
-    keccakf(decaf_sponge->state, decaf_sponge->params->start_round);
-    decaf_sponge->params->position = 0;
-}
-
-#endif /* __DECAF_KECCAK_INTERNAL_H__ */
diff --git a/crypto/ec/curve448/include/portable_endian.h b/crypto/ec/curve448/include/portable_endian.h
deleted file mode 100644
index 5cbfca7aac..0000000000
--- a/crypto/ec/curve448/include/portable_endian.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Subset of Mathias Panzenböck's portable endian code, public domain */
-
-#ifndef __PORTABLE_ENDIAN_H__
-#define __PORTABLE_ENDIAN_H__
-
-#if defined(__linux__) || defined(__CYGWIN__)
-#	include <endian.h>
-#elif defined(__OpenBSD__)
-#	include <sys/endian.h>
-#elif defined(__APPLE__)
-#	include <libkern/OSByteOrder.h>
-#	define htole64(x) OSSwapHostToLittleInt64(x)
-#	define le64toh(x) OSSwapLittleToHostInt64(x)
-#elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
-#	include <sys/endian.h>
-#	ifndef le64toh
-#		define le64toh(x) letoh64(x)
-#	endif
-#elif defined(__sun) && defined(__SVR4)
-#	include <sys/byteorder.h>
-#	define htole64(x) LE_64(x)
-#	define le64toh(x) LE_64(x)
-#elif defined(_WIN16) || defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
-#	include <winsock2.h>
-#	include <sys/param.h>
-#	if BYTE_ORDER == LITTLE_ENDIAN
-#		define htole64(x) (x)
-#		define le64toh(x) (x)
-#	elif BYTE_ORDER == BIG_ENDIAN
-#		define htole64(x) __builtin_bswap64(x)
-#		define le64toh(x) __builtin_bswap64(x)
-#	else
-#		error byte order not supported
-#	endif
-#else
-#	error platform not supported
-#endif
-
-#endif // __PORTABLE_ENDIAN_H__
diff --git a/crypto/ec/curve448/include/word.h b/crypto/ec/curve448/include/word.h
deleted file mode 100644
index 7c7644ad2c..0000000000
--- a/crypto/ec/curve448/include/word.h
+++ /dev/null
@@ -1,281 +0,0 @@
-/* Copyright (c) 2014 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#ifndef __WORD_H__
-#define __WORD_H__
-
-/* for posix_memalign */
-#define _XOPEN_SOURCE 600
-#define __STDC_WANT_LIB_EXT1__ 1 /* for memset_s */
-#include <string.h>
-#if defined(__sun) && defined(__SVR4)
-extern int posix_memalign(void **, size_t, size_t);
-#endif
-
-#include <assert.h>
-#include <stdint.h>
-#include "arch_intrinsics.h"
-
-#include <decaf/common.h>
-
-#ifndef _BSD_SOURCE
-#define _BSD_SOURCE 1
-#endif
-
-#ifndef _DEFAULT_SOURCE
-#define _DEFAULT_SOURCE 1
-#endif
-
-#include "portable_endian.h"
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <inttypes.h>
-
-#if defined(__ARM_NEON__)
-#include <arm_neon.h>
-#elif defined(__SSE2__)
-    #if !defined(__GNUC__) || __clang__ || __GNUC__ >= 5 || (__GNUC__==4 && __GNUC_MINOR__ >= 4)
-        #include <immintrin.h>
-    #else
-        #include <emmintrin.h>
-    #endif
-#endif
-
-#if (ARCH_WORD_BITS == 64)
-    typedef uint64_t word_t, mask_t;
-    typedef __uint128_t dword_t;
-    typedef int32_t hsword_t;
-    typedef int64_t sword_t;
-    typedef __int128_t dsword_t;
-#elif (ARCH_WORD_BITS == 32)
-    typedef uint32_t word_t, mask_t;
-    typedef uint64_t dword_t;
-    typedef int16_t hsword_t;
-    typedef int32_t sword_t;
-    typedef int64_t dsword_t;
-#else
-    #error "For now, libdecaf only supports 32- and 64-bit architectures."
-#endif
-    
-/* Scalar limbs are keyed off of the API word size instead of the arch word size. */
-#if DECAF_WORD_BITS == 64
-    #define SC_LIMB(x) (x##ull)
-#elif DECAF_WORD_BITS == 32
-    #define SC_LIMB(x) ((uint32_t)x##ull),(x##ull>>32)
-#else
-    #error "For now, libdecaf only supports 32- and 64-bit architectures."
-#endif
-
-#ifdef __ARM_NEON__
-    typedef uint32x4_t vecmask_t;
-#elif __clang__
-    typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2)));
-    typedef int64_t  int64x2_t __attribute__((ext_vector_type(2)));
-    typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4)));
-    typedef int64_t  int64x4_t __attribute__((ext_vector_type(4)));
-    typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4)));
-    typedef int32_t  int32x4_t __attribute__((ext_vector_type(4)));
-    typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2)));
-    typedef int32_t  int32x2_t __attribute__((ext_vector_type(2)));
-    typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8)));
-    typedef int32_t  int32x8_t __attribute__((ext_vector_type(8)));
-    typedef word_t vecmask_t __attribute__((ext_vector_type(4)));
-#else /* GCC, hopefully? */
-    typedef uint64_t uint64x2_t __attribute__((vector_size(16)));
-    typedef int64_t  int64x2_t __attribute__((vector_size(16)));
-    typedef uint64_t uint64x4_t __attribute__((vector_size(32)));
-    typedef int64_t  int64x4_t __attribute__((vector_size(32)));
-    typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
-    typedef int32_t  int32x4_t __attribute__((vector_size(16)));
-    typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
-    typedef int32_t  int32x2_t __attribute__((vector_size(8)));
-    typedef uint32_t uint32x8_t __attribute__((vector_size(32)));
-    typedef int32_t  int32x8_t __attribute__((vector_size(32)));
-    typedef word_t vecmask_t __attribute__((vector_size(32)));
-#endif
-
-#if __AVX2__
-    #define VECTOR_ALIGNED __attribute__((aligned(32)))
-    typedef uint32x8_t big_register_t;
-    typedef uint64x4_t uint64xn_t;
-    typedef uint32x8_t uint32xn_t;
-
-    static DECAF_INLINE big_register_t
-    br_set_to_mask(mask_t x) {
-        uint32_t y = (uint32_t)x;
-        big_register_t ret = {y,y,y,y,y,y,y,y};
-        return ret;
-    }
-#elif __SSE2__
-    #define VECTOR_ALIGNED __attribute__((aligned(16)))
-    typedef uint32x4_t big_register_t;
-    typedef uint64x2_t uint64xn_t;
-    typedef uint32x4_t uint32xn_t;
-
-    static DECAF_INLINE big_register_t
-    br_set_to_mask(mask_t x) {
-        uint32_t y = x;
-        big_register_t ret = {y,y,y,y};
-        return ret;
-    }
-#elif __ARM_NEON__
-    #define VECTOR_ALIGNED __attribute__((aligned(16)))
-    typedef uint32x4_t big_register_t;
-    typedef uint64x2_t uint64xn_t;
-    typedef uint32x4_t uint32xn_t;
-    
-    static DECAF_INLINE big_register_t
-    br_set_to_mask(mask_t x) {
-        return vdupq_n_u32(x);
-    }
-#elif _WIN64 || __amd64__ || __X86_64__ || __aarch64__
-    #define VECTOR_ALIGNED __attribute__((aligned(8)))
-    typedef uint64_t big_register_t, uint64xn_t;
-
-    typedef uint32_t uint32xn_t;
-    static DECAF_INLINE big_register_t
-    br_set_to_mask(mask_t x) {
-        return (big_register_t)x;
-    }
-#else
-    #define VECTOR_ALIGNED __attribute__((aligned(4)))
-    typedef uint64_t uint64xn_t;
-    typedef uint32_t uint32xn_t;
-    typedef uint32_t big_register_t;
-
-    static DECAF_INLINE big_register_t
-    br_set_to_mask(mask_t x) {
-        return (big_register_t)x;
-    }
-#endif
-
-typedef struct {
-    uint64xn_t unaligned;
-} __attribute__((packed)) unaligned_uint64xn_t;
-
-typedef struct {
-    uint32xn_t unaligned;
-} __attribute__((packed)) unaligned_uint32xn_t;
-
-#if __AVX2__
-    static DECAF_INLINE big_register_t
-    br_is_zero(big_register_t x) {
-        return (big_register_t)(x == br_set_to_mask(0));
-    }
-#elif __SSE2__
-    static DECAF_INLINE big_register_t
-    br_is_zero(big_register_t x) {
-        return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128());
-        //return (big_register_t)(x == br_set_to_mask(0));
-    }
-#elif __ARM_NEON__
-    static DECAF_INLINE big_register_t
-    br_is_zero(big_register_t x) {
-        return vceqq_u32(x,x^x);
-    }
-#else
-    #define br_is_zero word_is_zero
-#endif
-
-/**
- * Really call memset, in a way that prevents the compiler from optimizing it out.
- * @param p The object to zeroize.
- * @param c The char to set it to (probably zero).
- * @param s The size of the object.
- */
-#if defined(__DARWIN_C_LEVEL) || defined(__STDC_LIB_EXT1__)
-#define HAS_MEMSET_S
-#endif
-
-#if !defined(__STDC_WANT_LIB_EXT1__) || __STDC_WANT_LIB_EXT1__ != 1
-#define NEED_MEMSET_S_EXTERN
-#endif
-
-#ifdef HAS_MEMSET_S
-    #ifdef NEED_MEMSET_S_EXTERN
-        extern int memset_s(void *, size_t, int, size_t);
-    #endif
-    static DECAF_INLINE void
-    really_memset(void *p, char c, size_t s) {
-        memset_s(p, s, c, s);
-    }
-#else
-    /* PERF: use words? */
-    static DECAF_INLINE void
-    really_memset(void *p, char c, size_t s) {
-        volatile char *pv = (volatile char *)p;
-        size_t i;
-        for (i=0; i<s; i++) pv[i] = c;
-    }
-#endif
-
-/**
- * Allocate memory which is sufficiently aligned to be used for the
- * largest vector on the system (for now that's a big_register_t).
- *
- * Man malloc says that it does this, but at least for AVX2 on MacOS X,
- * it's lying.
- *
- * @param size The size of the region to allocate.
- * @return A suitable pointer, which can be free'd with free(),
- * or NULL if no memory can be allocated.
- */
-static DECAF_INLINE void *
-malloc_vector(size_t size) {
-    void *out = NULL;
-    
-    int ret = posix_memalign(&out, sizeof(big_register_t), size);
-    
-    if (ret) {
-        return NULL;
-    } else {
-        return out;
-    }
-}
-
-/* PERF: vectorize vs unroll */
-#ifdef __clang__
-#if 100*__clang_major__ + __clang_minor__ > 305
-#define UNROLL _Pragma("clang loop unroll(full)")
-#endif
-#endif
-
-#ifndef UNROLL
-#define UNROLL
-#endif
-
-/* The plan on booleans:
- *
- * The external interface uses decaf_bool_t, but this might be a different
- * size than our particular arch's word_t (and thus mask_t).  Also, the caller
- * isn't guaranteed to pass it as nonzero.  So bool_to_mask converts word sizes
- * and checks nonzero.
- *
- * On the flip side, mask_t is always -1 or 0, but it might be a different size
- * than decaf_bool_t.
- *
- * On the third hand, we have success vs boolean types, but that's handled in
- * common.h: it converts between decaf_bool_t and decaf_error_t.
- */
-static DECAF_INLINE decaf_bool_t mask_to_bool (mask_t m) {
-    return (decaf_sword_t)(sword_t)m;
-}
-
-static DECAF_INLINE mask_t bool_to_mask (decaf_bool_t m) {
-    /* On most arches this will be optimized to a simple cast. */
-    mask_t ret = 0;
-    unsigned int limit = sizeof(decaf_bool_t)/sizeof(mask_t);
-    if (limit < 1) limit = 1;
-    for (unsigned int i=0; i<limit; i++) {
-        ret |= ~ word_is_zero(m >> (i*8*sizeof(word_t)));
-    }
-    return ret;
-}
-
-static DECAF_INLINE void ignore_result ( decaf_bool_t boo ) {
-    (void)boo;
-}
-
-#endif /* __WORD_H__ */
diff --git a/crypto/ec/curve448/keccak_internal.h b/crypto/ec/curve448/keccak_internal.h
new file mode 100644
index 0000000000..15d1be4507
--- /dev/null
+++ b/crypto/ec/curve448/keccak_internal.h
@@ -0,0 +1,38 @@
+/**
+ * @cond internal
+ * @file keccak_internal.h
+ * @copyright
+ *   Copyright (c) 2016 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ * @author Mike Hamburg
+ * @brief Keccak internal interfaces.  Will be used by STROBE once reintegrated.
+ */
+#ifndef __DECAF_KECCAK_INTERNAL_H__
+#define __DECAF_KECCAK_INTERNAL_H__ 1
+
+#include <stdint.h>
+
+/* The internal, non-opaque definition of the decaf_sponge struct. */
+typedef union {
+    uint64_t w[25]; uint8_t b[25*8]; /* one 200-byte buffer, addressable as 25 64-bit words or 200 bytes */
+} kdomain_t[1]; /* one-element array type, so a kdomain_t argument is passed as a pointer */
+
+typedef struct decaf_kparams_s {
+    uint8_t position, flags, rate, start_round, pad, rate_pad, max_out, remaining; /* eight one-byte fields */
+} decaf_kparams_s, decaf_kparams_t[1]; /* _s is the struct itself, _t a one-element array of it */
+
+typedef struct decaf_keccak_sponge_s {
+    kdomain_t state;        /* handed to keccakf() by dokeccak() */
+    decaf_kparams_t params; /* dokeccak() reads start_round from here and zeroes position */
+} decaf_keccak_sponge_s, decaf_keccak_sponge_t[1]; /* _s is the struct itself, _t a one-element array of it */
+
+#define INTERNAL_SPONGE_STRUCT 1
+
+void __attribute__((noinline)) keccakf(kdomain_t state, uint8_t start_round); /* declared here, defined in another file; marked noinline */
+
+static inline void dokeccak (decaf_keccak_sponge_t decaf_sponge) { /* call keccakf() on the sponge, then reset its position */
+    keccakf(decaf_sponge->state, decaf_sponge->params->start_round); /* start_round is forwarded unchanged from params */
+    decaf_sponge->params->position = 0; /* position is back at 0 after every keccakf() call */
+}
+
+#endif /* __DECAF_KECCAK_INTERNAL_H__ */
diff --git a/crypto/ec/curve448/p448/arch_32/f_impl.c b/crypto/ec/curve448/p448/arch_32/f_impl.c
deleted file mode 100644
index 0770bd9962..0000000000
--- a/crypto/ec/curve448/p448/arch_32/f_impl.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Copyright (c) 2014 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#include "f_field.h"
-
-#if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) && !I_HATE_UNROLLED_LOOPS) \
-     || defined(DECAF_FORCE_UNROLL)
-#define REPEAT8(_x) _x _x _x _x _x _x _x _x
-#define FOR_LIMB(_i,_start,_end,_x) do { _i=_start; REPEAT8( if (_i<_end) { _x; } _i++;) } while (0)
-#else
-#define FOR_LIMB(_i,_start,_end,_x) do { for (_i=_start; _i<_end; _i++) _x; } while (0)
-#endif
-
-void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { 
-    const uint32_t *a = as->limb, *b = bs->limb;
-    uint32_t *c = cs->limb;
-
-    uint64_t accum0 = 0, accum1 = 0, accum2 = 0;
-    uint32_t mask = (1<<28) - 1;  
-
-    uint32_t aa[8], bb[8];
-    
-    int i,j;
-    for (i=0; i<8; i++) {
-        aa[i] = a[i] + a[i+8];
-        bb[i] = b[i] + b[i+8];
-    }
-    
-    FOR_LIMB(j,0,8,{
-        accum2 = 0;
-    
-        FOR_LIMB (i,0,j+1,{
-            accum2 += widemul(a[j-i],b[i]);
-            accum1 += widemul(aa[j-i],bb[i]);
-            accum0 += widemul(a[8+j-i], b[8+i]);
-        });
-        
-        accum1 -= accum2;
-        accum0 += accum2;
-        accum2 = 0;
-    
-        FOR_LIMB (i,j+1,8,{
-            accum0 -= widemul(a[8+j-i], b[i]);
-            accum2 += widemul(aa[8+j-i], bb[i]);
-            accum1 += widemul(a[16+j-i], b[8+i]);
-        });
-
-        accum1 += accum2;
-        accum0 += accum2;
-
-        c[j] = ((uint32_t)(accum0)) & mask;
-        c[j+8] = ((uint32_t)(accum1)) & mask;
-
-        accum0 >>= 28;
-        accum1 >>= 28;
-    });
-    
-    accum0 += accum1;
-    accum0 += c[8];
-    accum1 += c[0];
-    c[8] = ((uint32_t)(accum0)) & mask;
-    c[0] = ((uint32_t)(accum1)) & mask;
-    
-    accum0 >>= 28;
-    accum1 >>= 28;
-    c[9] += ((uint32_t)(accum0));
-    c[1] += ((uint32_t)(accum1));
-}
-
-void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
-    assert(b<1<<28);
-    
-    const uint32_t *a = as->limb;
-    uint32_t *c = cs->limb;
-
-    uint64_t accum0 = 0, accum8 = 0;
-    uint32_t mask = (1ull<<28)-1;  
-
-    int i;
-    FOR_LIMB(i,0,8,{
-        accum0 += widemul(b, a[i]);
-        accum8 += widemul(b, a[i+8]);
-
-        c[i] = accum0 & mask; accum0 >>= 28;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
-    });
-
-    accum0 += accum8 + c[8];
-    c[8] = accum0 & mask;
-    c[9] += accum0 >> 28;
-
-    accum8 += c[0];
-    c[0] = accum8 & mask;
-    c[1] += accum8 >> 28;
-}
-
-void gf_sqr (gf_s *__restrict__ cs, const gf as) {
-    gf_mul(cs,as,as); /* Performs better with a dedicated square */
-}
-
diff --git a/crypto/ec/curve448/p448/arch_32/f_impl.h b/crypto/ec/curve448/p448/arch_32/f_impl.h
deleted file mode 100644
index c3687888c0..0000000000
--- a/crypto/ec/curve448/p448/arch_32/f_impl.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Copyright (c) 2014-2016 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#define GF_HEADROOM 2
-#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
-#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
-    {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}
-    
-#define LIMB_PLACE_VALUE(i) 28
-
-void gf_add_RAW (gf out, const gf a, const gf b) {
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-        out->limb[i] = a->limb[i] + b->limb[i];
-    }
-}
-
-void gf_sub_RAW (gf out, const gf a, const gf b) {
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-        out->limb[i] = a->limb[i] - b->limb[i];
-    }
-}
-
-void gf_bias (gf a, int amt) {    
-    uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
-    for (unsigned int i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) {
-        a->limb[i] += (i==sizeof(*a)/sizeof(a->limb[0])/2) ? co2 : co1;
-    }
-}
-
-void gf_weak_reduce (gf a) {
-    uint32_t mask = (1ull<<28) - 1;
-    uint32_t tmp = a->limb[15] >> 28;
-    a->limb[8] += tmp;
-    for (unsigned int i=15; i>0; i--) {
-        a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>28);
-    }
-    a->limb[0] = (a->limb[0] & mask) + tmp;
-}
-
diff --git a/crypto/ec/curve448/p448/arch_arm_32/f_impl.c b/crypto/ec/curve448/p448/arch_arm_32/f_impl.c
deleted file mode 100644
index 0454bd6f29..0000000000
--- a/crypto/ec/curve448/p448/arch_arm_32/f_impl.c
+++ /dev/null
@@ -1,819 +0,0 @@
-/* Copyright (c) 2014 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#include "f_field.h"
-
-static inline void __attribute__((gnu_inline,always_inline))
-smlal (
-    uint64_t *acc,
-    const uint32_t a,
-    const uint32_t b
-) {
-
-#ifdef  __ARMEL__
-    uint32_t lo = *acc, hi = (*acc)>>32;
-    
-    __asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
-        : [lo]"+&r"(lo), [hi]"+&r"(hi)
-        : [a]"r"(a), [b]"r"(b));
-    
-    *acc = lo + (((uint64_t)hi)<<32);
-#else
-    *acc += (int64_t)(int32_t)a * (int64_t)(int32_t)b;
-#endif
-}
-
-static inline void __attribute__((gnu_inline,always_inline))
-smlal2 (
-    uint64_t *acc,
-    const uint32_t a,
-    const uint32_t b
-) {
-#ifdef __ARMEL__
-    uint32_t lo = *acc, hi = (*acc)>>32;
-    
-    __asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
-        : [lo]"+&r"(lo), [hi]"+&r"(hi)
-        : [a]"r"(a), [b]"r"(2*b));
-    
-    *acc = lo + (((uint64_t)hi)<<32);
-#else
-    *acc += (int64_t)(int32_t)a * (int64_t)(int32_t)(b * 2);
-#endif
-}
-
-static inline void __attribute__((gnu_inline,always_inline))
-smull (
-    uint64_t *acc,
-    const uint32_t a,
-    const uint32_t b
-) {
-#ifdef __ARMEL__
-    uint32_t lo, hi;
-    
-    __asm__ __volatile__ ("smull %[lo], %[hi], %[a], %[b]"
-        : [lo]"=&r"(lo), [hi]"=&r"(hi)
-        : [a]"r"(a), [b]"r"(b));
-    
-    *acc = lo + (((uint64_t)hi)<<32);
-#else
-    *acc = (int64_t)(int32_t)a * (int64_t)(int32_t)b;
-#endif
-}
-
-static inline void __attribute__((gnu_inline,always_inline))
-smull2 (
-    uint64_t *acc,
-    const uint32_t a,
-    const uint32_t b
-) {
-#ifdef __ARMEL__
-    uint32_t lo, hi;
-    
-    __asm__ /*__volatile__*/ ("smull %[lo], %[hi], %[a], %[b]"
-        : [lo]"=&r"(lo), [hi]"=&r"(hi)
-        : [a]"r"(a), [b]"r"(2*b));
-    
-    *acc = lo + (((uint64_t)hi)<<32);
-#else
-    *acc = (int64_t)(int32_t)a * (int64_t)(int32_t)(b * 2);
-#endif
-}
-
-void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
-    
-    const uint32_t *a = as->limb, *b = bs->limb;
-    uint32_t *c = cs->limb;
-
-    uint64_t accum0 = 0, accum1 = 0, accum2, accum3, accumC0, accumC1;
-    uint32_t mask = (1<<28) - 1;  
-
-    uint32_t aa[8], bm[8];
-
-    int i;
-    for (i=0; i<8; i++) {
-        aa[i] = a[i] + a[i+8];
-        bm[i] = b[i] - b[i+8];
-    }
-
-    uint32_t ax,bx;
-    {
-        /* t^3 terms */
-        smull(&accum1, ax = aa[1], bx = b[15]);
-        smull(&accum3, ax = aa[2], bx);
-        smlal(&accum1, ax, bx = b[14]);
-        smlal(&accum3, ax = aa[3], bx);
-        smlal(&accum1, ax, bx = b[13]);
-        smlal(&accum3, ax = aa[4], bx);
-        smlal(&accum1, ax, bx = b[12]);
-        smlal(&accum3, ax = aa[5], bx);
-        smlal(&accum1, ax, bx = b[11]);
-        smlal(&accum3, ax = aa[6], bx);
-        smlal(&accum1, ax, bx = b[10]);
-        smlal(&accum3, ax = aa[7], bx);
-        smlal(&accum1, ax, bx = b[9]);
-        
-        accum0 = accum1;
-        accum2 = accum3;
-        
-        /* t^2 terms */
-        smlal(&accum2, ax = aa[0], bx);
-        smlal(&accum0, ax, bx = b[8]);
-        smlal(&accum2, ax = aa[1], bx);
-        
-        smlal(&accum0, ax = a[9], bx = b[7]);
-        smlal(&accum2, ax = a[10], bx);
-        smlal(&accum0, ax, bx = b[6]);
-        smlal(&accum2, ax = a[11], bx);
-        smlal(&accum0, ax, bx = b[5]);
-        smlal(&accum2, ax = a[12], bx);
-        smlal(&accum0, ax, bx = b[4]);
-        smlal(&accum2, ax = a[13], bx);
-        smlal(&accum0, ax, bx = b[3]);
-        smlal(&accum2, ax = a[14], bx);
-        smlal(&accum0, ax, bx = b[2]);
-        smlal(&accum2, ax = a[15], bx);
-        smlal(&accum0, ax, bx = b[1]);
-        
-        /* t terms */
-        accum1 += accum0;
-        accum3 += accum2;
-        smlal(&accum3, ax = a[8], bx);
-        smlal(&accum1, ax, bx = b[0]);
-        smlal(&accum3, ax = a[9], bx);
-        
-        smlal(&accum1, ax = a[1], bx = bm[7]);
-        smlal(&accum3, ax = a[2], bx);
-        smlal(&accum1, ax, bx = bm[6]);
-        smlal(&accum3, ax = a[3], bx);
-        smlal(&accum1, ax, bx = bm[5]);
-        smlal(&accum3, ax = a[4], bx);
-        smlal(&accum1, ax, bx = bm[4]);
-        smlal(&accum3, ax = a[5], bx);
-        smlal(&accum1, ax, bx = bm[3]);
-        smlal(&accum3, ax = a[6], bx);
-        smlal(&accum1, ax, bx = bm[2]);
-        smlal(&accum3, ax = a[7], bx);
-        smlal(&accum1, ax, bx = bm[1]);
-        
-        /* 1 terms */
-        smlal(&accum2, ax = a[0], bx);
-        smlal(&accum0, ax, bx = bm[0]);
-        smlal(&accum2, ax = a[1], bx);
-        
-        accum2 += accum0 >> 28;
-        accum3 += accum1 >> 28;
-        
-        c[0] = ((uint32_t)(accum0)) & mask;
-        c[1] = ((uint32_t)(accum2)) & mask;
-        c[8] = ((uint32_t)(accum1)) & mask;
-        c[9] = ((uint32_t)(accum3)) & mask;
-        
-        accumC0 = accum2 >> 28;
-        accumC1 = accum3 >> 28;
-    }
-    {
-        /* t^3 terms */
-        smull(&accum1, ax = aa[3], bx = b[15]);
-        smull(&accum3, ax = aa[4], bx);
-        smlal(&accum1, ax, bx = b[14]);
-        smlal(&accum3, ax = aa[5], bx);
-        smlal(&accum1, ax, bx = b[13]);
-        smlal(&accum3, ax = aa[6], bx);
-        smlal(&accum1, ax, bx = b[12]);
-        smlal(&accum3, ax = aa[7], bx);
-        smlal(&accum1, ax, bx = b[11]);
-        
-        accum0 = accum1;
-        accum2 = accum3;
-        
-        /* t^2 terms */
-        smlal(&accum2, ax = aa[0], bx);
-        smlal(&accum0, ax, bx = b[10]);
-        smlal(&accum2, ax = aa[1], bx);
-        smlal(&accum0, ax, bx = b[9]);
-        smlal(&accum2, ax = aa[2], bx);
-        smlal(&accum0, ax, bx = b[8]);
-        smlal(&accum2, ax = aa[3], bx);
-        
-        smlal(&accum0, ax = a[11], bx = b[7]);
-        smlal(&accum2, ax = a[12], bx);
-        smlal(&accum0, ax, bx = b[6]);
-        smlal(&accum2, ax = a[13], bx);
-        smlal(&accum0, ax, bx = b[5]);
-        smlal(&accum2, ax = a[14], bx);
-        smlal(&accum0, ax, bx = b[4]);
-        smlal(&accum2, ax = a[15], bx);
-        smlal(&accum0, ax, bx = b[3]);
-        
-        /* t terms */
-        accum1 += accum0;
-        accum3 += accum2;
-        smlal(&accum3, ax = a[8], bx);
-        smlal(&accum1, ax, bx = b[2]);
-        smlal(&accum3, ax = a[9], bx);
-        smlal(&accum1, ax, bx = b[1]);
-        smlal(&accum3, ax = a[10], bx);
-        smlal(&accum1, ax, bx = b[0]);
-        smlal(&accum3, ax = a[11], bx);
-        
-        smlal(&accum1, ax = a[3], bx = bm[7]);
-        smlal(&accum3, ax = a[4], bx);
-        smlal(&accum1, ax, bx = bm[6]);
-        smlal(&accum3, ax = a[5], bx);
-        smlal(&accum1, ax, bx = bm[5]);
-        smlal(&accum3, ax = a[6], bx);
-        smlal(&accum1, ax, bx = bm[4]);
-        smlal(&accum3, ax = a[7], bx);
-        smlal(&accum1, ax, bx = bm[3]);
-        
-        /* 1 terms */
-        smlal(&accum2, ax = a[0], bx);
-        smlal(&accum0, ax, bx = bm[2]);
-        smlal(&accum2, ax = a[1], bx);
-        smlal(&accum0, ax, bx = bm[1]);
-        smlal(&accum2, ax = a[2], bx);
-        smlal(&accum0, ax, bx = bm[0]);
-        smlal(&accum2, ax = a[3], bx);
-        
-        accum0 += accumC0;
-        accum1 += accumC1;
-        accum2 += accum0 >> 28;
-        accum3 += accum1 >> 28;
-        
-        c[2] = ((uint32_t)(accum0)) & mask;
-        c[3] = ((uint32_t)(accum2)) & mask;
-        c[10] = ((uint32_t)(accum1)) & mask;
-        c[11] = ((uint32_t)(accum3)) & mask;
-        
-        accumC0 = accum2 >> 28;
-        accumC1 = accum3 >> 28;
-    }
-    {
-        
-        /* t^3 terms */
-        smull(&accum1, ax = aa[5], bx = b[15]);
-        smull(&accum3, ax = aa[6], bx);
-        smlal(&accum1, ax, bx = b[14]);
-        smlal(&accum3, ax = aa[7], bx);
-        smlal(&accum1, ax, bx = b[13]);
-        
-        accum0 = accum1;
-        accum2 = accum3;
-        
-        /* t^2 terms */
-        
-        smlal(&accum2, ax = aa[0], bx);
-        smlal(&accum0, ax, bx = b[12]);
-        smlal(&accum2, ax = aa[1], bx);
-        smlal(&accum0, ax, bx = b[11]);
-        smlal(&accum2, ax = aa[2], bx);
-        smlal(&accum0, ax, bx = b[10]);
-        smlal(&accum2, ax = aa[3], bx);
-        smlal(&accum0, ax, bx = b[9]);
-        smlal(&accum2, ax = aa[4], bx);
-        smlal(&accum0, ax, bx = b[8]);
-        smlal(&accum2, ax = aa[5], bx);
-        
-        
-        smlal(&accum0, ax = a[13], bx = b[7]);
-        smlal(&accum2, ax = a[14], bx);
-        smlal(&accum0, ax, bx = b[6]);
-        smlal(&accum2, ax = a[15], bx);
-        smlal(&accum0, ax, bx = b[5]);
-        
-        /* t terms */
-        accum1 += accum0;
-        accum3 += accum2;
-        
-        smlal(&accum3, ax = a[8], bx);
-        smlal(&accum1, ax, bx = b[4]);
-        smlal(&accum3, ax = a[9], bx);
-        smlal(&accum1, ax, bx = b[3]);
-        smlal(&accum3, ax = a[10], bx);
-        smlal(&accum1, ax, bx = b[2]);
-        smlal(&accum3, ax = a[11], bx);
-        smlal(&accum1, ax, bx = b[1]);
-        smlal(&accum3, ax = a[12], bx);
-        smlal(&accum1, ax, bx = b[0]);
-        smlal(&accum3, ax = a[13], bx);
-        
-        
-        smlal(&accum1, ax = a[5], bx = bm[7]);
-        smlal(&accum3, ax = a[6], bx);
-        smlal(&accum1, ax, bx = bm[6]);
-        smlal(&accum3, ax = a[7], bx);
-        smlal(&accum1, ax, bx = bm[5]);
-        
-        /* 1 terms */
-        
-        smlal(&accum2, ax = a[0], bx);
-        smlal(&accum0, ax, bx = bm[4]);
-        smlal(&accum2, ax = a[1], bx);
-        smlal(&accum0, ax, bx = bm[3]);
-        smlal(&accum2, ax = a[2], bx);
-        smlal(&accum0, ax, bx = bm[2]);
-        smlal(&accum2, ax = a[3], bx);
-        smlal(&accum0, ax, bx = bm[1]);
-        smlal(&accum2, ax = a[4], bx);
-        smlal(&accum0, ax, bx = bm[0]);
-        smlal(&accum2, ax = a[5], bx);
-        
-        accum0 += accumC0;
-        accum1 += accumC1;
-        accum2 += accum0 >> 28;
-        accum3 += accum1 >> 28;
-        
-        c[4] = ((uint32_t)(accum0)) & mask;
-        c[5] = ((uint32_t)(accum2)) & mask;
-        c[12] = ((uint32_t)(accum1)) & mask;
-        c[13] = ((uint32_t)(accum3)) & mask;
-        
-        accumC0 = accum2 >> 28;
-        accumC1 = accum3 >> 28;
-    }
-    {
-        
-        /* t^3 terms */
-        smull(&accum1, ax = aa[7], bx = b[15]);
-        accum0 = accum1;
-        
-        /* t^2 terms */
-        
-        smull(&accum2, ax = aa[0], bx);
-        smlal(&accum0, ax, bx = b[14]);
-        smlal(&accum2, ax = aa[1], bx);
-        smlal(&accum0, ax, bx = b[13]);
-        smlal(&accum2, ax = aa[2], bx);
-        smlal(&accum0, ax, bx = b[12]);
-        smlal(&accum2, ax = aa[3], bx);
-        smlal(&accum0, ax, bx = b[11]);
-        smlal(&accum2, ax = aa[4], bx);
-        smlal(&accum0, ax, bx = b[10]);
-        smlal(&accum2, ax = aa[5], bx);
-        smlal(&accum0, ax, bx = b[9]);
-        smlal(&accum2, ax = aa[6], bx);
-        smlal(&accum0, ax, bx = b[8]);
-        smlal(&accum2, ax = aa[7], bx);
-        
-        
-        smlal(&accum0, ax = a[15], bx = b[7]);
-        
-        /* t terms */
-        accum1 += accum0;
-        accum3 = accum2;
-        
-        smlal(&accum3, ax = a[8], bx);
-        smlal(&accum1, ax, bx = b[6]);
-        smlal(&accum3, ax = a[9], bx);
-        smlal(&accum1, ax, bx = b[5]);
-        smlal(&accum3, ax = a[10], bx);
-        smlal(&accum1, ax, bx = b[4]);
-        smlal(&accum3, ax = a[11], bx);
-        smlal(&accum1, ax, bx = b[3]);
-        smlal(&accum3, ax = a[12], bx);
-        smlal(&accum1, ax, bx = b[2]);
-        smlal(&accum3, ax = a[13], bx);
-        smlal(&accum1, ax, bx = b[1]);
-        smlal(&accum3, ax = a[14], bx);
-        smlal(&accum1, ax, bx = b[0]);
-        smlal(&accum3, ax = a[15], bx);
-        
-        
-        smlal(&accum1, ax = a[7], bx = bm[7]);
-        
-        /* 1 terms */
-        
-        smlal(&accum2, ax = a[0], bx);
-        smlal(&accum0, ax, bx = bm[6]);
-        smlal(&accum2, ax = a[1], bx);
-        smlal(&accum0, ax, bx = bm[5]);
-        smlal(&accum2, ax = a[2], bx);
-        smlal(&accum0, ax, bx = bm[4]);
-        smlal(&accum2, ax = a[3], bx);
-        smlal(&accum0, ax, bx = bm[3]);
-        smlal(&accum2, ax = a[4], bx);
-        smlal(&accum0, ax, bx = bm[2]);
-        smlal(&accum2, ax = a[5], bx);
-        smlal(&accum0, ax, bx = bm[1]);
-        smlal(&accum2, ax = a[6], bx);
-        smlal(&accum0, ax, bx = bm[0]);
-        smlal(&accum2, ax = a[7], bx);
-        
-        accum0 += accumC0;
-        accum1 += accumC1;
-        accum2 += accum0 >> 28;
-        accum3 += accum1 >> 28;
-        
-        c[6] = ((uint32_t)(accum0)) & mask;
-        c[7] = ((uint32_t)(accum2)) & mask;
-        c[14] = ((uint32_t)(accum1)) & mask;
-        c[15] = ((uint32_t)(accum3)) & mask;
-        
-        accum0 = accum2 >> 28;
-        accum1 = accum3 >> 28;
-    }
-
-    accum0 += accum1;
-    accum0 += c[8];
-    accum1 += c[0];
-    c[8] = ((uint32_t)(accum0)) & mask;
-    c[0] = ((uint32_t)(accum1)) & mask;
-    
-    accum0 >>= 28;
-    accum1 >>= 28;
-    c[9] += ((uint32_t)(accum0));
-    c[1] += ((uint32_t)(accum1));
-}
-
-void gf_sqr (gf_s *__restrict__ cs, const gf as) {
-    const uint32_t *a = as->limb;
-    uint32_t *c = cs->limb;
-
-    uint64_t accum0 = 0, accum1 = 0, accum2, accum3, accumC0, accumC1, tmp;
-    uint32_t mask = (1<<28) - 1;  
-
-    uint32_t bm[8];
-    
-    int i;
-    for (i=0; i<8; i++) {
-        bm[i] = a[i] - a[i+8];
-    }
-
-    uint32_t ax,bx;
-    {
-        /* t^3 terms */
-        smull2(&accum1, ax = a[9], bx = a[15]);
-        smull2(&accum3, ax = a[10], bx);
-        smlal2(&accum1, ax, bx = a[14]);
-        smlal2(&accum3, ax = a[11], bx);
-        smlal2(&accum1, ax, bx = a[13]);
-        smlal2(&accum3, ax = a[12], bx);
-        smlal(&accum1, ax, ax);
-        
-        accum0 = accum1;
-        accum2 = accum3;
-        
-        /* t^2 terms */
-        smlal2(&accum2, ax = a[8], a[9]);
-        smlal(&accum0, ax, ax);
-        
-        smlal2(&accum0, ax = a[1], bx = a[7]);
-        smlal2(&accum2, ax = a[2], bx);
-        smlal2(&accum0, ax, bx = a[6]);
-        smlal2(&accum2, ax = a[3], bx);
-        smlal2(&accum0, ax, bx = a[5]);
-        smlal2(&accum2, ax = a[4], bx);
-        smlal(&accum0, ax, ax);
-        
-        /* t terms */
-        accum1 += accum0;
-        accum3 += accum2;
-        smlal2(&accum3, ax = a[0], bx = a[1]);
-        smlal(&accum1, ax, ax);
-        
-        accum1 = -accum1;
-        accum3 = -accum3;
-        accum2 = -accum2;
-        accum0 = -accum0;
-        
-        smlal2(&accum1, ax = bm[1], bx = bm[7]);
-        smlal2(&accum3, ax = bm[2], bx);
-        smlal2(&accum1, ax, bx = bm[6]);
-        smlal2(&accum3, ax = bm[3], bx);
-        smlal2(&accum1, ax, bx = bm[5]);
-        smlal2(&accum3, ax = bm[4], bx);
-        smlal(&accum1, ax, ax);
-        
-        /* 1 terms */
-        smlal2(&accum2, ax = bm[0], bx = bm[1]);
-        smlal(&accum0, ax, ax);
-        
-        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
-        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
-        
-        accum2 += accum0 >> 28;
-        accum3 += accum1 >> 28;
-        
-        c[0] = ((uint32_t)(accum0)) & mask;
-        c[1] = ((uint32_t)(accum2)) & mask;
-        c[8] = ((uint32_t)(accum1)) & mask;
-        c[9] = ((uint32_t)(accum3)) & mask;
-        
-        accumC0 = accum2 >> 28;
-        accumC1 = accum3 >> 28;
-    }
-    {
-        /* t^3 terms */
-        smull2(&accum1, ax = a[11], bx = a[15]);
-        smull2(&accum3, ax = a[12], bx);
-        smlal2(&accum1, ax, bx = a[14]);
-        smlal2(&accum3, ax = a[13], bx);
-        smlal(&accum1, ax, ax);
-        
-        accum0 = accum1;
-        accum2 = accum3;
-        
-        /* t^2 terms */
-        smlal2(&accum2, ax = a[8], bx = a[11]);
-        smlal2(&accum0, ax, bx = a[10]);
-        smlal2(&accum2, ax = a[9], bx);
-        smlal(&accum0, ax, ax);
-        
-        smlal2(&accum0, ax = a[3], bx = a[7]);
-        smlal2(&accum2, ax = a[4], bx);
-        smlal2(&accum0, ax, bx = a[6]);
-        smlal2(&accum2, ax = a[5], bx);
-        smlal(&accum0, ax, ax);
-        
-        /* t terms */
-        accum1 += accum0;
-        accum3 += accum2;
-        smlal2(&accum3, ax = a[0], bx = a[3]);
-        smlal2(&accum1, ax, bx = a[2]);
-        smlal2(&accum3, ax = a[1], bx);
-        smlal(&accum1, ax, ax);
-        
-        accum1 = -accum1;
-        accum3 = -accum3;
-        accum2 = -accum2;
-        accum0 = -accum0;
-        
-        smlal2(&accum1, ax = bm[3], bx = bm[7]);
-        smlal2(&accum3, ax = bm[4], bx);
-        smlal2(&accum1, ax, bx = bm[6]);
-        smlal2(&accum3, ax = bm[5], bx);
-        smlal(&accum1, ax, ax);
-        
-        /* 1 terms */
-        smlal2(&accum2, ax = bm[0], bx = bm[3]);
-        smlal2(&accum0, ax, bx = bm[2]);
-        smlal2(&accum2, ax = bm[1], bx);
-        smlal(&accum0, ax, ax);
-        
-        
-        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
-        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
-        
-        accum0 += accumC0;
-        accum1 += accumC1;
-        accum2 += accum0 >> 28;
-        accum3 += accum1 >> 28;
-        
-        c[2] = ((uint32_t)(accum0)) & mask;
-        c[3] = ((uint32_t)(accum2)) & mask;
-        c[10] = ((uint32_t)(accum1)) & mask;
-        c[11] = ((uint32_t)(accum3)) & mask;
-        
-        accumC0 = accum2 >> 28;
-        accumC1 = accum3 >> 28;
-    }
-    {
-        
-        /* t^3 terms */
-        smull2(&accum1, ax = a[13], bx = a[15]);
-        smull2(&accum3, ax = a[14], bx);
-        smlal(&accum1, ax, ax);
-        
-        accum0 = accum1;
-        accum2 = accum3;
-        
-        /* t^2 terms */
-        
-        smlal2(&accum2, ax = a[8], bx = a[13]);
-        smlal2(&accum0, ax, bx = a[12]);
-        smlal2(&accum2, ax = a[9], bx);
-        smlal2(&accum0, ax, bx = a[11]);
-        smlal2(&accum2, ax = a[10], bx);
-        smlal(&accum0, ax, ax);
-        
-        
-        smlal2(&accum0, ax = a[5], bx = a[7]);
-        smlal2(&accum2, ax = a[6], bx);
-        smlal(&accum0, ax, ax);
-        
-        /* t terms */
-        accum1 += accum0;
-        accum3 += accum2;
-        
-        smlal2(&accum3, ax = a[0], bx = a[5]);
-        smlal2(&accum1, ax, bx = a[4]);
-        smlal2(&accum3, ax = a[1], bx);
-        smlal2(&accum1, ax, bx = a[3]);
-        smlal2(&accum3, ax = a[2], bx);
-        smlal(&accum1, ax, ax);
-        
-        accum1 = -accum1;
-        accum3 = -accum3;
-        accum2 = -accum2;
-        accum0 = -accum0;
-        
-        smlal2(&accum1, ax = bm[5], bx = bm[7]);
-        smlal2(&accum3, ax = bm[6], bx);
-        smlal(&accum1, ax, ax);
-        
-        /* 1 terms */
-        
-        smlal2(&accum2, ax = bm[0], bx = bm[5]);
-        smlal2(&accum0, ax, bx = bm[4]);
-        smlal2(&accum2, ax = bm[1], bx);
-        smlal2(&accum0, ax, bx = bm[3]);
-        smlal2(&accum2, ax = bm[2], bx);
-        smlal(&accum0, ax, ax);
-        
-        
-        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
-        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
-        
-        accum0 += accumC0;
-        accum1 += accumC1;
-        accum2 += accum0 >> 28;
-        accum3 += accum1 >> 28;
-        
-        c[4] = ((uint32_t)(accum0)) & mask;
-        c[5] = ((uint32_t)(accum2)) & mask;
-        c[12] = ((uint32_t)(accum1)) & mask;
-        c[13] = ((uint32_t)(accum3)) & mask;
-        
-        accumC0 = accum2 >> 28;
-        accumC1 = accum3 >> 28;
-    }
-    {
-        
-        /* t^3 terms */
-        smull(&accum1, ax = a[15], bx = a[15]);
-        accum0 = accum1;
-        
-        /* t^2 terms */
-        
-        smull2(&accum2, ax = a[8], bx);
-        smlal2(&accum0, ax, bx = a[14]);
-        smlal2(&accum2, ax = a[9], bx);
-        smlal2(&accum0, ax, bx = a[13]);
-        smlal2(&accum2, ax = a[10], bx);
-        smlal2(&accum0, ax, bx = a[12]);
-        smlal2(&accum2, ax = a[11], bx);
-        smlal(&accum0, ax, ax);
-        
-        
-        smlal(&accum0, ax = a[7], bx = a[7]);
-        
-        /* t terms */
-        accum1 += accum0;
-        accum3 = accum2;
-        
-        smlal2(&accum3, ax = a[0], bx);
-        smlal2(&accum1, ax, bx = a[6]);
-        smlal2(&accum3, ax = a[1], bx);
-        smlal2(&accum1, ax, bx = a[5]);
-        smlal2(&accum3, ax = a[2], bx);
-        smlal2(&accum1, ax, bx = a[4]);
-        smlal2(&accum3, ax = a[3], bx);
-        smlal(&accum1, ax, ax);
-        
-        accum1 = -accum1;
-        accum3 = -accum3;
-        accum2 = -accum2;
-        accum0 = -accum0;
-        
-        bx = bm[7];
-        smlal(&accum1, bx, bx);
-        
-        /* 1 terms */
-        
-        smlal2(&accum2, ax = bm[0], bx);
-        smlal2(&accum0, ax, bx = bm[6]);
-        smlal2(&accum2, ax = bm[1], bx);
-        smlal2(&accum0, ax, bx = bm[5]);
-        smlal2(&accum2, ax = bm[2], bx);
-        smlal2(&accum0, ax, bx = bm[4]);
-        smlal2(&accum2, ax = bm[3], bx);
-        smlal(&accum0, ax, ax);
-        
-        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
-        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
-        
-        
-        accum0 += accumC0;
-        accum1 += accumC1;
-        accum2 += accum0 >> 28;
-        accum3 += accum1 >> 28;
-        
-        c[6] = ((uint32_t)(accum0)) & mask;
-        c[7] = ((uint32_t)(accum2)) & mask;
-        c[14] = ((uint32_t)(accum1)) & mask;
-        c[15] = ((uint32_t)(accum3)) & mask;
-        
-        accum0 = accum2 >> 28;
-        accum1 = accum3 >> 28;
-    }
-
-    accum0 += accum1;
-    accum0 += c[8];
-    accum1 += c[0];
-    c[8] = ((uint32_t)(accum0)) & mask;
-    c[0] = ((uint32_t)(accum1)) & mask;
-    
-    accum0 >>= 28;
-    accum1 >>= 28;
-    c[9] += ((uint32_t)(accum0));
-    c[1] += ((uint32_t)(accum1));
-}
-
-void gf_mulw_unsigned (
-    gf_s *__restrict__ cs,
-    const gf as,
-    uint32_t b
-) {
-    uint32_t mask = (1ull<<28)-1;  
-    assert(b <= mask);
-    
-    const uint32_t *a = as->limb;
-    uint32_t *c = cs->limb;
-
-    uint64_t accum0, accum8;
-
-    int i;
-
-    uint32_t c0, c8, n0, n8;
-    c0 = a[0]; c8 = a[8];
-    accum0 = widemul(b, c0);
-    accum8 = widemul(b, c8);
-
-    c[0] = accum0 & mask; accum0 >>= 28;
-    c[8] = accum8 & mask; accum8 >>= 28;
-    
-    i=1;
-    {
-        n0 = a[i]; n8 = a[i+8];
-        smlal(&accum0, b, n0);
-        smlal(&accum8, b, n8);
-        
-        c[i] = accum0 & mask; accum0 >>= 28;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
-        i++;
-    }
-    {
-        c0 = a[i]; c8 = a[i+8];
-        smlal(&accum0, b, c0);
-        smlal(&accum8, b, c8);
-
-        c[i] = accum0 & mask; accum0 >>= 28;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
-        i++;
-    }
-    {
-        n0 = a[i]; n8 = a[i+8];
-        smlal(&accum0, b, n0);
-        smlal(&accum8, b, n8);
-
-        c[i] = accum0 & mask; accum0 >>= 28;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
-        i++;
-    }
-    {
-        c0 = a[i]; c8 = a[i+8];
-        smlal(&accum0, b, c0);
-        smlal(&accum8, b, c8);
-
-        c[i] = accum0 & mask; accum0 >>= 28;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
-        i++;
-    }
-    {
-        n0 = a[i]; n8 = a[i+8];
-        smlal(&accum0, b, n0);
-        smlal(&accum8, b, n8);
-
-        c[i] = accum0 & mask; accum0 >>= 28;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
-        i++;
-    }
-    {
-        c0 = a[i]; c8 = a[i+8];
-        smlal(&accum0, b, c0);
-        smlal(&accum8, b, c8);
-        
-        c[i] = accum0 & mask; accum0 >>= 28;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
-        i++;
-    }
-    {
-        n0 = a[i]; n8 = a[i+8];
-        smlal(&accum0, b, n0);
-        smlal(&accum8, b, n8);
-
-        c[i] = accum0 & mask; accum0 >>= 28;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
-        i++;
-    }
-
-    accum0 += accum8 + c[8];
-    c[8] = accum0 & mask;
-    c[9] += accum0 >> 28;
-
-    accum8 += c[0];
-    c[0] = accum8 & mask;
-    c[1] += accum8 >> 28;
-}
diff --git a/crypto/ec/curve448/p448/arch_arm_32/f_impl.h b/crypto/ec/curve448/p448/arch_arm_32/f_impl.h
deleted file mode 100644
index 09d77aafdd..0000000000
--- a/crypto/ec/curve448/p448/arch_arm_32/f_impl.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* Copyright (c) 2014-2016 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#define GF_HEADROOM 2
-#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
-#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
-    {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}
-    
-#define LIMB_PLACE_VALUE(i) 28
-
-void gf_add_RAW (gf out, const gf a, const gf b) {
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
-        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
-    }
-    /*
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-        out->limb[i] = a->limb[i] + b->limb[i];
-    }
-    */
-}
-
-void gf_sub_RAW (gf out, const gf a, const gf b) {
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
-        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
-    }
-    /*
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-        out->limb[i] = a->limb[i] - b->limb[i];
-    }
-    */
-}
-
-void gf_bias (gf a, int amt) {
-    uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
-    uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
-    uint32x4_t *aa = (uint32x4_t*) a;
-    aa[0] += lo;
-    aa[1] += lo;
-    aa[2] += hi;
-    aa[3] += lo;
-}
-
-void gf_weak_reduce (gf a) {
-    uint64_t mask = (1ull<<28) - 1;
-    uint64_t tmp = a->limb[15] >> 28;
-    a->limb[8] += tmp;
-    for (unsigned int i=15; i>0; i--) {
-        a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>28);
-    }
-    a->limb[0] = (a->limb[0] & mask) + tmp;
-}
-
diff --git a/crypto/ec/curve448/p448/arch_neon/f_impl.c b/crypto/ec/curve448/p448/arch_neon/f_impl.c
deleted file mode 100644
index 5e998f9f37..0000000000
--- a/crypto/ec/curve448/p448/arch_neon/f_impl.c
+++ /dev/null
@@ -1,592 +0,0 @@
-/* Copyright (c) 2014 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#include "f_field.h"
-
-static __inline__ uint64x2_t __attribute__((gnu_inline,always_inline,unused))
-xx_vaddup_u64(uint64x2_t x) {
-    __asm__ ("vadd.s64 %f0, %e0" : "+w"(x));
-    return x;
-}
-
-static __inline__ int64x2_t __attribute__((gnu_inline,always_inline,unused))
-vrev128_s64(int64x2_t x) {
-    __asm__ ("vswp.s64 %e0, %f0" : "+w"(x));
-    return x;
-}
-
-static __inline__ uint64x2_t __attribute__((gnu_inline,always_inline))
-vrev128_u64(uint64x2_t x) {
-    __asm__ ("vswp.s64 %e0, %f0" : "+w"(x));
-    return x;
-}
-
-static inline void __attribute__((gnu_inline,always_inline,unused))
-smlal (
-    uint64_t *acc,
-    const uint32_t a,
-    const uint32_t b
-) {
-    *acc += (int64_t)(int32_t)a * (int64_t)(int32_t)b;
-}
-
-static inline void __attribute__((gnu_inline,always_inline,unused))
-smlal2 (
-    uint64_t *acc,
-    const uint32_t a,
-    const uint32_t b
-) {
-    *acc += (int64_t)(int32_t)a * (int64_t)(int32_t)b * 2;
-}
-
-static inline void __attribute__((gnu_inline,always_inline,unused))
-smull (
-    uint64_t *acc,
-    const uint32_t a,
-    const uint32_t b
-) {
-    *acc = (int64_t)(int32_t)a * (int64_t)(int32_t)b;
-}
-
-static inline void __attribute__((gnu_inline,always_inline,unused))
-smull2 (
-    uint64_t *acc,
-    const uint32_t a,
-    const uint32_t b
-) {
-    *acc = (int64_t)(int32_t)a * (int64_t)(int32_t)b * 2;
-}
-
-void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
-    #define _bl0 "q0"
-    #define _bl0_0 "d0"
-    #define _bl0_1 "d1"
-    #define _bh0 "q1"
-    #define _bh0_0 "d2"
-    #define _bh0_1 "d3"
-    #define _bs0 "q2"
-    #define _bs0_0 "d4"
-    #define _bs0_1 "d5"
-    #define _bl2 "q3"
-    #define _bl2_0 "d6"
-    #define _bl2_1 "d7"
-    #define _bh2 "q4"
-    #define _bh2_0 "d8"
-    #define _bh2_1 "d9"
-    #define _bs2 "q5"
-    #define _bs2_0 "d10"
-    #define _bs2_1 "d11"
-
-    #define _as0 "q6"
-    #define _as0_0 "d12"
-    #define _as0_1 "d13"
-    #define _as2 "q7"
-    #define _as2_0 "d14"
-    #define _as2_1 "d15"
-    #define _al0 "q8"
-    #define _al0_0 "d16"
-    #define _al0_1 "d17"
-    #define _ah0 "q9"
-    #define _ah0_0 "d18"
-    #define _ah0_1 "d19"
-    #define _al2 "q10"
-    #define _al2_0 "d20"
-    #define _al2_1 "d21"
-    #define _ah2 "q11"
-    #define _ah2_0 "d22"
-    #define _ah2_1 "d23"
-
-    #define _a0a "q12"
-    #define _a0a_0 "d24"
-    #define _a0a_1 "d25"
-    #define _a0b "q13"
-    #define _a0b_0 "d26"
-    #define _a0b_1 "d27"
-    #define _a1a "q14"
-    #define _a1a_0 "d28"
-    #define _a1a_1 "d29"
-    #define _a1b "q15"
-    #define _a1b_0 "d30"
-    #define _a1b_1 "d31"
-    #define VMAC(op,result,a,b,n) #op" "result", "a", "b"[" #n "]\n\t"
-    #define VOP3(op,result,a,b)   #op" "result", "a", "b"\n\t"
-    #define VOP2(op,result,a)     #op" "result", "a"\n\t"
-
-    int32x2_t *vc = (int32x2_t*) cs->limb;
-
-    __asm__ __volatile__(
-        
-        "vld2.32 {"_al0_0","_al0_1","_ah0_0","_ah0_1"}, [%[a],:128]!" "\n\t"
-        VOP3(vadd.i32,_as0,_al0,_ah0)
-        
-        "vld2.32 {"_bl0_0","_bl0_1","_bh0_0","_bh0_1"}, [%[b],:128]!" "\n\t"
-        VOP3(vadd.i32,_bs0_1,_bl0_1,_bh0_1)
-        VOP3(vsub.i32,_bs0_0,_bl0_0,_bh0_0)
-            
-        "vld2.32 {"_bl2_0","_bl2_1","_bh2_0","_bh2_1"}, [%[b],:128]!" "\n\t"
-        VOP3(vadd.i32,_bs2,_bl2,_bh2)
-            
-        "vld2.32 {"_al2_0","_al2_1","_ah2_0","_ah2_1"}, [%[a],:128]!" "\n\t"
-        VOP3(vadd.i32,_as2,_al2,_ah2)
-        
-        VMAC(vmull.s32,_a0b,_as0_1,_bs2_1,0)
-        VMAC(vmlal.s32,_a0b,_as2_0,_bs2_0,0)
-        VMAC(vmlal.s32,_a0b,_as2_1,_bs0_1,0)
-        VMAC(vmlal.s32,_a0b,_as0_0,_bh0_0,0)
-            
-        VMAC(vmull.s32,_a1b,_as0_1,_bs2_1,1)
-        VMAC(vmlal.s32,_a1b,_as2_0,_bs2_0,1)
-        VMAC(vmlal.s32,_a1b,_as2_1,_bs0_1,1)
-        VMAC(vmlal.s32,_a1b,_as0_0,_bh0_0,1)
-            
-        VOP2(vmov,_a0a,_a0b)
-        VMAC(vmlal.s32,_a0a,_ah0_1,_bh2_1,0)
-        VMAC(vmlal.s32,_a0a,_ah2_0,_bh2_0,0)
-        VMAC(vmlal.s32,_a0a,_ah2_1,_bh0_1,0)
-        VMAC(vmlal.s32,_a0a,_ah0_0,_bl0_0,0)
-            
-        VMAC(vmlsl.s32,_a0b,_al0_1,_bl2_1,0)
-        VMAC(vmlsl.s32,_a0b,_al2_0,_bl2_0,0)
-        VMAC(vmlsl.s32,_a0b,_al2_1,_bl0_1,0)
-        VMAC(vmlal.s32,_a0b,_al0_0,_bs0_0,0)
-            
-        VOP2(vmov,_a1a,_a1b)
-        VMAC(vmlal.s32,_a1a,_ah0_1,_bh2_1,1)
-        VMAC(vmlal.s32,_a1a,_ah2_0,_bh2_0,1)
-        VMAC(vmlal.s32,_a1a,_ah2_1,_bh0_1,1)
-        VMAC(vmlal.s32,_a1a,_ah0_0,_bl0_0,1)
-            
-            VOP2(vswp,_a0b_1,_a0a_0)
-            
-        VMAC(vmlsl.s32,_a1b,_al0_1,_bl2_1,1)
-        VMAC(vmlsl.s32,_a1b,_al2_0,_bl2_0,1)
-        VMAC(vmlsl.s32,_a1b,_al2_1,_bl0_1,1)
-        VMAC(vmlal.s32,_a1b,_al0_0,_bs0_0,1)
-                
-            VOP3(vsra.u64,_a0a,_a0b,"#28")
-            VOP3(vsub.i32,_bs0_1,_bl0_1,_bh0_1)
-            VOP2(vmovn.i64,_a0b_0,_a0b)
-                
-            VOP2(vswp,_a1b_1,_a1a_0)
-            VOP3(vadd.i64,_a1b,_a0a,_a1b)
-                    
-                    
-        VMAC(vmull.s32,_a0a,_as2_0,_bs2_1,0)
-            VOP2(vmovn.i64,_a0b_1,_a1b)
-        VMAC(vmlal.s32,_a0a,_as2_1,_bs2_0,0)
-            VOP3(vsra.u64,_a1a,_a1b,"#28")
-        VMAC(vmlal.s32,_a0a,_as0_0,_bh0_1,0)
-            VOP2(vbic.i32,_a0b,"#0xf0000000")
-        VMAC(vmlal.s32,_a0a,_as0_1,_bh0_0,0)
-            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
-                    
-        VMAC(vmull.s32,_a1b,_as2_0,_bs2_1,1)
-        VMAC(vmlal.s32,_a1b,_as2_1,_bs2_0,1)
-        VMAC(vmlal.s32,_a1b,_as0_0,_bh0_1,1)
-        VMAC(vmlal.s32,_a1b,_as0_1,_bh0_0,1)
-
-        VOP2(vmov,_a0b_1,_a0a_1)
-        VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0)
-        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
-        VMAC(vmlal.s32,_a0a,_ah2_0,_bh2_1,0)
-        VMAC(vmlal.s32,_a0a,_ah2_1,_bh2_0,0)
-        VMAC(vmlal.s32,_a0a,_ah0_0,_bl0_1,0)
-        VMAC(vmlal.s32,_a0a,_ah0_1,_bl0_0,0)
-
-        VMAC(vmlsl.s32,_a0b,_al2_0,_bl2_1,0)
-        VMAC(vmlsl.s32,_a0b,_al2_1,_bl2_0,0)
-        VMAC(vmlal.s32,_a0b,_al0_0,_bs0_1,0)
-        VMAC(vmlal.s32,_a0b,_al0_1,_bs0_0,0)
-
-        VOP2(vmov,_a1a,_a1b)
-        VMAC(vmlal.s32,_a1a,_ah2_0,_bh2_1,1)
-        VMAC(vmlal.s32,_a1a,_ah2_1,_bh2_0,1)
-        VMAC(vmlal.s32,_a1a,_ah0_0,_bl0_1,1)
-        VMAC(vmlal.s32,_a1a,_ah0_1,_bl0_0,1)
-
-            VOP2(vswp,_a0b_1,_a0a_0)
-
-        VMAC(vmlsl.s32,_a1b,_al2_0,_bl2_1,1)
-        VMAC(vmlsl.s32,_a1b,_al2_1,_bl2_0,1)
-        VMAC(vmlal.s32,_a1b,_al0_0,_bs0_1,1)
-        VMAC(vmlal.s32,_a1b,_al0_1,_bs0_0,1)
-                                        
-            VOP3(vsra.u64,_a0a,_a0b,"#28")
-            VOP3(vsub.i32,_bs2_0,_bl2_0,_bh2_0)
-            VOP2(vmovn.i64,_a0b_0,_a0b)
-                        
-            VOP2(vswp,_a1b_1,_a1a_0)
-            VOP3(vadd.i64,_a1b,_a0a,_a1b)
-
-        VMAC(vmull.s32,_a0a,_as2_1,_bs2_1,0)
-            VOP2(vmovn.i64,_a0b_1,_a1b)
-        VMAC(vmlal.s32,_a0a,_as0_0,_bh2_0,0)
-            VOP3(vsra.u64,_a1a,_a1b,"#28")
-        VMAC(vmlal.s32,_a0a,_as0_1,_bh0_1,0)
-            VOP2(vbic.i32,_a0b,"#0xf0000000")
-        VMAC(vmlal.s32,_a0a,_as2_0,_bh0_0,0)
-            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
-
-        VMAC(vmull.s32,_a1b,_as2_1,_bs2_1,1)
-        VMAC(vmlal.s32,_a1b,_as0_0,_bh2_0,1)
-        VMAC(vmlal.s32,_a1b,_as0_1,_bh0_1,1)
-        VMAC(vmlal.s32,_a1b,_as2_0,_bh0_0,1)
-
-        VOP2(vmov,_a0b_1,_a0a_1)
-        VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0)
-        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
-        VMAC(vmlal.s32,_a0a,_ah2_1,_bh2_1,0)
-        VMAC(vmlal.s32,_a0a,_ah0_0,_bl2_0,0)
-        VMAC(vmlal.s32,_a0a,_ah0_1,_bl0_1,0)
-        VMAC(vmlal.s32,_a0a,_ah2_0,_bl0_0,0)
-
-        VMAC(vmlsl.s32,_a0b,_al2_1,_bl2_1,0)
-        VMAC(vmlal.s32,_a0b,_al0_0,_bs2_0,0)
-        VMAC(vmlal.s32,_a0b,_al0_1,_bs0_1,0)
-        VMAC(vmlal.s32,_a0b,_al2_0,_bs0_0,0)
-
-        VOP2(vmov,_a1a,_a1b)
-        VMAC(vmlal.s32,_a1a,_ah2_1,_bh2_1,1)
-        VMAC(vmlal.s32,_a1a,_ah0_0,_bl2_0,1)
-        VMAC(vmlal.s32,_a1a,_ah0_1,_bl0_1,1)
-        VMAC(vmlal.s32,_a1a,_ah2_0,_bl0_0,1)
-
-            VOP2(vswp,_a0b_1,_a0a_0)
-
-        VMAC(vmlsl.s32,_a1b,_al2_1,_bl2_1,1)
-        VMAC(vmlal.s32,_a1b,_al0_0,_bs2_0,1)
-        VMAC(vmlal.s32,_a1b,_al0_1,_bs0_1,1)
-        VMAC(vmlal.s32,_a1b,_al2_0,_bs0_0,1)
-                                                                
-            VOP3(vsub.i32,_bs2_1,_bl2_1,_bh2_1)
-            VOP3(vsra.u64,_a0a,_a0b,"#28")
-            VOP2(vmovn.i64,_a0b_0,_a0b)
-                        
-            VOP2(vswp,_a1b_1,_a1a_0)
-            VOP3(vadd.i64,_a1b,_a0a,_a1b)
-
-        VMAC(vmull.s32,_a0a,_as0_0,_bh2_1,0)
-            VOP2(vmovn.i64,_a0b_1,_a1b)
-        VMAC(vmlal.s32,_a0a,_as0_1,_bh2_0,0)
-            VOP3(vsra.u64,_a1a,_a1b,"#28")
-        VMAC(vmlal.s32,_a0a,_as2_0,_bh0_1,0)
-            VOP2(vbic.i32,_a0b,"#0xf0000000")
-        VMAC(vmlal.s32,_a0a,_as2_1,_bh0_0,0)
-            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
-
-        VMAC(vmull.s32,_a1b,_as0_0,_bh2_1,1)
-        VMAC(vmlal.s32,_a1b,_as0_1,_bh2_0,1)
-        VMAC(vmlal.s32,_a1b,_as2_0,_bh0_1,1)
-        VMAC(vmlal.s32,_a1b,_as2_1,_bh0_0,1)
-
-        VOP2(vmov,_a0b_1,_a0a_1)
-        VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0)
-        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
-        VMAC(vmlal.s32,_a0a,_ah0_0,_bl2_1,0)
-        VMAC(vmlal.s32,_a0a,_ah0_1,_bl2_0,0)
-        VMAC(vmlal.s32,_a0a,_ah2_0,_bl0_1,0)
-        VMAC(vmlal.s32,_a0a,_ah2_1,_bl0_0,0)
-
-        VMAC(vmlal.s32,_a0b,_al0_0,_bs2_1,0)
-        VMAC(vmlal.s32,_a0b,_al0_1,_bs2_0,0)
-        VMAC(vmlal.s32,_a0b,_al2_0,_bs0_1,0)
-        VMAC(vmlal.s32,_a0b,_al2_1,_bs0_0,0)
-
-        VOP2(vmov,_a1a,_a1b)
-        VMAC(vmlal.s32,_a1a,_ah0_0,_bl2_1,1)
-        VMAC(vmlal.s32,_a1a,_ah0_1,_bl2_0,1)
-        VMAC(vmlal.s32,_a1a,_ah2_0,_bl0_1,1)
-        VMAC(vmlal.s32,_a1a,_ah2_1,_bl0_0,1)
-
-            VOP2(vswp,_a0b_1,_a0a_0)
-
-        VMAC(vmlal.s32,_a1b,_al0_0,_bs2_1,1)
-        VMAC(vmlal.s32,_a1b,_al0_1,_bs2_0,1)
-        VMAC(vmlal.s32,_a1b,_al2_0,_bs0_1,1)
-        VMAC(vmlal.s32,_a1b,_al2_1,_bs0_0,1)
-                        
-            VOP3(vsra.u64,_a0a,_a0b,"#28")
-            VOP2(vmovn.i64,_a0b_0,_a0b)
-                                                                                            
-            VOP2(vswp,_a1b_1,_a1a_0)
-            VOP3(vadd.i64,_a0a,_a0a,_a1b)
-
-            VOP2(vmovn.i64,_a0b_1,_a0a)
-            VOP3(vsra.u64,_a1a,_a0a,"#28")
-                                                                                            
-            VOP2(vbic.i32,_a0b,"#0xf0000000") 
-                                                                                            
-        VOP2(vswp,_a1a_0,_a1a_1)
-                                                                                            
-            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"  
-            "sub %[c], #64" "\n\t"
-                                                                                                
-        VOP3(vadd.i64,_a1a_1,_a1a_1,_a1a_0)
-        
-            "vldmia %[c], {"_a0a_0", "_a0a_1", "_a0b_0"}" "\n\t"
-            VOP2(vaddw.s32,_a1a,_a0a_0)
-            VOP2(vmovn.i64,_a0a_0,_a1a)
-            VOP2(vshr.s64,_a1a,"#28")
-                                                
-            VOP2(vaddw.s32,_a1a,_a0a_1)
-            VOP2(vmovn.i64,_a0a_1,_a1a)
-            VOP2(vshr.s64,_a1a,"#28")
-                                                                                                    
-            VOP2(vbic.i32,_a0a,"#0xf0000000")
-                                                
-            VOP2(vaddw.s32,_a1a,_a0b_0) 
-            VOP2(vmovn.i64,_a0b_0,_a1a)
-            
-            "vstmia %[c], {"_a0a_0", "_a0a_1", "_a0b_0"}" "\n\t"
-        
-        : [a]"+r"(as)
-        , [b]"+r"(bs)
-        , [c]"+r"(vc)
-                            
-        :: "q0","q1","q2","q3",
-            "q4","q5","q6","q7",
-            "q8","q9","q10","q11",
-            "q12","q13","q14","q15",
-            "memory"
-    );
-}
-
-void gf_sqr (gf_s *__restrict__ cs, const gf bs) {
-    int32x2_t *vc = (int32x2_t*) cs->limb;
-
-    __asm__ __volatile__ (
-        "vld2.32 {"_bl0_0","_bl0_1","_bh0_0","_bh0_1"}, [%[b],:128]!" "\n\t"
-        VOP3(vadd.i32,_bs0_1,_bl0_1,_bh0_1) /* 0 .. 2^30 */
-        VOP3(vsub.i32,_bs0_0,_bl0_0,_bh0_0) /* +- 2^29 */
-        VOP3(vadd.i32,_as0,_bl0,_bh0)       /* 0 .. 2^30 */
-            
-        "vld2.32 {"_bl2_0","_bl2_1","_bh2_0","_bh2_1"}, [%[b],:128]!" "\n\t"
-        VOP3(vadd.i32,_bs2,_bl2,_bh2)       /* 0 .. 2^30 */
-        VOP2(vmov,_as2,_bs2)
-        
-        VMAC(vqdmull.s32,_a0b,_as0_1,_bs2_1,0) /* 0 .. 8 * 2^58.  danger for vqdmlal is 32 */
-        VMAC(vmlal.s32,_a0b,_as2_0,_bs2_0,0)   /* 0 .. 12 */
-        VMAC(vmlal.s32,_a0b,_as0_0,_bh0_0,0)   /* 0 .. 14 */
-            
-        VMAC(vqdmull.s32,_a1b,_as0_1,_bs2_1,1) /* 0 .. 8 */
-        VMAC(vmlal.s32,_a1b,_as2_0,_bs2_0,1)   /* 0 .. 14 */
-        VMAC(vmlal.s32,_a1b,_as0_0,_bh0_0,1)   /* 0 .. 16 */
-            
-        VOP2(vmov,_a0a,_a0b)                   /* 0 .. 14 */
-        VMAC(vqdmlal.s32,_a0a,_bh0_1,_bh2_1,0) /* 0 .. 16 */
-        VMAC(vmlal.s32,_a0a,_bh2_0,_bh2_0,0)   /* 0 .. 17 */
-        VMAC(vmlal.s32,_a0a,_bh0_0,_bl0_0,0)   /* 0 .. 18 */
-            
-        VMAC(vqdmlsl.s32,_a0b,_bl0_1,_bl2_1,0) /*-2 .. 14 */
-        VMAC(vmlsl.s32,_a0b,_bl2_0,_bl2_0,0)   /*-3 .. 14 */
-        VMAC(vmlal.s32,_a0b,_bl0_0,_bs0_0,0)   /*-4 .. 15 */
-            
-        VOP2(vmov,_a1a,_a1b)
-        VMAC(vqdmlal.s32,_a1a,_bh0_1,_bh2_1,1) /* 0 .. 18 */
-        VMAC(vmlal.s32,_a1a,_bh2_0,_bh2_0,1)   /* 0 .. 19 */
-        VMAC(vmlal.s32,_a1a,_bh0_0,_bl0_0,1)   /* 0 .. 20 */
-            
-            VOP2(vswp,_a0b_1,_a0a_0)
-            
-        VMAC(vqdmlsl.s32,_a1b,_bl0_1,_bl2_1,1) /*-2 .. 16 */
-        VMAC(vmlsl.s32,_a1b,_bl2_0,_bl2_0,1)   /*-3 .. 16 */
-        VMAC(vmlal.s32,_a1b,_bl0_0,_bs0_0,1)   /*-4 .. 17 */
-                
-            VOP3(vsra.u64,_a0a,_a0b,"#28")
-            VOP3(vsub.i32,_bs0_1,_bl0_1,_bh0_1)
-            VOP2(vmovn.i64,_a0b_0,_a0b)
-                
-            VOP2(vswp,_a1b_1,_a1a_0)
-            VOP3(vadd.i64,_a1b,_a0a,_a1b)
-                    
-                    
-        VMAC(vqdmull.s32,_a0a,_as2_0,_bs2_1,0) /* 0 .. 8 */
-            VOP2(vmovn.i64,_a0b_1,_a1b)
-            VOP3(vsra.u64,_a1a,_a1b,"#28")
-        VMAC(vqdmlal.s32,_a0a,_as0_0,_bh0_1,0) /* 0 .. 12 */
-            VOP2(vbic.i32,_a0b,"#0xf0000000")
-            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
-                    
-        VMAC(vqdmull.s32,_a1b,_as2_0,_bs2_1,1) /* 0 .. 8 */
-        VMAC(vqdmlal.s32,_a1b,_as0_0,_bh0_1,1) /* 0 .. 12 */
-
-        VOP2(vmov,_a0b,_a0a)               /* 0 .. 12 */
-        VMAC(vqdmlal.s32,_a0a,_bh2_0,_bh2_1,0) /* 0 .. 14 */
-        VMAC(vqdmlal.s32,_a0a,_bh0_0,_bl0_1,0) /* 0 .. 16 */
-
-        VMAC(vqdmlsl.s32,_a0b,_bl2_0,_bl2_1,0) /*-2 .. 12 */
-        VMAC(vqdmlal.s32,_a0b,_bl0_0,_bs0_1,0) /*-4 .. 14 */
-        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
-        VOP3(vadd.i64,_a0b_0,_a0b_0,_a1a_0)
-
-        VOP2(vmov,_a1a,_a1b)                   /* 0 .. 12 */
-        VMAC(vqdmlal.s32,_a1a,_bh2_0,_bh2_1,1) /* 0 .. 14 */
-        VMAC(vqdmlal.s32,_a1a,_bh0_0,_bl0_1,1) /* 0 .. 16 */
-
-            VOP2(vswp,_a0b_1,_a0a_0)
-
-        VMAC(vqdmlsl.s32,_a1b,_bl2_0,_bl2_1,1) /*-2 .. 12 */
-        VMAC(vqdmlal.s32,_a1b,_bl0_0,_bs0_1,1) /*-4 .. 14 */
-                                        
-            VOP3(vsra.u64,_a0a,_a0b,"#28")
-            VOP3(vsub.i32,_bs2_0,_bl2_0,_bh2_0)
-            VOP2(vmovn.i64,_a0b_0,_a0b)
-                        
-            VOP2(vswp,_a1b_1,_a1a_0)
-            VOP3(vadd.i64,_a1b,_a0a,_a1b)
-
-        VMAC(vmull.s32,_a0a,_as2_1,_bs2_1,0)
-            VOP2(vmovn.i64,_a0b_1,_a1b)
-        VMAC(vqdmlal.s32,_a0a,_as0_0,_bh2_0,0)
-            VOP3(vsra.u64,_a1a,_a1b,"#28")
-        VMAC(vmlal.s32,_a0a,_as0_1,_bh0_1,0)
-            VOP2(vbic.i32,_a0b,"#0xf0000000")
-            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
-
-        VMAC(vmull.s32,_a1b,_as2_1,_bs2_1,1)
-        VMAC(vqdmlal.s32,_a1b,_as0_0,_bh2_0,1)
-        VMAC(vmlal.s32,_a1b,_as0_1,_bh0_1,1)
-
-        VOP2(vmov,_a0b_1,_a0a_1)
-        VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0)
-        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
-        VMAC(vmlal.s32,_a0a,_bh2_1,_bh2_1,0)
-        VMAC(vqdmlal.s32,_a0a,_bh0_0,_bl2_0,0)
-        VMAC(vmlal.s32,_a0a,_bh0_1,_bl0_1,0)
-
-        VMAC(vmlsl.s32,_a0b,_bl2_1,_bl2_1,0)
-        VMAC(vqdmlal.s32,_a0b,_bl0_0,_bs2_0,0)
-        VMAC(vmlal.s32,_a0b,_bl0_1,_bs0_1,0)
-
-        VOP2(vmov,_a1a,_a1b)
-        VMAC(vmlal.s32,_a1a,_bh2_1,_bh2_1,1)
-        VMAC(vqdmlal.s32,_a1a,_bh0_0,_bl2_0,1)
-        VMAC(vmlal.s32,_a1a,_bh0_1,_bl0_1,1)
-
-            VOP2(vswp,_a0b_1,_a0a_0)
-
-        VMAC(vmlsl.s32,_a1b,_bl2_1,_bl2_1,1)
-        VMAC(vqdmlal.s32,_a1b,_bl0_0,_bs2_0,1)
-        VMAC(vmlal.s32,_a1b,_bl0_1,_bs0_1,1)
-                                                                
-            VOP3(vsub.i32,_bs2_1,_bl2_1,_bh2_1)
-            VOP3(vsra.u64,_a0a,_a0b,"#28")
-            VOP2(vmovn.i64,_a0b_0,_a0b)
-                        
-            VOP2(vswp,_a1b_1,_a1a_0)
-            VOP3(vadd.i64,_a1b,_a0a,_a1b)
-
-        VMAC(vqdmull.s32,_a0a,_as0_0,_bh2_1,0)
-            VOP2(vmovn.i64,_a0b_1,_a1b)
-            VOP3(vsra.u64,_a1a,_a1b,"#28")
-        VMAC(vqdmlal.s32,_a0a,_as2_0,_bh0_1,0)
-            VOP2(vbic.i32,_a0b,"#0xf0000000")
-            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"
-
-        VMAC(vqdmull.s32,_a1b,_as0_0,_bh2_1,1)
-        VMAC(vqdmlal.s32,_a1b,_as2_0,_bh0_1,1)
-
-        VOP2(vmov,_a0b_1,_a0a_1)
-        VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0)
-        VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1)
-        VMAC(vqdmlal.s32,_a0a,_bh0_0,_bl2_1,0)
-        VMAC(vqdmlal.s32,_a0a,_bh2_0,_bl0_1,0)
-
-        VMAC(vqdmlal.s32,_a0b,_bl0_0,_bs2_1,0)
-        VMAC(vqdmlal.s32,_a0b,_bl2_0,_bs0_1,0)
-
-        VOP2(vmov,_a1a,_a1b)
-        VMAC(vqdmlal.s32,_a1a,_bh0_0,_bl2_1,1)
-        VMAC(vqdmlal.s32,_a1a,_bh2_0,_bl0_1,1)
-
-            VOP2(vswp,_a0b_1,_a0a_0)
-
-        VMAC(vqdmlal.s32,_a1b,_bl0_0,_bs2_1,1)
-        VMAC(vqdmlal.s32,_a1b,_bl2_0,_bs0_1,1)
-                        
-            VOP3(vsra.u64,_a0a,_a0b,"#28")
-            VOP2(vmovn.i64,_a0b_0,_a0b)
-                                                                                            
-            VOP2(vswp,_a1b_1,_a1a_0)
-            VOP3(vadd.i64,_a0a,_a0a,_a1b)
-
-            VOP2(vmovn.i64,_a0b_1,_a0a)
-            VOP3(vsra.u64,_a1a,_a0a,"#28")
-                                                                                            
-            VOP2(vbic.i32,_a0b,"#0xf0000000") 
-                                                                                            
-        VOP2(vswp,_a1a_0,_a1a_1)
-                                                                                            
-            "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t"  
-            "sub %[c], #64" "\n\t"
-                                                                                                
-        VOP3(vadd.i64,_a1a_1,_a1a_1,_a1a_0)
-        
-            "vldmia %[c], {"_a0a_0", "_a0a_1", "_a0b_0"}" "\n\t"
-            VOP2(vaddw.s32,_a1a,_a0a_0)
-            VOP2(vmovn.i64,_a0a_0,_a1a)
-            VOP2(vshr.s64,_a1a,"#28")
-                                                
-            VOP2(vaddw.s32,_a1a,_a0a_1)
-            VOP2(vmovn.i64,_a0a_1,_a1a)
-            VOP2(vshr.s64,_a1a,"#28")
-                                                                                                    
-            VOP2(vbic.i32,_a0a,"#0xf0000000")
-                                                
-            VOP2(vaddw.s32,_a1a,_a0b_0) 
-            VOP2(vmovn.i64,_a0b_0,_a1a)
-            
-            "vstmia %[c], {"_a0a_0", "_a0a_1", "_a0b_0"}" "\n\t"
-        
-        : [b]"+r"(bs)
-        , [c]"+r"(vc)
-                            
-        :: "q0","q1","q2","q3",
-            "q4","q5","q6","q7",
-            "q12","q13","q14","q15",
-            "memory"
-    );
-}
-
-void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) { 
-    uint32x2_t vmask = {(1<<28) - 1, (1<<28)-1};
-    assert(b<(1<<28));
-    
-    uint64x2_t accum;
-    const uint32x2_t *va = (const uint32x2_t *) as->limb;
-    uint32x2_t *vo = (uint32x2_t *) cs->limb;
-    uint32x2_t vc, vn;
-    uint32x2_t vb = {b, 0};
-    
-    vc = va[0];
-    accum = vmull_lane_u32(vc, vb, 0);
-    vo[0] = vmovn_u64(accum) & vmask;
-    accum = vshrq_n_u64(accum,28);
-    
-    /* PERF: the right way to do this is to reduce behind, i.e.
-     * vmull + vmlal round 0
-     * vmull + vmlal round 1
-     * vmull + vmlal round 2
-     * vsraq round 0, 1
-     * vmull + vmlal round 3
-     * vsraq round 1, 2
-     * ...
-     */
-    
-    int i;
-    for (i=1; i<8; i++) {
-        vn = va[i];
-        accum = vmlal_lane_u32(accum, vn, vb, 0);
-        vo[i] = vmovn_u64(accum) & vmask;
-        accum = vshrq_n_u64(accum,28);
-        vc = vn;
-    }
-        
-    accum = xx_vaddup_u64(vrev128_u64(accum));
-    accum = vaddw_u32(accum, vo[0]);
-    vo[0] = vmovn_u64(accum) & vmask;
-    
-    accum = vshrq_n_u64(accum,28);
-    vo[1] += vmovn_u64(accum);
-}
diff --git a/crypto/ec/curve448/p448/arch_neon/f_impl.h b/crypto/ec/curve448/p448/arch_neon/f_impl.h
deleted file mode 100644
index ba48d8cee2..0000000000
--- a/crypto/ec/curve448/p448/arch_neon/f_impl.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Copyright (c) 2014-2016 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#define GF_HEADROOM 2
-#define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15)
-#define USE_NEON_PERM 1
-#define LIMBHI(x) ((x##ull)>>28)
-#define LIMBLO(x) ((x##ull)&((1ull<<28)-1))
-#  define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
-    {{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \
-      LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \
-      LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \
-      LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}}
-    
-#define LIMB_PLACE_VALUE(i) 28
-
-void gf_add_RAW (gf out, const gf a, const gf b) {
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
-        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
-    }
-}
-
-void gf_sub_RAW (gf out, const gf a, const gf b) {
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
-        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
-    }
-    /*
-    unsigned int i;
-    for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-        out->limb[i] = a->limb[i] - b->limb[i];
-    }
-    */
-}
-
-void gf_bias (gf a, int amt) {
-    uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
-    uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1};
-    uint32x4_t *aa = (uint32x4_t*) a;
-    aa[0] += lo;
-    aa[1] += hi;
-    aa[2] += hi;
-    aa[3] += hi;
-}
-
-void gf_weak_reduce (gf a) {
-
-    uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1},
-       tmp = vshr_n_u32(aa[7],28);
-       
-    for (unsigned int i=7; i>=1; i--) {
-        aa[i] = vsra_n_u32(aa[i] & vmask, aa[i-1], 28);
-    }
-    aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2);
-}
-
diff --git a/crypto/ec/curve448/p448/arch_ref64/f_impl.c b/crypto/ec/curve448/p448/arch_ref64/f_impl.c
deleted file mode 100644
index 526810012a..0000000000
--- a/crypto/ec/curve448/p448/arch_ref64/f_impl.c
+++ /dev/null
@@ -1,302 +0,0 @@
-/* Copyright (c) 2014 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#include "f_field.h"
-
-void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
-    const uint64_t *a = as->limb, *b = bs->limb;
-    uint64_t *c = cs->limb;
-
-    __uint128_t accum0 = 0, accum1 = 0, accum2;
-    uint64_t mask = (1ull<<56) - 1;  
-
-    uint64_t aa[4], bb[4], bbb[4];
-
-    unsigned int i;
-    for (i=0; i<4; i++) {
-        aa[i]  = a[i] + a[i+4];
-        bb[i]  = b[i] + b[i+4];
-        bbb[i] = bb[i] + b[i+4];
-    }
-
-    int I_HATE_UNROLLED_LOOPS = 0;
-
-    if (I_HATE_UNROLLED_LOOPS) {
-        /* The compiler probably won't unroll this,
-         * so it's like 80% slower.
-         */
-        for (i=0; i<4; i++) {
-            accum2 = 0;
-
-            unsigned int j;
-            for (j=0; j<=i; j++) {
-                accum2 += widemul(a[j],   b[i-j]);
-                accum1 += widemul(aa[j], bb[i-j]);
-                accum0 += widemul(a[j+4], b[i-j+4]);
-            }
-            for (; j<4; j++) {
-                accum2 += widemul(a[j],   b[i-j+8]);
-                accum1 += widemul(aa[j], bbb[i-j+4]);
-                accum0 += widemul(a[j+4], bb[i-j+4]);
-            }
-
-            accum1 -= accum2;
-            accum0 += accum2;
-
-            c[i]   = ((uint64_t)(accum0)) & mask;
-            c[i+4] = ((uint64_t)(accum1)) & mask;
-
-            accum0 >>= 56;
-            accum1 >>= 56;
-        }
-    } else {
-        accum2  = widemul(a[0],  b[0]);
-        accum1 += widemul(aa[0], bb[0]);
-        accum0 += widemul(a[4],  b[4]);
-
-        accum2 += widemul(a[1],  b[7]);
-        accum1 += widemul(aa[1], bbb[3]);
-        accum0 += widemul(a[5],  bb[3]);
-
-        accum2 += widemul(a[2],  b[6]);
-        accum1 += widemul(aa[2], bbb[2]);
-        accum0 += widemul(a[6],  bb[2]);
-
-        accum2 += widemul(a[3],  b[5]);
-        accum1 += widemul(aa[3], bbb[1]);
-        accum0 += widemul(a[7],  bb[1]);
-
-        accum1 -= accum2;
-        accum0 += accum2;
-
-        c[0] = ((uint64_t)(accum0)) & mask;
-        c[4] = ((uint64_t)(accum1)) & mask;
-
-        accum0 >>= 56;
-        accum1 >>= 56;
-
-        accum2  = widemul(a[0],  b[1]);
-        accum1 += widemul(aa[0], bb[1]);
-        accum0 += widemul(a[4],  b[5]);
-
-        accum2 += widemul(a[1],  b[0]);
-        accum1 += widemul(aa[1], bb[0]);
-        accum0 += widemul(a[5],  b[4]);
-
-        accum2 += widemul(a[2],  b[7]);
-        accum1 += widemul(aa[2], bbb[3]);
-        accum0 += widemul(a[6],  bb[3]);
-
-        accum2 += widemul(a[3],  b[6]);
-        accum1 += widemul(aa[3], bbb[2]);
-        accum0 += widemul(a[7],  bb[2]);
-
-        accum1 -= accum2;
-        accum0 += accum2;
-
-        c[1] = ((uint64_t)(accum0)) & mask;
-        c[5] = ((uint64_t)(accum1)) & mask;
-
-        accum0 >>= 56;
-        accum1 >>= 56;
-
-        accum2  = widemul(a[0],  b[2]);
-        accum1 += widemul(aa[0], bb[2]);
-        accum0 += widemul(a[4],  b[6]);
-
-        accum2 += widemul(a[1],  b[1]);
-        accum1 += widemul(aa[1], bb[1]);
-        accum0 += widemul(a[5],  b[5]);
-
-        accum2 += widemul(a[2],  b[0]);
-        accum1 += widemul(aa[2], bb[0]);
-        accum0 += widemul(a[6],  b[4]);
-
-        accum2 += widemul(a[3],  b[7]);
-        accum1 += widemul(aa[3], bbb[3]);
-        accum0 += widemul(a[7],  bb[3]);
-
-        accum1 -= accum2;
-        accum0 += accum2;
-
-        c[2] = ((uint64_t)(accum0)) & mask;
-        c[6] = ((uint64_t)(accum1)) & mask;
-
-        accum0 >>= 56;
-        accum1 >>= 56;
-
-        accum2  = widemul(a[0],  b[3]);
-        accum1 += widemul(aa[0], bb[3]);
-        accum0 += widemul(a[4],  b[7]);
-
-        accum2 += widemul(a[1],  b[2]);
-        accum1 += widemul(aa[1], bb[2]);
-        accum0 += widemul(a[5],  b[6]);
-
-        accum2 += widemul(a[2],  b[1]);
-        accum1 += widemul(aa[2], bb[1]);
-        accum0 += widemul(a[6],  b[5]);
-
-        accum2 += widemul(a[3],  b[0]);
-        accum1 += widemul(aa[3], bb[0]);
-        accum0 += widemul(a[7],  b[4]);
-
-        accum1 -= accum2;
-        accum0 += accum2;
-
-        c[3] = ((uint64_t)(accum0)) & mask;
-        c[7] = ((uint64_t)(accum1)) & mask;
-
-        accum0 >>= 56;
-        accum1 >>= 56;
-    } /* !I_HATE_UNROLLED_LOOPS */
-
-    accum0 += accum1;
-    accum0 += c[4];
-    accum1 += c[0];
-    c[4] = ((uint64_t)(accum0)) & mask;
-    c[0] = ((uint64_t)(accum1)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-
-    c[5] += ((uint64_t)(accum0));
-    c[1] += ((uint64_t)(accum1));
-}
-
-void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
-    const uint64_t *a = as->limb;
-    uint64_t *c = cs->limb;
-
-    __uint128_t accum0 = 0, accum4 = 0;
-    uint64_t mask = (1ull<<56) - 1;  
-
-    int i;
-    for (i=0; i<4; i++) {
-        accum0 += widemul(b, a[i]);
-        accum4 += widemul(b, a[i+4]);
-        c[i]   = accum0 & mask; accum0 >>= 56;
-        c[i+4] = accum4 & mask; accum4 >>= 56;
-    }
-    
-    accum0 += accum4 + c[4];
-    c[4] = accum0 & mask;
-    c[5] += accum0 >> 56;
-
-    accum4 += c[0];
-    c[0] = accum4 & mask;
-    c[1] += accum4 >> 56;
-}
-
-void gf_sqr (gf_s *__restrict__ cs, const gf as) {
-    const uint64_t *a = as->limb;
-    uint64_t *c = cs->limb;
-
-    __uint128_t accum0 = 0, accum1 = 0, accum2;
-    uint64_t mask = (1ull<<56) - 1;  
-
-    uint64_t aa[4];
-
-    /* For some reason clang doesn't vectorize this without prompting? */
-    unsigned int i;
-    for (i=0; i<4; i++) {
-        aa[i] = a[i] + a[i+4];
-    }
-
-    accum2  = widemul(a[0],a[3]);
-    accum0  = widemul(aa[0],aa[3]);
-    accum1  = widemul(a[4],a[7]);
-
-    accum2 += widemul(a[1], a[2]);
-    accum0 += widemul(aa[1], aa[2]);
-    accum1 += widemul(a[5], a[6]);
-
-    accum0 -= accum2;
-    accum1 += accum2;
-
-    c[3] = ((uint64_t)(accum1))<<1 & mask;
-    c[7] = ((uint64_t)(accum0))<<1 & mask;
-
-    accum0 >>= 55;
-    accum1 >>= 55;
-
-    accum0 += widemul(2*aa[1],aa[3]);
-    accum1 += widemul(2*a[5], a[7]);
-    accum0 += widemul(aa[2], aa[2]);
-    accum1 += accum0;
-
-    accum0 -= widemul(2*a[1], a[3]);
-    accum1 += widemul(a[6], a[6]);
-    
-    accum2 = widemul(a[0],a[0]);
-    accum1 -= accum2;
-    accum0 += accum2;
-
-    accum0 -= widemul(a[2], a[2]);
-    accum1 += widemul(aa[0], aa[0]);
-    accum0 += widemul(a[4], a[4]);
-
-    c[0] = ((uint64_t)(accum0)) & mask;
-    c[4] = ((uint64_t)(accum1)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-
-    accum2  = widemul(2*aa[2],aa[3]);
-    accum0 -= widemul(2*a[2], a[3]);
-    accum1 += widemul(2*a[6], a[7]);
-
-    accum1 += accum2;
-    accum0 += accum2;
-
-    accum2  = widemul(2*a[0],a[1]);
-    accum1 += widemul(2*aa[0], aa[1]);
-    accum0 += widemul(2*a[4], a[5]);
-
-    accum1 -= accum2;
-    accum0 += accum2;
-
-    c[1] = ((uint64_t)(accum0)) & mask;
-    c[5] = ((uint64_t)(accum1)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-
-    accum2  = widemul(aa[3],aa[3]);
-    accum0 -= widemul(a[3], a[3]);
-    accum1 += widemul(a[7], a[7]);
-
-    accum1 += accum2;
-    accum0 += accum2;
-
-    accum2  = widemul(2*a[0],a[2]);
-    accum1 += widemul(2*aa[0], aa[2]);
-    accum0 += widemul(2*a[4], a[6]);
-
-    accum2 += widemul(a[1], a[1]);
-    accum1 += widemul(aa[1], aa[1]);
-    accum0 += widemul(a[5], a[5]);
-
-    accum1 -= accum2;
-    accum0 += accum2;
-
-    c[2] = ((uint64_t)(accum0)) & mask;
-    c[6] = ((uint64_t)(accum1)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-
-    accum0 += c[3];
-    accum1 += c[7];
-    c[3] = ((uint64_t)(accum0)) & mask;
-    c[7] = ((uint64_t)(accum1)) & mask;
-
-    /* we could almost stop here, but it wouldn't be stable, so... */
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-    c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1));
-    c[0] += ((uint64_t)(accum1));
-}
-
diff --git a/crypto/ec/curve448/p448/arch_ref64/f_impl.h b/crypto/ec/curve448/p448/arch_ref64/f_impl.h
deleted file mode 100644
index 05206bf988..0000000000
--- a/crypto/ec/curve448/p448/arch_ref64/f_impl.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (c) 2014-2016 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#define GF_HEADROOM 9999 /* Everything is reduced anyway */
-#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
-    
-#define LIMB_PLACE_VALUE(i) 56
-
-void gf_add_RAW (gf out, const gf a, const gf b) {
-    for (unsigned int i=0; i<8; i++) {
-        out->limb[i] = a->limb[i] + b->limb[i];
-    }
-    gf_weak_reduce(out);
-}
-
-void gf_sub_RAW (gf out, const gf a, const gf b) {
-    uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2;
-    for (unsigned int i=0; i<8; i++) {
-        out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1);
-    }
-    gf_weak_reduce(out);
-}
-
-void gf_bias (gf a, int amt) {
-    (void) a;
-    (void) amt;
-}
-
-void gf_weak_reduce (gf a) {
-    uint64_t mask = (1ull<<56) - 1;
-    uint64_t tmp = a->limb[7] >> 56;
-    a->limb[4] += tmp;
-    for (unsigned int i=7; i>0; i--) {
-        a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
-    }
-    a->limb[0] = (a->limb[0] & mask) + tmp;
-}
diff --git a/crypto/ec/curve448/p448/arch_x86_64/f_impl.c b/crypto/ec/curve448/p448/arch_x86_64/f_impl.c
deleted file mode 100644
index 1e1d76d617..0000000000
--- a/crypto/ec/curve448/p448/arch_x86_64/f_impl.c
+++ /dev/null
@@ -1,291 +0,0 @@
-/* Copyright (c) 2014 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#include "f_field.h"
-
-void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
-    const uint64_t *a = as->limb, *b = bs->limb;
-    uint64_t *c = cs->limb;
-
-    __uint128_t accum0 = 0, accum1 = 0, accum2;
-    uint64_t mask = (1ull<<56) - 1;  
-
-    uint64_t aa[4] VECTOR_ALIGNED, bb[4] VECTOR_ALIGNED, bbb[4] VECTOR_ALIGNED;
-
-    /* For some reason clang doesn't vectorize this without prompting? */
-    unsigned int i;
-    for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) {
-        ((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i];
-        ((uint64xn_t*)bb)[i] = ((const uint64xn_t*)b)[i] + ((const uint64xn_t*)(&b[4]))[i]; 
-        ((uint64xn_t*)bbb)[i] = ((const uint64xn_t*)bb)[i] + ((const uint64xn_t*)(&b[4]))[i];     
-    }
-    /*
-    for (int i=0; i<4; i++) {
-    aa[i] = a[i] + a[i+4];
-    bb[i] = b[i] + b[i+4];
-    }
-    */
-
-    accum2  = widemul(&a[0],&b[3]);
-    accum0  = widemul(&aa[0],&bb[3]);
-    accum1  = widemul(&a[4],&b[7]);
-
-    mac(&accum2, &a[1], &b[2]);
-    mac(&accum0, &aa[1], &bb[2]);
-    mac(&accum1, &a[5], &b[6]);
-
-    mac(&accum2, &a[2], &b[1]);
-    mac(&accum0, &aa[2], &bb[1]);
-    mac(&accum1, &a[6], &b[5]);
-
-    mac(&accum2, &a[3], &b[0]);
-    mac(&accum0, &aa[3], &bb[0]);
-    mac(&accum1, &a[7], &b[4]);
-
-    accum0 -= accum2;
-    accum1 += accum2;
-
-    c[3] = ((uint64_t)(accum1)) & mask;
-    c[7] = ((uint64_t)(accum0)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-    
-    mac(&accum0, &aa[1],&bb[3]);
-    mac(&accum1, &a[5], &b[7]);
-    mac(&accum0, &aa[2], &bb[2]);
-    mac(&accum1, &a[6], &b[6]);
-    mac(&accum0, &aa[3], &bb[1]);
-    accum1 += accum0;
-
-    accum2 = widemul(&a[0],&b[0]);
-    accum1 -= accum2;
-    accum0 += accum2;
-    
-    msb(&accum0, &a[1], &b[3]);
-    msb(&accum0, &a[2], &b[2]);
-    mac(&accum1, &a[7], &b[5]);
-    msb(&accum0, &a[3], &b[1]);
-    mac(&accum1, &aa[0], &bb[0]);
-    mac(&accum0, &a[4], &b[4]);
-
-    c[0] = ((uint64_t)(accum0)) & mask;
-    c[4] = ((uint64_t)(accum1)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-
-    accum2  = widemul(&a[2],&b[7]);
-    mac(&accum0, &a[6], &bb[3]);
-    mac(&accum1, &aa[2], &bbb[3]);
-
-    mac(&accum2, &a[3], &b[6]);
-    mac(&accum0, &a[7], &bb[2]);
-    mac(&accum1, &aa[3], &bbb[2]);
-
-    mac(&accum2, &a[0],&b[1]);
-    mac(&accum1, &aa[0], &bb[1]);
-    mac(&accum0, &a[4], &b[5]);
-
-    mac(&accum2, &a[1], &b[0]);
-    mac(&accum1, &aa[1], &bb[0]);
-    mac(&accum0, &a[5], &b[4]);
-
-    accum1 -= accum2;
-    accum0 += accum2;
-
-    c[1] = ((uint64_t)(accum0)) & mask;
-    c[5] = ((uint64_t)(accum1)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-
-    accum2  = widemul(&a[3],&b[7]);
-    mac(&accum0, &a[7], &bb[3]);
-    mac(&accum1, &aa[3], &bbb[3]);
-
-    mac(&accum2, &a[0],&b[2]);
-    mac(&accum1, &aa[0], &bb[2]);
-    mac(&accum0, &a[4], &b[6]);
-
-    mac(&accum2, &a[1], &b[1]);
-    mac(&accum1, &aa[1], &bb[1]);
-    mac(&accum0, &a[5], &b[5]);
-
-    mac(&accum2, &a[2], &b[0]);
-    mac(&accum1, &aa[2], &bb[0]);
-    mac(&accum0, &a[6], &b[4]);
-
-    accum1 -= accum2;
-    accum0 += accum2;
-
-    c[2] = ((uint64_t)(accum0)) & mask;
-    c[6] = ((uint64_t)(accum1)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-
-    accum0 += c[3];
-    accum1 += c[7];
-    c[3] = ((uint64_t)(accum0)) & mask;
-    c[7] = ((uint64_t)(accum1)) & mask;
-
-    /* we could almost stop here, but it wouldn't be stable, so... */
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-    c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1));
-    c[0] += ((uint64_t)(accum1));
-}
-
-void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
-    const uint64_t *a = as->limb;
-    uint64_t *c = cs->limb;
-
-    __uint128_t accum0, accum4;
-    uint64_t mask = (1ull<<56) - 1;  
-
-    accum0 = widemul_rm(b, &a[0]);
-    accum4 = widemul_rm(b, &a[4]);
-
-    c[0] = accum0 & mask; accum0 >>= 56;
-    c[4] = accum4 & mask; accum4 >>= 56;
-
-    mac_rm(&accum0, b, &a[1]);
-    mac_rm(&accum4, b, &a[5]);
-
-    c[1] = accum0 & mask; accum0 >>= 56;
-    c[5] = accum4 & mask; accum4 >>= 56;
-
-    mac_rm(&accum0, b, &a[2]);
-    mac_rm(&accum4, b, &a[6]);
-
-    c[2] = accum0 & mask; accum0 >>= 56;
-    c[6] = accum4 & mask; accum4 >>= 56;
-
-    mac_rm(&accum0, b, &a[3]);
-    mac_rm(&accum4, b, &a[7]);
-
-    c[3] = accum0 & mask; accum0 >>= 56;
-    c[7] = accum4 & mask; accum4 >>= 56;
-    
-    accum0 += accum4 + c[4];
-    c[4] = accum0 & mask;
-    c[5] += accum0 >> 56;
-
-    accum4 += c[0];
-    c[0] = accum4 & mask;
-    c[1] += accum4 >> 56;
-}
-
-void gf_sqr (gf_s *__restrict__ cs, const gf as) {
-    const uint64_t *a = as->limb;
-    uint64_t *c = cs->limb;
-
-    __uint128_t accum0 = 0, accum1 = 0, accum2;
-    uint64_t mask = (1ull<<56) - 1;  
-
-    uint64_t aa[4] VECTOR_ALIGNED;
-
-    /* For some reason clang doesn't vectorize this without prompting? */
-    unsigned int i;
-    for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) {
-      ((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i];
-    }
-
-    accum2  = widemul(&a[0],&a[3]);
-    accum0  = widemul(&aa[0],&aa[3]);
-    accum1  = widemul(&a[4],&a[7]);
-
-    mac(&accum2, &a[1], &a[2]);
-    mac(&accum0, &aa[1], &aa[2]);
-    mac(&accum1, &a[5], &a[6]);
-
-    accum0 -= accum2;
-    accum1 += accum2;
-
-    c[3] = ((uint64_t)(accum1))<<1 & mask;
-    c[7] = ((uint64_t)(accum0))<<1 & mask;
-
-    accum0 >>= 55;
-    accum1 >>= 55;
-
-    mac2(&accum0, &aa[1],&aa[3]);
-    mac2(&accum1, &a[5], &a[7]);
-    mac(&accum0, &aa[2], &aa[2]);
-    accum1 += accum0;
-
-    msb2(&accum0, &a[1], &a[3]);
-    mac(&accum1, &a[6], &a[6]);
-    
-    accum2 = widemul(&a[0],&a[0]);
-    accum1 -= accum2;
-    accum0 += accum2;
-
-    msb(&accum0, &a[2], &a[2]);
-    mac(&accum1, &aa[0], &aa[0]);
-    mac(&accum0, &a[4], &a[4]);
-
-    c[0] = ((uint64_t)(accum0)) & mask;
-    c[4] = ((uint64_t)(accum1)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-
-    accum2  = widemul2(&aa[2],&aa[3]);
-    msb2(&accum0, &a[2], &a[3]);
-    mac2(&accum1, &a[6], &a[7]);
-
-    accum1 += accum2;
-    accum0 += accum2;
-
-    accum2  = widemul2(&a[0],&a[1]);
-    mac2(&accum1, &aa[0], &aa[1]);
-    mac2(&accum0, &a[4], &a[5]);
-
-    accum1 -= accum2;
-    accum0 += accum2;
-
-    c[1] = ((uint64_t)(accum0)) & mask;
-    c[5] = ((uint64_t)(accum1)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-
-    accum2  = widemul(&aa[3],&aa[3]);
-    msb(&accum0, &a[3], &a[3]);
-    mac(&accum1, &a[7], &a[7]);
-
-    accum1 += accum2;
-    accum0 += accum2;
-
-    accum2  = widemul2(&a[0],&a[2]);
-    mac2(&accum1, &aa[0], &aa[2]);
-    mac2(&accum0, &a[4], &a[6]);
-
-    mac(&accum2, &a[1], &a[1]);
-    mac(&accum1, &aa[1], &aa[1]);
-    mac(&accum0, &a[5], &a[5]);
-
-    accum1 -= accum2;
-    accum0 += accum2;
-
-    c[2] = ((uint64_t)(accum0)) & mask;
-    c[6] = ((uint64_t)(accum1)) & mask;
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-
-    accum0 += c[3];
-    accum1 += c[7];
-    c[3] = ((uint64_t)(accum0)) & mask;
-    c[7] = ((uint64_t)(accum1)) & mask;
-
-    /* we could almost stop here, but it wouldn't be stable, so... */
-
-    accum0 >>= 56;
-    accum1 >>= 56;
-    c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1));
-    c[0] += ((uint64_t)(accum1));
-}
diff --git a/crypto/ec/curve448/p448/arch_x86_64/f_impl.h b/crypto/ec/curve448/p448/arch_x86_64/f_impl.h
deleted file mode 100644
index a85044a7f4..0000000000
--- a/crypto/ec/curve448/p448/arch_x86_64/f_impl.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Copyright (c) 2014-2016 Cryptography Research, Inc.
- * Released under the MIT License.  See LICENSE.txt for license information.
- */
-
-#define GF_HEADROOM 60
-#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
-#define LIMB_PLACE_VALUE(i) 56
-
-void gf_add_RAW (gf out, const gf a, const gf b) {
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
-        ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
-    }
-    /*
-    unsigned int i;
-    for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-        out->limb[i] = a->limb[i] + b->limb[i];
-    }
-    */
-}
-
-void gf_sub_RAW (gf out, const gf a, const gf b) {
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
-        ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
-    }
-    /*
-    unsigned int i;
-    for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-        out->limb[i] = a->limb[i] - b->limb[i];
-    }
-    */
-}
-
-void gf_bias (gf a, int amt) {
-    uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
-    
-#if __AVX2__
-    uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
-    uint64x4_t *aa = (uint64x4_t*) a;
-    aa[0] += lo;
-    aa[1] += hi;
-#elif __SSE2__
-    uint64x2_t lo = {co1,co1}, hi = {co2,co1};
-    uint64x2_t *aa = (uint64x2_t*) a;
-    aa[0] += lo;
-    aa[1] += lo;
-    aa[2] += hi;
-    aa[3] += lo;
-#else
-    for (unsigned int i=0; i<sizeof(*a)/sizeof(uint64_t); i++) {
-        a->limb[i] += (i==4) ? co2 : co1;
-    }
-#endif
-}
-
-void gf_weak_reduce (gf a) {
-    /* PERF: use pshufb/palignr if anyone cares about speed of this */
-    uint64_t mask = (1ull<<56) - 1;
-    uint64_t tmp = a->limb[7] >> 56;
-    a->limb[4] += tmp;
-    for (unsigned int i=7; i>0; i--) {
-        a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
-    }
-    a->limb[0] = (a->limb[0] & mask) + tmp;
-}
-
diff --git a/crypto/ec/curve448/p448/f_arithmetic.c b/crypto/ec/curve448/p448/f_arithmetic.c
deleted file mode 100644
index cf68519686..0000000000
--- a/crypto/ec/curve448/p448/f_arithmetic.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * @cond internal
- * @file f_arithmetic.c
- * @copyright
- *   Copyright (c) 2014 Cryptography Research, Inc.  \n
- *   Released under the MIT License.  See LICENSE.txt for license information.
- * @author Mike Hamburg
- * @brief Field-specific arithmetic.
- */
-
-#include "field.h"
-
-mask_t gf_isr (
-    gf a,
-    const gf x
-) {
-    gf L0, L1, L2;
-    gf_sqr  (L1,     x );
-    gf_mul  (L2,     x,   L1 );
-    gf_sqr  (L1,   L2 );
-    gf_mul  (L2,     x,   L1 );
-    gf_sqrn (L1,   L2,     3 );
-    gf_mul  (L0,   L2,   L1 );
-    gf_sqrn (L1,   L0,     3 );
-    gf_mul  (L0,   L2,   L1 );
-    gf_sqrn (L2,   L0,     9 );
-    gf_mul  (L1,   L0,   L2 );
-    gf_sqr  (L0,   L1 );
-    gf_mul  (L2,     x,   L0 );
-    gf_sqrn (L0,   L2,    18 );
-    gf_mul  (L2,   L1,   L0 );
-    gf_sqrn (L0,   L2,    37 );
-    gf_mul  (L1,   L2,   L0 );
-    gf_sqrn (L0,   L1,    37 );
-    gf_mul  (L1,   L2,   L0 );
-    gf_sqrn (L0,   L1,   111 );
-    gf_mul  (L2,   L1,   L0 );
-    gf_sqr  (L0,   L2 );
-    gf_mul  (L1,     x,   L0 );
-    gf_sqrn (L0,   L1,   223 );
-    gf_mul  (L1,   L2,   L0 );
-    gf_sqr  (L2, L1);
-    gf_mul  (L0, L2, x);
-    gf_copy(a,L1);
-    return gf_eq(L0,ONE);
-}
diff --git a/crypto/ec/curve448/portable_endian.h b/crypto/ec/curve448/portable_endian.h
new file mode 100644
index 0000000000..5cbfca7aac
--- /dev/null
+++ b/crypto/ec/curve448/portable_endian.h
@@ -0,0 +1,39 @@
+/* Subset of Mathias Panzenböck's portable endian code, public domain */
+
+#ifndef __PORTABLE_ENDIAN_H__
+#define __PORTABLE_ENDIAN_H__
+
+#if defined(__linux__) || defined(__CYGWIN__)
+#	include <endian.h>
+#elif defined(__OpenBSD__)
+#	include <sys/endian.h>
+#elif defined(__APPLE__)
+#	include <libkern/OSByteOrder.h>
+#	define htole64(x) OSSwapHostToLittleInt64(x)
+#	define le64toh(x) OSSwapLittleToHostInt64(x)
+#elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
+#	include <sys/endian.h>
+#	ifndef le64toh
+#		define le64toh(x) letoh64(x)
+#	endif
+#elif defined(__sun) && defined(__SVR4)
+#	include <sys/byteorder.h>
+#	define htole64(x) LE_64(x)
+#	define le64toh(x) LE_64(x)
+#elif defined(_WIN16) || defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
+#	include <winsock2.h>
+#	include <sys/param.h>
+#	if BYTE_ORDER == LITTLE_ENDIAN
+#		define htole64(x) (x)
+#		define le64toh(x) (x)
+#	elif BYTE_ORDER == BIG_ENDIAN
+#		define htole64(x) __builtin_bswap64(x)
+#		define le64toh(x) __builtin_bswap64(x)
+#	else
+#		error byte order not supported
+#	endif
+#else
+#	error platform not supported
+#endif
+
+#endif // __PORTABLE_ENDIAN_H__
diff --git a/crypto/ec/curve448/scalar.c b/crypto/ec/curve448/scalar.c
new file mode 100644
index 0000000000..1c98ac91d4
--- /dev/null
+++ b/crypto/ec/curve448/scalar.c
@@ -0,0 +1,341 @@
+/**
+ * @file ed448goldilocks/scalar.c
+ * @author Mike Hamburg
+ *
+ * @copyright
+ *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
+ *   Released under the MIT License.  See LICENSE.txt for license information.
+ *
+ * @brief Decaf high-level functions.
+ *
+ * @warning This file was automatically generated in Python.
+ * Please do not edit it.
+ */
+#include "word.h"
+#include "constant_time.h"
+#include <decaf.h>
+
+/* Template stuff */
+#define API_NS(_id) decaf_448_##_id
+#define SCALAR_BITS DECAF_448_SCALAR_BITS
+#define SCALAR_SER_BYTES DECAF_448_SCALAR_BYTES
+#define SCALAR_LIMBS DECAF_448_SCALAR_LIMBS
+#define scalar_t API_NS(scalar_t)
+
+static const decaf_word_t MONTGOMERY_FACTOR = (decaf_word_t)0x3bd440fae918bc5ull;
+static const scalar_t sc_p = {{{
+    SC_LIMB(0x2378c292ab5844f3), SC_LIMB(0x216cc2728dc58f55), SC_LIMB(0xc44edb49aed63690), SC_LIMB(0xffffffff7cca23e9), SC_LIMB(0xffffffffffffffff), SC_LIMB(0xffffffffffffffff), SC_LIMB(0x3fffffffffffffff)
+}}}, sc_r2 = {{{
+    SC_LIMB(0xe3539257049b9b60), SC_LIMB(0x7af32c4bc1b195d9), SC_LIMB(0x0d66de2388ea1859), SC_LIMB(0xae17cf725ee4d838), SC_LIMB(0x1a9cc14ba3c47c44), SC_LIMB(0x2052bcb7e4d070af), SC_LIMB(0x3402a939f823b729)
+}}};
+/* End of template stuff */
+
+#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */
+
+const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}};
+
+/** {extra,accum} - sub +? p
+ * Must have extra <= 1
+ */
+static DECAF_NOINLINE void sc_subx(
+    scalar_t out,
+    const decaf_word_t accum[SCALAR_LIMBS],
+    const scalar_t sub,
+    const scalar_t p,
+    decaf_word_t extra
+) {
+    decaf_dsword_t chain = 0;
+    unsigned int i;
+    for (i=0; i<SCALAR_LIMBS; i++) {
+        chain = (chain + accum[i]) - sub->limb[i];
+        out->limb[i] = chain;
+        chain >>= WBITS;
+    }
+    decaf_word_t borrow = chain+extra; /* = 0 or -1 */
+    
+    chain = 0;
+    for (i=0; i<SCALAR_LIMBS; i++) {
+        chain = (chain + out->limb[i]) + (p->limb[i] & borrow);
+        out->limb[i] = chain;
+        chain >>= WBITS;
+    }
+}
+
+static DECAF_NOINLINE void sc_montmul (
+    scalar_t out,
+    const scalar_t a,
+    const scalar_t b
+) {
+    unsigned int i,j;
+    decaf_word_t accum[SCALAR_LIMBS+1] = {0};
+    decaf_word_t hi_carry = 0;
+    
+    for (i=0; i<SCALAR_LIMBS; i++) {
+        decaf_word_t mand = a->limb[i];
+        const decaf_word_t *mier = b->limb;
+        
+        decaf_dword_t chain = 0;
+        for (j=0; j<SCALAR_LIMBS; j++) {
+            chain += ((decaf_dword_t)mand)*mier[j] + accum[j];
+            accum[j] = chain;
+            chain >>= WBITS;
+        }
+        accum[j] = chain;
+        
+        mand = accum[0] * MONTGOMERY_FACTOR;
+        chain = 0;
+        mier = sc_p->limb;
+        for (j=0; j<SCALAR_LIMBS; j++) {
+            chain += (decaf_dword_t)mand*mier[j] + accum[j];
+            if (j) accum[j-1] = chain;
+            chain >>= WBITS;
+        }
+        chain += accum[j];
+        chain += hi_carry;
+        accum[j-1] = chain;
+        hi_carry = chain >> WBITS;
+    }
+    
+    sc_subx(out, accum, sc_p, sc_p, hi_carry);
+}
+
+void API_NS(scalar_mul) (
+    scalar_t out,
+    const scalar_t a,
+    const scalar_t b
+) {
+    sc_montmul(out,a,b);
+    sc_montmul(out,out,sc_r2);
+}
+
+/* PERF: could implement this */
+static DECAF_INLINE void sc_montsqr (scalar_t out, const scalar_t a) {
+    sc_montmul(out,a,a);
+}
+
+decaf_error_t API_NS(scalar_invert) (
+    scalar_t out,
+    const scalar_t a
+) {
+    /* Fermat's little theorem, sliding window.
+     * Sliding window is fine here because the modulus isn't secret.
+     */
+    const int SCALAR_WINDOW_BITS = 3;
+    scalar_t precmp[1<<SCALAR_WINDOW_BITS];
+    const int LAST = (1<<SCALAR_WINDOW_BITS)-1;
+
+    /* Precompute precmp = [a^1,a^3,...] */
+    sc_montmul(precmp[0],a,sc_r2);
+    if (LAST > 0) sc_montmul(precmp[LAST],precmp[0],precmp[0]);
+
+    int i;
+    for (i=1; i<=LAST; i++) {
+        sc_montmul(precmp[i],precmp[i-1],precmp[LAST]);
+    }
+    
+    /* Sliding window */
+    unsigned residue = 0, trailing = 0, started = 0;
+    for (i=SCALAR_BITS-1; i>=-SCALAR_WINDOW_BITS; i--) {
+        
+        if (started) sc_montsqr(out,out);
+        
+        decaf_word_t w = (i>=0) ? sc_p->limb[i/WBITS] : 0;
+        if (i >= 0 && i<WBITS) {
+            assert(w >= 2);
+            w-=2;
+        }
+        
+        residue = (residue<<1) | ((w>>(i%WBITS))&1);
+        if (residue>>SCALAR_WINDOW_BITS != 0) {
+            assert(trailing == 0);
+            trailing = residue;
+            residue = 0;
+        }
+        
+        if (trailing > 0 && (trailing & ((1<<SCALAR_WINDOW_BITS)-1)) == 0) {
+            if (started) {
+                sc_montmul(out,out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
+            } else {
+                API_NS(scalar_copy)(out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
+                started = 1;
+            }
+            trailing = 0;
+        }
+        trailing <<= 1;
+        
+    }
+    assert(residue==0);
+    assert(trailing==0);
+    
+    /* Demontgomerize */
+    sc_montmul(out,out,API_NS(scalar_one));
+    decaf_bzero(precmp, sizeof(precmp));
+    return decaf_succeed_if(~API_NS(scalar_eq)(out,API_NS(scalar_zero)));
+}
+
+void API_NS(scalar_sub) (
+    scalar_t out,
+    const scalar_t a,
+    const scalar_t b
+) {
+    sc_subx(out, a->limb, b, sc_p, 0);
+}
+
+void API_NS(scalar_add) (
+    scalar_t out,
+    const scalar_t a,
+    const scalar_t b
+) {
+    decaf_dword_t chain = 0;
+    unsigned int i;
+    for (i=0; i<SCALAR_LIMBS; i++) {
+        chain = (chain + a->limb[i]) + b->limb[i];
+        out->limb[i] = chain;
+        chain >>= WBITS;
+    }
+    sc_subx(out, out->limb, sc_p, sc_p, chain);
+}
+
+void
+API_NS(scalar_set_unsigned) (
+    scalar_t out,
+    uint64_t w
+) {
+    memset(out,0,sizeof(scalar_t));
+    unsigned int i = 0;
+    for (; i<sizeof(uint64_t)/sizeof(decaf_word_t); i++) {
+        out->limb[i] = w;
+#if DECAF_WORD_BITS < 64
+        w >>= 8*sizeof(decaf_word_t);
+#endif
+    }
+}
+
+decaf_bool_t
+API_NS(scalar_eq) (
+    const scalar_t a,
+    const scalar_t b
+) {
+    decaf_word_t diff = 0;
+    unsigned int i;
+    for (i=0; i<SCALAR_LIMBS; i++) {
+        diff |= a->limb[i] ^ b->limb[i];
+    }
+    return mask_to_bool(word_is_zero(diff));
+}
+
+static DECAF_INLINE void scalar_decode_short (
+    scalar_t s,
+    const unsigned char *ser,
+    unsigned int nbytes
+) {
+    unsigned int i,j,k=0;
+    for (i=0; i<SCALAR_LIMBS; i++) {
+        decaf_word_t out = 0;
+        for (j=0; j<sizeof(decaf_word_t) && k<nbytes; j++,k++) {
+            out |= ((decaf_word_t)ser[k])<<(8*j);
+        }
+        s->limb[i] = out;
+    }
+}
+
+decaf_error_t API_NS(scalar_decode)(
+    scalar_t s,
+    const unsigned char ser[SCALAR_SER_BYTES]
+) {
+    unsigned int i;
+    scalar_decode_short(s, ser, SCALAR_SER_BYTES);
+    decaf_dsword_t accum = 0;
+    for (i=0; i<SCALAR_LIMBS; i++) {
+        accum = (accum + s->limb[i] - sc_p->limb[i]) >> WBITS;
+    }
+    /* Here accum == 0 or -1 */
+    
+    API_NS(scalar_mul)(s,s,API_NS(scalar_one)); /* ham-handed reduce */
+    
+    return decaf_succeed_if(~word_is_zero(accum));
+}
+
+void API_NS(scalar_destroy) (
+    scalar_t scalar
+) {
+    decaf_bzero(scalar, sizeof(scalar_t));
+}
+
+void API_NS(scalar_decode_long)(
+    scalar_t s,
+    const unsigned char *ser,
+    size_t ser_len
+) {
+    if (ser_len == 0) {
+        API_NS(scalar_copy)(s, API_NS(scalar_zero));
+        return;
+    }
+    
+    size_t i;
+    scalar_t t1, t2;
+
+    i = ser_len - (ser_len%SCALAR_SER_BYTES);
+    if (i==ser_len) i -= SCALAR_SER_BYTES;
+    
+    scalar_decode_short(t1, &ser[i], ser_len-i);
+
+    if (ser_len == sizeof(scalar_t)) {
+        assert(i==0);
+        /* ham-handed reduce */
+        API_NS(scalar_mul)(s,t1,API_NS(scalar_one));
+        API_NS(scalar_destroy)(t1);
+        return;
+    }
+
+    while (i) {
+        i -= SCALAR_SER_BYTES;
+        sc_montmul(t1,t1,sc_r2);
+        ignore_result( API_NS(scalar_decode)(t2, ser+i) );
+        API_NS(scalar_add)(t1, t1, t2);
+    }
+
+    API_NS(scalar_copy)(s, t1);
+    API_NS(scalar_destroy)(t1);
+    API_NS(scalar_destroy)(t2);
+}
+
+void API_NS(scalar_encode)(
+    unsigned char ser[SCALAR_SER_BYTES],
+    const scalar_t s
+) {
+    unsigned int i,j,k=0;
+    for (i=0; i<SCALAR_LIMBS; i++) {
+        for (j=0; j<sizeof(decaf_word_t); j++,k++) {
+            ser[k] = s->limb[i] >> (8*j);
+        }
+    }
+}
+
+void API_NS(scalar_cond_sel) (
+    scalar_t out,
+    const scalar_t a,
+    const scalar_t b,
+    decaf_bool_t pick_b
+) {
+    constant_time_select(out,a,b,sizeof(scalar_t),bool_to_mask(pick_b),sizeof(out->limb[0]));
+}
+
+void API_NS(scalar_halve) (
+    scalar_t out,
+    const scalar_t a
+) {
+    decaf_word_t mask = -(a->limb[0] & 1);
+    decaf_dword_t chain = 0;
+    unsigned int i;
+    for (i=0; i<SCALAR_LIMBS; i++) {
+        chain = (chain + a->limb[i]) + (sc_p->limb[i] & mask);
+        out->limb[i] = chain;
+        chain >>= DECAF_WORD_BITS;
+    }
+    for (i=0; i<SCALAR_LIMBS-1; i++) {
+        out->limb[i] = out->limb[i]>>1 | out->limb[i+1]<<(WBITS-1);
+    }
+    out->limb[i] = out->limb[i]>>1 | chain<<(WBITS-1);
+}
+
diff --git a/crypto/ec/curve448/word.h b/crypto/ec/curve448/word.h
new file mode 100644
index 0000000000..7c7644ad2c
--- /dev/null
+++ b/crypto/ec/curve448/word.h
@@ -0,0 +1,281 @@
+/* Copyright (c) 2014 Cryptography Research, Inc.
+ * Released under the MIT License.  See LICENSE.txt for license information.
+ */
+
+#ifndef __WORD_H__
+#define __WORD_H__
+
+/* for posix_memalign */
+#define _XOPEN_SOURCE 600
+#define __STDC_WANT_LIB_EXT1__ 1 /* for memset_s */
+#include <string.h>
+#if defined(__sun) && defined(__SVR4)
+extern int posix_memalign(void **, size_t, size_t);
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include "arch_intrinsics.h"
+
+#include <decaf/common.h>
+
+#ifndef _BSD_SOURCE
+#define _BSD_SOURCE 1
+#endif
+
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE 1
+#endif
+
+#include "portable_endian.h"
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <inttypes.h>
+
+#if defined(__ARM_NEON__)
+#include <arm_neon.h>
+#elif defined(__SSE2__)
+    #if !defined(__GNUC__) || __clang__ || __GNUC__ >= 5 || (__GNUC__==4 && __GNUC_MINOR__ >= 4)
+        #include <immintrin.h>
+    #else
+        #include <emmintrin.h>
+    #endif
+#endif
+
+#if (ARCH_WORD_BITS == 64) /* integer types sized from the arch word: 64-bit case */
+    typedef uint64_t word_t, mask_t; /* unsigned word; mask_t has the same width */
+    typedef __uint128_t dword_t;     /* unsigned, twice the word width */
+    typedef int32_t hsword_t;        /* signed, half the word width */
+    typedef int64_t sword_t;         /* signed, same width as word_t */
+    typedef __int128_t dsword_t;     /* signed, twice the word width */
+#elif (ARCH_WORD_BITS == 32) /* the same set of types scaled for 32-bit words */
+    typedef uint32_t word_t, mask_t;
+    typedef uint64_t dword_t;
+    typedef int16_t hsword_t;
+    typedef int32_t sword_t;
+    typedef int64_t dsword_t;
+#else
+    #error "For now, libdecaf only supports 32- and 64-bit architectures."
+#endif
+    
+/* Scalar limbs are keyed off of the API word size instead of the arch word size. */
+#if DECAF_WORD_BITS == 64
+    #define SC_LIMB(x) (x##ull) /* one 64-bit constant -> one limb initializer */
+#elif DECAF_WORD_BITS == 32
+    #define SC_LIMB(x) ((uint32_t)x##ull),(x##ull>>32) /* -> two initializers: low 32 bits, then high 32 bits */
+#else
+    #error "For now, libdecaf only supports 32- and 64-bit architectures."
+#endif
+
+#ifdef __ARM_NEON__ /* only vecmask_t is declared here; other vector types presumably come from <arm_neon.h> */
+    typedef uint32x4_t vecmask_t;
+#elif __clang__ /* clang: vector types declared by element count (ext_vector_type) */
+    typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2)));
+    typedef int64_t  int64x2_t __attribute__((ext_vector_type(2)));
+    typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4)));
+    typedef int64_t  int64x4_t __attribute__((ext_vector_type(4)));
+    typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4)));
+    typedef int32_t  int32x4_t __attribute__((ext_vector_type(4)));
+    typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2)));
+    typedef int32_t  int32x2_t __attribute__((ext_vector_type(2)));
+    typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8)));
+    typedef int32_t  int32x8_t __attribute__((ext_vector_type(8)));
+    typedef word_t vecmask_t __attribute__((ext_vector_type(4)));
+#else /* GCC, hopefully? Vector types declared by total size in bytes (vector_size). */
+    typedef uint64_t uint64x2_t __attribute__((vector_size(16)));
+    typedef int64_t  int64x2_t __attribute__((vector_size(16)));
+    typedef uint64_t uint64x4_t __attribute__((vector_size(32)));
+    typedef int64_t  int64x4_t __attribute__((vector_size(32)));
+    typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
+    typedef int32_t  int32x4_t __attribute__((vector_size(16)));
+    typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
+    typedef int32_t  int32x2_t __attribute__((vector_size(8)));
+    typedef uint32_t uint32x8_t __attribute__((vector_size(32)));
+    typedef int32_t  int32x8_t __attribute__((vector_size(32)));
+    typedef word_t vecmask_t __attribute__((vector_size(32)));
+#endif
+
+#if __AVX2__ /* big_register_t: widest register type used here; 8 x 32-bit lanes with AVX2 */
+    #define VECTOR_ALIGNED __attribute__((aligned(32)))
+    typedef uint32x8_t big_register_t;
+    typedef uint64x4_t uint64xn_t;
+    typedef uint32x8_t uint32xn_t;
+
+    static DECAF_INLINE big_register_t
+    br_set_to_mask(mask_t x) { /* copy the low 32 bits of x into every lane */
+        uint32_t y = (uint32_t)x;
+        big_register_t ret = {y,y,y,y,y,y,y,y};
+        return ret;
+    }
+#elif __SSE2__ /* 4 x 32-bit lanes */
+    #define VECTOR_ALIGNED __attribute__((aligned(16)))
+    typedef uint32x4_t big_register_t;
+    typedef uint64x2_t uint64xn_t;
+    typedef uint32x4_t uint32xn_t;
+
+    static DECAF_INLINE big_register_t
+    br_set_to_mask(mask_t x) { /* copy the low 32 bits of x into every lane */
+        uint32_t y = x;
+        big_register_t ret = {y,y,y,y};
+        return ret;
+    }
+#elif __ARM_NEON__ /* 4 x 32-bit lanes */
+    #define VECTOR_ALIGNED __attribute__((aligned(16)))
+    typedef uint32x4_t big_register_t;
+    typedef uint64x2_t uint64xn_t;
+    typedef uint32x4_t uint32xn_t;
+    
+    static DECAF_INLINE big_register_t
+    br_set_to_mask(mask_t x) { /* duplicate x across the four 32-bit lanes */
+        return vdupq_n_u32(x);
+    }
+#elif _WIN64 || __amd64__ || __X86_64__ || __aarch64__ /* no vector unit selected: plain 64-bit scalar */
+    #define VECTOR_ALIGNED __attribute__((aligned(8)))
+    typedef uint64_t big_register_t, uint64xn_t;
+
+    typedef uint32_t uint32xn_t;
+    static DECAF_INLINE big_register_t
+    br_set_to_mask(mask_t x) { /* scalar case: the mask is simply converted */
+        return (big_register_t)x;
+    }
+#else /* everything else: plain 32-bit scalar */
+    #define VECTOR_ALIGNED __attribute__((aligned(4)))
+    typedef uint64_t uint64xn_t;
+    typedef uint32_t uint32xn_t;
+    typedef uint32_t big_register_t;
+
+    static DECAF_INLINE big_register_t
+    br_set_to_mask(mask_t x) { /* scalar case: the mask is simply converted */
+        return (big_register_t)x;
+    }
+#endif
+
+typedef struct { /* packed wrapper around uint64xn_t, so the member carries no alignment requirement */
+    uint64xn_t unaligned;
+} __attribute__((packed)) unaligned_uint64xn_t;
+
+typedef struct { /* packed wrapper around uint32xn_t, so the member carries no alignment requirement */
+    uint32xn_t unaligned;
+} __attribute__((packed)) unaligned_uint32xn_t;
+
+#if __AVX2__ /* br_is_zero: compare a big_register_t against zero, lane by lane on vector targets */
+    static DECAF_INLINE big_register_t
+    br_is_zero(big_register_t x) {
+        return (big_register_t)(x == br_set_to_mask(0));
+    }
+#elif __SSE2__
+    static DECAF_INLINE big_register_t
+    br_is_zero(big_register_t x) {
+        return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128());
+        /* Vector-extension spelling of the same comparison: x == br_set_to_mask(0), as in the AVX2 branch. */
+    }
+#elif __ARM_NEON__
+    static DECAF_INLINE big_register_t
+    br_is_zero(big_register_t x) {
+        return vceqq_u32(x,x^x); /* x^x is an all-zero vector to compare against */
+    }
+#else /* scalar big_register_t: reuse word_is_zero (defined outside this header) */
+    #define br_is_zero word_is_zero
+#endif
+
+/**
+ * Really call memset, in a way that prevents the compiler from optimizing it out.
+ * @param p The object to zeroize.
+ * @param c The char to set it to (probably zero).
+ * @param s The size of the object, in bytes.
+ */
+#if defined(__DARWIN_C_LEVEL) || defined(__STDC_LIB_EXT1__)
+#define HAS_MEMSET_S /* memset_s is taken to exist on Darwin, or when the C library advertises Annex K */
+#endif
+
+#if !defined(__STDC_WANT_LIB_EXT1__) || __STDC_WANT_LIB_EXT1__ != 1
+#define NEED_MEMSET_S_EXTERN /* the headers were not asked for memset_s, so declare it by hand below */
+#endif
+
+#ifdef HAS_MEMSET_S
+    #ifdef NEED_MEMSET_S_EXTERN
+        extern int memset_s(void *, size_t, int, size_t);
+    #endif
+    static DECAF_INLINE void
+    really_memset(void *p, char c, size_t s) {
+        memset_s(p, s, c, s); /* destination size and fill count are both s; return value is not checked */
+    }
+#else
+    /* PERF: use words? */
+    static DECAF_INLINE void
+    really_memset(void *p, char c, size_t s) {
+        volatile char *pv = (volatile char *)p; /* volatile stores, so the writes cannot be elided */
+        size_t i;
+        for (i=0; i<s; i++) pv[i] = c;
+    }
+#endif
+
+/**
+ * Allocate memory which is sufficiently aligned to be used for the
+ * largest vector on the system (for now that's a big_register_t).
+ *
+ * Man malloc says that it does this, but at least for AVX2 on MacOS X,
+ * it's lying.  The alignment is raised to sizeof(void *) when a
+ * big_register_t is smaller, since posix_memalign requires at least that.
+ *
+ * @param size The size of the region to allocate.
+ * @return A suitable pointer, which can be free'd with free(),
+ * or NULL if no memory can be allocated.
+ */
+static DECAF_INLINE void *
+malloc_vector(size_t size) {
+    void *out = NULL;
+    size_t align = sizeof(big_register_t);
+
+    if (align < sizeof(void *)) {
+        align = sizeof(void *);
+    }
+    /* posix_memalign returns nonzero on failure; out is only meaningful on 0 */
+    return posix_memalign(&out, align, size) ? NULL : out;
+}
+
+/* PERF: vectorize vs unroll */
+#ifdef __clang__
+#if 100*__clang_major__ + __clang_minor__ > 305 /* i.e. clang newer than 3.5 */
+#define UNROLL _Pragma("clang loop unroll(full)")
+#endif
+#endif
+
+#ifndef UNROLL /* any other compiler: UNROLL expands to nothing */
+#define UNROLL
+#endif
+
+/* The plan on booleans:
+ *
+ * The external interface uses decaf_bool_t, but this might be a different
+ * size than our particular arch's word_t (and thus mask_t).  Also, the caller
+ * isn't guaranteed to pass it as nonzero.  So bool_to_mask converts word sizes
+ * and checks nonzero.
+ *
+ * On the flip side, mask_t is always -1 or 0, but it might be a different size
+ * than decaf_bool_t.
+ *
+ * On the third hand, we have success vs boolean types, but that's handled in
+ * common.h: it converts between decaf_bool_t and decaf_error_t.
+ */
+static DECAF_INLINE decaf_bool_t mask_to_bool (mask_t m) {
+    return (decaf_sword_t)(sword_t)m; /* go through the signed types so a wider decaf_bool_t gets a sign-extended mask */
+}
+
+static DECAF_INLINE mask_t bool_to_mask (decaf_bool_t m) {
+    /* OR together a nonzero test of each word-sized chunk of m; usually optimized to a cast. */
+    const unsigned int ratio = sizeof(decaf_bool_t)/sizeof(mask_t);
+    const unsigned int chunks = (ratio < 1) ? 1 : ratio;
+    mask_t nonzero = 0;
+    for (unsigned int c = 0; c < chunks; c++) {
+        nonzero |= ~ word_is_zero(m >> (c*8*sizeof(word_t)));
+    }
+    return nonzero;
+}
+
+static DECAF_INLINE void ignore_result ( decaf_bool_t boo ) {
+    (void)boo; /* deliberately discard the value; lets callers drop a result explicitly */
+}
+
+#endif /* __WORD_H__ */