From a091e212fc55244fe03a4c7db7d8978c5b5014cb Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Wed, 27 Jun 2018 12:16:52 +0200 Subject: [PATCH] evp/e_chacha20_poly1305.c: improve performance for short TLS records. Inputs not longer than 64 bytes are processed ~10% faster, longer lengths not divisble by 64, e.g. 255, up to ~20%. Unfortunately it's impossible to measure with apps/speed.c, -aead benchmarks TLS-like call sequence, but not exact. It took specially crafted code path... Reviewed-by: Rich Salz (Merged from https://github.com/openssl/openssl/pull/6597) --- crypto/evp/e_chacha20_poly1305.c | 158 +++++++++++++++++++++++++++++-- 1 file changed, 148 insertions(+), 10 deletions(-) diff --git a/crypto/evp/e_chacha20_poly1305.c b/crypto/evp/e_chacha20_poly1305.c index 9bf98f16a5..47d5e50d45 100644 --- a/crypto/evp/e_chacha20_poly1305.c +++ b/crypto/evp/e_chacha20_poly1305.c @@ -150,6 +150,7 @@ typedef struct { EVP_CHACHA_KEY key; unsigned int nonce[12/4]; unsigned char tag[POLY1305_BLOCK_SIZE]; + unsigned char tls_aad[POLY1305_BLOCK_SIZE]; struct { uint64_t aad, text; } len; int aad, mac_inited, tag_len, nonce_len; size_t tls_payload_length; @@ -179,7 +180,8 @@ static int chacha20_poly1305_init_key(EVP_CIPHER_CTX *ctx, /* pad on the left */ if (actx->nonce_len <= CHACHA_CTR_SIZE) - memcpy(temp + CHACHA_CTR_SIZE - actx->nonce_len, iv, actx->nonce_len); + memcpy(temp + CHACHA_CTR_SIZE - actx->nonce_len, iv, + actx->nonce_len); chacha_init_key(ctx, inkey, temp, enc); @@ -193,23 +195,158 @@ static int chacha20_poly1305_init_key(EVP_CIPHER_CTX *ctx, return 1; } +# if !defined(OPENSSL_SMALL_FOOTPRINT) +static const unsigned char zero[2 * CHACHA_BLK_SIZE] = { 0 }; + +static int chacha20_poly1305_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CHACHA_AEAD_CTX *actx = aead_data(ctx); + size_t i, tail, tohash_len, plen = actx->tls_payload_length; + unsigned char *buf, *tohash, *ctr, storage[2 * CHACHA_BLK_SIZE + 32]; + + if (len != plen + POLY1305_BLOCK_SIZE) + return -1; + + buf = storage + ((0 - (size_t)storage) & 15); /* align */ + ctr = buf + CHACHA_BLK_SIZE; + tohash = buf + CHACHA_BLK_SIZE - POLY1305_BLOCK_SIZE; + + if (plen <= CHACHA_BLK_SIZE) { + actx->key.counter[0] = 0; + ChaCha20_ctr32(buf, zero, 2 * CHACHA_BLK_SIZE, actx->key.key.d, + actx->key.counter); + Poly1305_Init(POLY1305_ctx(actx), buf); + actx->key.partial_len = 0; + memcpy(tohash, actx->tls_aad, POLY1305_BLOCK_SIZE); + tohash_len = POLY1305_BLOCK_SIZE; + actx->len.aad = EVP_AEAD_TLS1_AAD_LEN; + actx->len.text = plen; + + if (ctx->encrypt) { + for (i = 0; i < plen; i++) { + out[i] = ctr[i] ^= in[i]; + } + } else { + for (i = 0; i < plen; i++) { + unsigned char c = in[i]; + out[i] = ctr[i] ^ c; + ctr[i] = c; + } + } + + in += i; + out += i; + + tail = (0 - i) & (POLY1305_BLOCK_SIZE - 1); + memset(ctr + i, 0, tail); + ctr += i + tail; + tohash_len += i + tail; + } else { + actx->key.counter[0] = 0; + ChaCha20_ctr32(buf, zero, CHACHA_BLK_SIZE, actx->key.key.d, + actx->key.counter); + Poly1305_Init(POLY1305_ctx(actx), buf); + actx->key.counter[0] = 1; + actx->key.partial_len = 0; + Poly1305_Update(POLY1305_ctx(actx), actx->tls_aad, POLY1305_BLOCK_SIZE); + tohash = ctr; + tohash_len = 0; + actx->len.aad = EVP_AEAD_TLS1_AAD_LEN; + actx->len.text = plen; + + if (ctx->encrypt) { + ChaCha20_ctr32(out, in, plen, actx->key.key.d, actx->key.counter); + Poly1305_Update(POLY1305_ctx(actx), out, plen); + } else { + Poly1305_Update(POLY1305_ctx(actx), in, plen); + ChaCha20_ctr32(out, in, plen, actx->key.key.d, actx->key.counter); + } + + in += plen; + out += plen; + tail = (0 - plen) & (POLY1305_BLOCK_SIZE - 1); + Poly1305_Update(POLY1305_ctx(actx), zero, tail); + } + + { + const union { + long one; + char little; + } is_endian = { 1 }; + + if (is_endian.little) { + memcpy(ctr, (unsigned char *)&actx->len, POLY1305_BLOCK_SIZE); + } else { + ctr[0] = (unsigned char)(actx->len.aad); + ctr[1] = (unsigned char)(actx->len.aad>>8); + ctr[2] = (unsigned char)(actx->len.aad>>16); + ctr[3] = (unsigned char)(actx->len.aad>>24); + ctr[4] = (unsigned char)(actx->len.aad>>32); + ctr[5] = (unsigned char)(actx->len.aad>>40); + ctr[6] = (unsigned char)(actx->len.aad>>48); + ctr[7] = (unsigned char)(actx->len.aad>>56); + + ctr[8] = (unsigned char)(actx->len.text); + ctr[9] = (unsigned char)(actx->len.text>>8); + ctr[10] = (unsigned char)(actx->len.text>>16); + ctr[11] = (unsigned char)(actx->len.text>>24); + ctr[12] = (unsigned char)(actx->len.text>>32); + ctr[13] = (unsigned char)(actx->len.text>>40); + ctr[14] = (unsigned char)(actx->len.text>>48); + ctr[15] = (unsigned char)(actx->len.text>>56); + } + tohash_len += POLY1305_BLOCK_SIZE; + } + + Poly1305_Update(POLY1305_ctx(actx), tohash, tohash_len); + OPENSSL_cleanse(buf, 2 * CHACHA_BLK_SIZE); + Poly1305_Final(POLY1305_ctx(actx), ctx->encrypt ? actx->tag + : tohash); + + actx->tls_payload_length = NO_TLS_PAYLOAD_LENGTH; + + if (ctx->encrypt) { + memcpy(out, actx->tag, POLY1305_BLOCK_SIZE); + } else { + if (CRYPTO_memcmp(tohash, in, POLY1305_BLOCK_SIZE)) { + memset(out - (len - POLY1305_BLOCK_SIZE), 0, + len - POLY1305_BLOCK_SIZE); + return -1; + } + } + + return len; +} +# else +static const unsigned char zero[CHACHA_BLK_SIZE] = { 0 }; +# endif + static int chacha20_poly1305_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len) { EVP_CHACHA_AEAD_CTX *actx = aead_data(ctx); size_t rem, plen = actx->tls_payload_length; - static const unsigned char zero[POLY1305_BLOCK_SIZE] = { 0 }; if (!actx->mac_inited) { +# if !defined(OPENSSL_SMALL_FOOTPRINT) + if (plen != NO_TLS_PAYLOAD_LENGTH && out != NULL) + return chacha20_poly1305_tls_cipher(ctx, out, in, len); +# endif actx->key.counter[0] = 0; - memset(actx->key.buf, 0, sizeof(actx->key.buf)); - ChaCha20_ctr32(actx->key.buf, actx->key.buf, CHACHA_BLK_SIZE, + ChaCha20_ctr32(actx->key.buf, zero, CHACHA_BLK_SIZE, actx->key.key.d, actx->key.counter); Poly1305_Init(POLY1305_ctx(actx), actx->key.buf); actx->key.counter[0] = 1; actx->key.partial_len = 0; actx->len.aad = actx->len.text = 0; actx->mac_inited = 1; + if (plen != NO_TLS_PAYLOAD_LENGTH) { + Poly1305_Update(POLY1305_ctx(actx), actx->tls_aad, + EVP_AEAD_TLS1_AAD_LEN); + actx->len.aad = EVP_AEAD_TLS1_AAD_LEN; + actx->aad = 1; + } } if (in) { /* aad or text */ @@ -341,6 +478,7 @@ static int chacha20_poly1305_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, actx->tag_len = 0; actx->nonce_len = 12; actx->tls_payload_length = NO_TLS_PAYLOAD_LENGTH; + memset(actx->tls_aad, 0, POLY1305_BLOCK_SIZE); return 1; case EVP_CTRL_COPY: @@ -393,18 +531,18 @@ static int chacha20_poly1305_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, return 0; { unsigned int len; - unsigned char *aad = ptr, temp[POLY1305_BLOCK_SIZE]; + unsigned char *aad = ptr; + memcpy(actx->tls_aad, ptr, EVP_AEAD_TLS1_AAD_LEN); len = aad[EVP_AEAD_TLS1_AAD_LEN - 2] << 8 | aad[EVP_AEAD_TLS1_AAD_LEN - 1]; + aad = actx->tls_aad; if (!ctx->encrypt) { if (len < POLY1305_BLOCK_SIZE) return 0; len -= POLY1305_BLOCK_SIZE; /* discount attached tag */ - memcpy(temp, aad, EVP_AEAD_TLS1_AAD_LEN - 2); - aad = temp; - temp[EVP_AEAD_TLS1_AAD_LEN - 2] = (unsigned char)(len >> 8); - temp[EVP_AEAD_TLS1_AAD_LEN - 1] = (unsigned char)len; + aad[EVP_AEAD_TLS1_AAD_LEN - 2] = (unsigned char)(len >> 8); + aad[EVP_AEAD_TLS1_AAD_LEN - 1] = (unsigned char)len; } actx->tls_payload_length = len; @@ -415,7 +553,7 @@ static int chacha20_poly1305_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, actx->key.counter[2] = actx->nonce[1] ^ CHACHA_U8TOU32(aad); actx->key.counter[3] = actx->nonce[2] ^ CHACHA_U8TOU32(aad+4); actx->mac_inited = 0; - chacha20_poly1305_cipher(ctx, NULL, aad, EVP_AEAD_TLS1_AAD_LEN); + return POLY1305_BLOCK_SIZE; /* tag length */ } -- 2.25.1