From 53eb05bdf00d7237e3b12976c2ac38d68206eb13 Mon Sep 17 00:00:00 2001 From: Patrick Steuer Date: Fri, 15 Nov 2019 23:27:09 +0100 Subject: [PATCH] AES CTR-DRGB: performance improvement Optimize the the AES-based implementation of the CTR_DRBG construction, see 10.2.1 in [1]. Due to the optimizations, the code may deviate (more) from the pseudocode in [1], but it is functional equivalence being decisive for compliance: "All DRBG mechanisms and algorithms are described in this document in pseudocode, which is intended to explain functionality. The pseudocode is not intended to constrain real-world implementations." [9 in [1]]. The following optimizations are done: - Replace multiple plain AES encryptions by a single AES-ECB encryption of a corresponding pre-initialized buffer, where possible. This allows platform-specific AES-ECB support to be used and reduces the overhead of multiple EVP calls. - Replace the generate operation loop (which is a counter increment followed by a plain AES encryption) by a loop which does a plain AES encryption followed by a counter increment. The latter loop is just a description of AES-CTR, so we replace it by a single AES-CTR encryption. This allows for platform-specific AES-CTR support to be used and reduces the overhead of multiple EVP calls. This change, that is, going from a pre- to a post- counter increment, requires the counter in the internal state to be kept at "+1" (compared to the pseudocode in [1]) such that it is in the correct state, when a generate operation is called. That in turn also requires all other operations to be changed from pre- to post-increment to keep functional equivalence. [1] NIST SP 800-90A Revision 1 Signed-off-by: Patrick Steuer Reviewed-by: Tomas Mraz (cherry picked from commit 28bdbe1aaa474ae8cd83e520d02e463e46ce89d9) Reviewed-by: Bernd Edlinger (Merged from https://github.com/openssl/openssl/pull/11968) --- crypto/rand/drbg_ctr.c | 198 ++++++++++++++++++++++++--------------- crypto/rand/rand_local.h | 6 +- 2 files changed, 129 insertions(+), 75 deletions(-) diff --git a/crypto/rand/drbg_ctr.c b/crypto/rand/drbg_ctr.c index 0f0ad1b37b..89c9ccc876 100644 --- a/crypto/rand/drbg_ctr.c +++ b/crypto/rand/drbg_ctr.c @@ -63,15 +63,15 @@ static void ctr_XOR(RAND_DRBG_CTR *ctr, const unsigned char *in, size_t inlen) * Process a complete block using BCC algorithm of SP 800-90A 10.3.3 */ __owur static int ctr_BCC_block(RAND_DRBG_CTR *ctr, unsigned char *out, - const unsigned char *in) + const unsigned char *in, int len) { int i, outlen = AES_BLOCK_SIZE; - for (i = 0; i < 16; i++) + for (i = 0; i < len; i++) out[i] ^= in[i]; - if (!EVP_CipherUpdate(ctr->ctx_df, out, &outlen, out, AES_BLOCK_SIZE) - || outlen != AES_BLOCK_SIZE) + if (!EVP_CipherUpdate(ctr->ctx_df, out, &outlen, out, len) + || outlen != len) return 0; return 1; } @@ -82,12 +82,16 @@ __owur static int ctr_BCC_block(RAND_DRBG_CTR *ctr, unsigned char *out, */ __owur static int ctr_BCC_blocks(RAND_DRBG_CTR *ctr, const unsigned char *in) { - if (!ctr_BCC_block(ctr, ctr->KX, in) - || !ctr_BCC_block(ctr, ctr->KX + 16, in)) - return 0; - if (ctr->keylen != 16 && !ctr_BCC_block(ctr, ctr->KX + 32, in)) - return 0; - return 1; + unsigned char in_tmp[48]; + unsigned char num_of_blk = 2; + + memcpy(in_tmp, in, 16); + memcpy(in_tmp + 16, in, 16); + if (ctr->keylen != 16) { + memcpy(in_tmp + 32, in, 16); + num_of_blk = 3; + } + return ctr_BCC_block(ctr, ctr->KX, in_tmp, AES_BLOCK_SIZE * num_of_blk); } /* @@ -96,19 +100,14 @@ __owur static int ctr_BCC_blocks(RAND_DRBG_CTR *ctr, const unsigned char *in) */ __owur static int ctr_BCC_init(RAND_DRBG_CTR *ctr) { + unsigned char bltmp[48] = {0}; + unsigned char num_of_blk; + memset(ctr->KX, 0, 48); - memset(ctr->bltmp, 0, 16); - if (!ctr_BCC_block(ctr, ctr->KX, ctr->bltmp)) - return 0; - ctr->bltmp[3] = 1; - if (!ctr_BCC_block(ctr, ctr->KX + 16, ctr->bltmp)) - return 0; - if (ctr->keylen != 16) { - ctr->bltmp[3] = 2; - if (!ctr_BCC_block(ctr, ctr->KX + 32, ctr->bltmp)) - return 0; - } - return 1; + num_of_blk = ctr->keylen == 16 ? 2 : 3; + bltmp[(AES_BLOCK_SIZE * 1) + 3] = 1; + bltmp[(AES_BLOCK_SIZE * 2) + 3] = 2; + return ctr_BCC_block(ctr, ctr->KX, bltmp, num_of_blk * AES_BLOCK_SIZE); } /* @@ -197,20 +196,20 @@ __owur static int ctr_df(RAND_DRBG_CTR *ctr, || !ctr_BCC_final(ctr)) return 0; /* Set up key K */ - if (!EVP_CipherInit_ex(ctr->ctx, ctr->cipher, NULL, ctr->KX, NULL, 1)) + if (!EVP_CipherInit_ex(ctr->ctx_ecb, NULL, NULL, ctr->KX, NULL, -1)) return 0; /* X follows key K */ - if (!EVP_CipherUpdate(ctr->ctx, ctr->KX, &outlen, ctr->KX + ctr->keylen, + if (!EVP_CipherUpdate(ctr->ctx_ecb, ctr->KX, &outlen, ctr->KX + ctr->keylen, AES_BLOCK_SIZE) || outlen != AES_BLOCK_SIZE) return 0; - if (!EVP_CipherUpdate(ctr->ctx, ctr->KX + 16, &outlen, ctr->KX, + if (!EVP_CipherUpdate(ctr->ctx_ecb, ctr->KX + 16, &outlen, ctr->KX, AES_BLOCK_SIZE) || outlen != AES_BLOCK_SIZE) return 0; if (ctr->keylen != 16) - if (!EVP_CipherUpdate(ctr->ctx, ctr->KX + 32, &outlen, ctr->KX + 16, - AES_BLOCK_SIZE) + if (!EVP_CipherUpdate(ctr->ctx_ecb, ctr->KX + 32, &outlen, + ctr->KX + 16, AES_BLOCK_SIZE) || outlen != AES_BLOCK_SIZE) return 0; return 1; @@ -229,31 +228,25 @@ __owur static int ctr_update(RAND_DRBG *drbg, { RAND_DRBG_CTR *ctr = &drbg->data.ctr; int outlen = AES_BLOCK_SIZE; + unsigned char V_tmp[48], out[48]; + unsigned char len; /* correct key is already set up. */ + memcpy(V_tmp, ctr->V, 16); inc_128(ctr); - if (!EVP_CipherUpdate(ctr->ctx, ctr->K, &outlen, ctr->V, AES_BLOCK_SIZE) - || outlen != AES_BLOCK_SIZE) - return 0; - - /* If keylen longer than 128 bits need extra encrypt */ - if (ctr->keylen != 16) { + memcpy(V_tmp + 16, ctr->V, 16); + if (ctr->keylen == 16) { + len = 32; + } else { inc_128(ctr); - if (!EVP_CipherUpdate(ctr->ctx, ctr->K+16, &outlen, ctr->V, - AES_BLOCK_SIZE) - || outlen != AES_BLOCK_SIZE) - return 0; + memcpy(V_tmp + 32, ctr->V, 16); + len = 48; } - inc_128(ctr); - if (!EVP_CipherUpdate(ctr->ctx, ctr->V, &outlen, ctr->V, AES_BLOCK_SIZE) - || outlen != AES_BLOCK_SIZE) + if (!EVP_CipherUpdate(ctr->ctx_ecb, out, &outlen, V_tmp, len) + || outlen != len) return 0; - - /* If 192 bit key part of V is on end of K */ - if (ctr->keylen == 24) { - memcpy(ctr->V + 8, ctr->V, 8); - memcpy(ctr->V, ctr->K + 24, 8); - } + memcpy(ctr->K, out, ctr->keylen); + memcpy(ctr->V, out + ctr->keylen, 16); if ((drbg->flags & RAND_DRBG_FLAG_CTR_NO_DF) == 0) { /* If no input reuse existing derived value */ @@ -268,7 +261,8 @@ __owur static int ctr_update(RAND_DRBG *drbg, ctr_XOR(ctr, in2, in2len); } - if (!EVP_CipherInit_ex(ctr->ctx, ctr->cipher, NULL, ctr->K, NULL, 1)) + if (!EVP_CipherInit_ex(ctr->ctx_ecb, NULL, NULL, ctr->K, NULL, -1) + || !EVP_CipherInit_ex(ctr->ctx_ctr, NULL, NULL, ctr->K, NULL, -1)) return 0; return 1; } @@ -285,8 +279,10 @@ __owur static int drbg_ctr_instantiate(RAND_DRBG *drbg, memset(ctr->K, 0, sizeof(ctr->K)); memset(ctr->V, 0, sizeof(ctr->V)); - if (!EVP_CipherInit_ex(ctr->ctx, ctr->cipher, NULL, ctr->K, NULL, 1)) + if (!EVP_CipherInit_ex(ctr->ctx_ecb, NULL, NULL, ctr->K, NULL, -1)) return 0; + + inc_128(ctr); if (!ctr_update(drbg, entropy, entropylen, pers, perslen, nonce, noncelen)) return 0; return 1; @@ -296,20 +292,40 @@ __owur static int drbg_ctr_reseed(RAND_DRBG *drbg, const unsigned char *entropy, size_t entropylen, const unsigned char *adin, size_t adinlen) { + RAND_DRBG_CTR *ctr = &drbg->data.ctr; + if (entropy == NULL) return 0; + + inc_128(ctr); if (!ctr_update(drbg, entropy, entropylen, adin, adinlen, NULL, 0)) return 0; return 1; } +static void ctr96_inc(unsigned char *counter) +{ + u32 n = 12, c = 1; + + do { + --n; + c += counter[n]; + counter[n] = (u8)c; + c >>= 8; + } while (n); +} + __owur static int drbg_ctr_generate(RAND_DRBG *drbg, unsigned char *out, size_t outlen, const unsigned char *adin, size_t adinlen) { RAND_DRBG_CTR *ctr = &drbg->data.ctr; + unsigned int ctr32, blocks; + int outl, buflen; if (adin != NULL && adinlen != 0) { + inc_128(ctr); + if (!ctr_update(drbg, adin, adinlen, NULL, 0, NULL, 0)) return 0; /* This means we reuse derived value */ @@ -321,28 +337,51 @@ __owur static int drbg_ctr_generate(RAND_DRBG *drbg, adinlen = 0; } - for ( ; ; ) { - int outl = AES_BLOCK_SIZE; + inc_128(ctr); + if (outlen == 0) { inc_128(ctr); - if (outlen < 16) { - /* Use K as temp space as it will be updated */ - if (!EVP_CipherUpdate(ctr->ctx, ctr->K, &outl, ctr->V, - AES_BLOCK_SIZE) - || outl != AES_BLOCK_SIZE) - return 0; - memcpy(out, ctr->K, outlen); - break; - } - if (!EVP_CipherUpdate(ctr->ctx, out, &outl, ctr->V, AES_BLOCK_SIZE) - || outl != AES_BLOCK_SIZE) + + if (!ctr_update(drbg, adin, adinlen, NULL, 0, NULL, 0)) return 0; - out += 16; - outlen -= 16; - if (outlen == 0) - break; + return 1; } + memset(out, 0, outlen); + + do { + if (!EVP_CipherInit_ex(ctr->ctx_ctr, + NULL, NULL, NULL, ctr->V, -1)) + return 0; + + /*- + * outlen has type size_t while EVP_CipherUpdate takes an + * int argument and thus cannot be guaranteed to process more + * than 2^31-1 bytes at a time. We process such huge generate + * requests in 2^30 byte chunks, which is the greatest multiple + * of AES block size lower than or equal to 2^31-1. + */ + buflen = outlen > (1U << 30) ? (1U << 30) : outlen; + blocks = (buflen + 15) / 16; + + ctr32 = GETU32(ctr->V + 12) + blocks; + if (ctr32 < blocks) { + /* 32-bit counter overflow into V. */ + blocks -= ctr32; + buflen = blocks * 16; + ctr32 = 0; + ctr96_inc(ctr->V); + } + PUTU32(ctr->V + 12, ctr32); + + if (!EVP_CipherUpdate(ctr->ctx_ctr, out, &outl, out, buflen) + || outl != buflen) + return 0; + + out += buflen; + outlen -= buflen; + } while (outlen); + if (!ctr_update(drbg, adin, adinlen, NULL, 0, NULL, 0)) return 0; return 1; @@ -350,7 +389,8 @@ __owur static int drbg_ctr_generate(RAND_DRBG *drbg, static int drbg_ctr_uninstantiate(RAND_DRBG *drbg) { - EVP_CIPHER_CTX_free(drbg->data.ctr.ctx); + EVP_CIPHER_CTX_free(drbg->data.ctr.ctx_ecb); + EVP_CIPHER_CTX_free(drbg->data.ctr.ctx_ctr); EVP_CIPHER_CTX_free(drbg->data.ctr.ctx_df); OPENSSL_cleanse(&drbg->data.ctr, sizeof(drbg->data.ctr)); return 1; @@ -374,25 +414,36 @@ int drbg_ctr_init(RAND_DRBG *drbg) return 0; case NID_aes_128_ctr: keylen = 16; - ctr->cipher = EVP_aes_128_ecb(); + ctr->cipher_ecb = EVP_aes_128_ecb(); + ctr->cipher_ctr = EVP_aes_128_ctr(); break; case NID_aes_192_ctr: keylen = 24; - ctr->cipher = EVP_aes_192_ecb(); + ctr->cipher_ecb = EVP_aes_192_ecb(); + ctr->cipher_ctr = EVP_aes_192_ctr(); break; case NID_aes_256_ctr: keylen = 32; - ctr->cipher = EVP_aes_256_ecb(); + ctr->cipher_ecb = EVP_aes_256_ecb(); + ctr->cipher_ctr = EVP_aes_256_ctr(); break; } drbg->meth = &drbg_ctr_meth; ctr->keylen = keylen; - if (ctr->ctx == NULL) - ctr->ctx = EVP_CIPHER_CTX_new(); - if (ctr->ctx == NULL) + if (ctr->ctx_ecb == NULL) + ctr->ctx_ecb = EVP_CIPHER_CTX_new(); + if (ctr->ctx_ctr == NULL) + ctr->ctx_ctr = EVP_CIPHER_CTX_new(); + if (ctr->ctx_ecb == NULL || ctr->ctx_ctr == NULL + || !EVP_CipherInit_ex(ctr->ctx_ecb, + ctr->cipher_ecb, NULL, NULL, NULL, 1) + || !EVP_CipherInit_ex(ctr->ctx_ctr, + ctr->cipher_ctr, NULL, NULL, NULL, 1)) return 0; + + drbg->meth = &drbg_ctr_meth; drbg->strength = keylen * 8; drbg->seedlen = keylen + 16; @@ -410,7 +461,8 @@ int drbg_ctr_init(RAND_DRBG *drbg) if (ctr->ctx_df == NULL) return 0; /* Set key schedule for df_key */ - if (!EVP_CipherInit_ex(ctr->ctx_df, ctr->cipher, NULL, df_key, NULL, 1)) + if (!EVP_CipherInit_ex(ctr->ctx_df, + ctr->cipher_ecb, NULL, df_key, NULL, 1)) return 0; drbg->min_entropylen = ctr->keylen; diff --git a/crypto/rand/rand_local.h b/crypto/rand/rand_local.h index 1bc9bf7d26..0cdfb3332e 100644 --- a/crypto/rand/rand_local.h +++ b/crypto/rand/rand_local.h @@ -138,9 +138,11 @@ typedef struct rand_drbg_method_st { * The state of a DRBG AES-CTR. */ typedef struct rand_drbg_ctr_st { - EVP_CIPHER_CTX *ctx; + EVP_CIPHER_CTX *ctx_ecb; + EVP_CIPHER_CTX *ctx_ctr; EVP_CIPHER_CTX *ctx_df; - const EVP_CIPHER *cipher; + const EVP_CIPHER *cipher_ecb; + const EVP_CIPHER *cipher_ctr; size_t keylen; unsigned char K[32]; unsigned char V[16]; -- 2.25.1