X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=ssl%2Fs3_cbc.c;h=9f57fc9a622f14147ebc4d76a35093c23edf8a14;hb=cadbbd51c8b4e66515cd3e97754cfeda606c7b15;hp=8dfc11e713204fe82014e810e431daf2c7c465c5;hpb=59b1129e0a50fdf7e4e58d7c355783a7bfc1f44c;p=oweals%2Fopenssl.git

diff --git a/ssl/s3_cbc.c b/ssl/s3_cbc.c
index 8dfc11e713..9f57fc9a62 100644
--- a/ssl/s3_cbc.c
+++ b/ssl/s3_cbc.c
@@ -76,6 +76,13 @@
 #define DUPLICATE_MSB_TO_ALL(x) ( (unsigned)( (int)(x) >> (sizeof(int)*8-1) ) )
 #define DUPLICATE_MSB_TO_ALL_8(x) ((unsigned char)(DUPLICATE_MSB_TO_ALL(x)))
 
+/* constant_time_lt returns 0xff if a<b and 0x00 otherwise. */
+static unsigned constant_time_lt(unsigned a, unsigned b)
+	{
+	a -= b;
+	return DUPLICATE_MSB_TO_ALL(a);
+	}
+
 /* constant_time_ge returns 0xff if a>=b and 0x00 otherwise. */
 static unsigned constant_time_ge(unsigned a, unsigned b)
 	{
@@ -84,7 +91,7 @@ static unsigned constant_time_ge(unsigned a, unsigned b)
 	}
 
 /* constant_time_eq_8 returns 0xff if a==b and 0x00 otherwise. */
-static unsigned char constant_time_eq_8(unsigned char a, unsigned char b)
+static unsigned char constant_time_eq_8(unsigned a, unsigned b)
 	{
 	unsigned c = a ^ b;
 	c--;
@@ -141,7 +148,7 @@ int tls1_cbc_remove_padding(const SSL* s,
 	unsigned padding_length, good, to_check, i;
 	const unsigned overhead = 1 /* padding length byte */ + mac_size;
 	/* Check if version requires explicit IV */
-	if (s->version == DTLS1_VERSION)
+	if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER)
 		{
 		/* These lengths are all public so we can test them in
 		 * non-constant time.
@@ -218,10 +225,6 @@ int tls1_cbc_remove_padding(const SSL* s,
 	return (int)((good & 1) | (~good & -1));
 	}
 
-#if defined(_M_AMD64) || defined(__x86_64__)
-#define CBC_MAC_ROTATE_IN_PLACE
-#endif
-
 /* ssl3_cbc_copy_mac copies |md_size| bytes from the end of |rec| to |out| in
  * constant time (independent of the concrete value of rec->length, which may
  * vary within a 256-byte window).
@@ -235,15 +238,18 @@ int tls1_cbc_remove_padding(const SSL* s,
  *
  * If CBC_MAC_ROTATE_IN_PLACE is defined then the rotation is performed with
  * variable accesses in a 64-byte-aligned buffer. Assuming that this fits into
- * a single cache-line, then the variable memory accesses don't actually affect
- * the timing. This has been tested to be true on Intel amd64 chips.
+ * a single or pair of cache-lines, then the variable memory accesses don't
+ * actually affect the timing. CPUs with smaller cache-lines [if any] are
+ * not multi-core and are not considered vulnerable to cache-timing attacks.
  */
+#define CBC_MAC_ROTATE_IN_PLACE
+
 void ssl3_cbc_copy_mac(unsigned char* out,
 		       const SSL3_RECORD *rec,
 		       unsigned md_size,unsigned orig_len)
 	{
 #if defined(CBC_MAC_ROTATE_IN_PLACE)
-	unsigned char rotated_mac_buf[EVP_MAX_MD_SIZE*2];
+	unsigned char rotated_mac_buf[64+EVP_MAX_MD_SIZE];
 	unsigned char *rotated_mac;
 #else
 	unsigned char rotated_mac[EVP_MAX_MD_SIZE];
@@ -263,7 +269,7 @@ void ssl3_cbc_copy_mac(unsigned char* out,
 	OPENSSL_assert(md_size <= EVP_MAX_MD_SIZE);
 
 #if defined(CBC_MAC_ROTATE_IN_PLACE)
-	rotated_mac = (unsigned char*) (((intptr_t)(rotated_mac_buf + 64)) & ~63);
+	rotated_mac = rotated_mac_buf + ((0-(size_t)rotated_mac_buf)&63);
 #endif
 	/* This information is public so it's safe to branch based on it.
 	 */
@@ -281,16 +287,13 @@ void ssl3_cbc_copy_mac(unsigned char* out,
 	rotate_offset = (div_spoiler + mac_start - scan_start) % md_size;
 
 	memset(rotated_mac, 0, md_size);
-	for (i = scan_start; i < orig_len;)
+	for (i = scan_start, j = 0; i < orig_len; i++)
 		{
-		for (j = 0; j < md_size && i < orig_len; i++, j++)
-			{
-			unsigned char mac_started = constant_time_ge(i, mac_start);
-			unsigned char mac_ended = constant_time_ge(i, mac_end);
-			unsigned char b = 0;
-			b = rec->data[i];
-			rotated_mac[j] |= b & mac_started & ~mac_ended;
-			}
+		unsigned char mac_started = constant_time_ge(i, mac_start);
+		unsigned char mac_ended = constant_time_ge(i, mac_end);
+		unsigned char b = rec->data[i];
+		rotated_mac[j++] |= b & mac_started & ~mac_ended;
+		j &= constant_time_lt(j,md_size);
 		}
 
 	/* Now rotate the MAC */
@@ -298,16 +301,21 @@
 	j = 0;
 	for (i = 0; i < md_size; i++)
 		{
-		unsigned char offset = (div_spoiler + rotate_offset + i) % md_size;
-		out[j++] = rotated_mac[offset];
+		/* in case cache-line is 32 bytes, touch second line */
+		((volatile unsigned char *)rotated_mac)[rotate_offset^32];
+		out[j++] = rotated_mac[rotate_offset++];
+		rotate_offset &= constant_time_lt(rotate_offset,md_size);
 		}
 #else
 	memset(out, 0, md_size);
+	rotate_offset = md_size - rotate_offset;
+	rotate_offset &= constant_time_lt(rotate_offset,md_size);
 	for (i = 0; i < md_size; i++)
 		{
-		unsigned char offset = (div_spoiler + md_size - rotate_offset + i) % md_size;
 		for (j = 0; j < md_size; j++)
-			out[j] |= rotated_mac[i] & constant_time_eq_8(j, offset);
+			out[j] |= rotated_mac[i] & constant_time_eq_8(j, rotate_offset);
+		rotate_offset++;
+		rotate_offset &= constant_time_lt(rotate_offset,md_size);
 		}
 #endif
 	}
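
The core idiom in this patch is worth spelling out: each secret-dependent "% md_size" reduction is replaced by a mask-and-wrap step. constant_time_lt subtracts b from a and smears the resulting most-significant bit across the whole word, so it yields an all-ones mask exactly when a < b. The update "j &= constant_time_lt(j, md_size)" therefore resets j to 0 the moment it reaches md_size, with no branch and no division. Below is a minimal standalone sketch of that wrap-around counter; it is not part of the patch, and the md_size value and iteration count are illustrative only.

#include <stdio.h>

/* Same construction as in the patch: arithmetic-shift the sign bit of x
 * across the whole word, giving all-ones or all-zeroes. */
#define DUPLICATE_MSB_TO_ALL(x) ( (unsigned)( (int)(x) >> (sizeof(int)*8-1) ) )

/* Returns all-ones if a<b and 0 otherwise: a-b wraps below zero exactly
 * when a<b, setting the most-significant bit. */
static unsigned constant_time_lt(unsigned a, unsigned b)
	{
	a -= b;
	return DUPLICATE_MSB_TO_ALL(a);
	}

int main(void)
	{
	unsigned md_size = 20;	/* illustrative; e.g. a SHA-1 MAC */
	unsigned i, j = 0;

	/* Branch-free equivalent of j = (j + 1) % md_size, as in the patch:
	 * the mask keeps j while j < md_size and zeroes it the moment j
	 * reaches md_size. */
	for (i = 0; i < 45; i++)
		{
		j++;
		j &= constant_time_lt(j, md_size);
		}
	printf("after 45 steps: j = %u (45 %% %u = %u)\n", j, md_size, 45 % md_size);
	return 0;
	}

Compiled as plain C (e.g. cc -O2), this prints j = 5, matching 45 % 20, while executing the same instruction sequence regardless of j's value.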
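
The pointer arithmetic in the CBC_MAC_ROTATE_IN_PLACE path follows the same branch-free spirit: ((0-(size_t)rotated_mac_buf)&63) is the distance from the buffer's address up to the next 64-byte boundary (zero when it is already aligned), so adding it yields the first 64-byte-aligned byte inside the buffer; that is why rotated_mac_buf is oversized by 64 bytes. A short sketch of just that computation follows; it assumes, as the patch itself does, that a data pointer round-trips through size_t, and it hard-codes EVP_MAX_MD_SIZE as 64, its value in OpenSSL's evp.h.

#include <stdio.h>
#include <stddef.h>

#define EVP_MAX_MD_SIZE 64	/* value in OpenSSL's evp.h */

int main(void)
	{
	unsigned char rotated_mac_buf[64+EVP_MAX_MD_SIZE];
	/* (0 - (size_t)p) & 63 is the distance from p up to the next
	 * 64-byte boundary (0 if p is already aligned), so rotated_mac is
	 * the first 64-byte-aligned address inside the oversized buffer. */
	unsigned char *rotated_mac =
		rotated_mac_buf + ((0-(size_t)rotated_mac_buf)&63);

	printf("buf=%p aligned=%p offset=%d\n",
	       (void*)rotated_mac_buf, (void*)rotated_mac,
	       (int)(rotated_mac - rotated_mac_buf));
	return 0;
	}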