From f472ec8c2f354314d278e11be567b43630acf090 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Tue, 4 May 2010 19:23:02 +0000 Subject: [PATCH] "Jumbo" update for crypto/modes: - introduce common modes_lcl.h; - ctr128.c: implement additional CRYPTO_ctr128_encrypt_ctr32 interface; - gcm128.c: add omitted ARM initialization, remove ctx.ctr; --- crypto/modes/Makefile | 11 +-- crypto/modes/cbc128.c | 9 +-- crypto/modes/cfb128.c | 10 +-- crypto/modes/ctr128.c | 91 ++++++++++++++++++++--- crypto/modes/cts128.c | 2 +- crypto/modes/gcm128.c | 155 ++++++++++++++++++--------------------- crypto/modes/modes.h | 9 +++ crypto/modes/modes_lcl.h | 75 +++++++++++++++++++ crypto/modes/ofb128.c | 10 +-- 9 files changed, 245 insertions(+), 127 deletions(-) create mode 100644 crypto/modes/modes_lcl.h diff --git a/crypto/modes/Makefile b/crypto/modes/Makefile index 4c0de955ba..0d6306c47a 100644 --- a/crypto/modes/Makefile +++ b/crypto/modes/Makefile @@ -95,8 +95,9 @@ clean: # DO NOT DELETE THIS LINE -- make depend depends on it. 
-cbc128.o: cbc128.c modes.h -cfb128.o: cfb128.c modes.h -ctr128.o: ctr128.c modes.h -cts128.o: cts128.c modes.h -ofb128.o: modes.h ofb128.c +cbc128.o: ../../include/openssl/modes.h cbc128.c modes_lcl.h +cfb128.o: ../../include/openssl/modes.h cfb128.c modes_lcl.h +ctr128.o: ../../include/openssl/modes.h ctr128.c modes_lcl.h +cts128.o: ../../include/openssl/modes.h cts128.c modes_lcl.h +gcm128.o: ../../include/openssl/modes.h gcm128.c modes_lcl.h +ofb128.o: ../../include/openssl/modes.h modes_lcl.h ofb128.c diff --git a/crypto/modes/cbc128.c b/crypto/modes/cbc128.c index 8f8bd563b9..dbf5b45a21 100644 --- a/crypto/modes/cbc128.c +++ b/crypto/modes/cbc128.c @@ -48,7 +48,7 @@ * */ -#include "modes.h" +#include "modes_lcl.h" #include #ifndef MODES_DEBUG @@ -58,12 +58,7 @@ #endif #include -#define STRICT_ALIGNMENT 1 -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT +#ifndef STRICT_ALIGNMENT # define STRICT_ALIGNMENT 0 #endif diff --git a/crypto/modes/cfb128.c b/crypto/modes/cfb128.c index e5938c6137..b76514b9b5 100644 --- a/crypto/modes/cfb128.c +++ b/crypto/modes/cfb128.c @@ -48,7 +48,7 @@ * */ -#include "modes.h" +#include "modes_lcl.h" #include #ifndef MODES_DEBUG @@ -58,14 +58,6 @@ #endif #include -#define STRICT_ALIGNMENT -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT -#endif - /* The input and output encrypted as though 128bit cfb mode is being * used. 
The extra state information to record how much of the * 128bit block we have used is contained in *num; diff --git a/crypto/modes/ctr128.c b/crypto/modes/ctr128.c index 932037f551..087b1b6866 100644 --- a/crypto/modes/ctr128.c +++ b/crypto/modes/ctr128.c @@ -48,7 +48,7 @@ * */ -#include "modes.h" +#include "modes_lcl.h" #include #ifndef MODES_DEBUG @@ -58,17 +58,6 @@ #endif #include -typedef unsigned int u32; -typedef unsigned char u8; - -#define STRICT_ALIGNMENT -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT -#endif - /* NOTE: the IV/counter CTR mode is big-endian. The code itself * is endian-neutral. */ @@ -182,3 +171,81 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, *num=n; } + +/* increment upper 96 bits of 128-bit counter by 1 */ +static void ctr96_inc(unsigned char *counter) { + u32 n=12; + u8 c; + + do { + --n; + c = counter[n]; + ++c; + counter[n] = c; + if (c) return; + } while (n); +} + +void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], unsigned char ecount_buf[16], + unsigned int *num, ctr128_f func) +{ + unsigned int n,ctr32; + + assert(in && out && key && ecount_buf && num); + assert(*num < 16); + + n = *num; + + while (n && len) { + *(out++) = *(in++) ^ ecount_buf[n]; + --len; + n = (n+1) % 16; + } + + ctr32 = GETU32(ivec+12); + while (len>=16) { + size_t blocks = len/16; + /* + * 1<<28 is just a not-so-small yet not-so-large number... + * Below condition is practically never met, but it has to + * be checked for code correctness. + */ + if (sizeof(size_t)>sizeof(unsigned int) && blocks>(1U<<28)) + blocks = (1U<<28); + /* + * As (*func) operates on 32-bit counter, caller + * has to handle overflow. 
'if' below detects the + * overflow, which is then handled by limiting the + * amount of blocks to the exact overflow point... + */ + ctr32 += (u32)blocks; + if (ctr32 < blocks) { + blocks -= ctr32; + ctr32 = 0; + } + (*func)(in,out,blocks,key,ivec); + /* (*ctr) does not update ivec, caller does: */ + PUTU32(ivec+12,ctr32); + /* ... overflow was detected, propagate carry. */ + if (ctr32 == 0) ctr96_inc(ivec); + blocks *= 16; + len -= blocks; + out += blocks; + in += blocks; + } + if (len) { + memset(ecount_buf,0,16); + (*func)(ecount_buf,ecount_buf,1,key,ivec); + ++ctr32; + PUTU32(ivec+12,ctr32); + if (ctr32 == 0) ctr96_inc(ivec); + while (len--) { + out[n] = in[n] ^ ecount_buf[n]; + ++n; + } + } + + *num=n; +} diff --git a/crypto/modes/cts128.c b/crypto/modes/cts128.c index 450ea44a92..3348450450 100644 --- a/crypto/modes/cts128.c +++ b/crypto/modes/cts128.c @@ -5,7 +5,7 @@ * forms are granted according to the OpenSSL license. */ -#include "modes.h" +#include "modes_lcl.h" #include #ifndef MODES_DEBUG diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c index 8dc69cbcfb..920f525789 100644 --- a/crypto/modes/gcm128.c +++ b/crypto/modes/gcm128.c @@ -47,7 +47,7 @@ * ==================================================================== */ -#include "modes.h" +#include "modes_lcl.h" #include #ifndef MODES_DEBUG @@ -57,64 +57,14 @@ #endif #include -#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) -typedef __int64 i64; -typedef unsigned __int64 u64; -#define U64(C) C##UI64 -#elif defined(__arch64__) -typedef long i64; -typedef unsigned long u64; -#define U64(C) C##UL -#else -typedef long long i64; -typedef unsigned long long u64; -#define U64(C) C##ULL -#endif - -typedef unsigned int u32; -typedef unsigned char u8; typedef struct { u64 hi,lo; } u128; -#define STRICT_ALIGNMENT -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - 
defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT -#endif - -#if defined(__GNUC__) && __GNUC__>=2 && !defined(PEDANTIC) -# if defined(__x86_64) || defined(__x86_64__) -# define BSWAP8(x) ({ u64 ret=(x); \ - asm volatile ("bswapq %0" \ - : "+r"(ret)); ret; }) -# define BSWAP4(x) ({ u32 ret=(x); \ - asm volatile ("bswapl %0" \ - : "+r"(ret)); ret; }) -# elif (defined(__i386) || defined(__i386__)) && !defined(PEDANTIC) -# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ - asm volatile ("bswapl %0; bswapl %1" \ - : "+r"(hi),"+r"(lo)); \ - (u64)hi<<32|lo; }) -# define BSWAP4(x) ({ u32 ret=(x); \ - asm volatile ("bswapl %0" \ - : "+r"(ret)); ret; }) -# endif -#elif defined(_MSC_VER) -# if _MSC_VER>=1300 -# pragma intrinsic(_byteswap_uint64,_byteswap_ulong) -# define BSWAP8(x) _byteswap_uint64((u64)(x)) -# define BSWAP4(x) _byteswap_ulong((u32)(x)) -# elif defined(_M_IX86) -# endif -#endif - -#ifdef BSWAP4 -#define GETU32(p) BSWAP4(*(const u32 *)(p)) -#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) -#else -#define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3]) -#define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v)) +#if defined(BSWAP4) && defined(STRICT_ALIGNMENT) +/* redefine, because alignment is ensured */ +#undef GETU32 +#define GETU32(p) BSWAP4(*(const u32 *)(p)) +#undef PUTU32 +#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) #endif #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) @@ -284,29 +234,34 @@ static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256]) static void gcm_init_4bit(u128 Htable[16], u64 H[2]) { - int i; u128 V; +#if defined(OPENSSL_SMALL_FOOTPRINT) + int i; +#endif +#define REDUCE(V) do { \ + if (sizeof(size_t)==8) { \ + u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ + V.lo = (V.hi<<63)|(V.lo>>1); \ + V.hi = (V.hi>>1 )^T; \ + } \ + else { \ + u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ + V.lo = (V.hi<<63)|(V.lo>>1); \ + V.hi = (V.hi>>1 )^((u64)T<<32); \ + } \ +} 
while(0) Htable[0].hi = 0; Htable[0].lo = 0; V.hi = H[0]; V.lo = H[1]; +#if defined(OPENSSL_SMALL_FOOTPRINT) for (Htable[8]=V, i=4; i>0; i>>=1) { - if (sizeof(size_t)==8) { - u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); - V.lo = (V.hi<<63)|(V.lo>>1); - V.hi = (V.hi>>1 )^T; - } - else { - u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); - V.lo = (V.hi<<63)|(V.lo>>1); - V.hi = (V.hi>>1 )^((u64)T<<32); - } + REDUCE(V); Htable[i] = V; } -#if defined(OPENSSL_SMALL_FOOTPRINT) for (i=2; i<16; i<<=1) { u128 *Hi = Htable+i; int j; @@ -316,6 +271,13 @@ static void gcm_init_4bit(u128 Htable[16], u64 H[2]) } } #else + Htable[8] = V; + REDUCE(V); + Htable[4] = V; + REDUCE(V); + Htable[2] = V; + REDUCE(V); + Htable[1] = V; Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo; V=Htable[4]; Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo; @@ -330,6 +292,29 @@ static void gcm_init_4bit(u128 Htable[16], u64 H[2]) Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo; Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo; #endif +#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm)) + /* + * ARM assembler expects specific dword order in Htable. 
+ */ + { + int j; + const union { long one; char little; } is_endian = {1}; + + if (is_endian.little) + for (j=0;j<16;++j) { + V = Htable[j]; + Htable[j].hi = V.lo; + Htable[j].lo = V.hi; + } + else + for (j=0;j<16;++j) { + V = Htable[j]; + Htable[j].hi = V.lo<<32|V.lo>>32; + Htable[j].lo = V.hi<<32|V.hi>>32; + } + } +#endif +#undef REDUCE } #ifndef GHASH_ASM @@ -568,15 +553,14 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) struct gcm128_context { /* Following 6 names follow names in GCM specification */ union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0, - Xi,H, - len; + Xi,H,len; /* Pre-computed table used by gcm_gmult_* */ #if TABLE_BITS==8 u128 Htable[256]; #else u128 Htable[16]; #endif - unsigned int res, ctr; + unsigned int res, pad; block128_f block; void *key; }; @@ -616,6 +600,7 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len) { const union { long one; char little; } is_endian = {1}; + unsigned int ctr; ctx->Yi.u[0] = 0; ctx->Yi.u[1] = 0; @@ -628,7 +613,7 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len) if (len==12) { memcpy(ctx->Yi.c,iv,12); ctx->Yi.c[15]=1; - ctx->ctr=1; + ctr=1; } else { size_t i; @@ -665,17 +650,17 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len) GCM_MUL(ctx,Yi); if (is_endian.little) - ctx->ctr = GETU32(ctx->Yi.c+12); + ctr = GETU32(ctx->Yi.c+12); else - ctx->ctr = ctx->Yi.d[3]; + ctr = ctx->Yi.d[3]; } (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key); - ++ctx->ctr; + ++ctr; if (is_endian.little) - PUTU32(ctx->Yi.c+12,ctx->ctr); + PUTU32(ctx->Yi.c+12,ctr); else - ctx->Yi.d[3] = ctx->ctr; + ctx->Yi.d[3] = ctr; } void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len) @@ -714,7 +699,10 @@ void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, ctx->len.u[1] += len; n = ctx->res; - ctr = ctx->ctr; + if (is_endian.little) + ctr = 
GETU32(ctx->Yi.c+12); + else + ctr = ctx->Yi.d[3]; #if !defined(OPENSSL_SMALL_FOOTPRINT) if (16%sizeof(size_t) == 0) do { /* always true actually */ @@ -806,7 +794,6 @@ void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, } ctx->res = n; - ctx->ctr = ctr; return; } while(0); #endif @@ -826,7 +813,6 @@ void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, } ctx->res = n; - ctx->ctr = ctr; } void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, @@ -839,7 +825,10 @@ void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, ctx->len.u[1] += len; n = ctx->res; - ctr = ctx->ctr; + if (is_endian.little) + ctr = GETU32(ctx->Yi.c+12); + else + ctr = ctx->Yi.d[3]; #if !defined(OPENSSL_SMALL_FOOTPRINT) if (16%sizeof(size_t) == 0) do { /* always true actually */ @@ -934,7 +923,6 @@ void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, } ctx->res = n; - ctx->ctr = ctr; return; } while(0); #endif @@ -957,7 +945,6 @@ void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, } ctx->res = n; - ctx->ctr = ctr; } void CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx) diff --git a/crypto/modes/modes.h b/crypto/modes/modes.h index b70942bd78..10ce57667a 100644 --- a/crypto/modes/modes.h +++ b/crypto/modes/modes.h @@ -15,6 +15,10 @@ typedef void (*cbc128_f)(const unsigned char *in, unsigned char *out, size_t len, const void *key, unsigned char ivec[16], int enc); +typedef void (*ctr128_f)(const unsigned char *in, unsigned char *out, + size_t blocks, const void *key, + const unsigned char ivec[16]); + void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out, size_t len, const void *key, unsigned char ivec[16], block128_f block); @@ -27,6 +31,11 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, unsigned char ivec[16], unsigned char ecount_buf[16], unsigned int *num, block128_f block); +void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], unsigned char ecount_buf[16], + unsigned int *num, ctr128_f ctr); + void 
CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out, size_t len, const void *key, unsigned char ivec[16], int *num, diff --git a/crypto/modes/modes_lcl.h b/crypto/modes/modes_lcl.h new file mode 100644 index 0000000000..12368fb039 --- /dev/null +++ b/crypto/modes/modes_lcl.h @@ -0,0 +1,75 @@ +/* ==================================================================== + * Copyright (c) 2010 The OpenSSL Project. All rights reserved. + * + * Redistribution and use is governed by OpenSSL license. + * ==================================================================== + */ + +#include <openssl/modes.h> + + +#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) +typedef __int64 i64; +typedef unsigned __int64 u64; +#define U64(C) C##UI64 +#elif defined(__arch64__) +typedef long i64; +typedef unsigned long u64; +#define U64(C) C##UL +#else +typedef long long i64; +typedef unsigned long long u64; +#define U64(C) C##ULL +#endif + +typedef unsigned int u32; +typedef unsigned char u8; + +#define STRICT_ALIGNMENT 1 +#if defined(__i386) || defined(__i386__) || \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ + defined(__s390__) || defined(__s390x__) +# undef STRICT_ALIGNMENT +#endif + +#if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) +#if defined(__GNUC__) && __GNUC__>=2 +# if defined(__x86_64) || defined(__x86_64__) +# define BSWAP8(x) ({ u64 ret=(x); \ + asm volatile ("bswapq %0" \ + : "+r"(ret)); ret; }) +# define BSWAP4(x) ({ u32 ret=(x); \ + asm volatile ("bswapl %0" \ + : "+r"(ret)); ret; }) +# elif (defined(__i386) || defined(__i386__)) +# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ + asm volatile ("bswapl %0; bswapl %1" \ + : "+r"(hi),"+r"(lo)); \ + (u64)hi<<32|lo; }) +# define BSWAP4(x) ({ u32 ret=(x); \ + asm volatile ("bswapl %0" \ + : "+r"(ret)); ret; }) +# endif +#elif defined(_MSC_VER) +# if _MSC_VER>=1300 +# pragma 
intrinsic(_byteswap_uint64,_byteswap_ulong) +# define BSWAP8(x) _byteswap_uint64((u64)(x)) +# define BSWAP4(x) _byteswap_ulong((u32)(x)) +# elif defined(_M_IX86) + __inline u32 _bswap4(u32 val) { + _asm mov eax,val + _asm bswap eax + } +# define BSWAP4(x) _bswap4(x) +# endif +#endif +#endif + +#if defined(BSWAP4) && !defined(STRICT_ALIGNMENT) +#define GETU32(p) BSWAP4(*(const u32 *)(p)) +#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) +#else +#define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3]) +#define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v)) +#endif diff --git a/crypto/modes/ofb128.c b/crypto/modes/ofb128.c index c732e2ec58..4b5b4ec648 100644 --- a/crypto/modes/ofb128.c +++ b/crypto/modes/ofb128.c @@ -48,7 +48,7 @@ * */ -#include "modes.h" +#include "modes_lcl.h" #include #ifndef MODES_DEBUG @@ -58,14 +58,6 @@ #endif #include -#define STRICT_ALIGNMENT -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT -#endif - /* The input and output encrypted as though 128bit ofb mode is being * used. The extra state information to record how much of the * 128bit block we have used is contained in *num; -- 2.25.1