X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=crypto%2Frc4%2Frc4_enc.c;h=0660ea60a25e90e0d8f7c0231552b0660ea9ee25;hb=cb59297438c954d152fc085fb36a76af93252924;hp=93a75cd8f9f0305cf7b600a01cb5d5608d621538;hpb=6cc4ee03df4be25f3be44ae20bc8750b90c15705;p=oweals%2Fopenssl.git diff --git a/crypto/rc4/rc4_enc.c b/crypto/rc4/rc4_enc.c index 93a75cd8f9..0660ea60a2 100644 --- a/crypto/rc4/rc4_enc.c +++ b/crypto/rc4/rc4_enc.c @@ -67,7 +67,7 @@ * Date: Wed, 14 Sep 1994 06:35:31 GMT */ -void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, +void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata, unsigned char *outdata) { register RC4_INT *d; @@ -78,7 +78,7 @@ void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, y=key->y; d=key->data; -#if defined(RC4_CHUNK) && (defined(L_ENDIAN) || defined(B_ENDIAN)) +#if defined(RC4_CHUNK) /* * The original reason for implementing this(*) was the fact that * pre-21164a Alpha CPUs don't have byte load/store instructions @@ -87,21 +87,30 @@ void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, * at natural word size made it possible to reduce amount of * instructions as well as to perform early read-ahead without * suffering from RAW (read-after-write) hazard. This resulted - * in >40%(**) performance improvement (on 21064 box with gcc). + * in ~40%(**) performance improvement on 21064 box with gcc. * But it's not only Alpha users who win here:-) Thanks to the * early-n-wide read-ahead this implementation also exhibits - * >40% speed-up on SPARC and almost 20% on MIPS. + * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending + * on sizeof(RC4_INT)). * * (*) "this" means code which recognizes the case when input * and output pointers appear to be aligned at natural CPU - * word boundary. + * word boundary * (**) i.e. according to 'apps/openssl speed rc4' benchmark, - * crypto/rc4/rc4speed.c exhibits almost 70% speed-up. + * crypto/rc4/rc4speed.c exhibits almost 70% speed-up... + * + * Cavets. + * + * - RC4_CHUNK="unsigned long long" should be a #1 choice for + * UltraSPARC. Unfortunately gcc generates very slow code + * (2.5-3 times slower than one generated by Sun's WorkShop + * C) and therefore gcc (at least 2.95 and earlier) should + * always be told that RC4_CHUNK="unsigned long". * * */ -#define RC4_STEP ( \ +# define RC4_STEP ( \ x=(x+1) &0xff, \ tx=d[x], \ y=(tx+y)&0xff, \ @@ -111,70 +120,148 @@ void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, (RC4_CHUNK)d[(tx+ty)&0xff]\ ) -#if defined(L_ENDIAN) -# define SHFT(c) ((c)*8) -# define MASK(i) (((RC4_CHUNK)-1)>>((sizeof(RC4_CHUNK)-(i))<<3)) -# define SHINC 8 -#elif defined(B_ENDIAN) -# define SHFT(c) ((sizeof(RC4_CHUNK)-(c)-1)*8) -# define MASK(i) (((RC4_CHUNK)-1)<<((sizeof(RC4_CHUNK)-(i))<<3)) -# define SHINC -8 -#else -# error "L_ENDIAN or B_ENDIAN *must* be defined!" -#endif - if ( ( ((unsigned long)indata & (sizeof(RC4_CHUNK)-1)) | - ((unsigned long)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 - ) { - RC4_CHUNK ichunk,cipher; + ((unsigned long)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 ) + { + RC4_CHUNK ichunk,otp; + const union { long one; char little; } is_endian = {1}; - for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) { - ichunk = *(RC4_CHUNK *)indata; - cipher = RC4_STEP< + * who also stands for the is_endian union:-) + * + * Special notes. + * + * - is_endian is declared automatic as doing otherwise + * (declaring static) prevents gcc from eliminating + * the redundant code; + * - compilers (those I've tried) don't seem to have + * problems eliminating either the operators guarded + * by "if (sizeof(RC4_CHUNK)==8)" or the condition + * expressions themselves so I've got 'em to replace + * corresponding #ifdefs from the previous version; + * - I chose to let the redundant switch cases when + * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed + * before); + * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in + * [LB]ESHFT guards against "shift is out of range" + * warnings when sizeof(RC4_CHUNK)!=8 + * + * + */ + if (!is_endian.little) + { /* BIG-ENDIAN CASE */ +# define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1)) + for (;len&~(sizeof(RC4_CHUNK)-1);len-=sizeof(RC4_CHUNK)) + { + ichunk = *(RC4_CHUNK *)indata; + otp = RC4_STEP<x=x; + key->y=y; + return; + } + else + { /* LITTLE-ENDIAN CASE */ +# define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1)) + for (;len&~(sizeof(RC4_CHUNK)-1);len-=sizeof(RC4_CHUNK)) + { + ichunk = *(RC4_CHUNK *)indata; + otp = RC4_STEP; + otp |= RC4_STEP<<8; + otp |= RC4_STEP<<16; + otp |= RC4_STEP<<24; + if (sizeof(RC4_CHUNK)==8) + { + otp |= RC4_STEP<>= (sizeof(RC4_CHUNK)-len)<<3; + switch (len&(sizeof(RC4_CHUNK)-1)) + { + case 7: otp = RC4_STEP, i+=8; + case 6: otp |= RC4_STEP<x=x; + key->y=y; + return; } - ochunk &= ~mask; - ochunk |= (cipher^ichunk) & mask; - *(RC4_CHUNK *)outdata = ochunk; } - } - else #endif - { #define LOOP(in,out) \ x=((x+1)&0xff); \ tx=d[x]; \ @@ -223,7 +310,6 @@ void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, RC4_LOOP(indata,outdata,6); if (--i == 0) break; } } - } key->x=x; key->y=y; }