Changes between 0.9.2b and 0.9.3
+ *) Reorganize and speed up MD5.
+ [Andy Polyakov <appro@fy.chalmers.se>]
+
*) VMS support.
[Richard Levitte <richard@levitte.org>]
# Solaris setups
"solaris-x86-gcc","gcc:-O3 -fomit-frame-pointer -m486 -Wall -DL_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG $x86_gcc_des $x86_gcc_opts:$x86_sol_asm",
"solaris-sparc-gcc","gcc:-O3 -fomit-frame-pointer -mv8 -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8.o::",
-"solaris-usparc-gcc","gcc:-O3 -fomit-frame-pointer -mcpu=ultrasparc -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o::",
+"solaris-usparc-gcc","gcc:-O3 -fomit-frame-pointer -mcpu=ultrasparc -Wall -DB_ENDIAN -DULTRASPARC:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o:::asm/md5-sparcv8plus.o:",
"debug-solaris-sparc-gcc","gcc:-O3 -g -mv8 -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:::",
"debug-solaris-usparc-gcc","gcc:-O3 -g -mcpu=ultrasparc -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o::",
# SC4 is ok, better than gcc even on bn as long as you tell it -xarch=v8
# -fast slows things like DES down quite a lot
# Don't use -xtarget=ultra with SC4.2. It is broken, and will break exptest.
-# SC5.0 with the compiler common patch works.
"solaris-sparc-sc4","cc:-xarch=v8 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8.o::",
"solaris-usparc-sc4","cc:-xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o::",
# SC5.0 note: Compiler common patch 107357-01 or later is required!
-"solaris-usparc-sc5","cc:-xtarget=ultra -xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o::",
-"solaris64-usparc-sc5","cc:-xtarget=ultra -xarch=v9 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:SIXTY_FOUR_BIT_LONG RC4_CHAR DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:::",
+"solaris-usparc-sc5","cc:-xtarget=ultra -xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DULTRASPARC -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o:::asm/md5-sparcv8plus.o:",
+"solaris64-usparc-sc5","cc:-xtarget=ultra -xarch=v9 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DULTRASPARC:-D_REENTRANT:-lsocket -lnsl:SIXTY_FOUR_BIT_LONG RC4_CHAR DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::::asm/md5-sparcv9.o:",
# Sunos configs, assuming sparc for the gcc one.
##"sunos-cc", "cc:-O4 -DNOPROTO -DNOCONST:(unknown)::DES_UNROLL:::",
--- /dev/null
+/* crypto/md32_common.h */
+/* ====================================================================
+ * Copyright (c) 1999 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com). This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+/*
+ * This is a generic 32 bit "collector" for message digest algorithms.
+ * Whenever needed it collects input character stream into chunks of
+ * 32 bit values and invokes a block function that performs actual hash
+ * calculations.
+ *
+ * Porting guide.
+ *
+ * Obligatory macros:
+ *
+ * DATA_ORDER_IS_BIG_ENDIAN or DATA_ORDER_IS_LITTLE_ENDIAN
+ * this macro defines byte order of input stream.
+ * HASH_CBLOCK
+ * size of a unit chunk HASH_BLOCK operates on.
+ * HASH_LONG
+ * has to be at least 32 bits wide; if it's wider, then
+ * HASH_LONG_LOG2 *has to* be defined along with it
+ * HASH_CTX
+ * context structure that at least contains following
+ * members:
+ * typedef struct {
+ * ...
+ * HASH_LONG Nl,Nh;
+ * HASH_LONG data[HASH_LBLOCK];
+ * int num;
+ * ...
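+ * } HASH_CTX;
+ * HASH_FINAL as implemented here also reads members
+ * A, B, C and D of the context and writes them to its
+ * md output buffer.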
+ * HASH_UPDATE
+ * name of "Update" function, implemented here.
+ * HASH_TRANSFORM
+ * name of "Transform" function, implemented here.
+ * HASH_FINAL
+ * name of "Final" function, implemented here.
+ * HASH_BLOCK_HOST_ORDER
+ * name of "block" function treating *aligned* input message
+ * in host byte order, implemented externally.
+ * HASH_BLOCK_DATA_ORDER
+ * name of "block" function treating *unaligned* input message
+ * in original (data) byte order, implemented externally (it
+ * actually is optional if data and host are of the same
+ * "endianess").
+ *
+ * Optional macros:
+ *
+ * B_ENDIAN or L_ENDIAN
+ * defines host byte-order.
+ * HASH_LONG_LOG2
+ * defaults to 2 if not stated otherwise.
+ * HASH_LBLOCK
+ * assumed to be HASH_CBLOCK/4 if not stated otherwise.
+ * HASH_BLOCK_DATA_ORDER_ALIGNED
+ * alternative "block" function capable of treating
+ * aligned input message in original (data) order,
+ * implemented externally.
+ *
+ * MD5 example:
+ *
+ * #define DATA_ORDER_IS_LITTLE_ENDIAN
+ *
+ * #define HASH_LONG MD5_LONG
+ * #define HASH_LONG_LOG2 MD5_LONG_LOG2
+ * #define HASH_CTX MD5_CTX
+ * #define HASH_CBLOCK MD5_CBLOCK
+ * #define HASH_LBLOCK MD5_LBLOCK
+ * #define HASH_UPDATE MD5_Update
+ * #define HASH_TRANSFORM MD5_Transform
+ * #define HASH_FINAL MD5_Final
+ * #define HASH_BLOCK_HOST_ORDER md5_block_host_order
+ * #define HASH_BLOCK_DATA_ORDER md5_block_data_order
+ *
+ * <appro@fy.chalmers.se>
+ */
+
+#if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+#error "DATA_ORDER must be defined!"
+#endif
+
+#ifndef HASH_CBLOCK
+#error "HASH_CBLOCK must be defined!"
+#endif
+#ifndef HASH_LONG
+#error "HASH_LONG must be defined!"
+#endif
+#ifndef HASH_CTX
+#error "HASH_CTX must be defined!"
+#endif
+
+#ifndef HASH_UPDATE
+#error "HASH_UPDATE must be defined!"
+#endif
+#ifndef HASH_TRANSFORM
+#error "HASH_TRANSFORM must be defined!"
+#endif
+#ifndef HASH_FINAL
+#error "HASH_FINAL must be defined!"
+#endif
+
+#ifndef HASH_BLOCK_HOST_ORDER
+#error "HASH_BLOCK_HOST_ORDER must be defined!"
+#endif
+
+#if 0
+/*
+ * Moved below as it's required only if HASH_BLOCK_DATA_ORDER_ALIGNED
+ * isn't defined.
+ */
+#ifndef HASH_BLOCK_DATA_ORDER
+#error "HASH_BLOCK_DATA_ORDER must be defined!"
+#endif
+#endif
+
+#ifndef HASH_LBLOCK
+#define HASH_LBLOCK (HASH_CBLOCK/4)
+#endif
+
+#ifndef HASH_LONG_LOG2
+#define HASH_LONG_LOG2 2
+#endif
+
+/*
+ * Engage compiler specific rotate intrinsic function if available.
+ * Everything from here down to the "#endif PEDANTIC" line is skipped
+ * when PEDANTIC is defined, which leaves only the portable C
+ * definitions that follow that line.
+ */
+#undef ROTATE
+#ifndef PEDANTIC
+# if defined(_MSC_VER)
+# define ROTATE(a,n) _lrotl(a,n)
+# elif defined(__GNUC__) && __GNUC__>=2
+ /*
+ * Some GNU C inline assembler templates. Note that these are
+ * rotates by *constant* number of bits! But that's exactly
+ * what we need here...
+ *
+ * <appro@fy.chalmers.se>
+ */
+# if defined(__i386)
+# define ROTATE(a,n) ({ register unsigned int ret; \
+ asm volatile ( \
+ "roll %1,%0" \
+ : "=r"(ret) \
+ : "I"(n), "0"(a) \
+ : "cc"); \
+ ret; \
+ })
+# elif defined(__powerpc)
+# define ROTATE(a,n) ({ register unsigned int ret; \
+ asm volatile ( \
+ "rlwinm %0,%1,%2,0,31" \
+ : "=r"(ret) \
+ : "r"(a), "I"(n)); \
+ ret; \
+ })
+# endif
+# endif
+
+/*
+ * Engage compiler specific "fetch in reverse byte order"
+ * intrinsic function if available.
+ *
+ * The two flavours do not take the same kind of argument:
+ * BE_FETCH32(a) copies the value (a) into a register and byte-swaps
+ * that value, whereas both LE_FETCH32(a) variants use (a) as the
+ * address operand of a load instruction.
+ */
+# if defined(__GNUC__) && __GNUC__>=2
+ /* some GNU C inline assembler templates by <appro@fy.chalmers.se> */
+# if defined(__i386) && !defined(I386_ONLY)
+# define BE_FETCH32(a) ({ register unsigned int l=(a);\
+ asm volatile ( \
+ "bswapl %0" \
+ : "=r"(l) : "0"(l)); \
+ l; \
+ })
+# elif defined(__powerpc)
+# define LE_FETCH32(a) ({ register unsigned int l; \
+ asm volatile ( \
+ "lwbrx %0,0,%1" \
+ : "=r"(l) \
+ : "r"(a)); \
+ l; \
+ })
+
+# elif defined(__sparc) && defined(ULTRASPARC)
+# define LE_FETCH32(a) ({ register unsigned int l; \
+ asm volatile ( \
+ "lda [%1]#ASI_PRIMARY_LITTLE,%0"\
+ : "=r"(l) \
+ : "r"(a)); \
+ l; \
+ })
+# endif
+# endif
+#endif /* PEDANTIC */
+
+#if HASH_LONG_LOG2==2 /* Engage only if sizeof(HASH_LONG)== 4 */
+/*
+ * A nice byte order reversal from Wei Dai <weidai@eskimo.com>
+ * REVERSE_FETCH32(a,l) reads the HASH_LONG stored at address (a) into
+ * the caller supplied scratch lvalue l and evaluates to that word with
+ * its four bytes in reverse order.
+ */
+#ifdef ROTATE
+/* 5 instructions with rotate instruction, else 9 */
+#define REVERSE_FETCH32(a,l) ( \
+ l=*(const HASH_LONG *)(a), \
+ ((ROTATE(l,8)&0x00FF00FF)|(ROTATE((l&0x00FF00FF),24))) \
+ )
+#else
+/*
+ * 6 instructions with rotate instruction, else 8
+ * No ROTATE exists yet on this path; the name is only looked up when
+ * REVERSE_FETCH32 gets expanded, and by then the portable ROTATE
+ * defined right after this section is in place.
+ */
+#define REVERSE_FETCH32(a,l) ( \
+ l=*(const HASH_LONG *)(a), \
+ l=(((l>>8)&0x00FF00FF)|((l&0x00FF00FF)<<8)), \
+ ROTATE(l,16) \
+ )
+/*
+ * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
+ * It's rewritten as above for two reasons:
+ * - RISCs aren't good at long constants and have to explicitly
+ * compose 'em with several (well, usually 2) instructions in a
+ * register before performing the actual operation and (as you
+ * already realized:-) having same constant should inspire the
+ * compiler to permanently allocate the only register for it;
+ * - most modern CPUs have two ALUs, but usually only one has
+ * circuitry for shifts:-( this minor tweak inspires compiler
+ * to schedule shift instructions in a better way...
+ *
+ * <appro@fy.chalmers.se>
+ */
+#endif
+#endif
+
+#ifndef ROTATE /* portable fallback; note that (a) is evaluated twice */
+#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
+#endif
+
+/*
+ * Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED
+ * and HASH_BLOCK_HOST_ORDER ought to be the same if input data
+ * and host are of the same "endianness". It's possible to mask
+ * this with blank #define HASH_BLOCK_DATA_ORDER though...
+ *
+ * When data and host byte order differ, HOST_FETCH32(p,l) is mapped
+ * (unless the includer already defined it) onto the compiler specific
+ * LE_FETCH32/BE_FETCH32 if one exists, else onto REVERSE_FETCH32.
+ * If neither B_ENDIAN nor L_ENDIAN is defined nothing is selected here.
+ *
+ * NOTE(review): REVERSE_FETCH32 and LE_FETCH32 both read the word
+ * stored at address p, but BE_FETCH32 as defined earlier for GNU C on
+ * i386 byte-swaps the value of its argument. HOST_FETCH32(p,l) mapped
+ * to BE_FETCH32(p) therefore looks like it would swap the pointer
+ * rather than the word it points at -- confirm against the users of
+ * HOST_FETCH32 (none are visible here).
+ *
+ * <appro@fy.chalmers.se>
+ */
+#if defined(B_ENDIAN)
+# if defined(DATA_ORDER_IS_BIG_ENDIAN)
+# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
+# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
+# endif
+# elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+# ifndef HOST_FETCH32
+# ifdef LE_FETCH32
+# define HOST_FETCH32(p,l) LE_FETCH32(p)
+# elif defined(REVERSE_FETCH32)
+# define HOST_FETCH32(p,l) REVERSE_FETCH32(p,l)
+# endif
+# endif
+# endif
+#elif defined(L_ENDIAN)
+# if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
+# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
+# endif
+# elif defined(DATA_ORDER_IS_BIG_ENDIAN)
+# ifndef HOST_FETCH32
+# ifdef BE_FETCH32
+# define HOST_FETCH32(p,l) BE_FETCH32(p)
+# elif defined(REVERSE_FETCH32)
+# define HOST_FETCH32(p,l) REVERSE_FETCH32(p,l)
+# endif
+# endif
+# endif
+#endif
+
+/*
+ * HASH_BLOCK_DATA_ORDER is the only routine left to consume whole input
+ * blocks when there is no usable HASH_BLOCK_DATA_ORDER_ALIGNED, i.e.
+ * when that macro is undefined or is defined as 1 (the value the #if
+ * tests in HASH_UPDATE and HASH_TRANSFORM treat as "disabled"). Insist
+ * on it in both cases; otherwise HASH_UPDATE would hash no full blocks
+ * at all and copy the whole input into c->data.
+ * A function name used as the macro's value is an ordinary identifier
+ * to the preprocessor and evaluates to 0 in the comparison below.
+ */
+#if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) || HASH_BLOCK_DATA_ORDER_ALIGNED==1
+#ifndef HASH_BLOCK_DATA_ORDER
+#error "HASH_BLOCK_DATA_ORDER must be defined!"
+#endif
+#endif
+
+#if defined(DATA_ORDER_IS_BIG_ENDIAN)
+
+#define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++)))<<24), \
+ l|=(((unsigned long)(*((c)++)))<<16), \
+ l|=(((unsigned long)(*((c)++)))<< 8), \
+ l|=(((unsigned long)(*((c)++))) ), \
+ l)
+#define HOST_p_c2l(c,l,n) { \
+ switch (n) { \
+ case 0: l =((unsigned long)(*((c)++)))<<24; \
+ case 1: l|=((unsigned long)(*((c)++)))<<16; \
+ case 2: l|=((unsigned long)(*((c)++)))<< 8; \
+ case 3: l|=((unsigned long)(*((c)++))); \
+ } }
+#define HOST_p_c2l_p(c,l,sc,len) { \
+ switch (sc) { \
+ case 0: l =((unsigned long)(*((c)++)))<<24; \
+ if (--len == 0) break; \
+ case 1: l|=((unsigned long)(*((c)++)))<<16; \
+ if (--len == 0) break; \
+ case 2: l|=((unsigned long)(*((c)++)))<< 8; \
+ } }
+/*
+ * Summary of the byte collectors of this branch; byte position 0 of l
+ * is bits 31..24 here and position 3 is bits 7..0:
+ *   HOST_c2l      reads four bytes through c, advances c by four and
+ *                 evaluates to l;
+ *   HOST_p_c2l    fills positions n..3 of l; the switch cases fall
+ *                 through on purpose and only case 0 overwrites l, the
+ *                 others OR into it;
+ *   HOST_p_c2l_p  starts at position sc like HOST_p_c2l but stops as
+ *                 soon as len bytes were taken (len is modified as a
+ *                 side effect) and never fills position 3;
+ *   HOST_c2l_p    (next macro) zeroes l, then reads the n (0..3) bytes
+ *                 at c into positions 0..n-1, walking backwards from
+ *                 c+n;
+ *   HOST_l2c      (last macro) stores l as four bytes through c,
+ *                 advancing c by four.
+ * NOTE the pointer is not incremented at the end of this (HOST_c2l_p)
+ */
+#define HOST_c2l_p(c,l,n) { \
+ l=0; (c)+=n; \
+ switch (n) { \
+ case 3: l =((unsigned long)(*(--(c))))<< 8; \
+ case 2: l|=((unsigned long)(*(--(c))))<<16; \
+ case 1: l|=((unsigned long)(*(--(c))))<<24; \
+ } }
+#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
+ *((c)++)=(unsigned char)(((l)>>16)&0xff), \
+ *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
+ *((c)++)=(unsigned char)(((l) )&0xff), \
+ l)
+
+#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+
+#define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++))) ), \
+ l|=(((unsigned long)(*((c)++)))<< 8), \
+ l|=(((unsigned long)(*((c)++)))<<16), \
+ l|=(((unsigned long)(*((c)++)))<<24), \
+ l)
+#define HOST_p_c2l(c,l,n) { \
+ switch (n) { \
+ case 0: l =((unsigned long)(*((c)++))); \
+ case 1: l|=((unsigned long)(*((c)++)))<< 8; \
+ case 2: l|=((unsigned long)(*((c)++)))<<16; \
+ case 3: l|=((unsigned long)(*((c)++)))<<24; \
+ } }
+#define HOST_p_c2l_p(c,l,sc,len) { \
+ switch (sc) { \
+ case 0: l =((unsigned long)(*((c)++))); \
+ if (--len == 0) break; \
+ case 1: l|=((unsigned long)(*((c)++)))<< 8; \
+ if (--len == 0) break; \
+ case 2: l|=((unsigned long)(*((c)++)))<<16; \
+ } }
+/*
+ * Same five macros as in the big-endian branch, with the shift amounts
+ * mirrored: byte position 0 of l is bits 7..0 and position 3 is bits
+ * 31..24. The switch fall-through is again intentional.
+ * NOTE the pointer is not incremented at the end of this (HOST_c2l_p)
+ */
+#define HOST_c2l_p(c,l,n) { \
+ l=0; (c)+=n; \
+ switch (n) { \
+ case 3: l =((unsigned long)(*(--(c))))<<16; \
+ case 2: l|=((unsigned long)(*(--(c))))<< 8; \
+ case 1: l|=((unsigned long)(*(--(c)))); \
+ } }
+#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
+ *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
+ *((c)++)=(unsigned char)(((l)>>16)&0xff), \
+ *((c)++)=(unsigned char)(((l)>>24)&0xff), \
+ l)
+
+#endif
+
+/*
+ * Time for some action:-)
+ *
+ * HASH_UPDATE feeds len bytes found at data into the hash context c:
+ *  - the bit count kept in c->Nh and c->Nl grows by len*8 (Nl is masked
+ *    to 32 bits, a wrap of Nl carries into Nh);
+ *  - if c->data already holds c->num buffered bytes, the input is first
+ *    used to top that block up; once it is full it is passed to
+ *    HASH_BLOCK_HOST_ORDER, otherwise the call returns with all input
+ *    buffered;
+ *  - whole HASH_CBLOCK-sized blocks are then hashed from the input:
+ *    by HASH_BLOCK_DATA_ORDER_ALIGNED when data is 4-byte aligned, else
+ *    by HASH_BLOCK_DATA_ORDER or, when that is not defined, block by
+ *    block through a memcpy into c->data;
+ *  - leftover bytes are packed into c->data and counted in c->num.
+ * A call with len==0 changes nothing.
+ */
+
+void HASH_UPDATE (HASH_CTX *c, const unsigned char *data, unsigned long len)
+ {
+ register HASH_LONG * p;
+ register unsigned long l;
+ int sw,sc,ew,ec;
+
+ if (len==0) return;
+
+ l=(c->Nl+(len<<3))&0xffffffffL;
+ /* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
+ * Wei Dai <weidai@eskimo.com> for pointing it out. */
+ if (l < c->Nl) /* overflow */
+ c->Nh++;
+ c->Nh+=(len>>29); /* the part of len*8 above the low 32 bits */
+ c->Nl=l;
+
+ if (c->num != 0)
+ {
+ p=c->data;
+ sw=c->num>>2; /* index of the partially filled word */
+ sc=c->num&0x03; /* bytes already present in that word */
+
+ if ((c->num+len) >= HASH_CBLOCK)
+ {
+ l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
+ for (; sw<HASH_LBLOCK; sw++)
+ {
+ HOST_c2l(data,l); p[sw]=l;
+ }
+ HASH_BLOCK_HOST_ORDER (c,p,1);
+ len-=(HASH_CBLOCK-c->num);
+ c->num=0;
+ /* drop through and do the rest */
+ }
+ else
+ {
+ c->num+=len;
+ if ((sc+len) < 4) /* ugly, add char's to a word */
+ {
+ l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l;
+ }
+ else
+ {
+ ew=(c->num>>2);
+ ec=(c->num&0x03);
+ l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
+ for (; sw < ew; sw++)
+ {
+ HOST_c2l(data,l); p[sw]=l;
+ }
+ if (ec)
+ {
+ HOST_c2l_p(data,l,ec); p[sw]=l;
+ }
+ }
+ return;
+ }
+ }
+
+ sw=len/HASH_CBLOCK; /* number of whole blocks available in the input */
+ if (sw > 0)
+ {
+#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_BLOCK_DATA_ORDER_ALIGNED!=1
+ /*
+ * Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined
+ * only if sizeof(HASH_LONG)==4.
+ */
+ if ((((unsigned long)data)%4) == 0)
+ {
+ HASH_BLOCK_DATA_ORDER_ALIGNED (c,(HASH_LONG *)data,sw);
+ sw*=HASH_CBLOCK;
+ data+=sw;
+ len-=sw;
+ }
+ else /* unaligned input; the statement for this else depends on the #if below */
+#if !defined(HASH_BLOCK_DATA_ORDER)
+ while (sw--) /* copy each block into c->data, which is passed as aligned input */
+ {
+ memcpy (p=c->data,data,HASH_CBLOCK);
+ HASH_BLOCK_DATA_ORDER_ALIGNED(c,p,1);
+ data+=HASH_CBLOCK;
+ len-=HASH_CBLOCK;
+ }
+#endif
+#endif
+#if defined(HASH_BLOCK_DATA_ORDER)
+ {
+ HASH_BLOCK_DATA_ORDER (c,(HASH_LONG *)data,sw);
+ sw*=HASH_CBLOCK;
+ data+=sw;
+ len-=sw;
+ }
+#endif
+ }
+
+ if (len!=0)
+ {
+ p = c->data;
+ c->num = len;
+ ew=len>>2; /* words to copy */
+ ec=len&0x03; /* bytes left over after the whole words */
+ for (; ew; ew--,p++)
+ {
+ HOST_c2l(data,l); *p=l;
+ }
+ HOST_c2l_p(data,l,ec);
+ *p=l; /* stored even when ec==0, in which case l is 0 */
+ }
+ }
+
+
+void HASH_TRANSFORM (HASH_CTX *c, unsigned char *data)
+ { /* Runs the block function over exactly one HASH_CBLOCK-byte block located at data. */
+#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_BLOCK_DATA_ORDER_ALIGNED!=1
+ if ((((unsigned long)data)%4) == 0)
+ HASH_BLOCK_DATA_ORDER_ALIGNED (c,(HASH_LONG *)data,1);
+ else /* unaligned data; the statement for this else depends on the #if below */
+#if !defined(HASH_BLOCK_DATA_ORDER)
+ { /* no unaligned-capable routine: copy the block into c->data first */
+ memcpy (c->data,data,HASH_CBLOCK);
+ HASH_BLOCK_DATA_ORDER_ALIGNED (c,c->data,1);
+ }
+#endif
+#endif
+#if defined(HASH_BLOCK_DATA_ORDER)
+ HASH_BLOCK_DATA_ORDER (c,(HASH_LONG *)data,1);
+#endif
+ }
+
+
+void HASH_FINAL (unsigned char *md, HASH_CTX *c)
+ {
+ register HASH_LONG *p;
+ register unsigned long l;
+ register int i,j;
+ static const unsigned char end[4]={0x80,0x00,0x00,0x00};
+ const unsigned char *cp=end;
+
+ /*
+ * Finishes the hash: appends the 0x80 marker byte from end[] to the
+ * bytes buffered in c->data, zero-fills the block, stores Nl/Nh in
+ * its last two words (running one extra block first when fewer than
+ * two words are still free) and finally writes c->A, c->B, c->C and
+ * c->D to md through HOST_l2c, i.e. 16 bytes.
+ *
+ * c->num should definitely have room for at least one more byte.
+ */
+ p=c->data;
+ i=c->num>>2;
+ j=c->num&0x03;
+
+#if 0
+ /* purify often complains about the following line as an
+ * Uninitialized Memory Read. While this can be true, the
+ * following p_c2l macro will reset l when that case is true.
+ * This is because j&0x03 contains the number of 'valid' bytes
+ * already in p[i]. If and only if j&0x03 == 0, the UMR will
+ * occur but this is also the only time p_c2l will do
+ * l= *(cp++) instead of l|= *(cp++)
+ * Many thanks to Alex Tang <altitude@cic.net> for pickup this
+ * 'potential bug' */
+#ifdef PURIFY
+ if (j==0) p[i]=0; /* Yeah, but that's not the way to fix it:-) */
+#endif
+ l=p[i];
+#else
+ l = (j==0) ? 0 : p[i];
+#endif
+ HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */
+
+ if (i>(HASH_LBLOCK-2)) /* save room for Nl and Nh */
+ {
+ if (i<HASH_LBLOCK) p[i]=0;
+ HASH_BLOCK_HOST_ORDER (c,p,1);
+ i=0;
+ }
+ for (; i<(HASH_LBLOCK-2); i++)
+ p[i]=0;
+
+#if defined(DATA_ORDER_IS_BIG_ENDIAN)
+ p[HASH_LBLOCK-2]=c->Nh;
+ p[HASH_LBLOCK-1]=c->Nl;
+#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+ p[HASH_LBLOCK-2]=c->Nl;
+ p[HASH_LBLOCK-1]=c->Nh;
+#endif
+ HASH_BLOCK_HOST_ORDER (c,p,1);
+
+ l=c->A; HOST_l2c(l,md);
+ l=c->B; HOST_l2c(l,md);
+ l=c->C; HOST_l2c(l,md);
+ l=c->D; HOST_l2c(l,md);
+
+ c->num=0;
+ /* clear stuff, HASH_BLOCK may be leaving some stuff on the stack
+ * but I'm not worried :-)
+ * NOTE(review): the memset on the next line is part of this comment,
+ * so apart from c->num the context is not cleared by this function.
+ memset((void *)c,0,sizeof(HASH_CTX));
+ */
+ }
asm/mx86unix.cpp: asm/md5-586.pl
(cd asm; $(PERL) md5-586.pl cpp >mx86unix.cpp)
+# works for both SC and gcc
+asm/md5-sparcv8plus.o: asm/md5-sparcv9.S
+ $(CPP) -DULTRASPARC -DMD5_BLOCK_DATA_ORDER asm/md5-sparcv9.S | as -xarch=v8plus /dev/fd/0 -o asm/md5-sparcv8plus.o
+
+asm/md5-sparcv9.o: asm/md5-sparcv9.S
+ $(CC) -xarch=v9 -DULTRASPARC -DMD5_BLOCK_DATA_ORDER -c asm/md5-sparcv9.S -o asm/md5-sparcv9.o
+
+
files:
$(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO
# DO NOT DELETE THIS LINE -- make depend depends on it.
md5_dgst.o: ../../include/openssl/md5.h ../../include/openssl/opensslv.h
-md5_dgst.o: md5_locl.h
+md5_dgst.o: ../md32_common.h md5_locl.h
md5_one.o: ../../include/openssl/md5.h md5_locl.h
0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3
);
-&md5_block("md5_block_x86");
+&md5_block("md5_block_asm_host_order");
&asm_finish();
sub Np
&mov($X, &wparam(1)); # esi
&mov($C, &wparam(2));
&push("ebp");
+ &shl($C, 6);
&push("ebx");
&add($C, $X); # offset we end at
&sub($C, 64);
--- /dev/null
+.ident "md5-sparcv9.S, Version 1.0"
+.ident "SPARC V9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+.file "md5-sparcv9.S"
+
+/*
+ * ====================================================================
+ * Copyright (c) 1999 Andy Polyakov <appro@fy.chalmers.se>.
+ *
+ * Rights for redistribution and usage in source and binary forms are
+ * granted as long as above copyright notices are retained. Warranty
+ * of any kind is (of course:-) disclaimed.
+ * ====================================================================
+ */
+
+/*
+ * This is my modest contribution to OpenSSL project (see
+ * http://www.openssl.org/ for more information about it) and is an
+ * assembler implementation of MD5 block hash function. I've hand-coded
+ * this for the sole reason to reach UltraSPARC-specific "load in
+ * little-endian byte order" instruction. This gives up to 15%
+ * performance improvement for cases when input message is aligned at
+ * 32 bits boundary. The module was tested under both 32 *and* 64 bit
+ * kernels. For updates see http://fy.chalmers.se/~appro/hpe/.
+ *
+ * To compile with SC4.x/SC5.x:
+ *
+ * cc -xarch=v[9|8plus] -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \
+ * -c md5-sparcv9.S
+ *
+ * and with gcc:
+ *
+ * gcc -mcpu=ultrasparc -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \
+ * -c md5-sparcv9.S
+ *
+ * or if above fails (it does if you have gas):
+ *
+ * gcc -E -DULTRASPARC -DMD5_BLOCK_DATA_ORDER md5-sparcv9.S | \
+ * as -xarch=v8plus /dev/fd/0 -o md5-sparcv9.o
+ */
+
+#define A %o0
+#define B %o1
+#define C %o2
+#define D %o3
+#define T1 %o4
+#define T2 %o5
+
+#define R0 %l0
+#define R1 %l1
+#define R2 %l2
+#define R3 %l3
+#define R4 %l4
+#define R5 %l5
+#define R6 %l6
+#define R7 %l7
+#define R8 %i3
+#define R9 %i4
+#define R10 %i5
+#define R11 %g1
+#define R12 %g2
+#define R13 %g3
+#define RX %g4
+
+#define Aptr %i0+0
+#define Bptr %i0+4
+#define Cptr %i0+8
+#define Dptr %i0+12
+
+#define Aval R5 /* those not used at the end of the last round */
+#define Bval R6
+#define Cval R7
+#define Dval R8
+
+#if defined(MD5_BLOCK_DATA_ORDER)
+# if defined(ULTRASPARC)
+# define LOAD lda
+# define X(i) [%i1+i*4]%asi
+# define md5_block md5_block_asm_data_order_aligned
+# define ASI_PRIMARY_LITTLE 0x88
+# else
+# error "MD5_BLOCK_DATA_ORDER is supported only on UltraSPARC!"
+# endif
+#else
+# define LOAD ld
+# define X(i) [%i1+i*4]
+# define md5_block md5_block_asm_host_order
+#endif
+
+.section ".text",#alloc,#execinstr
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+ /* They've said -xarch=v9 at command line */
+ .register %g2,#scratch
+ .register %g3,#scratch
+# define FRAME -192
+#else
+# define FRAME -96
+#endif
+
+.align 32
+
+.global md5_block
+md5_block:
+ save %sp,FRAME,%sp
+
+ ld [Dptr],D
+#ifdef ASI_PRIMARY_LITTLE
+ mov %asi,%o7 ! How dare I? Well, I just do:-)
+#else
+ nop
+#endif
+ ld [Cptr],C
+#ifdef ASI_PRIMARY_LITTLE
+ mov ASI_PRIMARY_LITTLE,%asi
+#else
+ nop
+#endif
+ ld [Bptr],B
+ nop
+ ld [Aptr],A
+ nop
+ LOAD X(0),R0
+ nop
+ ba .Lmd5_block_loop
+ nop
+
+.align 32
+.Lmd5_block_loop:
+
+!!!!!!!!Round 0
+
+ xor C,D,T1
+ sethi %hi(0xd76aa478),T2
+ and T1,B,T1
+ or T2,%lo(0xd76aa478),T2 !=
+ xor T1,D,T1
+ add T1,R0,T1
+ LOAD X(1),R1
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0xe8c7b756),T2
+ and T1,A,T1 !=
+ or T2,%lo(0xe8c7b756),T2
+ xor T1,C,T1
+ LOAD X(2),R2
+ add T1,R1,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0x242070db),T2 !=
+ and T1,D,T1
+ or T2,%lo(0x242070db),T2
+ xor T1,B,T1
+ add T1,R2,T1 !=
+ LOAD X(3),R3
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0xc1bdceee),T2
+ and T1,C,T1
+ or T2,%lo(0xc1bdceee),T2
+ xor T1,A,T1 !=
+ add T1,R3,T1
+ LOAD X(4),R4
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,22,T2
+ srl B,32-22,B
+ or B,T2,B
+ xor C,D,T1 !=
+ add B,C,B
+
+ sethi %hi(0xf57c0faf),T2
+ and T1,B,T1
+ or T2,%lo(0xf57c0faf),T2 !=
+ xor T1,D,T1
+ add T1,R4,T1
+ LOAD X(5),R5
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0x4787c62a),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x4787c62a),T2
+ xor T1,C,T1
+ LOAD X(6),R6
+ add T1,R5,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0xa8304613),T2 !=
+ and T1,D,T1
+ or T2,%lo(0xa8304613),T2
+ xor T1,B,T1
+ add T1,R6,T1 !=
+ LOAD X(7),R7
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0xfd469501),T2
+ and T1,C,T1
+ or T2,%lo(0xfd469501),T2
+ xor T1,A,T1 !=
+ add T1,R7,T1
+ LOAD X(8),R8
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,22,T2
+ srl B,32-22,B
+ or B,T2,B
+ xor C,D,T1 !=
+ add B,C,B
+
+ sethi %hi(0x698098d8),T2
+ and T1,B,T1
+ or T2,%lo(0x698098d8),T2 !=
+ xor T1,D,T1
+ add T1,R8,T1
+ LOAD X(9),R9
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0x8b44f7af),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x8b44f7af),T2
+ xor T1,C,T1
+ LOAD X(10),R10
+ add T1,R9,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0xffff5bb1),T2 !=
+ and T1,D,T1
+ or T2,%lo(0xffff5bb1),T2
+ xor T1,B,T1
+ add T1,R10,T1 !=
+ LOAD X(11),R11
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0x895cd7be),T2
+ and T1,C,T1
+ or T2,%lo(0x895cd7be),T2
+ xor T1,A,T1 !=
+ add T1,R11,T1
+ LOAD X(12),R12
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,22,T2
+ srl B,32-22,B
+ or B,T2,B
+ xor C,D,T1 !=
+ add B,C,B
+
+ sethi %hi(0x6b901122),T2
+ and T1,B,T1
+ or T2,%lo(0x6b901122),T2 !=
+ xor T1,D,T1
+ add T1,R12,T1
+ LOAD X(13),R13
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0xfd987193),T2
+ and T1,A,T1 !=
+ or T2,%lo(0xfd987193),T2
+ xor T1,C,T1
+ LOAD X(14),RX
+ add T1,R13,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0xa679438e),T2 !=
+ and T1,D,T1
+ or T2,%lo(0xa679438e),T2
+ xor T1,B,T1
+ add T1,RX,T1 !=
+ LOAD X(15),RX
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0x49b40821),T2
+ and T1,C,T1
+ or T2,%lo(0x49b40821),T2
+ xor T1,A,T1 !=
+ add T1,RX,T1
+ !pre-LOADed X(1),R1
+ add T1,T2,T1
+ add B,T1,B
+ sll B,22,T2 !=
+ srl B,32-22,B
+ or B,T2,B
+ add B,C,B
+
+!!!!!!!!Round 1
+
+ xor B,C,T1 !=
+ sethi %hi(0xf61e2562),T2
+ and T1,D,T1
+ or T2,%lo(0xf61e2562),T2
+ xor T1,C,T1 !=
+ add T1,R1,T1
+ !pre-LOADed X(6),R6
+ add T1,T2,T1
+ add A,T1,A
+ sll A,5,T2 !=
+ srl A,32-5,A
+ or A,T2,A
+ add A,B,A
+
+ xor A,B,T1 !=
+ sethi %hi(0xc040b340),T2
+ and T1,C,T1
+ or T2,%lo(0xc040b340),T2
+ xor T1,B,T1 !=
+ add T1,R6,T1
+ !pre-LOADed X(11),R11
+ add T1,T2,T1
+ add D,T1,D
+ sll D,9,T2 !=
+ srl D,32-9,D
+ or D,T2,D
+ add D,A,D
+
+ xor D,A,T1 !=
+ sethi %hi(0x265e5a51),T2
+ and T1,B,T1
+ or T2,%lo(0x265e5a51),T2
+ xor T1,A,T1 !=
+ add T1,R11,T1
+ !pre-LOADed X(0),R0
+ add T1,T2,T1
+ add C,T1,C
+ sll C,14,T2 !=
+ srl C,32-14,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0xe9b6c7aa),T2
+ and T1,A,T1
+ or T2,%lo(0xe9b6c7aa),T2
+ xor T1,D,T1 !=
+ add T1,R0,T1
+ !pre-LOADed X(5),R5
+ add T1,T2,T1
+ add B,T1,B
+ sll B,20,T2 !=
+ srl B,32-20,B
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1 !=
+ sethi %hi(0xd62f105d),T2
+ and T1,D,T1
+ or T2,%lo(0xd62f105d),T2
+ xor T1,C,T1 !=
+ add T1,R5,T1
+ !pre-LOADed X(10),R10
+ add T1,T2,T1
+ add A,T1,A
+ sll A,5,T2 !=
+ srl A,32-5,A
+ or A,T2,A
+ add A,B,A
+
+ xor A,B,T1 !=
+ sethi %hi(0x02441453),T2
+ and T1,C,T1
+ or T2,%lo(0x02441453),T2
+ xor T1,B,T1 !=
+ add T1,R10,T1
+ LOAD X(15),RX
+ add T1,T2,T1
+ add D,T1,D !=
+ sll D,9,T2
+ srl D,32-9,D
+ or D,T2,D
+ add D,A,D !=
+
+ xor D,A,T1
+ sethi %hi(0xd8a1e681),T2
+ and T1,B,T1
+ or T2,%lo(0xd8a1e681),T2 !=
+ xor T1,A,T1
+ add T1,RX,T1
+ !pre-LOADed X(4),R4
+ add T1,T2,T1
+ add C,T1,C !=
+ sll C,14,T2
+ srl C,32-14,C
+ or C,T2,C
+ add C,D,C !=
+
+ xor C,D,T1
+ sethi %hi(0xe7d3fbc8),T2
+ and T1,A,T1
+ or T2,%lo(0xe7d3fbc8),T2 !=
+ xor T1,D,T1
+ add T1,R4,T1
+ !pre-LOADed X(9),R9
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,20,T2
+ srl B,32-20,B
+ or B,T2,B
+ add B,C,B !=
+
+ xor B,C,T1
+ sethi %hi(0x21e1cde6),T2
+ and T1,D,T1
+ or T2,%lo(0x21e1cde6),T2 !=
+ xor T1,C,T1
+ add T1,R9,T1
+ LOAD X(14),RX
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,5,T2
+ srl A,32-5,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xc33707d6),T2
+ and T1,C,T1 !=
+ or T2,%lo(0xc33707d6),T2
+ xor T1,B,T1
+ add T1,RX,T1
+ !pre-LOADed X(3),R3
+ add T1,T2,T1 !=
+ add D,T1,D
+ sll D,9,T2
+ srl D,32-9,D
+ or D,T2,D !=
+ add D,A,D
+
+ xor D,A,T1
+ sethi %hi(0xf4d50d87),T2
+ and T1,B,T1 !=
+ or T2,%lo(0xf4d50d87),T2
+ xor T1,A,T1
+ add T1,R3,T1
+ !pre-LOADed X(8),R8
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,14,T2
+ srl C,32-14,C
+ or C,T2,C !=
+ add C,D,C
+
+ xor C,D,T1
+ sethi %hi(0x455a14ed),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x455a14ed),T2
+ xor T1,D,T1
+ add T1,R8,T1
+ !pre-LOADed X(13),R13
+ add T1,T2,T1 !=
+ add B,T1,B
+ sll B,20,T2
+ srl B,32-20,B
+ or B,T2,B !=
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0xa9e3e905),T2
+ and T1,D,T1 !=
+ or T2,%lo(0xa9e3e905),T2
+ xor T1,C,T1
+ add T1,R13,T1
+ !pre-LOADed X(2),R2
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,5,T2
+ srl A,32-5,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xfcefa3f8),T2
+ and T1,C,T1 !=
+ or T2,%lo(0xfcefa3f8),T2
+ xor T1,B,T1
+ add T1,R2,T1
+ !pre-LOADed X(7),R7
+ add T1,T2,T1 !=
+ add D,T1,D
+ sll D,9,T2
+ srl D,32-9,D
+ or D,T2,D !=
+ add D,A,D
+
+ xor D,A,T1
+ sethi %hi(0x676f02d9),T2
+ and T1,B,T1 !=
+ or T2,%lo(0x676f02d9),T2
+ xor T1,A,T1
+ add T1,R7,T1
+ !pre-LOADed X(12),R12
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,14,T2
+ srl C,32-14,C
+ or C,T2,C !=
+ add C,D,C
+
+ xor C,D,T1
+ sethi %hi(0x8d2a4c8a),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x8d2a4c8a),T2
+ xor T1,D,T1
+ add T1,R12,T1
+ !pre-LOADed X(5),R5
+ add T1,T2,T1 !=
+ add B,T1,B
+ sll B,20,T2
+ srl B,32-20,B
+ or B,T2,B !=
+ add B,C,B
+
+!!!!!!!!Round 2
+
+ xor B,C,T1
+ sethi %hi(0xfffa3942),T2
+ xor T1,D,T1 !=
+ or T2,%lo(0xfffa3942),T2
+ add T1,R5,T1
+ !pre-LOADed X(8),R8
+ add T1,T2,T1
+ add A,T1,A !=
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A
+ add A,B,A !=
+
+ xor A,B,T1
+ sethi %hi(0x8771f681),T2
+ xor T1,C,T1
+ or T2,%lo(0x8771f681),T2 !=
+ add T1,R8,T1
+ !pre-LOADed X(11),R11
+ add T1,T2,T1
+ add D,T1,D
+ sll D,11,T2 !=
+ srl D,32-11,D
+ or D,T2,D
+ add D,A,D
+
+ xor D,A,T1 !=
+ sethi %hi(0x6d9d6122),T2
+ xor T1,B,T1
+ or T2,%lo(0x6d9d6122),T2
+ add T1,R11,T1 !=
+ LOAD X(14),RX
+ add T1,T2,T1
+ add C,T1,C
+ sll C,16,T2 !=
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0xfde5380c),T2
+ xor T1,A,T1
+ or T2,%lo(0xfde5380c),T2
+ add T1,RX,T1 !=
+ !pre-LOADed X(1),R1
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2
+ srl B,32-23,B !=
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0xa4beea44),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0xa4beea44),T2
+ add T1,R1,T1
+ !pre-LOADed X(4),R4
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0x4bdecfa9),T2
+ xor T1,C,T1 !=
+ or T2,%lo(0x4bdecfa9),T2
+ add T1,R4,T1
+ !pre-LOADed X(7),R7
+ add T1,T2,T1
+ add D,T1,D !=
+ sll D,11,T2
+ srl D,32-11,D
+ or D,T2,D
+ add D,A,D !=
+
+ xor D,A,T1
+ sethi %hi(0xf6bb4b60),T2
+ xor T1,B,T1
+ or T2,%lo(0xf6bb4b60),T2 !=
+ add T1,R7,T1
+ !pre-LOADed X(10),R10
+ add T1,T2,T1
+ add C,T1,C
+ sll C,16,T2 !=
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0xbebfbc70),T2
+ xor T1,A,T1
+ or T2,%lo(0xbebfbc70),T2
+ add T1,R10,T1 !=
+ !pre-LOADed X(13),R13
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2
+ srl B,32-23,B !=
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0x289b7ec6),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0x289b7ec6),T2
+ add T1,R13,T1
+ !pre-LOADed X(0),R0
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xeaa127fa),T2
+ xor T1,C,T1 !=
+ or T2,%lo(0xeaa127fa),T2
+ add T1,R0,T1
+ !pre-LOADed X(3),R3
+ add T1,T2,T1
+ add D,T1,D !=
+ sll D,11,T2
+ srl D,32-11,D
+ or D,T2,D
+ add D,A,D !=
+
+ xor D,A,T1
+ sethi %hi(0xd4ef3085),T2
+ xor T1,B,T1
+ or T2,%lo(0xd4ef3085),T2 !=
+ add T1,R3,T1
+ !pre-LOADed X(6),R6
+ add T1,T2,T1
+ add C,T1,C
+ sll C,16,T2 !=
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0x04881d05),T2
+ xor T1,A,T1
+ or T2,%lo(0x04881d05),T2
+ add T1,R6,T1 !=
+ !pre-LOADed X(9),R9
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2
+ srl B,32-23,B !=
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0xd9d4d039),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0xd9d4d039),T2
+ add T1,R9,T1
+ !pre-LOADed X(12),R12
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xe6db99e5),T2
+ xor T1,C,T1 !=
+ or T2,%lo(0xe6db99e5),T2
+ add T1,R12,T1
+ LOAD X(15),RX
+ add T1,T2,T1 !=
+ add D,T1,D
+ sll D,11,T2
+ srl D,32-11,D
+ or D,T2,D !=
+ add D,A,D
+
+ xor D,A,T1
+ sethi %hi(0x1fa27cf8),T2
+ xor T1,B,T1 !=
+ or T2,%lo(0x1fa27cf8),T2
+ add T1,RX,T1
+ !pre-LOADed X(2),R2
+ add T1,T2,T1
+ add C,T1,C !=
+ sll C,16,T2
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C !=
+
+ xor C,D,T1
+ sethi %hi(0xc4ac5665),T2
+ xor T1,A,T1
+ or T2,%lo(0xc4ac5665),T2 !=
+ add T1,R2,T1
+ !pre-LOADed X(0),R0
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2 !=
+ srl B,32-23,B
+ or B,T2,B
+ add B,C,B
+
+!!!!!!!!Round 3
+
+ orn B,D,T1 !=
+ sethi %hi(0xf4292244),T2
+ xor T1,C,T1
+ or T2,%lo(0xf4292244),T2
+ add T1,R0,T1 !=
+ !pre-LOADed X(7),R7
+ add T1,T2,T1
+ add A,T1,A
+ sll A,6,T2
+ srl A,32-6,A !=
+ or A,T2,A
+ add A,B,A
+
+ orn A,C,T1
+ sethi %hi(0x432aff97),T2 !=
+ xor T1,B,T1
+ or T2,%lo(0x432aff97),T2
+ LOAD X(14),RX
+ add T1,R7,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2
+ srl D,32-10,D !=
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1
+ sethi %hi(0xab9423a7),T2 !=
+ xor T1,A,T1
+ or T2,%lo(0xab9423a7),T2
+ add T1,RX,T1
+ !pre-LOADed X(5),R5
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,15,T2
+ srl C,32-15,C
+ or C,T2,C !=
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0xfc93a039),T2
+ xor T1,D,T1 !=
+ or T2,%lo(0xfc93a039),T2
+ add T1,R5,T1
+ !pre-LOADed X(12),R12
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,21,T2
+ srl B,32-21,B
+ or B,T2,B
+ add B,C,B !=
+
+ orn B,D,T1
+ sethi %hi(0x655b59c3),T2
+ xor T1,C,T1
+ or T2,%lo(0x655b59c3),T2 !=
+ add T1,R12,T1
+ !pre-LOADed X(3),R3
+ add T1,T2,T1
+ add A,T1,A
+ sll A,6,T2 !=
+ srl A,32-6,A
+ or A,T2,A
+ add A,B,A
+
+ orn A,C,T1 !=
+ sethi %hi(0x8f0ccc92),T2
+ xor T1,B,T1
+ or T2,%lo(0x8f0ccc92),T2
+ add T1,R3,T1 !=
+ !pre-LOADed X(10),R10
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2
+ srl D,32-10,D !=
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1
+ sethi %hi(0xffeff47d),T2 !=
+ xor T1,A,T1
+ or T2,%lo(0xffeff47d),T2
+ add T1,R10,T1
+ !pre-LOADed X(1),R1
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,15,T2
+ srl C,32-15,C
+ or C,T2,C !=
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0x85845dd1),T2
+ xor T1,D,T1 !=
+ or T2,%lo(0x85845dd1),T2
+ add T1,R1,T1
+ !pre-LOADed X(8),R8
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,21,T2
+ srl B,32-21,B
+ or B,T2,B
+ add B,C,B !=
+
+ orn B,D,T1
+ sethi %hi(0x6fa87e4f),T2
+ xor T1,C,T1
+ or T2,%lo(0x6fa87e4f),T2 !=
+ add T1,R8,T1
+ LOAD X(15),RX
+ add T1,T2,T1
+ add A,T1,A !=
+ sll A,6,T2
+ srl A,32-6,A
+ or A,T2,A
+ add A,B,A !=
+
+ orn A,C,T1
+ sethi %hi(0xfe2ce6e0),T2
+ xor T1,B,T1
+ or T2,%lo(0xfe2ce6e0),T2 !=
+ add T1,RX,T1
+ !pre-LOADed X(6),R6
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2 !=
+ srl D,32-10,D
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1 !=
+ sethi %hi(0xa3014314),T2
+ xor T1,A,T1
+ or T2,%lo(0xa3014314),T2
+ add T1,R6,T1 !=
+ !pre-LOADed X(13),R13
+ add T1,T2,T1
+ add C,T1,C
+ sll C,15,T2
+ srl C,32-15,C !=
+ or C,T2,C
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0x4e0811a1),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0x4e0811a1),T2
+ !pre-LOADed X(4),R4
+ ld [Aptr],Aval
+ add T1,R13,T1 !=
+ add T1,T2,T1
+ add B,T1,B
+ sll B,21,T2
+ srl B,32-21,B !=
+ or B,T2,B
+ add B,C,B
+
+ orn B,D,T1
+ sethi %hi(0xf7537e82),T2 !=
+ xor T1,C,T1
+ or T2,%lo(0xf7537e82),T2
+ !pre-LOADed X(11),R11
+ ld [Dptr],Dval
+ add T1,R4,T1 !=
+ add T1,T2,T1
+ add A,T1,A
+ sll A,6,T2
+ srl A,32-6,A !=
+ or A,T2,A
+ add A,B,A
+
+ orn A,C,T1
+ sethi %hi(0xbd3af235),T2 !=
+ xor T1,B,T1
+ or T2,%lo(0xbd3af235),T2
+ !pre-LOADed X(2),R2
+ ld [Cptr],Cval
+ add T1,R11,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2
+ srl D,32-10,D !=
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1
+ sethi %hi(0x2ad7d2bb),T2 !=
+ xor T1,A,T1
+ or T2,%lo(0x2ad7d2bb),T2
+ !pre-LOADed X(9),R9
+ ld [Bptr],Bval
+ add T1,R2,T1 !=
+ add Aval,A,Aval
+ add T1,T2,T1
+ st Aval,[Aptr]
+ add C,T1,C !=
+ sll C,15,T2
+ add Dval,D,Dval
+ srl C,32-15,C
+ or C,T2,C !=
+ st Dval,[Dptr]
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0xeb86d391),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0xeb86d391),T2
+ add T1,R9,T1
+ !pre-LOADed X(0),R0
+ mov Aval,A !=
+ add T1,T2,T1
+ mov Dval,D
+ add B,T1,B
+ sll B,21,T2 !=
+ add Cval,C,Cval
+ srl B,32-21,B
+ st Cval,[Cptr]
+ or B,T2,B !=
+ add B,C,B
+
+ deccc %i2
+ mov Cval,C
+ add B,Bval,B !=
+ inc 64,%i1
+ nop
+ st B,[Bptr]
+ nop !=
+
+#ifdef ULTRASPARC
+ bg,a,pt %icc,.Lmd5_block_loop
+#else
+ bg,a .Lmd5_block_loop
+#endif
+ LOAD X(0),R0
+
+#ifdef ASI_PRIMARY_LITTLE
+ mov %o7,%asi
+#endif
+ ret
+ restore %g0,0,%o0
+
+.type md5_block,#function
+.size md5_block,(.-md5_block)
#error MD5 is disabled.
#endif
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * ! MD5_LONG has to be at least 32 bits wide. If it's wider, then !
+ * ! MD5_LONG_LOG2 has to be defined along. !
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
+
+#if defined(WIN16) || defined(__LP32__)
+#define MD5_LONG unsigned long
+#elif defined(_CRAY) || defined(__ILP64__)
+#define MD5_LONG unsigned long
+#define MD5_LONG_LOG2 3
+/*
+ * _CRAY note. I could declare short, but I have no idea what impact
+ * it has on performance on non-T3E machines. I could declare
+ * int, but at least on C90 sizeof(int) can be chosen at compile time.
+ * So I've chosen long...
+ * <appro@fy.chalmers.se>
+ */
+#else
+#define MD5_LONG unsigned int
+#endif
+
#define MD5_CBLOCK 64
-#define MD5_LBLOCK 16
-#define MD5_BLOCK 16
-#define MD5_LAST_BLOCK 56
-#define MD5_LENGTH_BLOCK 8
+#define MD5_LBLOCK (MD5_CBLOCK/4)
#define MD5_DIGEST_LENGTH 16
typedef struct MD5state_st
{
- unsigned long A,B,C,D;
- unsigned long Nl,Nh;
- unsigned long data[MD5_LBLOCK];
+ MD5_LONG A,B,C,D;
+ MD5_LONG Nl,Nh;
+ MD5_LONG data[MD5_LBLOCK];
int num;
} MD5_CTX;
void MD5_Init(MD5_CTX *c);
-void MD5_Update(MD5_CTX *c, const void *data, unsigned long len);
+void MD5_Update(MD5_CTX *c, const unsigned char *data, unsigned long len);
void MD5_Final(unsigned char *md, MD5_CTX *c);
unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md);
void MD5_Transform(MD5_CTX *c, unsigned char *b);
#define INIT_DATA_C (unsigned long)0x98badcfeL
#define INIT_DATA_D (unsigned long)0x10325476L
-# ifdef MD5_ASM
- void md5_block_x86(MD5_CTX *c, unsigned long *p,int num);
-# define md5_block md5_block_x86
-# else
- static void md5_block(MD5_CTX *c, unsigned long *p,int num);
-# endif
void MD5_Init(MD5_CTX *c)
{
c->A=INIT_DATA_A;
c->num=0;
}
-void MD5_Update(MD5_CTX *c, const void *_data, unsigned long len)
+#ifndef md5_block_host_order
+void md5_block_host_order (MD5_CTX *c, const MD5_LONG *X, int num)
{
- register const unsigned char *data=_data;
- register ULONG *p;
- int sw,sc;
- ULONG l;
-
- if (len == 0) return;
-
- l=(c->Nl+(len<<3))&0xffffffffL;
- /* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
- * Wei Dai <weidai@eskimo.com> for pointing it out. */
- if (l < c->Nl) /* overflow */
- c->Nh++;
- c->Nh+=(len>>29);
- c->Nl=l;
-
- if (c->num != 0)
- {
- p=c->data;
- sw=c->num>>2;
- sc=c->num&0x03;
-
- if ((c->num+len) >= MD5_CBLOCK)
- {
- l= p[sw];
- p_c2l(data,l,sc);
- p[sw++]=l;
- for (; sw<MD5_LBLOCK; sw++)
- {
- c2l(data,l);
- p[sw]=l;
- }
- len-=(MD5_CBLOCK-c->num);
-
- md5_block(c,p,64);
- c->num=0;
- /* drop through and do the rest */
- }
- else
- {
- int ew,ec;
-
- c->num+=(int)len;
- if ((sc+len) < 4) /* ugly, add char's to a word */
- {
- l= p[sw];
- p_c2l_p(data,l,sc,len);
- p[sw]=l;
- }
- else
- {
- ew=(c->num>>2);
- ec=(c->num&0x03);
- l= p[sw];
- p_c2l(data,l,sc);
- p[sw++]=l;
- for (; sw < ew; sw++)
- { c2l(data,l); p[sw]=l; }
- if (ec)
- {
- c2l_p(data,l,ec);
- p[sw]=l;
- }
- }
- return;
- }
- }
- /* we now can process the input data in blocks of MD5_CBLOCK
- * chars and save the leftovers to c->data. */
-#ifdef L_ENDIAN
- if ((((unsigned long)data)%sizeof(ULONG)) == 0)
- {
- sw=(int)len/MD5_CBLOCK;
- if (sw > 0)
- {
- sw*=MD5_CBLOCK;
- md5_block(c,(ULONG *)data,sw);
- data+=sw;
- len-=sw;
- }
- }
-#endif
- p=c->data;
- while (len >= MD5_CBLOCK)
- {
-#if defined(L_ENDIAN) || defined(B_ENDIAN)
- if (p != (unsigned long *)data)
- memcpy(p,data,MD5_CBLOCK);
- data+=MD5_CBLOCK;
-#ifdef B_ENDIAN
- for (sw=(MD5_LBLOCK/4); sw; sw--)
- {
- Endian_Reverse32(p[0]);
- Endian_Reverse32(p[1]);
- Endian_Reverse32(p[2]);
- Endian_Reverse32(p[3]);
- p+=4;
- }
-#endif
-#else
- for (sw=(MD5_LBLOCK/4); sw; sw--)
- {
- c2l(data,l); *(p++)=l;
- c2l(data,l); *(p++)=l;
- c2l(data,l); *(p++)=l;
- c2l(data,l); *(p++)=l;
- }
-#endif
- p=c->data;
- md5_block(c,p,64);
- len-=MD5_CBLOCK;
- }
- sc=(int)len;
- c->num=sc;
- if (sc)
- {
- sw=sc>>2; /* words to copy */
-#ifdef L_ENDIAN
- p[sw]=0;
- memcpy(p,data,sc);
-#else
- sc&=0x03;
- for ( ; sw; sw--)
- { c2l(data,l); *(p++)=l; }
- c2l_p(data,l,sc);
- *p=l;
-#endif
- }
- }
-
-void MD5_Transform(MD5_CTX *c, unsigned char *b)
- {
- ULONG p[16];
-#if !defined(L_ENDIAN)
- ULONG *q;
- int i;
-#endif
-
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
- memcpy(p,b,64);
-#ifdef B_ENDIAN
- q=p;
- for (i=(MD5_LBLOCK/4); i; i--)
- {
- Endian_Reverse32(q[0]);
- Endian_Reverse32(q[1]);
- Endian_Reverse32(q[2]);
- Endian_Reverse32(q[3]);
- q+=4;
- }
-#endif
-#else
- q=p;
- for (i=(MD5_LBLOCK/4); i; i--)
- {
- ULONG l;
- c2l(b,l); *(q++)=l;
- c2l(b,l); *(q++)=l;
- c2l(b,l); *(q++)=l;
- c2l(b,l); *(q++)=l;
- }
-#endif
- md5_block(c,p,64);
- }
-
-#ifndef MD5_ASM
-
-static void md5_block(MD5_CTX *c, register ULONG *X, int num)
- {
- register ULONG A,B,C,D;
+ register unsigned long A,B,C,D;
+ /*
+ * In case you wonder why A-D are declared as long and not
+ * as MD5_LONG: doing so results in a slight performance
+ * boost on LP64 architectures. The catch is that we don't
+ * really care if the 32 MSBs of a 64-bit register get polluted
+ * with eventual overflows, as we *save* only the 32 LSBs in
+ * *either* case. Declaring them long excuses the compiler
+ * from keeping the 32 MSBs zeroed, resulting in a 13% performance
+ * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
+ * Well, to be honest, it should rather be said that this
+ * *prevents* performance degradation.
+ *
+ * <appro@fy.chalmers.se>
+ */
A=c->A;
B=c->B;
C=c->C;
D=c->D;
- for (;;)
+
+ for (;num--;X+=HASH_LBLOCK)
{
/* Round 0 */
R0(A,B,C,D,X[ 0], 7,0xd76aa478L);
R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
R3(B,C,D,A,X[ 9],21,0xeb86d391L);
- A+=c->A&0xffffffffL;
- B+=c->B&0xffffffffL;
- c->A=A;
- c->B=B;
- C+=c->C&0xffffffffL;
- D+=c->D&0xffffffffL;
- c->C=C;
- c->D=D;
- X+=16;
- num-=64;
- if (num <= 0) break;
+ A = c->A += A;
+ B = c->B += B;
+ C = c->C += C;
+ D = c->D += D;
}
}
#endif
-void MD5_Final(unsigned char *md, MD5_CTX *c)
+#ifndef md5_block_data_order
+void md5_block_data_order (MD5_CTX *c, const unsigned char *data, int num)
{
- register int i,j;
- register ULONG l;
- register ULONG *p;
- static unsigned char end[4]={0x80,0x00,0x00,0x00};
- unsigned char *cp=end;
+ register unsigned long A,B,C,D,l;
+ /*
+ * In case you wonder why A-D are declared as long and not
+ * as MD5_LONG: doing so results in a slight performance
+ * boost on LP64 architectures. The catch is that we don't
+ * really care if the 32 MSBs of a 64-bit register get polluted
+ * with eventual overflows, as we *save* only the 32 LSBs in
+ * *either* case. Declaring them long excuses the compiler
+ * from keeping the 32 MSBs zeroed, resulting in a 13% performance
+ * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
+ * Well, to be honest, it should rather be said that this
+ * *prevents* performance degradation.
+ *
+ * <appro@fy.chalmers.se>
+ */
+ MD5_LONG X[MD5_LBLOCK];
+ /*
+ * In case you wonder why I don't use c->data for this:
+ * RISCs usually have a handful of registers, and if X is
+ * declared as an automatic array, a good optimizing compiler
+ * will accommodate at least part of it in the register bank
+ * instead of memory.
+ *
+ * <appro@fy.chalmers.se>
+ */
- /* c->num should definitly have room for at least one more byte. */
- p=c->data;
- j=c->num;
- i=j>>2;
+ A=c->A;
+ B=c->B;
+ C=c->C;
+ D=c->D;
- /* purify often complains about the following line as an
- * Uninitialized Memory Read. While this can be true, the
- * following p_c2l macro will reset l when that case is true.
- * This is because j&0x03 contains the number of 'valid' bytes
- * already in p[i]. If and only if j&0x03 == 0, the UMR will
- * occur but this is also the only time p_c2l will do
- * l= *(cp++) instead of l|= *(cp++)
- * Many thanks to Alex Tang <altitude@cic.net> for pickup this
- * 'potential bug' */
-#ifdef PURIFY
- if ((j&0x03) == 0) p[i]=0;
-#endif
- l=p[i];
- p_c2l(cp,l,j&0x03);
- p[i]=l;
- i++;
- /* i is the next 'undefined word' */
- if (c->num >= MD5_LAST_BLOCK)
+ for (;num--;)
{
- for (; i<MD5_LBLOCK; i++)
- p[i]=0;
- md5_block(c,p,64);
- i=0;
- }
- for (; i<(MD5_LBLOCK-2); i++)
- p[i]=0;
- p[MD5_LBLOCK-2]=c->Nl;
- p[MD5_LBLOCK-1]=c->Nh;
- md5_block(c,p,64);
- cp=md;
- l=c->A; l2c(l,cp);
- l=c->B; l2c(l,cp);
- l=c->C; l2c(l,cp);
- l=c->D; l2c(l,cp);
+ HOST_c2l(data,l); X[ 0]=l; HOST_c2l(data,l); X[ 1]=l;
+ /* Round 0 */
+ R0(A,B,C,D,X[ 0], 7,0xd76aa478L); HOST_c2l(data,l); X[ 2]=l;
+ R0(D,A,B,C,X[ 1],12,0xe8c7b756L); HOST_c2l(data,l); X[ 3]=l;
+ R0(C,D,A,B,X[ 2],17,0x242070dbL); HOST_c2l(data,l); X[ 4]=l;
+ R0(B,C,D,A,X[ 3],22,0xc1bdceeeL); HOST_c2l(data,l); X[ 5]=l;
+ R0(A,B,C,D,X[ 4], 7,0xf57c0fafL); HOST_c2l(data,l); X[ 6]=l;
+ R0(D,A,B,C,X[ 5],12,0x4787c62aL); HOST_c2l(data,l); X[ 7]=l;
+ R0(C,D,A,B,X[ 6],17,0xa8304613L); HOST_c2l(data,l); X[ 8]=l;
+ R0(B,C,D,A,X[ 7],22,0xfd469501L); HOST_c2l(data,l); X[ 9]=l;
+ R0(A,B,C,D,X[ 8], 7,0x698098d8L); HOST_c2l(data,l); X[10]=l;
+ R0(D,A,B,C,X[ 9],12,0x8b44f7afL); HOST_c2l(data,l); X[11]=l;
+ R0(C,D,A,B,X[10],17,0xffff5bb1L); HOST_c2l(data,l); X[12]=l;
+ R0(B,C,D,A,X[11],22,0x895cd7beL); HOST_c2l(data,l); X[13]=l;
+ R0(A,B,C,D,X[12], 7,0x6b901122L); HOST_c2l(data,l); X[14]=l;
+ R0(D,A,B,C,X[13],12,0xfd987193L); HOST_c2l(data,l); X[15]=l;
+ R0(C,D,A,B,X[14],17,0xa679438eL);
+ R0(B,C,D,A,X[15],22,0x49b40821L);
+ /* Round 1 */
+ R1(A,B,C,D,X[ 1], 5,0xf61e2562L);
+ R1(D,A,B,C,X[ 6], 9,0xc040b340L);
+ R1(C,D,A,B,X[11],14,0x265e5a51L);
+ R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL);
+ R1(A,B,C,D,X[ 5], 5,0xd62f105dL);
+ R1(D,A,B,C,X[10], 9,0x02441453L);
+ R1(C,D,A,B,X[15],14,0xd8a1e681L);
+ R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L);
+ R1(A,B,C,D,X[ 9], 5,0x21e1cde6L);
+ R1(D,A,B,C,X[14], 9,0xc33707d6L);
+ R1(C,D,A,B,X[ 3],14,0xf4d50d87L);
+ R1(B,C,D,A,X[ 8],20,0x455a14edL);
+ R1(A,B,C,D,X[13], 5,0xa9e3e905L);
+ R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L);
+ R1(C,D,A,B,X[ 7],14,0x676f02d9L);
+ R1(B,C,D,A,X[12],20,0x8d2a4c8aL);
+ /* Round 2 */
+ R2(A,B,C,D,X[ 5], 4,0xfffa3942L);
+ R2(D,A,B,C,X[ 8],11,0x8771f681L);
+ R2(C,D,A,B,X[11],16,0x6d9d6122L);
+ R2(B,C,D,A,X[14],23,0xfde5380cL);
+ R2(A,B,C,D,X[ 1], 4,0xa4beea44L);
+ R2(D,A,B,C,X[ 4],11,0x4bdecfa9L);
+ R2(C,D,A,B,X[ 7],16,0xf6bb4b60L);
+ R2(B,C,D,A,X[10],23,0xbebfbc70L);
+ R2(A,B,C,D,X[13], 4,0x289b7ec6L);
+ R2(D,A,B,C,X[ 0],11,0xeaa127faL);
+ R2(C,D,A,B,X[ 3],16,0xd4ef3085L);
+ R2(B,C,D,A,X[ 6],23,0x04881d05L);
+ R2(A,B,C,D,X[ 9], 4,0xd9d4d039L);
+ R2(D,A,B,C,X[12],11,0xe6db99e5L);
+ R2(C,D,A,B,X[15],16,0x1fa27cf8L);
+ R2(B,C,D,A,X[ 2],23,0xc4ac5665L);
+ /* Round 3 */
+ R3(A,B,C,D,X[ 0], 6,0xf4292244L);
+ R3(D,A,B,C,X[ 7],10,0x432aff97L);
+ R3(C,D,A,B,X[14],15,0xab9423a7L);
+ R3(B,C,D,A,X[ 5],21,0xfc93a039L);
+ R3(A,B,C,D,X[12], 6,0x655b59c3L);
+ R3(D,A,B,C,X[ 3],10,0x8f0ccc92L);
+ R3(C,D,A,B,X[10],15,0xffeff47dL);
+ R3(B,C,D,A,X[ 1],21,0x85845dd1L);
+ R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL);
+ R3(D,A,B,C,X[15],10,0xfe2ce6e0L);
+ R3(C,D,A,B,X[ 6],15,0xa3014314L);
+ R3(B,C,D,A,X[13],21,0x4e0811a1L);
+ R3(A,B,C,D,X[ 4], 6,0xf7537e82L);
+ R3(D,A,B,C,X[11],10,0xbd3af235L);
+ R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
+ R3(B,C,D,A,X[ 9],21,0xeb86d391L);
- /* clear stuff, md5_block may be leaving some stuff on the stack
- * but I'm not worried :-) */
- c->num=0;
-/* memset((char *)&c,0,sizeof(c));*/
+ A = c->A += A;
+ B = c->B += B;
+ C = c->C += C;
+ D = c->D += D;
+ }
}
+#endif
#ifdef undef
int printit(unsigned long *l)
* [including the GNU Public Licence.]
*/
-/* On sparc, this actually slows things down :-( */
-#if defined(sun)
-#undef B_ENDIAN
-#endif
-
#include <stdlib.h>
#include <string.h>
#include <openssl/md5.h>
-#define ULONG unsigned long
-#define UCHAR unsigned char
-#define UINT unsigned int
-
-#undef c2l
-#define c2l(c,l) (l = ((unsigned long)(*((c)++))) , \
- l|=(((unsigned long)(*((c)++)))<< 8), \
- l|=(((unsigned long)(*((c)++)))<<16), \
- l|=(((unsigned long)(*((c)++)))<<24))
+#ifndef MD5_LONG_LOG2
+#define MD5_LONG_LOG2 2 /* default to 32 bits */
+#endif
-#undef p_c2l
-#define p_c2l(c,l,n) { \
- switch (n) { \
- case 0: l =((unsigned long)(*((c)++))); \
- case 1: l|=((unsigned long)(*((c)++)))<< 8; \
- case 2: l|=((unsigned long)(*((c)++)))<<16; \
- case 3: l|=((unsigned long)(*((c)++)))<<24; \
- } \
- }
+#ifdef MD5_ASM
+# if defined(__i386) || defined(WIN32)
+# define md5_block_host_order md5_block_asm_host_order
+# elif defined(__sparc) && defined(ULTRASPARC)
+ void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,int num);
+# define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
+# endif
+#endif
-/* NOTE the pointer is not incremented at the end of this */
-#undef c2l_p
-#define c2l_p(c,l,n) { \
- l=0; \
- (c)+=n; \
- switch (n) { \
- case 3: l =((unsigned long)(*(--(c))))<<16; \
- case 2: l|=((unsigned long)(*(--(c))))<< 8; \
- case 1: l|=((unsigned long)(*(--(c)))) ; \
- } \
- }
+void md5_block_host_order (MD5_CTX *c, const MD5_LONG *p,int num);
+void md5_block_data_order (MD5_CTX *c, const unsigned char *p,int num);
-#undef p_c2l_p
-#define p_c2l_p(c,l,sc,len) { \
- switch (sc) \
- { \
- case 0: l =((unsigned long)(*((c)++))); \
- if (--len == 0) break; \
- case 1: l|=((unsigned long)(*((c)++)))<< 8; \
- if (--len == 0) break; \
- case 2: l|=((unsigned long)(*((c)++)))<<16; \
- } \
- }
+#if defined(__i386)
+/*
+ * *_block_host_order is expected to handle aligned data while
+ * *_block_data_order handles unaligned data. As the algorithm and
+ * the host (x86) are in this case of the same "endianness", these
+ * two are otherwise indistinguishable. But normally you don't want
+ * to call the same function because unaligned access in places
+ * where alignment is expected is usually a "Bad Thing". Indeed,
+ * on RISCs you get punished with a BUS ERROR signal or *severe*
+ * performance degradation. Intel CPUs are in turn perfectly
+ * capable of loading unaligned data without such drastic side
+ * effects. Yes, they say it's slower than an aligned load, but no
+ * exception is generated and therefore the performance degradation
+ * is *incomparable* with RISCs. What we should weigh here is the
+ * cost of unaligned access against the cost of aligning data.
+ * According to my measurements, allowing unaligned access results
+ * in a ~9% performance improvement on a Pentium II operating at
+ * 266MHz. I won't be surprised if the difference is higher
+ * on faster systems:-)
+ *
+ * <appro@fy.chalmers.se>
+ */
+#define md5_block_data_order md5_block_host_order
+#endif
-#undef l2c
-#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
- *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
- *((c)++)=(unsigned char)(((l)>>16)&0xff), \
- *((c)++)=(unsigned char)(((l)>>24)&0xff))
+#define DATA_ORDER_IS_LITTLE_ENDIAN
+
+#define HASH_LONG MD5_LONG
+#define HASH_LONG_LOG2 MD5_LONG_LOG2
+#define HASH_CTX MD5_CTX
+#define HASH_CBLOCK MD5_CBLOCK
+#define HASH_LBLOCK MD5_LBLOCK
+#define HASH_UPDATE MD5_Update
+#define HASH_TRANSFORM MD5_Transform
+#define HASH_FINAL MD5_Final
+#define HASH_BLOCK_HOST_ORDER md5_block_host_order
+#if defined(B_ENDIAN) || defined(md5_block_data_order)
+#define HASH_BLOCK_DATA_ORDER md5_block_data_order
+/*
+ * Little-endians (Intel and Alpha) feel better without this.
+ * It looks like memcpy does a better job than the generic
+ * md5_block_data_order on copying-n-aligning input data.
+ * But frankly speaking I didn't expect such a result on Alpha.
+ * On the other hand I've got this with egcs-1.0.2, and if the
+ * program is compiled with another (better?) compiler it
+ * might turn out the other way around.
+ *
+ * <appro@fy.chalmers.se>
+ */
+#endif
-/* NOTE - c is not incremented as per l2c */
-#undef l2cn
-#define l2cn(l1,l2,c,n) { \
- c+=n; \
- switch (n) { \
- case 8: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
- case 7: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
- case 6: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
- case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \
- case 4: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
- case 3: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
- case 2: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
- case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \
- } \
- }
+#include "../md32_common.h"
-/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
-#if defined(WIN32)
-/* 5 instructions with rotate instruction, else 9 */
-#define Endian_Reverse32(a) \
- { \
- unsigned long l=(a); \
- (a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
- }
-#else
-/* 6 instructions with rotate instruction, else 8 */
-#define Endian_Reverse32(a) \
- { \
- unsigned long l=(a); \
- l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
- (a)=ROTATE(l,16L); \
- }
-#endif
/*
#define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x,y,z) (((x) & (z)) | ((y) & (~(z))))
#define H(b,c,d) ((b) ^ (c) ^ (d))
#define I(b,c,d) (((~(d)) | (b)) ^ (c))
-#undef ROTATE
-#if defined(WIN32)
-#define ROTATE(a,n) _lrotl(a,n)
-#else
-#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
-#endif
-
-
#define R0(a,b,c,d,k,s,t) { \
a+=((k)+(t)+F((b),(c),(d))); \
a=ROTATE(a,s); \
*/
#include <stdio.h>
-#include "md5_locl.h"
+#include <string.h>
+#include <openssl/md5.h>
unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md)
{