From 0f04379d9cd08107e2915d6121b3831f8df08e70 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Tue, 19 Jul 2005 22:33:03 +0000 Subject: [PATCH] This update gets endianness-neutrality right and adds second required entry point, md5_block_asm_data_order. --- crypto/md5/asm/md5-ia64.S | 249 ++++++++++++++++++++------------------ 1 file changed, 133 insertions(+), 116 deletions(-) diff --git a/crypto/md5/asm/md5-ia64.S b/crypto/md5/asm/md5-ia64.S index 900263224f..73273fa828 100644 --- a/crypto/md5/asm/md5-ia64.S +++ b/crypto/md5/asm/md5-ia64.S @@ -86,6 +86,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define pPad2 p12 #define pPad3 p13 #define pSkip p8 +// These two below shall remain constant throughout the whole routine +#define pDataOrder p14 +#define pHostOrder p15 #define A_ out24 #define B_ out25 @@ -159,6 +162,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define _NOUTPUT 0 #define _NROTATE 24 /* this must be <= _NINPUTS */ +#if defined(_HPUX_SOURCE) && !defined(_LP64) +#define ADDP addp4 +#else +#define ADDP add +#endif // Macros for getting the left and right portions of little-endian words @@ -225,78 +233,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define LCSave r21 #define PFSSave r20 #define PRSave r22 -#define pAgain p14 -#define pOff p14 - - .rodata - // Values are specified as bytes to ensure they are - // in little-endian byte-order. 
- .align 4 -md5_round_constants: - data1 0x78, 0xa4, 0x6a, 0xd7 // 0 - data1 0x56, 0xb7, 0xc7, 0xe8 // 1 - data1 0xdb, 0x70, 0x20, 0x24 // 2 - data1 0xee, 0xce, 0xbd, 0xc1 // 3 - data1 0xaf, 0x0f, 0x7c, 0xf5 // 4 - data1 0x2a, 0xc6, 0x87, 0x47 // 5 - data1 0x13, 0x46, 0x30, 0xa8 // 6 - data1 0x01, 0x95, 0x46, 0xfd // 7 - data1 0xd8, 0x98, 0x80, 0x69 // 8 - data1 0xaf, 0xf7, 0x44, 0x8b // 9 - data1 0xb1, 0x5b, 0xff, 0xff // 10 - data1 0xbe, 0xd7, 0x5c, 0x89 // 11 - data1 0x22, 0x11, 0x90, 0x6b // 12 - data1 0x93, 0x71, 0x98, 0xfd // 13 - data1 0x8e, 0x43, 0x79, 0xa6 // 14 - data1 0x21, 0x08, 0xb4, 0x49 // 15 - data1 0x62, 0x25, 0x1e, 0xf6 // 16 - data1 0x40, 0xb3, 0x40, 0xc0 // 17 - data1 0x51, 0x5a, 0x5e, 0x26 // 18 - data1 0xaa, 0xc7, 0xb6, 0xe9 // 19 - data1 0x5d, 0x10, 0x2f, 0xd6 // 20 - data1 0x53, 0x14, 0x44, 0x02 // 21 - data1 0x81, 0xe6, 0xa1, 0xd8 // 22 - data1 0xc8, 0xfb, 0xd3, 0xe7 // 23 - data1 0xe6, 0xcd, 0xe1, 0x21 // 24 - data1 0xd6, 0x07, 0x37, 0xc3 // 25 - data1 0x87, 0x0d, 0xd5, 0xf4 // 26 - data1 0xed, 0x14, 0x5a, 0x45 // 27 - data1 0x05, 0xe9, 0xe3, 0xa9 // 28 - data1 0xf8, 0xa3, 0xef, 0xfc // 29 - data1 0xd9, 0x02, 0x6f, 0x67 // 30 - data1 0x8a, 0x4c, 0x2a, 0x8d // 31 - data1 0x42, 0x39, 0xfa, 0xff // 32 - data1 0x81, 0xf6, 0x71, 0x87 // 33 - data1 0x22, 0x61, 0x9d, 0x6d // 34 - data1 0x0c, 0x38, 0xe5, 0xfd // 35 - data1 0x44, 0xea, 0xbe, 0xa4 // 36 - data1 0xa9, 0xcf, 0xde, 0x4b // 37 - data1 0x60, 0x4b, 0xbb, 0xf6 // 38 - data1 0x70, 0xbc, 0xbf, 0xbe // 39 - data1 0xc6, 0x7e, 0x9b, 0x28 // 40 - data1 0xfa, 0x27, 0xa1, 0xea // 41 - data1 0x85, 0x30, 0xef, 0xd4 // 42 - data1 0x05, 0x1d, 0x88, 0x04 // 43 - data1 0x39, 0xd0, 0xd4, 0xd9 // 44 - data1 0xe5, 0x99, 0xdb, 0xe6 // 45 - data1 0xf8, 0x7c, 0xa2, 0x1f // 46 - data1 0x65, 0x56, 0xac, 0xc4 // 47 - data1 0x44, 0x22, 0x29, 0xf4 // 48 - data1 0x97, 0xff, 0x2a, 0x43 // 49 - data1 0xa7, 0x23, 0x94, 0xab // 50 - data1 0x39, 0xa0, 0x93, 0xfc // 51 - data1 0xc3, 0x59, 0x5b, 0x65 // 52 - data1 0x92, 
0xcc, 0x0c, 0x8f // 53 - data1 0x7d, 0xf4, 0xef, 0xff // 54 - data1 0xd1, 0x5d, 0x84, 0x85 // 55 - data1 0x4f, 0x7e, 0xa8, 0x6f // 56 - data1 0xe0, 0xe6, 0x2c, 0xfe // 57 - data1 0x14, 0x43, 0x01, 0xa3 // 58 - data1 0xa1, 0x11, 0x08, 0x4e // 59 - data1 0x82, 0x7e, 0x53, 0xf7 // 60 - data1 0x35, 0xf2, 0x3a, 0xbd // 61 - data1 0xbb, 0xd2, 0xd7, 0x2a // 62 - data1 0x91, 0xd3, 0x86, 0xeb // 63 +#define pAgain p63 +#define pOff p63 .text @@ -320,52 +258,47 @@ md5_round_constants: */ + .type md5_block_asm_data_order, @function + .global md5_block_asm_data_order + .align 32 + .proc md5_block_asm_data_order +md5_block_asm_data_order: +{ .mib + cmp.eq pDataOrder,pHostOrder = r0,r0 + br.sptk.many .md5_block +};; + .endp md5_block_asm_data_order + .type md5_block_asm_host_order, @function .global md5_block_asm_host_order - .align 32 .proc md5_block_asm_host_order md5_block_asm_host_order: .prologue -#ifndef __LP64__ +{ .mib + cmp.eq pHostOrder,pDataOrder = r0,r0 +};; +.md5_block: { .mmi - .save ar.pfs, PFSSave + .save ar.pfs, PFSSave alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT - addp4 DPtrIn = 0, DPtrIn - addp4 CtxPtr0 = 0, CtxPtr0 + ADDP CtxPtr1 = 8, CtxPtr0 + mov CTable = ip } -;; { .mmi - nop 0x0 - and InAlign = 0x3, DPtrIn - .save ar.lc, LCSave + ADDP DPtrIn = 0, DPtrIn + ADDP CtxPtr0 = 0, CtxPtr0 + .save ar.lc, LCSave mov LCSave = ar.lc } -#else +;; +.pred.rel "mutex",pDataOrder,pHostOrder { .mmi - .save ar.pfs, PFSSave - alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT +(pDataOrder) add CTable = .md5_tbl_data_order#-.md5_block#, CTable +(pHostOrder) add CTable = .md5_tbl_host_order#-.md5_block#, CTable and InAlign = 0x3, DPtrIn - .save ar.lc, LCSave - mov LCSave = ar.lc } -#endif -{ .mmi - addl CTable = @ltoffx(md5_round_constants), gp - ;; - ld8.mov CTable = [CTable], md5_round_constants // native byte-order - add CtxPtr1 = 8, CtxPtr0 -} -#ifdef B_ENDIAN -{ - .mmi - rum psr.be // switch to little-endian mode - nop.m 0x0 - nop.i 0x0 -} 
-#endif -;; { .mmi ld4 AccumA = [CtxPtr0], 4 ld4 AccumC = [CtxPtr1], 4 @@ -379,15 +312,12 @@ md5_block_asm_host_order: ld4 AccumD = [CtxPtr1] dep DPtr_ = 0, DPtrIn, 0, 2 } ;; - -{ .mmi +#if defined(_HPUX_SOURCE) || defined(B_ENDIAN) +(pDataOrder) rum psr.be;; // switch to little-endian +#endif +{ .mmb ld4 CTable0 = [CTable], 4 cmp.ne pOff, p0 = 0, InAlign -} ;; - -{ .mib - nop.m 0x0 - nop.i 0x0 (pOff) br.cond.spnt.many .md5_unaligned } ;; @@ -431,9 +361,9 @@ md5_block_asm_host_order: } ;; .md5_exit: -// Note that we switch back to the entry endianess AFTER storing so -// that the memory image of the hash is preserved. - +#if defined(_HPUX_SOURCE) || defined(B_ENDIAN) +(pDataOrder) sum psr.be;; // switch back to big-endian mode +#endif { .mmi st4 [CtxPtr0] = AccumB, -4 st4 [CtxPtr1] = AccumD, -4 @@ -445,9 +375,6 @@ md5_block_asm_host_order: mov ar.lc = LCSave } ;; { .mib -#ifdef B_ENDIAN - sum psr.be // switch back to big-endian mode -#endif mov ar.pfs = PFSSave br.ret.sptk.few rp } ;; @@ -1001,9 +928,99 @@ md5_digest_block##offset: \ nop 0x0 ; \ nop 0x0 ; \ br.cond.sptk.many md5_digest_GHI ; \ -} ; \ +} ;; \ .endp md5digestBlock ## offset MD5FBLOCK(1) MD5FBLOCK(2) MD5FBLOCK(3) + + .align 64 + .type md5_constants, @object +md5_constants: +.md5_tbl_data_order: // To ensure little-endian data + // order, code as bytes. 
+ data1 0x78, 0xa4, 0x6a, 0xd7 // 0 + data1 0x56, 0xb7, 0xc7, 0xe8 // 1 + data1 0xdb, 0x70, 0x20, 0x24 // 2 + data1 0xee, 0xce, 0xbd, 0xc1 // 3 + data1 0xaf, 0x0f, 0x7c, 0xf5 // 4 + data1 0x2a, 0xc6, 0x87, 0x47 // 5 + data1 0x13, 0x46, 0x30, 0xa8 // 6 + data1 0x01, 0x95, 0x46, 0xfd // 7 + data1 0xd8, 0x98, 0x80, 0x69 // 8 + data1 0xaf, 0xf7, 0x44, 0x8b // 9 + data1 0xb1, 0x5b, 0xff, 0xff // 10 + data1 0xbe, 0xd7, 0x5c, 0x89 // 11 + data1 0x22, 0x11, 0x90, 0x6b // 12 + data1 0x93, 0x71, 0x98, 0xfd // 13 + data1 0x8e, 0x43, 0x79, 0xa6 // 14 + data1 0x21, 0x08, 0xb4, 0x49 // 15 + data1 0x62, 0x25, 0x1e, 0xf6 // 16 + data1 0x40, 0xb3, 0x40, 0xc0 // 17 + data1 0x51, 0x5a, 0x5e, 0x26 // 18 + data1 0xaa, 0xc7, 0xb6, 0xe9 // 19 + data1 0x5d, 0x10, 0x2f, 0xd6 // 20 + data1 0x53, 0x14, 0x44, 0x02 // 21 + data1 0x81, 0xe6, 0xa1, 0xd8 // 22 + data1 0xc8, 0xfb, 0xd3, 0xe7 // 23 + data1 0xe6, 0xcd, 0xe1, 0x21 // 24 + data1 0xd6, 0x07, 0x37, 0xc3 // 25 + data1 0x87, 0x0d, 0xd5, 0xf4 // 26 + data1 0xed, 0x14, 0x5a, 0x45 // 27 + data1 0x05, 0xe9, 0xe3, 0xa9 // 28 + data1 0xf8, 0xa3, 0xef, 0xfc // 29 + data1 0xd9, 0x02, 0x6f, 0x67 // 30 + data1 0x8a, 0x4c, 0x2a, 0x8d // 31 + data1 0x42, 0x39, 0xfa, 0xff // 32 + data1 0x81, 0xf6, 0x71, 0x87 // 33 + data1 0x22, 0x61, 0x9d, 0x6d // 34 + data1 0x0c, 0x38, 0xe5, 0xfd // 35 + data1 0x44, 0xea, 0xbe, 0xa4 // 36 + data1 0xa9, 0xcf, 0xde, 0x4b // 37 + data1 0x60, 0x4b, 0xbb, 0xf6 // 38 + data1 0x70, 0xbc, 0xbf, 0xbe // 39 + data1 0xc6, 0x7e, 0x9b, 0x28 // 40 + data1 0xfa, 0x27, 0xa1, 0xea // 41 + data1 0x85, 0x30, 0xef, 0xd4 // 42 + data1 0x05, 0x1d, 0x88, 0x04 // 43 + data1 0x39, 0xd0, 0xd4, 0xd9 // 44 + data1 0xe5, 0x99, 0xdb, 0xe6 // 45 + data1 0xf8, 0x7c, 0xa2, 0x1f // 46 + data1 0x65, 0x56, 0xac, 0xc4 // 47 + data1 0x44, 0x22, 0x29, 0xf4 // 48 + data1 0x97, 0xff, 0x2a, 0x43 // 49 + data1 0xa7, 0x23, 0x94, 0xab // 50 + data1 0x39, 0xa0, 0x93, 0xfc // 51 + data1 0xc3, 0x59, 0x5b, 0x65 // 52 + data1 0x92, 0xcc, 0x0c, 0x8f // 53 + data1 
0x7d, 0xf4, 0xef, 0xff // 54 + data1 0xd1, 0x5d, 0x84, 0x85 // 55 + data1 0x4f, 0x7e, 0xa8, 0x6f // 56 + data1 0xe0, 0xe6, 0x2c, 0xfe // 57 + data1 0x14, 0x43, 0x01, 0xa3 // 58 + data1 0xa1, 0x11, 0x08, 0x4e // 59 + data1 0x82, 0x7e, 0x53, 0xf7 // 60 + data1 0x35, 0xf2, 0x3a, 0xbd // 61 + data1 0xbb, 0xd2, 0xd7, 0x2a // 62 + data1 0x91, 0xd3, 0x86, 0xeb // 63 + +.md5_tbl_host_order: // OS data order, might as well + // be little-endian. + data4 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee // 0 + data4 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 // 4 + data4 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be // 8 + data4 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 // 12 + data4 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa // 16 + data4 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 // 20 + data4 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed // 24 + data4 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a // 28 + data4 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c // 32 + data4 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 // 36 + data4 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 // 40 + data4 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 // 44 + data4 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 // 48 + data4 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 // 52 + data4 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 // 56 + data4 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 // 60 +.size md5_constants#,64*4*2 -- 2.25.1