-/*
- * Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED
- * and HASH_BLOCK_HOST_ORDER ought to be the same if input data
- * and host are of the same "endianess". It's possible to mask
- * this with blank #define HASH_BLOCK_DATA_ORDER though...
- *
- * <appro@fy.chalmers.se>
- */
-#if defined(B_ENDIAN)
-# if defined(DATA_ORDER_IS_BIG_ENDIAN)
-# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
-# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
-# endif
-# elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-# ifndef HOST_FETCH32
-# ifdef LE_FETCH32
-# define HOST_FETCH32(p,l) LE_FETCH32(p)
-# elif defined(REVERSE_FETCH32)
-# define HOST_FETCH32(p,l) REVERSE_FETCH32(p,l)
-# endif
-# endif
-# endif
-#elif defined(L_ENDIAN)
-# if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
-# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
-# endif
-# elif defined(DATA_ORDER_IS_BIG_ENDIAN)
-# ifndef HOST_FETCH32
-# ifdef BE_FETCH32
-# define HOST_FETCH32(p,l) BE_FETCH32(p)
-# elif defined(REVERSE_FETCH32)
-# define HOST_FETCH32(p,l) REVERSE_FETCH32(p,l)
-# endif
+#if defined(DATA_ORDER_IS_BIG_ENDIAN)
+
+#ifndef PEDANTIC
+# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
+# if ((defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)) || \
+ (defined(__x86_64) || defined(__x86_64__))
+# if !defined(B_ENDIAN)
+ /*
+ * This gives ~30-40% performance improvement in SHA-256 compiled
+ * with gcc [on P4]. Well, first macro to be frank. We can pull
+ * this trick on x86* platforms only, because these CPUs can fetch
+ * unaligned data without raising an exception.
+ */
+# define HOST_c2l(c,l) ({ unsigned int r=*((const unsigned int *)(c)); \
+ asm ("bswapl %0":"=r"(r):"0"(r)); \
+ (c)+=4; (l)=r; })
+# define HOST_l2c(l,c) ({ unsigned int r=(l); \
+ asm ("bswapl %0":"=r"(r):"0"(r)); \
+ *((unsigned int *)(c))=r; (c)+=4; r; })
+# endif
+# elif defined(__aarch64__)
+# if defined(__BYTE_ORDER__)
+# if defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
+# define HOST_c2l(c,l) ({ unsigned int r; \
+ asm ("rev %w0,%w1" \
+ :"=r"(r) \
+ :"r"(*((const unsigned int *)(c))));\
+ (c)+=4; (l)=r; })
+# define HOST_l2c(l,c) ({ unsigned int r; \
+ asm ("rev %w0,%w1" \
+ :"=r"(r) \
+ :"r"((unsigned int)(l)));\
+ *((unsigned int *)(c))=r; (c)+=4; r; })
+# elif defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
+# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
+# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))