/* The part below is a section of matrixssl-3-7-2b-open/crypto/cryptolib.h
* Changes are flagged with //bbox
- * TODO:
- * Take a look at "roll %%cl" part... rotates by constant use fewer registers,
- * and on many Intel CPUs rotates by %cl are slower: they take 2 cycles, not 1.
*/
/******************************************************************************/
#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && \
!defined(INTEL_CC) && !defined(PS_NO_ASM)
-static inline unsigned ROL(unsigned word, int i)
+static ALWAYS_INLINE unsigned ROL(unsigned word, int i) // Rotate 'word' left by 'i' bits with the x86 "roll" instruction; returns the rotated value.
{
+ if (__builtin_constant_p(i)) //box: count is known at compile time, pass it as an immediate operand
+ // Rotates by a constant use fewer registers (the count need not be loaded into %cl),
+ // and on many Intel CPUs rotates by %cl take 2 cycles, not 1.
+ asm ("roll %2,%0" //box: %2 is the immediate count, %0 is 'word'
+ :"=r" (word) // output: 'word', in a general register
+ :"0" (word),"i" (i)); // inputs: 'word' tied to operand 0, 'i' as an immediate
+ else //box: count is not a compile-time constant, rotate by %cl instead
asm ("roll %%cl,%0"
:"=r" (word)
:"0" (word),"c" (i));
return word;
}
-static inline unsigned ROR(unsigned word, int i)
+static ALWAYS_INLINE unsigned ROR(unsigned word, int i)
{
+ if (__builtin_constant_p(i)) //box
+ asm ("rorl %2,%0" //box
+ :"=r" (word)
+ :"0" (word),"i" (i));
+ else //box
asm ("rorl %%cl,%0"
:"=r" (word)
:"0" (word),"c" (i));