From 74e334dcd177b585c64ddafa732a3dc9e3f6b5ec Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Feb 2015 18:30:57 +0100 Subject: [PATCH] x86_64/memset: avoid performing final store twice The code does a potentially misaligned 8-byte store to fill the tail of the buffer. Then it fills the initial part of the buffer which is a multiple of 8 bytes. Therefore, if the size is divisible by 8, we were storing the last word twice. This patch decrements the byte count before dividing it by 8, making one less store in the "size is divisible by 8" case, and not changing anything in all other cases. All at the cost of replacing one MOV insn with an LEA insn. Signed-off-by: Denys Vlasenko --- src/string/x86_64/memset.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/string/x86_64/memset.s b/src/string/x86_64/memset.s index 263336b5..3cc8fcf6 100644 --- a/src/string/x86_64/memset.s +++ b/src/string/x86_64/memset.s @@ -9,7 +9,7 @@ memset: cmp $16,%rdx jb 1f - mov %rdx,%rcx + lea -1(%rdx),%rcx mov %rdi,%r8 shr $3,%rcx mov %rax,-8(%rdi,%rdx) -- 2.25.1