this only made the function unnecessarily slow on systems with
unaligned access, but would of course crash on systems that can't do
unaligned accesses (none of which have ports yet).
size_t *wd;
const size_t *ws;
- if (((uintptr_t)s & ALIGN) != ((uintptr_t)d & ALIGN)) {
+ if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++);
if (!n || !*s) goto tail;
wd=(void *)d; ws=(const void *)s;