From: Rich Felker Date: Sun, 13 Oct 2019 21:21:36 +0000 (-0400) Subject: fix aliasing-based undefined behavior in mbsrtowcs X-Git-Tag: v1.1.24~2 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=716745e00e304a650a8eef57c15fbd326168096e;p=oweals%2Fmusl.git fix aliasing-based undefined behavior in mbsrtowcs mbsrtowcs contains "vectorized" loops to quickly step over bytes without the high bit set; these have undefined behavior by virtue of aliasing uint32_t over top of char data for the accesses. commit 4d0a82170a25464c39522d7190b9fe302045ddb2 fixed the corresponding usage in string functions by using the may_alias attribute conditional on __GNUC__ and disabled the vectorized code in its absence. do the same for mbsrtowcs. --- diff --git a/src/multibyte/mbsrtowcs.c b/src/multibyte/mbsrtowcs.c index 0ee8b69c..9b2f2dfb 100644 --- a/src/multibyte/mbsrtowcs.c +++ b/src/multibyte/mbsrtowcs.c @@ -38,12 +38,15 @@ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbs } if (!ws) for (;;) { +#ifdef __GNUC__ + typedef uint32_t __attribute__((__may_alias__)) w32; if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) { - while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) { + while (!(( *(w32*)s | *(w32*)s-0x01010101) & 0x80808080)) { s += 4; wn -= 4; } } +#endif if (*s-1u < 0x7f) { s++; wn--; @@ -69,8 +72,10 @@ resume0: *src = (const void *)s; return wn0; } +#ifdef __GNUC__ + typedef uint32_t __attribute__((__may_alias__)) w32; if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) { - while (wn>=5 && !(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) { + while (wn>=5 && !(( *(w32*)s | *(w32*)s-0x01010101) & 0x80808080)) { *ws++ = *s++; *ws++ = *s++; *ws++ = *s++; @@ -78,6 +83,7 @@ resume0: wn -= 4; } } +#endif if (*s-1u < 0x7f) { *ws++ = *s++; wn--;