fix aliasing-based undefined behavior in mbsrtowcs
author Rich Felker <dalias@aerifal.cx>
Sun, 13 Oct 2019 21:21:36 +0000 (17:21 -0400)
committer Rich Felker <dalias@aerifal.cx>
Sun, 13 Oct 2019 21:21:36 +0000 (17:21 -0400)
mbsrtowcs contains "vectorized" loops to quickly step over bytes
without the high bit set; these have undefined behavior by virtue of
aliasing uint32_t over top of char data for the accesses.

commit 4d0a82170a25464c39522d7190b9fe302045ddb2 fixed the
corresponding usage in string functions by using the may_alias
attribute conditional on __GNUC__ and disabled the vectorized code in
its absence. do the same for mbsrtowcs.

src/multibyte/mbsrtowcs.c

index 0ee8b69cbfda8d90354b5b298f8ba7ce567d2957..9b2f2dfbb023b6db8924bc7081c5c128594c8e1d 100644 (file)
@@ -38,12 +38,15 @@ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbs
        }
 
        if (!ws) for (;;) {
+#ifdef __GNUC__
+               typedef uint32_t __attribute__((__may_alias__)) w32;
                if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
-                       while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
+                       while (!(( *(w32*)s | *(w32*)s-0x01010101) & 0x80808080)) {
                                s += 4;
                                wn -= 4;
                        }
                }
+#endif
                if (*s-1u < 0x7f) {
                        s++;
                        wn--;
@@ -69,8 +72,10 @@ resume0:
                        *src = (const void *)s;
                        return wn0;
                }
+#ifdef __GNUC__
+               typedef uint32_t __attribute__((__may_alias__)) w32;
                if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
-                       while (wn>=5 && !(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
+                       while (wn>=5 && !(( *(w32*)s | *(w32*)s-0x01010101) & 0x80808080)) {
                                *ws++ = *s++;
                                *ws++ = *s++;
                                *ws++ = *s++;
@@ -78,6 +83,7 @@ resume0:
                                wn -= 4;
                        }
                }
+#endif
                if (*s-1u < 0x7f) {
                        *ws++ = *s++;
                        wn--;