this patch adjusts libc components which use the multibyte functions
internally, and which depend on them operating in a particular
encoding, to make the appropriate locale changes before calling them
and restore the calling thread's locale afterwards. activating the
byte-based C locale without these changes would cause regressions in
stdio and iconv.
in the case of iconv, the current implementation was simply using the
multibyte functions as UTF-8 conversions. setting a multibyte UTF-8
locale for the duration of the iconv operation allows the code to
continue working.
in the case of stdio, POSIX requires that FILE streams have an
encoding rule bound at the time of setting wide orientation. as long
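as all locales, including the C locale, used the same encoding,
treating high bytes as UTF-8, there was no need to store an encoding
rule as part of the stream's state. with a byte-based C locale that is
no longer the case, so the rule must now be recorded per stream.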
a new locale field in the FILE structure points to the locale that
should be made active during fgetwc/fputwc/ungetwc on the stream. it
cannot point to the locale active at the time the stream becomes
oriented, because this locale could be mutable (the global locale) or
could be destroyed (locale_t objects produced by newlocale) before the
stream is closed. instead, a pointer to the static C or C.UTF-8 locale
object added in commit
aeeac9ca5490d7d90fe061ab72da446c01ddf746
is used. this is valid since categories other than LC_CTYPE will not
affect these functions.
unsigned char *shend;
off_t shlim, shcnt;
FILE *prev_locked, *next_locked;
+ struct __locale_struct *locale;
};
size_t __stdio_read(FILE *, unsigned char *, size_t);
#include <stdlib.h>
#include <limits.h>
#include <stdint.h>
+#include "locale_impl.h"
#define UTF_32BE 0300
#define UTF_16LE 0301
int err;
unsigned char type = map[-1];
unsigned char totype = tomap[-1];
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
if (!in || !*in || !*inb) return 0;
+ *ploc = UTF8_LOCALE;
+
for (; *inb; *in+=l, *inb-=l) {
c = *(unsigned char *)*in;
l = 1;
break;
}
}
+ *ploc = loc;
return x;
ilseq:
err = EILSEQ;
x = -1;
end:
errno = err;
+ *ploc = loc;
return x;
}
#include "stdio_impl.h"
+#include "locale_impl.h"
#include <wchar.h>
#include <errno.h>
-wint_t __fgetwc_unlocked(FILE *f)
+static wint_t __fgetwc_unlocked_internal(FILE *f)
{
mbstate_t st = { 0 };
wchar_t wc;
unsigned char b;
size_t l;
- if (f->mode <= 0) fwide(f, 1);
-
/* Convert character from buffer if possible */
if (f->rpos < f->rend) {
l = mbrtowc(&wc, (void *)f->rpos, f->rend - f->rpos, &st);
return wc;
}
+wint_t __fgetwc_unlocked(FILE *f)
+{
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+ if (f->mode <= 0) fwide(f, 1);
+ *ploc = f->locale;
+ wchar_t wc = __fgetwc_unlocked_internal(f);
+ *ploc = loc;
+ return wc;
+}
+
wint_t fgetwc(FILE *f)
{
wint_t c;
#include "stdio_impl.h"
+#include "locale_impl.h"
#include <wchar.h>
#include <limits.h>
#include <ctype.h>
{
char mbc[MB_LEN_MAX];
int l;
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
if (f->mode <= 0) fwide(f, 1);
+ *ploc = f->locale;
if (isascii(c)) {
c = putc_unlocked(c, f);
l = wctomb(mbc, c);
if (l < 0 || __fwritex((void *)mbc, l, f) < l) c = WEOF;
}
+ if (c==WEOF) f->flags |= F_ERR;
+ *ploc = loc;
return c;
}
#include "stdio_impl.h"
+#include "locale_impl.h"
#include <wchar.h>
int fputws(const wchar_t *restrict ws, FILE *restrict f)
{
unsigned char buf[BUFSIZ];
size_t l=0;
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
FLOCK(f);
fwide(f, 1);
+ *ploc = f->locale;
while (ws && (l = wcsrtombs((void *)buf, (void*)&ws, sizeof buf, 0))+1 > 1)
if (__fwritex(buf, l, f) < l) {
FUNLOCK(f);
+ *ploc = loc;
return -1;
}
FUNLOCK(f);
+ *ploc = loc;
return l; /* 0 or -1 */
}
-#include <wchar.h>
#include "stdio_impl.h"
-
-#define SH (8*sizeof(int)-1)
-#define NORMALIZE(x) ((x)>>SH | -((-(x))>>SH))
+#include "locale_impl.h"
int fwide(FILE *f, int mode)
{
FLOCK(f);
- if (!f->mode) f->mode = NORMALIZE(mode);
+ if (mode) {
+ if (!f->locale) f->locale = MB_CUR_MAX==1
+ ? C_LOCALE : UTF8_LOCALE;
+ if (!f->mode) f->mode = mode>0 ? 1 : -1;
+ }
mode = f->mode;
FUNLOCK(f);
return mode;
#include "stdio_impl.h"
+#include "locale_impl.h"
#include <wchar.h>
#include <limits.h>
#include <ctype.h>
{
unsigned char mbc[MB_LEN_MAX];
int l=1;
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
FLOCK(f);
if (f->mode <= 0) fwide(f, 1);
+ *ploc = f->locale;
if (!f->rpos) __toread(f);
if (!f->rpos || f->rpos < f->buf - UNGET + l || c == WEOF ||
(!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)) {
FUNLOCK(f);
+ *ploc = loc;
return WEOF;
}
f->flags &= ~F_EOF;
FUNLOCK(f);
+ *ploc = loc;
return c;
}