From 9eb6dd5165b803715f82b9f5d4b557878f77a580 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 10 Nov 2017 13:34:21 -0500 Subject: [PATCH] handle ascii range individually in each iconv case short-circuiting low bytes before the switch precluded support for character encodings that don't coincide with ascii in this range. this limitation affected iso-2022 encodings, which use the esc byte to introduce a shift sequence, and things like ebcdic. --- src/locale/iconv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/locale/iconv.c b/src/locale/iconv.c index c64bcf35..af0d8283 100644 --- a/src/locale/iconv.c +++ b/src/locale/iconv.c @@ -193,8 +193,9 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri c = *(unsigned char *)*in; l = 1; - if (c >= 128 || type-UTF_32BE < 7U) switch (type) { + switch (type) { case UTF_8: + if (c < 128) break; // optimization l = mbrtowc_utf8(&wc, *in, *inb, &st); if (!l) l++; else if (l == (size_t)-1) goto ilseq; @@ -202,7 +203,8 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri c = wc; break; case US_ASCII: - goto ilseq; + if (c >= 128) goto ilseq; + break; case WCHAR_T: l = sizeof(wchar_t); if (*inb < l) goto starved; @@ -234,6 +236,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri } break; case SHIFT_JIS: + if (c < 128) break; if (c-0xa1 <= 0xdf-0xa1) { c += 0xff61-0xa1; break; @@ -257,6 +260,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri if (!c) goto ilseq; break; case EUC_JP: + if (c < 128) break; l = 2; if (*inb < 2) goto starved; d = *((unsigned char *)*in + 1); @@ -273,9 +277,11 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri if (!c) goto ilseq; break; case GB2312: + if (c < 128) break; if (c < 0xa1) goto ilseq; case GBK: case GB18030: + if (c < 128) break; c -= 0x81; if (c >= 126) goto ilseq; l = 2; @@ -311,6 +317,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri c = gb18030[c][d]; break; case BIG5: + if (c < 128) break; l = 2; if (*inb < 2) goto starved; d = *((unsigned char *)*in + 1); @@ -348,6 +355,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri if (!c) goto ilseq; break; case EUC_KR: + if (c < 128) break; l = 2; if (*inb < 2) goto starved; d = *((unsigned char *)*in + 1); -- 2.25.1