simplify/optimize iconv utf-8 case
[oweals/musl.git] / src / locale / iconv.c
index fd2f2e01497085fa1cd4c1b4ad68f9ae71571c59..fd51b73e86da1adbd5789d9bbbeb0008b5a80051 100644 (file)
@@ -100,6 +100,21 @@ static size_t find_charmap(const void *name)
        return -1;
 }
 
+static iconv_t combine_to_from(size_t t, size_t f) /* Pack the "to" value t and the "from" value f into a single iconv_t handle. */
+{
+       return (void *)(f<<16 | t); /* f is shifted above bit 15 and t is OR-ed into the low bits; extract_to() masks with 0xffff, so t presumably must fit in 16 bits -- TODO confirm */
+}
+
+static size_t extract_from(iconv_t cd) /* Recover the "from" value that combine_to_from() stored in the upper bits of cd. */
+{
+       return (size_t)cd >> 16; /* drops the low 16 bits, which hold the "to" value */
+}
+
+static size_t extract_to(iconv_t cd) /* Recover the "to" value that combine_to_from() stored in the low 16 bits of cd. */
+{
+       return (size_t)cd & 0xffff; /* mask off everything above bit 15, where the "from" value lives */
+}
+
 iconv_t iconv_open(const char *to, const char *from)
 {
        size_t f, t;
@@ -111,12 +126,7 @@ iconv_t iconv_open(const char *to, const char *from)
                return (iconv_t)-1;
        }
 
-       return (void *)(f<<16 | t);
-}
-
-int iconv_close(iconv_t cd)
-{
-       return 0;
+       return combine_to_from(t, f);
 }
 
 static unsigned get_16(const unsigned char *s, int e)
@@ -159,12 +169,11 @@ static unsigned legacy_map(const unsigned char *map, unsigned c)
        return x ? x : c;
 }
 
-size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
+size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
 {
        size_t x=0;
-       unsigned long cd = (unsigned long)cd0;
-       unsigned to = cd & 0xffff;
-       unsigned from = cd >> 16;
+       unsigned to = extract_to(cd);
+       unsigned from = extract_from(cd);
        const unsigned char *map = charmaps+from+1;
        const unsigned char *tomap = charmaps+to+1;
        mbstate_t st = {0};
@@ -184,16 +193,17 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
                c = *(unsigned char *)*in;
                l = 1;
 
-               if (c >= 128 || type-UTF_32BE < 7U) switch (type) {
+               switch (type) {
                case UTF_8:
+                       if (c < 128) break;
                        l = mbrtowc_utf8(&wc, *in, *inb, &st);
-                       if (!l) l++;
-                       else if (l == (size_t)-1) goto ilseq;
-                       else if (l == (size_t)-2) goto starved;
+                       if (l == (size_t)-1) goto ilseq;
+                       if (l == (size_t)-2) goto starved;
                        c = wc;
                        break;
                case US_ASCII:
-                       goto ilseq;
+                       if (c >= 128) goto ilseq;
+                       break;
                case WCHAR_T:
                        l = sizeof(wchar_t);
                        if (*inb < l) goto starved;
@@ -225,6 +235,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
                        }
                        break;
                case SHIFT_JIS:
+                       if (c < 128) break;
                        if (c-0xa1 <= 0xdf-0xa1) {
                                c += 0xff61-0xa1;
                                break;
@@ -248,6 +259,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
                        if (!c) goto ilseq;
                        break;
                case EUC_JP:
+                       if (c < 128) break;
                        l = 2;
                        if (*inb < 2) goto starved;
                        d = *((unsigned char *)*in + 1);
@@ -264,9 +276,11 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
                        if (!c) goto ilseq;
                        break;
                case GB2312:
+                       if (c < 128) break;
                        if (c < 0xa1) goto ilseq;
                case GBK:
                case GB18030:
+                       if (c < 128) break;
                        c -= 0x81;
                        if (c >= 126) goto ilseq;
                        l = 2;
@@ -302,6 +316,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
                        c = gb18030[c][d];
                        break;
                case BIG5:
+                       if (c < 128) break;
                        l = 2;
                        if (*inb < 2) goto starved;
                        d = *((unsigned char *)*in + 1);
@@ -322,7 +337,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
                                        if (totype-0300U > 8) k = 2;
                                        else k = "\10\4\4\10\4\4\10\2\4"[totype-0300];
                                        if (k > *outb) goto toobig;
-                                       x += iconv((iconv_t)(uintptr_t)to,
+                                       x += iconv(combine_to_from(to, 0),
                                                &(char *){"\303\212\314\204"
                                                "\303\212\314\214"
                                                "\303\252\314\204"
@@ -339,6 +354,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
                        if (!c) goto ilseq;
                        break;
                case EUC_KR:
+                       if (c < 128) break;
                        l = 2;
                        if (*inb < 2) goto starved;
                        d = *((unsigned char *)*in + 1);