From ab9672ae73248f51e30f4553c4b8878525e46383 Mon Sep 17 00:00:00 2001
From: Rich Felker
Date: Mon, 13 Oct 2014 20:59:42 -0400
Subject: [PATCH] implement uchar.h (C11 UTF-16/32 conversion) interfaces

---
Review notes (not part of the commit message):
 - c16rtomb, mbrtoc16, mbrtoc32: use a function-private mbstate_t when
   ps is null, as C11 7.28.1 requires, instead of dereferencing null
   (or, for mbrtoc32, sharing mbrtowc's internal state).
 - uchar.h: provide the char16_t/char32_t typedefs for pre-C++11 C++ too.
 - #include targets and line breaks restored; hunk lengths and diffstat
   recomputed. The blob ids on the index lines still name the original
   file contents.

 include/alltypes.h.in    |  2 ++
 include/uchar.h          | 31 +++++++++++++++++++++++++++++++
 include/wchar.h          |  6 +-----
 src/multibyte/c16rtomb.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 src/multibyte/c32rtomb.c |  9 +++++++++
 src/multibyte/mbrtoc16.c | 35 +++++++++++++++++++++++++++++++++++
 src/multibyte/mbrtoc32.c | 17 +++++++++++++++++
 7 files changed, 137 insertions(+), 5 deletions(-)
 create mode 100644 include/uchar.h
 create mode 100644 src/multibyte/c16rtomb.c
 create mode 100644 src/multibyte/c32rtomb.c
 create mode 100644 src/multibyte/mbrtoc16.c
 create mode 100644 src/multibyte/mbrtoc32.c

diff --git a/include/alltypes.h.in b/include/alltypes.h.in
index c4ca5d52..98c2f3b1 100644
--- a/include/alltypes.h.in
+++ b/include/alltypes.h.in
@@ -58,6 +58,8 @@ TYPEDEF struct { unsigned __attr[2]; } pthread_rwlockattr_t;
 
 TYPEDEF struct _IO_FILE FILE;
 
+TYPEDEF struct __mbstate_t { unsigned __opaque1, __opaque2; } mbstate_t;
+
 TYPEDEF struct __locale_struct * locale_t;
 
 TYPEDEF struct __sigset_t { unsigned long __bits[128/sizeof(long)]; } sigset_t;
diff --git a/include/uchar.h b/include/uchar.h
new file mode 100644
index 00000000..8dabf1ed
--- /dev/null
+++ b/include/uchar.h
@@ -0,0 +1,31 @@
+#ifndef _UCHAR_H
+#define _UCHAR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* char16_t and char32_t are built-in types from C++11 on; C and
+ * earlier C++ need the typedefs. */
+#if __cplusplus < 201103L
+typedef unsigned short char16_t;
+typedef unsigned char32_t;
+#endif
+
+#define __NEED_mbstate_t
+#define __NEED_size_t
+
+#include <features.h>
+#include <bits/alltypes.h>
+
+size_t c16rtomb(char *__restrict, char16_t, mbstate_t *__restrict);
+size_t mbrtoc16(char16_t *__restrict, const char *__restrict, size_t, mbstate_t *__restrict);
+
+size_t c32rtomb(char *__restrict, char32_t, mbstate_t *__restrict);
+size_t mbrtoc32(char32_t *__restrict, const char *__restrict, size_t, mbstate_t *__restrict);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/wchar.h b/include/wchar.h
index 9fd967cc..52da6395 100644
--- a/include/wchar.h
+++ b/include/wchar.h
@@ -12,6 +12,7 @@ extern "C" {
 #define __NEED_size_t
 #define __NEED_wchar_t
 #define __NEED_wint_t
+#define __NEED_mbstate_t
 
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
@@ -42,11 +43,6 @@ extern "C" {
 #undef WEOF
 #define WEOF 0xffffffffU
 
-typedef struct __mbstate_t
-{
-	unsigned __opaque1, __opaque2;
-} mbstate_t;
-
 wchar_t *wcscpy (wchar_t *__restrict, const wchar_t *__restrict);
 wchar_t *wcsncpy (wchar_t *__restrict, const wchar_t *__restrict, size_t);
 
diff --git a/src/multibyte/c16rtomb.c b/src/multibyte/c16rtomb.c
new file mode 100644
index 00000000..2e8ec970
--- /dev/null
+++ b/src/multibyte/c16rtomb.c
@@ -0,0 +1,42 @@
+#include <uchar.h>
+#include <errno.h>
+#include <wchar.h>
+
+/* Convert one UTF-16 code unit to multibyte form. A high surrogate is
+ * only recorded in the state (return 0); the following low surrogate
+ * completes it and the combined character is passed to wcrtomb.
+ * A pending high surrogate not followed by a low one fails with EILSEQ. */
+size_t c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps)
+{
+	/* C11 requires a private state object when ps is null. */
+	static mbstate_t internal_state;
+	if (!ps) ps = &internal_state;
+	unsigned *x = (unsigned *)ps;
+	wchar_t wc;
+
+	if (!s) {
+		if (*x) goto ilseq;
+		return 1;
+	}
+
+	if (!*x && c16 - 0xd800u < 0x400) {
+		/* Stash (high - 0xd800 + 0x40) << 10: the final code point
+		 * with its low ten bits still zero. */
+		*x = c16 - 0xd7c0 << 10;
+		return 0;
+	}
+
+	if (*x) {
+		if (c16 - 0xdc00u >= 0x400) goto ilseq;
+		else wc = *x + c16 - 0xdc00;
+		*x = 0;
+	} else {
+		wc = c16;
+	}
+	return wcrtomb(s, wc, 0);
+
+ilseq:
+	*x = 0;
+	errno = EILSEQ;
+	return -1;
+}
diff --git a/src/multibyte/c32rtomb.c b/src/multibyte/c32rtomb.c
new file mode 100644
index 00000000..67851328
--- /dev/null
+++ b/src/multibyte/c32rtomb.c
@@ -0,0 +1,9 @@
+#include <uchar.h>
+#include <wchar.h>
+
+/* Forwards to wcrtomb, passing the char32_t value as a wchar_t and
+ * handing s and ps through unchanged. */
+size_t c32rtomb(char *restrict s, char32_t c32, mbstate_t *restrict ps)
+{
+	return wcrtomb(s, c32, ps);
+}
diff --git a/src/multibyte/mbrtoc16.c b/src/multibyte/mbrtoc16.c
new file mode 100644
index 00000000..74b7d77e
--- /dev/null
+++ b/src/multibyte/mbrtoc16.c
@@ -0,0 +1,35 @@
+#include <uchar.h>
+#include <wchar.h>
+
+/* Decode one multibyte character into UTF-16 via mbrtowc. A value of
+ * 0x10000 or above yields its high surrogate first; the low surrogate
+ * is parked in the state and handed out by the next call, which
+ * returns (size_t)-3. Otherwise the return value is that of mbrtowc. */
+size_t mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n, mbstate_t *restrict ps)
+{
+	/* C11 requires a private state object when ps is null. */
+	static mbstate_t internal_state;
+	if (!ps) ps = &internal_state;
+	unsigned *pending = (unsigned *)ps;
+
+	if (!s) return mbrtoc16(0, "", 1, ps);
+
+	/* mbrtowc states for partial UTF-8 characters have the high bit set;
+	 * we use nonzero states without high bit for pending surrogates. */
+	if ((int)*pending > 0) {
+		if (pc16) *pc16 = *pending;
+		*pending = 0;
+		return -3;
+	}
+
+	wchar_t wc;
+	size_t ret = mbrtowc(&wc, s, n, ps);
+	if (ret <= 4) {
+		if (wc >= 0x10000) {
+			*pending = (wc & 0x3ff) + 0xdc00;
+			wc = 0xd7c0 + (wc >> 10);
+		}
+		if (pc16) *pc16 = wc;
+	}
+	return ret;
+}
diff --git a/src/multibyte/mbrtoc32.c b/src/multibyte/mbrtoc32.c
new file mode 100644
index 00000000..c6d20824
--- /dev/null
+++ b/src/multibyte/mbrtoc32.c
@@ -0,0 +1,17 @@
+#include <uchar.h>
+#include <wchar.h>
+
+/* Decode one multibyte character via mbrtowc and store it through pc32
+ * when the return value is at most 4 and pc32 is non-null. */
+size_t mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n, mbstate_t *restrict ps)
+{
+	/* C11 requires a private state object when ps is null; passing the
+	 * null through would make mbrtowc fall back to its own state. */
+	static mbstate_t internal_state;
+	if (!ps) ps = &internal_state;
+	if (!s) return mbrtoc32(0, "", 1, ps);
+	wchar_t wc;
+	size_t ret = mbrtowc(&wc, s, n, ps);
+	if (ret <= 4 && pc32) *pc32 = wc;
+	return ret;
+}
-- 
2.25.1