From 61a3364d246e72b903da8b76c2e27a225a51351e Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 27 May 2015 03:22:52 -0400 Subject: [PATCH] overhaul locale internals to treat categories roughly uniformly previously, LC_MESSAGES was treated specially as the only category which could be set to a locale name without a definition file, in order to facilitate gettext message translations when no libc locale was available. LC_NUMERIC was completely un-settable, and LC_CTYPE stored a flag intended to be used for a possible future byte-based C locale, instead of storing a __locale_map pointer like the other categories use. this patch changes all categories to be represented by pointers to __locale_map structures, and allows locale names without definition files to be treated as valid locales with trivial definition when used in any category. outwardly visible functional changes should be minor, limited mainly to the strings read back from setlocale and the way gettext handles translations in categories other than LC_MESSAGES. various internal refactoring has also been performed, and improvements in const correctness have been made. 
--- src/internal/libc.h | 4 +- src/internal/locale_impl.h | 8 +-- src/locale/__lctrans.c | 2 +- src/locale/__setlocalecat.c | 116 ++++++++++++++++++++---------------- src/locale/dcngettext.c | 28 ++++----- src/locale/duplocale.c | 11 +--- src/locale/newlocale.c | 7 +-- src/locale/setlocale.c | 72 +++++++++------------- 8 files changed, 112 insertions(+), 136 deletions(-) diff --git a/src/internal/libc.h b/src/internal/libc.h index 212f0e8b..6810cd8b 100644 --- a/src/internal/libc.h +++ b/src/internal/libc.h @@ -8,9 +8,7 @@ struct __locale_map; struct __locale_struct { - volatile int ctype_utf8; - char *messages_name; - struct __locale_map *volatile cat[4]; + const struct __locale_map *volatile cat[6]; }; struct __libc { diff --git a/src/internal/locale_impl.h b/src/internal/locale_impl.h index 5aebbf68..9b8385e9 100644 --- a/src/internal/locale_impl.h +++ b/src/internal/locale_impl.h @@ -9,20 +9,20 @@ struct __locale_map { const void *map; size_t map_size; char name[LOCALE_NAME_MAX+1]; - struct __locale_map *next; + const struct __locale_map *next; }; -int __setlocalecat(locale_t, int, const char *); +const struct __locale_map *__get_locale(int, const char *); const char *__mo_lookup(const void *, size_t, const char *); const char *__lctrans(const char *, const struct __locale_map *); const char *__lctrans_cur(const char *); -#define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)-2]) +#define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)]) #define LCTRANS_CUR(msg) __lctrans_cur(msg) #define CURRENT_LOCALE (__pthread_self()->locale) -#define CURRENT_UTF8 (__pthread_self()->locale->ctype_utf8) +#define CURRENT_UTF8 (!!__pthread_self()->locale->cat[LC_CTYPE]) #undef MB_CUR_MAX #define MB_CUR_MAX (CURRENT_UTF8 ? 
4 : 1) diff --git a/src/locale/__lctrans.c b/src/locale/__lctrans.c index 15994c57..107fe14a 100644 --- a/src/locale/__lctrans.c +++ b/src/locale/__lctrans.c @@ -16,5 +16,5 @@ const char *__lctrans(const char *msg, const struct __locale_map *lm) const char *__lctrans_cur(const char *msg) { - return __lctrans_impl(msg, CURRENT_LOCALE->cat[LC_MESSAGES-2]); + return __lctrans_impl(msg, CURRENT_LOCALE->cat[LC_MESSAGES]); } diff --git a/src/locale/__setlocalecat.c b/src/locale/__setlocalecat.c index e829da56..30aa7fcc 100644 --- a/src/locale/__setlocalecat.c +++ b/src/locale/__setlocalecat.c @@ -15,24 +15,60 @@ const unsigned char *__map_file(const char *, size_t *); int __munmap(void *, size_t); char *__strchrnul(const char *, int); -static struct __locale_map *findlocale(const char *name, size_t n) +static const char envvars[][12] = { + "LC_CTYPE", + "LC_NUMERIC", + "LC_TIME", + "LC_COLLATE", + "LC_MONETARY", + "LC_MESSAGES", +}; + +static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 }; + +static const struct __locale_map c_dot_utf8 = { + .map = empty_mo, + .map_size = sizeof empty_mo, + .name = "C.UTF-8" +}; + +const struct __locale_map *__get_locale(int cat, const char *val) { static int lock[2]; static void *volatile loc_head; - struct __locale_map *p, *new = 0; + const struct __locale_map *p; + struct __locale_map *new = 0; const char *path = 0, *z; char buf[256]; - size_t l; - const void *map; - size_t map_size; + size_t l, n; + + if (!*val) { + (val = getenv("LC_ALL")) && *val || + (val = getenv(envvars[cat])) && *val || + (val = getenv("LANG")) && *val || + (val = "C.UTF-8"); + } + + /* Limit name length and forbid leading dot or any slashes. 
*/ + for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++); + if (val[0]=='.' || val[n]) val = "C.UTF-8"; + int builtin = (val[0]=='C' && !val[1]) + || !strcmp(val, "C.UTF-8") + || !strcmp(val, "POSIX"); + + if (builtin) { + if (cat == LC_CTYPE && val[1]=='.') + return (void *)&c_dot_utf8; + return 0; + } for (p=loc_head; p; p=p->next) - if (!strcmp(name, p->name)) return p; + if (!strcmp(val, p->name)) return p; LOCK(lock); for (p=loc_head; p; p=p->next) - if (!strcmp(name, p->name)) { + if (!strcmp(val, p->name)) { UNLOCK(lock); return p; } @@ -46,9 +82,10 @@ static struct __locale_map *findlocale(const char *name, size_t n) if (l >= sizeof buf - n - 2) continue; memcpy(buf, path, l); buf[l] = '/'; - memcpy(buf+l+1, name, n); + memcpy(buf+l+1, val, n); buf[l+1+n] = 0; - map = __map_file(buf, &map_size); + size_t map_size; + const void *map = __map_file(buf, &map_size); if (map) { new = malloc(sizeof *new); if (!new) { @@ -57,58 +94,31 @@ static struct __locale_map *findlocale(const char *name, size_t n) } new->map = map; new->map_size = map_size; - memcpy(new->name, name, n); + memcpy(new->name, val, n); new->name[n] = 0; new->next = loc_head; loc_head = new; break; } } - UNLOCK(lock); - return new; -} - -static const char envvars[][12] = { - "LC_CTYPE", - "LC_NUMERIC", - "LC_TIME", - "LC_COLLATE", - "LC_MONETARY", - "LC_MESSAGES", -}; -int __setlocalecat(locale_t loc, int cat, const char *val) -{ - if (!*val) { - (val = getenv("LC_ALL")) && *val || - (val = getenv(envvars[cat])) && *val || - (val = getenv("LANG")) && *val || - (val = "C.UTF-8"); + /* If no locale definition was found, make a locale map + * object anyway to store the name, which is kept for the + * sake of being able to do message translations at the + * application level. */ + if (!new && (new = malloc(sizeof *new))) { + new->map = empty_mo; + new->map_size = sizeof empty_mo; + memcpy(new->name, val, n); + new->name[n] = 0; + new->next = loc_head; + loc_head = new; } - size_t n; - for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++); - if (val[0]=='.' || val[n]) val = "C.UTF-8"; - int builtin = (val[0]=='C' && !val[1]) - || !strcmp(val, "C.UTF-8") - || !strcmp(val, "POSIX"); + /* For LC_CTYPE, never return a null pointer unless the + * requested name was "C" or "POSIX". */ + if (!new && cat == LC_CTYPE) new = (void *)&c_dot_utf8; - switch (cat) { - case LC_CTYPE: - loc->ctype_utf8 = !builtin || val[1]=='.'; - break; - case LC_MESSAGES: - if (builtin) { - loc->messages_name[0] = 0; - } else { - memcpy(loc->messages_name, val, n); - loc->messages_name[n] = 0; - } - /* fall through */ - default: - loc->cat[cat-2] = builtin ?
0 : findlocale(val, n); - case LC_NUMERIC: - break; - } - return 0; + UNLOCK(lock); + return new; } diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c index 30dd41d4..a5ff8475 100644 --- a/src/locale/dcngettext.c +++ b/src/locale/dcngettext.c @@ -84,13 +84,15 @@ char *bindtextdomain(const char *domainname, const char *dirname) } static const char catnames[][12] = { + "LC_CTYPE", + "LC_NUMERIC", "LC_TIME", "LC_COLLATE", "LC_MONETARY", "LC_MESSAGES", }; -static const char catlens[] = { 7, 10, 11, 11 }; +static const char catlens[] = { 8, 10, 7, 10, 11, 11 }; struct msgcat { struct msgcat *next; @@ -117,10 +119,12 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, static struct msgcat *volatile cats; struct msgcat *p; struct __locale_struct *loc = CURRENT_LOCALE; - struct __locale_map *lm; + const struct __locale_map *lm; const char *dirname, *locname, *catname; size_t dirlen, loclen, catlen, domlen; + if ((unsigned)category >= LC_ALL) goto notrans; + if (!domainname) domainname = __gettextdomain(); domlen = strlen(domainname); @@ -129,25 +133,15 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, dirname = gettextdir(domainname, &dirlen); if (!dirname) goto notrans; - switch (category) { - case LC_MESSAGES: - locname = loc->messages_name; - if (!locname || !*locname) goto notrans; - break; - case LC_TIME: - case LC_MONETARY: - case LC_COLLATE: - lm = loc->cat[category-2]; - if (!lm) goto notrans; - locname = lm->name; - break; - default: + lm = loc->cat[category]; + if (!lm) { notrans: return (char *) ((n == 1) ? 
msgid1 : msgid2); } + locname = lm->name; - catname = catnames[category-2]; - catlen = catlens[category-2]; + catname = catnames[category]; + catlen = catlens[category]; loclen = strlen(locname); size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3; diff --git a/src/locale/duplocale.c b/src/locale/duplocale.c index b87c933e..030b64cb 100644 --- a/src/locale/duplocale.c +++ b/src/locale/duplocale.c @@ -5,17 +5,10 @@ locale_t __duplocale(locale_t old) { - locale_t new = calloc(1, sizeof *new + LOCALE_NAME_MAX + 1); + locale_t new = malloc(sizeof *new); if (!new) return 0; - new->messages_name = (void *)(new+1); - if (old == LC_GLOBAL_LOCALE) old = &libc.global_locale; - new->ctype_utf8 = old->ctype_utf8; - if (old->messages_name) - strcpy(new->messages_name, old->messages_name); - - for (size_t i=0; i<sizeof new->cat/sizeof new->cat[0]; i++) - new->cat[i] = old->cat[i]; + *new = *old; return new; } diff --git a/src/locale/newlocale.c b/src/locale/newlocale.c index 39501d0c..4e0cbd34 100644 --- a/src/locale/newlocale.c +++ b/src/locale/newlocale.c @@ -8,17 +8,16 @@ locale_t __newlocale(int mask, const char *name, locale_t loc) int i; if (!loc) { - loc = calloc(1, sizeof *loc + LOCALE_NAME_MAX + 1); + loc = malloc(sizeof *loc); if (!loc) return 0; - loc->messages_name = (void *)(loc+1); for (i=0; i<LC_ALL; i++) if (!(mask & (1<<i))) - __setlocalecat(loc, i, ""); + loc->cat[i] = __get_locale(i, ""); } for (i=0; i<LC_ALL; i++) if (mask & (1<<i)) - __setlocalecat(loc, i, name); + loc->cat[i] = __get_locale(i, name); return loc; } diff --git a/src/locale/setlocale.c b/src/locale/setlocale.c index 32a8fcab..8dae5a4e 100644 --- a/src/locale/setlocale.c +++ b/src/locale/setlocale.c @@ -5,38 +5,23 @@ #include "libc.h" #include "atomic.h" -static char buf[2+4*(LOCALE_NAME_MAX+1)]; +static char buf[LC_ALL*(LOCALE_NAME_MAX+1)]; static char *setlocale_one_unlocked(int cat, const char *name) { - struct __locale_map *lm; + const struct __locale_map *lm; - if (name) libc.global_locale.cat[cat] = lm = __get_locale(cat, name); + else lm = libc.global_locale.cat[cat]; - switch
(cat) { - case LC_CTYPE: - return libc.global_locale.ctype_utf8 ? "C.UTF-8" : "C"; - case LC_NUMERIC: - return "C"; - case LC_MESSAGES: - return libc.global_locale.messages_name[0] - ? libc.global_locale.messages_name : "C"; - default: - lm = libc.global_locale.cat[cat-2]; - return lm ? lm->name : "C"; - } + return lm ? (char *)lm->name : "C"; } +char *__strchrnul(const char *, int); + char *setlocale(int cat, const char *name) { static volatile int lock[2]; - struct __locale_map *lm; - int i, j; - - if (!libc.global_locale.messages_name) { - libc.global_locale.messages_name = - buf + 2 + 3*(LOCALE_NAME_MAX+1); - } if ((unsigned)cat > LC_ALL) return 0; @@ -48,34 +33,31 @@ char *setlocale(int cat, const char *name) * performs both the serialization and deserialization, depends * on the format, so it can easily be changed if needed. */ if (cat == LC_ALL) { + int i; if (name) { - char part[LOCALE_NAME_MAX+1]; - if (name[0] && name[1]==';' - && strlen(name) > 2 + 3*(LOCALE_NAME_MAX+1)) { - part[0] = name[0]; - part[1] = 0; - setlocale(LC_CTYPE, part); - part[LOCALE_NAME_MAX] = 0; - for (i=LC_TIME; iname, strlen(lm->name)); + char *s = buf; + for (i=0; iname : "C"; + size_t l = strlen(part); + memcpy(s, part, l); + s[l] = ';'; + s += l+1; } + *--s = 0; UNLOCK(lock); return buf; } -- 2.25.1