From e4fc9ad780e36c84e1ed6b0fc01b3c53ae65ff9d Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 21 Mar 2017 08:59:48 -0400 Subject: [PATCH] search locale name variants for gettext translations often translations will be named only by language, whereas locale names may also include a territory code, modifier, and codeset portion. previously, only translations exactly matching the locale name were loaded. this was a major usability issue, requiring workarounds like symlinks or tweaking of the locale name. with these changes, gettext now searches for translations by first removing the codeset portion of the locale name, then trying the remainder in full, with modifier (@mod) removed, with territory code (_XX) removed, and with both removed. part of the reason gettext lacked support for searching fallbacks before is that the candidate pathname for a translation file was constructed on each call and used as the key to look up an already-mapped translation file. this was very costly/inefficient. we now use the tuple of textdomain binding pointer, locale map pointer, and integer category id as the key for looking up a translation file mapping. based on patch by He X. 
--- src/locale/dcngettext.c | 87 ++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c index b68e24bc..b79b7010 100644 --- a/src/locale/dcngettext.c +++ b/src/locale/dcngettext.c @@ -100,7 +100,9 @@ struct msgcat { size_t map_size; void *volatile plural_rule; volatile int nplurals; - char name[]; + struct binding *binding; + const struct __locale_map *lm; + int cat; }; static char *dummy_gettextdomain() @@ -120,8 +122,8 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, struct msgcat *p; struct __locale_struct *loc = CURRENT_LOCALE; const struct __locale_map *lm; - const char *dirname, *locname, *catname; - size_t dirlen, loclen, catlen, domlen; + size_t domlen; + struct binding *q; if ((unsigned)category >= LC_ALL) goto notrans; @@ -130,55 +132,76 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, domlen = strnlen(domainname, NAME_MAX+1); if (domlen > NAME_MAX) goto notrans; - dirname = gettextdir(domainname, &dirlen); - if (!dirname) goto notrans; + for (q=bindings; q; q=q->next) + if (!strcmp(q->domainname, domainname) && q->active) + break; + if (!q) goto notrans; lm = loc->cat[category]; if (!lm) { notrans: return (char *) ((n == 1) ? 
msgid1 : msgid2); } - locname = lm->name; - - catname = catnames[category]; - catlen = catlens[category]; - loclen = strlen(locname); - - size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3; - char name[namelen+1], *s = name; - - memcpy(s, dirname, dirlen); - s[dirlen] = '/'; - s += dirlen + 1; - memcpy(s, locname, loclen); - s[loclen] = '/'; - s += loclen + 1; - memcpy(s, catname, catlen); - s[catlen] = '/'; - s += catlen + 1; - memcpy(s, domainname, domlen); - s[domlen] = '.'; - s[domlen+1] = 'm'; - s[domlen+2] = 'o'; - s[domlen+3] = 0; for (p=cats; p; p=p->next) - if (!strcmp(p->name, name)) + if (p->binding == q && p->lm == lm && p->cat == category) break; if (!p) { + const char *dirname, *locname, *catname, *modname, *locp; + size_t dirlen, loclen, catlen, modlen, alt_modlen; void *old_cats; size_t map_size; - const void *map = __map_file(name, &map_size); + + dirname = q->dirname; + locname = lm->name; + catname = catnames[category]; + + dirlen = q->dirlen; + loclen = strlen(locname); + catlen = catlens[category]; + + /* Logically split @mod suffix from locale name. */ + modname = memchr(locname, '@', loclen); + if (!modname) modname = locname + loclen; + alt_modlen = modlen = loclen - (modname-locname); + loclen = modname-locname; + + /* Drop .charset identifier; it is not used. */ + const char *csp = memchr(locname, '.', loclen); + if (csp) loclen = csp-locname; + + char name[dirlen+1 + loclen+modlen+1 + catlen+1 + domlen+3 + 1]; + const void *map; + + for (;;) { + snprintf(name, sizeof name, "%s/%.*s%.*s/%s/%s.mo\0", + dirname, (int)loclen, locname, + (int)alt_modlen, modname, catname, domainname); + if (map = __map_file(name, &map_size)) break; + + /* Try dropping @mod, _YY, then both. 
*/ + if (alt_modlen) { + alt_modlen = 0; + } else if ((locp = memchr(locname, '_', loclen))) { + loclen = locp-locname; + alt_modlen = modlen; + } else { + break; + } + } if (!map) goto notrans; - p = calloc(sizeof *p + namelen + 1, 1); + + p = calloc(sizeof *p, 1); if (!p) { __munmap((void *)map, map_size); goto notrans; } + p->cat = category; + p->binding = q; + p->lm = lm; p->map = map; p->map_size = map_size; - memcpy(p->name, name, namelen+1); do { old_cats = cats; p->next = old_cats; -- 2.25.1