/* Unicode support using libc locale support. */
-void FAST_FUNC init_unicode(void)
+void FAST_FUNC reinit_unicode(const char *LANG)
{
static const char unicode_0x394[] = { 0xce, 0x94, 0 };
size_t width;
- if (unicode_status != UNICODE_UNKNOWN)
- return;
+ /* We pass "" instead of "C" because some libc's have
+ * non-ASCII default locale for setlocale("") call
+ * (this allows users of such libc to have Unicoded
+ * system without having to mess with env).
+ *
+ * We set LC_CTYPE because (a) we may be called with $LC_CTYPE
+ * value in LANG, not with $LC_ALL, (b) internationalized
+ * LC_NUMERIC and LC_TIME are more PITA than benefit
+ * (for one, some utilities have hard time with comma
+ * used as a fractional separator).
+ */
+//TODO: avoid repeated calls by caching last string?
+ setlocale(LC_CTYPE, LANG ? LANG : "");
+
/* In unicode, this is a one character string */
-// can use unicode_strlen(string) too, but otherwise unicode_strlen() is unused
- width = mbstowcs(NULL, unicode_0x394, INT_MAX);
+ width = unicode_strlen(unicode_0x394);
unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF);
}
+void FAST_FUNC init_unicode(void)
+{
+ /* Some people set only $LC_CTYPE, not $LC_ALL, because they want
+ * only Unicode to be activated on their system, not the whole
+ * shebang of wrong decimal points, strange date formats and so on.
+ */
+ if (unicode_status == UNICODE_UNKNOWN) {
+ char *s = getenv("LC_ALL");
+ if (!s) s = getenv("LC_CTYPE");
+ if (!s) s = getenv("LANG");
+ reinit_unicode(s);
+ }
+}
+
#else
/* Homegrown Unicode support. It knows only C and Unicode locales. */
# if ENABLE_FEATURE_CHECK_UNICODE_IN_ENV
-void FAST_FUNC init_unicode(void)
+void FAST_FUNC reinit_unicode(const char *LANG)
{
- char *lang;
-
- if (unicode_status != UNICODE_UNKNOWN)
- return;
-
unicode_status = UNICODE_OFF;
- lang = getenv("LANG");
- if (!lang || !(strstr(lang, ".utf") || strstr(lang, ".UTF")))
+ if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF")))
return;
unicode_status = UNICODE_ON;
}
+
+void FAST_FUNC init_unicode(void)
+{
+ if (unicode_status == UNICODE_UNKNOWN) {
+ char *s = getenv("LC_ALL");
+ if (!s) s = getenv("LC_CTYPE");
+ if (!s) s = getenv("LANG");
+ reinit_unicode(s);
+ }
+}
# endif
static size_t wcrtomb_internal(char *s, wchar_t wc)
/* The rest is mostly same for libc and for "homegrown" support */
-#if 0 // UNUSED
size_t FAST_FUNC unicode_strlen(const char *string)
{
size_t width = mbstowcs(NULL, string, INT_MAX);
return strlen(string);
return width;
}
-#endif
size_t FAST_FUNC unicode_strwidth(const char *string)
{
d++;
}
}
- if (stats)
- stats->byte_count = stats->unicode_count = (d - dst);
+ if (stats) {
+ stats->byte_count = (d - dst);
+ stats->unicode_count = (d - dst);
+ stats->unicode_width = (d - dst);
+ }
return dst;
}
{
return unicode_conv_to_printable2(stats, src, INT_MAX, 0);
}
-char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth)
+char* FAST_FUNC unicode_conv_to_printable_fixedwidth(/*uni_stat_t *stats,*/ const char *src, unsigned width)
{
- return unicode_conv_to_printable2(stats, src, maxwidth, 0);
+ return unicode_conv_to_printable2(/*stats:*/ NULL, src, width, UNI_FLAG_PAD);
}
-char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width)
+
+#ifdef UNUSED
+char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth)
{
- return unicode_conv_to_printable2(stats, src, width, UNI_FLAG_PAD);
+ return unicode_conv_to_printable2(stats, src, maxwidth, 0);
}
-#ifdef UNUSED
unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
{
if (unicode_status != UNICODE_ON) {