X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=libbb%2Funicode.c;h=99dc1dfa6afee28e7834b0f98fdd32a0ab79b6ac;hb=b2320370be14811459718b9fe418efed75ea3615;hp=d1c6167c78fa5506f11a20c13ac77dc180d3a385;hpb=a659b81dfa435aa19130a8c7dd1bfe8fa9a22131;p=oweals%2Fbusybox.git

diff --git a/libbb/unicode.c b/libbb/unicode.c
index d1c6167c7..99dc1dfa6 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -4,7 +4,7 @@
  *
  * Copyright (C) 2009 Denys Vlasenko
  *
- * Licensed under GPL version 2, see file LICENSE in this tarball for details.
+ * Licensed under GPLv2, see file LICENSE in this source tree.
  */
 #include "libbb.h"
 #include "unicode.h"
@@ -23,15 +23,24 @@ uint8_t unicode_status;
 
 /* Unicode support using libc locale support. */
 
-void FAST_FUNC init_unicode(void)
+void FAST_FUNC reinit_unicode(const char *LANG)
 {
-	/* In unicode, this is a one character string */
 	static const char unicode_0x394[] = { 0xce, 0x94, 0 };
+	size_t width;
 
-	if (unicode_status != UNICODE_UNKNOWN)
-		return;
+//TODO: avoid repeated setlocale() calls by caching the last LANG string?
+	setlocale(LC_ALL, (LANG && LANG[0]) ? LANG : "C");
+
+	/* In a UTF-8 locale, unicode_0x394 (two bytes) is a one character string */
+// could use unicode_strlen(unicode_0x394) too, but unicode_strlen() is otherwise unused
+	width = mbstowcs(NULL, unicode_0x394, INT_MAX);
+	unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF);
+}
 
-	unicode_status = unicode_strlen(unicode_0x394) == 1 ? UNICODE_ON : UNICODE_OFF;
+void FAST_FUNC init_unicode(void)
+{
+	if (unicode_status == UNICODE_UNKNOWN)
+		reinit_unicode(getenv("LANG"));
 }
 
 #else
@@ -39,19 +48,19 @@ void FAST_FUNC init_unicode(void)
 /* Homegrown Unicode support. It knows only C and Unicode locales. */
 
 # if ENABLE_FEATURE_CHECK_UNICODE_IN_ENV
-void FAST_FUNC init_unicode(void)
+void FAST_FUNC reinit_unicode(const char *LANG)
 {
-	char *lang;
-
-	if (unicode_status != UNICODE_UNKNOWN)
-		return;
-
 	unicode_status = UNICODE_OFF;
-	lang = getenv("LANG");
-	if (!lang || !(strstr(lang, ".utf") || strstr(lang, ".UTF")))
+	if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF")))
 		return;
 	unicode_status = UNICODE_ON;
 }
+
+void FAST_FUNC init_unicode(void)
+{
+	if (unicode_status == UNICODE_UNKNOWN)
+		reinit_unicode(getenv("LANG"));
+}
 # endif
 
 static size_t wcrtomb_internal(char *s, wchar_t wc)
@@ -129,7 +138,7 @@ size_t FAST_FUNC wcstombs(char *dest, const wchar_t *src, size_t n)
 		size_t len = wcrtomb_internal(tbuf, wc);
 
 		if (len > n)
-			len = n;
+			break;
 		memcpy(dest, tbuf, len);
 		if (wc == L'\0')
 			return org_n - n;
@@ -240,7 +249,7 @@ int FAST_FUNC iswpunct(wint_t wc)
 }
 
 
-# if LAST_SUPPORTED_WCHAR >= 0x300
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
 struct interval {
 	uint16_t first;
 	uint16_t last;
@@ -418,9 +427,9 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
  * This implementation assumes that wchar_t characters are encoded
  * in ISO 10646.
  */
-static int wcwidth(unsigned ucs)
+int FAST_FUNC wcwidth(unsigned ucs)
 {
-# if LAST_SUPPORTED_WCHAR >= 0x300
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
 	/* sorted list of non-overlapping intervals of non-spacing characters */
 	/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
 #  define BIG_(a,b) { a, b },
@@ -579,14 +588,14 @@ static int wcwidth(unsigned ucs)
 	if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
 		return -1;
 	/* Quick abort if it is an obviously invalid char */
-	if (ucs > LAST_SUPPORTED_WCHAR)
+	if (ucs > CONFIG_LAST_SUPPORTED_WCHAR)
 		return -1;
 
 	/* Optimization: no combining chars below 0x300 */
-	if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
+	if (CONFIG_LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
 		return 1;
 
-# if LAST_SUPPORTED_WCHAR >= 0x300
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
 	/* Binary search in table of non-spacing characters */
 	if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
 		return 0;
@@ -594,25 +603,25 @@ static int wcwidth(unsigned ucs)
 		return 0;
 
 	/* Optimization: all chars below 0x1100 are not double-width */
-	if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
+	if (CONFIG_LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
 		return 1;
 
-#  if LAST_SUPPORTED_WCHAR >= 0x1100
+#  if CONFIG_LAST_SUPPORTED_WCHAR >= 0x1100
 	/* Invalid code points: */
 	/* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
 	/* Private Use Area (e000..f8ff) */
 	/* Noncharacters fdd0..fdef */
-	if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
-	 || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
+	if ((CONFIG_LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
+	 || (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
 	) {
 		return -1;
 	}
 	/* 0xfffe and 0xffff in every plane are invalid */
-	if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
+	if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
 		return -1;
 	}
 
-#   if LAST_SUPPORTED_WCHAR >= 0x10000
+#   if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000
 	if (ucs >= 0x10000) {
 		/* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
 		static const struct interval combining0x10000[] = {
@@ -625,7 +634,7 @@ static int wcwidth(unsigned ucs)
 		if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
 			return 0;
 		/* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
-		if (LAST_SUPPORTED_WCHAR >= 0xE0001
+		if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xE0001
 		 && (  ucs == 0xE0001
 		    || (ucs >= 0xE0020 && ucs <= 0xE007F)
 		    || (ucs >= 0xE0100 && ucs <= 0xE01EF)
@@ -644,7 +653,7 @@ static int wcwidth(unsigned ucs)
 		|| ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
 		|| ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
 		|| (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
-#   if LAST_SUPPORTED_WCHAR >= 0xac00
+#   if CONFIG_LAST_SUPPORTED_WCHAR >= 0xac00
 		|| (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
 		|| (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
 		|| (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */
@@ -954,6 +963,7 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
 
 /* The rest is mostly same for libc and for "homegrown" support */
 
+#if 0 // UNUSED
 size_t FAST_FUNC unicode_strlen(const char *string)
 {
 	size_t width = mbstowcs(NULL, string, INT_MAX);
@@ -961,6 +971,14 @@ size_t FAST_FUNC unicode_strlen(const char *string)
 		return strlen(string);
 	return width;
 }
+#endif
+
+size_t FAST_FUNC unicode_strwidth(const char *string)
+{
+	uni_stat_t uni_stat;
+	printable_string(&uni_stat, string);
+	return uni_stat.unicode_width;
+}
 
 static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
 {
@@ -994,8 +1012,11 @@ static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char
 				d++;
 			}
 		}
-		if (stats)
-			stats->byte_count = stats->unicode_count = (d - dst);
+		if (stats) {
+			stats->byte_count = (d - dst);
+			stats->unicode_count = (d - dst);
+			stats->unicode_width = (d - dst);
+		}
 		return dst;
 	}
 
@@ -1093,16 +1114,17 @@ char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src)
 {
 	return unicode_conv_to_printable2(stats, src, INT_MAX, 0);
 }
-char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth)
+char* FAST_FUNC unicode_conv_to_printable_fixedwidth(/*uni_stat_t *stats,*/ const char *src, unsigned width)
 {
-	return unicode_conv_to_printable2(stats, src, maxwidth, 0);
+	return unicode_conv_to_printable2(/*stats:*/ NULL, src, width, UNI_FLAG_PAD);
 }
-char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width)
+
+#ifdef UNUSED
+char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth)
 {
-	return unicode_conv_to_printable2(stats, src, width, UNI_FLAG_PAD);
+	return unicode_conv_to_printable2(stats, src, maxwidth, 0);
 }
 
-#ifdef UNUSED
 unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
 {
 	if (unicode_status != UNICODE_ON) {