consolidate xz format comment. no code changes

[oweals/busybox.git] / libbb / unicode.c
diff --git a/libbb/unicode.c b/libbb/unicode.c

index 83e70b412ee5748d71bb062f86e8eac96886c9d0..d6fcf7a43cb2e67ebe9457482c7fb062afad3282 100644 (file)
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -25,13 +25,15 @@ uint8_t unicode_status;
  
  void FAST_FUNC init_unicode(void)
  {
-       /* In unicode, this is a one character string */
         static const char unicode_0x394[] = { 0xce, 0x94, 0 };
+       size_t width;
  
         if (unicode_status != UNICODE_UNKNOWN)
                 return;
-
-       unicode_status = unicode_strlen(unicode_0x394) == 1 ? UNICODE_ON : UNICODE_OFF;
+       /* Probe: bytes 0xce 0x94 are U+0394 in UTF-8; if they convert to exactly one wide char, unicode is on */
+// unicode_strlen(unicode_0x394) would work here too, but it has no other users and is now compiled out (#if 0), so call mbstowcs() directly
+       width = mbstowcs(NULL, unicode_0x394, INT_MAX);
+       unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF);
  }
  
  #else
@@ -240,7 +242,7 @@ int FAST_FUNC iswpunct(wint_t wc)
  }
  
  
-# if LAST_SUPPORTED_WCHAR >= 0x300
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
  struct interval {
         uint16_t first;
         uint16_t last;
@@ -418,12 +420,11 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
   * This implementation assumes that wchar_t characters are encoded
   * in ISO 10646.
   */
-static int wcwidth(unsigned ucs)
+int FAST_FUNC wcwidth(unsigned ucs)
  {
-# if LAST_SUPPORTED_WCHAR >= 0x300
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
         /* sorted list of non-overlapping intervals of non-spacing characters */
         /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
-       static const struct interval combining[] = {
  #  define BIG_(a,b) { a, b },
  #  define PAIR(a,b)
  #  define ARRAY /* PAIR if < 0x4000 and no more than 4 chars big */ \
@@ -557,10 +558,9 @@ static int wcwidth(unsigned ucs)
                 BIG_(0xFE20, 0xFE23) \
                 BIG_(0xFEFF, 0xFEFF) \
                 BIG_(0xFFF9, 0xFFFB)
-               ARRAY
+       static const struct interval combining[] = { ARRAY };
  #  undef BIG_
  #  undef PAIR
-       };
  #  define BIG_(a,b)
  #  define PAIR(a,b) (a << 2) | (b-a),
         static const uint16_t combining1[] = { ARRAY };
@@ -581,14 +581,14 @@ static int wcwidth(unsigned ucs)
         if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
                 return -1;
         /* Quick abort if it is an obviously invalid char */
-       if (ucs > LAST_SUPPORTED_WCHAR)
+       if (ucs > CONFIG_LAST_SUPPORTED_WCHAR)
                 return -1;
  
         /* Optimization: no combining chars below 0x300 */
-       if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
+       if (CONFIG_LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
                 return 1;
  
-# if LAST_SUPPORTED_WCHAR >= 0x300
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
         /* Binary search in table of non-spacing characters */
         if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
                 return 0;
@@ -596,25 +596,25 @@ static int wcwidth(unsigned ucs)
                 return 0;
  
         /* Optimization: all chars below 0x1100 are not double-width */
-       if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
+       if (CONFIG_LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
                 return 1;
  
-#  if LAST_SUPPORTED_WCHAR >= 0x1100
+#  if CONFIG_LAST_SUPPORTED_WCHAR >= 0x1100
         /* Invalid code points: */
         /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
         /* Private Use Area (e000..f8ff) */
         /* Noncharacters fdd0..fdef */
-       if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
-        || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
+       if ((CONFIG_LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
+        || (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
         ) {
                 return -1;
         }
         /* 0xfffe and 0xffff in every plane are invalid */
-       if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
+       if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
                 return -1;
         }
  
-#   if LAST_SUPPORTED_WCHAR >= 0x10000
+#   if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000
         if (ucs >= 0x10000) {
                 /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
                 static const struct interval combining0x10000[] = {
@@ -627,7 +627,7 @@ static int wcwidth(unsigned ucs)
                 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
                         return 0;
                 /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
-               if (LAST_SUPPORTED_WCHAR >= 0xE0001
+               if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xE0001
                  && (  ucs == 0xE0001
                     || (ucs >= 0xE0020 && ucs <= 0xE007F)
                     || (ucs >= 0xE0100 && ucs <= 0xE01EF)
@@ -646,7 +646,7 @@ static int wcwidth(unsigned ucs)
                 || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
                 || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
                 || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
-#   if LAST_SUPPORTED_WCHAR >= 0xac00
+#   if CONFIG_LAST_SUPPORTED_WCHAR >= 0xac00
                 || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
                 || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
                 || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */
@@ -668,7 +668,6 @@ int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
          * http://www.unicode.org/Public/5.2.0/ucd/extracted/DerivedBidiClass.txt
          * Bidi_Class=Left_To_Right | Bidi_Class=Arabic_Letter
          */
-       static const struct interval rtl_b[] = {
  #  define BIG_(a,b) { a, b },
  #  define PAIR(a,b)
  #  define ARRAY \
@@ -723,10 +722,9 @@ int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
                 {0x10E7F, 0x10FFF},
                 {0x1E800, 0x1EFFF}
                 */
-               ARRAY
+       static const struct interval rtl_b[] = { ARRAY };
  #  undef BIG_
  #  undef PAIR
-       };
  #  define BIG_(a,b)
  #  define PAIR(a,b) (a << 2) | (b-a),
         static const uint16_t rtl_p[] = { ARRAY };
@@ -755,7 +753,6 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
          * White_Space, Other_Neutral, European_Number, European_Separator,
          * European_Terminator, Arabic_Number, Common_Separator
          */
-       static const struct interval neutral_b[] = {
  #  define BIG_(a,b) { a, b },
  #  define PAIR(a,b)
  #  define ARRAY \
@@ -929,10 +926,9 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
                 {0x1F030, 0x1F093},
                 {0x1F100, 0x1F10A}
                 */
-               ARRAY
+       static const struct interval neutral_b[] = { ARRAY };
  #  undef BIG_
  #  undef PAIR
-       };
  #  define BIG_(a,b)
  #  define PAIR(a,b) (a << 2) | (b-a),
         static const uint16_t neutral_p[] = { ARRAY };
@@ -960,6 +956,7 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
  
  /* The rest is mostly same for libc and for "homegrown" support */
  
+#if 0 // UNUSED
  size_t FAST_FUNC unicode_strlen(const char *string)
  {
         size_t width = mbstowcs(NULL, string, INT_MAX);
@@ -967,6 +964,14 @@ size_t FAST_FUNC unicode_strlen(const char *string)
                 return strlen(string);
         return width;
  }
+#endif
+
+size_t FAST_FUNC unicode_strwidth(const char *string)
+{
+       uni_stat_t uni_stat; /* passed by address to printable_string(); only .unicode_width is read back */
+       printable_string(&uni_stat, string); /* return value is discarded; called only for the stats it stores */
+       return uni_stat.unicode_width;
+}
  
  static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
  {