unicode: s/FEATURE_ASSUME_UNICODE/UNICODE_SUPPORT, add UNICODE_USING_LOCALE

author Denys Vlasenko <vda.linux@googlemail.com>

Fri, 26 Mar 2010 13:06:56 +0000 (14:06 +0100)

committer Denys Vlasenko <vda.linux@googlemail.com>

Fri, 26 Mar 2010 13:06:56 +0000 (14:06 +0100)
author Denys Vlasenko <vda.linux@googlemail.com>
Fri, 26 Mar 2010 13:06:56 +0000 (14:06 +0100)
committer Denys Vlasenko <vda.linux@googlemail.com>
Fri, 26 Mar 2010 13:06:56 +0000 (14:06 +0100)
diff --git a/Config.in b/Config.in

index 4439ce4f998723604680bd9fa9da33f37354ce48..bb7dd6d5d312845a84d3e4909f4600e26849f6ab 100644 (file)
--- a/Config.in
+++ b/Config.in
@@ -119,7 +119,7 @@ config LOCALE_SUPPORT
           Enable this if your system has locale support and you would like
           busybox to support locale settings.
  
           Enable this if your system has locale support and you would like
           busybox to support locale settings.
  
-config FEATURE_ASSUME_UNICODE
+config UNICODE_SUPPORT
         bool "Support Unicode"
         default n
         help
         bool "Support Unicode"
         default n
         help
@@ -131,10 +131,18 @@ config FEATURE_ASSUME_UNICODE
           Probably by the time when busybox will be fully Unicode-clean,
           other encodings will be mainly of historic interest.
  
           Probably by the time when busybox will be fully Unicode-clean,
           other encodings will be mainly of historic interest.
  
+config UNICODE_USING_LOCALE
+       bool "Use libc routines for Unicode (else uses internal ones)"
+       default n
+       depends on UNICODE_SUPPORT && LOCALE_SUPPORT
+       help
+         With this option on, Unicode support is implemented using libc
+         routines. Otherwise, internal implementation is used.
+
  config FEATURE_CHECK_UNICODE_IN_ENV
         bool "Check $LANG environment variable"
         default y
  config FEATURE_CHECK_UNICODE_IN_ENV
         bool "Check $LANG environment variable"
         default y
-       depends on FEATURE_ASSUME_UNICODE && !LOCALE_SUPPORT
+       depends on UNICODE_SUPPORT && !UNICODE_USING_LOCALE
         help
           With this option on, Unicode support is activated
           only if LANG variable has the value of the form "xxxx.utf8"
         help
           With this option on, Unicode support is activated
           only if LANG variable has the value of the form "xxxx.utf8"
@@ -143,7 +151,7 @@ config FEATURE_CHECK_UNICODE_IN_ENV
  
  config SUBST_WCHAR
         int "Character code to substitute unprintable characters with"
  
  config SUBST_WCHAR
         int "Character code to substitute unprintable characters with"
-       depends on FEATURE_ASSUME_UNICODE
+       depends on UNICODE_SUPPORT
         default 63
         help
           Typical values are 63 for '?' (works with any output device),
         default 63
         help
           Typical values are 63 for '?' (works with any output device),
@@ -152,7 +160,7 @@ config SUBST_WCHAR
  
  config LAST_SUPPORTED_WCHAR
         int "Range of supported Unicode characters"
  
  config LAST_SUPPORTED_WCHAR
         int "Range of supported Unicode characters"
-       depends on FEATURE_ASSUME_UNICODE
+       depends on UNICODE_SUPPORT
         default 767
         help
           Any character with Unicode value bigger than this is assumed
         default 767
         help
           Any character with Unicode value bigger than this is assumed
@@ -183,7 +191,7 @@ config LAST_SUPPORTED_WCHAR
  config UNICODE_COMBINING_WCHARS
         bool "Allow zero-width Unicode characters on output"
         default n
  config UNICODE_COMBINING_WCHARS
         bool "Allow zero-width Unicode characters on output"
         default n
-       depends on FEATURE_ASSUME_UNICODE
+       depends on UNICODE_SUPPORT
         help
           With this option off, any Unicode char with width of 0
           is substituted on output.
         help
           With this option off, any Unicode char with width of 0
           is substituted on output.
@@ -191,7 +199,7 @@ config UNICODE_COMBINING_WCHARS
  config UNICODE_WIDE_WCHARS
         bool "Allow wide Unicode characters on output"
         default n
  config UNICODE_WIDE_WCHARS
         bool "Allow wide Unicode characters on output"
         default n
-       depends on FEATURE_ASSUME_UNICODE
+       depends on UNICODE_SUPPORT
         help
           With this option off, any Unicode char with width > 1
           is substituted on output.
         help
           With this option off, any Unicode char with width > 1
           is substituted on output.
@@ -199,7 +207,7 @@ config UNICODE_WIDE_WCHARS
  config UNICODE_BIDI_SUPPORT
         bool "Bidirectional character-aware line input"
         default n
  config UNICODE_BIDI_SUPPORT
         bool "Bidirectional character-aware line input"
         default n
-       depends on FEATURE_ASSUME_UNICODE && !LOCALE_SUPPORT
+       depends on UNICODE_SUPPORT && !UNICODE_USING_LOCALE
         help
           With this option on, right-to-left Unicode characters
           are treated differently on input (e.g. cursor movement).
         help
           With this option on, right-to-left Unicode characters
           are treated differently on input (e.g. cursor movement).
diff --git a/TODO b/TODO

index 31aae41fde5cb4c08cee32eba01340e2ca8129c4..af4c467c2d332e6f0a40eaf0290b9893e9e140f5 100644 (file)
--- a/TODO
+++ b/TODO
@@ -324,8 +324,8 @@ This is useful if you build against uclibc with locale support disabled.
  Unicode-dependent applets must call check_unicode_in_env() when they
  begin executing.
  
  Unicode-dependent applets must call check_unicode_in_env() when they
  begin executing.
  
-Applet code may conditionalize on FEATURE_ASSUME_UNICODE
-in order to use more efficient code if unicode support is not requested.
+Applet code may conditionalize on UNICODE_SUPPORT in order to use
+more efficient code if unicode support is not requested.
  
  Available functions (if you need more, implement them in libbb/unicode.c
  so that they work without LOCALE_SUPPORT too):
  
  Available functions (if you need more, implement them in libbb/unicode.c
  so that they work without LOCALE_SUPPORT too):
diff --git a/TODO_config_nommu b/TODO_config_nommu

index 2c8210cfe9f3d1152fd30db380e59c61a1b6df1a..911f02f6bd3219100e7d4b1cda29f5c659280667 100644 (file)
--- a/TODO_config_nommu
+++ b/TODO_config_nommu
@@ -24,7 +24,7 @@ CONFIG_FEATURE_VERBOSE_USAGE=y
  CONFIG_FEATURE_COMPRESS_USAGE=y
  CONFIG_FEATURE_INSTALLER=y
  # CONFIG_LOCALE_SUPPORT is not set
  CONFIG_FEATURE_COMPRESS_USAGE=y
  CONFIG_FEATURE_INSTALLER=y
  # CONFIG_LOCALE_SUPPORT is not set
-# CONFIG_FEATURE_ASSUME_UNICODE is not set
+# CONFIG_UNICODE_SUPPORT is not set
  # CONFIG_FEATURE_CHECK_UNICODE_IN_ENV is not set
  CONFIG_LONG_OPTS=y
  CONFIG_FEATURE_DEVPTS=y
  # CONFIG_FEATURE_CHECK_UNICODE_IN_ENV is not set
  CONFIG_LONG_OPTS=y
  CONFIG_FEATURE_DEVPTS=y
diff --git a/coreutils/cal.c b/coreutils/cal.c

index 79fe074f84281fe644c9b16414df53e824073440..c98229cb06fad3c253888b980e72dd475fd9e6ef 100644 (file)
--- a/coreutils/cal.c
+++ b/coreutils/cal.c
@@ -87,8 +87,8 @@ int cal_main(int argc UNUSED_PARAM, char **argv)
         /* "Su Mo Tu We Th Fr Sa" */
         /* -j heading: */
         /* " Su  Mo  Tu  We  Th  Fr  Sa" */
         /* "Su Mo Tu We Th Fr Sa" */
         /* -j heading: */
         /* " Su  Mo  Tu  We  Th  Fr  Sa" */
-       char day_headings[ENABLE_FEATURE_ASSUME_UNICODE ? 28 * 6 : 28];
-       IF_FEATURE_ASSUME_UNICODE(char *hp = day_headings;)
+       char day_headings[ENABLE_UNICODE_SUPPORT ? 28 * 6 : 28];
+       IF_UNICODE_SUPPORT(char *hp = day_headings;)
         char buf[40];
  
         init_unicode();
         char buf[40];
  
         init_unicode();
@@ -134,7 +134,7 @@ int cal_main(int argc UNUSED_PARAM, char **argv)
                         zero_tm.tm_wday = i;
                         /* abbreviated weekday name according to locale */
                         strftime(buf, sizeof(buf), "%a", &zero_tm);
                         zero_tm.tm_wday = i;
                         /* abbreviated weekday name according to locale */
                         strftime(buf, sizeof(buf), "%a", &zero_tm);
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
                         if (julian)
                                 *hp++ = ' ';
                         {
                         if (julian)
                                 *hp++ = ' ';
                         {
@@ -149,7 +149,7 @@ int cal_main(int argc UNUSED_PARAM, char **argv)
  #endif
                 }
         } while (++i < 12);
  #endif
                 }
         } while (++i < 12);
-       IF_FEATURE_ASSUME_UNICODE(hp[-1] = '\0';)
+       IF_UNICODE_SUPPORT(hp[-1] = '\0';)
  
         if (month) {
                 unsigned row, len, days[MAXDAYS];
  
         if (month) {
                 unsigned row, len, days[MAXDAYS];
diff --git a/coreutils/df.c b/coreutils/df.c

index 4b23faa7a9bce7579871a241bf74b01738ebb72c..5eeb5b476bad436ed70ddce492f6a15801c2ddf3 100644 (file)
--- a/coreutils/df.c
+++ b/coreutils/df.c
@@ -174,7 +174,7 @@ int df_main(int argc UNUSED_PARAM, char **argv)
                         }
  #endif
  
                         }
  #endif
  
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
                         {
                                 uni_stat_t uni_stat;
                                 char *uni_dev = unicode_conv_to_printable(&uni_stat, device);
                         {
                                 uni_stat_t uni_stat;
                                 char *uni_dev = unicode_conv_to_printable(&uni_stat, device);
diff --git a/coreutils/expand.c b/coreutils/expand.c

index cfb1e25d9216eebbd72593d18433233aa53afe00..b874b6ad461b4545edae401dbedff2fa81b134b6 100644 (file)
--- a/coreutils/expand.c
+++ b/coreutils/expand.c
@@ -48,7 +48,7 @@ static void expand(FILE *file, unsigned tab_size, unsigned opt)
                         if (c == '\t') {
                                 unsigned len;
                                 *ptr = '\0';
                         if (c == '\t') {
                                 unsigned len;
                                 *ptr = '\0';
-# if ENABLE_FEATURE_ASSUME_UNICODE
+# if ENABLE_UNICODE_SUPPORT
                                 {
                                         uni_stat_t uni_stat;
                                         printable_string(&uni_stat, ptr_strbeg);
                                 {
                                         uni_stat_t uni_stat;
                                         printable_string(&uni_stat, ptr_strbeg);
@@ -107,7 +107,7 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
                         }
                         n = strcspn(ptr, "\t ");
                         printf("%*s%.*s", len, "", n, ptr);
                         }
                         n = strcspn(ptr, "\t ");
                         printf("%*s%.*s", len, "", n, ptr);
-# if ENABLE_FEATURE_ASSUME_UNICODE
+# if ENABLE_UNICODE_SUPPORT
                         {
                                 char c;
                                 uni_stat_t uni_stat;
                         {
                                 char c;
                                 uni_stat_t uni_stat;
diff --git a/include/unicode.h b/include/unicode.h

index deb4022c357a4fd87d9875e0b19937d048bea86e..4e2927297db6f8307c11e9959433d516502234a0 100644 (file)
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -5,7 +5,7 @@
  #ifndef UNICODE_H
  #define UNICODE_H 1
  
  #ifndef UNICODE_H
  #define UNICODE_H 1
  
-#if ENABLE_LOCALE_SUPPORT
+#if ENABLE_UNICODE_USING_LOCALE
  # include <wchar.h>
  # include <wctype.h>
  #endif
  # include <wchar.h>
  # include <wctype.h>
  #endif
@@ -21,7 +21,7 @@ enum {
  #define unicode_bidi_isrtl(wc) 0
  #define unicode_bidi_is_neutral_wchar(wc) (wc <= 126 && !isalpha(wc))
  
  #define unicode_bidi_isrtl(wc) 0
  #define unicode_bidi_is_neutral_wchar(wc) (wc <= 126 && !isalpha(wc))
  
-#if !ENABLE_FEATURE_ASSUME_UNICODE
+#if !ENABLE_UNICODE_SUPPORT
  
  # define unicode_strlen(string) strlen(string)
  # define unicode_status UNICODE_OFF
  
  # define unicode_strlen(string) strlen(string)
  # define unicode_status UNICODE_OFF
@@ -50,7 +50,7 @@ char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src);
  char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth);
  char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width);
  
  char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth);
  char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width);
  
-# if ENABLE_LOCALE_SUPPORT
+# if ENABLE_UNICODE_USING_LOCALE
  
  extern uint8_t unicode_status;
  void init_unicode(void) FAST_FUNC;
  
  extern uint8_t unicode_status;
  void init_unicode(void) FAST_FUNC;
@@ -102,9 +102,9 @@ int unicode_bidi_is_neutral_wchar(wint_t wc) FAST_FUNC;
  #  endif
  
  
  #  endif
  
  
-# endif /* !LOCALE_SUPPORT */
+# endif /* !UNICODE_USING_LOCALE */
  
  
-#endif /* FEATURE_ASSUME_UNICODE */
+#endif /* UNICODE_SUPPORT */
  
  POP_SAVED_FUNCTION_VISIBILITY
  
  
  POP_SAVED_FUNCTION_VISIBILITY
  
diff --git a/libbb/Kbuild b/libbb/Kbuild

index 49cf4b8add631f491df58876bed65cadcf7c7231..4606d5aa7e15cd8ad3072e42628905c0eb6d6bef 100644 (file)
--- a/libbb/Kbuild
+++ b/libbb/Kbuild
@@ -124,7 +124,7 @@ lib-y += xrealloc_vector.o
  # and objects which may fail to build (SELinux on selinux-less system)
  lib-$(CONFIG_SELINUX) += selinux_common.o
  lib-$(CONFIG_FEATURE_MTAB_SUPPORT) += mtab.o
  # and objects which may fail to build (SELinux on selinux-less system)
  lib-$(CONFIG_SELINUX) += selinux_common.o
  lib-$(CONFIG_FEATURE_MTAB_SUPPORT) += mtab.o
-lib-$(CONFIG_FEATURE_ASSUME_UNICODE) += unicode.o
+lib-$(CONFIG_UNICODE_SUPPORT) += unicode.o
  lib-$(CONFIG_FEATURE_CHECK_NAMES) += die_if_bad_username.o
  
  lib-$(CONFIG_LOSETUP) += loop.o
  lib-$(CONFIG_FEATURE_CHECK_NAMES) += die_if_bad_username.o
  
  lib-$(CONFIG_LOSETUP) += loop.o
diff --git a/libbb/lineedit.c b/libbb/lineedit.c

index 38a09cb26f52b7be90439a93b5a74c1bf21b7587..dc90846f94c36c9ce263483565a3c4af4534208b 100644 (file)
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -67,7 +67,7 @@
  
  
  #undef CHAR_T
  
  
  #undef CHAR_T
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
  # define BB_NUL L'\0'
  # define CHAR_T wchar_t
  static bool BB_isspace(CHAR_T c) { return ((unsigned)c < 256 && isspace(c)); }
  # define BB_NUL L'\0'
  # define CHAR_T wchar_t
  static bool BB_isspace(CHAR_T c) { return ((unsigned)c < 256 && isspace(c)); }
@@ -202,7 +202,7 @@ static void deinit_S(void)
  #define DEINIT_S() deinit_S()
  
  
  #define DEINIT_S() deinit_S()
  
  
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
  static size_t load_string(const char *src, int maxsize)
  {
         ssize_t len = mbstowcs(command_ps, src, maxsize - 1);
  static size_t load_string(const char *src, int maxsize)
  {
         ssize_t len = mbstowcs(command_ps, src, maxsize - 1);
@@ -932,7 +932,7 @@ static void input_tab(smallint *lastWasTab)
  #define matchBuf (S.input_tab__matchBuf)
                 int find_type;
                 int recalc_pos;
  #define matchBuf (S.input_tab__matchBuf)
                 int find_type;
                 int recalc_pos;
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
                 /* cursor pos in command converted to multibyte form */
                 int cursor_mb;
  #endif
                 /* cursor pos in command converted to multibyte form */
                 int cursor_mb;
  #endif
@@ -942,7 +942,7 @@ static void input_tab(smallint *lastWasTab)
                 /* Make a local copy of the string --
                  * up to the position of the cursor */
                 save_string(matchBuf, cursor + 1);
                 /* Make a local copy of the string --
                  * up to the position of the cursor */
                 save_string(matchBuf, cursor + 1);
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
                 cursor_mb = strlen(matchBuf);
  #endif
                 tmp = matchBuf;
                 cursor_mb = strlen(matchBuf);
  #endif
                 tmp = matchBuf;
@@ -1015,7 +1015,7 @@ static void input_tab(smallint *lastWasTab)
                 }
  
                 len_found = strlen(tmp);
                 }
  
                 len_found = strlen(tmp);
-#if !ENABLE_FEATURE_ASSUME_UNICODE
+#if !ENABLE_UNICODE_SUPPORT
                 /* have space to place the match? */
                 /* The result consists of three parts with these lengths: */
                 /* (cursor - recalc_pos) + len_found + (command_len - cursor) */
                 /* have space to place the match? */
                 /* The result consists of three parts with these lengths: */
                 /* (cursor - recalc_pos) + len_found + (command_len - cursor) */
@@ -1088,7 +1088,7 @@ static void save_command_ps_at_cur_history(void)
                 int cur = state->cur_history;
                 free(state->history[cur]);
  
                 int cur = state->cur_history;
                 free(state->history[cur]);
  
-# if ENABLE_FEATURE_ASSUME_UNICODE
+# if ENABLE_UNICODE_SUPPORT
                 {
                         char tbuf[MAX_LINELEN];
                         save_string(tbuf, sizeof(tbuf));
                 {
                         char tbuf[MAX_LINELEN];
                         save_string(tbuf, sizeof(tbuf));
@@ -1659,7 +1659,7 @@ static int lineedit_read_key(char *read_key_buffer)
  {
         int64_t ic;
         int timeout = -1;
  {
         int64_t ic;
         int timeout = -1;
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
         char unicode_buf[MB_CUR_MAX + 1];
         int unicode_idx = 0;
  #endif
         char unicode_buf[MB_CUR_MAX + 1];
         int unicode_idx = 0;
  #endif
@@ -1674,7 +1674,7 @@ static int lineedit_read_key(char *read_key_buffer)
                  */
                 ic = read_key(STDIN_FILENO, read_key_buffer, timeout);
                 if (errno) {
                  */
                 ic = read_key(STDIN_FILENO, read_key_buffer, timeout);
                 if (errno) {
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
                         if (errno == EAGAIN && unicode_idx != 0)
                                 goto pushback;
  #endif
                         if (errno == EAGAIN && unicode_idx != 0)
                                 goto pushback;
  #endif
@@ -1700,7 +1700,7 @@ static int lineedit_read_key(char *read_key_buffer)
                 }
  #endif
  
                 }
  #endif
  
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
                 if (unicode_status == UNICODE_ON) {
                         wchar_t wc;
  
                 if (unicode_status == UNICODE_ON) {
                         wchar_t wc;
  
@@ -1817,7 +1817,7 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li
         /* prepare before init handlers */
         cmdedit_y = 0;  /* quasireal y, not true if line > xt*yt */
         command_len = 0;
         /* prepare before init handlers */
         cmdedit_y = 0;  /* quasireal y, not true if line > xt*yt */
         command_len = 0;
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
         command_ps = xzalloc(maxsize * sizeof(command_ps[0]));
  #else
         command_ps = command;
         command_ps = xzalloc(maxsize * sizeof(command_ps[0]));
  #else
         command_ps = command;
@@ -2199,8 +2199,8 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li
  //                             }
  //                     }
                         if (ic < ' '
  //                             }
  //                     }
                         if (ic < ' '
-                        || (!ENABLE_FEATURE_ASSUME_UNICODE && ic >= 256)
-                        || (ENABLE_FEATURE_ASSUME_UNICODE && ic >= VI_CMDMODE_BIT)
+                        || (!ENABLE_UNICODE_SUPPORT && ic >= 256)
+                        || (ENABLE_UNICODE_SUPPORT && ic >= VI_CMDMODE_BIT)
                         ) {
                                 /* If VI_CMDMODE_BIT is set, ic is >= 256
                                  * and vi mode ignores unexpected chars.
                         ) {
                                 /* If VI_CMDMODE_BIT is set, ic is >= 256
                                  * and vi mode ignores unexpected chars.
@@ -2268,7 +2268,7 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li
  /* Stop bug catching using "command_must_not_be_used" trick */
  #undef command
  
  /* Stop bug catching using "command_must_not_be_used" trick */
  #undef command
  
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
         command[0] = '\0';
         if (command_len > 0)
                 command_len = save_string(command, maxsize - 1);
         command[0] = '\0';
         if (command_len > 0)
                 command_len = save_string(command, maxsize - 1);
diff --git a/libbb/printable_string.c b/libbb/printable_string.c

index 47565de0df92834e188d304fc6cc4cd219a66e1f..83a48219681d29f0711325670b5357046a81466e 100644 (file)
--- a/libbb/printable_string.c
+++ b/libbb/printable_string.c
@@ -36,7 +36,7 @@ const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
                 s++;
         }
  
                 s++;
         }
  
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
         dst = unicode_conv_to_printable(stats, str);
  #else
         {
         dst = unicode_conv_to_printable(stats, str);
  #else
         {
diff --git a/libbb/progress.c b/libbb/progress.c

index 0e484da6c7d1d4ab1df62263128fa6c2dc4bd876..e96039042ccdf9378e5a9fa8dae7c8149549e659 100644 (file)
--- a/libbb/progress.c
+++ b/libbb/progress.c
@@ -78,7 +78,7 @@ void FAST_FUNC bb_progress_update(bb_progress_t *p,
                 if (ratio > 100) ratio = 100;
         }
  
                 if (ratio > 100) ratio = 100;
         }
  
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
         init_unicode();
         /* libbb candidate? */
         {
         init_unicode();
         /* libbb candidate? */
         {
diff --git a/libbb/unicode.c b/libbb/unicode.c

index bc97145622071d27263a9c11a407b7f2565a182a..83e70b412ee5748d71bb062f86e8eac96886c9d0 100644 (file)
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -14,12 +14,12 @@
  uint8_t unicode_status;
  #endif
  
  uint8_t unicode_status;
  #endif
  
-/* This file is compiled only if FEATURE_ASSUME_UNICODE is on.
+/* This file is compiled only if UNICODE_SUPPORT is on.
   * We check other options and decide whether to use libc support
   * via locale, or use our own logic:
   */
  
   * We check other options and decide whether to use libc support
   * via locale, or use our own logic:
   */
  
-#if ENABLE_LOCALE_SUPPORT
+#if ENABLE_UNICODE_USING_LOCALE
  
  /* Unicode support using libc locale support. */
  
  
  /* Unicode support using libc locale support. */
  
@@ -139,7 +139,7 @@ size_t FAST_FUNC wcstombs(char *dest, const wchar_t *src, size_t n)
         return org_n - n;
  }
  
         return org_n - n;
  }
  
-#define ERROR_WCHAR (~(wchar_t)0)
+# define ERROR_WCHAR (~(wchar_t)0)
  
  static const char *mbstowc_internal(wchar_t *res, const char *src)
  {
  
  static const char *mbstowc_internal(wchar_t *res, const char *src)
  {
@@ -239,7 +239,427 @@ int FAST_FUNC iswpunct(wint_t wc)
         return (unsigned)wc <= 0x7f && ispunct(wc);
  }
  
         return (unsigned)wc <= 0x7f && ispunct(wc);
  }
  
-#include "unicode_wcwidth.c"
+
+# if LAST_SUPPORTED_WCHAR >= 0x300
+struct interval { /* one closed range of code points, searched by in_interval_table() */
+       uint16_t first; /* lowest code point in the range (inclusive) */
+       uint16_t last; /* highest code point in the range (inclusive) */
+};
+
+/*
+ * Binary search for ucs in a table of closed [first, last] intervals.
+ * "max" is the index of the final table entry (not the entry count).
+ * Returns 1 when some interval contains ucs, 0 otherwise.
+ * The search presumes the table is sorted and non-overlapping.
+ */
+static int in_interval_table(unsigned ucs, const struct interval *table, unsigned max)
+{
+       unsigned lo = 0;
+       unsigned hi = max;
+
+       /* Outside the span covered by the whole table? */
+       if (ucs < table[0].first)
+               return 0;
+       if (ucs > table[hi].last)
+               return 0;
+
+       while (lo <= hi) {
+               unsigned probe = (lo + hi) / 2;
+
+               if (ucs > table[probe].last) {
+                       lo = probe + 1;
+                       continue;
+               }
+               if (ucs < table[probe].first) {
+                       /* probe is never 0 here: ucs >= table[0].first
+                        * was established above, so no unsigned wrap. */
+                       hi = probe - 1;
+                       continue;
+               }
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * Binary search for ucs in a table of packed 16-bit ranges.
+ * Each entry e encodes one closed range: (e >> 2) is the first code
+ * point and (e & 3) is the number of additional code points, i.e. the
+ * entry covers [e >> 2, (e >> 2) + (e & 3)].
+ * "max" is the index of the final table entry (not the entry count).
+ * Returns 1 when some range contains ucs, 0 otherwise.
+ * The search presumes the table is sorted and non-overlapping.
+ */
+static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
+{
+       unsigned min;
+       unsigned mid;
+       unsigned first, last;
+
+       /* Quick reject: below the first range or above the LAST range.
+        * The upper bound must be taken from table[max]; taking it from
+        * table[0] would make every entry but the first unreachable. */
+       first = table[0] >> 2;
+       if (ucs < first)
+               return 0;
+       last = (table[max] >> 2) + (table[max] & 3);
+       if (ucs > last)
+               return 0;
+
+       min = 0;
+       while (max >= min) {
+               mid = (min + max) / 2;
+               first = table[mid] >> 2;
+               last = first + (table[mid] & 3);
+               if (ucs > last)
+                       min = mid + 1;
+               else if (ucs < first)
+                       /* mid is never 0 here (ucs >= start of table[0]
+                        * was checked above), so mid - 1 cannot wrap */
+                       max = mid - 1;
+               else
+                       return 1;
+       }
+       return 0;
+}
+# endif
+
+
+/*
+ * This is an implementation of wcwidth() and wcswidth() (defined in
+ * IEEE Std 1003.1-2001) for Unicode.
+ *
+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
+ *
+ * In fixed-width output devices, Latin characters all occupy a single
+ * "cell" position of equal width, whereas ideographic CJK characters
+ * occupy two such cells. Interoperability between terminal-line
+ * applications and (teletype-style) character terminals using the
+ * UTF-8 encoding requires agreement on which character should advance
+ * the cursor by how many cell positions. No established formal
+ * standards exist at present on which Unicode character shall occupy
+ * how many cell positions on character terminals. These routines are
+ * a first attempt of defining such behavior based on simple rules
+ * applied to data provided by the Unicode Consortium.
+ *
+ * For some graphical characters, the Unicode standard explicitly
+ * defines a character-cell width via the definition of the East Asian
+ * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
+ * In all these cases, there is no ambiguity about which width a
+ * terminal shall use. For characters in the East Asian Ambiguous (A)
+ * class, the width choice depends purely on a preference of backward
+ * compatibility with either historic CJK or Western practice.
+ * Choosing single-width for these characters is easy to justify as
+ * the appropriate long-term solution, as the CJK practice of
+ * displaying these characters as double-width comes from historic
+ * implementation simplicity (8-bit encoded characters were displayed
+ * single-width and 16-bit ones double-width, even for Greek,
+ * Cyrillic, etc.) and not any typographic considerations.
+ *
+ * Much less clear is the choice of width for the Not East Asian
+ * (Neutral) class. Existing practice does not dictate a width for any
+ * of these characters. It would nevertheless make sense
+ * typographically to allocate two character cells to characters such
+ * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
+ * represented adequately with a single-width glyph. The following
+ * routines at present merely assign a single-cell width to all
+ * neutral characters, in the interest of simplicity. This is not
+ * entirely satisfactory and should be reconsidered before
+ * establishing a formal standard in this area. At the moment, the
+ * decision which Not East Asian (Neutral) characters should be
+ * represented by double-width glyphs cannot yet be answered by
+ * applying a simple rule from the Unicode database content. Setting
+ * up a proper standard for the behavior of UTF-8 character terminals
+ * will require a careful analysis not only of each Unicode character,
+ * but also of each presentation form, something the author of these
+ * routines has avoided to do so far.
+ *
+ * http://www.unicode.org/unicode/reports/tr11/
+ *
+ * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * for any purpose and without fee is hereby granted. The author
+ * disclaims all warranties with regard to this software.
+ *
+ * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+ */
+
+/* Assigned Unicode character ranges:
+ * Plane Range
+ * 0       0000–FFFF   Basic Multilingual Plane
+ * 1      10000–1FFFF  Supplementary Multilingual Plane
+ * 2      20000–2FFFF  Supplementary Ideographic Plane
+ * 3      30000-3FFFF  Tertiary Ideographic Plane (no chars assigned yet)
+ * 4-13   40000–DFFFF  currently unassigned
+ * 14     E0000–EFFFF  Supplementary Special-purpose Plane
+ * 15     F0000–FFFFF  Supplementary Private Use Area-A
+ * 16    100000–10FFFF Supplementary Private Use Area-B
+ *
+ * "Supplementary Special-purpose Plane currently contains non-graphical
+ * characters in two blocks of 128 and 240 characters. The first block
+ * is for language tag characters for use when language cannot be indicated
+ * through other protocols (such as the xml:lang attribute in XML).
+ * The other block contains glyph variation selectors to indicate
+ * an alternate glyph for a character that cannot be determined by context."
+ *
+ * In simpler terms: it is a tool to fix the "Han unification" mess
+ * created by Unicode committee, to select Chinese/Japanese/Korean/Taiwan
+ * version of a character. (They forgot that the whole purpose of the Unicode
+ * was to be able to write all chars in one charset without such tricks).
+ * Until East Asian users say it is actually necessary to support these
+ * code points in console applications like busybox
+ * (i.e. do these chars ever appear in filenames, hostnames, text files
+ * and such?), we are treating these code points as invalid.
+ *
+ * Tertiary Ideographic Plane is also ignored for now,
+ * until Unicode committee assigns something there.
+ */
+/* The following two functions define the column width of an ISO 10646
+ * character as follows:
+ *
+ *    - The null character (U+0000) has a column width of 0.
+ *
+ *    - Other C0/C1 control characters and DEL will lead to a return
+ *      value of -1.
+ *
+ *    - Non-spacing and enclosing combining characters (general
+ *      category code Mn or Me in the Unicode database) have a
+ *      column width of 0.
+ *
+ *    - SOFT HYPHEN (U+00AD) has a column width of 1.
+ *
+ *    - Other format characters (general category code Cf in the Unicode
+ *      database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
+ *
+ *    - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
+ *      have a column width of 0.
+ *
+ *    - Spacing characters in the East Asian Wide (W) or East Asian
+ *      Full-width (F) category as defined in Unicode Technical
+ *      Report #11 have a column width of 2.
+ *
+ *    - All remaining characters (including all printable
+ *      ISO 8859-1 and WGL4 characters, Unicode control characters,
+ *      etc.) have a column width of 1.
+ *
+ * This implementation assumes that wchar_t characters are encoded
+ * in ISO 10646.
+ */
+static int wcwidth(unsigned ucs)
+{
+# if LAST_SUPPORTED_WCHAR >= 0x300
+       /* sorted list of non-overlapping intervals of non-spacing characters */
+       /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
+       static const struct interval combining[] = {
+#  define BIG_(a,b) { a, b },
+#  define PAIR(a,b)
+#  define ARRAY /* PAIR if < 0x4000 and no more than 4 chars big */ \
+               BIG_(0x0300, 0x036F) \
+               PAIR(0x0483, 0x0486) \
+               PAIR(0x0488, 0x0489) \
+               BIG_(0x0591, 0x05BD) \
+               PAIR(0x05BF, 0x05BF) \
+               PAIR(0x05C1, 0x05C2) \
+               PAIR(0x05C4, 0x05C5) \
+               PAIR(0x05C7, 0x05C7) \
+               PAIR(0x0600, 0x0603) \
+               BIG_(0x0610, 0x0615) \
+               BIG_(0x064B, 0x065E) \
+               PAIR(0x0670, 0x0670) \
+               BIG_(0x06D6, 0x06E4) \
+               PAIR(0x06E7, 0x06E8) \
+               PAIR(0x06EA, 0x06ED) \
+               PAIR(0x070F, 0x070F) \
+               PAIR(0x0711, 0x0711) \
+               BIG_(0x0730, 0x074A) \
+               BIG_(0x07A6, 0x07B0) \
+               BIG_(0x07EB, 0x07F3) \
+               PAIR(0x0901, 0x0902) \
+               PAIR(0x093C, 0x093C) \
+               BIG_(0x0941, 0x0948) \
+               PAIR(0x094D, 0x094D) \
+               PAIR(0x0951, 0x0954) \
+               PAIR(0x0962, 0x0963) \
+               PAIR(0x0981, 0x0981) \
+               PAIR(0x09BC, 0x09BC) \
+               PAIR(0x09C1, 0x09C4) \
+               PAIR(0x09CD, 0x09CD) \
+               PAIR(0x09E2, 0x09E3) \
+               PAIR(0x0A01, 0x0A02) \
+               PAIR(0x0A3C, 0x0A3C) \
+               PAIR(0x0A41, 0x0A42) \
+               PAIR(0x0A47, 0x0A48) \
+               PAIR(0x0A4B, 0x0A4D) \
+               PAIR(0x0A70, 0x0A71) \
+               PAIR(0x0A81, 0x0A82) \
+               PAIR(0x0ABC, 0x0ABC) \
+               BIG_(0x0AC1, 0x0AC5) \
+               PAIR(0x0AC7, 0x0AC8) \
+               PAIR(0x0ACD, 0x0ACD) \
+               PAIR(0x0AE2, 0x0AE3) \
+               PAIR(0x0B01, 0x0B01) \
+               PAIR(0x0B3C, 0x0B3C) \
+               PAIR(0x0B3F, 0x0B3F) \
+               PAIR(0x0B41, 0x0B43) \
+               PAIR(0x0B4D, 0x0B4D) \
+               PAIR(0x0B56, 0x0B56) \
+               PAIR(0x0B82, 0x0B82) \
+               PAIR(0x0BC0, 0x0BC0) \
+               PAIR(0x0BCD, 0x0BCD) \
+               PAIR(0x0C3E, 0x0C40) \
+               PAIR(0x0C46, 0x0C48) \
+               PAIR(0x0C4A, 0x0C4D) \
+               PAIR(0x0C55, 0x0C56) \
+               PAIR(0x0CBC, 0x0CBC) \
+               PAIR(0x0CBF, 0x0CBF) \
+               PAIR(0x0CC6, 0x0CC6) \
+               PAIR(0x0CCC, 0x0CCD) \
+               PAIR(0x0CE2, 0x0CE3) \
+               PAIR(0x0D41, 0x0D43) \
+               PAIR(0x0D4D, 0x0D4D) \
+               PAIR(0x0DCA, 0x0DCA) \
+               PAIR(0x0DD2, 0x0DD4) \
+               PAIR(0x0DD6, 0x0DD6) \
+               PAIR(0x0E31, 0x0E31) \
+               BIG_(0x0E34, 0x0E3A) \
+               BIG_(0x0E47, 0x0E4E) \
+               PAIR(0x0EB1, 0x0EB1) \
+               BIG_(0x0EB4, 0x0EB9) \
+               PAIR(0x0EBB, 0x0EBC) \
+               BIG_(0x0EC8, 0x0ECD) \
+               PAIR(0x0F18, 0x0F19) \
+               PAIR(0x0F35, 0x0F35) \
+               PAIR(0x0F37, 0x0F37) \
+               PAIR(0x0F39, 0x0F39) \
+               BIG_(0x0F71, 0x0F7E) \
+               BIG_(0x0F80, 0x0F84) \
+               PAIR(0x0F86, 0x0F87) \
+               PAIR(0x0FC6, 0x0FC6) \
+               BIG_(0x0F90, 0x0F97) \
+               BIG_(0x0F99, 0x0FBC) \
+               PAIR(0x102D, 0x1030) \
+               PAIR(0x1032, 0x1032) \
+               PAIR(0x1036, 0x1037) \
+               PAIR(0x1039, 0x1039) \
+               PAIR(0x1058, 0x1059) \
+               BIG_(0x1160, 0x11FF) \
+               PAIR(0x135F, 0x135F) \
+               PAIR(0x1712, 0x1714) \
+               PAIR(0x1732, 0x1734) \
+               PAIR(0x1752, 0x1753) \
+               PAIR(0x1772, 0x1773) \
+               PAIR(0x17B4, 0x17B5) \
+               BIG_(0x17B7, 0x17BD) \
+               PAIR(0x17C6, 0x17C6) \
+               BIG_(0x17C9, 0x17D3) \
+               PAIR(0x17DD, 0x17DD) \
+               PAIR(0x180B, 0x180D) \
+               PAIR(0x18A9, 0x18A9) \
+               PAIR(0x1920, 0x1922) \
+               PAIR(0x1927, 0x1928) \
+               PAIR(0x1932, 0x1932) \
+               PAIR(0x1939, 0x193B) \
+               PAIR(0x1A17, 0x1A18) \
+               PAIR(0x1B00, 0x1B03) \
+               PAIR(0x1B34, 0x1B34) \
+               BIG_(0x1B36, 0x1B3A) \
+               PAIR(0x1B3C, 0x1B3C) \
+               PAIR(0x1B42, 0x1B42) \
+               BIG_(0x1B6B, 0x1B73) \
+               BIG_(0x1DC0, 0x1DCA) \
+               PAIR(0x1DFE, 0x1DFF) \
+               BIG_(0x200B, 0x200F) \
+               BIG_(0x202A, 0x202E) \
+               PAIR(0x2060, 0x2063) \
+               BIG_(0x206A, 0x206F) \
+               BIG_(0x20D0, 0x20EF) \
+               BIG_(0x302A, 0x302F) \
+               PAIR(0x3099, 0x309A) \
+               /* Too big to be packed in PAIRs: */ \
+               BIG_(0xA806, 0xA806) \
+               BIG_(0xA80B, 0xA80B) \
+               BIG_(0xA825, 0xA826) \
+               BIG_(0xFB1E, 0xFB1E) \
+               BIG_(0xFE00, 0xFE0F) \
+               BIG_(0xFE20, 0xFE23) \
+               BIG_(0xFEFF, 0xFEFF) \
+               BIG_(0xFFF9, 0xFFFB)
+               ARRAY
+#  undef BIG_
+#  undef PAIR
+       };
+#  define BIG_(a,b)
+#  define PAIR(a,b) (a << 2) | (b-a),
+       static const uint16_t combining1[] = { ARRAY };
+#  undef BIG_
+#  undef PAIR
+#  define BIG_(a,b) char big_##a[b < 0x4000 && b-a <= 3 ? -1 : 1];
+#  define PAIR(a,b) char pair##a[b >= 0x4000 || b-a > 3 ? -1 : 1];
+       struct CHECK { ARRAY };
+#  undef BIG_
+#  undef PAIR
+#  undef ARRAY
+# endif
+
+       if (ucs == 0)
+               return 0;
+
+       /* Test for 8-bit control characters (00-1f, 80-9f, 7f) */
+       if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
+               return -1;
+       /* Quick abort if it is an obviously invalid char */
+       if (ucs > LAST_SUPPORTED_WCHAR)
+               return -1;
+
+       /* Optimization: no combining chars below 0x300 */
+       if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
+               return 1;
+
+# if LAST_SUPPORTED_WCHAR >= 0x300
+       /* Binary search in table of non-spacing characters */
+       if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
+               return 0;
+       if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
+               return 0;
+
+       /* Optimization: all chars below 0x1100 are not double-width */
+       if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
+               return 1;
+
+#  if LAST_SUPPORTED_WCHAR >= 0x1100
+       /* Invalid code points: */
+       /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
+       /* Private Use Area (e000..f8ff) */
+       /* Noncharacters fdd0..fdef */
+       if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
+        || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
+       ) {
+               return -1;
+       }
+       /* 0xfffe and 0xffff in every plane are invalid */
+       if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
+               return -1;
+       }
+
+#   if LAST_SUPPORTED_WCHAR >= 0x10000
+       if (ucs >= 0x10000) {
+               /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
+               static const struct interval combining0x10000[] = {
+                       { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
+                       { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
+                       { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
+                       { 0xD242, 0xD244 }
+               };
+               /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
+               if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
+                       return 0;
+               /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
+               if (LAST_SUPPORTED_WCHAR >= 0xE0001
+                && (  ucs == 0xE0001
+                   || (ucs >= 0xE0020 && ucs <= 0xE007F)
+                   || (ucs >= 0xE0100 && ucs <= 0xE01EF)
+                   )
+               ) {
+                       return 0;
+               }
+       }
+#   endif
+
+       /* If we arrive here, ucs is not a combining or C0/C1 control character.
+        * Check whether it's 1 char or 2-char wide.
+        */
+       return 1 +
+               (  (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
+               || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
+               || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
+               || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
+#   if LAST_SUPPORTED_WCHAR >= 0xac00
+               || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
+               || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
+               || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */
+               || (ucs >= 0xfe30 && ucs <= 0xfe6f) /* CJK Compatibility Forms */
+               || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */
+               || (ucs >= 0xffe0 && ucs <= 0xffe6)
+               || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */
+#   endif
+               );
+#  endif /* >= 0x1100 */
+# endif /* >= 0x300 */
+}
+
  
  # if ENABLE_UNICODE_BIDI_SUPPORT
  int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
  
  # if ENABLE_UNICODE_BIDI_SUPPORT
  int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
@@ -592,7 +1012,7 @@ static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char
                 int w;
                 wchar_t wc;
  
                 int w;
                 wchar_t wc;
  
-#if ENABLE_LOCALE_SUPPORT
+#if ENABLE_UNICODE_USING_LOCALE
                 {
                         mbstate_t mbst = { 0 };
                         ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
                 {
                         mbstate_t mbst = { 0 };
                         ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
@@ -647,7 +1067,7 @@ static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char
                 uni_count++;
                 uni_width += w;
                 dst = xrealloc(dst, dst_len + MB_CUR_MAX);
                 uni_count++;
                 uni_width += w;
                 dst = xrealloc(dst, dst_len + MB_CUR_MAX);
-#if ENABLE_LOCALE_SUPPORT
+#if ENABLE_UNICODE_USING_LOCALE
                 {
                         mbstate_t mbst = { 0 };
                         dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
                 {
                         mbstate_t mbst = { 0 };
                         dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
@@ -699,7 +1119,7 @@ unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
                 int w;
                 wchar_t wc;
  
                 int w;
                 wchar_t wc;
  
-#if ENABLE_LOCALE_SUPPORT
+#if ENABLE_UNICODE_USING_LOCALE
                 {
                         mbstate_t mbst = { 0 };
                         ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
                 {
                         mbstate_t mbst = { 0 };
                         ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
diff --git a/libbb/unicode_wcwidth.c b/libbb/unicode_wcwidth.c

deleted file mode 100644 (file)

index 0bb6227..0000000
--- a/libbb/unicode_wcwidth.c
+++ /dev/null
@@ -1,543 +0,0 @@
-/*
- * This is an implementation of wcwidth() and wcswidth() (defined in
- * IEEE Std 1002.1-2001) for Unicode.
- *
- * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
- * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
- *
- * In fixed-width output devices, Latin characters all occupy a single
- * "cell" position of equal width, whereas ideographic CJK characters
- * occupy two such cells. Interoperability between terminal-line
- * applications and (teletype-style) character terminals using the
- * UTF-8 encoding requires agreement on which character should advance
- * the cursor by how many cell positions. No established formal
- * standards exist at present on which Unicode character shall occupy
- * how many cell positions on character terminals. These routines are
- * a first attempt of defining such behavior based on simple rules
- * applied to data provided by the Unicode Consortium.
- *
- * For some graphical characters, the Unicode standard explicitly
- * defines a character-cell width via the definition of the East Asian
- * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
- * In all these cases, there is no ambiguity about which width a
- * terminal shall use. For characters in the East Asian Ambiguous (A)
- * class, the width choice depends purely on a preference of backward
- * compatibility with either historic CJK or Western practice.
- * Choosing single-width for these characters is easy to justify as
- * the appropriate long-term solution, as the CJK practice of
- * displaying these characters as double-width comes from historic
- * implementation simplicity (8-bit encoded characters were displayed
- * single-width and 16-bit ones double-width, even for Greek,
- * Cyrillic, etc.) and not any typographic considerations.
- *
- * Much less clear is the choice of width for the Not East Asian
- * (Neutral) class. Existing practice does not dictate a width for any
- * of these characters. It would nevertheless make sense
- * typographically to allocate two character cells to characters such
- * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
- * represented adequately with a single-width glyph. The following
- * routines at present merely assign a single-cell width to all
- * neutral characters, in the interest of simplicity. This is not
- * entirely satisfactory and should be reconsidered before
- * establishing a formal standard in this area. At the moment, the
- * decision which Not East Asian (Neutral) characters should be
- * represented by double-width glyphs cannot yet be answered by
- * applying a simple rule from the Unicode database content. Setting
- * up a proper standard for the behavior of UTF-8 character terminals
- * will require a careful analysis not only of each Unicode character,
- * but also of each presentation form, something the author of these
- * routines has avoided to do so far.
- *
- * http://www.unicode.org/unicode/reports/tr11/
- *
- * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
- *
- * Permission to use, copy, modify, and distribute this software
- * for any purpose and without fee is hereby granted. The author
- * disclaims all warranties with regard to this software.
- *
- * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
- */
-
-/* Assigned Unicode character ranges:
- * Plane Range
- * 0       0000–FFFF   Basic Multilingual Plane
- * 1      10000–1FFFF  Supplementary Multilingual Plane
- * 2      20000–2FFFF  Supplementary Ideographic Plane
- * 3      30000-3FFFF  Tertiary Ideographic Plane (no chars assigned yet)
- * 4-13   40000–DFFFF  currently unassigned
- * 14     E0000–EFFFF  Supplementary Special-purpose Plane
- * 15     F0000–FFFFF  Supplementary Private Use Area-A
- * 16    100000–10FFFF Supplementary Private Use Area-B
- *
- * "Supplementary Special-purpose Plane currently contains non-graphical
- * characters in two blocks of 128 and 240 characters. The first block
- * is for language tag characters for use when language cannot be indicated
- * through other protocols (such as the xml:lang  attribute in XML).
- * The other block contains glyph variation selectors to indicate
- * an alternate glyph for a character that cannot be determined by context."
- *
- * In simpler terms: it is a tool to fix the "Han unification" mess
- * created by Unicode committee, to select Chinese/Japanese/Korean/Taiwan
- * version of a character. (They forgot that the whole purpose of the Unicode
- * was to be able to write all chars in one charset without such tricks).
- * Until East Asian users say it is actually necessary to support these
- * code points in console applications like busybox
- * (i.e. do these chars ever appear in filenames, hostnames, text files
- * and such?), we are treating these code points as invalid.
- *
- * Tertiary Ideographic Plane is also ignored for now,
- * until Unicode committee assigns something there.
- */
-
-#if LAST_SUPPORTED_WCHAR >= 0x300
-struct interval {
-       uint16_t first;
-       uint16_t last;
-};
-
-/* auxiliary function for binary search in interval table */
-static int in_interval_table(unsigned ucs, const struct interval *table, unsigned max)
-{
-       unsigned min;
-       unsigned mid;
-
-       if (ucs < table[0].first || ucs > table[max].last)
-               return 0;
-
-       min = 0;
-       while (max >= min) {
-               mid = (min + max) / 2;
-               if (ucs > table[mid].last)
-                       min = mid + 1;
-               else if (ucs < table[mid].first)
-                       max = mid - 1;
-               else
-                       return 1;
-       }
-       return 0;
-}
-
-static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
-{
-       unsigned min;
-       unsigned mid;
-       unsigned first, last;
-
-       first = table[0] >> 2;
-       last = first + (table[0] & 3);
-       if (ucs < first || ucs > last)
-               return 0;
-
-       min = 0;
-       while (max >= min) {
-               mid = (min + max) / 2;
-               first = table[mid] >> 2;
-               last = first + (table[mid] & 3);
-               if (ucs > last)
-                       min = mid + 1;
-               else if (ucs < first)
-                       max = mid - 1;
-               else
-                       return 1;
-       }
-       return 0;
-}
-#endif
-
-
-/* The following two functions define the column width of an ISO 10646
- * character as follows:
- *
- *    - The null character (U+0000) has a column width of 0.
- *
- *    - Other C0/C1 control characters and DEL will lead to a return
- *      value of -1.
- *
- *    - Non-spacing and enclosing combining characters (general
- *      category code Mn or Me in the Unicode database) have a
- *      column width of 0.
- *
- *    - SOFT HYPHEN (U+00AD) has a column width of 1.
- *
- *    - Other format characters (general category code Cf in the Unicode
- *      database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
- *
- *    - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
- *      have a column width of 0.
- *
- *    - Spacing characters in the East Asian Wide (W) or East Asian
- *      Full-width (F) category as defined in Unicode Technical
- *      Report #11 have a column width of 2.
- *
- *    - All remaining characters (including all printable
- *      ISO 8859-1 and WGL4 characters, Unicode control characters,
- *      etc.) have a column width of 1.
- *
- * This implementation assumes that wchar_t characters are encoded
- * in ISO 10646.
- */
-static int wcwidth(unsigned ucs)
-{
-#if LAST_SUPPORTED_WCHAR >= 0x300
-       /* sorted list of non-overlapping intervals of non-spacing characters */
-       /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
-       static const struct interval combining[] = {
-#define BIG_(a,b) { a, b },
-#define PAIR(a,b)
-               /* PAIR if < 0x4000 and no more than 4 chars big */
-               BIG_(0x0300, 0x036F)
-               PAIR(0x0483, 0x0486)
-               PAIR(0x0488, 0x0489)
-               BIG_(0x0591, 0x05BD)
-               PAIR(0x05BF, 0x05BF)
-               PAIR(0x05C1, 0x05C2)
-               PAIR(0x05C4, 0x05C5)
-               PAIR(0x05C7, 0x05C7)
-               PAIR(0x0600, 0x0603)
-               BIG_(0x0610, 0x0615)
-               BIG_(0x064B, 0x065E)
-               PAIR(0x0670, 0x0670)
-               BIG_(0x06D6, 0x06E4)
-               PAIR(0x06E7, 0x06E8)
-               PAIR(0x06EA, 0x06ED)
-               PAIR(0x070F, 0x070F)
-               PAIR(0x0711, 0x0711)
-               BIG_(0x0730, 0x074A)
-               BIG_(0x07A6, 0x07B0)
-               BIG_(0x07EB, 0x07F3)
-               PAIR(0x0901, 0x0902)
-               PAIR(0x093C, 0x093C)
-               BIG_(0x0941, 0x0948)
-               PAIR(0x094D, 0x094D)
-               PAIR(0x0951, 0x0954)
-               PAIR(0x0962, 0x0963)
-               PAIR(0x0981, 0x0981)
-               PAIR(0x09BC, 0x09BC)
-               PAIR(0x09C1, 0x09C4)
-               PAIR(0x09CD, 0x09CD)
-               PAIR(0x09E2, 0x09E3)
-               PAIR(0x0A01, 0x0A02)
-               PAIR(0x0A3C, 0x0A3C)
-               PAIR(0x0A41, 0x0A42)
-               PAIR(0x0A47, 0x0A48)
-               PAIR(0x0A4B, 0x0A4D)
-               PAIR(0x0A70, 0x0A71)
-               PAIR(0x0A81, 0x0A82)
-               PAIR(0x0ABC, 0x0ABC)
-               BIG_(0x0AC1, 0x0AC5)
-               PAIR(0x0AC7, 0x0AC8)
-               PAIR(0x0ACD, 0x0ACD)
-               PAIR(0x0AE2, 0x0AE3)
-               PAIR(0x0B01, 0x0B01)
-               PAIR(0x0B3C, 0x0B3C)
-               PAIR(0x0B3F, 0x0B3F)
-               PAIR(0x0B41, 0x0B43)
-               PAIR(0x0B4D, 0x0B4D)
-               PAIR(0x0B56, 0x0B56)
-               PAIR(0x0B82, 0x0B82)
-               PAIR(0x0BC0, 0x0BC0)
-               PAIR(0x0BCD, 0x0BCD)
-               PAIR(0x0C3E, 0x0C40)
-               PAIR(0x0C46, 0x0C48)
-               PAIR(0x0C4A, 0x0C4D)
-               PAIR(0x0C55, 0x0C56)
-               PAIR(0x0CBC, 0x0CBC)
-               PAIR(0x0CBF, 0x0CBF)
-               PAIR(0x0CC6, 0x0CC6)
-               PAIR(0x0CCC, 0x0CCD)
-               PAIR(0x0CE2, 0x0CE3)
-               PAIR(0x0D41, 0x0D43)
-               PAIR(0x0D4D, 0x0D4D)
-               PAIR(0x0DCA, 0x0DCA)
-               PAIR(0x0DD2, 0x0DD4)
-               PAIR(0x0DD6, 0x0DD6)
-               PAIR(0x0E31, 0x0E31)
-               BIG_(0x0E34, 0x0E3A)
-               BIG_(0x0E47, 0x0E4E)
-               PAIR(0x0EB1, 0x0EB1)
-               BIG_(0x0EB4, 0x0EB9)
-               PAIR(0x0EBB, 0x0EBC)
-               BIG_(0x0EC8, 0x0ECD)
-               PAIR(0x0F18, 0x0F19)
-               PAIR(0x0F35, 0x0F35)
-               PAIR(0x0F37, 0x0F37)
-               PAIR(0x0F39, 0x0F39)
-               BIG_(0x0F71, 0x0F7E)
-               BIG_(0x0F80, 0x0F84)
-               PAIR(0x0F86, 0x0F87)
-               PAIR(0x0FC6, 0x0FC6)
-               BIG_(0x0F90, 0x0F97)
-               BIG_(0x0F99, 0x0FBC)
-               PAIR(0x102D, 0x1030)
-               PAIR(0x1032, 0x1032)
-               PAIR(0x1036, 0x1037)
-               PAIR(0x1039, 0x1039)
-               PAIR(0x1058, 0x1059)
-               BIG_(0x1160, 0x11FF)
-               PAIR(0x135F, 0x135F)
-               PAIR(0x1712, 0x1714)
-               PAIR(0x1732, 0x1734)
-               PAIR(0x1752, 0x1753)
-               PAIR(0x1772, 0x1773)
-               PAIR(0x17B4, 0x17B5)
-               BIG_(0x17B7, 0x17BD)
-               PAIR(0x17C6, 0x17C6)
-               BIG_(0x17C9, 0x17D3)
-               PAIR(0x17DD, 0x17DD)
-               PAIR(0x180B, 0x180D)
-               PAIR(0x18A9, 0x18A9)
-               PAIR(0x1920, 0x1922)
-               PAIR(0x1927, 0x1928)
-               PAIR(0x1932, 0x1932)
-               PAIR(0x1939, 0x193B)
-               PAIR(0x1A17, 0x1A18)
-               PAIR(0x1B00, 0x1B03)
-               PAIR(0x1B34, 0x1B34)
-               BIG_(0x1B36, 0x1B3A)
-               PAIR(0x1B3C, 0x1B3C)
-               PAIR(0x1B42, 0x1B42)
-               BIG_(0x1B6B, 0x1B73)
-               BIG_(0x1DC0, 0x1DCA)
-               PAIR(0x1DFE, 0x1DFF)
-               BIG_(0x200B, 0x200F)
-               BIG_(0x202A, 0x202E)
-               PAIR(0x2060, 0x2063)
-               BIG_(0x206A, 0x206F)
-               BIG_(0x20D0, 0x20EF)
-               BIG_(0x302A, 0x302F)
-               PAIR(0x3099, 0x309A)
-               /* Too big to be packed in PAIRs: */
-               { 0xA806, 0xA806 },
-               { 0xA80B, 0xA80B },
-               { 0xA825, 0xA826 },
-               { 0xFB1E, 0xFB1E },
-               { 0xFE00, 0xFE0F },
-               { 0xFE20, 0xFE23 },
-               { 0xFEFF, 0xFEFF },
-               { 0xFFF9, 0xFFFB }
-#undef BIG_
-#undef PAIR
-       };
-       static const uint16_t combining1[] = {
-#define BIG_(a,b)
-#define PAIR(a,b) (a << 2) | (b-a),
-               /* Exact copy-n-paste of the above: */
-               BIG_(0x0300, 0x036F)
-               PAIR(0x0483, 0x0486)
-               PAIR(0x0488, 0x0489)
-               BIG_(0x0591, 0x05BD)
-               PAIR(0x05BF, 0x05BF)
-               PAIR(0x05C1, 0x05C2)
-               PAIR(0x05C4, 0x05C5)
-               PAIR(0x05C7, 0x05C7)
-               PAIR(0x0600, 0x0603)
-               BIG_(0x0610, 0x0615)
-               BIG_(0x064B, 0x065E)
-               PAIR(0x0670, 0x0670)
-               BIG_(0x06D6, 0x06E4)
-               PAIR(0x06E7, 0x06E8)
-               PAIR(0x06EA, 0x06ED)
-               PAIR(0x070F, 0x070F)
-               PAIR(0x0711, 0x0711)
-               BIG_(0x0730, 0x074A)
-               BIG_(0x07A6, 0x07B0)
-               BIG_(0x07EB, 0x07F3)
-               PAIR(0x0901, 0x0902)
-               PAIR(0x093C, 0x093C)
-               BIG_(0x0941, 0x0948)
-               PAIR(0x094D, 0x094D)
-               PAIR(0x0951, 0x0954)
-               PAIR(0x0962, 0x0963)
-               PAIR(0x0981, 0x0981)
-               PAIR(0x09BC, 0x09BC)
-               PAIR(0x09C1, 0x09C4)
-               PAIR(0x09CD, 0x09CD)
-               PAIR(0x09E2, 0x09E3)
-               PAIR(0x0A01, 0x0A02)
-               PAIR(0x0A3C, 0x0A3C)
-               PAIR(0x0A41, 0x0A42)
-               PAIR(0x0A47, 0x0A48)
-               PAIR(0x0A4B, 0x0A4D)
-               PAIR(0x0A70, 0x0A71)
-               PAIR(0x0A81, 0x0A82)
-               PAIR(0x0ABC, 0x0ABC)
-               BIG_(0x0AC1, 0x0AC5)
-               PAIR(0x0AC7, 0x0AC8)
-               PAIR(0x0ACD, 0x0ACD)
-               PAIR(0x0AE2, 0x0AE3)
-               PAIR(0x0B01, 0x0B01)
-               PAIR(0x0B3C, 0x0B3C)
-               PAIR(0x0B3F, 0x0B3F)
-               PAIR(0x0B41, 0x0B43)
-               PAIR(0x0B4D, 0x0B4D)
-               PAIR(0x0B56, 0x0B56)
-               PAIR(0x0B82, 0x0B82)
-               PAIR(0x0BC0, 0x0BC0)
-               PAIR(0x0BCD, 0x0BCD)
-               PAIR(0x0C3E, 0x0C40)
-               PAIR(0x0C46, 0x0C48)
-               PAIR(0x0C4A, 0x0C4D)
-               PAIR(0x0C55, 0x0C56)
-               PAIR(0x0CBC, 0x0CBC)
-               PAIR(0x0CBF, 0x0CBF)
-               PAIR(0x0CC6, 0x0CC6)
-               PAIR(0x0CCC, 0x0CCD)
-               PAIR(0x0CE2, 0x0CE3)
-               PAIR(0x0D41, 0x0D43)
-               PAIR(0x0D4D, 0x0D4D)
-               PAIR(0x0DCA, 0x0DCA)
-               PAIR(0x0DD2, 0x0DD4)
-               PAIR(0x0DD6, 0x0DD6)
-               PAIR(0x0E31, 0x0E31)
-               BIG_(0x0E34, 0x0E3A)
-               BIG_(0x0E47, 0x0E4E)
-               PAIR(0x0EB1, 0x0EB1)
-               BIG_(0x0EB4, 0x0EB9)
-               PAIR(0x0EBB, 0x0EBC)
-               BIG_(0x0EC8, 0x0ECD)
-               PAIR(0x0F18, 0x0F19)
-               PAIR(0x0F35, 0x0F35)
-               PAIR(0x0F37, 0x0F37)
-               PAIR(0x0F39, 0x0F39)
-               BIG_(0x0F71, 0x0F7E)
-               BIG_(0x0F80, 0x0F84)
-               PAIR(0x0F86, 0x0F87)
-               PAIR(0x0FC6, 0x0FC6)
-               BIG_(0x0F90, 0x0F97)
-               BIG_(0x0F99, 0x0FBC)
-               PAIR(0x102D, 0x1030)
-               PAIR(0x1032, 0x1032)
-               PAIR(0x1036, 0x1037)
-               PAIR(0x1039, 0x1039)
-               PAIR(0x1058, 0x1059)
-               BIG_(0x1160, 0x11FF)
-               PAIR(0x135F, 0x135F)
-               PAIR(0x1712, 0x1714)
-               PAIR(0x1732, 0x1734)
-               PAIR(0x1752, 0x1753)
-               PAIR(0x1772, 0x1773)
-               PAIR(0x17B4, 0x17B5)
-               BIG_(0x17B7, 0x17BD)
-               PAIR(0x17C6, 0x17C6)
-               BIG_(0x17C9, 0x17D3)
-               PAIR(0x17DD, 0x17DD)
-               PAIR(0x180B, 0x180D)
-               PAIR(0x18A9, 0x18A9)
-               PAIR(0x1920, 0x1922)
-               PAIR(0x1927, 0x1928)
-               PAIR(0x1932, 0x1932)
-               PAIR(0x1939, 0x193B)
-               PAIR(0x1A17, 0x1A18)
-               PAIR(0x1B00, 0x1B03)
-               PAIR(0x1B34, 0x1B34)
-               BIG_(0x1B36, 0x1B3A)
-               PAIR(0x1B3C, 0x1B3C)
-               PAIR(0x1B42, 0x1B42)
-               BIG_(0x1B6B, 0x1B73)
-               BIG_(0x1DC0, 0x1DCA)
-               PAIR(0x1DFE, 0x1DFF)
-               BIG_(0x200B, 0x200F)
-               BIG_(0x202A, 0x202E)
-               PAIR(0x2060, 0x2063)
-               BIG_(0x206A, 0x206F)
-               BIG_(0x20D0, 0x20EF)
-               BIG_(0x302A, 0x302F)
-               PAIR(0x3099, 0x309A)
-#undef BIG_
-#undef PAIR
-       };
-       struct CHECK {
-#define BIG_(a,b) char big##a[b-a <= 3 ? -1 : 1];
-#define PAIR(a,b) char pair##a[b-a > 3 ? -1 : 1];
-               /* Copy-n-paste it here again to verify correctness */
-#undef BIG_
-#undef PAIR
-       };
-#endif
-
-       if (ucs == 0)
-               return 0;
-
-       /* Test for 8-bit control characters (00-1f, 80-9f, 7f) */
-       if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
-               return -1;
-       /* Quick abort if it is an obviously invalid char */
-       if (ucs > LAST_SUPPORTED_WCHAR)
-               return -1;
-
-       /* Optimization: no combining chars below 0x300 */
-       if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
-               return 1;
-
-#if LAST_SUPPORTED_WCHAR >= 0x300
-       /* Binary search in table of non-spacing characters */
-       if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
-               return 0;
-       if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
-               return 0;
-
-       /* Optimization: all chars below 0x1100 are not double-width */
-       if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
-               return 1;
-
-# if LAST_SUPPORTED_WCHAR >= 0x1100
-       /* Invalid code points: */
-       /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
-       /* Private Use Area (e000..f8ff) */
-       /* Noncharacters fdd0..fdef */
-       if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
-        || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
-       ) {
-               return -1;
-       }
-       /* 0xfffe and 0xffff in every plane are invalid */
-       if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
-               return -1;
-       }
-
-#  if LAST_SUPPORTED_WCHAR >= 0x10000
-       if (ucs >= 0x10000) {
-               /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
-               static const struct interval combining0x10000[] = {
-                       { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
-                       { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
-                       { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
-                       { 0xD242, 0xD244 }
-               };
-               /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
-               if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
-                       return 0;
-               /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
-               if (LAST_SUPPORTED_WCHAR >= 0xE0001
-                && (  ucs == 0xE0001
-                   || (ucs >= 0xE0020 && ucs <= 0xE007F)
-                   || (ucs >= 0xE0100 && ucs <= 0xE01EF)
-                   )
-               ) {
-                       return 0;
-               }
-       }
-#  endif
-
-       /* If we arrive here, ucs is not a combining or C0/C1 control character.
-        * Check whether it's 1 char or 2-shar wide.
-        */
-       return 1 +
-               (  (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
-               || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
-               || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
-               || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
-#  if LAST_SUPPORTED_WCHAR >= 0xac00
-               || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
-               || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
-               || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */
-               || (ucs >= 0xfe30 && ucs <= 0xfe6f) /* CJK Compatibility Forms */
-               || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */
-               || (ucs >= 0xffe0 && ucs <= 0xffe6)
-               || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */
-#  endif
-               );
-# endif /* >= 0x1100 */
-#endif /* >= 0x300 */
-}
diff --git a/modutils/lsmod.c b/modutils/lsmod.c

index 50621c24553dc874b2184bbc27c94b82f7eeca02..97954c71f0ea02ac15ffb9da9f7dec2bde263c0a 100644 (file)
--- a/modutils/lsmod.c
+++ b/modutils/lsmod.c
@@ -60,7 +60,7 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
                                 token[3][strlen(token[3])-1] = '\0';
                         } else
                                 token[3] = (char *) "";
-# if ENABLE_FEATURE_ASSUME_UNICODE
+# if ENABLE_UNICODE_SUPPORT
                         {
                                 uni_stat_t uni_stat;
                                 char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]);
@@ -78,7 +78,7 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
                         // or comma-separated list ended by comma
                         // so trimming the trailing char is just what we need!
                         token[3][strlen(token[3])-1] = '\0';
-# if ENABLE_FEATURE_ASSUME_UNICODE
+# if ENABLE_UNICODE_SUPPORT
                         {
                                 uni_stat_t uni_stat;
                                 char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]);
diff --git a/networking/udhcp/dumpleases.c b/networking/udhcp/dumpleases.c

index 6ebda94b67360c80d9c4be0c856de5037e3f43bc..fb6219fba2b772a431c8c5f786ec8b83cdc7e2cc 100644 (file)
--- a/networking/udhcp/dumpleases.c
+++ b/networking/udhcp/dumpleases.c
@@ -66,7 +66,7 @@ int dumpleases_main(int argc UNUSED_PARAM, char **argv)
                         fmt = ":%02x";
                 }
                 addr.s_addr = lease.lease_nip;
-#if ENABLE_FEATURE_ASSUME_UNICODE
+#if ENABLE_UNICODE_SUPPORT
                 {
                         char *uni_name = unicode_conv_to_printable_fixedwidth(NULL, lease.hostname, 19);
                         printf(" %-16s%s ", inet_ntoa(addr), uni_name);
diff --git a/scripts/defconfig b/scripts/defconfig

index 49158ceca013120560251a77a21b252e5720756d..d13f5b1b4a72c36d6eebe46ca71c25ff0dfee1b4 100644 (file)
--- a/scripts/defconfig
+++ b/scripts/defconfig
@@ -24,7 +24,7 @@ CONFIG_FEATURE_VERBOSE_USAGE=y
  CONFIG_FEATURE_COMPRESS_USAGE=y
  CONFIG_FEATURE_INSTALLER=y
  CONFIG_LOCALE_SUPPORT=y
-CONFIG_FEATURE_ASSUME_UNICODE=y
+CONFIG_UNICODE_SUPPORT=y
  # CONFIG_FEATURE_CHECK_UNICODE_IN_ENV is not set
  CONFIG_LONG_OPTS=y
  CONFIG_FEATURE_DEVPTS=y
diff --git a/scripts/randomtest b/scripts/randomtest

index 2a30cb638bc4b7ad41b30618fc54afde27d0ffde..6b7db92391d73b817f5fc6ffff4e1d0619de49a6 100755 (executable)
--- a/scripts/randomtest
+++ b/scripts/randomtest
@@ -50,7 +50,7 @@ cat .config \
  | grep -v ^CONFIG_BUILD_LIBBUSYBOX= \
  | grep -v ^CONFIG_PAM= \
  | grep -v ^CONFIG_TASKSET= \
-| grep -v ^CONFIG_FEATURE_ASSUME_UNICODE= \
+| grep -v ^CONFIG_UNICODE_SUPPORT= \
  | grep -v ^CONFIG_PIE= \
  | grep -v CONFIG_STATIC \
  | grep -v CONFIG_CROSS_COMPILER_PREFIX \
diff --git a/testsuite/cal.tests b/testsuite/cal.tests

index 36be2b4b575e36842a7430bb9243ad8d3d60797b..30985688b895c01b0f4050294f7b0a25bf61cee6 100755 (executable)
--- a/testsuite/cal.tests
+++ b/testsuite/cal.tests
@@ -20,7 +20,7 @@ Su Mo Tu We Th Fr Sa
  " "" ""
  
  test x"$CONFIG_LOCALE_SUPPORT" = x"y" \
-&& test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
+&& test x"$CONFIG_UNICODE_SUPPORT" = x"y" \
  && test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"0" \
  && test x"$CONFIG_UNICODE_WIDE_WCHARS" = x"y" \
  && test x"$CONFIG_STATIC" != x"y" \
diff --git a/testsuite/ls.tests b/testsuite/ls.tests

index 169313a631d129a925815ed1a97a8c10abf24de4..0680762fce1c9debaec168c340658033ab02f879 100755 (executable)
--- a/testsuite/ls.tests
+++ b/testsuite/ls.tests
@@ -14,7 +14,7 @@ mkdir ls.testdir || exit 1
  # With Unicode provided by libc locale, I'm not sure this test can pass.
  # I suspect we might fail to skip exactly correct number of bytes
  # over broked unicode sequences.
-test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
+test x"$CONFIG_UNICODE_SUPPORT" = x"y" \
  && test x"$CONFIG_LOCALE_SUPPORT" != x"y" \
  && test x"$CONFIG_SUBST_WCHAR" = x"63" \
  && test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \
@@ -133,7 +133,7 @@ test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
  ' "" ""
  
  # Currently fails on "0080_4.2.2__U-000007FF_=_e0_9f_bf" line
-test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
+test x"$CONFIG_UNICODE_SUPPORT" = x"y" \
  && test x"$CONFIG_LOCALE_SUPPORT" != x"y" \
  && test x"$CONFIG_SUBST_WCHAR" = x"63" \
  && test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"0" \
author	Denys Vlasenko <vda.linux@googlemail.com>
	Fri, 26 Mar 2010 13:06:56 +0000 (14:06 +0100)
committer	Denys Vlasenko <vda.linux@googlemail.com>
	Fri, 26 Mar 2010 13:06:56 +0000 (14:06 +0100)
Config.in		patch \| blob \| history
TODO		patch \| blob \| history
TODO_config_nommu		patch \| blob \| history
coreutils/cal.c		patch \| blob \| history
coreutils/df.c		patch \| blob \| history
coreutils/expand.c		patch \| blob \| history
include/unicode.h		patch \| blob \| history
libbb/Kbuild		patch \| blob \| history
libbb/lineedit.c		patch \| blob \| history
libbb/printable_string.c		patch \| blob \| history
libbb/progress.c		patch \| blob \| history
libbb/unicode.c		patch \| blob \| history
libbb/unicode_wcwidth.c	[deleted file]	patch \| blob \| history
modutils/lsmod.c		patch \| blob \| history
networking/udhcp/dumpleases.c		patch \| blob \| history
scripts/defconfig		patch \| blob \| history
scripts/randomtest		patch \| blob \| history
testsuite/cal.tests		patch \| blob \| history
testsuite/ls.tests		patch \| blob \| history