hush: make ${#var} unicode-aware
author Denys Vlasenko <vda.linux@googlemail.com>
Wed, 13 Aug 2014 07:57:44 +0000 (09:57 +0200)
committer Denys Vlasenko <vda.linux@googlemail.com>
Wed, 13 Aug 2014 07:57:44 +0000 (09:57 +0200)
This mimics bash

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
shell/hush.c
shell/hush_test/hush-misc/unicode1.right [new file with mode: 0644]
shell/hush_test/hush-misc/unicode1.tests [new file with mode: 0755]

index e1d0ece29f7b9732a57f60a61c7bcad1aae99f0b..7d35471102b37a1ac99dc83b97612f9f4b56cda0 100644 (file)
@@ -1976,6 +1976,22 @@ static struct variable *set_vars_and_save_old(char **strings)
 }
 
 
+/*
+ * Unicode helper
+ */
+static void reinit_unicode_for_hush(void)
+{
+       /* Unicode support should be activated even if LANG is set
+        * _during_ shell execution, not only if it was set when
+        * shell was started. Therefore, re-check LANG every time:
+        */
+       const char *s = get_local_var_value("LC_ALL");
+       if (!s) s = get_local_var_value("LC_CTYPE");
+       if (!s) s = get_local_var_value("LANG");
+       reinit_unicode(s);
+}
+
+
 /*
  * in_str support
  */
@@ -2042,15 +2058,7 @@ static void get_user_input(struct in_str *i)
        /* Enable command line editing only while a command line
         * is actually being read */
        do {
-               /* Unicode support should be activated even if LANG is set
-                * _during_ shell execution, not only if it was set when
-                * shell was started. Therefore, re-check LANG every time:
-                */
-               const char *s = get_local_var_value("LC_ALL");
-               if (!s) s = get_local_var_value("LC_CTYPE");
-               if (!s) s = get_local_var_value("LANG");
-               reinit_unicode(s);
-
+               reinit_unicode_for_hush();
                G.flag_SIGINT = 0;
                /* buglet: SIGINT will not make new prompt to appear _at once_,
                 * only after <Enter>. (^C will work) */
@@ -5028,8 +5036,9 @@ static NOINLINE const char *expand_one_var(char **to_be_freed_pp, char *arg, cha
 
        /* Handle any expansions */
        if (exp_op == 'L') {
+               reinit_unicode_for_hush();
                debug_printf_expand("expand: length(%s)=", val);
-               val = utoa(val ? strlen(val) : 0);
+               val = utoa(val ? unicode_strlen(val) : 0);
                debug_printf_expand("%s\n", val);
        } else if (exp_op) {
                if (exp_op == '%' || exp_op == '#') {
diff --git a/shell/hush_test/hush-misc/unicode1.right b/shell/hush_test/hush-misc/unicode1.right
new file mode 100644 (file)
index 0000000..d3bbbf6
--- /dev/null
@@ -0,0 +1,3 @@
+1
+1
+Ok
diff --git a/shell/hush_test/hush-misc/unicode1.tests b/shell/hush_test/hush-misc/unicode1.tests
new file mode 100755 (executable)
index 0000000..8788ba9
--- /dev/null
@@ -0,0 +1,13 @@
+LANG=en_US.UTF-8
+
+# A combining character U+300
+a=`printf "\xcc\x80"`
+# Should print 1
+echo ${#a}
+
+# A Japanese katakana character U+30a3
+a=`printf "\xe3\x82\xa3"`
+# Should print 1
+echo ${#a}
+
+echo Ok