ash: fix matching of unicode greek letter rho (cf 81) and similar cases

author Denys Vlasenko <vda.linux@googlemail.com>

Wed, 5 Jul 2017 17:10:21 +0000 (19:10 +0200)

committer Denys Vlasenko <vda.linux@googlemail.com>

Wed, 5 Jul 2017 17:10:21 +0000 (19:10 +0200)
author Denys Vlasenko <vda.linux@googlemail.com>
Wed, 5 Jul 2017 17:10:21 +0000 (19:10 +0200)
committer Denys Vlasenko <vda.linux@googlemail.com>
Wed, 5 Jul 2017 17:10:21 +0000 (19:10 +0200)
diff --git a/shell/ash.c b/shell/ash.c

index 6d46e3719bba33e03fd36f67315264bc12526178..e5fdd1646d577c3f0ccd0a48c595333e4ca1bb39 100644 (file)
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -5913,6 +5913,7 @@ rmescapes(char *str, int flag)
         while (*p) {
                 if ((unsigned char)*p == CTLQUOTEMARK) {
  // Note: both inquotes and protect_against_glob only affect whether
+// CTLESC,<ch> gets converted to <ch> or to \<ch>
                         inquotes = ~inquotes;
                         p++;
                         protect_against_glob = globbing;
@@ -5925,7 +5926,27 @@ rmescapes(char *str, int flag)
                                 ash_msg_and_raise_error("CTLESC at EOL (shouldn't happen)");
  #endif
                         if (protect_against_glob) {
-                               *q++ = '\\';
+                               /*
+                                * We used to trust glob() and fnmatch() to eat
+                                * superfluous escapes (\z where z has no
+                                * special meaning anyway). But this causes
+                                * bugs such as a string of one Greek letter rho
+                                * (Unicode-encoded as two bytes "cf,81")
+                                * getting encoded as "cf,CTLESC,81"
+                                * and here, converted to "cf,\,81" -
+                                * which does not go well with some flavors
+                                * of fnmatch() in unicode locales.
+                                *
+                                * Let's add "\" only to the chars which need it.
+                                */
+                               if (*p == '*'
+                                || *p == '?'
+                                || *p == '['
+                               /* || *p == ']' maybe also this? */
+                                || *p == '\\'
+                               ) {
+                                       *q++ = '\\';
+                               }
                         }
                 } else if (*p == '\\' && !inquotes) {
                         /* naked back slash */
diff --git a/shell/ash_test/ash-quoting/unicode_8x_chars.right b/shell/ash_test/ash-quoting/unicode_8x_chars.right

new file mode 100644 (file)

index 0000000..7780b88
--- /dev/null
+++ b/shell/ash_test/ash-quoting/unicode_8x_chars.right
@@ -0,0 +1,6 @@
+ok
+ok
+ok
+ok
+ok
+ok
diff --git a/shell/ash_test/ash-quoting/unicode_8x_chars.tests b/shell/ash_test/ash-quoting/unicode_8x_chars.tests

new file mode 100755 (executable)

index 0000000..1258745
--- /dev/null
+++ b/shell/ash_test/ash-quoting/unicode_8x_chars.tests
@@ -0,0 +1,28 @@
+# Unicode: cf 80
+case π in
+( "π" ) echo ok ;;
+( * )   echo WRONG ;;
+esac
+# Unicode: cf 81
+case ρ in
+( "ρ" ) echo ok ;;
+( * )   echo WRONG ;;
+esac
+# Unicode: cf 82
+case ς in
+( "ς" ) echo ok ;;
+( * )   echo WRONG ;;
+esac
+
+case "π" in
+( π ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+case "ρ" in
+( ρ ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+case "ς" in
+( ς ) echo ok ;;
+( * ) echo WRONG ;;
+esac
diff --git a/shell/hush_test/hush-quoting/unicode_8x_chars.right b/shell/hush_test/hush-quoting/unicode_8x_chars.right

new file mode 100644 (file)

index 0000000..7780b88
--- /dev/null
+++ b/shell/hush_test/hush-quoting/unicode_8x_chars.right
@@ -0,0 +1,6 @@
+ok
+ok
+ok
+ok
+ok
+ok
diff --git a/shell/hush_test/hush-quoting/unicode_8x_chars.tests b/shell/hush_test/hush-quoting/unicode_8x_chars.tests

new file mode 100755 (executable)

index 0000000..1258745
--- /dev/null
+++ b/shell/hush_test/hush-quoting/unicode_8x_chars.tests
@@ -0,0 +1,28 @@
+# Unicode: cf 80
+case π in
+( "π" ) echo ok ;;
+( * )   echo WRONG ;;
+esac
+# Unicode: cf 81
+case ρ in
+( "ρ" ) echo ok ;;
+( * )   echo WRONG ;;
+esac
+# Unicode: cf 82
+case ς in
+( "ς" ) echo ok ;;
+( * )   echo WRONG ;;
+esac
+
+case "π" in
+( π ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+case "ρ" in
+( ρ ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+case "ς" in
+( ς ) echo ok ;;
+( * ) echo WRONG ;;
+esac
author	Denys Vlasenko <vda.linux@googlemail.com>
	Wed, 5 Jul 2017 17:10:21 +0000 (19:10 +0200)
committer	Denys Vlasenko <vda.linux@googlemail.com>
	Wed, 5 Jul 2017 17:10:21 +0000 (19:10 +0200)
shell/ash.c		patch \| blob \| history
shell/ash_test/ash-quoting/unicode_8x_chars.right	[new file with mode: 0644]	patch \| blob
shell/ash_test/ash-quoting/unicode_8x_chars.tests	[new file with mode: 0755]	patch \| blob
shell/hush_test/hush-quoting/unicode_8x_chars.right	[new file with mode: 0644]	patch \| blob
shell/hush_test/hush-quoting/unicode_8x_chars.tests	[new file with mode: 0755]	patch \| blob