ash: [PARSER] Handle backslash newlines properly after dollar sign
author Denys Vlasenko <vda.linux@googlemail.com>
Thu, 29 Sep 2016 15:17:04 +0000 (17:17 +0200)
committer Denys Vlasenko <vda.linux@googlemail.com>
Thu, 29 Sep 2016 15:17:04 +0000 (17:17 +0200)
Fixes var_unbackslash1.tests failure.

Upstream commit:

    [PARSER] Handle backslash newlines properly after dollar sign

    On Tue, Aug 26, 2014 at 12:34:42PM +0000, Eric Blake wrote:
    > On 08/26/2014 06:15 AM, Oleg Bulatov wrote:
    > > While playing with sh generators I found that dash and bash have different
    > > interpretations for <slash><newline> sequence.
    > >
    > > $ dash -c 'EDIT=xxx; echo $EDIT\
    > >> OR'
    > > xxxOR
    >
    > Buggy.
    > >
    > > $ dash -c 'echo "$\
    > > (pwd)"'
    > > $(pwd)
    > >
    > > Is it undefined behaviour in POSIX?
    >
    > No, it's well-defined, and dash is buggy.
    ...

    I agree.  This patch should resolve this problem and similar ones
    affecting backslash newlines after we encounter a dollar sign.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
shell/ash.c

index a63d40d7e6f7ecf1f8547579980c1e8923ec84d1..b71fc02990d47ea45fc2f8c92d14486432e27acb 100644 (file)
@@ -9774,11 +9774,6 @@ popstring(void)
        INT_ON;
 }
 
-//FIXME: BASH_COMPAT with "...&" does TWO pungetc():
-//it peeks whether it is &>, and then pushes back both chars.
-//This function needs to save last *next_to_pgetc to buf[0]
-//to make two pungetc() reliable. Currently,
-// pgetc (out of buf: does preadfd), pgetc, pungetc, pungetc won't work...
 static int
 preadfd(void)
 {
@@ -10037,6 +10032,25 @@ pungetc(void)
        g_parsefile->unget++;
 }
 
+/* This one eats backslash+newline */
+static int
+pgetc_eatbnl(void)
+{
+       int c;
+
+       while ((c = pgetc()) == '\\') {
+               if (pgetc() != '\n') {
+                       pungetc();
+                       break;
+               }
+
+               g_parsefile->linno++;
+               setprompt_if(doprompt, 2);
+       }
+
+       return c;
+}
+
 /*
  * To handle the "." command, a stack of input files is used.  Pushfile
  * adds a new entry to the stack and popfile restores the previous level.
@@ -11625,7 +11639,7 @@ parsesub: {
        int typeloc;
        int flags;
 
-       c = pgetc();
+       c = pgetc_eatbnl();
        if (c > 255 /* PEOA or PEOF */
         || (c != '(' && c != '{' && !is_name(c) && !is_special(c))
        ) {
@@ -11638,7 +11652,7 @@ parsesub: {
                pungetc();
        } else if (c == '(') {
                /* $(command) or $((arith)) */
-               if (pgetc() == '(') {
+               if (pgetc_eatbnl() == '(') {
 #if ENABLE_SH_MATH_SUPPORT
                        PARSEARITH();
 #else
@@ -11655,9 +11669,9 @@ parsesub: {
                USTPUTC(VSNORMAL, out);
                subtype = VSNORMAL;
                if (c == '{') {
-                       c = pgetc();
+                       c = pgetc_eatbnl();
                        if (c == '#') {
-                               c = pgetc();
+                               c = pgetc_eatbnl();
                                if (c == '}')
                                        c = '#'; /* ${#} - same as $# */
                                else
@@ -11670,18 +11684,18 @@ parsesub: {
                        /* $[{[#]]NAME[}] */
                        do {
                                STPUTC(c, out);
-                               c = pgetc();
+                               c = pgetc_eatbnl();
                        } while (c <= 255 /* not PEOA or PEOF */ && is_in_name(c));
                } else if (isdigit(c)) {
                        /* $[{[#]]NUM[}] */
                        do {
                                STPUTC(c, out);
-                               c = pgetc();
+                               c = pgetc_eatbnl();
                        } while (isdigit(c));
                } else if (is_special(c)) {
                        /* $[{[#]]<specialchar>[}] */
                        USTPUTC(c, out);
-                       c = pgetc();
+                       c = pgetc_eatbnl();
                } else {
  badsub:
                        raise_error_syntax("bad substitution");
@@ -11699,7 +11713,7 @@ parsesub: {
                        /* c == first char after VAR */
                        switch (c) {
                        case ':':
-                               c = pgetc();
+                               c = pgetc_eatbnl();
 #if ENABLE_ASH_BASH_COMPAT
                                /* This check is only needed to not misinterpret
                                 * ${VAR:-WORD}, ${VAR:+WORD}, ${VAR:=WORD}, ${VAR:?WORD}
@@ -11724,7 +11738,7 @@ parsesub: {
                        case '#': {
                                int cc = c;
                                subtype = (c == '#' ? VSTRIMLEFT : VSTRIMRIGHT);
-                               c = pgetc();
+                               c = pgetc_eatbnl();
                                if (c != cc)
                                        goto do_pungetc;
                                subtype++;
@@ -11736,7 +11750,7 @@ parsesub: {
 //TODO: encode pattern and repl separately.
 // Currently ${v/$var_with_slash/repl} is horribly broken
                                subtype = VSREPLACE;
-                               c = pgetc();
+                               c = pgetc_eatbnl();
                                if (c != '/')
                                        goto do_pungetc;
                                subtype++; /* VSREPLACEALL */