wget: more thorough sanitization of other side's data
authorDenys Vlasenko <vda.linux@googlemail.com>
Mon, 12 Feb 2018 15:46:13 +0000 (16:46 +0100)
committerDenys Vlasenko <vda.linux@googlemail.com>
Mon, 12 Feb 2018 15:46:13 +0000 (16:46 +0100)
function                                             old     new   delta
get_sanitized_hdr                                      -     156    +156
fgets_trim_sanitize                                    -     128    +128
ftpcmd                                               129     133      +4
parse_url                                            461     454      -7
sanitize_string                                       14       -     -14
wget_main                                           2431    2381     -50
fgets_and_trim                                       119       -    -119
gethdr                                               163       -    -163
------------------------------------------------------------------------------
(add/remove: 2/3 grow/shrink: 1/2 up/down: 288/-353)          Total: -65 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
networking/wget.c

index 95b88ad37129cce803fef1dacbf8156dfcf4d99b..3a5d6817387e3c6fb080920daca795d7c8abd556 100644 (file)
@@ -352,15 +352,6 @@ static char *base64enc(const char *str)
 }
 #endif
 
-static char* sanitize_string(char *s)
-{
-       unsigned char *p = (void *) s;
-       while (*p >= ' ')
-               p++;
-       *p = '\0';
-       return s;
-}
-
 #if ENABLE_FEATURE_WGET_TIMEOUT
 static void alarm_handler(int sig UNUSED_PARAM)
 {
@@ -423,22 +414,49 @@ static FILE *open_socket(len_and_sockaddr *lsa)
        return fp;
 }
 
+/* We balk at any control chars in other side's messages.
+ * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
+ * and error messages.
+ *
+ * The only exception is tabs, which are converted to (one) space:
+ * HTTP's "headers: <whitespace> values" may have those.
+ */
+static char* sanitize_string(char *s)
+{
+       unsigned char *p = (void *) s;
+       while (*p) {
+               if (*p < ' ') {
+                       if (*p != '\t')
+                               break;
+                       *p = ' ';
+               }
+               p++;
+       }
+       *p = '\0';
+       return s;
+}
+
 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
-static char fgets_and_trim(FILE *fp, const char *fmt)
+static char fgets_trim_sanitize(FILE *fp, const char *fmt)
 {
        char c;
        char *buf_ptr;
 
        set_alarm();
-       if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
+       if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
                bb_perror_msg_and_die("error getting response");
        clear_alarm();
 
        buf_ptr = strchrnul(G.wget_buf, '\n');
        c = *buf_ptr;
+#if 1
+       /* Disallow any control chars: trim at first char < 0x20 */
+       sanitize_string(G.wget_buf);
+#else
        *buf_ptr = '\0';
        buf_ptr = strchrnul(G.wget_buf, '\r');
        *buf_ptr = '\0';
+#endif
 
        log_io("< %s", G.wget_buf);
 
@@ -462,8 +480,10 @@ static int ftpcmd(const char *s1, const char *s2, FILE *fp)
                log_io("> %s%s", s1, s2);
        }
 
+       /* Read until "Nxx something" is received */
+       G.wget_buf[3] = 0;
        do {
-               fgets_and_trim(fp, "%s\n");
+               fgets_trim_sanitize(fp, "%s\n");
        } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
 
        G.wget_buf[3] = '\0';
@@ -505,7 +525,7 @@ static void parse_url(const char *src_url, struct host_info *h)
                        h->protocol = P_HTTP;
                } else {
                        *p = ':';
-                       bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
+                       bb_error_msg_and_die("not an http or ftp url: %s", url);
                }
        } else {
                // GNU wget is user-friendly and falls back to http://
@@ -560,13 +580,13 @@ static void parse_url(const char *src_url, struct host_info *h)
         */
 }
 
-static char *gethdr(FILE *fp)
+static char *get_sanitized_hdr(FILE *fp)
 {
        char *s, *hdrval;
        int c;
 
        /* retrieve header line */
-       c = fgets_and_trim(fp, "  %s\n");
+       c = fgets_trim_sanitize(fp, "  %s\n");
 
        /* end of the headers? */
        if (G.wget_buf[0] == '\0')
@@ -588,7 +608,7 @@ static char *gethdr(FILE *fp)
 
        /* verify we are at the end of the header name */
        if (*s != ':')
-               bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
+               bb_error_msg_and_die("bad header line: %s", G.wget_buf);
 
        /* locate the start of the header value */
        *s++ = '\0';
@@ -755,7 +775,8 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
 #endif
 
        if (ftpcmd(NULL, NULL, sfp) != 220)
-               bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
+               bb_error_msg_and_die("%s", G.wget_buf);
+               /* note: ftpcmd() sanitizes G.wget_buf, ok to print */
 
        /*
         * Splitting username:password pair,
@@ -772,7 +793,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
                        break;
                /* fall through (failed login) */
        default:
-               bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
+               bb_error_msg_and_die("ftp login: %s", G.wget_buf);
        }
 
        ftpcmd("TYPE I", NULL, sfp);
@@ -796,7 +817,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
        } else
        if (ftpcmd("PASV", NULL, sfp) != 227) {
  pasv_error:
-               bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
+               bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
        }
        port = parse_pasv_epsv(G.wget_buf);
        if (port < 0)
@@ -824,8 +845,11 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
                        reset_beg_range_to_zero();
        }
 
+//TODO: needs ftp-escaping 0xff and '\n' bytes here.
+//Or disallow '\n' altogether via sanitize_string() in parse_url().
+//But 0xff's are possible in valid utf8 filenames.
        if (ftpcmd("RETR ", target->path, sfp) > 150)
-               bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
+               bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);
 
        return sfp;
 }
@@ -946,9 +970,9 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
                if (!G.chunked)
                        break;
 
-               fgets_and_trim(dfp, NULL); /* Eat empty line */
+               fgets_trim_sanitize(dfp, NULL); /* Eat empty line */
  get_clen:
-               fgets_and_trim(dfp, NULL);
+               fgets_trim_sanitize(dfp, NULL);
                G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
                /* FIXME: error check? */
                if (G.content_len == 0)
@@ -1178,7 +1202,7 @@ static void download_one_url(const char *url)
                 * Retrieve HTTP response line and check for "200" status code.
                 */
  read_response:
-               fgets_and_trim(sfp, "  %s\n");
+               fgets_trim_sanitize(sfp, "  %s\n");
 
                str = G.wget_buf;
                str = skip_non_whitespace(str);
@@ -1189,7 +1213,7 @@ static void download_one_url(const char *url)
                switch (status) {
                case 0:
                case 100:
-                       while (gethdr(sfp) != NULL)
+                       while (get_sanitized_hdr(sfp) != NULL)
                                /* eat all remaining headers */;
                        goto read_response;
 
@@ -1253,13 +1277,13 @@ However, in real world it was observed that some web servers
                        /* Partial Content even though we did not ask for it??? */
                        /* fall through */
                default:
-                       bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
+                       bb_error_msg_and_die("server returned error: %s", G.wget_buf);
                }
 
                /*
                 * Retrieve HTTP headers.
                 */
-               while ((str = gethdr(sfp)) != NULL) {
+               while ((str = get_sanitized_hdr(sfp)) != NULL) {
                        static const char keywords[] ALIGN1 =
                                "content-length\0""transfer-encoding\0""location\0";
                        enum {
@@ -1267,7 +1291,7 @@ However, in real world it was observed that some web servers
                        };
                        smalluint key;
 
-                       /* gethdr converted "FOO:" string to lowercase */
+                       /* get_sanitized_hdr converted "FOO:" string to lowercase */
 
                        /* strip trailing whitespace */
                        char *s = strchrnul(str, '\0') - 1;
@@ -1279,14 +1303,14 @@ However, in real world it was observed that some web servers
                        if (key == KEY_content_length) {
                                G.content_len = BB_STRTOOFF(str, NULL, 10);
                                if (G.content_len < 0 || errno) {
-                                       bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
+                                       bb_error_msg_and_die("content-length %s is garbage", str);
                                }
                                G.got_clen = 1;
                                continue;
                        }
                        if (key == KEY_transfer_encoding) {
                                if (strcmp(str_tolower(str), "chunked") != 0)
-                                       bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
+                                       bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
                                G.chunked = 1;
                        }
                        if (key == KEY_location && status >= 300) {
@@ -1295,7 +1319,7 @@ However, in real world it was observed that some web servers
                                fclose(sfp);
                                if (str[0] == '/') {
                                        free(redirected_path);
-                                       target.path = redirected_path = xstrdup(str+1);
+                                       target.path = redirected_path = xstrdup(str + 1);
                                        /* lsa stays the same: it's on the same server */
                                } else {
                                        parse_url(str, &target);
@@ -1342,7 +1366,7 @@ However, in real world it was observed that some web servers
                /* It's ftp. Close data connection properly */
                fclose(dfp);
                if (ftpcmd(NULL, NULL, sfp) != 226)
-                       bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
+                       bb_error_msg_and_die("ftp error: %s", G.wget_buf);
                /* ftpcmd("QUIT", NULL, sfp); - why bother? */
        }
        fclose(sfp);