ifenslave: fix missing close paren
[oweals/busybox.git] / networking / wget.c
index 6c015dccca43f51a3b84d7094bab612de273f32d..4eafebe40119c704c39d9f8e64d0ef3c6e2c265c 100644 (file)
@@ -8,10 +8,39 @@
  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
  */
+
+//usage:#define wget_trivial_usage
+//usage:       IF_FEATURE_WGET_LONG_OPTIONS(
+//usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
+//usage:       "       [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
+/* Since we ignore these opts, we don't show them in --help */
+/* //usage:    "       [--no-check-certificate] [--no-cache]" */
+//usage:       "       [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
+//usage:       )
+//usage:       IF_NOT_FEATURE_WGET_LONG_OPTIONS(
+//usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
+//usage:                       IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
+//usage:       )
+//usage:#define wget_full_usage "\n\n"
+//usage:       "Retrieve files via HTTP or FTP\n"
+//usage:     "\n       -s      Spider mode - only check file existence"
+//usage:     "\n       -c      Continue retrieval of aborted transfer"
+//usage:     "\n       -q      Quiet"
+//usage:     "\n       -P DIR  Save to DIR (default .)"
+//usage:       IF_FEATURE_WGET_TIMEOUT(
+//usage:     "\n       -T SEC  Network read timeout is SEC seconds"
+//usage:       )
+//usage:     "\n       -O FILE Save to FILE ('-' for stdout)"
+//usage:     "\n       -U STR  Use STR for User-Agent header"
+//usage:     "\n       -Y      Use proxy ('on' or 'off')"
+
 #include "libbb.h"
 
-//#define log_io(...) bb_error_msg(__VA_ARGS__)
-#define log_io(...) ((void)0)
+#if 0
+# define log_io(...) bb_error_msg(__VA_ARGS__)
+#else
+# define log_io(...) ((void)0)
+#endif
 
 
 struct host_info {
@@ -44,6 +73,8 @@ struct globals {
 #if ENABLE_FEATURE_WGET_TIMEOUT
        unsigned timeout_seconds;
 #endif
+       int output_fd;
+       int o_flags;
        smallint chunked;         /* chunked transfer encoding */
        smallint got_clen;        /* got content-length: from server  */
        /* Local downloads do benefit from big buffer.
@@ -90,8 +121,11 @@ static void progress_meter(int flag)
        if (flag == PROGRESS_START)
                bb_progress_init(&G.pmt, G.curfile);
 
-       bb_progress_update(&G.pmt, G.beg_range, G.transferred,
-                          G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
+       bb_progress_update(&G.pmt,
+                       G.beg_range,
+                       G.transferred,
+                       (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
+       );
 
        if (flag == PROGRESS_END) {
                bb_progress_free(&G.pmt);
@@ -268,8 +302,13 @@ static void parse_url(const char *src_url, struct host_info *h)
 
        sp = strrchr(h->host, '@');
        if (sp != NULL) {
-               h->user = h->host;
+               // URL-decode "user:password" string before base64-encoding:
+               // wget http://test:my%20pass@example.com should send
+               // Authorization: Basic dGVzdDpteSBwYXNz
+               // which decodes to "test:my pass".
+               // Standard wget and curl do this too.
                *sp = '\0';
+               h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
                h->host = sp + 1;
        }
 
@@ -281,8 +320,6 @@ static char *gethdr(FILE *fp)
        char *s, *hdrval;
        int c;
 
-       /* *istrunc = 0; */
-
        /* retrieve header line */
        c = fgets_and_trim(fp);
 
@@ -313,43 +350,14 @@ static char *gethdr(FILE *fp)
        return hdrval;
 }
 
-#if ENABLE_FEATURE_WGET_LONG_OPTIONS
-static char *URL_escape(const char *str)
+static void reset_beg_range_to_zero(void)
 {
-       /* URL encode, see RFC 2396 */
-       char *dst;
-       char *res = dst = xmalloc(strlen(str) * 3 + 1);
-       unsigned char c;
-
-       while (1) {
-               c = *str++;
-               if (c == '\0'
-               /* || strchr("!&'()*-.=_~", c) - more code */
-                || c == '!'
-                || c == '&'
-                || c == '\''
-                || c == '('
-                || c == ')'
-                || c == '*'
-                || c == '-'
-                || c == '.'
-                || c == '='
-                || c == '_'
-                || c == '~'
-                || (c >= '0' && c <= '9')
-                || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
-               ) {
-                       *dst++ = c;
-                       if (c == '\0')
-                               return res;
-               } else {
-                       *dst++ = '%';
-                       *dst++ = bb_hexdigits_upcase[c >> 4];
-                       *dst++ = bb_hexdigits_upcase[c & 0xf];
-               }
-       }
+       bb_error_msg("restart failed");
+       G.beg_range = 0;
+       xlseek(G.output_fd, 0, SEEK_SET);
+       /* Done at the end instead: */
+       /* ftruncate(G.output_fd, 0); */
 }
-#endif
 
 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
 {
@@ -414,14 +422,16 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
        str = strrchr(G.wget_buf, ',');
        if (!str) goto pasv_error;
        port += xatou_range(str+1, 0, 255) * 256;
-       set_nport(lsa, htons(port));
+       set_nport(&lsa->u.sa, htons(port));
 
        *dfpp = open_socket(lsa);
 
-       if (G.beg_range) {
+       if (G.beg_range != 0) {
                sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
                if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
                        G.content_len -= G.beg_range;
+               else
+                       reset_beg_range_to_zero();
        }
 
        if (ftpcmd("RETR ", target->path, sfp) > 150)
@@ -430,11 +440,11 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
        return sfp;
 }
 
-static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
+static void NOINLINE retrieve_file_data(FILE *dfp)
 {
 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 # if ENABLE_FEATURE_WGET_TIMEOUT
-       unsigned second_cnt;
+       unsigned second_cnt = G.timeout_seconds;
 # endif
        struct pollfd polldata;
 
@@ -455,7 +465,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
                 * which messes up progress bar and/or timeout logic.
                 * Because of nonblocking I/O, we need to dance
                 * very carefully around EAGAIN. See explanation at
-                * clearerr() call.
+                * clearerr() calls.
                 */
                ndelay_on(polldata.fd);
 #endif
@@ -463,32 +473,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
                        int n;
                        unsigned rdsz;
 
-                       rdsz = sizeof(G.wget_buf);
-                       if (G.got_clen) {
-                               if (G.content_len < (off_t)sizeof(G.wget_buf)) {
-                                       if ((int)G.content_len <= 0)
-                                               break;
-                                       rdsz = (unsigned)G.content_len;
-                               }
-                       }
-
 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
-# if ENABLE_FEATURE_WGET_TIMEOUT
-                       second_cnt = G.timeout_seconds;
-# endif
-                       while (1) {
-                               if (safe_poll(&polldata, 1, 1000) != 0)
-                                       break; /* error, EOF, or data is available */
-# if ENABLE_FEATURE_WGET_TIMEOUT
-                               if (second_cnt != 0 && --second_cnt == 0) {
-                                       progress_meter(PROGRESS_END);
-                                       bb_error_msg_and_die("download timed out");
-                               }
-# endif
-                               /* Needed for "stalled" indicator */
-                               progress_meter(PROGRESS_BUMP);
-                       }
-
                        /* fread internally uses read loop, which in our case
                         * is usually exited when we get EAGAIN.
                         * In this case, libc sets error marker on the stream.
@@ -498,36 +483,71 @@ static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
                         * into if (n <= 0) ...
                         */
                        clearerr(dfp);
-                       errno = 0;
 #endif
+                       errno = 0;
+                       rdsz = sizeof(G.wget_buf);
+                       if (G.got_clen) {
+                               if (G.content_len < (off_t)sizeof(G.wget_buf)) {
+                                       if ((int)G.content_len <= 0)
+                                               break;
+                                       rdsz = (unsigned)G.content_len;
+                               }
+                       }
                        n = fread(G.wget_buf, 1, rdsz, dfp);
-                       /* man fread:
+
+                       if (n > 0) {
+                               xwrite(G.output_fd, G.wget_buf, n);
+#if ENABLE_FEATURE_WGET_STATUSBAR
+                               G.transferred += n;
+#endif
+                               if (G.got_clen) {
+                                       G.content_len -= n;
+                                       if (G.content_len == 0)
+                                               break;
+                               }
+#if ENABLE_FEATURE_WGET_TIMEOUT
+                               second_cnt = G.timeout_seconds;
+#endif
+                               continue;
+                       }
+
+                       /* n <= 0.
+                        * man fread:
                         * If error occurs, or EOF is reached, the return value
                         * is a short item count (or zero).
                         * fread does not distinguish between EOF and error.
                         */
-                       if (n <= 0) {
-#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
-                               if (errno == EAGAIN) /* poll lied, there is no data? */
-                                       continue; /* yes */
-#endif
-                               if (ferror(dfp))
+                       if (errno != EAGAIN) {
+                               if (ferror(dfp)) {
+                                       progress_meter(PROGRESS_END);
                                        bb_perror_msg_and_die(bb_msg_read_error);
+                               }
                                break; /* EOF, not error */
                        }
 
-                       xwrite(output_fd, G.wget_buf, n);
-
-#if ENABLE_FEATURE_WGET_STATUSBAR
-                       G.transferred += n;
+#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
+                       /* It was EAGAIN. There is no data. Wait up to one second
+                        * then abort if timed out, or update the bar and try reading again.
+                        */
+                       if (safe_poll(&polldata, 1, 1000) == 0) {
+# if ENABLE_FEATURE_WGET_TIMEOUT
+                               if (second_cnt != 0 && --second_cnt == 0) {
+                                       progress_meter(PROGRESS_END);
+                                       bb_error_msg_and_die("download timed out");
+                               }
+# endif
+                               /* We used to loop back to poll here,
+                                * but there is no great harm in letting fread
+                                * try reading anyway.
+                                */
+                       }
+                       /* Need to do it _every_ second for "stalled" indicator
+                        * to be shown properly.
+                        */
                        progress_meter(PROGRESS_BUMP);
 #endif
-                       if (G.got_clen) {
-                               G.content_len -= n;
-                               if (G.content_len == 0)
-                                       break;
-                       }
-               }
+               } /* while (reading data) */
+
 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
                clearerr(dfp);
                ndelay_off(polldata.fd); /* else fgets can get very unhappy */
@@ -543,23 +563,42 @@ static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
                if (G.content_len == 0)
                        break; /* all done! */
                G.got_clen = 1;
+               /*
+                * Note that fgets may result in some data being buffered in dfp.
+                * We loop back to fread, which will retrieve this data.
+                * Also note that the code has to be arranged so that fread
+                * is done _before_ the one-second poll wait - poll doesn't know
+                * about stdio buffering and can cause spurious one-second waits!
+                */
+       }
+
+       /* If -c failed, we restart from the beginning,
+        * but we do not truncate the file then; we do it only now, at the end.
+        * This lets the user hit ^C if a 99% complete 10 GB file download
+        * failed to restart, *without* losing the almost complete file.
+        */
+       {
+               off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
+               if (pos != (off_t)-1)
+                       ftruncate(G.output_fd, pos);
        }
 
        /* Draw full bar and free its resources */
-       G.chunked = 0; /* makes it show 100% even for chunked download */
+       G.chunked = 0;  /* makes it show 100% even for chunked download */
+       G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
        progress_meter(PROGRESS_END);
 }
 
-static int download_one_url(const char *url)
+static void download_one_url(const char *url)
 {
        bool use_proxy;                 /* Use proxies if env vars are set  */
        int redir_limit;
-       int output_fd;
        len_and_sockaddr *lsa;
        FILE *sfp;                      /* socket to web/ftp server         */
        FILE *dfp;                      /* socket to ftp server (data)      */
        char *proxy = NULL;
        char *fname_out_alloc;
+       char *redirected_path = NULL;
        struct host_info server;
        struct host_info target;
 
@@ -574,11 +613,9 @@ static int download_one_url(const char *url)
        use_proxy = (strcmp(G.proxy_flag, "off") != 0);
        if (use_proxy) {
                proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
-               if (proxy && proxy[0]) {
+               use_proxy = (proxy && proxy[0]);
+               if (use_proxy)
                        parse_url(proxy, &server);
-               } else {
-                       use_proxy = 0;
-               }
        }
        if (!use_proxy) {
                server.port = target.port;
@@ -594,7 +631,6 @@ static int download_one_url(const char *url)
                strip_ipv6_scope_id(target.host);
 
        /* If there was no -O FILE, guess output filename */
-       output_fd = -1;
        fname_out_alloc = NULL;
        if (!(option_mask32 & WGET_OPT_OUTNAME)) {
                G.fname_out = bb_get_last_path_component_nostrip(target.path);
@@ -604,11 +640,9 @@ static int download_one_url(const char *url)
                /* -P DIR is considered only if there was no -O FILE */
                if (G.dir_prefix)
                        G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
-       } else {
-               if (LONE_DASH(G.fname_out)) {
-                       /* -O - */
-                       output_fd = 1;
-                       option_mask32 &= ~WGET_OPT_CONTINUE;
+               else {
+                       /* redirects may free target.path later, need to make a copy */
+                       G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
                }
        }
 #if ENABLE_FEATURE_WGET_STATUSBAR
@@ -616,10 +650,11 @@ static int download_one_url(const char *url)
 #endif
 
        /* Determine where to start transfer */
+       G.beg_range = 0;
        if (option_mask32 & WGET_OPT_CONTINUE) {
-               output_fd = open(G.fname_out, O_WRONLY);
-               if (output_fd >= 0) {
-                       G.beg_range = xlseek(output_fd, 0, SEEK_END);
+               G.output_fd = open(G.fname_out, O_WRONLY);
+               if (G.output_fd >= 0) {
+                       G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
                }
                /* File doesn't exist. We do not create file here yet.
                 * We are not sure it exists on remote side */
@@ -634,7 +669,9 @@ static int download_one_url(const char *url)
                free(s);
        }
  establish_session:
-       G.chunked = G.got_clen = 0;
+       /*G.content_len = 0; - redundant, got_clen = 0 is enough */
+       G.got_clen = 0;
+       G.chunked = 0;
        if (use_proxy || !target.is_ftp) {
                /*
                 *  HTTP session
@@ -677,7 +714,7 @@ static int download_one_url(const char *url)
                }
 #endif
 
-               if (G.beg_range)
+               if (G.beg_range != 0)
                        fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
 
 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
@@ -685,15 +722,13 @@ static int download_one_url(const char *url)
                        fputs(G.extra_headers, sfp);
 
                if (option_mask32 & WGET_OPT_POST_DATA) {
-                       char *estr = URL_escape(G.post_data);
                        fprintf(sfp,
                                "Content-Type: application/x-www-form-urlencoded\r\n"
                                "Content-Length: %u\r\n"
                                "\r\n"
                                "%s",
-                               (int) strlen(estr), estr
+                               (int) strlen(G.post_data), G.post_data
                        );
-                       free(estr);
                } else
 #endif
                {
@@ -746,15 +781,23 @@ However, in real world it was observed that some web servers
 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
 */
                case 204:
+                       if (G.beg_range != 0) {
+                               /* "Range:..." was not honored by the server.
+                                * Restart download from the beginning.
+                                */
+                               reset_beg_range_to_zero();
+                       }
                        break;
                case 300:  /* redirection */
                case 301:
                case 302:
                case 303:
                        break;
-               case 206:
-                       if (G.beg_range)
+               case 206: /* Partial Content */
+                       if (G.beg_range != 0)
+                               /* "Range:..." worked. Good. */
                                break;
+                       /* Partial Content even though we did not ask for it??? */
                        /* fall through */
                default:
                        bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
@@ -798,13 +841,14 @@ However, in real world it was observed that some web servers
                                        bb_error_msg_and_die("too many redirections");
                                fclose(sfp);
                                if (str[0] == '/') {
-                                       free(target.allocated);
-                                       target.path = target.allocated = xstrdup(str+1);
+                                       free(redirected_path);
+                                       target.path = redirected_path = xstrdup(str+1);
                                        /* lsa stays the same: it's on the same server */
                                } else {
                                        parse_url(str, &target);
                                        if (!use_proxy) {
                                                free(server.allocated);
+                                               server.allocated = NULL;
                                                server.host = target.host;
                                                /* strip_ipv6_scope_id(target.host); - no! */
                                                /* we assume remote never gives us IPv6 addr with scope id */
@@ -832,15 +876,13 @@ However, in real world it was observed that some web servers
        free(lsa);
 
        if (!(option_mask32 & WGET_OPT_SPIDER)) {
-               if (output_fd < 0) {
-                       int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
-                       /* compat with wget: -O FILE can overwrite */
-                       if (option_mask32 & WGET_OPT_OUTNAME)
-                               o_flags = O_WRONLY | O_CREAT | O_TRUNC;
-                       output_fd = xopen(G.fname_out, o_flags);
+               if (G.output_fd < 0)
+                       G.output_fd = xopen(G.fname_out, G.o_flags);
+               retrieve_file_data(dfp);
+               if (!(option_mask32 & WGET_OPT_OUTNAME)) {
+                       xclose(G.output_fd);
+                       G.output_fd = -1;
                }
-               retrieve_file_data(dfp, output_fd);
-               xclose(output_fd);
        }
 
        if (dfp != sfp) {
@@ -855,8 +897,7 @@ However, in real world it was observed that some web servers
        free(server.allocated);
        free(target.allocated);
        free(fname_out_alloc);
-
-       return EXIT_SUCCESS;
+       free(redirected_path);
 }
 
 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
@@ -884,10 +925,11 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
                "post-data\0"        Required_argument "\xfd"
                /* Ignored (we don't do ssl) */
                "no-check-certificate\0" No_argument   "\xfc"
+               /* Ignored (we don't support caching) */
+               "no-cache\0"         No_argument       "\xfb"
                ;
 #endif
 
-       int exitcode;
 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
        llist_t *headers_llist = NULL;
 #endif
@@ -928,9 +970,22 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
        }
 #endif
 
-       exitcode = 0;
+       G.output_fd = -1;
+       G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
+       if (G.fname_out) { /* -O FILE ? */
+               if (LONE_DASH(G.fname_out)) { /* -O - ? */
+                       G.output_fd = 1;
+                       option_mask32 &= ~WGET_OPT_CONTINUE;
+               }
+               /* compat with wget: -O FILE can overwrite */
+               G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
+       }
+
        while (*argv)
-               exitcode |= download_one_url(*argv++);
+               download_one_url(*argv++);
 
-       return exitcode;
+       if (G.output_fd >= 0)
+               xclose(G.output_fd);
+
+       return EXIT_SUCCESS;
 }