ifenslave: fix missing close paren
[oweals/busybox.git] / networking / wget.c
index 2f89c8f7fae76d7e7f54a614346c7d0ae25473d8..4eafebe40119c704c39d9f8e64d0ef3c6e2c265c 100644 (file)
@@ -13,8 +13,9 @@
 //usage:       IF_FEATURE_WGET_LONG_OPTIONS(
 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
 //usage:       "       [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
-//usage:       "       [--no-check-certificate] [-U|--user-agent AGENT]"
-//usage:                       IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
+/* Since we ignore these opts, we don't show them in --help */
+/* //usage:    "       [--no-check-certificate] [--no-cache]" */
+//usage:       "       [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
 //usage:       )
 //usage:       IF_NOT_FEATURE_WGET_LONG_OPTIONS(
 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
@@ -22,7 +23,6 @@
 //usage:       )
 //usage:#define wget_full_usage "\n\n"
 //usage:       "Retrieve files via HTTP or FTP\n"
-//usage:     "\nOptions:"
 //usage:     "\n       -s      Spider mode - only check file existence"
 //usage:     "\n       -c      Continue retrieval of aborted transfer"
 //usage:     "\n       -q      Quiet"
 
 #include "libbb.h"
 
-//#define log_io(...) bb_error_msg(__VA_ARGS__)
-#define log_io(...) ((void)0)
+#if 0
+# define log_io(...) bb_error_msg(__VA_ARGS__)
+#else
+# define log_io(...) ((void)0)
+#endif
 
 
 struct host_info {
@@ -299,8 +302,13 @@ static void parse_url(const char *src_url, struct host_info *h)
 
        sp = strrchr(h->host, '@');
        if (sp != NULL) {
-               h->user = h->host;
+               // URL-decode "user:password" string before base64-encoding:
+               // wget http://test:my%20pass@example.com should send
+               // Authorization: Basic dGVzdDpteSBwYXNz
+               // which decodes to "test:my pass".
+               // Standard wget and curl do this too.
                *sp = '\0';
+               h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
                h->host = sp + 1;
        }
 
@@ -312,8 +320,6 @@ static char *gethdr(FILE *fp)
        char *s, *hdrval;
        int c;
 
-       /* *istrunc = 0; */
-
        /* retrieve header line */
        c = fgets_and_trim(fp);
 
@@ -344,6 +350,15 @@ static char *gethdr(FILE *fp)
        return hdrval;
 }
 
+static void reset_beg_range_to_zero(void) /* give up on resuming; restart output at offset 0 */
+{
+       bb_error_msg("restart failed");
+       G.beg_range = 0; /* forget the resume offset */
+       xlseek(G.output_fd, 0, SEEK_SET); /* rewind: subsequent writes start at the beginning of the file */
+       /* No ftruncate(G.output_fd, 0) here: retrieve_file_data() truncates
+        * the file at the current write position once the download loop ends. */
+}
+
 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
 {
        FILE *sfp;
@@ -407,14 +422,16 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
        str = strrchr(G.wget_buf, ',');
        if (!str) goto pasv_error;
        port += xatou_range(str+1, 0, 255) * 256;
-       set_nport(lsa, htons(port));
+       set_nport(&lsa->u.sa, htons(port));
 
        *dfpp = open_socket(lsa);
 
-       if (G.beg_range) {
+       if (G.beg_range != 0) {
                sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
                if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
                        G.content_len -= G.beg_range;
+               else
+                       reset_beg_range_to_zero();
        }
 
        if (ftpcmd("RETR ", target->path, sfp) > 150)
@@ -427,7 +444,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
 {
 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 # if ENABLE_FEATURE_WGET_TIMEOUT
-       unsigned second_cnt;
+       unsigned second_cnt = G.timeout_seconds;
 # endif
        struct pollfd polldata;
 
@@ -448,7 +465,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
                 * which messes up progress bar and/or timeout logic.
                 * Because of nonblocking I/O, we need to dance
                 * very carefully around EAGAIN. See explanation at
-                * clearerr() call.
+                * clearerr() calls.
                 */
                ndelay_on(polldata.fd);
 #endif
@@ -456,32 +473,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
                        int n;
                        unsigned rdsz;
 
-                       rdsz = sizeof(G.wget_buf);
-                       if (G.got_clen) {
-                               if (G.content_len < (off_t)sizeof(G.wget_buf)) {
-                                       if ((int)G.content_len <= 0)
-                                               break;
-                                       rdsz = (unsigned)G.content_len;
-                               }
-                       }
-
 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
-# if ENABLE_FEATURE_WGET_TIMEOUT
-                       second_cnt = G.timeout_seconds;
-# endif
-                       while (1) {
-                               if (safe_poll(&polldata, 1, 1000) != 0)
-                                       break; /* error, EOF, or data is available */
-# if ENABLE_FEATURE_WGET_TIMEOUT
-                               if (second_cnt != 0 && --second_cnt == 0) {
-                                       progress_meter(PROGRESS_END);
-                                       bb_error_msg_and_die("download timed out");
-                               }
-# endif
-                               /* Needed for "stalled" indicator */
-                               progress_meter(PROGRESS_BUMP);
-                       }
-
                        /* fread internally uses read loop, which in our case
                         * is usually exited when we get EAGAIN.
                         * In this case, libc sets error marker on the stream.
@@ -491,36 +483,71 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
                         * into if (n <= 0) ...
                         */
                        clearerr(dfp);
-                       errno = 0;
 #endif
+                       errno = 0;
+                       rdsz = sizeof(G.wget_buf);
+                       if (G.got_clen) {
+                               if (G.content_len < (off_t)sizeof(G.wget_buf)) {
+                                       if ((int)G.content_len <= 0)
+                                               break;
+                                       rdsz = (unsigned)G.content_len;
+                               }
+                       }
                        n = fread(G.wget_buf, 1, rdsz, dfp);
-                       /* man fread:
+
+                       if (n > 0) {
+                               xwrite(G.output_fd, G.wget_buf, n);
+#if ENABLE_FEATURE_WGET_STATUSBAR
+                               G.transferred += n;
+#endif
+                               if (G.got_clen) {
+                                       G.content_len -= n;
+                                       if (G.content_len == 0)
+                                               break;
+                               }
+#if ENABLE_FEATURE_WGET_TIMEOUT
+                               second_cnt = G.timeout_seconds;
+#endif
+                               continue;
+                       }
+
+                       /* n <= 0.
+                        * man fread:
                         * If error occurs, or EOF is reached, the return value
                         * is a short item count (or zero).
                         * fread does not distinguish between EOF and error.
                         */
-                       if (n <= 0) {
-#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
-                               if (errno == EAGAIN) /* poll lied, there is no data? */
-                                       continue; /* yes */
-#endif
-                               if (ferror(dfp))
+                       if (errno != EAGAIN) {
+                               if (ferror(dfp)) {
+                                       progress_meter(PROGRESS_END);
                                        bb_perror_msg_and_die(bb_msg_read_error);
+                               }
                                break; /* EOF, not error */
                        }
 
-                       xwrite(G.output_fd, G.wget_buf, n);
-
-#if ENABLE_FEATURE_WGET_STATUSBAR
-                       G.transferred += n;
+#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
+                       /* It was EAGAIN. There is no data. Wait up to one second
+                        * then abort if timed out, or update the bar and try reading again.
+                        */
+                       if (safe_poll(&polldata, 1, 1000) == 0) {
+# if ENABLE_FEATURE_WGET_TIMEOUT
+                               if (second_cnt != 0 && --second_cnt == 0) {
+                                       progress_meter(PROGRESS_END);
+                                       bb_error_msg_and_die("download timed out");
+                               }
+# endif
+                               /* We used to loop back to poll here,
+                                * but there is no great harm in letting fread
+                                * try reading anyway.
+                                */
+                       }
+                       /* Need to do it _every_ second for "stalled" indicator
+                        * to be shown properly.
+                        */
                        progress_meter(PROGRESS_BUMP);
 #endif
-                       if (G.got_clen) {
-                               G.content_len -= n;
-                               if (G.content_len == 0)
-                                       break;
-                       }
-               }
+               } /* while (reading data) */
+
 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
                clearerr(dfp);
                ndelay_off(polldata.fd); /* else fgets can get very unhappy */
@@ -536,6 +563,24 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
                if (G.content_len == 0)
                        break; /* all done! */
                G.got_clen = 1;
+               /*
+                * Note that fgets may result in some data being buffered in dfp.
+                * We loop back to fread, which will retrieve this data.
+                * Also note that code has to be arranged so that fread
+                * is done _before_ one-second poll wait - poll doesn't know
+                * about stdio buffering and can result in spurious one second waits!
+                */
+       }
+
+       /* If -c failed, we restart from the beginning,
+        * but we do not truncate the file then; we do it only now, at the end.
+        * This lets the user ^C if a 99% complete 10 GB file download
+        * failed to restart, *without* losing the almost complete file.
+        */
+       {
+               off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
+               if (pos != (off_t)-1)
+                       ftruncate(G.output_fd, pos);
        }
 
        /* Draw full bar and free its resources */
@@ -553,6 +598,7 @@ static void download_one_url(const char *url)
        FILE *dfp;                      /* socket to ftp server (data)      */
        char *proxy = NULL;
        char *fname_out_alloc;
+       char *redirected_path = NULL;
        struct host_info server;
        struct host_info target;
 
@@ -592,13 +638,11 @@ static void download_one_url(const char *url)
                if (G.fname_out[0] == '/' || !G.fname_out[0])
                        G.fname_out = (char*)"index.html";
                /* -P DIR is considered only if there was no -O FILE */
+               if (G.dir_prefix)
+                       G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
                else {
-                       if (G.dir_prefix)
-                               G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
-                       else {
-                               /* redirects may free target.path later, need to make a copy */
-                               G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
-                       }
+                       /* redirects may free target.path later, need to make a copy */
+                       G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
                }
        }
 #if ENABLE_FEATURE_WGET_STATUSBAR
@@ -670,7 +714,7 @@ static void download_one_url(const char *url)
                }
 #endif
 
-               if (G.beg_range)
+               if (G.beg_range != 0)
                        fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
 
 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
@@ -737,15 +781,23 @@ However, in real world it was observed that some web servers
 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
 */
                case 204:
+                       if (G.beg_range != 0) {
+                               /* "Range:..." was not honored by the server.
+                                * Restart download from the beginning.
+                                */
+                               reset_beg_range_to_zero();
+                       }
                        break;
                case 300:  /* redirection */
                case 301:
                case 302:
                case 303:
                        break;
-               case 206:
-                       if (G.beg_range)
+               case 206: /* Partial Content */
+                       if (G.beg_range != 0)
+                               /* "Range:..." worked. Good. */
                                break;
+                       /* Partial Content even though we did not ask for it??? */
                        /* fall through */
                default:
                        bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
@@ -789,8 +841,8 @@ However, in real world it was observed that some web servers
                                        bb_error_msg_and_die("too many redirections");
                                fclose(sfp);
                                if (str[0] == '/') {
-                                       free(target.allocated);
-                                       target.path = target.allocated = xstrdup(str+1);
+                                       free(redirected_path);
+                                       target.path = redirected_path = xstrdup(str+1);
                                        /* lsa stays the same: it's on the same server */
                                } else {
                                        parse_url(str, &target);
@@ -845,6 +897,7 @@ However, in real world it was observed that some web servers
        free(server.allocated);
        free(target.allocated);
        free(fname_out_alloc);
+       free(redirected_path);
 }
 
 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
@@ -872,6 +925,8 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
                "post-data\0"        Required_argument "\xfd"
                /* Ignored (we don't do ssl) */
                "no-check-certificate\0" No_argument   "\xfc"
+               /* Ignored (we don't support caching) */
+               "no-cache\0"         No_argument       "\xfb"
                ;
 #endif