X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=networking%2Fwget.c;h=4eafebe40119c704c39d9f8e64d0ef3c6e2c265c;hb=a38f9faa9fa230eb3753381c4f626acf029379fb;hp=cb169aba33e241fee1fe6b0d4cbb1fc2da07339c;hpb=57b4909db92ab403cc955e6cef4ea2b8318586b6;p=oweals%2Fbusybox.git diff --git a/networking/wget.c b/networking/wget.c index cb169aba3..4eafebe40 100644 --- a/networking/wget.c +++ b/networking/wget.c @@ -8,10 +8,39 @@ * Copyright (C) 2010 Bradley M. Kuhn * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2. */ + +//usage:#define wget_trivial_usage +//usage: IF_FEATURE_WGET_LONG_OPTIONS( +//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n" +//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n" +/* Since we ignore these opts, we don't show them in --help */ +/* //usage: " [--no-check-certificate] [--no-cache]" */ +//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." +//usage: ) +//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS( +//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]" +//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." +//usage: ) +//usage:#define wget_full_usage "\n\n" +//usage: "Retrieve files via HTTP or FTP\n" +//usage: "\n -s Spider mode - only check file existence" +//usage: "\n -c Continue retrieval of aborted transfer" +//usage: "\n -q Quiet" +//usage: "\n -P DIR Save to DIR (default .)" +//usage: IF_FEATURE_WGET_TIMEOUT( +//usage: "\n -T SEC Network read timeout is SEC seconds" +//usage: ) +//usage: "\n -O FILE Save to FILE ('-' for stdout)" +//usage: "\n -U STR Use STR for User-Agent header" +//usage: "\n -Y Use proxy ('on' or 'off')" + #include "libbb.h" -//#define log_io(...) bb_error_msg(__VA_ARGS__) -#define log_io(...) ((void)0) +#if 0 +# define log_io(...) bb_error_msg(__VA_ARGS__) +#else +# define log_io(...) ((void)0) +#endif struct host_info { @@ -44,6 +73,8 @@ struct globals { #if ENABLE_FEATURE_WGET_TIMEOUT unsigned timeout_seconds; #endif + int output_fd; + int o_flags; smallint chunked; /* chunked transfer encoding */ smallint got_clen; /* got content-length: from server */ /* Local downloads do benefit from big buffer. @@ -90,8 +121,11 @@ static void progress_meter(int flag) if (flag == PROGRESS_START) bb_progress_init(&G.pmt, G.curfile); - bb_progress_update(&G.pmt, G.beg_range, G.transferred, - G.chunked ? 0 : G.beg_range + G.transferred + G.content_len); + bb_progress_update(&G.pmt, + G.beg_range, + G.transferred, + (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len + ); if (flag == PROGRESS_END) { bb_progress_free(&G.pmt); @@ -268,8 +302,13 @@ static void parse_url(const char *src_url, struct host_info *h) sp = strrchr(h->host, '@'); if (sp != NULL) { - h->user = h->host; + // URL-decode "user:password" string before base64-encoding: + // wget http://test:my%20pass@example.com should send + // Authorization: Basic dGVzdDpteSBwYXNz + // which decodes to "test:my pass". + // Standard wget and curl do this too. *sp = '\0'; + h->user = percent_decode_in_place(h->host, /*strict:*/ 0); h->host = sp + 1; } @@ -281,8 +320,6 @@ static char *gethdr(FILE *fp) char *s, *hdrval; int c; - /* *istrunc = 0; */ - /* retrieve header line */ c = fgets_and_trim(fp); @@ -313,43 +350,14 @@ static char *gethdr(FILE *fp) return hdrval; } -#if ENABLE_FEATURE_WGET_LONG_OPTIONS -static char *URL_escape(const char *str) +static void reset_beg_range_to_zero(void) { - /* URL encode, see RFC 2396 */ - char *dst; - char *res = dst = xmalloc(strlen(str) * 3 + 1); - unsigned char c; - - while (1) { - c = *str++; - if (c == '\0' - /* || strchr("!&'()*-.=_~", c) - more code */ - || c == '!' - || c == '&' - || c == '\'' - || c == '(' - || c == ')' - || c == '*' - || c == '-' - || c == '.' - || c == '=' - || c == '_' - || c == '~' - || (c >= '0' && c <= '9') - || ((c|0x20) >= 'a' && (c|0x20) <= 'z') - ) { - *dst++ = c; - if (c == '\0') - return res; - } else { - *dst++ = '%'; - *dst++ = bb_hexdigits_upcase[c >> 4]; - *dst++ = bb_hexdigits_upcase[c & 0xf]; - } - } + bb_error_msg("restart failed"); + G.beg_range = 0; + xlseek(G.output_fd, 0, SEEK_SET); + /* Done at the end instead: */ + /* ftruncate(G.output_fd, 0); */ } -#endif static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa) { @@ -414,14 +422,16 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ str = strrchr(G.wget_buf, ','); if (!str) goto pasv_error; port += xatou_range(str+1, 0, 255) * 256; - set_nport(lsa, htons(port)); + set_nport(&lsa->u.sa, htons(port)); *dfpp = open_socket(lsa); - if (G.beg_range) { + if (G.beg_range != 0) { sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range); if (ftpcmd(G.wget_buf, NULL, sfp) == 350) G.content_len -= G.beg_range; + else + reset_beg_range_to_zero(); } if (ftpcmd("RETR ", target->path, sfp) > 150) @@ -430,11 +440,11 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ return sfp; } -static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd) +static void NOINLINE retrieve_file_data(FILE *dfp) { #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT # if ENABLE_FEATURE_WGET_TIMEOUT - unsigned second_cnt; + unsigned second_cnt = G.timeout_seconds; # endif struct pollfd polldata; @@ -455,7 +465,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd) * which messes up progress bar and/or timeout logic. * Because of nonblocking I/O, we need to dance * very carefully around EAGAIN. See explanation at - * clearerr() call. + * clearerr() calls. */ ndelay_on(polldata.fd); #endif @@ -463,32 +473,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd) int n; unsigned rdsz; - rdsz = sizeof(G.wget_buf); - if (G.got_clen) { - if (G.content_len < (off_t)sizeof(G.wget_buf)) { - if ((int)G.content_len <= 0) - break; - rdsz = (unsigned)G.content_len; - } - } - #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT -# if ENABLE_FEATURE_WGET_TIMEOUT - second_cnt = G.timeout_seconds; -# endif - while (1) { - if (safe_poll(&polldata, 1, 1000) != 0) - break; /* error, EOF, or data is available */ -# if ENABLE_FEATURE_WGET_TIMEOUT - if (second_cnt != 0 && --second_cnt == 0) { - progress_meter(PROGRESS_END); - bb_error_msg_and_die("download timed out"); - } -# endif - /* Needed for "stalled" indicator */ - progress_meter(PROGRESS_BUMP); - } - /* fread internally uses read loop, which in our case * is usually exited when we get EAGAIN. * In this case, libc sets error marker on the stream. @@ -498,36 +483,71 @@ static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd) * into if (n <= 0) ... */ clearerr(dfp); - errno = 0; #endif + errno = 0; + rdsz = sizeof(G.wget_buf); + if (G.got_clen) { + if (G.content_len < (off_t)sizeof(G.wget_buf)) { + if ((int)G.content_len <= 0) + break; + rdsz = (unsigned)G.content_len; + } + } n = fread(G.wget_buf, 1, rdsz, dfp); - /* man fread: + + if (n > 0) { + xwrite(G.output_fd, G.wget_buf, n); +#if ENABLE_FEATURE_WGET_STATUSBAR + G.transferred += n; +#endif + if (G.got_clen) { + G.content_len -= n; + if (G.content_len == 0) + break; + } +#if ENABLE_FEATURE_WGET_TIMEOUT + second_cnt = G.timeout_seconds; +#endif + continue; + } + + /* n <= 0. + * man fread: * If error occurs, or EOF is reached, the return value * is a short item count (or zero). * fread does not distinguish between EOF and error. */ - if (n <= 0) { -#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT - if (errno == EAGAIN) /* poll lied, there is no data? */ - continue; /* yes */ -#endif - if (ferror(dfp)) + if (errno != EAGAIN) { + if (ferror(dfp)) { + progress_meter(PROGRESS_END); bb_perror_msg_and_die(bb_msg_read_error); + } break; /* EOF, not error */ } - xwrite(output_fd, G.wget_buf, n); - -#if ENABLE_FEATURE_WGET_STATUSBAR - G.transferred += n; +#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT + /* It was EAGAIN. There is no data. Wait up to one second + * then abort if timed out, or update the bar and try reading again. + */ + if (safe_poll(&polldata, 1, 1000) == 0) { +# if ENABLE_FEATURE_WGET_TIMEOUT + if (second_cnt != 0 && --second_cnt == 0) { + progress_meter(PROGRESS_END); + bb_error_msg_and_die("download timed out"); + } +# endif + /* We used to loop back to poll here, + * but there is no great harm in letting fread + * to try reading anyway. + */ + } + /* Need to do it _every_ second for "stalled" indicator + * to be shown properly. + */ progress_meter(PROGRESS_BUMP); #endif - if (G.got_clen) { - G.content_len -= n; - if (G.content_len == 0) - break; - } - } + } /* while (reading data) */ + #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT clearerr(dfp); ndelay_off(polldata.fd); /* else fgets can get very unhappy */ @@ -543,23 +563,42 @@ static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd) if (G.content_len == 0) break; /* all done! */ G.got_clen = 1; + /* + * Note that fgets may result in some data being buffered in dfp. + * We loop back to fread, which will retrieve this data. + * Also note that code has to be arranged so that fread + * is done _before_ one-second poll wait - poll doesn't know + * about stdio buffering and can result in spurious one second waits! + */ + } + + /* If -c failed, we restart from the beginning, + * but we do not truncate file then, we do it only now, at the end. + * This lets user to ^C if his 99% complete 10 GB file download + * failed to restart *without* losing the almost complete file. + */ + { + off_t pos = lseek(G.output_fd, 0, SEEK_CUR); + if (pos != (off_t)-1) + ftruncate(G.output_fd, pos); } /* Draw full bar and free its resources */ - G.chunked = 0; /* makes it show 100% even for chunked download */ + G.chunked = 0; /* makes it show 100% even for chunked download */ + G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */ progress_meter(PROGRESS_END); } -static int download_one_url(const char *url) +static void download_one_url(const char *url) { bool use_proxy; /* Use proxies if env vars are set */ int redir_limit; - int output_fd; len_and_sockaddr *lsa; FILE *sfp; /* socket to web/ftp server */ FILE *dfp; /* socket to ftp server (data) */ char *proxy = NULL; char *fname_out_alloc; + char *redirected_path = NULL; struct host_info server; struct host_info target; @@ -574,11 +613,9 @@ static int download_one_url(const char *url) use_proxy = (strcmp(G.proxy_flag, "off") != 0); if (use_proxy) { proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy"); - if (proxy && proxy[0]) { + use_proxy = (proxy && proxy[0]); + if (use_proxy) parse_url(proxy, &server); - } else { - use_proxy = 0; - } } if (!use_proxy) { server.port = target.port; @@ -594,7 +631,6 @@ static int download_one_url(const char *url) strip_ipv6_scope_id(target.host); /* If there was no -O FILE, guess output filename */ - output_fd = -1; fname_out_alloc = NULL; if (!(option_mask32 & WGET_OPT_OUTNAME)) { G.fname_out = bb_get_last_path_component_nostrip(target.path); @@ -604,11 +640,9 @@ static int download_one_url(const char *url) /* -P DIR is considered only if there was no -O FILE */ if (G.dir_prefix) G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out); - } else { - if (LONE_DASH(G.fname_out)) { - /* -O - */ - output_fd = 1; - option_mask32 &= ~WGET_OPT_CONTINUE; + else { + /* redirects may free target.path later, need to make a copy */ + G.fname_out = fname_out_alloc = xstrdup(G.fname_out); } } #if ENABLE_FEATURE_WGET_STATUSBAR @@ -616,10 +650,11 @@ static int download_one_url(const char *url) #endif /* Determine where to start transfer */ + G.beg_range = 0; if (option_mask32 & WGET_OPT_CONTINUE) { - output_fd = open(G.fname_out, O_WRONLY); - if (output_fd >= 0) { - G.beg_range = xlseek(output_fd, 0, SEEK_END); + G.output_fd = open(G.fname_out, O_WRONLY); + if (G.output_fd >= 0) { + G.beg_range = xlseek(G.output_fd, 0, SEEK_END); } /* File doesn't exist. We do not create file here yet. * We are not sure it exists on remote side */ @@ -634,7 +669,9 @@ static int download_one_url(const char *url) free(s); } establish_session: - G.chunked = G.got_clen = 0; + /*G.content_len = 0; - redundant, got_clen = 0 is enough */ + G.got_clen = 0; + G.chunked = 0; if (use_proxy || !target.is_ftp) { /* * HTTP session @@ -677,7 +714,7 @@ static int download_one_url(const char *url) } #endif - if (G.beg_range) + if (G.beg_range != 0) fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range); #if ENABLE_FEATURE_WGET_LONG_OPTIONS @@ -685,15 +722,13 @@ static int download_one_url(const char *url) fputs(G.extra_headers, sfp); if (option_mask32 & WGET_OPT_POST_DATA) { - char *estr = URL_escape(G.post_data); fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n" "Content-Length: %u\r\n" "\r\n" "%s", - (int) strlen(estr), estr + (int) strlen(G.post_data), G.post_data ); - free(estr); } else #endif { @@ -746,15 +781,23 @@ However, in real world it was observed that some web servers (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. */ case 204: + if (G.beg_range != 0) { + /* "Range:..." was not honored by the server. + * Restart download from the beginning. + */ + reset_beg_range_to_zero(); + } break; case 300: /* redirection */ case 301: case 302: case 303: break; - case 206: - if (G.beg_range) + case 206: /* Partial Content */ + if (G.beg_range != 0) + /* "Range:..." worked. Good. */ break; + /* Partial Content even though we did not ask for it??? */ /* fall through */ default: bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf)); @@ -798,8 +841,8 @@ However, in real world it was observed that some web servers bb_error_msg_and_die("too many redirections"); fclose(sfp); if (str[0] == '/') { - free(target.allocated); - target.path = target.allocated = xstrdup(str+1); + free(redirected_path); + target.path = redirected_path = xstrdup(str+1); /* lsa stays the same: it's on the same server */ } else { parse_url(str, &target); @@ -833,15 +876,13 @@ However, in real world it was observed that some web servers free(lsa); if (!(option_mask32 & WGET_OPT_SPIDER)) { - if (output_fd < 0) { - int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL; - /* compat with wget: -O FILE can overwrite */ - if (option_mask32 & WGET_OPT_OUTNAME) - o_flags = O_WRONLY | O_CREAT | O_TRUNC; - output_fd = xopen(G.fname_out, o_flags); + if (G.output_fd < 0) + G.output_fd = xopen(G.fname_out, G.o_flags); + retrieve_file_data(dfp); + if (!(option_mask32 & WGET_OPT_OUTNAME)) { + xclose(G.output_fd); + G.output_fd = -1; } - retrieve_file_data(dfp, output_fd); - xclose(output_fd); } if (dfp != sfp) { @@ -856,8 +897,7 @@ However, in real world it was observed that some web servers free(server.allocated); free(target.allocated); free(fname_out_alloc); - - return EXIT_SUCCESS; + free(redirected_path); } int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; @@ -885,10 +925,11 @@ int wget_main(int argc UNUSED_PARAM, char **argv) "post-data\0" Required_argument "\xfd" /* Ignored (we don't do ssl) */ "no-check-certificate\0" No_argument "\xfc" + /* Ignored (we don't support caching) */ + "no-cache\0" No_argument "\xfb" ; #endif - int exitcode; #if ENABLE_FEATURE_WGET_LONG_OPTIONS llist_t *headers_llist = NULL; #endif @@ -929,9 +970,22 @@ int wget_main(int argc UNUSED_PARAM, char **argv) } #endif - exitcode = 0; + G.output_fd = -1; + G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL; + if (G.fname_out) { /* -O FILE ? */ + if (LONE_DASH(G.fname_out)) { /* -O - ? */ + G.output_fd = 1; + option_mask32 &= ~WGET_OPT_CONTINUE; + } + /* compat with wget: -O FILE can overwrite */ + G.o_flags = O_WRONLY | O_CREAT | O_TRUNC; + } + while (*argv) - exitcode |= download_one_url(*argv++); + download_one_url(*argv++); - return exitcode; + if (G.output_fd >= 0) + xclose(G.output_fd); + + return EXIT_SUCCESS; }