X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=networking%2Fwget.c;h=4eafebe40119c704c39d9f8e64d0ef3c6e2c265c;hb=a38f9faa9fa230eb3753381c4f626acf029379fb;hp=64553d49aebd179e7dc23adc51f9323b42398dc5;hpb=a3aa3e309506ea96fa8f7546f6b22fa85263a934;p=oweals%2Fbusybox.git diff --git a/networking/wget.c b/networking/wget.c index 64553d49a..4eafebe40 100644 --- a/networking/wget.c +++ b/networking/wget.c @@ -3,14 +3,48 @@ * wget - retrieve a file using HTTP or FTP * * Chip Rosenthal Covad Communications + * Licensed under GPLv2, see file LICENSE in this source tree. * - * Licensed under GPLv2, see file LICENSE in this tarball for details. + * Copyright (C) 2010 Bradley M. Kuhn + * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2. */ + +//usage:#define wget_trivial_usage +//usage: IF_FEATURE_WGET_LONG_OPTIONS( +//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n" +//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n" +/* Since we ignore these opts, we don't show them in --help */ +/* //usage: " [--no-check-certificate] [--no-cache]" */ +//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." +//usage: ) +//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS( +//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]" +//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." +//usage: ) +//usage:#define wget_full_usage "\n\n" +//usage: "Retrieve files via HTTP or FTP\n" +//usage: "\n -s Spider mode - only check file existence" +//usage: "\n -c Continue retrieval of aborted transfer" +//usage: "\n -q Quiet" +//usage: "\n -P DIR Save to DIR (default .)" +//usage: IF_FEATURE_WGET_TIMEOUT( +//usage: "\n -T SEC Network read timeout is SEC seconds" +//usage: ) +//usage: "\n -O FILE Save to FILE ('-' for stdout)" +//usage: "\n -U STR Use STR for User-Agent header" +//usage: "\n -Y Use proxy ('on' or 'off')" + #include "libbb.h" +#if 0 +# define log_io(...) bb_error_msg(__VA_ARGS__) +#else +# define log_io(...) ((void)0) +#endif + + struct host_info { - // May be used if we ever will want to free() all xstrdup()s... - /* char *allocated; */ + char *allocated; const char *path; const char *user; char *host; @@ -19,7 +53,7 @@ struct host_info { }; -/* Globals (can be accessed from signal handlers) */ +/* Globals */ struct globals { off_t content_len; /* Content-length of the file */ off_t beg_range; /* Range at which continue begins */ @@ -28,49 +62,79 @@ struct globals { const char *curfile; /* Name of current file being transferred */ bb_progress_t pmt; #endif + char *dir_prefix; +#if ENABLE_FEATURE_WGET_LONG_OPTIONS + char *post_data; + char *extra_headers; +#endif + char *fname_out; /* where to direct output (-O) */ + const char *proxy_flag; /* Use proxies if env vars are set */ + const char *user_agent; /* "User-Agent" header field */ +#if ENABLE_FEATURE_WGET_TIMEOUT + unsigned timeout_seconds; +#endif + int output_fd; + int o_flags; smallint chunked; /* chunked transfer encoding */ smallint got_clen; /* got content-length: from server */ -}; -#define G (*(struct globals*)&bb_common_bufsiz1) -struct BUG_G_too_big { - char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1]; -}; -#define INIT_G() do { } while (0) + /* Local downloads do benefit from big buffer. + * With 512 byte buffer, it was measured to be + * an order of magnitude slower than with big one. + */ + uint64_t just_to_align_next_member; + char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024]; +} FIX_ALIASING; +#define G (*ptr_to_globals) +#define INIT_G() do { \ + SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \ + IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \ +} while (0) -#if ENABLE_FEATURE_WGET_STATUSBAR +/* Must match option string! */ +enum { + WGET_OPT_CONTINUE = (1 << 0), + WGET_OPT_SPIDER = (1 << 1), + WGET_OPT_QUIET = (1 << 2), + WGET_OPT_OUTNAME = (1 << 3), + WGET_OPT_PREFIX = (1 << 4), + WGET_OPT_PROXY = (1 << 5), + WGET_OPT_USER_AGENT = (1 << 6), + WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7), + WGET_OPT_RETRIES = (1 << 8), + WGET_OPT_PASSIVE = (1 << 9), + WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS, + WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS, +}; +enum { + PROGRESS_START = -1, + PROGRESS_END = 0, + PROGRESS_BUMP = 1, +}; +#if ENABLE_FEATURE_WGET_STATUSBAR static void progress_meter(int flag) { - /* We can be called from signal handler */ - int save_errno = errno; + if (option_mask32 & WGET_OPT_QUIET) + return; - if (flag == -1) { /* first call to progress_meter */ - bb_progress_init(&G.pmt); - } + if (flag == PROGRESS_START) + bb_progress_init(&G.pmt, G.curfile); - bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred, - G.chunked ? 0 : G.content_len + G.beg_range); + bb_progress_update(&G.pmt, + G.beg_range, + G.transferred, + (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len + ); - if (flag == 0) { - /* last call to progress_meter */ - alarm(0); - fputc('\n', stderr); + if (flag == PROGRESS_END) { + bb_progress_free(&G.pmt); + bb_putchar_stderr('\n'); G.transferred = 0; - } else { - if (flag == -1) { /* first call to progress_meter */ - signal_SA_RESTART_empty_mask(SIGALRM, progress_meter); - } - alarm(1); } - - errno = save_errno; } - -#else /* FEATURE_WGET_STATUSBAR */ - +#else static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { } - #endif @@ -110,48 +174,15 @@ static void strip_ipv6_scope_id(char *host) overlapping_strcpy(scope, cp); } -/* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read, - * and a short count if an eof or non-interrupt error is encountered. */ -static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream) -{ - size_t ret; - char *p = (char*)ptr; - - do { - clearerr(stream); - errno = 0; - ret = fread(p, 1, nmemb, stream); - p += ret; - nmemb -= ret; - } while (nmemb && ferror(stream) && errno == EINTR); - - return p - (char*)ptr; -} - -/* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM. - * Returns S, or NULL if an eof or non-interrupt error is encountered. */ -static char *safe_fgets(char *s, int size, FILE *stream) -{ - char *ret; - - do { - clearerr(stream); - errno = 0; - ret = fgets(s, size, stream); - } while (ret == NULL && ferror(stream) && errno == EINTR); - - return ret; -} - #if ENABLE_FEATURE_WGET_AUTHENTICATION -/* Base64-encode character string. buf is assumed to be char buf[512]. */ -static char *base64enc_512(char buf[512], const char *str) +/* Base64-encode character string. */ +static char *base64enc(const char *str) { unsigned len = strlen(str); - if (len > 512/4*3 - 10) /* paranoia */ - len = 512/4*3 - 10; - bb_uuencode(buf, str, len, bb_uuenc_tbl_base64); - return buf; + if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */ + len = sizeof(G.wget_buf)/4*3 - 10; + bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64); + return G.wget_buf; } #endif @@ -172,43 +203,58 @@ static FILE *open_socket(len_and_sockaddr *lsa) /* hopefully it understands what ESPIPE means... */ fp = fdopen(xconnect_stream(lsa), "r+"); if (fp == NULL) - bb_perror_msg_and_die("fdopen"); + bb_perror_msg_and_die(bb_msg_memory_exhausted); return fp; } -static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf) +/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */ +static char fgets_and_trim(FILE *fp) +{ + char c; + char *buf_ptr; + + if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL) + bb_perror_msg_and_die("error getting response"); + + buf_ptr = strchrnul(G.wget_buf, '\n'); + c = *buf_ptr; + *buf_ptr = '\0'; + buf_ptr = strchrnul(G.wget_buf, '\r'); + *buf_ptr = '\0'; + + log_io("< %s", G.wget_buf); + + return c; +} + +static int ftpcmd(const char *s1, const char *s2, FILE *fp) { int result; if (s1) { - if (!s2) s2 = ""; + if (!s2) + s2 = ""; fprintf(fp, "%s%s\r\n", s1, s2); fflush(fp); + log_io("> %s%s", s1, s2); } do { - char *buf_ptr; - - if (fgets(buf, 510, fp) == NULL) { - bb_perror_msg_and_die("error getting response"); - } - buf_ptr = strstr(buf, "\r\n"); - if (buf_ptr) { - *buf_ptr = '\0'; - } - } while (!isdigit(buf[0]) || buf[3] != ' '); + fgets_and_trim(fp); + } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' '); - buf[3] = '\0'; - result = xatoi_u(buf); - buf[3] = ' '; + G.wget_buf[3] = '\0'; + result = xatoi_positive(G.wget_buf); + G.wget_buf[3] = ' '; return result; } -static void parse_url(char *src_url, struct host_info *h) +static void parse_url(const char *src_url, struct host_info *h) { char *url, *p, *sp; - /* h->allocated = */ url = xstrdup(src_url); + free(h->allocated); + h->allocated = url = xstrdup(src_url); if (strncmp(url, "http://", 7) == 0) { h->port = bb_lookup_port("http", "tcp", 80); @@ -256,101 +302,65 @@ static void parse_url(char *src_url, struct host_info *h) sp = strrchr(h->host, '@'); if (sp != NULL) { - h->user = h->host; + // URL-decode "user:password" string before base64-encoding: + // wget http://test:my%20pass@example.com should send + // Authorization: Basic dGVzdDpteSBwYXNz + // which decodes to "test:my pass". + // Standard wget and curl do this too. *sp = '\0'; + h->user = percent_decode_in_place(h->host, /*strict:*/ 0); h->host = sp + 1; } sp = h->host; } -static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/) +static char *gethdr(FILE *fp) { char *s, *hdrval; int c; - /* *istrunc = 0; */ - /* retrieve header line */ - if (fgets(buf, bufsiz, fp) == NULL) - return NULL; + c = fgets_and_trim(fp); - /* see if we are at the end of the headers */ - for (s = buf; *s == '\r'; ++s) - continue; - if (*s == '\n') + /* end of the headers? */ + if (G.wget_buf[0] == '\0') return NULL; /* convert the header name to lower case */ - for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) - *s = tolower(*s); + for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) { + /* tolower for "A-Z", no-op for "0-9a-z-." */ + *s |= 0x20; + } /* verify we are at the end of the header name */ if (*s != ':') - bb_error_msg_and_die("bad header line: %s", sanitize_string(buf)); + bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf)); /* locate the start of the header value */ *s++ = '\0'; hdrval = skip_whitespace(s); - /* locate the end of header */ - while (*s && *s != '\r' && *s != '\n') - ++s; - - /* end of header found */ - if (*s) { - *s = '\0'; - return hdrval; + if (c != '\n') { + /* Rats! The buffer isn't big enough to hold the entire header value */ + while (c = getc(fp), c != EOF && c != '\n') + continue; } - /* Rats! The buffer isn't big enough to hold the entire header value */ - while (c = getc(fp), c != EOF && c != '\n') - continue; - /* *istrunc = 1; */ return hdrval; } -#if ENABLE_FEATURE_WGET_LONG_OPTIONS -static char *URL_escape(const char *str) +static void reset_beg_range_to_zero(void) { - /* URL encode, see RFC 2396 */ - char *dst; - char *res = dst = xmalloc(strlen(str) * 3 + 1); - unsigned char c; - - while (1) { - c = *str++; - if (c == '\0' - /* || strchr("!&'()*-.=_~", c) - more code */ - || c == '!' - || c == '&' - || c == '\'' - || c == '(' - || c == ')' - || c == '*' - || c == '-' - || c == '.' - || c == '=' - || c == '_' - || c == '~' - || (c >= '0' && c <= '9') - || ((c|0x20) >= 'a' && (c|0x20) <= 'z') - ) { - *dst++ = c; - if (c == '\0') - return res; - } else { - *dst++ = '%'; - *dst++ = bb_hexdigits_upcase[c >> 4]; - *dst++ = bb_hexdigits_upcase[c & 0xf]; - } - } + bb_error_msg("restart failed"); + G.beg_range = 0; + xlseek(G.output_fd, 0, SEEK_SET); + /* Done at the end instead: */ + /* ftruncate(G.output_fd, 0); */ } -#endif static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa) { - char buf[512]; FILE *sfp; char *str; int port; @@ -359,8 +369,8 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ target->user = xstrdup("anonymous:busybox@"); sfp = open_socket(lsa); - if (ftpcmd(NULL, NULL, sfp, buf) != 220) - bb_error_msg_and_die("%s", sanitize_string(buf+4)); + if (ftpcmd(NULL, NULL, sfp) != 220) + bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4)); /* * Splitting username:password pair, @@ -369,24 +379,24 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ str = strchr(target->user, ':'); if (str) *str++ = '\0'; - switch (ftpcmd("USER ", target->user, sfp, buf)) { + switch (ftpcmd("USER ", target->user, sfp)) { case 230: break; case 331: - if (ftpcmd("PASS ", str, sfp, buf) == 230) + if (ftpcmd("PASS ", str, sfp) == 230) break; /* fall through (failed login) */ default: - bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4)); + bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4)); } - ftpcmd("TYPE I", NULL, sfp, buf); + ftpcmd("TYPE I", NULL, sfp); /* * Querying file size */ - if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) { - G.content_len = BB_STRTOOFF(buf+4, NULL, 10); + if (ftpcmd("SIZE ", target->path, sfp) == 213) { + G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10); if (G.content_len < 0 || errno) { bb_error_msg_and_die("SIZE value is garbage"); } @@ -396,209 +406,222 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ /* * Entering passive mode */ - if (ftpcmd("PASV", NULL, sfp, buf) != 227) { + if (ftpcmd("PASV", NULL, sfp) != 227) { pasv_error: - bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf)); + bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf)); } // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage] // Server's IP is N1.N2.N3.N4 (we ignore it) // Server's port for data connection is P1*256+P2 - str = strrchr(buf, ')'); + str = strrchr(G.wget_buf, ')'); if (str) str[0] = '\0'; - str = strrchr(buf, ','); + str = strrchr(G.wget_buf, ','); if (!str) goto pasv_error; port = xatou_range(str+1, 0, 255); *str = '\0'; - str = strrchr(buf, ','); + str = strrchr(G.wget_buf, ','); if (!str) goto pasv_error; port += xatou_range(str+1, 0, 255) * 256; - set_nport(lsa, htons(port)); + set_nport(&lsa->u.sa, htons(port)); *dfpp = open_socket(lsa); - if (G.beg_range) { - sprintf(buf, "REST %"OFF_FMT"u", G.beg_range); - if (ftpcmd(buf, NULL, sfp, buf) == 350) + if (G.beg_range != 0) { + sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range); + if (ftpcmd(G.wget_buf, NULL, sfp) == 350) G.content_len -= G.beg_range; + else + reset_beg_range_to_zero(); } - if (ftpcmd("RETR ", target->path, sfp, buf) > 150) - bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf)); + if (ftpcmd("RETR ", target->path, sfp) > 150) + bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf)); return sfp; } -/* Must match option string! */ -enum { - WGET_OPT_CONTINUE = (1 << 0), - WGET_OPT_SPIDER = (1 << 1), - WGET_OPT_QUIET = (1 << 2), - WGET_OPT_OUTNAME = (1 << 3), - WGET_OPT_PREFIX = (1 << 4), - WGET_OPT_PROXY = (1 << 5), - WGET_OPT_USER_AGENT = (1 << 6), - WGET_OPT_RETRIES = (1 << 7), - WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8), - WGET_OPT_PASSIVE = (1 << 9), - WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS, - WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS, -}; - -static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd) +static void NOINLINE retrieve_file_data(FILE *dfp) { - char buf[512]; - - if (!(option_mask32 & WGET_OPT_QUIET)) - progress_meter(-1); +#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT +# if ENABLE_FEATURE_WGET_TIMEOUT + unsigned second_cnt = G.timeout_seconds; +# endif + struct pollfd polldata; + + polldata.fd = fileno(dfp); + polldata.events = POLLIN | POLLPRI; +#endif + progress_meter(PROGRESS_START); if (G.chunked) goto get_clen; /* Loops only if chunked */ while (1) { + +#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT + /* Must use nonblocking I/O, otherwise fread will loop + * and *block* until it reads full buffer, + * which messes up progress bar and/or timeout logic. + * Because of nonblocking I/O, we need to dance + * very carefully around EAGAIN. See explanation at + * clearerr() calls. + */ + ndelay_on(polldata.fd); +#endif while (1) { int n; unsigned rdsz; - rdsz = sizeof(buf); +#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT + /* fread internally uses read loop, which in our case + * is usually exited when we get EAGAIN. + * In this case, libc sets error marker on the stream. + * Need to clear it before next fread to avoid possible + * rare false positive ferror below. Rare because usually + * fread gets more than zero bytes, and we don't fall + * into if (n <= 0) ... + */ + clearerr(dfp); +#endif + errno = 0; + rdsz = sizeof(G.wget_buf); if (G.got_clen) { - if (G.content_len < sizeof(buf)) { + if (G.content_len < (off_t)sizeof(G.wget_buf)) { if ((int)G.content_len <= 0) break; rdsz = (unsigned)G.content_len; } } - n = safe_fread(buf, rdsz, dfp); - if (n <= 0) { + n = fread(G.wget_buf, 1, rdsz, dfp); + + if (n > 0) { + xwrite(G.output_fd, G.wget_buf, n); +#if ENABLE_FEATURE_WGET_STATUSBAR + G.transferred += n; +#endif + if (G.got_clen) { + G.content_len -= n; + if (G.content_len == 0) + break; + } +#if ENABLE_FEATURE_WGET_TIMEOUT + second_cnt = G.timeout_seconds; +#endif + continue; + } + + /* n <= 0. + * man fread: + * If error occurs, or EOF is reached, the return value + * is a short item count (or zero). + * fread does not distinguish between EOF and error. + */ + if (errno != EAGAIN) { if (ferror(dfp)) { - /* perror will not work: ferror doesn't set errno */ - bb_error_msg_and_die(bb_msg_read_error); + progress_meter(PROGRESS_END); + bb_perror_msg_and_die(bb_msg_read_error); } - break; + break; /* EOF, not error */ } - xwrite(output_fd, buf, n); -#if ENABLE_FEATURE_WGET_STATUSBAR - G.transferred += n; + +#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT + /* It was EAGAIN. There is no data. Wait up to one second + * then abort if timed out, or update the bar and try reading again. + */ + if (safe_poll(&polldata, 1, 1000) == 0) { +# if ENABLE_FEATURE_WGET_TIMEOUT + if (second_cnt != 0 && --second_cnt == 0) { + progress_meter(PROGRESS_END); + bb_error_msg_and_die("download timed out"); + } +# endif + /* We used to loop back to poll here, + * but there is no great harm in letting fread + * to try reading anyway. + */ + } + /* Need to do it _every_ second for "stalled" indicator + * to be shown properly. + */ + progress_meter(PROGRESS_BUMP); #endif - if (G.got_clen) - G.content_len -= n; - } + } /* while (reading data) */ +#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT + clearerr(dfp); + ndelay_off(polldata.fd); /* else fgets can get very unhappy */ +#endif if (!G.chunked) break; - safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */ + fgets_and_trim(dfp); /* Eat empty line */ get_clen: - safe_fgets(buf, sizeof(buf), dfp); - G.content_len = STRTOOFF(buf, NULL, 16); + fgets_and_trim(dfp); + G.content_len = STRTOOFF(G.wget_buf, NULL, 16); /* FIXME: error check? */ if (G.content_len == 0) break; /* all done! */ G.got_clen = 1; + /* + * Note that fgets may result in some data being buffered in dfp. + * We loop back to fread, which will retrieve this data. + * Also note that code has to be arranged so that fread + * is done _before_ one-second poll wait - poll doesn't know + * about stdio buffering and can result in spurious one second waits! + */ + } + + /* If -c failed, we restart from the beginning, + * but we do not truncate file then, we do it only now, at the end. + * This lets user to ^C if his 99% complete 10 GB file download + * failed to restart *without* losing the almost complete file. + */ + { + off_t pos = lseek(G.output_fd, 0, SEEK_CUR); + if (pos != (off_t)-1) + ftruncate(G.output_fd, pos); } - if (!(option_mask32 & WGET_OPT_QUIET)) - progress_meter(0); + /* Draw full bar and free its resources */ + G.chunked = 0; /* makes it show 100% even for chunked download */ + G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */ + progress_meter(PROGRESS_END); } -int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; -int wget_main(int argc UNUSED_PARAM, char **argv) +static void download_one_url(const char *url) { - char buf[512]; - struct host_info server, target; - len_and_sockaddr *lsa; - unsigned opt; + bool use_proxy; /* Use proxies if env vars are set */ int redir_limit; - char *proxy = NULL; - char *dir_prefix = NULL; -#if ENABLE_FEATURE_WGET_LONG_OPTIONS - char *post_data; - char *extra_headers = NULL; - llist_t *headers_llist = NULL; -#endif + len_and_sockaddr *lsa; FILE *sfp; /* socket to web/ftp server */ FILE *dfp; /* socket to ftp server (data) */ - char *fname_out; /* where to direct output (-O) */ - int output_fd = -1; - bool use_proxy; /* Use proxies if env vars are set */ - const char *proxy_flag = "on"; /* Use proxies if env vars are set */ - const char *user_agent = "Wget";/* "User-Agent" header field */ - - static const char keywords[] ALIGN1 = - "content-length\0""transfer-encoding\0""chunked\0""location\0"; - enum { - KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location - }; -#if ENABLE_FEATURE_WGET_LONG_OPTIONS - static const char wget_longopts[] ALIGN1 = - /* name, has_arg, val */ - "continue\0" No_argument "c" - "spider\0" No_argument "s" - "quiet\0" No_argument "q" - "output-document\0" Required_argument "O" - "directory-prefix\0" Required_argument "P" - "proxy\0" Required_argument "Y" - "user-agent\0" Required_argument "U" - /* Ignored: */ - // "tries\0" Required_argument "t" - // "timeout\0" Required_argument "T" - /* Ignored (we always use PASV): */ - "passive-ftp\0" No_argument "\xff" - "header\0" Required_argument "\xfe" - "post-data\0" Required_argument "\xfd" - ; -#endif - - INIT_G(); - -#if ENABLE_FEATURE_WGET_LONG_OPTIONS - applet_long_options = wget_longopts; -#endif - /* server.allocated = target.allocated = NULL; */ - opt_complementary = "-1" IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::"); - opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:", - &fname_out, &dir_prefix, - &proxy_flag, &user_agent, - NULL, /* -t RETRIES */ - NULL /* -T NETWORK_READ_TIMEOUT */ - IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist) - IF_FEATURE_WGET_LONG_OPTIONS(, &post_data) - ); -#if ENABLE_FEATURE_WGET_LONG_OPTIONS - if (headers_llist) { - int size = 1; - char *cp; - llist_t *ll = headers_llist; - while (ll) { - size += strlen(ll->data) + 2; - ll = ll->link; - } - extra_headers = cp = xmalloc(size); - while (headers_llist) { - cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist)); - } - } -#endif - - /* TODO: compat issue: should handle "wget URL1 URL2..." */ - + char *proxy = NULL; + char *fname_out_alloc; + char *redirected_path = NULL; + struct host_info server; + struct host_info target; + + server.allocated = NULL; + target.allocated = NULL; + server.user = NULL; target.user = NULL; - parse_url(argv[optind], &target); + + parse_url(url, &target); /* Use the proxy if necessary */ - use_proxy = (strcmp(proxy_flag, "off") != 0); + use_proxy = (strcmp(G.proxy_flag, "off") != 0); if (use_proxy) { proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy"); - if (proxy && proxy[0]) { + use_proxy = (proxy && proxy[0]); + if (use_proxy) parse_url(proxy, &server); - } else { - use_proxy = 0; - } } if (!use_proxy) { server.port = target.port; if (ENABLE_FEATURE_IPV6) { - server.host = xstrdup(target.host); + //free(server.allocated); - can't be non-NULL + server.host = server.allocated = xstrdup(target.host); } else { server.host = target.host; } @@ -607,50 +630,48 @@ int wget_main(int argc UNUSED_PARAM, char **argv) if (ENABLE_FEATURE_IPV6) strip_ipv6_scope_id(target.host); - /* Guess an output filename, if there was no -O FILE */ - if (!(opt & WGET_OPT_OUTNAME)) { - fname_out = bb_get_last_path_component_nostrip(target.path); + /* If there was no -O FILE, guess output filename */ + fname_out_alloc = NULL; + if (!(option_mask32 & WGET_OPT_OUTNAME)) { + G.fname_out = bb_get_last_path_component_nostrip(target.path); /* handle "wget http://kernel.org//" */ - if (fname_out[0] == '/' || !fname_out[0]) - fname_out = (char*)"index.html"; + if (G.fname_out[0] == '/' || !G.fname_out[0]) + G.fname_out = (char*)"index.html"; /* -P DIR is considered only if there was no -O FILE */ - if (dir_prefix) - fname_out = concat_path_file(dir_prefix, fname_out); - } else { - if (LONE_DASH(fname_out)) { - /* -O - */ - output_fd = 1; - opt &= ~WGET_OPT_CONTINUE; + if (G.dir_prefix) + G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out); + else { + /* redirects may free target.path later, need to make a copy */ + G.fname_out = fname_out_alloc = xstrdup(G.fname_out); } } #if ENABLE_FEATURE_WGET_STATUSBAR - G.curfile = bb_get_last_path_component_nostrip(fname_out); + G.curfile = bb_get_last_path_component_nostrip(G.fname_out); #endif - /* Impossible? - if ((opt & WGET_OPT_CONTINUE) && !fname_out) - bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)"); - */ - /* Determine where to start transfer */ - if (opt & WGET_OPT_CONTINUE) { - output_fd = open(fname_out, O_WRONLY); - if (output_fd >= 0) { - G.beg_range = xlseek(output_fd, 0, SEEK_END); + G.beg_range = 0; + if (option_mask32 & WGET_OPT_CONTINUE) { + G.output_fd = open(G.fname_out, O_WRONLY); + if (G.output_fd >= 0) { + G.beg_range = xlseek(G.output_fd, 0, SEEK_END); } /* File doesn't exist. We do not create file here yet. - * We are not sure it exists on remove side */ + * We are not sure it exists on remote side */ } redir_limit = 5; resolve_lsa: lsa = xhost2sockaddr(server.host, server.port); - if (!(opt & WGET_OPT_QUIET)) { + if (!(option_mask32 & WGET_OPT_QUIET)) { char *s = xmalloc_sockaddr2dotted(&lsa->u.sa); fprintf(stderr, "Connecting to %s (%s)\n", server.host, s); free(s); } establish_session: + /*G.content_len = 0; - redundant, got_clen = 0 is enough */ + G.got_clen = 0; + G.chunked = 0; if (use_proxy || !target.is_ftp) { /* * HTTP session @@ -658,6 +679,7 @@ int wget_main(int argc UNUSED_PARAM, char **argv) char *str; int status; + /* Open socket to http server */ sfp = open_socket(lsa); @@ -667,54 +689,61 @@ int wget_main(int argc UNUSED_PARAM, char **argv) target.is_ftp ? "f" : "ht", target.host, target.path); } else { - if (opt & WGET_OPT_POST_DATA) + if (option_mask32 & WGET_OPT_POST_DATA) fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path); else fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path); } fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n", - target.host, user_agent); + target.host, G.user_agent); + + /* Ask server to close the connection as soon as we are done + * (IOW: we do not intend to send more requests) + */ + fprintf(sfp, "Connection: close\r\n"); #if ENABLE_FEATURE_WGET_AUTHENTICATION if (target.user) { fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, - base64enc_512(buf, target.user)); + base64enc(target.user)); } if (use_proxy && server.user) { fprintf(sfp, "Proxy-Authorization: Basic %s\r\n", - base64enc_512(buf, server.user)); + base64enc(server.user)); } #endif - if (G.beg_range) + if (G.beg_range != 0) fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range); + #if ENABLE_FEATURE_WGET_LONG_OPTIONS - if (extra_headers) - fputs(extra_headers, sfp); - - if (opt & WGET_OPT_POST_DATA) { - char *estr = URL_escape(post_data); - fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n"); - fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s", - (int) strlen(estr), estr); - /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/ - /*fprintf(sfp, "%s\r\n", estr);*/ - free(estr); + if (G.extra_headers) + fputs(G.extra_headers, sfp); + + if (option_mask32 & WGET_OPT_POST_DATA) { + fprintf(sfp, + "Content-Type: application/x-www-form-urlencoded\r\n" + "Content-Length: %u\r\n" + "\r\n" + "%s", + (int) strlen(G.post_data), G.post_data + ); } else #endif - { /* If "Connection:" is needed, document why */ - fprintf(sfp, /* "Connection: close\r\n" */ "\r\n"); + { + fprintf(sfp, "\r\n"); } + fflush(sfp); + /* * Retrieve HTTP response line and check for "200" status code. */ read_response: - if (fgets(buf, sizeof(buf), sfp) == NULL) - bb_error_msg_and_die("no response from server"); + fgets_and_trim(sfp); - str = buf; + str = G.wget_buf; str = skip_non_whitespace(str); str = skip_whitespace(str); // FIXME: no error check @@ -723,7 +752,7 @@ int wget_main(int argc UNUSED_PARAM, char **argv) switch (status) { case 0: case 100: - while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL) + while (gethdr(sfp) != NULL) /* eat all remaining headers */; goto read_response; case 200: @@ -752,33 +781,48 @@ However, in real world it was observed that some web servers (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. */ case 204: + if (G.beg_range != 0) { + /* "Range:..." was not honored by the server. + * Restart download from the beginning. + */ + reset_beg_range_to_zero(); + } break; - case 300: /* redirection */ + case 300: /* redirection */ case 301: case 302: case 303: break; - case 206: - if (G.beg_range) + case 206: /* Partial Content */ + if (G.beg_range != 0) + /* "Range:..." worked. Good. */ break; + /* Partial Content even though we did not ask for it??? */ /* fall through */ default: - bb_error_msg_and_die("server returned error: %s", sanitize_string(buf)); + bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf)); } /* * Retrieve HTTP headers. */ - while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) { - /* gethdr converted "FOO:" string to lowercase */ + while ((str = gethdr(sfp)) != NULL) { + static const char keywords[] ALIGN1 = + "content-length\0""transfer-encoding\0""location\0"; + enum { + KEY_content_length = 1, KEY_transfer_encoding, KEY_location + }; smalluint key; + + /* gethdr converted "FOO:" string to lowercase */ + /* strip trailing whitespace */ char *s = strchrnul(str, '\0') - 1; while (s >= str && (*s == ' ' || *s == '\t')) { *s = '\0'; s--; } - key = index_in_strings(keywords, buf) + 1; + key = index_in_strings(keywords, G.wget_buf) + 1; if (key == KEY_content_length) { G.content_len = BB_STRTOOFF(str, NULL, 10); if (G.content_len < 0 || errno) { @@ -788,23 +832,23 @@ However, in real world it was observed that some web servers continue; } if (key == KEY_transfer_encoding) { - if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked) + if (strcmp(str_tolower(str), "chunked") != 0) bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str)); - G.chunked = G.got_clen = 1; + G.chunked = 1; } if (key == KEY_location && status >= 300) { if (--redir_limit == 0) bb_error_msg_and_die("too many redirections"); fclose(sfp); - G.got_clen = 0; - G.chunked = 0; - if (str[0] == '/') - /* free(target.allocated); */ - target.path = /* target.allocated = */ xstrdup(str+1); + if (str[0] == '/') { + free(redirected_path); + target.path = redirected_path = xstrdup(str+1); /* lsa stays the same: it's on the same server */ - else { + } else { parse_url(str, &target); if (!use_proxy) { + free(server.allocated); + server.allocated = NULL; server.host = target.host; /* strip_ipv6_scope_id(target.host); - no! */ /* we assume remote never gives us IPv6 addr with scope id */ @@ -829,30 +873,119 @@ However, in real world it was observed that some web servers sfp = prepare_ftp_session(&dfp, &target, lsa); } - if (opt & WGET_OPT_SPIDER) { - if (ENABLE_FEATURE_CLEAN_UP) - fclose(sfp); - return EXIT_SUCCESS; - } + free(lsa); - if (output_fd < 0) { - int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL; - /* compat with wget: -O FILE can overwrite */ - if (opt & WGET_OPT_OUTNAME) - o_flags = O_WRONLY | O_CREAT | O_TRUNC; - output_fd = xopen(fname_out, o_flags); + if (!(option_mask32 & WGET_OPT_SPIDER)) { + if (G.output_fd < 0) + G.output_fd = xopen(G.fname_out, G.o_flags); + retrieve_file_data(dfp); + if (!(option_mask32 & WGET_OPT_OUTNAME)) { + xclose(G.output_fd); + G.output_fd = -1; + } } - retrieve_file_data(dfp, output_fd); - xclose(output_fd); - if (dfp != sfp) { - /* It's ftp. Close it properly */ + /* It's ftp. Close data connection properly */ fclose(dfp); - if (ftpcmd(NULL, NULL, sfp, buf) != 226) - bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4)); - /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */ + if (ftpcmd(NULL, NULL, sfp) != 226) + bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4)); + /* ftpcmd("QUIT", NULL, sfp); - why bother? */ + } + fclose(sfp); + + free(server.allocated); + free(target.allocated); + free(fname_out_alloc); + free(redirected_path); +} + +int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int wget_main(int argc UNUSED_PARAM, char **argv) +{ +#if ENABLE_FEATURE_WGET_LONG_OPTIONS + static const char wget_longopts[] ALIGN1 = + /* name, has_arg, val */ + "continue\0" No_argument "c" +//FIXME: -s isn't --spider, it's --save-headers! + "spider\0" No_argument "s" + "quiet\0" No_argument "q" + "output-document\0" Required_argument "O" + "directory-prefix\0" Required_argument "P" + "proxy\0" Required_argument "Y" + "user-agent\0" Required_argument "U" +#if ENABLE_FEATURE_WGET_TIMEOUT + "timeout\0" Required_argument "T" +#endif + /* Ignored: */ + // "tries\0" Required_argument "t" + /* Ignored (we always use PASV): */ + "passive-ftp\0" No_argument "\xff" + "header\0" Required_argument "\xfe" + "post-data\0" Required_argument "\xfd" + /* Ignored (we don't do ssl) */ + "no-check-certificate\0" No_argument "\xfc" + /* Ignored (we don't support caching) */ + "no-cache\0" No_argument "\xfb" + ; +#endif + +#if ENABLE_FEATURE_WGET_LONG_OPTIONS + llist_t *headers_llist = NULL; +#endif + + INIT_G(); + + IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) + G.proxy_flag = "on"; /* use proxies if env vars are set */ + G.user_agent = "Wget"; /* "User-Agent" header field */ + +#if ENABLE_FEATURE_WGET_LONG_OPTIONS + applet_long_options = wget_longopts; +#endif + opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::"); + getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:", + &G.fname_out, &G.dir_prefix, + &G.proxy_flag, &G.user_agent, + IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL), + NULL /* -t RETRIES */ + IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist) + IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data) + ); + argv += optind; + +#if ENABLE_FEATURE_WGET_LONG_OPTIONS + if (headers_llist) { + int size = 1; + char *cp; + llist_t *ll = headers_llist; + while (ll) { + size += strlen(ll->data) + 2; + ll = ll->link; + } + G.extra_headers = cp = xmalloc(size); + while (headers_llist) { + cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist)); + } } +#endif + + G.output_fd = -1; + G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL; + if (G.fname_out) { /* -O FILE ? */ + if (LONE_DASH(G.fname_out)) { /* -O - ? */ + G.output_fd = 1; + option_mask32 &= ~WGET_OPT_CONTINUE; + } + /* compat with wget: -O FILE can overwrite */ + G.o_flags = O_WRONLY | O_CREAT | O_TRUNC; + } + + while (*argv) + download_one_url(*argv++); + + if (G.output_fd >= 0) + xclose(G.output_fd); return EXIT_SUCCESS; }