* wget - retrieve a file using HTTP or FTP
*
* Chip Rosenthal Covad Communications <chip@laserlink.net>
+ * Licensed under GPLv2, see file LICENSE in this source tree.
*
+ * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
+ * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
*/
-/* We want libc to give us xxx64 functions also */
-/* http://www.unix.org/version2/whatsnew/lfs20mar.html */
-//#define _LARGEFILE64_SOURCE 1
+//usage:#define wget_trivial_usage
+//usage: IF_FEATURE_WGET_LONG_OPTIONS(
+//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
+//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
+/* Since we ignore these opts, we don't show them in --help */
+/* //usage: " [--no-check-certificate] [--no-cache]" */
+//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
+//usage: )
+//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
+//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
+//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
+//usage: )
+//usage:#define wget_full_usage "\n\n"
+//usage: "Retrieve files via HTTP or FTP\n"
+//usage: "\n -s Spider mode - only check file existence"
+//usage: "\n -c Continue retrieval of aborted transfer"
+//usage: "\n -q Quiet"
+//usage: "\n -P DIR Save to DIR (default .)"
+//usage: IF_FEATURE_WGET_TIMEOUT(
+//usage: "\n -T SEC Network read timeout is SEC seconds"
+//usage: )
+//usage: "\n -O FILE Save to FILE ('-' for stdout)"
+//usage: "\n -U STR Use STR for User-Agent header"
+//usage: "\n -Y Use proxy ('on' or 'off')"
-#include <getopt.h> /* for struct option */
#include "libbb.h"
+#if 0
+# define log_io(...) bb_error_msg(__VA_ARGS__)
+#else
+# define log_io(...) ((void)0)
+#endif
+
+
struct host_info {
- // May be used if we ever will want to free() all xstrdup()s...
- /* char *allocated; */
- char *host;
- int port;
- char *path;
- int is_ftp;
- char *user;
+ char *allocated;
+ const char *path;
+ const char *user;
+ char *host;
+ int port;
+ smallint is_ftp;
};
-static void parse_url(char *url, struct host_info *h);
-static FILE *open_socket(len_and_sockaddr *lsa);
-static char *gethdr(char *buf, size_t bufsiz, FILE *fp, int *istrunc);
-static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf);
-/* Globals (can be accessed from signal handlers */
-static off_t content_len; /* Content-length of the file */
-static off_t beg_range; /* Range at which continue begins */
+/* Globals */
+struct globals {
+ off_t content_len; /* Content-length of the file */
+ off_t beg_range; /* Range at which continue begins */
#if ENABLE_FEATURE_WGET_STATUSBAR
-static off_t transferred; /* Number of bytes transferred so far */
+ off_t transferred; /* Number of bytes transferred so far */
+ const char *curfile; /* Name of current file being transferred */
+ bb_progress_t pmt;
#endif
-static bool chunked; /* chunked transfer encoding */
-#if ENABLE_FEATURE_WGET_STATUSBAR
-static void progressmeter(int flag);
-static const char *curfile; /* Name of current file being transferred */
-enum {
- STALLTIME = 5 /* Seconds when xfer considered "stalled" */
-};
-#else
-static void progressmeter(int flag) {}
+ char *dir_prefix;
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+ char *post_data;
+ char *extra_headers;
#endif
+ char *fname_out; /* where to direct output (-O) */
+ const char *proxy_flag; /* Use proxies if env vars are set */
+ const char *user_agent; /* "User-Agent" header field */
+#if ENABLE_FEATURE_WGET_TIMEOUT
+ unsigned timeout_seconds;
+#endif
+ int output_fd;
+ int o_flags;
+ smallint chunked; /* chunked transfer encoding */
+ smallint got_clen; /* got content-length: from server */
+ /* Local downloads do benefit from a big buffer.
+ * With a 512-byte buffer, a download was measured to be
+ * an order of magnitude slower than with a big one.
+ */
+ uint64_t just_to_align_next_member;
+ char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
+} FIX_ALIASING;
+#define G (*ptr_to_globals)
+#define INIT_G() do { \
+ SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
+ IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
+} while (0)
-/* Read NMEMB elements of SIZE bytes into PTR from STREAM. Returns the
- * number of elements read, and a short count if an eof or non-interrupt
- * error is encountered. */
-static size_t safe_fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
-{
- size_t ret = 0;
-
- do {
- clearerr(stream);
- ret += fread((char *)ptr + (ret * size), size, nmemb - ret, stream);
- } while (ret < nmemb && ferror(stream) && errno == EINTR);
-
- return ret;
-}
-
-/* Read a line or SIZE - 1 bytes into S, whichever is less, from STREAM.
- * Returns S, or NULL if an eof or non-interrupt error is encountered. */
-static char *safe_fgets(char *s, int size, FILE *stream)
-{
- char *ret;
-
- do {
- clearerr(stream);
- ret = fgets(s, size, stream);
- } while (ret == NULL && ferror(stream) && errno == EINTR);
- return ret;
-}
+/* Must match option string! */
+enum {
+ WGET_OPT_CONTINUE = (1 << 0),
+ WGET_OPT_SPIDER = (1 << 1),
+ WGET_OPT_QUIET = (1 << 2),
+ WGET_OPT_OUTNAME = (1 << 3),
+ WGET_OPT_PREFIX = (1 << 4),
+ WGET_OPT_PROXY = (1 << 5),
+ WGET_OPT_USER_AGENT = (1 << 6),
+ WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
+ WGET_OPT_RETRIES = (1 << 8),
+ WGET_OPT_PASSIVE = (1 << 9),
+ WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
+ WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
+};
-#if ENABLE_FEATURE_WGET_AUTHENTICATION
-/* Base64-encode character string and return the string. */
-static char *base64enc(unsigned char *p, char *buf, int len)
+enum {
+ PROGRESS_START = -1,
+ PROGRESS_END = 0,
+ PROGRESS_BUMP = 1,
+};
+#if ENABLE_FEATURE_WGET_STATUSBAR
+static void progress_meter(int flag)
{
- bb_uuencode(p, buf, len, bb_uuenc_tbl_base64);
- return buf;
+ if (option_mask32 & WGET_OPT_QUIET)
+ return;
+
+ if (flag == PROGRESS_START)
+ bb_progress_init(&G.pmt, G.curfile);
+
+ bb_progress_update(&G.pmt,
+ G.beg_range,
+ G.transferred,
+ (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
+ );
+
+ if (flag == PROGRESS_END) {
+ bb_progress_free(&G.pmt);
+ bb_putchar_stderr('\n');
+ G.transferred = 0;
+ }
}
+#else
+static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
#endif
-int wget_main(int argc, char **argv);
-int wget_main(int argc, char **argv)
-{
- char buf[512];
- struct host_info server, target;
- len_and_sockaddr *lsa;
- int n, status;
- int port;
- int try = 5;
- unsigned opt;
- char *str;
- char *proxy = 0;
- char *dir_prefix = NULL;
-#if ENABLE_FEATURE_WGET_LONG_OPTIONS
- char *extra_headers = NULL;
- llist_t *headers_llist = NULL;
-#endif
- FILE *sfp = NULL; /* socket to web/ftp server */
- FILE *dfp = NULL; /* socket to ftp server (data) */
- char *fname_out = NULL; /* where to direct output (-O) */
- bool got_clen = 0; /* got content-length: from server */
- int output_fd = -1;
- bool use_proxy = 1; /* Use proxies if env vars are set */
- const char *proxy_flag = "on"; /* Use proxies if env vars are set */
- const char *user_agent = "Wget";/* "User-Agent" header field */
- static const char * const keywords[] = {
- "content-length", "transfer-encoding", "chunked", "location", NULL
- };
- enum {
- KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
- };
- enum {
- WGET_OPT_CONTINUE = 0x1,
- WGET_OPT_SPIDER = 0x2,
- WGET_OPT_QUIET = 0x4,
- WGET_OPT_OUTNAME = 0x8,
- WGET_OPT_PREFIX = 0x10,
- WGET_OPT_PROXY = 0x20,
- WGET_OPT_USER_AGENT = 0x40,
- WGET_OPT_PASSIVE = 0x80,
- WGET_OPT_HEADER = 0x100,
- };
-#if ENABLE_FEATURE_WGET_LONG_OPTIONS
- static const struct option wget_long_options[] = {
- /* name, has_arg, flag, val */
- { "continue", no_argument, NULL, 'c' },
- { "spider", no_argument, NULL, 's' },
- { "quiet", no_argument, NULL, 'q' },
- { "output-document", required_argument, NULL, 'O' },
- { "directory-prefix", required_argument, NULL, 'P' },
- { "proxy", required_argument, NULL, 'Y' },
- { "user-agent", required_argument, NULL, 'U' },
- { "passive-ftp", no_argument, NULL, 0xff },
- { "header", required_argument, NULL, 0xfe },
- { 0, 0, 0, 0 }
- };
- applet_long_options = wget_long_options;
-#endif
- /* server.allocated = target.allocated = NULL; */
- opt_complementary = "-1" USE_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
- opt = getopt32(argc, argv, "csqO:P:Y:U:",
- &fname_out, &dir_prefix,
- &proxy_flag, &user_agent
- USE_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
- );
- if (strcmp(proxy_flag, "off") == 0) {
- /* Use the proxy if necessary */
- use_proxy = 0;
- }
-#if ENABLE_FEATURE_WGET_LONG_OPTIONS
- if (headers_llist) {
- int size = 1;
- char *cp;
- llist_t *ll = headers_llist;
- while (ll) {
- size += strlen(ll->data) + 2;
- ll = ll->link;
- }
- extra_headers = cp = xmalloc(size);
- while (headers_llist) {
- cp += sprintf(cp, "%s\r\n", headers_llist->data);
- headers_llist = headers_llist->link;
- }
- }
-#endif
+/* IPv6 has scoped address types, i.e. link-local and site-local addresses.
+ * A link-local address can carry a scope identifier that specifies the
+ * interface/link the address is valid on (e.g. fe80::1%eth0). This scope
+ * identifier is only valid on a single node.
+ *
+ * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
+ * unless all nodes agree on the semantics. Apache, for example, treats
+ * requests with a zone identifier in the Host header as invalid, see
+ * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
+ */
+static void strip_ipv6_scope_id(char *host)
+{
+ char *scope, *cp;
- parse_url(argv[optind], &target);
- server.host = target.host;
- server.port = target.port;
+ /* bbox wget actually handles IPv6 addresses without [], like
+ * wget "http://::1/xxx", but this is not standard.
+ * To save code, _here_ we do not support it. */
- /* Use the proxy if necessary */
- if (use_proxy) {
- proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
- if (proxy && *proxy) {
- parse_url(proxy, &server);
- } else {
- use_proxy = 0;
- }
- }
+ if (host[0] != '[')
+ return; /* not IPv6 */
- /* Guess an output filename */
- if (!fname_out) {
- // Dirty hack. Needed because bb_get_last_path_component
- // will destroy trailing / by storing '\0' in last byte!
- if (!last_char_is(target.path, '/')) {
- fname_out = bb_get_last_path_component(target.path);
-#if ENABLE_FEATURE_WGET_STATUSBAR
- curfile = fname_out;
-#endif
- }
- if (!fname_out || !fname_out[0]) {
- /* bb_get_last_path_component writes
- * to last '/' only. We don't have one here... */
- fname_out = (char*)"index.html";
-#if ENABLE_FEATURE_WGET_STATUSBAR
- curfile = fname_out;
-#endif
- }
- if (dir_prefix != NULL)
- fname_out = concat_path_file(dir_prefix, fname_out);
-#if ENABLE_FEATURE_WGET_STATUSBAR
- } else {
- curfile = bb_get_last_path_component(fname_out);
-#endif
- }
- /* Impossible?
- if ((opt & WGET_OPT_CONTINUE) && !fname_out)
- bb_error_msg_and_die("cannot specify continue (-c) without a filename (-O)"); */
-
- /* Determine where to start transfer */
- if (LONE_DASH(fname_out)) {
- output_fd = 1;
- opt &= ~WGET_OPT_CONTINUE;
- }
- if (opt & WGET_OPT_CONTINUE) {
- output_fd = open(fname_out, O_WRONLY);
- if (output_fd >= 0) {
- beg_range = xlseek(output_fd, 0, SEEK_END);
- }
- /* File doesn't exist. We do not create file here yet.
- We are not sure it exists on remove side */
- }
+ scope = strchr(host, '%');
+ if (!scope)
+ return;
- /* We want to do exactly _one_ DNS lookup, since some
- * sites (i.e. ftp.us.debian.org) use round-robin DNS
- * and we want to connect to only one IP... */
- lsa = xhost2sockaddr(server.host, server.port);
- if (!(opt & WGET_OPT_QUIET)) {
- fprintf(stderr, "Connecting to %s (%s)\n", server.host,
- xmalloc_sockaddr2dotted(&lsa->sa, lsa->len));
- /* We leak result of xmalloc_sockaddr2dotted */
+ /* Remove the IPv6 zone identifier from the host address */
+ cp = strchr(host, ']');
+ if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
+ /* malformed address (not "[xx]:nn" or "[xx]") */
+ return;
}
- if (use_proxy || !target.is_ftp) {
- /*
- * HTTP session
- */
- do {
- got_clen = chunked = 0;
-
- if (!--try)
- bb_error_msg_and_die("too many redirections");
-
- /* Open socket to http server */
- if (sfp) fclose(sfp);
- sfp = open_socket(lsa);
-
- /* Send HTTP request. */
- if (use_proxy) {
- fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
- target.is_ftp ? "f" : "ht", target.host,
- target.path);
- } else {
- fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
- }
-
- fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
- target.host, user_agent);
+ /* cp points to "]...", scope points to "%eth0]..." */
+ overlapping_strcpy(scope, cp);
+}
#if ENABLE_FEATURE_WGET_AUTHENTICATION
- if (target.user) {
- fprintf(sfp, "Authorization: Basic %s\r\n",
- base64enc((unsigned char*)target.user, buf, sizeof(buf)));
- }
- if (use_proxy && server.user) {
- fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
- base64enc((unsigned char*)server.user, buf, sizeof(buf)));
- }
-#endif
-
- if (beg_range)
- fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
-#if ENABLE_FEATURE_WGET_LONG_OPTIONS
- if (extra_headers)
- fputs(extra_headers, sfp);
+/* Base64-encode character string. */
+static char *base64enc(const char *str)
+{
+ unsigned len = strlen(str);
+ if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
+ len = sizeof(G.wget_buf)/4*3 - 10;
+ bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
+ return G.wget_buf;
+}
#endif
- fprintf(sfp, "Connection: close\r\n\r\n");
- /*
- * Retrieve HTTP response line and check for "200" status code.
- */
- read_response:
- if (fgets(buf, sizeof(buf), sfp) == NULL)
- bb_error_msg_and_die("no response from server");
-
- str = buf;
- str = skip_non_whitespace(str);
- str = skip_whitespace(str);
- // FIXME: no error check
- // xatou wouldn't work: "200 OK"
- status = atoi(str);
- switch (status) {
- case 0:
- case 100:
- while (gethdr(buf, sizeof(buf), sfp, &n) != NULL)
- /* eat all remaining headers */;
- goto read_response;
- case 200:
- break;
- case 300: /* redirection */
- case 301:
- case 302:
- case 303:
- break;
- case 206:
- if (beg_range)
- break;
- /*FALLTHRU*/
- default:
- /* Show first line only and kill any ESC tricks */
- buf[strcspn(buf, "\n\r\x1b")] = '\0';
- bb_error_msg_and_die("server returned error: %s", buf);
- }
-
- /*
- * Retrieve HTTP headers.
- */
- while ((str = gethdr(buf, sizeof(buf), sfp, &n)) != NULL) {
- /* gethdr did already convert the "FOO:" string to lowercase */
- smalluint key = index_in_str_array(keywords, *&buf) + 1;
- if (key == KEY_content_length) {
- content_len = BB_STRTOOFF(str, NULL, 10);
- if (errno || content_len < 0) {
- bb_error_msg_and_die("content-length %s is garbage", str);
- }
- got_clen = 1;
- continue;
- }
- if (key == KEY_transfer_encoding) {
- if (index_in_str_array(keywords, str_tolower(str)) + 1 != KEY_chunked)
- bb_error_msg_and_die("server wants to do %s transfer encoding", str);
- chunked = got_clen = 1;
- }
- if (key == KEY_location) {
- if (str[0] == '/')
- /* free(target.allocated); */
- target.path = /* target.allocated = */ xstrdup(str+1);
- else {
- parse_url(str, &target);
- if (use_proxy == 0) {
- server.host = target.host;
- server.port = target.port;
- }
- free(lsa);
- lsa = xhost2sockaddr(server.host, server.port);
- break;
- }
- }
- }
- } while (status >= 300);
-
- dfp = sfp;
+static char* sanitize_string(char *s)
+{
+ unsigned char *p = (void *) s;
+ while (*p >= ' ')
+ p++;
+ *p = '\0';
+ return s;
+}
- } else {
+static FILE *open_socket(len_and_sockaddr *lsa)
+{
+ FILE *fp;
- /*
- * FTP session
- */
- if (!target.user)
- target.user = xstrdup("anonymous:busybox@");
+ /* glibc 2.4 seems to try seeking on it - ??! */
+ /* hopefully it understands what ESPIPE means... */
+ fp = fdopen(xconnect_stream(lsa), "r+");
+ if (fp == NULL)
+ bb_perror_msg_and_die(bb_msg_memory_exhausted);
- sfp = open_socket(lsa);
- if (ftpcmd(NULL, NULL, sfp, buf) != 220)
- bb_error_msg_and_die("%s", buf+4);
+ return fp;
+}
- /*
- * Splitting username:password pair,
- * trying to log in
- */
- str = strchr(target.user, ':');
- if (str)
- *(str++) = '\0';
- switch (ftpcmd("USER ", target.user, sfp, buf)) {
- case 230:
- break;
- case 331:
- if (ftpcmd("PASS ", str, sfp, buf) == 230)
- break;
- /* FALLTHRU (failed login) */
- default:
- bb_error_msg_and_die("ftp login: %s", buf+4);
- }
+/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
+static char fgets_and_trim(FILE *fp)
+{
+ char c;
+ char *buf_ptr;
- ftpcmd("TYPE I", NULL, sfp, buf);
+ if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
+ bb_perror_msg_and_die("error getting response");
- /*
- * Querying file size
- */
- if (ftpcmd("SIZE ", target.path, sfp, buf) == 213) {
- content_len = BB_STRTOOFF(buf+4, NULL, 10);
- if (errno || content_len < 0) {
- bb_error_msg_and_die("SIZE value is garbage");
- }
- got_clen = 1;
- }
+ buf_ptr = strchrnul(G.wget_buf, '\n');
+ c = *buf_ptr;
+ *buf_ptr = '\0';
+ buf_ptr = strchrnul(G.wget_buf, '\r');
+ *buf_ptr = '\0';
- /*
- * Entering passive mode
- */
- if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
- pasv_error:
- bb_error_msg_and_die("bad response to %s: %s", "PASV", buf);
- }
- // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
- // Server's IP is N1.N2.N3.N4 (we ignore it)
- // Server's port for data connection is P1*256+P2
- str = strrchr(buf, ')');
- if (str) str[0] = '\0';
- str = strrchr(buf, ',');
- if (!str) goto pasv_error;
- port = xatou_range(str+1, 0, 255);
- *str = '\0';
- str = strrchr(buf, ',');
- if (!str) goto pasv_error;
- port += xatou_range(str+1, 0, 255) * 256;
- set_nport(lsa, htons(port));
- dfp = open_socket(lsa);
-
- if (beg_range) {
- sprintf(buf, "REST %"OFF_FMT"d", beg_range);
- if (ftpcmd(buf, NULL, sfp, buf) == 350)
- content_len -= beg_range;
- }
+ log_io("< %s", G.wget_buf);
- if (ftpcmd("RETR ", target.path, sfp, buf) > 150)
- bb_error_msg_and_die("bad response to RETR: %s", buf);
- }
- if (opt & WGET_OPT_SPIDER) {
- if (ENABLE_FEATURE_CLEAN_UP)
- fclose(sfp);
- goto done;
- }
+ return c;
+}
- /*
- * Retrieve file
- */
- if (chunked) {
- fgets(buf, sizeof(buf), dfp);
- content_len = STRTOOFF(buf, NULL, 16);
- /* FIXME: error check?? */
+static int ftpcmd(const char *s1, const char *s2, FILE *fp)
+{
+ int result;
+ if (s1) {
+ if (!s2)
+ s2 = "";
+ fprintf(fp, "%s%s\r\n", s1, s2);
+ fflush(fp);
+ log_io("> %s%s", s1, s2);
}
- /* Do it before progressmeter (want to have nice error message) */
- if (output_fd < 0)
- output_fd = xopen(fname_out,
- O_WRONLY|O_CREAT|O_EXCL|O_TRUNC);
-
- if (!(opt & WGET_OPT_QUIET))
- progressmeter(-1);
-
do {
- while (content_len > 0 || !got_clen) {
- unsigned rdsz = sizeof(buf);
- if (content_len < sizeof(buf) && (chunked || got_clen))
- rdsz = (unsigned)content_len;
- n = safe_fread(buf, 1, rdsz, dfp);
- if (n <= 0)
- break;
- if (full_write(output_fd, buf, n) != n) {
- bb_perror_msg_and_die(bb_msg_write_error);
- }
-#if ENABLE_FEATURE_WGET_STATUSBAR
- transferred += n;
-#endif
- if (got_clen) {
- content_len -= n;
- }
- }
-
- if (chunked) {
- safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
- safe_fgets(buf, sizeof(buf), dfp);
- content_len = STRTOOFF(buf, NULL, 16);
- /* FIXME: error check? */
- if (content_len == 0) {
- chunked = 0; /* all done! */
- }
- }
-
- if (n == 0 && ferror(dfp)) {
- bb_perror_msg_and_die(bb_msg_read_error);
- }
- } while (chunked);
-
- if (!(opt & WGET_OPT_QUIET))
- progressmeter(1);
+ fgets_and_trim(fp);
+ } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
- if ((use_proxy == 0) && target.is_ftp) {
- fclose(dfp);
- if (ftpcmd(NULL, NULL, sfp, buf) != 226)
- bb_error_msg_and_die("ftp error: %s", buf+4);
- ftpcmd("QUIT", NULL, sfp, buf);
- }
-done:
- exit(EXIT_SUCCESS);
+ G.wget_buf[3] = '\0';
+ result = xatoi_positive(G.wget_buf);
+ G.wget_buf[3] = ' ';
+ return result;
}
-
-static void parse_url(char *src_url, struct host_info *h)
+static void parse_url(const char *src_url, struct host_info *h)
{
char *url, *p, *sp;
- /* h->allocated = */ url = xstrdup(src_url);
+ free(h->allocated);
+ h->allocated = url = xstrdup(src_url);
if (strncmp(url, "http://", 7) == 0) {
h->port = bb_lookup_port("http", "tcp", 80);
h->host = url + 6;
h->is_ftp = 1;
} else
- bb_error_msg_and_die("not an http or ftp url: %s", url);
+ bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
// FYI:
// "Real" wget 'http://busybox.net?var=a/b' sends this request:
p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
if (!sp) {
- /* must be writable because of bb_get_last_path_component() */
- static char nullstr[] = "";
- h->path = nullstr;
+ h->path = "";
} else if (*sp == '/') {
*sp = '\0';
h->path = sp + 1;
// http://busybox.net?login=john@doe is a valid URL
// memmove converts to:
// http:/busybox.nett?login=john@doe...
- memmove(h->host-1, h->host, sp - h->host);
+ memmove(h->host - 1, h->host, sp - h->host);
h->host--;
sp[-1] = '\0';
h->path = sp;
}
+ // We used to set h->user to NULL here, but this interferes
+ // with handling of code 302 ("object was moved")
+
sp = strrchr(h->host, '@');
- h->user = NULL;
if (sp != NULL) {
- h->user = h->host;
+ // URL-decode "user:password" string before base64-encoding:
+ // wget http://test:my%20pass@example.com should send
+ // Authorization: Basic dGVzdDpteSBwYXNz
+ // which decodes to "test:my pass".
+ // Standard wget and curl do this too.
*sp = '\0';
+ h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
h->host = sp + 1;
}
sp = h->host;
}
-
-static FILE *open_socket(len_and_sockaddr *lsa)
-{
- FILE *fp;
-
- /* glibc 2.4 seems to try seeking on it - ??! */
- /* hopefully it understands what ESPIPE means... */
- fp = fdopen(xconnect_stream(lsa), "r+");
- if (fp == NULL)
- bb_perror_msg_and_die("fdopen");
-
- return fp;
-}
-
-
-static char *gethdr(char *buf, size_t bufsiz, FILE *fp, int *istrunc)
+static char *gethdr(FILE *fp)
{
char *s, *hdrval;
int c;
- *istrunc = 0;
-
/* retrieve header line */
- if (fgets(buf, bufsiz, fp) == NULL)
- return NULL;
+ c = fgets_and_trim(fp);
- /* see if we are at the end of the headers */
- for (s = buf; *s == '\r'; ++s)
- ;
- if (s[0] == '\n')
+ /* end of the headers? */
+ if (G.wget_buf[0] == '\0')
return NULL;
/* convert the header name to lower case */
- for (s = buf; isalnum(*s) || *s == '-'; ++s)
- *s = tolower(*s);
+ for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
+ /* tolower for "A-Z", no-op for "0-9a-z-." */
+ *s |= 0x20;
+ }
/* verify we are at the end of the header name */
if (*s != ':')
- bb_error_msg_and_die("bad header line: %s", buf);
+ bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
/* locate the start of the header value */
- for (*s++ = '\0'; *s == ' ' || *s == '\t'; ++s)
- ;
- hdrval = s;
+ *s++ = '\0';
+ hdrval = skip_whitespace(s);
- /* locate the end of header */
- while (*s != '\0' && *s != '\r' && *s != '\n')
- ++s;
-
- /* end of header found */
- if (*s != '\0') {
- *s = '\0';
- return hdrval;
+ if (c != '\n') {
+ /* Rats! The buffer isn't big enough to hold the entire header value */
+ while (c = getc(fp), c != EOF && c != '\n')
+ continue;
}
- /* Rats! The buffer isn't big enough to hold the entire header value. */
- while (c = getc(fp), c != EOF && c != '\n')
- ;
- *istrunc = 1;
return hdrval;
}
-static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
+static void reset_beg_range_to_zero(void)
{
- int result;
- if (s1) {
- if (!s2) s2 = "";
- fprintf(fp, "%s%s\r\n", s1, s2);
- fflush(fp);
+ bb_error_msg("restart failed");
+ G.beg_range = 0;
+ xlseek(G.output_fd, 0, SEEK_SET);
+ /* Done at the end instead: */
+ /* ftruncate(G.output_fd, 0); */
+}
+
+static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
+{
+ FILE *sfp;
+ char *str;
+ int port;
+
+ if (!target->user)
+ target->user = xstrdup("anonymous:busybox@");
+
+ sfp = open_socket(lsa);
+ if (ftpcmd(NULL, NULL, sfp) != 220)
+ bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
+
+ /*
+ * Splitting username:password pair,
+ * trying to log in
+ */
+ str = strchr(target->user, ':');
+ if (str)
+ *str++ = '\0';
+ switch (ftpcmd("USER ", target->user, sfp)) {
+ case 230:
+ break;
+ case 331:
+ if (ftpcmd("PASS ", str, sfp) == 230)
+ break;
+ /* fall through (failed login) */
+ default:
+ bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
}
- do {
- char *buf_ptr;
+ ftpcmd("TYPE I", NULL, sfp);
- if (fgets(buf, 510, fp) == NULL) {
- bb_perror_msg_and_die("error getting response");
- }
- buf_ptr = strstr(buf, "\r\n");
- if (buf_ptr) {
- *buf_ptr = '\0';
+ /*
+ * Querying file size
+ */
+ if (ftpcmd("SIZE ", target->path, sfp) == 213) {
+ G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
+ if (G.content_len < 0 || errno) {
+ bb_error_msg_and_die("SIZE value is garbage");
}
- } while (!isdigit(buf[0]) || buf[3] != ' ');
+ G.got_clen = 1;
+ }
- buf[3] = '\0';
- result = xatoi_u(buf);
- buf[3] = ' ';
- return result;
-}
+ /*
+ * Entering passive mode
+ */
+ if (ftpcmd("PASV", NULL, sfp) != 227) {
+ pasv_error:
+ bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
+ }
+ // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
+ // Server's IP is N1.N2.N3.N4 (we ignore it)
+ // Server's port for data connection is P1*256+P2
+ str = strrchr(G.wget_buf, ')');
+ if (str) str[0] = '\0';
+ str = strrchr(G.wget_buf, ',');
+ if (!str) goto pasv_error;
+ port = xatou_range(str+1, 0, 255);
+ *str = '\0';
+ str = strrchr(G.wget_buf, ',');
+ if (!str) goto pasv_error;
+ port += xatou_range(str+1, 0, 255) * 256;
+ set_nport(&lsa->u.sa, htons(port));
+
+ *dfpp = open_socket(lsa);
+
+ if (G.beg_range != 0) {
+ sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
+ if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
+ G.content_len -= G.beg_range;
+ else
+ reset_beg_range_to_zero();
+ }
-#if ENABLE_FEATURE_WGET_STATUSBAR
-/* Stuff below is from BSD rcp util.c, as added to openshh.
- * Original copyright notice is retained at the end of this file.
- */
-static int
-getttywidth(void)
-{
- int width;
- get_terminal_width_height(0, &width, NULL);
- return width;
+ if (ftpcmd("RETR ", target->path, sfp) > 150)
+ bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
+
+ return sfp;
}
-static void
-updateprogressmeter(int ignore)
+static void NOINLINE retrieve_file_data(FILE *dfp)
{
- int save_errno = errno;
+#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
+# if ENABLE_FEATURE_WGET_TIMEOUT
+ unsigned second_cnt = G.timeout_seconds;
+# endif
+ struct pollfd polldata;
+
+ polldata.fd = fileno(dfp);
+ polldata.events = POLLIN | POLLPRI;
+#endif
+ progress_meter(PROGRESS_START);
- progressmeter(0);
- errno = save_errno;
-}
+ if (G.chunked)
+ goto get_clen;
-static void alarmtimer(int iwait)
-{
- struct itimerval itv;
+ /* Loops only if chunked */
+ while (1) {
- itv.it_value.tv_sec = iwait;
- itv.it_value.tv_usec = 0;
- itv.it_interval = itv.it_value;
- setitimer(ITIMER_REAL, &itv, NULL);
-}
+#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
+ /* Must use nonblocking I/O, otherwise fread will loop
+ * and *block* until it reads a full buffer,
+ * which messes up the progress bar and/or timeout logic.
+ * Because of nonblocking I/O, we need to dance
+ * very carefully around EAGAIN. See the explanation at
+ * the clearerr() calls.
+ */
+ ndelay_on(polldata.fd);
+#endif
+ while (1) {
+ int n;
+ unsigned rdsz;
+
+#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
+ /* fread internally uses a read loop, which in our case
+ * is usually exited when we get EAGAIN.
+ * In this case, libc sets the error marker on the stream.
+ * We need to clear it before the next fread to avoid a possible
+ * (rare) false positive from ferror below. It is rare because
+ * fread usually gets more than zero bytes, and we don't fall
+ * into if (n <= 0) ...
+ */
+ clearerr(dfp);
+#endif
+ errno = 0;
+ rdsz = sizeof(G.wget_buf);
+ if (G.got_clen) {
+ if (G.content_len < (off_t)sizeof(G.wget_buf)) {
+ if ((int)G.content_len <= 0)
+ break;
+ rdsz = (unsigned)G.content_len;
+ }
+ }
+ n = fread(G.wget_buf, 1, rdsz, dfp);
-static void
-progressmeter(int flag)
-{
- static unsigned lastupdate_sec;
- static unsigned start_sec;
- static off_t lastsize, totalsize;
-
- off_t abbrevsize;
- unsigned since_last_update, elapsed;
- unsigned ratio;
- int barlength, i;
-
- if (flag == -1) { /* first call to progressmeter */
- start_sec = monotonic_sec();
- lastupdate_sec = start_sec;
- lastsize = 0;
- totalsize = content_len + beg_range; /* as content_len changes.. */
+ if (n > 0) {
+ xwrite(G.output_fd, G.wget_buf, n);
+#if ENABLE_FEATURE_WGET_STATUSBAR
+ G.transferred += n;
+#endif
+ if (G.got_clen) {
+ G.content_len -= n;
+ if (G.content_len == 0)
+ break;
+ }
+#if ENABLE_FEATURE_WGET_TIMEOUT
+ second_cnt = G.timeout_seconds;
+#endif
+ continue;
+ }
+
+ /* n <= 0.
+ * man fread:
+ * If error occurs, or EOF is reached, the return value
+ * is a short item count (or zero).
+ * fread does not distinguish between EOF and error.
+ */
+ if (errno != EAGAIN) {
+ if (ferror(dfp)) {
+ progress_meter(PROGRESS_END);
+ bb_perror_msg_and_die(bb_msg_read_error);
+ }
+ break; /* EOF, not error */
+ }
+
+#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
+ /* It was EAGAIN. There is no data. Wait up to one second
+ * then abort if timed out, or update the bar and try reading again.
+ */
+ if (safe_poll(&polldata, 1, 1000) == 0) {
+# if ENABLE_FEATURE_WGET_TIMEOUT
+ if (second_cnt != 0 && --second_cnt == 0) {
+ progress_meter(PROGRESS_END);
+ bb_error_msg_and_die("download timed out");
+ }
+# endif
+ /* We used to loop back to poll here,
+ * but there is no great harm in letting fread
+ * try reading anyway.
+ */
+ }
+ /* Need to do it _every_ second for "stalled" indicator
+ * to be shown properly.
+ */
+ progress_meter(PROGRESS_BUMP);
+#endif
+ } /* while (reading data) */
+
+#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
+ clearerr(dfp);
+ ndelay_off(polldata.fd); /* else fgets can get very unhappy */
+#endif
+ if (!G.chunked)
+ break;
+
+ fgets_and_trim(dfp); /* Eat empty line */
+ get_clen:
+ fgets_and_trim(dfp);
+ G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
+ /* FIXME: error check? */
+ if (G.content_len == 0)
+ break; /* all done! */
+ G.got_clen = 1;
+ /*
+ * Note that fgets may result in some data being buffered in dfp.
+ * We loop back to fread, which will retrieve this data.
+ * Also note that code has to be arranged so that fread
+ * is done _before_ one-second poll wait - poll doesn't know
+ * about stdio buffering and can result in spurious one second waits!
+ */
}
- ratio = 100;
- if (totalsize != 0 && !chunked) {
- /* long long helps to have it working even if !LFS */
- ratio = (unsigned) (100ULL * (transferred+beg_range) / totalsize);
- if (ratio > 100) ratio = 100;
+ /* If -c failed, we restart from the beginning,
+ * but we do not truncate the file then; we do it only now, at the end.
+ * This lets the user ^C if their 99% complete 10 GB file download
+ * failed to restart, *without* losing the almost complete file.
+ */
+ {
+ off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
+ if (pos != (off_t)-1)
+ ftruncate(G.output_fd, pos);
}
- fprintf(stderr, "\r%-20.20s%4d%% ", curfile, ratio);
+ /* Draw full bar and free its resources */
+ G.chunked = 0; /* makes it show 100% even for chunked download */
+ G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
+ progress_meter(PROGRESS_END);
+}
- barlength = getttywidth() - 49;
- if (barlength > 0) {
- /* god bless gcc for variable arrays :) */
- i = barlength * ratio / 100;
- {
- char buf[i+1];
- memset(buf, '*', i);
- buf[i] = '\0';
- fprintf(stderr, "|%s%*s|", buf, barlength - i, "");
+static void download_one_url(const char *url)
+{ /* Fetch one URL. Steps: parse the URL, choose proxy or direct server,
+ * pick the output file name, run an HTTP or FTP session (following
+ * Location: redirects until redir_limit runs out) and, unless spider
+ * mode is on, pass the data stream to retrieve_file_data().
+ * Fatal conditions visible here are reported via bb_error_msg_and_die();
+ * nothing is returned to the caller.
+ */
+ bool use_proxy; /* Use proxies if env vars are set */
+ int redir_limit; /* countdown of Location: redirects still allowed */
+ len_and_sockaddr *lsa;
+ FILE *sfp; /* socket to web/ftp server */
+ FILE *dfp; /* socket to ftp server (data); set to sfp itself for HTTP */
+ char *proxy = NULL;
+ char *fname_out_alloc; /* heap copy of G.fname_out, NULL when -O FILE was given; freed at the end */
+ char *redirected_path = NULL; /* heap copy of the path from a same-server redirect; freed at the end */
+ struct host_info server; /* host we connect to: the proxy, or the target itself */
+ struct host_info target; /* host/path named by the URL (or by a redirect) */
+
+ server.allocated = NULL;
+ target.allocated = NULL;
+ server.user = NULL;
+ target.user = NULL;
+
+ parse_url(url, &target);
+
+ /* Use the proxy if necessary: unless -Y is "off", look at ftp_proxy
+ * or http_proxy in the environment; unset or empty means no proxy */
+ use_proxy = (strcmp(G.proxy_flag, "off") != 0);
+ if (use_proxy) {
+ proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
+ use_proxy = (proxy && proxy[0]);
+ if (use_proxy)
+ parse_url(proxy, &server);
+ }
+ if (!use_proxy) {
+ server.port = target.port;
+ if (ENABLE_FEATURE_IPV6) {
+ //free(server.allocated); - can't be non-NULL
+ server.host = server.allocated = xstrdup(target.host);
+ } else {
+ server.host = target.host;
}
}
- i = 0;
- abbrevsize = transferred + beg_range;
- while (abbrevsize >= 100000) {
- i++;
- abbrevsize >>= 10;
+
+ if (ENABLE_FEATURE_IPV6)
+ strip_ipv6_scope_id(target.host);
+
+ /* If there was no -O FILE, guess output filename */
+ fname_out_alloc = NULL;
+ if (!(option_mask32 & WGET_OPT_OUTNAME)) {
+ G.fname_out = bb_get_last_path_component_nostrip(target.path);
+ /* handle "wget http://kernel.org//" */
+ if (G.fname_out[0] == '/' || !G.fname_out[0])
+ G.fname_out = (char*)"index.html";
+ /* -P DIR is considered only if there was no -O FILE */
+ if (G.dir_prefix)
+ G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
+ else {
+ /* redirects may free target.path later, need to make a copy */
+ G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
+ }
}
- /* see http://en.wikipedia.org/wiki/Tera */
- fprintf(stderr, "%6d%c ", (int)abbrevsize, " kMGTPEZY"[i]);
-
-// Nuts! Ain't it easier to update progress meter ONLY when we transferred++?
-// FIXME: get rid of alarmtimer + updateprogressmeter mess
-
- elapsed = monotonic_sec();
- since_last_update = elapsed - lastupdate_sec;
- if (transferred > lastsize) {
- lastupdate_sec = elapsed;
- lastsize = transferred;
- if (since_last_update >= STALLTIME) {
- /* We "cut off" these seconds from elapsed time
- * by adjusting start time */
- start_sec += since_last_update;
+#if ENABLE_FEATURE_WGET_STATUSBAR
+ G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
+#endif
+
+ /* Determine where to start transfer: with -c and an existing output
+ * file, resume at that file's current size; otherwise start at 0 */
+ G.beg_range = 0;
+ if (option_mask32 & WGET_OPT_CONTINUE) {
+ G.output_fd = open(G.fname_out, O_WRONLY);
+ if (G.output_fd >= 0) {
+ G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
}
- since_last_update = 0; /* we are un-stalled now */
+ /* File doesn't exist. We do not create file here yet.
+ * We are not sure it exists on remote side */
}
- elapsed -= start_sec; /* now it's "elapsed since start" */
- if (since_last_update >= STALLTIME) {
- fprintf(stderr, " - stalled -");
- } else {
- off_t to_download = totalsize - beg_range;
- if (transferred <= 0 || (int)elapsed <= 0 || transferred > to_download || chunked) {
- fprintf(stderr, "--:--:-- ETA");
+ redir_limit = 5;
+ resolve_lsa: /* (re)resolve the server address; a redirect to another host jumps back here */
+ lsa = xhost2sockaddr(server.host, server.port);
+ if (!(option_mask32 & WGET_OPT_QUIET)) {
+ char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
+ fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
+ free(s);
+ }
+ establish_session: /* start a fresh request on the current lsa; same-address redirects jump back here */
+ /*G.content_len = 0; - redundant, got_clen = 0 is enough */
+ G.got_clen = 0;
+ G.chunked = 0;
+ if (use_proxy || !target.is_ftp) {
+ /*
+ * HTTP session
+ * (also used for ftp:// targets when going through a proxy)
+ */
+ char *str;
+ int status;
+
+
+ /* Open socket to http server */
+ sfp = open_socket(lsa);
+
+ /* Send HTTP request */
+ if (use_proxy) {
+ fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
+ target.is_ftp ? "f" : "ht", target.host,
+ target.path);
} else {
- /* to_download / (transferred/elapsed) - elapsed: */
- int eta = (int) ((unsigned long long)to_download*elapsed/transferred - elapsed);
- /* (long long helps to have working ETA even if !LFS) */
- i = eta % 3600;
- fprintf(stderr, "%02d:%02d:%02d ETA", eta / 3600, i / 60, i % 60);
+ if (option_mask32 & WGET_OPT_POST_DATA)
+ fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
+ else
+ fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
+ }
+
+ fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
+ target.host, G.user_agent);
+
+ /* Ask server to close the connection as soon as we are done
+ * (IOW: we do not intend to send more requests)
+ */
+ fprintf(sfp, "Connection: close\r\n");
+
+#if ENABLE_FEATURE_WGET_AUTHENTICATION
+ if (target.user) {
+ fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, /* +6 skips the 6-char Proxy- prefix: a plain Authorization header is sent */
+ base64enc(target.user));
+ }
+ if (use_proxy && server.user) {
+ fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
+ base64enc(server.user));
+ }
+#endif
+
+ if (G.beg_range != 0) /* resuming: request only the bytes from beg_range on */
+ fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
+
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+ if (G.extra_headers)
+ fputs(G.extra_headers, sfp);
+
+ if (option_mask32 & WGET_OPT_POST_DATA) {
+ fprintf(sfp,
+ "Content-Type: application/x-www-form-urlencoded\r\n"
+ "Content-Length: %u\r\n"
+ "\r\n"
+ "%s",
+ (int) strlen(G.post_data), G.post_data
+ );
+ } else
+#endif
+ {
+ fprintf(sfp, "\r\n");
+ }
+
+ fflush(sfp);
+
+ /*
+ * Retrieve HTTP response line and check for "200" status code.
+ * The status is taken as the number following the first
+ * whitespace-separated word of the line.
+ */
+ read_response:
+ fgets_and_trim(sfp);
+
+ str = G.wget_buf;
+ str = skip_non_whitespace(str);
+ str = skip_whitespace(str);
+ // FIXME: no error check
+ // xatou wouldn't work: "200 OK"
+ status = atoi(str);
+ switch (status) {
+ case 0: /* atoi() found no number in the status field */
+ case 100: /* interim response: skip its headers, then read the next status line */
+ while (gethdr(sfp) != NULL)
+ /* eat all remaining headers */;
+ goto read_response;
+ case 200:
+/*
+Response 204 doesn't say "null file", it says "metadata
+has changed but data didn't":
+
+"10.2.5 204 No Content
+The server has fulfilled the request but does not need to return
+an entity-body, and might want to return updated metainformation.
+The response MAY include new or updated metainformation in the form
+of entity-headers, which if present SHOULD be associated with
+the requested variant.
+
+If the client is a user agent, it SHOULD NOT change its document
+view from that which caused the request to be sent. This response
+is primarily intended to allow input for actions to take place
+without causing a change to the user agent's active document view,
+although any new or updated metainformation SHOULD be applied
+to the document currently in the user agent's active view.
+
+The 204 response MUST NOT include a message-body, and thus
+is always terminated by the first empty line after the header fields."
+
+However, in real world it was observed that some web servers
+(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
+*/
+ case 204:
+ if (G.beg_range != 0) {
+ /* "Range:..." was not honored by the server.
+ * Restart download from the beginning.
+ */
+ reset_beg_range_to_zero();
+ }
+ break;
+ case 300: /* redirection: acted upon below, when a Location: header is seen */
+ case 301:
+ case 302:
+ case 303:
+ break;
+ case 206: /* Partial Content */
+ if (G.beg_range != 0)
+ /* "Range:..." worked. Good. */
+ break;
+ /* Partial Content even though we did not ask for it??? */
+ /* fall through */
+ default:
+ bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
+ }
+
+ /*
+ * Retrieve HTTP headers.
+ * Only content-length, transfer-encoding and location are acted upon;
+ * every other header is read and dropped.
+ */
+ while ((str = gethdr(sfp)) != NULL) {
+ static const char keywords[] ALIGN1 =
+ "content-length\0""transfer-encoding\0""location\0";
+ enum {
+ KEY_content_length = 1, KEY_transfer_encoding, KEY_location
+ };
+ smalluint key;
+
+ /* gethdr converted "FOO:" string to lowercase */
+ /* NOTE(review): judging by the lookup below, the header name is
+ * left in G.wget_buf and str is the header value - confirm in gethdr */
+
+ /* strip trailing whitespace */
+ char *s = strchrnul(str, '\0') - 1;
+ while (s >= str && (*s == ' ' || *s == '\t')) {
+ *s = '\0';
+ s--;
+ }
+ key = index_in_strings(keywords, G.wget_buf) + 1; /* 0 means: not one of the keywords */
+ if (key == KEY_content_length) {
+ G.content_len = BB_STRTOOFF(str, NULL, 10);
+ if (G.content_len < 0 || errno) {
+ bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
+ }
+ G.got_clen = 1;
+ continue;
+ }
+ if (key == KEY_transfer_encoding) {
+ if (strcmp(str_tolower(str), "chunked") != 0)
+ bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
+ G.chunked = 1;
+ }
+ if (key == KEY_location && status >= 300) {
+ if (--redir_limit == 0)
+ bb_error_msg_and_die("too many redirections");
+ fclose(sfp);
+ if (str[0] == '/') { /* path-only redirect: keep host, replace the path */
+ free(redirected_path);
+ target.path = redirected_path = xstrdup(str+1);
+ /* lsa stays the same: it's on the same server */
+ } else {
+ parse_url(str, &target);
+ if (!use_proxy) {
+ free(server.allocated);
+ server.allocated = NULL;
+ server.host = target.host;
+ /* strip_ipv6_scope_id(target.host); - no! */
+ /* we assume remote never gives us IPv6 addr with scope id */
+ server.port = target.port;
+ free(lsa);
+ goto resolve_lsa;
+ } /* else: lsa stays the same: we use proxy */
+ }
+ goto establish_session;
+ }
+ }
+// if (status >= 300)
+// bb_error_msg_and_die("bad redirection (no Location: header from server)");
+
+ /* For HTTP, data is pumped over the same connection */
+ dfp = sfp;
+
+ } else {
+ /*
+ * FTP session
+ */
+ sfp = prepare_ftp_session(&dfp, &target, lsa);
+ }
+
+ free(lsa);
+
+ if (!(option_mask32 & WGET_OPT_SPIDER)) { /* not spider mode: actually fetch the data */
+ if (G.output_fd < 0) /* not already open (via -c, -O -, or an earlier URL with -O) */
+ G.output_fd = xopen(G.fname_out, G.o_flags);
+ retrieve_file_data(dfp);
+ if (!(option_mask32 & WGET_OPT_OUTNAME)) { /* per-URL output file: close it now; with -O the fd stays open */
+ xclose(G.output_fd);
+ G.output_fd = -1;
}
}
- if (flag == -1) { /* first call to progressmeter */
- struct sigaction sa;
- sa.sa_handler = updateprogressmeter;
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = SA_RESTART;
- sigaction(SIGALRM, &sa, NULL);
- alarmtimer(1);
- } else if (flag == 1) { /* last call to progressmeter */
- alarmtimer(0);
- transferred = 0;
- putc('\n', stderr);
+ if (dfp != sfp) {
+ /* It's ftp. Close data connection properly */
+ fclose(dfp);
+ if (ftpcmd(NULL, NULL, sfp) != 226)
+ bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
+ /* ftpcmd("QUIT", NULL, sfp); - why bother? */
}
+ fclose(sfp);
+
+ free(server.allocated);
+ free(target.allocated);
+ free(fname_out_alloc);
+ free(redirected_path);
}
+
+int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int wget_main(int argc UNUSED_PARAM, char **argv)
+{ /* Applet entry point: set defaults, parse options with getopt32(),
+ * join any --header values into G.extra_headers, set up the output
+ * fd/open flags, then call download_one_url() for each remaining
+ * argument in order. Returns EXIT_SUCCESS once all calls have returned.
+ */
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+ static const char wget_longopts[] ALIGN1 =
+ /* name, has_arg, val */
+ "continue\0" No_argument "c"
+//FIXME: -s isn't --spider, it's --save-headers!
+ "spider\0" No_argument "s"
+ "quiet\0" No_argument "q"
+ "output-document\0" Required_argument "O"
+ "directory-prefix\0" Required_argument "P"
+ "proxy\0" Required_argument "Y"
+ "user-agent\0" Required_argument "U"
+#if ENABLE_FEATURE_WGET_TIMEOUT
+ "timeout\0" Required_argument "T"
+#endif
+ /* Ignored: */
+ // "tries\0" Required_argument "t"
+ /* Ignored (we always use PASV): */
+ "passive-ftp\0" No_argument "\xff"
+ "header\0" Required_argument "\xfe" /* not ignored: values end up in headers_llist below */
+ "post-data\0" Required_argument "\xfd" /* not ignored: value ends up in G.post_data below */
+ /* Ignored (we don't do ssl) */
+ "no-check-certificate\0" No_argument "\xfc"
+ /* Ignored (we don't support caching) */
+ "no-cache\0" No_argument "\xfb"
+ ;
#endif
-/* Original copyright notice which applies to the CONFIG_FEATURE_WGET_STATUSBAR stuff,
- * much of which was blatantly stolen from openssh. */
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+ llist_t *headers_llist = NULL;
+#endif
-/*-
- * Copyright (c) 1992, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * 3. <BSD Advertising Clause omitted per the July 22, 1999 licensing change
- * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change>
- *
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
+ INIT_G();
+
+ IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
+ G.proxy_flag = "on"; /* use proxies if env vars are set */
+ G.user_agent = "Wget"; /* "User-Agent" header field */
+
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+ applet_long_options = wget_longopts;
+#endif
+ opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::"); /* NOTE(review): presumably - at least one URL, -T numeric, --header repeatable; confirm against getopt32 docs */
+ getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
+ &G.fname_out, &G.dir_prefix,
+ &G.proxy_flag, &G.user_agent,
+ IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
+ NULL /* -t RETRIES */
+ IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
+ IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
+ );
+ argv += optind;
+
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+ if (headers_llist) {
+ int size = 1; /* room for the terminating NUL */
+ char *cp;
+ llist_t *ll = headers_llist;
+ while (ll) {
+ size += strlen(ll->data) + 2; /* header text plus the CR LF appended below */
+ ll = ll->link;
+ }
+ G.extra_headers = cp = xmalloc(size);
+ while (headers_llist) {
+ cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
+ }
+ }
+#endif
+
+ G.output_fd = -1; /* no output file opened yet */
+ G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL; /* default: O_EXCL makes creation fail if the file already exists */
+ if (G.fname_out) { /* -O FILE ? */
+ if (LONE_DASH(G.fname_out)) { /* -O - ? */
+ G.output_fd = 1;
+ option_mask32 &= ~WGET_OPT_CONTINUE; /* -c is dropped when output goes to fd 1 */
+ }
+ /* compat with wget: -O FILE can overwrite */
+ G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
+ }
+
+ while (*argv)
+ download_one_url(*argv++);
+
+ if (G.output_fd >= 0) /* still open here when -O was given */
+ xclose(G.output_fd);
+
+ return EXIT_SUCCESS;
+}