1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //config: bool "wget (35 kb)"
15 //config: wget is a utility for non-interactive download of files from HTTP
16 //config: and FTP servers.
18 //config:config FEATURE_WGET_LONG_OPTIONS
19 //config: bool "Enable long options"
21 //config: depends on WGET && LONG_OPTS
23 //config:config FEATURE_WGET_STATUSBAR
24 //config: bool "Enable progress bar (+2k)"
26 //config: depends on WGET
28 //config:config FEATURE_WGET_AUTHENTICATION
29 //config: bool "Enable HTTP authentication"
31 //config: depends on WGET
33 //config: Support authenticated HTTP transfers.
35 //config:config FEATURE_WGET_TIMEOUT
36 //config: bool "Enable timeout option -T SEC"
38 //config: depends on WGET
40 //config: Supports network read and connect timeouts for wget,
41 //config: so that wget will give up and timeout, through the -T
42 //config: command line option.
44 //config: Currently only connect and network data read timeout are
45 //config: supported (i.e., timeout is not applied to the DNS query). When
46 //config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
47 //config: will work in addition to -T.
49 //config:config FEATURE_WGET_HTTPS
50 //config: bool "Support HTTPS using internal TLS code"
51 //it also enables FTPS support, but it's not well tested yet
53 //config: depends on WGET
56 //config: wget will use internal TLS code to connect to https:// URLs.
58 //config: On NOMMU machines, ssl_helper applet should be available
59 //config: in the $PATH for this to work. Make sure to select that applet.
61 //config: Note: currently, TLS code only makes TLS I/O work, it
62 //config: does *not* check that the peer is who it claims to be, etc.
63 //config: IOW: it uses peer-supplied public keys to establish encryption
64 //config: and signing keys, then encrypts and signs outgoing data and
65 //config: decrypts incoming data.
66 //config: It does not check signature hashes on the incoming data:
67 //config: this means that attackers manipulating TCP packets can
68 //config: send altered data and we unknowingly receive garbage.
69 //config: (This check might be relatively easy to add).
70 //config: It does not check public key's certificate:
71 //config: this means that the peer may be an attacker impersonating
72 //config: the server we think we are talking to.
74 //config: If you think this is unacceptable, consider this. As more and more
75 //config: servers switch to HTTPS-only operation, without such "crippled"
76 //config: TLS code it is *impossible* to simply download a kernel source
77 //config: from kernel.org. Which can in real world translate into
78 //config: "my small automatic tooling to build cross-compilers from sources
79 //config: no longer works, I need to additionally keep a local copy
80 //config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte
81 //config: source of wget, need to compile and build both before I can
82 //config: download anything. All this despite the fact that the build
83 //config: is done in a QEMU sandbox on a machine with absolutely nothing
84 //config: worth stealing, so I don't care if someone would go to a lot
85 //config: of trouble to intercept my HTTPS download to send me an altered
86 //config: kernel tarball".
88 //config: If you still think this is unacceptable, send patches.
90 //config: If you still think this is unacceptable, do not want to send
91 //config: patches, but do want to waste bandwidth explaining how wrong
92 //config: it is, you will be ignored.
94 //config:config FEATURE_WGET_OPENSSL
95 //config: bool "Try to connect to HTTPS using openssl"
97 //config: depends on WGET
99 //config: Try to use openssl to handle HTTPS.
101 //config: OpenSSL has a simple SSL client for debug purposes.
102 //config: If you select this option, wget will effectively run:
103 //config: "openssl s_client -quiet -connect hostname:443
104 //config: -servername hostname 2>/dev/null" and pipe its data
105 //config: through it. -servername is not used if hostname is numeric.
106 //config: Note inconvenient API: host resolution is done twice,
107 //config: and there is no guarantee openssl's idea of IPv6 address
108 //config: format is the same as ours.
109 //config: Another problem is that s_client prints debug information
110 //config: to stderr, and it needs to be suppressed. This means
111 //config: all error messages get suppressed too.
112 //config: openssl is also a big binary, often dynamically linked
113 //config: against ~15 libraries.
115 //config: If openssl can't be executed, internal TLS code will be used
116 //config: (if you enabled it); if openssl can be executed but fails later,
117 //config: wget can't detect this, and download will fail.
119 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
121 //kbuild:lib-$(CONFIG_WGET) += wget.o
123 //usage:#define wget_trivial_usage
124 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
125 //usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
126 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
127 /* Since we ignore these opts, we don't show them in --help */
128 /* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
129 /* //usage: " [-nv] [-nc] [-nH] [-np]" */
130 //usage: " [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
132 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
133 //usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
134 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
136 //usage:#define wget_full_usage "\n\n"
137 //usage: "Retrieve files via HTTP or FTP\n"
138 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
139 //usage: "\n --spider Only check URL existence: $? is 0 if exists"
141 //usage: "\n -c Continue retrieval of aborted transfer"
142 //usage: "\n -q Quiet"
143 //usage: "\n -P DIR Save to DIR (default .)"
144 //usage: "\n -S Show server response"
145 //usage: IF_FEATURE_WGET_TIMEOUT(
146 //usage: "\n -T SEC Network read timeout is SEC seconds"
148 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
149 //usage: "\n -U STR Use STR for User-Agent header"
150 //usage: "\n -Y on/off Use proxy"
// I/O tracing helpers. Two alternative definitions of log_io()/SENDFMT()
// are listed below; the preprocessor conditional that selects between them
// is not part of this listing.
//  - tracing variant: log_io() forwards to bb_error_msg(), and SENDFMT()
//    logs the outgoing text with a "> " prefix and then writes it to fp.
//  - plain variant: log_io() expands to a no-op and SENDFMT() is fprintf().
// NOTE(review): log_io() takes a printf-style format as its first argument,
// so callers must not pass untrusted text in that position.
155 # define log_io(...) bb_error_msg(__VA_ARGS__)
156 # define SENDFMT(fp, fmt, ...) \
158 log_io("> " fmt, ##__VA_ARGS__); \
159 fprintf(fp, fmt, ##__VA_ARGS__); \
162 # define log_io(...) ((void)0)
163 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
// Non-zero when at least one TLS backend (external openssl helper or the
// internal TLS code) is configured.
167 #define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
// URL scheme names. parse_url() stores one of the P_* pointers below in the
// protocol member, and other code in this file tests that member with
// pointer comparisons (protocol == P_FTPS, protocol == P_HTTPS) rather than
// strcmp(), so the member must only ever hold one of these arrays.
// NOTE(review): the enclosing struct declaration of the protocol member is
// not visible in this listing.
173 const char *protocol;
177 static const char P_FTP[] ALIGN1 = "ftp";
178 static const char P_HTTP[] ALIGN1 = "http";
// The TLS scheme names follow; ftps is only declared when the internal TLS
// code is enabled.
180 # if ENABLE_FEATURE_WGET_HTTPS
181 static const char P_FTPS[] ALIGN1 = "ftps";
183 static const char P_HTTPS[] ALIGN1 = "https";
// Bookkeeping for request headers supplied by the user.
// Each HDR_* value is a bit recorded in G.user_headers; the USR_HEADER_*
// macros test those bits, and the request-building code skips its built-in
// header when the matching macro is non-zero.
// The two authentication bits are multiplied by the feature switch, so they
// collapse to 0 when HTTP authentication support is compiled out.
// Without long options the USR_HEADER_* macros are the constant 0, which
// means the built-in headers are always sent.
// NOTE(review): some enumerators and header-name strings are missing from
// this listing (HDR_HOST and HDR_RANGE are referenced but not shown).
186 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
187 /* User-specified headers prevent using our corresponding built-in headers. */
190 HDR_USER_AGENT = (1<<1),
192 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
193 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
// NUL-separated list of header names; presumably its order matches the
// HDR_* bit positions above - TODO confirm against the full file.
195 static const char wget_user_headers[] ALIGN1 =
199 # if ENABLE_FEATURE_WGET_AUTHENTICATION
201 "Proxy-Authorization:\0"
204 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
205 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
206 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
207 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
208 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
209 #else /* No long options, no user-headers :( */
210 # define USR_HEADER_HOST 0
211 # define USR_HEADER_USER_AGENT 0
212 # define USR_HEADER_RANGE 0
213 # define USR_HEADER_AUTH 0
214 # define USR_HEADER_PROXY_AUTH 0
// Applet-wide state. All of it is reached through the G macro, which
// dereferences ptr_to_globals; INIT_G() points that at a zero-filled
// allocation (xzalloc), so every member starts out as 0 / NULL.
// NOTE(review): the struct header, several members and the closing lines of
// INIT_G()/FINI_G() are missing from this listing.
219 off_t content_len; /* Content-length of the file */
220 off_t beg_range; /* Range at which continue begins */
221 #if ENABLE_FEATURE_WGET_STATUSBAR
222 off_t transferred; /* Number of bytes transferred so far */
223 const char *curfile; /* Name of current file being transferred */
227 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
230 unsigned char user_headers; /* Headers mentioned by the user */
232 char *fname_out; /* where to direct output (-O) */
233 const char *proxy_flag; /* Use proxies if env vars are set */
234 const char *user_agent; /* "User-Agent" header field */
235 #if ENABLE_FEATURE_WGET_TIMEOUT
// timeout_seconds == 0 means no timeout: set_alarm() arms nothing and the
// per-second countdown in retrieve_file_data() never fires.
236 unsigned timeout_seconds;
// Consulted by alarm_handler(): the process only dies on SIGALRM while this
// flag is set.
237 bool die_if_timed_out;
241 smallint chunked; /* chunked transfer encoding */
242 smallint got_clen; /* got content-length: from server */
243 /* Local downloads do benefit from big buffer.
244 * With 512 byte buffer, it was measured to be
245 * an order of magnitude slower than with big one.
247 uint64_t just_to_align_next_member;
248 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
250 #define G (*ptr_to_globals)
251 #define INIT_G() do { \
252 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
254 #define FINI_G() do { \
255 FREE_PTR_TO_GLOBALS(); \
// Bit masks tested against option_mask32. Bit N corresponds to the N-th
// option in the option string passed to the option parser, so the order
// here and in that string have to stay in step.
// The last three masks are multiplied by the long-options feature switch,
// so they are 0 (and every test against them is false) when long options
// are compiled out.
259 /* Must match option string! */
261 WGET_OPT_CONTINUE = (1 << 0),
262 WGET_OPT_QUIET = (1 << 1),
263 WGET_OPT_SERVER_RESPONSE = (1 << 2),
264 WGET_OPT_OUTNAME = (1 << 3),
265 WGET_OPT_PREFIX = (1 << 4),
266 WGET_OPT_PROXY = (1 << 5),
267 WGET_OPT_USER_AGENT = (1 << 6),
268 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
269 WGET_OPT_RETRIES = (1 << 8),
270 WGET_OPT_nsomething = (1 << 9),
271 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
272 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
273 WGET_OPT_SPIDER = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
// progress_meter(flag): drive the status bar for the current transfer.
//   flag is one of PROGRESS_START, PROGRESS_BUMP or PROGRESS_END (the values
//   used by the callers in this file).
// Checks the -q (quiet) option first; the statement guarded by that check is
// not visible in this listing, presumably an early return.
// PROGRESS_START initialises the bar with the current file name.
// The total size handed to bb_progress_update() is 0 (unknown) for chunked
// transfers or when no Content-Length was received; otherwise it is
// beg_range + transferred + content_len.
// PROGRESS_END frees the bar and finishes the line on stderr.
// NOTE(review): braces and part of the bb_progress_update() argument list
// are missing from this listing.
281 #if ENABLE_FEATURE_WGET_STATUSBAR
282 static void progress_meter(int flag)
284 if (option_mask32 & WGET_OPT_QUIET)
287 if (flag == PROGRESS_START)
288 bb_progress_init(&G.pmt, G.curfile);
290 bb_progress_update(&G.pmt,
293 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
296 if (flag == PROGRESS_END) {
297 bb_progress_free(&G.pmt);
298 bb_putchar_stderr('\n');
// Empty stand-in with the same signature, so callers need no conditionals
// of their own (presumably the status-bar-disabled branch - the #else line
// is not visible here).
303 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Removes the zone identifier from host, in place:
 *   "[fe80::1%eth0]:80" -> "[fe80::1]:80"
 *   "[fe80::1%eth0]"    -> "[fe80::1]"
 * Any other input (no brackets, no '%', malformed bracket form, or a '%'
 * that is not inside the brackets) is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */
	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (scope > cp) {
		/* The '%' is after the closing bracket (e.g. "[::1]:80%x"),
		 * so it is not a zone identifier. Copying "]..." forward onto
		 * it would overlap with source below destination and overrun
		 * the buffer; leave the string alone instead. */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	memmove(scope, cp, strlen(cp) + 1);
}
// base64enc(str): Base64-encode the NUL-terminated string str into the
// shared buffer G.wget_buf using bb_uuencode() with the base64 table.
// Base64 turns every 3 input bytes into 4 output bytes, so the input length
// is clamped to sizeof(G.wget_buf)/4*3 - 10 to keep the output inside the
// buffer; longer input is silently truncated.
// The result lives in G.wget_buf, so it is overwritten by the next user of
// that buffer.
// NOTE(review): the return statement is not visible in this listing;
// callers use the return value as the encoded string.
343 #if ENABLE_FEATURE_WGET_AUTHENTICATION
344 /* Base64-encode character string. */
345 static char *base64enc(const char *str)
347 unsigned len = strlen(str);
348 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
349 len = sizeof(G.wget_buf)/4*3 - 10;
350 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
// sanitize_string(s): called on server- or user-supplied text before it is
// printed in error messages elsewhere in this file.
// p walks the string as unsigned char, so bytes >= 0x80 are not treated as
// negative values.
// NOTE(review): the rest of the body (the scan loop and the return) is
// missing from this listing; presumably the string is modified in place and
// s is returned - confirm against the full file.
355 static char* sanitize_string(char *s)
357 unsigned char *p = (void *) s;
// Timeout support built on alarm().
//  - alarm_handler(): the signal handler; it terminates the program with
//    "download timed out", but only while G.die_if_timed_out is set.
//  - set_alarm(): when a timeout is configured (G.timeout_seconds != 0),
//    arms alarm() for that many seconds and sets the flag.
//  - clear_alarm(): only clears the flag; the pending alarm itself is not
//    cancelled, it just becomes harmless.
// When the timeout feature is compiled out, both helpers expand to no-ops.
// NOTE(review): braces and the #else/#endif lines are missing from this
// listing.
364 #if ENABLE_FEATURE_WGET_TIMEOUT
365 static void alarm_handler(int sig UNUSED_PARAM)
367 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
368 if (G.die_if_timed_out)
369 bb_error_msg_and_die("download timed out");
371 static void set_alarm(void)
373 if (G.timeout_seconds) {
374 alarm(G.timeout_seconds);
375 G.die_if_timed_out = 1;
378 # define clear_alarm() ((void)(G.die_if_timed_out = 0))
380 # define set_alarm() ((void)0)
381 # define clear_alarm() ((void)0)
384 #if ENABLE_FEATURE_WGET_OPENSSL
/*
 * is_ip_address() attempts to verify whether or not a string
 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
 * of inet_pton() can be used to determine this.
 *
 * Returns 1 if string is a numeric IPv4 address (or, with IPv6 support
 * enabled, a numeric IPv6 address), 0 otherwise.
 *
 * inet_pton() returns 1 on success, 0 if the text is not a valid address
 * of the requested family, and -1 if the family is unsupported. Only 1
 * counts as "is an address" here, so both failure forms yield 0.
 */
static int is_ip_address(const char *string)
{
	struct sockaddr_in sa;
	int result;

	result = inet_pton(AF_INET, string, &(sa.sin_addr));
# if ENABLE_FEATURE_IPV6
	if (result != 1) {
		/* Not an IPv4 literal: try IPv6. This must not run after
		 * a successful IPv4 parse, or it would overwrite the
		 * positive result with a failure. */
		struct sockaddr_in6 sa6;
		result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
	}
# endif
	return (result == 1);
}
// open_socket(lsa): connect a stream socket to the address in lsa and wrap
// the descriptor in a read/write stdio stream ("r+").
// xconnect_stream() is one of the x-prefixed helpers, which by the naming
// convention used in this file die on failure instead of returning an error.
// A failing fdopen() is reported as memory exhaustion and is fatal.
// NOTE(review): local declarations, the check guarding the fatal call and
// the return statement are missing from this listing; callers use the
// return value as the connected stream.
408 static FILE *open_socket(len_and_sockaddr *lsa)
414 fd = xconnect_stream(lsa);
417 /* glibc 2.4 seems to try seeking on it - ??! */
418 /* hopefully it understands what ESPIPE means... */
419 fp = fdopen(fd, "r+");
421 bb_perror_msg_and_die(bb_msg_memory_exhausted);
// fgets_and_trim(fp, fmt): read one line from fp into G.wget_buf.
//   fp  - stream to read from.
//   fmt - printf format containing a single %s, or NULL. When non-NULL and
//         -S (server response) was given, the line is echoed to stderr
//         through this format.
// A read failure or EOF is fatal ("error getting response").
// The line is also passed to log_io() with a "< " prefix.
// NOTE(review): the statements that cut the string at the '\n' / '\r'
// position found by strchrnul(), and the return statement, are missing from
// this listing; the original comment below states the return contract.
426 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
427 static char fgets_and_trim(FILE *fp, const char *fmt)
433 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
434 bb_perror_msg_and_die("error getting response");
437 buf_ptr = strchrnul(G.wget_buf, '\n');
440 buf_ptr = strchrnul(G.wget_buf, '\r');
443 log_io("< %s", G.wget_buf);
445 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
446 fprintf(stderr, fmt, G.wget_buf);
// ftpcmd(s1, s2, fp): send one FTP command and read the reply.
//   s1, s2 - the command is the concatenation s1 + s2 followed by CRLF.
//            Callers in this file also pass NULL for s2, and NULL for both
//            when only the server greeting is to be read; the guards that
//            make this safe are not visible in this listing.
//   fp     - control connection stream.
// With -S the command is echoed to stderr after a "--> " marker.
// Reply lines are read until one starts with a digit and has a space in
// column 3, i.e. the final line of a possibly multi-line reply.
// The first three characters of that line are then parsed as the numeric
// reply code; G.wget_buf[3] is overwritten with NUL for this purpose, and
// callers address the reply text at G.wget_buf + 4.
// NOTE(review): the return statement is missing from this listing;
// callers compare the return value against FTP reply codes.
451 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
457 fprintf(fp, "%s%s\r\n", s1, s2);
458 /* With --server-response, wget also shows its ftp commands */
459 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
460 fprintf(stderr, "--> %s%s\n\n", s1, s2);
462 log_io("> %s%s", s1, s2);
466 fgets_and_trim(fp, "%s\n");
467 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
469 G.wget_buf[3] = '\0';
470 result = xatoi_positive(G.wget_buf);
// parse_url(src_url, h): split a URL into the fields of *h.
//   src_url - URL text; it is duplicated, and the copy (owned through
//             h->allocated) is what gets cut up.
//   h       - receives protocol, port, host and user (and presumably path,
//             which callers read - the assignment is not visible here).
// Recognised schemes and their default ports (used when the service lookup
// in bb_lookup_port() has no better answer): ftp 21, ftps 990, https 443,
// http 80. Any other scheme before "://" is fatal.
// The host part ends at the first of '/', '?' or '#'.
// If the host part contains '@', the text before the last '@' is
// percent-decoded and stored as a freshly allocated h->user.
// NOTE(review): many lines of this function are missing from this listing
// (declarations, braces, the no-scheme fallback, path and port handling).
475 static void parse_url(const char *src_url, struct host_info *h)
480 h->allocated = url = xstrdup(src_url);
483 p = strstr(url, "://");
487 if (strcmp(url, P_FTP) == 0) {
488 h->port = bb_lookup_port(P_FTP, "tcp", 21);
491 # if ENABLE_FEATURE_WGET_HTTPS
492 if (strcmp(url, P_FTPS) == 0) {
493 h->port = bb_lookup_port(P_FTPS, "tcp", 990);
494 h->protocol = P_FTPS;
497 if (strcmp(url, P_HTTPS) == 0) {
498 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
499 h->protocol = P_HTTPS;
502 if (strcmp(url, P_HTTP) == 0) {
504 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
505 h->protocol = P_HTTP;
508 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
511 // GNU wget is user-friendly and falls back to http://
517 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
518 // 'GET /?var=a/b HTTP/1.0'
519 // and saves 'index.html?var=a%2Fb' (we save 'b')
520 // wget 'http://busybox.net?login=john@doe':
521 // request: 'GET /?login=john@doe HTTP/1.0'
522 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
523 // wget 'http://busybox.net#test/test':
524 // request: 'GET / HTTP/1.0'
525 // saves: 'index.html' (we save 'test')
527 // We also don't add unique .N suffix if file exists...
// sp ends up at the earliest of '/', '?' and '#' in the host part, or NULL
// when none of them is present.
528 sp = strchr(h->host, '/');
529 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
530 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
533 } else if (*sp == '/') {
536 } else { // '#' or '?'
537 // http://busybox.net?login=john@doe is a valid URL
538 // memmove converts to:
539 // http:/busybox.nett?login=john@doe...
// The host text is shifted one byte to the left, into the "//" that
// precedes it, which frees one byte in front of the '?' or '#'.
540 memmove(h->host - 1, h->host, sp - h->host);
546 sp = strrchr(h->host, '@');
548 // URL-decode "user:password" string before base64-encoding:
549 // wget http://test:my%20pass@example.com should send
550 // Authorization: Basic dGVzdDpteSBwYXNz
551 // which decodes to "test:my pass".
552 // Standard wget and curl do this too.
555 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
558 /* else: h->user remains NULL, or as set by original request
559 * before redirect (if we are here after a redirect).
// gethdr(fp): read the next HTTP response header line from fp.
// The line is read into G.wget_buf with fgets_and_trim(), which also echoes
// it when -S is active.
// An empty line marks the end of the headers; callers loop until this
// function returns NULL.
// The header name at the start of G.wget_buf is lower-cased in place, so
// callers can match it against lower-case keywords. A line whose name is
// not properly terminated is fatal ("bad header line").
// hdrval is set to the first non-blank character after the name; callers
// use the return value as the header value.
// If the line did not fit into G.wget_buf, the remainder of it is read and
// thrown away, up to the next newline or EOF.
// NOTE(review): declarations, the loop body, several conditions and the
// return statements are missing from this listing.
563 static char *gethdr(FILE *fp)
568 /* retrieve header line */
569 c = fgets_and_trim(fp, " %s\n");
571 /* end of the headers? */
572 if (G.wget_buf[0] == '\0')
575 /* convert the header name to lower case */
576 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
578 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
579 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
580 * "A-Z" maps to "a-z".
581 * "@[\]" can't occur in header names.
582 * "^_" maps to "~,DEL" (which is wrong).
583 * "^" was never seen yet, "_" was seen from web.archive.org
584 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
589 /* verify we are at the end of the header name */
591 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
593 /* locate the start of the header value */
595 hdrval = skip_whitespace(s);
598 /* Rats! The buffer isn't big enough to hold the entire header value */
599 while (c = getc(fp), c != EOF && c != '\n')
// reset_beg_range_to_zero(): called when the server did not honour a resume
// request. Reports "restart failed" and rewinds the output file to offset 0
// so the download is written from the beginning.
// The file is deliberately not truncated here; truncation happens at the
// end of retrieve_file_data().
// NOTE(review): a statement between the message and the seek is missing
// from this listing - presumably it resets G.beg_range, as the name says.
606 static void reset_beg_range_to_zero(void)
608 bb_error_msg("restart failed");
610 xlseek(G.output_fd, 0, SEEK_SET);
611 /* Done at the end instead: */
612 /* ftruncate(G.output_fd, 0); */
// spawn_https_helper_openssl(host, port): start an external
// "openssl s_client -quiet -connect HOST:PORT [-servername HOST]" process
// and talk to it over a socketpair.
//   host - server name; ":port" is appended when host contains no ':'.
//   port - TCP port used for that suffix.
// servername is a copy of host with the text from the last ':' removed; it
// is passed with -servername only when it is not a numeric IP address.
// socketpair() failure is fatal.
// The caller uses the return value as a file descriptor and treats a
// negative value as "openssl could not be run" when the internal TLS code
// is available as a fallback.
// NOTE(review): the fork, the fd shuffling in the child, the parent-side
// cleanup and the return statement are missing from this listing.
// NOTE(review): a bare IPv6 literal contains ':' already, so no port is
// appended and its last group is cut off servername - confirm how callers
// format IPv6 hosts.
615 #if ENABLE_FEATURE_WGET_OPENSSL
616 static int spawn_https_helper_openssl(const char *host, unsigned port)
618 char *allocated = NULL;
622 IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)
624 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
625 /* Kernel can have AF_UNIX support disabled */
626 bb_perror_msg_and_die("socketpair");
628 if (!strchr(host, ':'))
629 host = allocated = xasprintf("%s:%u", host, port);
630 servername = xstrdup(host);
631 strrchr(servername, ':')[0] = '\0';
643 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
644 * It prints some debug stuff on stderr, don't know how to suppress it.
645 * Work around by dev-nulling stderr. We lose all error messages :(
648 xopen("/dev/null", O_RDWR);
649 memset(&argv, 0, sizeof(argv));
650 argv[0] = (char*)"openssl";
651 argv[1] = (char*)"s_client";
652 argv[2] = (char*)"-quiet";
653 argv[3] = (char*)"-connect";
654 argv[4] = (char*)host;
656 * Per RFC 6066 Section 3, the only permitted values in the
657 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
658 * IPv4 and IPv6 addresses, port numbers are not allowed.
660 if (!is_ip_address(servername)) {
661 argv[5] = (char*)"-servername";
662 argv[6] = (char*)servername;
665 BB_EXECVP(argv[0], argv);
667 # if ENABLE_FEATURE_WGET_HTTPS
671 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
680 # if ENABLE_FEATURE_WGET_HTTPS
// spawn_ssl_client(host, network_fd, flags): put a TLS layer between the
// caller and an already connected socket.
//   host       - server name; a copy is made and searched for its last ':'
//                (presumably to cut off a port suffix - the statement
//                acting on p is not visible here).
//   network_fd - connected socket; on return this descriptor number refers
//                to one end of a socketpair instead (see xmove_fd at the
//                end), so the caller keeps using the same fd and stream.
//   flags      - passed to tls_run_copy_loop(); TLSLOOP_EXIT_ON_LOCAL_EOF
//                also maps to the -e option of the external helper.
// A child process is created (fork on MMU systems, vfork otherwise).
// Two child implementations are listed: one runs the internal TLS
// handshake and copy loop directly, the other moves the socket to fd 3 and
// executes "ssl_client -s3 -n SERVERNAME [-e]".
// socketpair() failure and exec failure are fatal.
// NOTE(review): the conditionals choosing between the two child variants,
// the stdin/stdout redirection and the parent-side cleanup are missing from
// this listing.
690 #if ENABLE_FEATURE_WGET_HTTPS
691 static void spawn_ssl_client(const char *host, int network_fd, int flags)
695 char *servername, *p;
697 servername = xstrdup(host);
698 p = strrchr(servername, ':');
701 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
702 /* Kernel can have AF_UNIX support disabled */
703 bb_perror_msg_and_die("socketpair");
706 pid = BB_MMU ? xfork() : xvfork();
713 tls_state_t *tls = new_tls_state();
714 tls->ifd = tls->ofd = network_fd;
715 tls_handshake(tls, servername);
716 tls_run_copy_loop(tls, flags);
721 xmove_fd(network_fd, 3);
722 argv[0] = (char*)"ssl_client";
723 argv[1] = (char*)"-s3";
724 //TODO: if (!is_ip_address(servername))...
725 argv[2] = (char*)"-n";
726 argv[3] = servername;
727 argv[4] = (flags & TLSLOOP_EXIT_ON_LOCAL_EOF ? (char*)"-e" : NULL);
729 BB_EXECVP(argv[0], argv);
730 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
738 xmove_fd(sp[0], network_fd);
// prepare_ftp_session(dfpp, target, lsa): log in to an FTP(S) server and
// set up the data connection for downloading target->path.
//   dfpp   - out: receives the stream of the passive-mode data connection.
//   target - parsed URL; user defaults to "anonymous:busybox@".
//   lsa    - server address; its port is overwritten with the data port
//            announced by the server before the data socket is opened.
// Steps, in order:
//   1. connect the control socket (wrapped in TLS for ftps) and expect a
//      220 greeting;
//   2. send USER, and PASS when required; any other outcome is fatal;
//   3. switch to binary mode (TYPE I);
//   4. ask for the file size (SIZE); a 213 reply sets G.content_len, and an
//      unparsable value is fatal;
//   5. enter passive mode: EPSV first when IPv6 support is on, otherwise or
//      on failure PASV (a reply other than 227 is fatal);
//   6. open the data connection; for ftps, request PROT P and wrap the data
//      socket in TLS if the server answers 200;
//   7. when resuming, send REST; on 350 the remaining length is reduced by
//      the resume offset, otherwise the download restarts from zero;
//   8. send RETR; a reply code above 150 is fatal.
// NOTE(review): declarations, several braces/else lines and the return
// statement are missing from this listing; the return type suggests the
// control stream is returned.
742 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
749 target->user = xstrdup("anonymous:busybox@");
751 sfp = open_socket(lsa);
752 #if ENABLE_FEATURE_WGET_HTTPS
753 if (target->protocol == P_FTPS)
754 spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
757 if (ftpcmd(NULL, NULL, sfp) != 220)
758 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
761 * Splitting username:password pair,
764 str = strchr(target->user, ':');
767 switch (ftpcmd("USER ", target->user, sfp)) {
771 if (ftpcmd("PASS ", str, sfp) == 230)
773 /* fall through (failed login) */
775 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
778 ftpcmd("TYPE I", NULL, sfp);
783 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
784 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
785 if (G.content_len < 0 || errno) {
786 bb_error_msg_and_die("SIZE value is garbage");
792 * Entering passive mode
794 if (ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229) {
797 if (ftpcmd("PASV", NULL, sfp) != 227) {
799 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
801 port = parse_pasv_epsv(G.wget_buf);
805 set_nport(&lsa->u.sa, htons(port));
807 *dfpp = open_socket(lsa);
809 #if ENABLE_FEATURE_WGET_HTTPS
810 if (target->protocol == P_FTPS) {
811 /* "PROT P" enables encryption of data stream.
812 * Without it (or with "PROT C"), data is sent unencrypted.
814 if (ftpcmd("PROT P", NULL, sfp) == 200)
815 spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
819 if (G.beg_range != 0) {
820 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
821 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
822 G.content_len -= G.beg_range;
824 reset_beg_range_to_zero();
827 if (ftpcmd("RETR ", target->path, sfp) > 150)
828 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
// retrieve_file_data(dfp): copy the response body from dfp to G.output_fd.
//   dfp - stream positioned at the start of the body (HTTP) or the FTP data
//         connection.
// Data is read with fread() into G.wget_buf and written with xwrite().
// When the status bar or the timeout feature is enabled, the descriptor is
// switched to non-blocking mode: a read that returns nothing with EAGAIN is
// followed by a poll() of at most one second, after which the bar is
// refreshed and, if a timeout is configured, a per-second countdown is
// decremented; reaching zero is fatal ("download timed out"). The countdown
// is reloaded after every successful read.
// Any read error other than EAGAIN is fatal; end of file ends the loop.
// For chunked transfers the outer loop reads the next chunk-size line (hex)
// after each chunk; a size of 0 ends the transfer.
// At the end the output file is truncated at the current write position,
// which only matters when a resume attempt fell back to a full restart, and
// the progress bar is drawn as complete.
// NOTE(review): declarations, loop headers, several braces and conditions,
// and the statements updating G.content_len / G.transferred are missing
// from this listing.
// NOTE(review): the chunk-size parse below has no error check (see the
// FIXME); a malformed size line is taken at whatever value the conversion
// yields.
833 static void NOINLINE retrieve_file_data(FILE *dfp)
835 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
836 # if ENABLE_FEATURE_WGET_TIMEOUT
837 unsigned second_cnt = G.timeout_seconds;
839 struct pollfd polldata;
841 polldata.fd = fileno(dfp);
842 polldata.events = POLLIN | POLLPRI;
844 progress_meter(PROGRESS_START);
849 /* Loops only if chunked */
852 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
853 /* Must use nonblocking I/O, otherwise fread will loop
854 * and *block* until it reads full buffer,
855 * which messes up progress bar and/or timeout logic.
856 * Because of nonblocking I/O, we need to dance
857 * very carefully around EAGAIN. See explanation at
860 ndelay_on(polldata.fd);
866 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
867 /* fread internally uses read loop, which in our case
868 * is usually exited when we get EAGAIN.
869 * In this case, libc sets error marker on the stream.
870 * Need to clear it before next fread to avoid possible
871 * rare false positive ferror below. Rare because usually
872 * fread gets more than zero bytes, and we don't fall
873 * into if (n <= 0) ...
878 rdsz = sizeof(G.wget_buf);
880 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
881 if ((int)G.content_len <= 0)
883 rdsz = (unsigned)G.content_len;
886 n = fread(G.wget_buf, 1, rdsz, dfp);
889 xwrite(G.output_fd, G.wget_buf, n);
890 #if ENABLE_FEATURE_WGET_STATUSBAR
895 if (G.content_len == 0)
898 #if ENABLE_FEATURE_WGET_TIMEOUT
899 second_cnt = G.timeout_seconds;
906 * If error occurs, or EOF is reached, the return value
907 * is a short item count (or zero).
908 * fread does not distinguish between EOF and error.
910 if (errno != EAGAIN) {
912 progress_meter(PROGRESS_END);
913 bb_perror_msg_and_die(bb_msg_read_error);
915 break; /* EOF, not error */
918 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
919 /* It was EAGAIN. There is no data. Wait up to one second
920 * then abort if timed out, or update the bar and try reading again.
922 if (safe_poll(&polldata, 1, 1000) == 0) {
923 # if ENABLE_FEATURE_WGET_TIMEOUT
924 if (second_cnt != 0 && --second_cnt == 0) {
925 progress_meter(PROGRESS_END);
926 bb_error_msg_and_die("download timed out");
929 /* We used to loop back to poll here,
930 * but there is no great harm in letting fread
931 * to try reading anyway.
936 /* Need to do it _every_ second for "stalled" indicator
937 * to be shown properly.
939 progress_meter(PROGRESS_BUMP);
940 } /* while (reading data) */
942 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
944 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
949 fgets_and_trim(dfp, NULL); /* Eat empty line */
951 fgets_and_trim(dfp, NULL);
952 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
953 /* FIXME: error check? */
954 if (G.content_len == 0)
955 break; /* all done! */
958 * Note that fgets may result in some data being buffered in dfp.
959 * We loop back to fread, which will retrieve this data.
960 * Also note that code has to be arranged so that fread
961 * is done _before_ one-second poll wait - poll doesn't know
962 * about stdio buffering and can result in spurious one second waits!
966 /* If -c failed, we restart from the beginning,
967 * but we do not truncate file then, we do it only now, at the end.
968 * This lets user to ^C if his 99% complete 10 GB file download
969 * failed to restart *without* losing the almost complete file.
972 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
973 if (pos != (off_t)-1)
974 ftruncate(G.output_fd, pos);
977 /* Draw full bar and free its resources */
978 G.chunked = 0; /* makes it show 100% even for chunked download */
979 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
980 progress_meter(PROGRESS_END);
983 static void download_one_url(const char *url)
985 bool use_proxy; /* Use proxies if env vars are set */
987 len_and_sockaddr *lsa;
988 FILE *sfp; /* socket to web/ftp server */
989 FILE *dfp; /* socket to ftp server (data) */
991 char *fname_out_alloc;
992 char *redirected_path = NULL;
993 struct host_info server;
994 struct host_info target;
996 server.allocated = NULL;
997 target.allocated = NULL;
1001 parse_url(url, &target);
1003 /* Use the proxy if necessary */
1004 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
1006 proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
1007 //FIXME: what if protocol is https? Ok to use http_proxy?
1008 use_proxy = (proxy && proxy[0]);
1010 parse_url(proxy, &server);
1013 server.port = target.port;
1014 if (ENABLE_FEATURE_IPV6) {
1015 //free(server.allocated); - can't be non-NULL
1016 server.host = server.allocated = xstrdup(target.host);
1018 server.host = target.host;
1022 if (ENABLE_FEATURE_IPV6)
1023 strip_ipv6_scope_id(target.host);
1025 /* If there was no -O FILE, guess output filename */
1026 fname_out_alloc = NULL;
1027 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1028 G.fname_out = bb_get_last_path_component_nostrip(target.path);
1029 /* handle "wget http://kernel.org//" */
1030 if (G.fname_out[0] == '/' || !G.fname_out[0])
1031 G.fname_out = (char*)"index.html";
1032 /* -P DIR is considered only if there was no -O FILE */
1034 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
1036 /* redirects may free target.path later, need to make a copy */
1037 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
1040 #if ENABLE_FEATURE_WGET_STATUSBAR
1041 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
1044 /* Determine where to start transfer */
1046 if (option_mask32 & WGET_OPT_CONTINUE) {
1047 G.output_fd = open(G.fname_out, O_WRONLY);
1048 if (G.output_fd >= 0) {
1049 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
1051 /* File doesn't exist. We do not create file here yet.
1052 * We are not sure it exists on remote side */
1057 lsa = xhost2sockaddr(server.host, server.port);
1058 if (!(option_mask32 & WGET_OPT_QUIET)) {
1059 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1060 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1064 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1067 if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
1074 /* Open socket to http(s) server */
1075 #if ENABLE_FEATURE_WGET_OPENSSL
1076 /* openssl (and maybe internal TLS) support is configured */
1077 if (target.protocol == P_HTTPS) {
1078 /* openssl-based helper
1079 * Inconvenient API since we can't give it an open fd
1081 int fd = spawn_https_helper_openssl(server.host, server.port);
1082 # if ENABLE_FEATURE_WGET_HTTPS
1083 if (fd < 0) { /* no openssl? try internal */
1084 sfp = open_socket(lsa);
1085 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
1089 /* We don't check for exec("openssl") failure in this case */
1091 sfp = fdopen(fd, "r+");
1093 bb_perror_msg_and_die(bb_msg_memory_exhausted);
1096 sfp = open_socket(lsa);
1098 #elif ENABLE_FEATURE_WGET_HTTPS
1099 /* Only internal TLS support is configured */
1100 sfp = open_socket(lsa);
1101 if (target.protocol == P_HTTPS)
1102 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
1104 /* ssl (https) support is not configured */
1105 sfp = open_socket(lsa);
1107 /* Send HTTP request */
1109 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1110 target.protocol, target.host,
1113 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1114 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1117 if (!USR_HEADER_HOST)
1118 SENDFMT(sfp, "Host: %s\r\n", target.host);
1119 if (!USR_HEADER_USER_AGENT)
1120 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1122 /* Ask server to close the connection as soon as we are done
1123 * (IOW: we do not intend to send more requests)
1125 SENDFMT(sfp, "Connection: close\r\n");
1127 #if ENABLE_FEATURE_WGET_AUTHENTICATION
1128 if (target.user && !USR_HEADER_AUTH) {
1129 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1130 base64enc(target.user));
1132 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1133 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1134 base64enc(server.user));
1138 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1139 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1141 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1142 if (G.extra_headers) {
1143 log_io(G.extra_headers);
1144 fputs(G.extra_headers, sfp);
1147 if (option_mask32 & WGET_OPT_POST_DATA) {
1149 "Content-Type: application/x-www-form-urlencoded\r\n"
1150 "Content-Length: %u\r\n"
1153 (int) strlen(G.post_data), G.post_data
1158 SENDFMT(sfp, "\r\n");
1163 /* Tried doing this unconditionally.
1164 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1167 if (target.protocol == P_HTTPS) {
1168 /* If we use SSL helper, keeping our end of the socket open for writing
1169 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1170 * even after child closes its copy of the fd.
1173 shutdown(fileno(sfp), SHUT_WR);
1178 * Retrieve HTTP response line and check for "200" status code.
1181 fgets_and_trim(sfp, " %s\n");
1184 str = skip_non_whitespace(str);
1185 str = skip_whitespace(str);
1186 // FIXME: no error check
1187 // xatou wouldn't work: "200 OK"
1192 while (gethdr(sfp) != NULL)
1193 /* eat all remaining headers */;
1196 /* Success responses */
1199 case 201: /* 201 Created */
1200 /* "The request has been fulfilled and resulted in a new resource being created" */
1201 /* Standard wget is reported to treat this as success */
1203 case 202: /* 202 Accepted */
1204 /* "The request has been accepted for processing, but the processing has not been completed" */
1205 /* Treat as success: fall through */
1206 case 203: /* 203 Non-Authoritative Information */
1207 /* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1209 case 204: /* 204 No Content */
1211 Response 204 doesn't say "null file", it says "metadata
1212 has changed but data didn't":
1214 "10.2.5 204 No Content
1215 The server has fulfilled the request but does not need to return
1216 an entity-body, and might want to return updated metainformation.
1217 The response MAY include new or updated metainformation in the form
1218 of entity-headers, which if present SHOULD be associated with
1219 the requested variant.
1221 If the client is a user agent, it SHOULD NOT change its document
1222 view from that which caused the request to be sent. This response
1223 is primarily intended to allow input for actions to take place
1224 without causing a change to the user agent's active document view,
1225 although any new or updated metainformation SHOULD be applied
1226 to the document currently in the user agent's active view.
1228 The 204 response MUST NOT include a message-body, and thus
1229 is always terminated by the first empty line after the header fields."
1231 However, in the real world it was observed that some web servers
1232 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1234 if (G.beg_range != 0) {
1235 /* "Range:..." was not honored by the server.
1236 * Restart download from the beginning.
1238 reset_beg_range_to_zero();
1241 /* 205 Reset Content ?? what to do on this ?? */
1243 case 300: /* redirection */
1249 case 206: /* Partial Content */
1250 if (G.beg_range != 0)
1251 /* "Range:..." worked. Good. */
1253 /* Partial Content even though we did not ask for it??? */
1256 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1260 * Retrieve HTTP headers.
1262 while ((str = gethdr(sfp)) != NULL) {
1263 static const char keywords[] ALIGN1 =
1264 "content-length\0""transfer-encoding\0""location\0";
1266 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1270 /* gethdr converted "FOO:" string to lowercase */
1272 /* strip trailing whitespace */
1273 char *s = strchrnul(str, '\0') - 1;
1274 while (s >= str && (*s == ' ' || *s == '\t')) {
1278 key = index_in_strings(keywords, G.wget_buf) + 1;
1279 if (key == KEY_content_length) {
1280 G.content_len = BB_STRTOOFF(str, NULL, 10);
1281 if (G.content_len < 0 || errno) {
1282 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1287 if (key == KEY_transfer_encoding) {
1288 if (strcmp(str_tolower(str), "chunked") != 0)
1289 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1292 if (key == KEY_location && status >= 300) {
1293 if (--redir_limit == 0)
1294 bb_error_msg_and_die("too many redirections");
1296 if (str[0] == '/') {
1297 free(redirected_path);
1298 target.path = redirected_path = xstrdup(str+1);
1299 /* lsa stays the same: it's on the same server */
1301 parse_url(str, &target);
1303 /* server.user remains untouched */
1304 free(server.allocated);
1305 server.allocated = NULL;
1306 server.host = target.host;
1307 /* strip_ipv6_scope_id(target.host); - no! */
1308 /* we assume remote never gives us IPv6 addr with scope id */
1309 server.port = target.port;
1312 } /* else: lsa stays the same: we use proxy */
1314 goto establish_session;
1317 // if (status >= 300)
1318 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1320 /* For HTTP, data is pumped over the same connection */
1326 sfp = prepare_ftp_session(&dfp, &target, lsa);
1331 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1332 if (G.output_fd < 0)
1333 G.output_fd = xopen(G.fname_out, G.o_flags);
1334 retrieve_file_data(dfp);
1335 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1336 xclose(G.output_fd);
1342 /* It's ftp. Close data connection properly */
1344 if (ftpcmd(NULL, NULL, sfp) != 226)
1345 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1346 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1350 free(server.allocated);
1351 free(target.allocated);
1354 free(fname_out_alloc);
1355 free(redirected_path);
1358 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1359 int wget_main(int argc UNUSED_PARAM, char **argv)
1361 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1362 static const char wget_longopts[] ALIGN1 =
1363 /* name, has_arg, val */
1364 "continue\0" No_argument "c"
1365 "quiet\0" No_argument "q"
1366 "server-response\0" No_argument "S"
1367 "output-document\0" Required_argument "O"
1368 "directory-prefix\0" Required_argument "P"
1369 "proxy\0" Required_argument "Y"
1370 "user-agent\0" Required_argument "U"
1371 IF_FEATURE_WGET_TIMEOUT(
1372 "timeout\0" Required_argument "T")
1374 IF_DESKTOP( "tries\0" Required_argument "t")
1375 "header\0" Required_argument "\xff"
1376 "post-data\0" Required_argument "\xfe"
1377 "spider\0" No_argument "\xfd"
1378 /* Ignored (we always use PASV): */
1379 IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
1380 /* Ignored (we don't do ssl) */
1381 IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
1382 /* Ignored (we don't support caching) */
1383 IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1384 IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1385 IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1386 IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1387 IF_DESKTOP( "no-parent\0" No_argument "\xf0")
1389 # define GETOPT32 getopt32long
1390 # define LONGOPTS ,wget_longopts
1392 # define GETOPT32 getopt32
1396 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1397 llist_t *headers_llist = NULL;
1402 #if ENABLE_FEATURE_WGET_TIMEOUT
1403 G.timeout_seconds = 900;
1404 signal(SIGALRM, alarm_handler);
1406 G.proxy_flag = "on"; /* use proxies if env vars are set */
1407 G.user_agent = "Wget"; /* "User-Agent" header field */
1409 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1415 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1416 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1417 * -nc --no-clobber: abort if FILE exists; neither download to FILE.n nor overwrite FILE
1418 * -nH --no-host-directories: wget -r http://host/ won't create host/
1420 * "n::" above says that we accept -n[ARG].
1421 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1424 "-1" /* at least one URL */
1425 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
1427 , &G.fname_out, &G.dir_prefix,
1428 &G.proxy_flag, &G.user_agent,
1429 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1430 NULL, /* -t RETRIES */
1432 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1433 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1435 #if 0 /* option bits debug */
1436 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1437 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1438 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1439 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1440 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1445 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1446 if (headers_llist) {
1449 llist_t *ll = headers_llist;
1451 size += strlen(ll->data) + 2;
1454 G.extra_headers = hdr = xmalloc(size + 1);
1455 while (headers_llist) {
1459 size = sprintf(hdr, "%s\r\n",
1460 (char*)llist_pop(&headers_llist));
1461 /* a bit like index_in_substrings but don't match full key */
1463 words = wget_user_headers;
1465 if (strstr(hdr, words) == hdr) {
1466 G.user_headers |= bit;
1470 words += strlen(words) + 1;
1478 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1479 if (G.fname_out) { /* -O FILE ? */
1480 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1482 option_mask32 &= ~WGET_OPT_CONTINUE;
1484 /* compat with wget: -O FILE can overwrite */
1485 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1489 download_one_url(*argv++);
1491 if (G.output_fd >= 0)
1492 xclose(G.output_fd);
1494 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1495 free(G.extra_headers);
1499 return EXIT_SUCCESS;