1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //config: bool "wget (35 kb)"
15 //config: wget is a utility for non-interactive download of files from HTTP
16 //config: and FTP servers.
18 //config:config FEATURE_WGET_LONG_OPTIONS
19 //config: bool "Enable long options"
21 //config: depends on WGET && LONG_OPTS
23 //config:config FEATURE_WGET_STATUSBAR
24 //config: bool "Enable progress bar (+2k)"
26 //config: depends on WGET
28 //config:config FEATURE_WGET_AUTHENTICATION
29 //config: bool "Enable HTTP authentication"
31 //config: depends on WGET
33 //config: Support authenticated HTTP transfers.
35 //config:config FEATURE_WGET_TIMEOUT
36 //config: bool "Enable timeout option -T SEC"
38 //config: depends on WGET
40 //config: Supports network read and connect timeouts for wget,
41 //config: so that wget will give up and time out, through the -T
42 //config: command line option.
44 //config: Currently only connect and network data read timeout are
45 //config: supported (i.e., timeout is not applied to the DNS query). When
46 //config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
47 //config: will work in addition to -T.
49 //config:config FEATURE_WGET_HTTPS
50 //config: bool "Support HTTPS using internal TLS code"
51 //it also enables FTPS support, but it's not well tested yet
53 //config: depends on WGET
56 //config: wget will use internal TLS code to connect to https:// URLs.
58 //config: On NOMMU machines, ssl_helper applet should be available
59 //config: in the $PATH for this to work. Make sure to select that applet.
61 //config: Note: currently, TLS code only makes TLS I/O work, it
62 //config: does *not* check that the peer is who it claims to be, etc.
63 //config: IOW: it uses peer-supplied public keys to establish encryption
64 //config: and signing keys, then encrypts and signs outgoing data and
65 //config: decrypts incoming data.
66 //config: It does not check signature hashes on the incoming data:
67 //config: this means that attackers manipulating TCP packets can
68 //config: send altered data and we unknowingly receive garbage.
69 //config: (This check might be relatively easy to add).
70 //config: It does not check public key's certificate:
71 //config: this means that the peer may be an attacker impersonating
72 //config: the server we think we are talking to.
74 //config: If you think this is unacceptable, consider this. As more and more
75 //config: servers switch to HTTPS-only operation, without such "crippled"
76 //config: TLS code it is *impossible* to simply download a kernel source
77 //config: from kernel.org. Which can in real world translate into
78 //config: "my small automatic tooling to build cross-compilers from sources
79 //config: no longer works, I need to additionally keep a local copy
80 //config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte
81 //config: source of wget, need to compile and build both before I can
82 //config: download anything. All this despite the fact that the build
83 //config: is done in a QEMU sandbox on a machine with absolutely nothing
84 //config: worth stealing, so I don't care if someone would go to a lot
85 //config: of trouble to intercept my HTTPS download to send me an altered
86 //config: kernel tarball".
88 //config: If you still think this is unacceptable, send patches.
90 //config: If you still think this is unacceptable, do not want to send
91 //config: patches, but do want to waste bandwidth explaining how wrong
92 //config: it is, you will be ignored.
94 //config:config FEATURE_WGET_OPENSSL
95 //config: bool "Try to connect to HTTPS using openssl"
97 //config: depends on WGET
99 //config: Try to use openssl to handle HTTPS.
101 //config: OpenSSL has a simple SSL client for debug purposes.
102 //config: If you select this option, wget will effectively run:
103 //config: "openssl s_client -quiet -connect hostname:443
104 //config: -servername hostname 2>/dev/null" and pipe its data
105 //config: through it. -servername is not used if hostname is numeric.
106 //config: Note inconvenient API: host resolution is done twice,
107 //config: and there is no guarantee openssl's idea of IPv6 address
108 //config: format is the same as ours.
109 //config: Another problem is that s_client prints debug information
110 //config: to stderr, and it needs to be suppressed. This means
111 //config: all error messages get suppressed too.
112 //config: openssl is also a big binary, often dynamically linked
113 //config: against ~15 libraries.
115 //config: If openssl can't be executed, internal TLS code will be used
116 //config: (if you enabled it); if openssl can be executed but fails later,
117 //config: wget can't detect this, and download will fail.
119 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
121 //kbuild:lib-$(CONFIG_WGET) += wget.o
123 //usage:#define wget_trivial_usage
124 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
125 //usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
126 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
127 /* Since we ignore these opts, we don't show them in --help */
128 /* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
129 /* //usage: " [-nv] [-nc] [-nH] [-np]" */
130 //usage: " [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
132 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
133 //usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
134 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
136 //usage:#define wget_full_usage "\n\n"
137 //usage: "Retrieve files via HTTP or FTP\n"
138 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
139 //usage: "\n --spider Only check URL existence: $? is 0 if exists"
141 //usage: "\n -c Continue retrieval of aborted transfer"
142 //usage: "\n -q Quiet"
143 //usage: "\n -P DIR Save to DIR (default .)"
144 //usage: "\n -S Show server response"
145 //usage: IF_FEATURE_WGET_TIMEOUT(
146 //usage: "\n -T SEC Network read timeout is SEC seconds"
148 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
149 //usage: "\n -U STR Use STR for User-Agent header"
150 //usage: "\n -Y on/off Use proxy"
155 # define log_io(...) bb_error_msg(__VA_ARGS__)
156 # define SENDFMT(fp, fmt, ...) \
158 log_io("> " fmt, ##__VA_ARGS__); \
159 fprintf(fp, fmt, ##__VA_ARGS__); \
162 # define log_io(...) ((void)0)
163 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
167 #define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
173 const char *protocol;
177 static const char P_FTP[] ALIGN1 = "ftp";
178 static const char P_HTTP[] ALIGN1 = "http";
180 # if ENABLE_FEATURE_WGET_HTTPS
181 static const char P_FTPS[] ALIGN1 = "ftps";
183 static const char P_HTTPS[] ALIGN1 = "https";
186 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
187 /* User-specified headers prevent using our corresponding built-in headers. */
190 HDR_USER_AGENT = (1<<1),
192 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
193 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
195 static const char wget_user_headers[] ALIGN1 =
199 # if ENABLE_FEATURE_WGET_AUTHENTICATION
201 "Proxy-Authorization:\0"
204 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
205 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
206 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
207 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
208 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
209 #else /* No long options, no user-headers :( */
210 # define USR_HEADER_HOST 0
211 # define USR_HEADER_USER_AGENT 0
212 # define USR_HEADER_RANGE 0
213 # define USR_HEADER_AUTH 0
214 # define USR_HEADER_PROXY_AUTH 0
219 off_t content_len; /* Content-length of the file */
220 off_t beg_range; /* Range at which continue begins */
221 #if ENABLE_FEATURE_WGET_STATUSBAR
222 off_t transferred; /* Number of bytes transferred so far */
223 const char *curfile; /* Name of current file being transferred */
227 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
230 unsigned char user_headers; /* Headers mentioned by the user */
232 char *fname_out; /* where to direct output (-O) */
233 const char *proxy_flag; /* Use proxies if env vars are set */
234 const char *user_agent; /* "User-Agent" header field */
235 #if ENABLE_FEATURE_WGET_TIMEOUT
236 unsigned timeout_seconds;
237 bool die_if_timed_out;
241 smallint chunked; /* chunked transfer encoding */
242 smallint got_clen; /* got content-length: from server */
243 /* Local downloads do benefit from big buffer.
244 * With 512 byte buffer, it was measured to be
245 * an order of magnitude slower than with big one.
247 uint64_t just_to_align_next_member;
248 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
250 #define G (*ptr_to_globals)
251 #define INIT_G() do { \
252 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
254 #define FINI_G() do { \
255 FREE_PTR_TO_GLOBALS(); \
259 /* Must match option string! */
261 WGET_OPT_CONTINUE = (1 << 0),
262 WGET_OPT_QUIET = (1 << 1),
263 WGET_OPT_SERVER_RESPONSE = (1 << 2),
264 WGET_OPT_OUTNAME = (1 << 3),
265 WGET_OPT_PREFIX = (1 << 4),
266 WGET_OPT_PROXY = (1 << 5),
267 WGET_OPT_USER_AGENT = (1 << 6),
268 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
269 WGET_OPT_RETRIES = (1 << 8),
270 WGET_OPT_nsomething = (1 << 9),
271 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
272 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
273 WGET_OPT_SPIDER = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
281 #if ENABLE_FEATURE_WGET_STATUSBAR
// Drives the transfer progress display; compiled under
// ENABLE_FEATURE_WGET_STATUSBAR. `flag` is compared against PROGRESS_START
// and PROGRESS_END below.
// NOTE(review): several short lines of this function are not visible in this
// view (braces, the statement guarded by the quiet test, two arguments of
// bb_progress_update) - confirm against the full file.
282 static void progress_meter(int flag)
// Quiet mode: the guarded statement is not visible here, presumably an early return.
284 if (option_mask32 & WGET_OPT_QUIET)
287 if (flag == PROGRESS_START)
288 bb_progress_init(&G.pmt, G.curfile);
290 bb_progress_update(&G.pmt,
// Last visible argument: 0 when the transfer is chunked or no Content-Length
// was received, otherwise beg_range + transferred + content_len.
293 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
// On PROGRESS_END the meter state is freed and a newline is written to stderr.
296 if (flag == PROGRESS_END) {
297 bb_progress_free(&G.pmt);
298 bb_putchar_stderr('\n');
// No-op variant with the same signature; its argument is marked UNUSED_PARAM.
// NOTE(review): presumably selected when ENABLE_FEATURE_WGET_STATUSBAR is off -
// the preprocessor line separating the two definitions is not visible here.
303 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
307 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
308 * local addresses can have a scope identifier to specify the
309 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
310 * identifier is only valid on a single node.
312 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
313 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
314 * in the Host header as invalid requests, see
315 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
// Edits `host` in place: for a bracketed IPv6 literal that carries a scope
// suffix (the text from '%' up to the closing bracket) the suffix is removed.
// NOTE(review): the declarations of `scope` and `cp`, the tests that lead to
// the early returns, and the braces are not visible in this view.
317 static void strip_ipv6_scope_id(char *host)
321 /* bbox wget actually handles IPv6 addresses without [], like
322 * wget "http://::1/xxx", but this is not standard.
323 * To save code, _here_ we do not support it. */
326 return; /* not IPv6 */
328 scope = strchr(host, '%');
332 /* Remove the IPv6 zone identifier from the host address */
333 cp = strchr(host, ']');
// The closing bracket must be followed by ':' or by the end of the string.
334 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
335 /* malformed address (not "[xx]:nn" or "[xx]") */
339 /* cp points to "]...", scope points to "%eth0]..." */
// Source and destination overlap, hence overlapping_strcpy: the tail that
// starts at the bracket is moved down over the scope identifier.
340 overlapping_strcpy(scope, cp);
343 #if ENABLE_FEATURE_WGET_AUTHENTICATION
344 /* Base64-encode character string. */
// Encodes `str` into the shared scratch buffer G.wget_buf by calling
// bb_uuencode with the base64 table.
// Input longer than sizeof(G.wget_buf)/4*3 - 10 bytes is clamped to that
// length before encoding, so over-long input is encoded only partially.
// NOTE(review): the return statement is not visible here; callers pass the
// result to a %s conversion, so it presumably returns G.wget_buf - confirm.
345 static char *base64enc(const char *str)
347 unsigned len = strlen(str);
348 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
349 len = sizeof(G.wget_buf)/4*3 - 10;
350 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
355 #if ENABLE_FEATURE_WGET_TIMEOUT
// Signal handler; the signal number is unused. When G.die_if_timed_out is set
// it prints the timeout message and terminates through bb_error_msg_and_die.
// NOTE(review): where this handler is installed is not visible in this view;
// the name suggests SIGALRM.
356 static void alarm_handler(int sig UNUSED_PARAM)
358 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
359 if (G.die_if_timed_out)
360 bb_error_msg_and_die("download timed out");
// Arms alarm() for G.timeout_seconds and sets G.die_if_timed_out, but only
// when a non-zero timeout is configured.
362 static void set_alarm(void)
364 if (G.timeout_seconds) {
365 alarm(G.timeout_seconds);
366 G.die_if_timed_out = 1;
369 # define clear_alarm() ((void)(G.die_if_timed_out = 0))
371 # define set_alarm() ((void)0)
372 # define clear_alarm() ((void)0)
375 #if ENABLE_FEATURE_WGET_OPENSSL
377 * is_ip_address() attempts to verify whether or not a string
378 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
379 * of inet_pton() can be used to determine this.
381 * TODO add proper error checking when inet_pton() returns -1
382 * (some form of system error has occurred, and errno is set)
// Returns non-zero when `string` parses as a numeric address. inet_pton is
// tried with AF_INET first; with ENABLE_FEATURE_IPV6 there is a second attempt
// with AF_INET6.
// NOTE(review): the condition guarding the AF_INET6 attempt is not visible in
// this view; presumably it runs only when the IPv4 parse did not succeed.
384 static int is_ip_address(const char *string)
386 struct sockaddr_in sa;
388 int result = inet_pton(AF_INET, string, &(sa.sin_addr));
389 # if ENABLE_FEATURE_IPV6
391 struct sockaddr_in6 sa6;
392 result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
// Only a result of exactly 1 counts as success, so an inet_pton error (-1)
// is reported the same way as a string that is not an address.
395 return (result == 1);
// Connects a stream socket to `lsa` with xconnect_stream and wraps the
// descriptor in a stdio stream opened in "r+" mode.
// NOTE(review): the declarations, the test in front of
// bb_die_memory_exhausted and the return statement are not visible here.
399 static FILE *open_socket(len_and_sockaddr *lsa)
405 fd = xconnect_stream(lsa);
408 /* glibc 2.4 seems to try seeking on it - ??! */
409 /* hopefully it understands what ESPIPE means... */
410 fp = fdopen(fd, "r+");
412 bb_die_memory_exhausted();
417 /* We balk at any control chars in other side's messages.
418 * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
419 * and error messages.
421 * The only exception is tabs, which are converted to (one) space:
422 * HTTP's "headers: <whitespace> values" may have those.
// NOTE(review): only the signature and the first declaration are visible in
// this view. Per the comment that precedes this function it stops at control
// characters and turns tabs into a single space - confirm against the body.
424 static char* sanitize_string(char *s)
// Unsigned view of the same buffer.
426 unsigned char *p = (void *) s;
439 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
// Reads one line from `fp` into G.wget_buf; a failed fgets is fatal.
// When `fmt` is non-NULL and WGET_OPT_SERVER_RESPONSE is set, the line is
// echoed to stderr with `fmt` used as the printf format and the line as its
// only argument.
// NOTE(review): the statements that store the terminators and the return
// statement are not visible in this view.
440 static char fgets_trim_sanitize(FILE *fp, const char *fmt)
446 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
447 bb_perror_msg_and_die("error getting response");
// strchrnul never returns NULL: it yields the match or the end of the string.
450 buf_ptr = strchrnul(G.wget_buf, '\n');
453 /* Disallow any control chars: trim at first char < 0x20 */
454 sanitize_string(G.wget_buf);
457 buf_ptr = strchrnul(G.wget_buf, '\r');
461 log_io("< %s", G.wget_buf);
463 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
464 fprintf(stderr, fmt, G.wget_buf);
// Sends the FTP command formed by s1 followed by s2 and CRLF on `fp`, then
// reads reply lines until one starts with a digit and has a space at index 3.
// The first three characters of that line are parsed into `result`; callers
// compare the value of this function against reply codes such as 220.
// NOTE(review): callers pass NULL for s1 and/or s2; the guards that handle
// this, the loop header and the return statement are not visible here.
469 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
475 fprintf(fp, "%s%s\r\n", s1, s2);
476 /* With --server-response, wget also shows its ftp commands */
477 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
478 fprintf(stderr, "--> %s%s\n\n", s1, s2);
480 log_io("> %s%s", s1, s2);
483 /* Read until "Nxx something" is received */
486 fgets_trim_sanitize(fp, "%s\n");
487 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
// Cut the buffer after the three-digit code so that only the number is parsed.
489 G.wget_buf[3] = '\0';
490 result = xatoi_positive(G.wget_buf);
// Splits `src_url` into the fields of `h`. A private copy of the URL is made
// with xstrdup and stored in h->allocated; the visible code works on that copy.
// Scheme handling: the text in front of "://" is compared with the known
// protocol names and selects h->port (bb_lookup_std_port with fallbacks
// 21, 990, 443 and 80) and h->protocol; any other scheme is fatal.
// NOTE(review): many short lines (terminator stores, h->host and h->path
// assignments, braces, else branches) are not visible in this view.
495 static void parse_url(const char *src_url, struct host_info *h)
500 h->allocated = url = xstrdup(src_url);
503 p = strstr(url, "://");
507 if (strcmp(url, P_FTP) == 0) {
508 h->port = bb_lookup_std_port(P_FTP, "tcp", 21);
511 # if ENABLE_FEATURE_WGET_HTTPS
512 if (strcmp(url, P_FTPS) == 0) {
513 h->port = bb_lookup_std_port(P_FTPS, "tcp", 990);
514 h->protocol = P_FTPS;
517 if (strcmp(url, P_HTTPS) == 0) {
518 h->port = bb_lookup_std_port(P_HTTPS, "tcp", 443);
519 h->protocol = P_HTTPS;
522 if (strcmp(url, P_HTTP) == 0) {
524 h->port = bb_lookup_std_port(P_HTTP, "tcp", 80);
525 h->protocol = P_HTTP;
528 bb_error_msg_and_die("not an http or ftp url: %s", url);
531 // GNU wget is user-friendly and falls back to http://
537 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
538 // 'GET /?var=a/b HTTP/1.0'
539 // and saves 'index.html?var=a%2Fb' (we save 'b')
540 // wget 'http://busybox.net?login=john@doe':
541 // request: 'GET /?login=john@doe HTTP/1.0'
542 // saves: 'index.html?login=john@doe' (we save 'login=john@doe')
543 // wget 'http://busybox.net#test/test':
544 // request: 'GET / HTTP/1.0'
545 // saves: 'index.html' (we save 'test')
547 // We also don't add unique .N suffix if file exists...
// After the next three lines sp is the earliest of '/', '?' and '#' found
// in h->host, or NULL when none of them is present.
548 sp = strchr(h->host, '/');
549 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
550 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
553 } else if (*sp == '/') {
557 // sp points to '#' or '?'
559 // http://busybox.net?login=john@doe is a valid URL
560 // (without '/' between ".net" and "?"),
561 // can't store NUL at sp[-1] - this destroys hostname.
// strrchr: the last '@' is the one looked up as the user-info delimiter.
566 sp = strrchr(h->host, '@');
568 // URL-decode "user:password" string before base64-encoding:
569 // wget http://test:my%20pass@example.com should send
570 // Authorization: Basic dGVzdDpteSBwYXNz
571 // which decodes to "test:my pass".
572 // Standard wget and curl do this too.
// h->user gets its own heap copy of the percent-decoded text.
575 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
578 /* else: h->user remains NULL, or as set by original request
579 * before redirect (if we are here after a redirect).
// Reads one header line from `fp` through fgets_trim_sanitize and splits it
// into a header name and a value that starts after leading whitespace.
// NOTE(review): the return statements are not visible here. The caller loop
// compares the result with NULL, and the end-of-headers test below presumably
// produces that NULL - confirm against the full file.
583 static char *get_sanitized_hdr(FILE *fp)
588 /* retrieve header line */
589 c = fgets_trim_sanitize(fp, " %s\n");
591 /* end of the headers? */
592 if (G.wget_buf[0] == '\0')
595 /* convert the header name to lower case */
// The loop stops at the first character that is not alphanumeric, '-', '.' or '_'.
596 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
598 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
599 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
600 * "A-Z" maps to "a-z".
601 * "@[\]" can't occur in header names.
602 * "^_" maps to "~,DEL" (which is wrong).
603 * "^" was never seen yet, "_" was seen from web.archive.org
604 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
609 /* verify we are at the end of the header name */
611 bb_error_msg_and_die("bad header line: %s", G.wget_buf);
613 /* locate the start of the header value */
615 hdrval = skip_whitespace(s);
618 /* Rats! The buffer isn't big enough to hold the entire header value */
// Reads characters from fp until newline or EOF; the loop body and the test
// that leads here are not visible in this view.
619 while (c = getc(fp), c != EOF && c != '\n')
// Used when a resume attempt was not honoured: reports "restart failed" and
// rewinds G.output_fd to offset 0. The file is deliberately not truncated
// here, as the comment at the end of this function notes.
// NOTE(review): the line that clears G.beg_range, implied by the function
// name, is not visible in this view.
626 static void reset_beg_range_to_zero(void)
628 bb_error_msg("restart failed");
630 xlseek(G.output_fd, 0, SEEK_SET);
631 /* Done at the end instead: */
632 /* ftruncate(G.output_fd, 0); */
635 #if ENABLE_FEATURE_WGET_OPENSSL
// Builds and executes the command line
//   openssl s_client -quiet -connect HOST:PORT [-servername HOST]
// as an external TLS helper. A socketpair is created first; failure to create
// it is fatal.
// NOTE(review): the fork, the descriptor plumbing, cleanup and the return
// value are not visible in this view. The caller treats a negative result as
// a reason to fall back to the internal TLS code when that is built in.
636 static int spawn_https_helper_openssl(const char *host, unsigned port)
638 char *allocated = NULL;
642 IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)
644 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
645 /* Kernel can have AF_UNIX support disabled */
646 bb_perror_msg_and_die("socketpair");
// A host without any colon gets ":port" appended for the -connect argument.
648 if (!strchr(host, ':'))
649 host = allocated = xasprintf("%s:%u", host, port);
// servername is a copy of host cut at its last colon.
650 servername = xstrdup(host);
651 strrchr(servername, ':')[0] = '\0';
663 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
664 * It prints some debug stuff on stderr, don't know how to suppress it.
665 * Work around by dev-nulling stderr. We lose all error messages :(
668 xopen("/dev/null", O_RDWR);
// Zero-filling argv first leaves the unused trailing slots as the NULL
// terminator of the argument vector.
669 memset(&argv, 0, sizeof(argv));
670 argv[0] = (char*)"openssl";
671 argv[1] = (char*)"s_client";
672 argv[2] = (char*)"-quiet";
673 argv[3] = (char*)"-connect";
674 argv[4] = (char*)host;
676 * Per RFC 6066 Section 3, the only permitted values in the
677 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
678 * IPv4 and IPv6 addresses, port numbers are not allowed.
680 if (!is_ip_address(servername)) {
681 argv[5] = (char*)"-servername";
682 argv[6] = (char*)servername;
685 BB_EXECVP(argv[0], argv);
687 # if ENABLE_FEATURE_WGET_HTTPS
691 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
700 # if ENABLE_FEATURE_WGET_HTTPS
710 #if ENABLE_FEATURE_WGET_HTTPS
// Inserts a TLS layer on `network_fd` using a forked helper process. Two
// helper variants are visible: running tls_handshake and tls_run_copy_loop
// directly, and executing the ssl_client applet. The last visible statement
// moves one end of the socketpair onto `network_fd`.
// `flags` is handed to tls_run_copy_loop, or mapped to the -e option of
// ssl_client when TLSLOOP_EXIT_ON_LOCAL_EOF is set.
// NOTE(review): the branch structure (child versus parent, which variant is
// used when) is not visible in this view - confirm against the full file.
711 static void spawn_ssl_client(const char *host, int network_fd, int flags)
715 char *servername, *p;
// The last colon in a copy of host is located; what is done with `p` is not
// visible here, presumably a port suffix is cut off.
717 servername = xstrdup(host);
718 p = strrchr(servername, ':');
721 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
722 /* Kernel can have AF_UNIX support disabled */
723 bb_perror_msg_and_die("socketpair");
// fork when BB_MMU is non-zero, vfork otherwise.
726 pid = BB_MMU ? xfork() : xvfork();
733 tls_state_t *tls = new_tls_state();
734 tls->ifd = tls->ofd = network_fd;
735 tls_handshake(tls, servername);
736 tls_run_copy_loop(tls, flags);
// Exec variant: the network descriptor is moved to fd 3 and the applet is
// started with the option -s3.
741 xmove_fd(network_fd, 3);
742 argv[0] = (char*)"ssl_client";
743 argv[1] = (char*)"-s3";
744 //TODO: if (!is_ip_address(servername))...
745 argv[2] = (char*)"-n";
746 argv[3] = servername;
747 argv[4] = (flags & TLSLOOP_EXIT_ON_LOCAL_EOF ? (char*)"-e" : NULL);
749 BB_EXECVP(argv[0], argv);
750 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
758 xmove_fd(sp[0], network_fd);
// Runs the FTP control dialogue on a new connection to `lsa`: greeting (220),
// USER and PASS login, TYPE I, SIZE, passive mode (EPSV when
// ENABLE_FEATURE_IPV6 is on, PASV otherwise or as fallback), REST when
// resuming, then RETR for target->path.
// The data connection is opened to the port taken from the passive reply and
// stored in *dfpp. Note that the port inside `lsa` is overwritten for that.
// For ftps the control connection is wrapped with spawn_ssl_client, and the
// data connection too when the server answers "PROT P" with 200.
// NOTE(review): the return statement is not visible; name and return type
// suggest that the control stream is returned. Several branches and braces
// are not visible either.
762 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
768 sfp = open_socket(lsa);
769 #if ENABLE_FEATURE_WGET_HTTPS
770 if (target->protocol == P_FTPS)
771 spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
774 if (ftpcmd(NULL, NULL, sfp) != 220)
775 bb_error_msg_and_die("%s", G.wget_buf);
776 /* note: ftpcmd() sanitizes G.wget_buf, ok to print */
778 /* Split username:password pair */
779 pass = (char*)"busybox"; /* password for "anonymous" */
781 pass = strchr(target->user, ':');
// Without a user name in the URL the login name "anonymous" is sent.
787 switch (ftpcmd("USER ", target->user ?: "anonymous", sfp)) {
791 if (ftpcmd("PASS ", pass, sfp) == 230)
793 /* fall through (failed login) */
795 bb_error_msg_and_die("ftp login: %s", G.wget_buf);
798 ftpcmd("TYPE I", NULL, sfp);
800 /* Query file size */
801 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
// ftpcmd cut the buffer after the three-digit code, so the size text starts
// at offset 4.
802 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
803 if (G.content_len < 0 || errno) {
804 bb_error_msg_and_die("bad SIZE value '%s'", G.wget_buf + 4);
809 /* Enter passive mode */
810 if (ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229) {
813 if (ftpcmd("PASV", NULL, sfp) != 227) {
815 bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
817 port = parse_pasv_epsv(G.wget_buf);
821 set_nport(&lsa->u.sa, htons(port));
823 *dfpp = open_socket(lsa);
825 #if ENABLE_FEATURE_WGET_HTTPS
826 if (target->protocol == P_FTPS) {
827 /* "PROT P" enables encryption of data stream.
828 * Without it (or with "PROT C"), data is sent unencrypted.
830 if (ftpcmd("PROT P", NULL, sfp) == 200)
831 spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
// Resume: when REST is accepted (350) the expected length shrinks by the
// part already on disk.
835 if (G.beg_range != 0) {
836 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
837 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
838 G.content_len -= G.beg_range;
840 reset_beg_range_to_zero();
843 //TODO: needs ftp-escaping 0xff and '\n' bytes here.
844 //Or disallow '\n' altogether via sanitize_string() in parse_url().
845 //But 0xff's are possible in valid utf8 filenames.
// Any reply code above 150 is treated as a failed RETR.
846 if (ftpcmd("RETR ", target->path, sfp) > 150)
847 bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);
// Copies the response body from `dfp` to G.output_fd through G.wget_buf while
// driving the progress meter. With a timeout configured, the download is
// aborted after G.timeout_seconds consecutive one-second polls without data;
// the counter is reset whenever data arrives.
// For chunked transfers each chunk length is parsed as hexadecimal into
// G.content_len and reading continues until a chunk of length zero.
// NOTE(review): loop headers, braces and several short statements are not
// visible in this view - confirm details against the full file.
852 static void NOINLINE retrieve_file_data(FILE *dfp)
854 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
855 # if ENABLE_FEATURE_WGET_TIMEOUT
856 unsigned second_cnt = G.timeout_seconds;
858 struct pollfd polldata;
860 polldata.fd = fileno(dfp);
861 polldata.events = POLLIN | POLLPRI;
863 progress_meter(PROGRESS_START);
868 /* Loops only if chunked */
871 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
872 /* Must use nonblocking I/O, otherwise fread will loop
873 * and *block* until it reads full buffer,
874 * which messes up progress bar and/or timeout logic.
875 * Because of nonblocking I/O, we need to dance
876 * very carefully around EAGAIN. See explanation at
879 ndelay_on(polldata.fd);
885 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
886 /* fread internally uses read loop, which in our case
887 * is usually exited when we get EAGAIN.
888 * In this case, libc sets error marker on the stream.
889 * Need to clear it before next fread to avoid possible
890 * rare false positive ferror below. Rare because usually
891 * fread gets more than zero bytes, and we don't fall
892 * into if (n <= 0) ...
// Default request size is the whole buffer; it is lowered to G.content_len
// below when that value is smaller than the buffer.
897 rdsz = sizeof(G.wget_buf);
899 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
900 if ((int)G.content_len <= 0)
902 rdsz = (unsigned)G.content_len;
905 n = fread(G.wget_buf, 1, rdsz, dfp);
908 xwrite(G.output_fd, G.wget_buf, n);
909 #if ENABLE_FEATURE_WGET_STATUSBAR
914 if (G.content_len == 0)
917 #if ENABLE_FEATURE_WGET_TIMEOUT
918 second_cnt = G.timeout_seconds;
925 * If error occurs, or EOF is reached, the return value
926 * is a short item count (or zero).
927 * fread does not distinguish between EOF and error.
929 if (errno != EAGAIN) {
931 progress_meter(PROGRESS_END);
932 bb_perror_msg_and_die(bb_msg_read_error);
934 break; /* EOF, not error */
937 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
938 /* It was EAGAIN. There is no data. Wait up to one second
939 * then abort if timed out, or update the bar and try reading again.
941 if (safe_poll(&polldata, 1, 1000) == 0) {
942 # if ENABLE_FEATURE_WGET_TIMEOUT
// A second_cnt of 0 means no timeout is in effect; otherwise count down one
// step per idle poll.
943 if (second_cnt != 0 && --second_cnt == 0) {
944 progress_meter(PROGRESS_END);
945 bb_error_msg_and_die("download timed out");
948 /* We used to loop back to poll here,
949 * but there is no great harm in letting fread
950 * to try reading anyway.
955 /* Need to do it _every_ second for "stalled" indicator
956 * to be shown properly.
958 progress_meter(PROGRESS_BUMP);
959 } /* while (reading data) */
961 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
963 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
968 /* Each chunk ends with "\r\n" - eat it */
969 fgets_trim_sanitize(dfp, NULL);
971 /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
972 fgets_trim_sanitize(dfp, NULL);
974 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
976 * Had a bug with inputs like "ffffffff0001f400"
977 * smashing the heap later. Ensure >= 0.
979 if (G.content_len < 0 || errno)
980 bb_error_msg_and_die("bad chunk length '%s'", G.wget_buf);
981 if (G.content_len == 0)
982 break; /* all done! */
985 * Note that fgets may result in some data being buffered in dfp.
986 * We loop back to fread, which will retrieve this data.
987 * Also note that code has to be arranged so that fread
988 * is done _before_ one-second poll wait - poll doesn't know
989 * about stdio buffering and can result in spurious one second waits!
993 /* If -c failed, we restart from the beginning,
994 * but we do not truncate file then, we do it only now, at the end.
995 * This lets user to ^C if his 99% complete 10 GB file download
996 * failed to restart *without* losing the almost complete file.
// Truncate at the current write position; the result of ftruncate is not checked.
999 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
1000 if (pos != (off_t)-1)
1001 ftruncate(G.output_fd, pos);
1004 /* Draw full bar and free its resources */
1005 G.chunked = 0; /* makes it show 100% even for chunked download */
1006 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
1007 progress_meter(PROGRESS_END);
1010 static void download_one_url(const char *url)
1012 bool use_proxy; /* Use proxies if env vars are set */
1014 len_and_sockaddr *lsa;
1015 FILE *sfp; /* socket to web/ftp server */
1016 FILE *dfp; /* socket to ftp server (data) */
1017 char *fname_out_alloc;
1018 char *redirected_path = NULL;
1019 struct host_info server;
1020 struct host_info target;
1022 server.allocated = NULL;
1023 target.allocated = NULL;
1027 parse_url(url, &target);
1029 /* Use the proxy if necessary */
1030 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
1032 char *proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
1033 //FIXME: what if protocol is https? Ok to use http_proxy?
1034 use_proxy = (proxy && proxy[0]);
1036 parse_url(proxy, &server);
1039 server.protocol = target.protocol;
1040 server.port = target.port;
1041 if (ENABLE_FEATURE_IPV6) {
1042 //free(server.allocated); - can't be non-NULL
1043 server.host = server.allocated = xstrdup(target.host);
1045 server.host = target.host;
1049 if (ENABLE_FEATURE_IPV6)
1050 strip_ipv6_scope_id(target.host);
1052 /* If there was no -O FILE, guess output filename */
1053 fname_out_alloc = NULL;
1054 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1055 G.fname_out = bb_get_last_path_component_nostrip(target.path);
1056 /* handle "wget http://kernel.org//" */
1057 if (G.fname_out[0] == '/' || !G.fname_out[0])
1058 G.fname_out = (char*)"index.html";
1059 /* -P DIR is considered only if there was no -O FILE */
1061 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
1063 /* redirects may free target.path later, need to make a copy */
1064 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
1067 #if ENABLE_FEATURE_WGET_STATUSBAR
1068 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
1071 /* Determine where to start transfer */
1073 if (option_mask32 & WGET_OPT_CONTINUE) {
1074 G.output_fd = open(G.fname_out, O_WRONLY);
1075 if (G.output_fd >= 0) {
1076 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
1078 /* File doesn't exist. We do not create file here yet.
1079 * We are not sure it exists on remote side */
1084 lsa = xhost2sockaddr(server.host, server.port);
1085 if (!(option_mask32 & WGET_OPT_QUIET)) {
1086 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1087 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1091 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1094 if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
1101 /* Open socket to http(s) server */
1102 #if ENABLE_FEATURE_WGET_OPENSSL
1103 /* openssl (and maybe internal TLS) support is configured */
1104 if (server.protocol == P_HTTPS) {
1105 /* openssl-based helper
1106 * Inconvenient API since we can't give it an open fd
1108 int fd = spawn_https_helper_openssl(server.host, server.port);
1109 # if ENABLE_FEATURE_WGET_HTTPS
1110 if (fd < 0) { /* no openssl? try internal */
1111 sfp = open_socket(lsa);
1112 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
1116 /* We don't check for exec("openssl") failure in this case */
1118 sfp = fdopen(fd, "r+");
1120 bb_die_memory_exhausted();
1123 sfp = open_socket(lsa);
1125 #elif ENABLE_FEATURE_WGET_HTTPS
1126 /* Only internal TLS support is configured */
1127 sfp = open_socket(lsa);
1128 if (server.protocol == P_HTTPS)
1129 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
1131 /* ssl (https) support is not configured */
1132 sfp = open_socket(lsa);
1134 /* Send HTTP request */
1136 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1137 target.protocol, target.host,
1140 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1141 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1144 if (!USR_HEADER_HOST)
1145 SENDFMT(sfp, "Host: %s\r\n", target.host);
1146 if (!USR_HEADER_USER_AGENT)
1147 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1149 /* Ask server to close the connection as soon as we are done
1150 * (IOW: we do not intend to send more requests)
1152 SENDFMT(sfp, "Connection: close\r\n");
1154 #if ENABLE_FEATURE_WGET_AUTHENTICATION
1155 if (target.user && !USR_HEADER_AUTH) {
1156 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1157 base64enc(target.user));
1159 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1160 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1161 base64enc(server.user));
1165 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1166 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1168 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1169 if (G.extra_headers) {
1170 log_io(G.extra_headers);
1171 fputs(G.extra_headers, sfp);
1174 if (option_mask32 & WGET_OPT_POST_DATA) {
1176 "Content-Type: application/x-www-form-urlencoded\r\n"
1177 "Content-Length: %u\r\n"
1180 (int) strlen(G.post_data), G.post_data
1185 SENDFMT(sfp, "\r\n");
1190 /* Tried doing this unconditionally.
1191 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1194 if (target.protocol == P_HTTPS) {
1195 /* If we use SSL helper, keeping our end of the socket open for writing
1196 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1197 * even after child closes its copy of the fd.
1200 shutdown(fileno(sfp), SHUT_WR);
1205 * Retrieve HTTP response line and check for "200" status code.
1208 fgets_trim_sanitize(sfp, " %s\n");
1211 str = skip_non_whitespace(str);
1212 str = skip_whitespace(str);
1213 // FIXME: no error check
1214 // xatou wouldn't work: "200 OK"
1219 while (get_sanitized_hdr(sfp) != NULL)
1220 /* eat all remaining headers */;
1223 /* Success responses */
1226 case 201: /* 201 Created */
1227 /* "The request has been fulfilled and resulted in a new resource being created" */
1228 /* Standard wget is reported to treat this as success */
1230 case 202: /* 202 Accepted */
1231 /* "The request has been accepted for processing, but the processing has not been completed" */
1232 /* Treat as success: fall through */
1233 case 203: /* 203 Non-Authoritative Information */
1234 /* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1236 case 204: /* 204 No Content */
1238 Response 204 doesn't say "null file", it says "metadata
1239 has changed but data didn't":
1241 "10.2.5 204 No Content
1242 The server has fulfilled the request but does not need to return
1243 an entity-body, and might want to return updated metainformation.
1244 The response MAY include new or updated metainformation in the form
1245 of entity-headers, which if present SHOULD be associated with
1246 the requested variant.
1248 If the client is a user agent, it SHOULD NOT change its document
1249 view from that which caused the request to be sent. This response
1250 is primarily intended to allow input for actions to take place
1251 without causing a change to the user agent's active document view,
1252 although any new or updated metainformation SHOULD be applied
1253 to the document currently in the user agent's active view.
1255 The 204 response MUST NOT include a message-body, and thus
1256 is always terminated by the first empty line after the header fields."
1258 However, in real world it was observed that some web servers
1259 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1261 if (G.beg_range != 0) {
1262 /* "Range:..." was not honored by the server.
1263 * Restart download from the beginning.
1265 reset_beg_range_to_zero();
1268 /* 205 Reset Content ?? what to do on this ?? */
1270 case 300: /* redirection */
1276 case 206: /* Partial Content */
1277 if (G.beg_range != 0)
1278 /* "Range:..." worked. Good. */
1280 /* Partial Content even though we did not ask for it??? */
1283 bb_error_msg_and_die("server returned error: %s", G.wget_buf);
1287 * Retrieve HTTP headers.
1289 while ((str = get_sanitized_hdr(sfp)) != NULL) {
1290 static const char keywords[] ALIGN1 =
1291 "content-length\0""transfer-encoding\0""location\0";
1293 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1297 /* get_sanitized_hdr converted "FOO:" string to lowercase */
1299 /* strip trailing whitespace */
1300 char *s = strchrnul(str, '\0') - 1;
1301 while (s >= str && (*s == ' ' || *s == '\t')) {
1305 key = index_in_strings(keywords, G.wget_buf) + 1;
1306 if (key == KEY_content_length) {
1307 G.content_len = BB_STRTOOFF(str, NULL, 10);
1308 if (G.content_len < 0 || errno) {
1309 bb_error_msg_and_die("content-length %s is garbage", str);
1314 if (key == KEY_transfer_encoding) {
1315 if (strcmp(str_tolower(str), "chunked") != 0)
1316 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
1319 if (key == KEY_location && status >= 300) {
1320 if (--redir_limit == 0)
1321 bb_error_msg_and_die("too many redirections");
1323 if (str[0] == '/') {
1324 free(redirected_path);
1325 target.path = redirected_path = xstrdup(str + 1);
1326 /* lsa stays the same: it's on the same server */
1328 parse_url(str, &target);
1330 /* server.user remains untouched */
1331 free(server.allocated);
1332 server.allocated = NULL;
1333 server.host = target.host;
1334 /* strip_ipv6_scope_id(target.host); - no! */
1335 /* we assume remote never gives us IPv6 addr with scope id */
1336 server.port = target.port;
1339 } /* else: lsa stays the same: we use proxy */
1341 goto establish_session;
1344 // if (status >= 300)
1345 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1347 /* For HTTP, data is pumped over the same connection */
1353 sfp = prepare_ftp_session(&dfp, &target, lsa);
1358 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1359 if (G.output_fd < 0)
1360 G.output_fd = xopen(G.fname_out, G.o_flags);
1361 retrieve_file_data(dfp);
1362 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1363 xclose(G.output_fd);
1369 /* It's ftp. Close data connection properly */
1371 if (ftpcmd(NULL, NULL, sfp) != 226)
1372 bb_error_msg_and_die("ftp error: %s", G.wget_buf);
1373 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1377 free(server.allocated);
1378 free(target.allocated);
1381 free(fname_out_alloc);
1382 free(redirected_path);
/*
 * wget applet entry point.
 *
 * Sets the default timeout, proxy and User-Agent values, parses the command
 * line into the fields of G, joins any --header arguments into a single
 * string, chooses the open(2) flags for the output file, and hands URL
 * arguments to download_one_url().
 *
 * argc is unused; argv is the argument vector.
 * The return statement visible here yields EXIT_SUCCESS.
 * NOTE(review): failures presumably terminate the process inside the x*() /
 * *_and_die() helpers rather than returning an error code - confirm.
 */
1385 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1386 int wget_main(int argc UNUSED_PARAM, char **argv)
/* Long-option table, compiled in only with FEATURE_WGET_LONG_OPTIONS.
 * Each entry is a NUL-terminated long name, its argument kind, and the
 * option character it is mapped to. */
1388 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1389 static const char wget_longopts[] ALIGN1 =
1390 /* name, has_arg, val */
1391 "continue\0" No_argument "c"
1392 "quiet\0" No_argument "q"
1393 "server-response\0" No_argument "S"
1394 "output-document\0" Required_argument "O"
1395 "directory-prefix\0" Required_argument "P"
1396 "proxy\0" Required_argument "Y"
1397 "user-agent\0" Required_argument "U"
1398 IF_FEATURE_WGET_TIMEOUT(
1399 "timeout\0" Required_argument "T")
1401 IF_DESKTOP( "tries\0" Required_argument "t")
/* The next three map to byte codes 0xff/0xfe/0xfd instead of a letter
 * (presumably because they have no short-option spelling - confirm). */
1402 "header\0" Required_argument "\xff"
1403 "post-data\0" Required_argument "\xfe"
1404 "spider\0" No_argument "\xfd"
1405 /* Ignored (we always use PASV): */
1406 IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
1407 /* Ignored (we don't do ssl) */
1408 IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
1409 /* Ignored (we don't support caching) */
1410 IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1411 IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1412 IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1413 IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1414 IF_DESKTOP( "no-parent\0" No_argument "\xf0")
/* GETOPT32/LONGOPTS select getopt32long() fed with the table above,
 * or plain getopt32() in the other configuration. */
1416 # define GETOPT32 getopt32long
1417 # define LONGOPTS ,wget_longopts
1419 # define GETOPT32 getopt32
/* Receives the --header arguments; it is passed to the option parser below. */
1423 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1424 llist_t *headers_llist = NULL;
/* Default timeout is 900 seconds (the parser may overwrite it through
 * &G.timeout_seconds); SIGALRM is routed to alarm_handler. */
1429 #if ENABLE_FEATURE_WGET_TIMEOUT
1430 G.timeout_seconds = 900;
1431 signal(SIGALRM, alarm_handler);
1433 G.proxy_flag = "on"; /* use proxies if env vars are set */
1434 G.user_agent = "Wget"; /* "User-Agent" header field */
1436 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1442 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1443 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1444 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1445 * -nH --no-host-directories: wget -r http://host/ won't create host/
1447 * "n::" above says that we accept -n[ARG].
1448 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1451 "-1" /* at least one URL */
1452 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
1454 , &G.fname_out, &G.dir_prefix,
1455 &G.proxy_flag, &G.user_agent,
1456 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1457 NULL, /* -t RETRIES */
1459 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1460 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1462 #if 0 /* option bits debug */
1463 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1464 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1465 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1466 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1467 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
/* Join all --header values into one string in G.extra_headers, appending
 * "\r\n" to each. While doing so, set a bit in G.user_headers for every
 * entry of wget_user_headers that the header text starts with. */
1472 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1473 if (headers_llist) {
1476 llist_t *ll = headers_llist;
/* Size pass: each header needs its own length plus 2 bytes for "\r\n" */
1478 size += strlen(ll->data) + 2;
/* One extra byte for the terminating NUL */
1481 G.extra_headers = hdr = xmalloc(size + 1);
1482 while (headers_llist) {
1486 size = sprintf(hdr, "%s\r\n",
1487 (char*)llist_pop(&headers_llist));
1488 /* a bit like index_in_substrings but don't match full key */
1490 words = wget_user_headers;
/* strstr() returning hdr itself means hdr begins with this keyword */
1492 if (strstr(hdr, words) == hdr) {
1493 G.user_headers |= bit;
/* Step to the next NUL-separated keyword */
1497 words += strlen(words) + 1;
/* Default open flags include O_EXCL, so creating an output file that
 * already exists fails; the -O FILE case below drops O_EXCL. */
1505 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1506 if (G.fname_out) { /* -O FILE ? */
1507 if (LONE_DASH(G.fname_out)) { /* -O - ? */
/* The continue (-c) option bit is cleared for "-O -" */
1509 option_mask32 &= ~WGET_OPT_CONTINUE;
1511 /* compat with wget: -O FILE can overwrite */
1512 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
/* NOTE(review): presumably invoked once per remaining URL argument;
 * confirm against the enclosing loop condition. */
1516 download_one_url(*argv++);
/* Close the output descriptor if one is still open */
1518 if (G.output_fd >= 0)
1519 xclose(G.output_fd);
/* Release the joined header string when clean-up is configured in */
1521 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1522 free(G.extra_headers);
1526 return EXIT_SUCCESS;