1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //config: bool "wget (38 kb)"
15 //config: wget is a utility for non-interactive download of files from HTTP
16 //config: and FTP servers.
18 //config:config FEATURE_WGET_LONG_OPTIONS
19 //config: bool "Enable long options"
21 //config: depends on WGET && LONG_OPTS
23 //config:config FEATURE_WGET_STATUSBAR
24 //config: bool "Enable progress bar (+2k)"
26 //config: depends on WGET
28 //config:config FEATURE_WGET_AUTHENTICATION
29 //config: bool "Enable HTTP authentication"
31 //config: depends on WGET
33 //config: Support authenticated HTTP transfers.
35 //config:config FEATURE_WGET_TIMEOUT
36 //config: bool "Enable timeout option -T SEC"
38 //config: depends on WGET
40 //config: Supports network read and connect timeouts for wget,
41 //config: so that wget will give up and timeout, through the -T
42 //config: command line option.
44 //config: Currently only connect and network data read timeout are
45 //config: supported (i.e., timeout is not applied to the DNS query). When
46 //config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
47 //config: will work in addition to -T.
49 //config:config FEATURE_WGET_HTTPS
50 //config: bool "Support HTTPS using internal TLS code"
51 //it also enables FTPS support, but it's not well tested yet
53 //config: depends on WGET
56 //config: wget will use internal TLS code to connect to https:// URLs.
58 //config: On NOMMU machines, ssl_helper applet should be available
59 //config: in the $PATH for this to work. Make sure to select that applet.
61 //config: Note: currently, TLS code only makes TLS I/O work, it
62 //config: does *not* check that the peer is who it claims to be, etc.
63 //config: IOW: it uses peer-supplied public keys to establish encryption
64 //config: and signing keys, then encrypts and signs outgoing data and
65 //config: decrypts incoming data.
66 //config: It does not check signature hashes on the incoming data:
67 //config: this means that attackers manipulating TCP packets can
68 //config: send altered data and we unknowingly receive garbage.
69 //config: (This check might be relatively easy to add).
70 //config: It does not check public key's certificate:
71 //config: this means that the peer may be an attacker impersonating
72 //config: the server we think we are talking to.
74 //config: If you think this is unacceptable, consider this. As more and more
75 //config: servers switch to HTTPS-only operation, without such "crippled"
76 //config: TLS code it is *impossible* to simply download a kernel source
77 //config: from kernel.org. Which can in real world translate into
78 //config: "my small automatic tooling to build cross-compilers from sources
79 //config: no longer works, I need to additionally keep a local copy
80 //config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte
81 //config: source of wget, need to compile and build both before I can
82 //config: download anything. All this despite the fact that the build
83 //config: is done in a QEMU sandbox on a machine with absolutely nothing
84 //config: worth stealing, so I don't care if someone would go to a lot
85 //config: of trouble to intercept my HTTPS download to send me an altered
86 //config: kernel tarball".
88 //config: If you still think this is unacceptable, send patches.
90 //config: If you still think this is unacceptable, do not want to send
91 //config: patches, but do want to waste bandwidth explaining how wrong
92 //config: it is, you will be ignored.
94 //config:config FEATURE_WGET_OPENSSL
95 //config: bool "Try to connect to HTTPS using openssl"
97 //config: depends on WGET
99 //config: Try to use openssl to handle HTTPS.
101 //config: OpenSSL has a simple SSL client for debug purposes.
102 //config: If you select this option, wget will effectively run:
103 //config: "openssl s_client -quiet -connect hostname:443
104 //config: -servername hostname 2>/dev/null" and pipe its data
105 //config: through it. -servername is not used if hostname is numeric.
106 //config: Note inconvenient API: host resolution is done twice,
107 //config: and there is no guarantee openssl's idea of IPv6 address
108 //config: format is the same as ours.
109 //config: Another problem is that s_client prints debug information
110 //config: to stderr, and it needs to be suppressed. This means
111 //config: all error messages get suppressed too.
112 //config: openssl is also a big binary, often dynamically linked
113 //config: against ~15 libraries.
115 //config: If openssl can't be executed, internal TLS code will be used
116 //config: (if you enabled it); if openssl can be executed but fails later,
117 //config: wget can't detect this, and download will fail.
119 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
121 //kbuild:lib-$(CONFIG_WGET) += wget.o
123 //usage:#define wget_trivial_usage
124 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
125 //usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
126 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
127 /* Since we ignore these opts, we don't show them in --help */
128 /* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
129 /* //usage: " [-nv] [-nc] [-nH] [-np]" */
130 //usage: " [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
132 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
133 //usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
134 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
136 //usage:#define wget_full_usage "\n\n"
137 //usage: "Retrieve files via HTTP or FTP\n"
138 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
139 //usage: "\n --spider Only check URL existence: $? is 0 if exists"
140 ///////: "\n --no-check-certificate Don't validate the server's certificate"
142 //usage: "\n -c Continue retrieval of aborted transfer"
143 //usage: "\n -q Quiet"
144 //usage: "\n -P DIR Save to DIR (default .)"
145 //usage: "\n -S Show server response"
146 //usage: IF_FEATURE_WGET_TIMEOUT(
147 //usage: "\n -T SEC Network read timeout is SEC seconds"
149 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
150 //usage: "\n -U STR Use STR for User-Agent header"
151 //usage: "\n -Y on/off Use proxy"
156 # define log_io(...) bb_error_msg(__VA_ARGS__)
157 # define SENDFMT(fp, fmt, ...) \
159 log_io("> " fmt, ##__VA_ARGS__); \
160 fprintf(fp, fmt, ##__VA_ARGS__); \
163 # define log_io(...) ((void)0)
164 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
168 #define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
174 const char *protocol;
178 static const char P_FTP[] ALIGN1 = "ftp";
179 static const char P_HTTP[] ALIGN1 = "http";
181 # if ENABLE_FEATURE_WGET_HTTPS
182 static const char P_FTPS[] ALIGN1 = "ftps";
184 static const char P_HTTPS[] ALIGN1 = "https";
187 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
188 /* User-specified headers prevent using our corresponding built-in headers. */
191 HDR_USER_AGENT = (1<<1),
193 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
194 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
196 static const char wget_user_headers[] ALIGN1 =
200 # if ENABLE_FEATURE_WGET_AUTHENTICATION
202 "Proxy-Authorization:\0"
205 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
206 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
207 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
208 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
209 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
210 #else /* No long options, no user-headers :( */
211 # define USR_HEADER_HOST 0
212 # define USR_HEADER_USER_AGENT 0
213 # define USR_HEADER_RANGE 0
214 # define USR_HEADER_AUTH 0
215 # define USR_HEADER_PROXY_AUTH 0
220 off_t content_len; /* Content-length of the file */
221 off_t beg_range; /* Range at which continue begins */
222 #if ENABLE_FEATURE_WGET_STATUSBAR
223 off_t transferred; /* Number of bytes transferred so far */
224 const char *curfile; /* Name of current file being transferred */
228 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
231 unsigned char user_headers; /* Headers mentioned by the user */
233 char *fname_out; /* where to direct output (-O) */
234 const char *proxy_flag; /* Use proxies if env vars are set */
235 const char *user_agent; /* "User-Agent" header field */
238 #if ENABLE_FEATURE_WGET_TIMEOUT
239 unsigned timeout_seconds;
240 smallint die_if_timed_out;
242 smallint chunked; /* chunked transfer encoding */
243 smallint got_clen; /* got content-length: from server */
244 /* Local downloads do benefit from big buffer.
245 * With 512 byte buffer, it was measured to be
246 * an order of magnitude slower than with big one.
248 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024] ALIGNED(sizeof(long));
250 #define G (*ptr_to_globals)
251 #define INIT_G() do { \
252 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
254 #define FINI_G() do { \
255 FREE_PTR_TO_GLOBALS(); \
259 /* Must match option string! */
261 WGET_OPT_CONTINUE = (1 << 0),
262 WGET_OPT_QUIET = (1 << 1),
263 WGET_OPT_SERVER_RESPONSE = (1 << 2),
264 WGET_OPT_OUTNAME = (1 << 3),
265 WGET_OPT_PREFIX = (1 << 4),
266 WGET_OPT_PROXY = (1 << 5),
267 WGET_OPT_USER_AGENT = (1 << 6),
268 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
269 WGET_OPT_RETRIES = (1 << 8),
270 WGET_OPT_nsomething = (1 << 9),
271 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
272 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
273 WGET_OPT_SPIDER = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
274 WGET_OPT_NO_CHECK_CERT = (1 << 13) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
282 #if ENABLE_FEATURE_WGET_STATUSBAR
// Progress-bar driver for the download loop (this variant is only built
// when ENABLE_FEATURE_WGET_STATUSBAR is set).
// "flag" is compared against PROGRESS_START and PROGRESS_END below;
// callers in this file also pass PROGRESS_BUMP for a plain refresh.
283 static void progress_meter(int flag)
// The -q (quiet) option bit is tested first.
// NOTE(review): the statement guarded by this test is not visible in this
// view - presumably an early return; confirm.
287 if (option_mask32 & WGET_OPT_QUIET)
// First call for a file: set up the bar state with the current file name.
290 if (flag == PROGRESS_START)
291 bb_progress_init(&G.pmt, G.curfile);
// For chunked transfers, or when no Content-Length was received, 0 is
// passed instead of beg_range + transferred + content_len.
293 notty = bb_progress_update(&G.pmt,
296 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
// Last call for a file: release the bar state.
299 if (flag == PROGRESS_END) {
300 bb_progress_free(&G.pmt);
302 bb_putchar_stderr('\n'); /* it's tty */
307 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
311 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
312 * local addresses can have a scope identifier to specify the
313 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
314 * identifier is only valid on a single node.
316 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
317 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
318 * in the Host header as invalid requests, see
319 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
// Removes an IPv6 zone identifier (the "%eth0" part of "[fe80::1%eth0]")
// from the host string. The string is edited in place; nothing is returned.
// Only the bracketed "[addr]" / "[addr]:port" forms are handled here.
321 static void strip_ipv6_scope_id(char *host)
325 /* bbox wget actually handles IPv6 addresses without [], like
326 * wget "http://::1/xxx", but this is not standard.
327 * To save code, _here_ we do not support it. */
330 return; /* not IPv6 */
// Find where the zone identifier starts, if there is one.
332 scope = strchr(host, '%');
336 /* Remove the IPv6 zone identifier from the host address */
337 cp = strchr(host, ']');
// After the closing bracket only ":" (a port follows) or end of string
// is accepted.
338 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
339 /* malformed address (not "[xx]:nn" or "[xx]") */
// Source and destination are parts of the same string, so an
// overlap-safe copy is used to move the "]..." tail over "%zone".
343 /* cp points to "]...", scope points to "%eth0]..." */
344 overlapping_strcpy(scope, cp);
347 #if ENABLE_FEATURE_WGET_AUTHENTICATION
348 /* Base64-encode character string. */
// Encodes "str" with the base64 table into the shared buffer G.wget_buf.
// At most sizeof(G.wget_buf)/4*3 - 10 input bytes are encoded; anything
// beyond that bound is silently ignored because of the strnlen() limit.
// NOTE(review): the return statement is not visible in this view;
// presumably the result is G.wget_buf itself, in which case it is only
// valid until the next use of that buffer - confirm.
349 static char *base64enc(const char *str)
352 unsigned len = strnlen(str, sizeof(G.wget_buf)/4*3 - 10);
353 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
358 #if ENABLE_FEATURE_WGET_TIMEOUT
// Signal handler for the timeout feature; the signal number is unused.
// Terminates the program with "download timed out", but only while
// G.die_if_timed_out is set (see set_alarm / clear_alarm).
// NOTE(review): where this handler is installed is not visible here.
359 static void alarm_handler(int sig UNUSED_PARAM)
361 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
362 if (G.die_if_timed_out)
363 bb_error_msg_and_die("download timed out");
// Arms alarm() with G.timeout_seconds and marks a timeout as fatal by
// setting G.die_if_timed_out. Does nothing when timeout_seconds is 0.
// The clear_alarm() macro only resets the flag; it does not cancel the
// pending alarm.
365 static void set_alarm(void)
367 if (G.timeout_seconds) {
368 alarm(G.timeout_seconds);
369 G.die_if_timed_out = 1;
372 # define clear_alarm() ((void)(G.die_if_timed_out = 0))
374 # define set_alarm() ((void)0)
375 # define clear_alarm() ((void)0)
378 #if ENABLE_FEATURE_WGET_OPENSSL
380 * is_ip_address() attempts to verify whether or not a string
381 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
382 * of inet_pton() can be used to determine this.
384 * TODO add proper error checking when inet_pton() returns -1
385 * (some form of system error has occurred, and errno is set)
// Returns 1 when inet_pton() accepts "string" as a numeric address and
// 0 otherwise. AF_INET is tried first; the AF_INET6 attempt is compiled
// in only with ENABLE_FEATURE_IPV6.
387 static int is_ip_address(const char *string)
389 struct sockaddr_in sa;
391 int result = inet_pton(AF_INET, string, &(sa.sin_addr));
392 # if ENABLE_FEATURE_IPV6
// NOTE(review): the condition guarding the IPv6 attempt is not visible
// in this view.
394 struct sockaddr_in6 sa6;
395 result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
// Any result other than 1 (including a negative error result) is
// reported as "not an IP address".
398 return (result == 1);
// Connects a stream socket to the address in "lsa" and wraps the
// descriptor in a stdio stream opened for reading and writing ("r+").
// NOTE(review): the return statement is not visible in this view;
// presumably the new FILE pointer is returned.
402 static FILE *open_socket(len_and_sockaddr *lsa)
408 fd = xconnect_stream(lsa);
411 /* glibc 2.4 seems to try seeking on it - ??! */
412 /* hopefully it understands what ESPIPE means... */
413 fp = fdopen(fd, "r+");
// NOTE(review): the test guarding this call is not visible; presumably
// it fires when fdopen() returned NULL.
415 bb_die_memory_exhausted();
420 /* We balk at any control chars in other side's messages.
421 * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
422 * and error messages.
424 * The only exception is tabs, which are converted to (one) space:
425 * HTTP's "headers: <whitespace> values" may have those.
427 static char* sanitize_string(char *s)
429 unsigned char *p = (void *) s;
442 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
// Reads one line from "fp" into the shared buffer G.wget_buf and cleans
// it up with sanitize_string().
// fp:  stream to read from; a failed read is fatal.
// fmt: printf format with a single %s, or NULL. When it is non-NULL and
//      the server-response option (WGET_OPT_SERVER_RESPONSE) is on, the
//      cleaned line is echoed to stderr through it.
443 static char fgets_trim_sanitize(FILE *fp, const char *fmt)
// fgets() returning NULL (error or EOF before any data) terminates the program.
449 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
450 bb_perror_msg_and_die("error getting response");
// strchrnul() yields the newline position, or the terminating NUL when
// the line did not fit into the buffer.
453 buf_ptr = strchrnul(G.wget_buf, '\n');
456 /* Disallow any control chars: trim at first char < 0x20 */
457 sanitize_string(G.wget_buf);
// NOTE(review): preprocessor lines are missing around here; this '\r'
// search may belong to a disabled alternative branch - confirm.
460 buf_ptr = strchrnul(G.wget_buf, '\r');
464 log_io("< %s", G.wget_buf);
466 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
467 fprintf(stderr, fmt, G.wget_buf);
// Sends one FTP command and waits for its reply.
// s1, s2: the command is the concatenation s1+s2 followed by CR LF.
//         Callers in this file also pass NULL for one or both.
//         NOTE(review): the guards around the send are not visible here.
// fp:     control connection stream.
// Reply lines are read until one starts with a digit and has a space at
// index 3. The result is computed from the first three characters of
// that line. G.wget_buf[3] is overwritten with NUL, so the reply text is
// still available to callers at G.wget_buf + 4.
472 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
478 fprintf(fp, "%s%s\r\n", s1, s2);
479 /* With --server-response, wget also shows its ftp commands */
480 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
481 fprintf(stderr, "--> %s%s\n\n", s1, s2);
483 log_io("> %s%s", s1, s2);
486 /* Read until "Nxx something" is received */
489 fgets_trim_sanitize(fp, "%s\n");
490 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
// Cut the line after the three-digit code before converting it.
492 G.wget_buf[3] = '\0';
493 result = xatoi_positive(G.wget_buf);
// Splits "src_url" into the fields of "*h".
// The work is done on a private copy of the URL (stored in h->allocated),
// so src_url itself is not modified.
// The scheme in front of "://" selects h->protocol and the default port:
// ftp 21, ftps 990 (only with ENABLE_FEATURE_WGET_HTTPS), https 443,
// http 80. Any other scheme is fatal.
// h->protocol is set to one of the P_xxx arrays; other code in this file
// compares it by pointer (for example "target->protocol == P_FTPS").
// Credentials in front of the last '@' are percent-decoded and stored in
// a newly allocated h->user.
498 static void parse_url(const char *src_url, struct host_info *h)
503 h->allocated = url = xstrdup(src_url);
506 p = strstr(url, "://");
// NOTE(review): the strcmp() calls below compare "url" against a bare
// scheme name, so the copy is presumably cut at "://" on a line that is
// not visible in this view.
510 if (strcmp(url, P_FTP) == 0) {
511 h->port = bb_lookup_std_port(P_FTP, "tcp", 21);
514 # if ENABLE_FEATURE_WGET_HTTPS
515 if (strcmp(url, P_FTPS) == 0) {
516 h->port = bb_lookup_std_port(P_FTPS, "tcp", 990);
517 h->protocol = P_FTPS;
520 if (strcmp(url, P_HTTPS) == 0) {
521 h->port = bb_lookup_std_port(P_HTTPS, "tcp", 443);
522 h->protocol = P_HTTPS;
525 if (strcmp(url, P_HTTP) == 0) {
527 h->port = bb_lookup_std_port(P_HTTP, "tcp", 80);
528 h->protocol = P_HTTP;
531 bb_error_msg_and_die("not an http or ftp url: %s", url);
534 // GNU wget is user-friendly and falls back to http://
540 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
541 // 'GET /?var=a/b HTTP/1.0'
542 // and saves 'index.html?var=a%2Fb' (we save 'b')
543 // wget 'http://busybox.net?login=john@doe':
544 // request: 'GET /?login=john@doe HTTP/1.0'
545 // saves: 'index.html?login=john@doe' (we save 'login=john@doe')
546 // wget 'http://busybox.net#test/test':
547 // request: 'GET / HTTP/1.0'
548 // saves: 'index.html' (we save 'test')
550 // We also don't add unique .N suffix if file exists...
// sp ends up at the earliest of '/', '?' and '#' after the host part,
// or NULL when none of them is present.
551 sp = strchr(h->host, '/');
552 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
553 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
556 } else if (*sp == '/') {
560 // sp points to '#' or '?'
562 // http://busybox.net?login=john@doe is a valid URL
563 // (without '/' between ".net" and "?"),
564 // can't store NUL at sp[-1] - this destroys hostname.
// The last '@' is used, so any earlier '@' stays part of the user info.
569 sp = strrchr(h->host, '@');
571 // URL-decode "user:password" string before base64-encoding:
572 // wget http://test:my%20pass@example.com should send
573 // Authorization: Basic dGVzdDpteSBwYXNz
574 // which decodes to "test:my pass".
575 // Standard wget and curl do this too.
578 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
581 /* else: h->user remains NULL, or as set by original request
582 * before redirect (if we are here after a redirect).
// Reads one response header line from "fp" into G.wget_buf and prepares
// it for the caller: the header name is lower-cased in place and the
// start of the value is located with skip_whitespace().
// The header loop in download_one_url treats a NULL result as "no more
// headers".
// NOTE(review): the return statements are not visible in this view;
// presumably NULL is returned for the empty line and the value pointer
// otherwise - confirm.
586 static char *get_sanitized_hdr(FILE *fp)
591 /* retrieve header line */
592 c = fgets_trim_sanitize(fp, " %s\n");
594 /* end of the headers? */
595 if (G.wget_buf[0] == '\0')
598 /* convert the header name to lower case */
// The loop stops at the first character that is not alphanumeric,
// '-', '.' or '_'.
599 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
601 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
602 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
603 * "A-Z" maps to "a-z".
604 * "@[\]" can't occur in header names.
605 * "^_" maps to "~,DEL" (which is wrong).
606 * "^" was never seen yet, "_" was seen from web.archive.org
607 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
612 /* verify we are at the end of the header name */
614 bb_error_msg_and_die("bad header line: %s", G.wget_buf);
616 /* locate the start of the header value */
618 hdrval = skip_whitespace(s);
// "c" still holds what fgets_trim_sanitize() returned for this line;
// the loop below reads further characters up to a newline or EOF.
// NOTE(review): the loop body is not visible; presumably the characters
// are discarded.
621 /* Rats! The buffer isn't big enough to hold the entire header value */
622 while (c = getc(fp), c != EOF && c != '\n')
// Used when a requested restart position was not honored by the server
// (see the callers after "REST" and in the HTTP status handling).
// Reports "restart failed" and rewinds G.output_fd to offset 0.
// The file is deliberately not truncated here.
// NOTE(review): the name implies that G.beg_range is also cleared; that
// assignment is not visible in this view.
629 static void reset_beg_range_to_zero(void)
631 bb_error_msg("restart failed");
633 xlseek(G.output_fd, 0, SEEK_SET);
634 /* Done at the end instead: */
635 /* ftruncate(G.output_fd, 0); */
638 #if ENABLE_FEATURE_WGET_OPENSSL
// Starts "openssl s_client" as a helper that carries the TLS connection.
// host: host name, optionally already in "host:port" form.
// port: appended as ":port" when "host" contains no ':'.
// A socketpair is created for talking to the helper. The caller in
// download_one_url wraps the returned descriptor with fdopen() and, when
// the internal TLS code is also enabled, treats a negative result as
// "openssl could not be used".
// NOTE(review): the fork, the descriptor plumbing and the return
// statements are not visible in this view.
639 static int spawn_https_helper_openssl(const char *host, unsigned port)
641 char *allocated = NULL;
645 IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)
647 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
648 /* Kernel can have AF_UNIX support disabled */
649 bb_perror_msg_and_die("socketpair");
// Make sure "host" has the "name:port" shape expected by -connect.
651 if (!strchr(host, ':'))
652 host = allocated = xasprintf("%s:%u", host, port);
// servername is a copy of "host" cut at its last ':'.
653 servername = xstrdup(host);
654 strrchr(servername, ':')[0] = '\0';
666 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
667 * It prints some debug stuff on stderr, don't know how to suppress it.
668 * Work around by dev-nulling stderr. We lose all error messages :(
671 xopen("/dev/null", O_RDWR);
// Unused argv slots stay NULL, which also terminates the vector.
672 memset(&argv, 0, sizeof(argv));
673 argv[0] = (char*)"openssl";
674 argv[1] = (char*)"s_client";
675 argv[2] = (char*)"-quiet";
676 argv[3] = (char*)"-connect";
677 argv[4] = (char*)host;
679 * Per RFC 6066 Section 3, the only permitted values in the
680 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
681 * IPv4 and IPv6 addresses, port numbers are not allowed.
683 if (!is_ip_address(servername)) {
684 argv[5] = (char*)"-servername";
685 argv[6] = (char*)servername;
688 BB_EXECVP(argv[0], argv);
690 # if ENABLE_FEATURE_WGET_HTTPS
694 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
703 # if ENABLE_FEATURE_WGET_HTTPS
713 #if ENABLE_FEATURE_WGET_HTTPS
// Puts a TLS layer in front of an already connected socket.
// host:       name used as the TLS server name; a copy is made and
//             searched for a trailing ":port" part.
// network_fd: connected socket. After the call the same descriptor
//             number refers to one end of a socketpair (see xmove_fd at
//             the end), so the caller keeps using it unchanged.
// flags:      passed to tls_run_copy_loop(); with the external helper,
//             TLSLOOP_EXIT_ON_LOCAL_EOF is translated into "-e".
// A child process is created that either runs the internal TLS code or
// executes the "ssl_client" helper.
// NOTE(review): which of the two paths is taken, and under which
// condition, is not visible in this view.
714 static void spawn_ssl_client(const char *host, int network_fd, int flags)
718 char *servername, *p;
// Print the note once: the option bit is set so later calls skip it.
720 if (!(option_mask32 & WGET_OPT_NO_CHECK_CERT)) {
721 option_mask32 |= WGET_OPT_NO_CHECK_CERT;
722 bb_error_msg("note: TLS certificate validation not implemented");
725 servername = xstrdup(host);
726 p = strrchr(servername, ':');
729 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
730 /* Kernel can have AF_UNIX support disabled */
731 bb_perror_msg_and_die("socketpair");
// fork() on MMU systems, vfork() otherwise.
734 pid = BB_MMU ? xfork() : xvfork();
// Internal TLS code: the network socket is both input and output.
741 tls_state_t *tls = new_tls_state();
742 tls->ifd = tls->ofd = network_fd;
743 tls_handshake(tls, servername);
744 tls_run_copy_loop(tls, flags);
// External helper: the network socket is handed over as descriptor 3.
749 xmove_fd(network_fd, 3);
750 argv[0] = (char*)"ssl_client";
751 argv[1] = (char*)"-s3";
752 //TODO: if (!is_ip_address(servername))...
753 argv[2] = (char*)"-n";
754 argv[3] = servername;
755 argv[4] = (flags & TLSLOOP_EXIT_ON_LOCAL_EOF ? (char*)"-e" : NULL);
757 BB_EXECVP(argv[0], argv);
758 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
766 xmove_fd(sp[0], network_fd);
// Opens the FTP control connection, logs in and prepares the data
// connection for retrieving target->path.
// dfpp:   receives the stream of the data connection.
// target: host, path, protocol and optional "user:password" of the file.
// lsa:    server address; its port is overwritten with the data port
//         announced by the server before the data connection is opened.
// Every unexpected reply code on the way is fatal.
// NOTE(review): the return statement is not visible in this view;
// presumably the control stream is returned.
770 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
776 sfp = open_socket(lsa);
777 #if ENABLE_FEATURE_WGET_HTTPS
778 if (target->protocol == P_FTPS)
779 spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
// No command is sent here: only the initial reply is read, and it must
// carry code 220.
782 if (ftpcmd(NULL, NULL, sfp) != 220)
783 bb_error_msg_and_die("%s", G.wget_buf);
784 /* note: ftpcmd() sanitizes G.wget_buf, ok to print */
786 /* Split username:password pair */
787 pass = (char*)"busybox"; /* password for "anonymous" */
789 pass = strchr(target->user, ':');
// Without credentials in the URL the login name is "anonymous".
795 switch (ftpcmd("USER ", target->user ?: "anonymous", sfp)) {
799 if (ftpcmd("PASS ", pass, sfp) == 230)
801 /* fall through (failed login) */
803 bb_error_msg_and_die("ftp login: %s", G.wget_buf);
// The reply to "TYPE I" is not checked.
806 ftpcmd("TYPE I", NULL, sfp);
808 /* Query file size */
// The reply text after the three-digit code starts at G.wget_buf + 4.
809 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
810 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
811 if (G.content_len < 0 || errno) {
812 bb_error_msg_and_die("bad SIZE value '%s'", G.wget_buf + 4);
817 /* Enter passive mode */
// "EPSV" is attempted only when IPv6 support is compiled in.
818 if (ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229) {
821 if (ftpcmd("PASV", NULL, sfp) != 227) {
823 bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
825 port = parse_pasv_epsv(G.wget_buf);
// Reuse the server address with the announced data port.
829 set_nport(&lsa->u.sa, htons(port));
831 *dfpp = open_socket(lsa);
833 #if ENABLE_FEATURE_WGET_HTTPS
834 if (target->protocol == P_FTPS) {
835 /* "PROT P" enables encryption of data stream.
836 * Without it (or with "PROT C"), data is sent unencrypted.
838 if (ftpcmd("PROT P", NULL, sfp) == 200)
839 spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
// Resuming: ask the server to restart at G.beg_range. On reply 350 the
// remaining length is reduced accordingly.
// NOTE(review): the "else" in front of the reset call is not visible.
843 if (G.beg_range != 0) {
844 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
845 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
846 G.content_len -= G.beg_range;
848 reset_beg_range_to_zero();
851 //TODO: needs ftp-escaping 0xff and '\n' bytes here.
852 //Or disallow '\n' altogether via sanitize_string() in parse_url().
853 //But 0xff's are possible in valid utf8 filenames.
// Any reply code above 150 to "RETR" is treated as a failure.
854 if (ftpcmd("RETR ", target->path, sfp) > 150)
855 bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);
// Copies the body of the response from "dfp" to G.output_fd through the
// shared buffer G.wget_buf, driving the progress meter on the way.
// With the status bar or the timeout feature compiled in, the socket is
// switched to non-blocking mode while data is read, and poll() is used
// with a one second limit to wait for more data.
// For chunked bodies (G.chunked) the chunk headers are parsed here and
// G.content_len holds the size of the current chunk.
// Read errors, malformed chunk sizes and an expired timeout are fatal.
// NOTE(review): many structural lines (loops, braces, conditions) are
// missing in this view; the comments below describe only what the
// visible statements do.
860 static void NOINLINE retrieve_file_data(FILE *dfp)
862 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
863 # if ENABLE_FEATURE_WGET_TIMEOUT
// Countdown in seconds; it is reloaded after a successful read.
864 unsigned second_cnt = G.timeout_seconds;
866 struct pollfd polldata;
868 polldata.fd = fileno(dfp);
869 polldata.events = POLLIN | POLLPRI;
871 progress_meter(PROGRESS_START);
876 /* Loops only if chunked */
879 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
880 /* Must use nonblocking I/O, otherwise fread will loop
881 * and *block* until it reads full buffer,
882 * which messes up progress bar and/or timeout logic.
883 * Because of nonblocking I/O, we need to dance
884 * very carefully around EAGAIN. See explanation at
887 ndelay_on(polldata.fd);
893 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
894 /* fread internally uses read loop, which in our case
895 * is usually exited when we get EAGAIN.
896 * In this case, libc sets error marker on the stream.
897 * Need to clear it before next fread to avoid possible
898 * rare false positive ferror below. Rare because usually
899 * fread gets more than zero bytes, and we don't fall
900 * into if (n <= 0) ...
// Read at most one buffer, and no more than the remaining length when
// that is smaller than the buffer.
905 rdsz = sizeof(G.wget_buf);
907 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
908 if ((int)G.content_len <= 0)
910 rdsz = (unsigned)G.content_len;
913 n = fread(G.wget_buf, 1, rdsz, dfp);
916 xwrite(G.output_fd, G.wget_buf, n);
917 #if ENABLE_FEATURE_WGET_STATUSBAR
922 if (G.content_len == 0)
925 #if ENABLE_FEATURE_WGET_TIMEOUT
926 second_cnt = G.timeout_seconds;
933 * If error occurs, or EOF is reached, the return value
934 * is a short item count (or zero).
935 * fread does not distinguish between EOF and error.
// errno other than EAGAIN is reported as a read error; the visible
// "break" covers the plain end-of-file case.
937 if (errno != EAGAIN) {
939 progress_meter(PROGRESS_END);
940 bb_perror_msg_and_die(bb_msg_read_error);
942 break; /* EOF, not error */
945 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
946 /* It was EAGAIN. There is no data. Wait up to one second
947 * then abort if timed out, or update the bar and try reading again.
// A poll result of 0 means the one second wait expired without data.
949 if (safe_poll(&polldata, 1, 1000) == 0) {
950 # if ENABLE_FEATURE_WGET_TIMEOUT
// second_cnt == 0 means "no timeout configured"; otherwise the
// download is aborted when the countdown reaches zero.
951 if (second_cnt != 0 && --second_cnt == 0) {
952 progress_meter(PROGRESS_END);
953 bb_error_msg_and_die("download timed out");
956 /* We used to loop back to poll here,
957 * but there is no great harm in letting fread
958 * to try reading anyway.
963 /* Need to do it _every_ second for "stalled" indicator
964 * to be shown properly.
966 progress_meter(PROGRESS_BUMP);
967 } /* while (reading data) */
969 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
971 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
976 /* Each chunk ends with "\r\n" - eat it */
977 fgets_trim_sanitize(dfp, NULL);
979 /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
980 fgets_trim_sanitize(dfp, NULL);
// The chunk size is parsed as a hexadecimal number.
982 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
984 * Had a bug with inputs like "ffffffff0001f400"
985 * smashing the heap later. Ensure >= 0.
987 if (G.content_len < 0 || errno)
988 bb_error_msg_and_die("bad chunk length '%s'", G.wget_buf);
// A chunk of size zero ends the body.
989 if (G.content_len == 0)
990 break; /* all done! */
993 * Note that fgets may result in some data being buffered in dfp.
994 * We loop back to fread, which will retrieve this data.
995 * Also note that code has to be arranged so that fread
996 * is done _before_ one-second poll wait - poll doesn't know
997 * about stdio buffering and can result in spurious one second waits!
1001 /* If -c failed, we restart from the beginning,
1002 * but we do not truncate file then, we do it only now, at the end.
1003 * This lets user to ^C if his 99% complete 10 GB file download
1004 * failed to restart *without* losing the almost complete file.
// Cut the file at the current write position. A failed lseek() skips
// the truncation; the result of ftruncate() is not checked.
1007 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
1008 if (pos != (off_t)-1)
1009 ftruncate(G.output_fd, pos);
1012 /* Draw full bar and free its resources */
1013 G.chunked = 0; /* makes it show 100% even for chunked download */
1014 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
1015 progress_meter(PROGRESS_END);
1018 static void download_one_url(const char *url)
1020 bool use_proxy; /* Use proxies if env vars are set */
1022 len_and_sockaddr *lsa;
1023 FILE *sfp; /* socket to web/ftp server */
1024 FILE *dfp; /* socket to ftp server (data) */
1025 char *fname_out_alloc;
1026 char *redirected_path = NULL;
1027 struct host_info server;
1028 struct host_info target;
1030 server.allocated = NULL;
1031 target.allocated = NULL;
1035 parse_url(url, &target);
1037 /* Use the proxy if necessary */
1038 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
1040 char *proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
1041 //FIXME: what if protocol is https? Ok to use http_proxy?
1042 use_proxy = (proxy && proxy[0]);
1044 parse_url(proxy, &server);
1047 server.protocol = target.protocol;
1048 server.port = target.port;
1049 if (ENABLE_FEATURE_IPV6) {
1050 //free(server.allocated); - can't be non-NULL
1051 server.host = server.allocated = xstrdup(target.host);
1053 server.host = target.host;
1057 if (ENABLE_FEATURE_IPV6)
1058 strip_ipv6_scope_id(target.host);
1060 /* If there was no -O FILE, guess output filename */
1061 fname_out_alloc = NULL;
1062 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1063 G.fname_out = bb_get_last_path_component_nostrip(target.path);
1064 /* handle "wget http://kernel.org//" */
1065 if (G.fname_out[0] == '/' || !G.fname_out[0])
1066 G.fname_out = (char*)"index.html";
1067 /* -P DIR is considered only if there was no -O FILE */
1069 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
1071 /* redirects may free target.path later, need to make a copy */
1072 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
1075 #if ENABLE_FEATURE_WGET_STATUSBAR
1076 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
1079 /* Determine where to start transfer */
1081 if (option_mask32 & WGET_OPT_CONTINUE) {
1082 G.output_fd = open(G.fname_out, O_WRONLY);
1083 if (G.output_fd >= 0) {
1084 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
1086 /* File doesn't exist. We do not create file here yet.
1087 * We are not sure it exists on remote side */
1092 lsa = xhost2sockaddr(server.host, server.port);
1093 if (!(option_mask32 & WGET_OPT_QUIET)) {
1094 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1095 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1099 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1102 if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
1109 /* Open socket to http(s) server */
1110 #if ENABLE_FEATURE_WGET_OPENSSL
1111 /* openssl (and maybe internal TLS) support is configured */
1112 if (server.protocol == P_HTTPS) {
1113 /* openssl-based helper
1114 * Inconvenient API since we can't give it an open fd
1116 int fd = spawn_https_helper_openssl(server.host, server.port);
1117 # if ENABLE_FEATURE_WGET_HTTPS
1118 if (fd < 0) { /* no openssl? try internal */
1119 sfp = open_socket(lsa);
1120 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
1124 /* We don't check for exec("openssl") failure in this case */
1126 sfp = fdopen(fd, "r+");
1128 bb_die_memory_exhausted();
1131 sfp = open_socket(lsa);
1133 #elif ENABLE_FEATURE_WGET_HTTPS
1134 /* Only internal TLS support is configured */
1135 sfp = open_socket(lsa);
1136 if (server.protocol == P_HTTPS)
1137 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
1139 /* ssl (https) support is not configured */
1140 sfp = open_socket(lsa);
1142 /* Send HTTP request */
1144 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1145 target.protocol, target.host,
1148 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1149 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1152 if (!USR_HEADER_HOST)
1153 SENDFMT(sfp, "Host: %s\r\n", target.host);
1154 if (!USR_HEADER_USER_AGENT)
1155 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1157 /* Ask server to close the connection as soon as we are done
1158 * (IOW: we do not intend to send more requests)
1160 SENDFMT(sfp, "Connection: close\r\n");
1162 #if ENABLE_FEATURE_WGET_AUTHENTICATION
1163 if (target.user && !USR_HEADER_AUTH) {
1164 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1165 base64enc(target.user));
1167 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1168 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1169 base64enc(server.user));
1173 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1174 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1176 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1177 if (G.extra_headers) {
1178 log_io(G.extra_headers);
1179 fputs(G.extra_headers, sfp);
1182 if (option_mask32 & WGET_OPT_POST_DATA) {
1184 "Content-Type: application/x-www-form-urlencoded\r\n"
1185 "Content-Length: %u\r\n"
1188 (int) strlen(G.post_data), G.post_data
1193 SENDFMT(sfp, "\r\n");
1198 /* Tried doing this unconditionally.
1199 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1202 if (target.protocol == P_HTTPS) {
1203 /* If we use SSL helper, keeping our end of the socket open for writing
1204 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1205 * even after child closes its copy of the fd.
1208 shutdown(fileno(sfp), SHUT_WR);
1213 * Retrieve HTTP response line and check for "200" status code.
1216 fgets_trim_sanitize(sfp, " %s\n");
1219 str = skip_non_whitespace(str);
1220 str = skip_whitespace(str);
1221 // FIXME: no error check
1222 // xatou wouldn't work: "200 OK"
1227 while (get_sanitized_hdr(sfp) != NULL)
1228 /* eat all remaining headers */;
1231 /* Success responses */
1234 case 201: /* 201 Created */
1235 /* "The request has been fulfilled and resulted in a new resource being created" */
1236 /* Standard wget is reported to treat this as success */
1238 case 202: /* 202 Accepted */
1239 /* "The request has been accepted for processing, but the processing has not been completed" */
1240 /* Treat as success: fall through */
1241 case 203: /* 203 Non-Authoritative Information */
1242 /* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1244 case 204: /* 204 No Content */
1246 Response 204 doesn't say "null file", it says "metadata
1247 has changed but data didn't":
1249 "10.2.5 204 No Content
1250 The server has fulfilled the request but does not need to return
1251 an entity-body, and might want to return updated metainformation.
1252 The response MAY include new or updated metainformation in the form
1253 of entity-headers, which if present SHOULD be associated with
1254 the requested variant.
1256 If the client is a user agent, it SHOULD NOT change its document
1257 view from that which caused the request to be sent. This response
1258 is primarily intended to allow input for actions to take place
1259 without causing a change to the user agent's active document view,
1260 although any new or updated metainformation SHOULD be applied
1261 to the document currently in the user agent's active view.
1263 The 204 response MUST NOT include a message-body, and thus
1264 is always terminated by the first empty line after the header fields."
1266 However, in real world it was observed that some web servers
1267 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1269 if (G.beg_range != 0) {
1270 /* "Range:..." was not honored by the server.
1271 * Restart download from the beginning.
1273 reset_beg_range_to_zero();
1276 /* 205 Reset Content ?? what to do on this ?? */
1278 case 300: /* redirection */
1284 case 206: /* Partial Content */
1285 if (G.beg_range != 0)
1286 /* "Range:..." worked. Good. */
1288 /* Partial Content even though we did not ask for it??? */
1291 bb_error_msg_and_die("server returned error: %s", G.wget_buf);
1295 * Retrieve HTTP headers.
1297 while ((str = get_sanitized_hdr(sfp)) != NULL) {
1298 static const char keywords[] ALIGN1 =
1299 "content-length\0""transfer-encoding\0""location\0";
1301 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1305 /* get_sanitized_hdr converted "FOO:" string to lowercase */
1307 /* strip trailing whitespace */
1308 char *s = strchrnul(str, '\0') - 1;
1309 while (s >= str && (*s == ' ' || *s == '\t')) {
1313 key = index_in_strings(keywords, G.wget_buf) + 1;
1314 if (key == KEY_content_length) {
1315 G.content_len = BB_STRTOOFF(str, NULL, 10);
1316 if (G.content_len < 0 || errno) {
1317 bb_error_msg_and_die("content-length %s is garbage", str);
1322 if (key == KEY_transfer_encoding) {
1323 if (strcmp(str_tolower(str), "chunked") != 0)
1324 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
1327 if (key == KEY_location && status >= 300) {
1328 if (--redir_limit == 0)
1329 bb_error_msg_and_die("too many redirections");
1331 if (str[0] == '/') {
1332 free(redirected_path);
1333 target.path = redirected_path = xstrdup(str + 1);
1334 /* lsa stays the same: it's on the same server */
1336 parse_url(str, &target);
1338 /* server.user remains untouched */
1339 free(server.allocated);
1340 server.allocated = NULL;
1341 server.protocol = target.protocol;
1342 server.host = target.host;
1343 /* strip_ipv6_scope_id(target.host); - no! */
1344 /* we assume remote never gives us IPv6 addr with scope id */
1345 server.port = target.port;
1348 } /* else: lsa stays the same: we use proxy */
1350 goto establish_session;
1353 // if (status >= 300)
1354 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1356 /* For HTTP, data is pumped over the same connection */
1362 sfp = prepare_ftp_session(&dfp, &target, lsa);
1367 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1368 if (G.output_fd < 0)
1369 G.output_fd = xopen(G.fname_out, G.o_flags);
1370 retrieve_file_data(dfp);
1371 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1372 xclose(G.output_fd);
1378 /* It's ftp. Close data connection properly */
1380 if (ftpcmd(NULL, NULL, sfp) != 226)
1381 bb_error_msg_and_die("ftp error: %s", G.wget_buf);
1382 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1386 free(server.allocated);
1387 free(target.allocated);
1390 free(fname_out_alloc);
1391 free(redirected_path);
/*
 * Applet entry point for wget.
 * As far as visible here: sets defaults (proxy flag, user agent and,
 * when FEATURE_WGET_TIMEOUT is enabled, the timeout), parses the command
 * line, folds any --header values into G.extra_headers, chooses the
 * open flags for the output file, hands URL arguments to
 * download_one_url() and returns EXIT_SUCCESS.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (e.g. the option string and the loop headers) - confirm the
 * details against the full file.
 */
1394 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1395 int wget_main(int argc UNUSED_PARAM, char **argv)
1397 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1398 static const char wget_longopts[] ALIGN1 =
1399 /* name, has_arg, val */
1400 "continue\0" No_argument "c"
1401 "quiet\0" No_argument "q"
1402 "server-response\0" No_argument "S"
1403 "output-document\0" Required_argument "O"
1404 "directory-prefix\0" Required_argument "P"
1405 "proxy\0" Required_argument "Y"
1406 "user-agent\0" Required_argument "U"
1407 IF_FEATURE_WGET_TIMEOUT(
1408 "timeout\0" Required_argument "T")
1410 IF_DESKTOP( "tries\0" Required_argument "t")
1411 "header\0" Required_argument "\xff"
1412 "post-data\0" Required_argument "\xfe"
1413 "spider\0" No_argument "\xfd"
1414 "no-check-certificate\0" No_argument "\xfc"
1415 /* Ignored (we always use PASV): */
1416 IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
1417 /* Ignored (we don't support caching) */
1418 IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1419 IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1420 IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1421 IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1422 IF_DESKTOP( "no-parent\0" No_argument "\xf0")
1424 # define GETOPT32 getopt32long
1425 # define LONGOPTS ,wget_longopts
1427 # define GETOPT32 getopt32
1431 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1432 llist_t *headers_llist = NULL;
1437 #if ENABLE_FEATURE_WGET_TIMEOUT
/* Default timeout; its address is passed to the option parser below,
 * so -T can override it. SIGALRM is routed to alarm_handler. */
1438 G.timeout_seconds = 900;
1439 signal(SIGALRM, alarm_handler);
1441 G.proxy_flag = "on"; /* use proxies if env vars are set */
1442 G.user_agent = "Wget"; /* "User-Agent" header field */
1444 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1450 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1451 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1452 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1453 * -nH --no-host-directories: wget -r http://host/ won't create host/
1455 * "n::" above says that we accept -n[ARG].
1456 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1459 "-1" /* at least one URL */
1460 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
1462 , &G.fname_out, &G.dir_prefix,
1463 &G.proxy_flag, &G.user_agent,
1464 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1465 NULL, /* -t RETRIES */
1467 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1468 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1470 #if 0 /* option bits debug */
1471 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1472 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1473 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1474 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1475 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1476 if (option_mask32 & WGET_OPT_NO_CHECK_CERT) bb_error_msg("--no-check-certificate");
1481 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1482 if (headers_llist) {
1485 llist_t *ll = headers_llist;
/* +2 per header: room for the "\r\n" appended when the header is copied below */
1487 size += strlen(ll->data) + 2;
/* +1 for the terminating NUL that sprintf() writes after the last header */
1490 G.extra_headers = hdr = xmalloc(size + 1);
1491 while (headers_llist) {
1495 size = sprintf(hdr, "%s\r\n",
1496 (char*)llist_pop(&headers_llist));
1497 /* a bit like index_in_substrings but don't match full key */
1499 words = wget_user_headers;
/* strstr() returning hdr itself means hdr begins with this keyword
 * (prefix match); the matching bit is then recorded in G.user_headers */
1501 if (strstr(hdr, words) == hdr) {
1502 G.user_headers |= bit;
1506 words += strlen(words) + 1;
/* Default open flags: with O_CREAT | O_EXCL, opening an already
 * existing file fails instead of overwriting it */
1514 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1515 if (G.fname_out) { /* -O FILE ? */
1516 if (LONE_DASH(G.fname_out)) { /* -O - ? */
/* -O -: clear the WGET_OPT_CONTINUE option bit */
1518 option_mask32 &= ~WGET_OPT_CONTINUE;
1520 /* compat with wget: -O FILE can overwrite */
1521 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1525 download_one_url(*argv++);
/* Close the output descriptor if one is still open */
1527 if (G.output_fd >= 0)
1528 xclose(G.output_fd);
1530 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1531 free(G.extra_headers);
1535 return EXIT_SUCCESS;