1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //config: bool "wget (35 kb)"
15 //config: wget is a utility for non-interactive download of files from HTTP
16 //config: and FTP servers.
18 //config:config FEATURE_WGET_LONG_OPTIONS
19 //config: bool "Enable long options"
21 //config: depends on WGET && LONG_OPTS
23 //config:config FEATURE_WGET_STATUSBAR
24 //config: bool "Enable progress bar (+2k)"
26 //config: depends on WGET
28 //config:config FEATURE_WGET_AUTHENTICATION
29 //config: bool "Enable HTTP authentication"
31 //config: depends on WGET
33 //config: Support authenticated HTTP transfers.
35 //config:config FEATURE_WGET_TIMEOUT
36 //config: bool "Enable timeout option -T SEC"
38 //config: depends on WGET
40 //config: Supports network read and connect timeouts for wget,
41 //config: so that wget will give up and time out, through the -T
42 //config: command line option.
44 //config: Currently only connect and network data read timeout are
45 //config: supported (i.e., timeout is not applied to the DNS query). When
46 //config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
47 //config: will work in addition to -T.
49 //config:config FEATURE_WGET_HTTPS
50 //config: bool "Support HTTPS using internal TLS code"
52 //config: depends on WGET
55 //config: wget will use internal TLS code to connect to https:// URLs.
57 //config: On NOMMU machines, ssl_helper applet should be available
58 //config: in the $PATH for this to work. Make sure to select that applet.
60 //config: Note: currently, TLS code only makes TLS I/O work, it
61 //config: does *not* check that the peer is who it claims to be, etc.
62 //config: IOW: it uses peer-supplied public keys to establish encryption
63 //config: and signing keys, then encrypts and signs outgoing data and
64 //config: decrypts incoming data.
65 //config: It does not check signature hashes on the incoming data:
66 //config: this means that attackers manipulating TCP packets can
67 //config: send altered data and we unknowingly receive garbage.
68 //config: (This check might be relatively easy to add).
69 //config: It does not check public key's certificate:
70 //config: this means that the peer may be an attacker impersonating
71 //config: the server we think we are talking to.
73 //config: If you think this is unacceptable, consider this. As more and more
74 //config: servers switch to HTTPS-only operation, without such "crippled"
75 //config: TLS code it is *impossible* to simply download a kernel source
76 //config: from kernel.org. Which can in real world translate into
77 //config: "my small automatic tooling to build cross-compilers from sources
78 //config: no longer works, I need to additionally keep a local copy
79 //config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte
80 //config: source of wget, need to compile and build both before I can
81 //config: download anything. All this despite the fact that the build
82 //config: is done in a QEMU sandbox on a machine with absolutely nothing
83 //config: worth stealing, so I don't care if someone would go to a lot
84 //config: of trouble to intercept my HTTPS download to send me an altered
85 //config: kernel tarball".
87 //config: If you still think this is unacceptable, send patches.
89 //config: If you still think this is unacceptable, do not want to send
90 //config: patches, but do want to waste bandwidth explaining how wrong
91 //config: it is, you will be ignored.
93 //config:config FEATURE_WGET_OPENSSL
94 //config: bool "Try to connect to HTTPS using openssl"
96 //config: depends on WGET
98 //config: Try to use openssl to handle HTTPS.
100 //config: OpenSSL has a simple SSL client for debug purposes.
101 //config: If you select this option, wget will effectively run:
102 //config: "openssl s_client -quiet -connect hostname:443
103 //config: -servername hostname 2>/dev/null" and pipe its data
104 //config: through it. -servername is not used if hostname is numeric.
105 //config: Note inconvenient API: host resolution is done twice,
106 //config: and there is no guarantee openssl's idea of IPv6 address
107 //config: format is the same as ours.
108 //config: Another problem is that s_client prints debug information
109 //config: to stderr, and it needs to be suppressed. This means
110 //config: all error messages get suppressed too.
111 //config: openssl is also a big binary, often dynamically linked
112 //config: against ~15 libraries.
114 //config: If openssl can't be executed, internal TLS code will be used
115 //config: (if you enabled it); if openssl can be executed but fails later,
116 //config: wget can't detect this, and download will fail.
118 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
120 //kbuild:lib-$(CONFIG_WGET) += wget.o
122 //usage:#define wget_trivial_usage
123 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
124 //usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
125 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
126 /* Since we ignore these opts, we don't show them in --help */
127 /* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
128 /* //usage: " [-nv] [-nc] [-nH] [-np]" */
129 //usage: " [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
131 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
132 //usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
133 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
135 //usage:#define wget_full_usage "\n\n"
136 //usage: "Retrieve files via HTTP or FTP\n"
137 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
138 //usage: "\n --spider Only check URL existence: $? is 0 if exists"
140 //usage: "\n -c Continue retrieval of aborted transfer"
141 //usage: "\n -q Quiet"
142 //usage: "\n -P DIR Save to DIR (default .)"
143 //usage: "\n -S Show server response"
144 //usage: IF_FEATURE_WGET_TIMEOUT(
145 //usage: "\n -T SEC Network read timeout is SEC seconds"
147 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
148 //usage: "\n -U STR Use STR for User-Agent header"
149 //usage: "\n -Y on/off Use proxy"
154 # define log_io(...) bb_error_msg(__VA_ARGS__)
155 # define SENDFMT(fp, fmt, ...) \
157 log_io("> " fmt, ##__VA_ARGS__); \
158 fprintf(fp, fmt, ##__VA_ARGS__); \
161 # define log_io(...) ((void)0)
162 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
166 #define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
172 const char *protocol;
176 static const char P_FTP[] ALIGN1 = "ftp";
177 static const char P_HTTP[] ALIGN1 = "http";
179 static const char P_HTTPS[] ALIGN1 = "https";
182 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
183 /* User-specified headers prevent using our corresponding built-in headers. */
186 HDR_USER_AGENT = (1<<1),
188 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
189 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
191 static const char wget_user_headers[] ALIGN1 =
195 # if ENABLE_FEATURE_WGET_AUTHENTICATION
197 "Proxy-Authorization:\0"
200 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
201 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
202 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
203 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
204 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
205 #else /* No long options, no user-headers :( */
206 # define USR_HEADER_HOST 0
207 # define USR_HEADER_USER_AGENT 0
208 # define USR_HEADER_RANGE 0
209 # define USR_HEADER_AUTH 0
210 # define USR_HEADER_PROXY_AUTH 0
215 off_t content_len; /* Content-length of the file */
216 off_t beg_range; /* Range at which continue begins */
217 #if ENABLE_FEATURE_WGET_STATUSBAR
218 off_t transferred; /* Number of bytes transferred so far */
219 const char *curfile; /* Name of current file being transferred */
223 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
226 unsigned char user_headers; /* Headers mentioned by the user */
228 char *fname_out; /* where to direct output (-O) */
229 const char *proxy_flag; /* Use proxies if env vars are set */
230 const char *user_agent; /* "User-Agent" header field */
231 #if ENABLE_FEATURE_WGET_TIMEOUT
232 unsigned timeout_seconds;
233 bool die_if_timed_out;
237 smallint chunked; /* chunked transfer encoding */
238 smallint got_clen; /* got content-length: from server */
239 /* Local downloads do benefit from big buffer.
240 * With 512 byte buffer, it was measured to be
241 * an order of magnitude slower than with big one.
243 uint64_t just_to_align_next_member;
244 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
246 #define G (*ptr_to_globals)
247 #define INIT_G() do { \
248 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
250 #define FINI_G() do { \
251 FREE_PTR_TO_GLOBALS(); \
255 /* Must match option string! */
257 WGET_OPT_CONTINUE = (1 << 0),
258 WGET_OPT_QUIET = (1 << 1),
259 WGET_OPT_SERVER_RESPONSE = (1 << 2),
260 WGET_OPT_OUTNAME = (1 << 3),
261 WGET_OPT_PREFIX = (1 << 4),
262 WGET_OPT_PROXY = (1 << 5),
263 WGET_OPT_USER_AGENT = (1 << 6),
264 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
265 WGET_OPT_RETRIES = (1 << 8),
266 WGET_OPT_nsomething = (1 << 9),
267 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
268 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
269 WGET_OPT_SPIDER = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
277 #if ENABLE_FEATURE_WGET_STATUSBAR
278 static void progress_meter(int flag)
280 if (option_mask32 & WGET_OPT_QUIET)
283 if (flag == PROGRESS_START)
284 bb_progress_init(&G.pmt, G.curfile);
286 bb_progress_update(&G.pmt,
289 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
292 if (flag == PROGRESS_END) {
293 bb_progress_free(&G.pmt);
294 bb_putchar_stderr('\n');
299 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
303 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
304 * local addresses can have a scope identifier to specify the
305 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
306 * identifier is only valid on a single node.
308 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
309 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
310 * in the Host header as invalid requests, see
311 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
313 static void strip_ipv6_scope_id(char *host)
317 /* bbox wget actually handles IPv6 addresses without [], like
318 * wget "http://::1/xxx", but this is not standard.
319 * To save code, _here_ we do not support it. */
322 return; /* not IPv6 */
324 scope = strchr(host, '%');
328 /* Remove the IPv6 zone identifier from the host address */
329 cp = strchr(host, ']');
330 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
331 /* malformed address (not "[xx]:nn" or "[xx]") */
335 /* cp points to "]...", scope points to "%eth0]..." */
336 overlapping_strcpy(scope, cp);
339 #if ENABLE_FEATURE_WGET_AUTHENTICATION
340 /* Base64-encode character string. */
341 static char *base64enc(const char *str)
343 unsigned len = strlen(str);
344 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
345 len = sizeof(G.wget_buf)/4*3 - 10;
346 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
351 static char* sanitize_string(char *s)
353 unsigned char *p = (void *) s;
360 #if ENABLE_FEATURE_WGET_TIMEOUT
361 static void alarm_handler(int sig UNUSED_PARAM)
363 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
364 if (G.die_if_timed_out)
365 bb_error_msg_and_die("download timed out");
367 static void set_alarm(void)
369 if (G.timeout_seconds) {
370 alarm(G.timeout_seconds);
371 G.die_if_timed_out = 1;
374 # define clear_alarm() ((void)(G.die_if_timed_out = 0))
376 # define set_alarm() ((void)0)
377 # define clear_alarm() ((void)0)
380 #if ENABLE_FEATURE_WGET_OPENSSL
382 * is_ip_address() attempts to verify whether or not a string
383 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
384 * of inet_pton() can be used to determine this.
386 * TODO add proper error checking when inet_pton() returns -1
387 * (some form of system error has occurred, and errno is set)
389 static int is_ip_address(const char *string)
391 struct sockaddr_in sa;
393 int result = inet_pton(AF_INET, string, &(sa.sin_addr));
394 # if ENABLE_FEATURE_IPV6
396 struct sockaddr_in6 sa6;
397 result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
400 return (result == 1);
404 static FILE *open_socket(len_and_sockaddr *lsa)
410 fd = xconnect_stream(lsa);
413 /* glibc 2.4 seems to try seeking on it - ??! */
414 /* hopefully it understands what ESPIPE means... */
415 fp = fdopen(fd, "r+");
417 bb_perror_msg_and_die(bb_msg_memory_exhausted);
422 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
423 static char fgets_and_trim(FILE *fp, const char *fmt)
429 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
430 bb_perror_msg_and_die("error getting response");
433 buf_ptr = strchrnul(G.wget_buf, '\n');
436 buf_ptr = strchrnul(G.wget_buf, '\r');
439 log_io("< %s", G.wget_buf);
441 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
442 fprintf(stderr, fmt, G.wget_buf);
447 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
453 fprintf(fp, "%s%s\r\n", s1, s2);
454 /* With --server-response, wget also shows its ftp commands */
455 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
456 fprintf(stderr, "--> %s%s\n\n", s1, s2);
458 log_io("> %s%s", s1, s2);
462 fgets_and_trim(fp, "%s\n");
463 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
465 G.wget_buf[3] = '\0';
466 result = xatoi_positive(G.wget_buf);
471 static void parse_url(const char *src_url, struct host_info *h)
476 h->allocated = url = xstrdup(src_url);
479 p = strstr(url, "://");
483 if (strcmp(url, P_FTP) == 0) {
484 h->port = bb_lookup_port(P_FTP, "tcp", 21);
487 if (strcmp(url, P_HTTPS) == 0) {
488 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
489 h->protocol = P_HTTPS;
492 if (strcmp(url, P_HTTP) == 0) {
494 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
495 h->protocol = P_HTTP;
498 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
501 // GNU wget is user-friendly and falls back to http://
507 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
508 // 'GET /?var=a/b HTTP/1.0'
509 // and saves 'index.html?var=a%2Fb' (we save 'b')
510 // wget 'http://busybox.net?login=john@doe':
511 // request: 'GET /?login=john@doe HTTP/1.0'
512 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
513 // wget 'http://busybox.net#test/test':
514 // request: 'GET / HTTP/1.0'
515 // saves: 'index.html' (we save 'test')
517 // We also don't add unique .N suffix if file exists...
518 sp = strchr(h->host, '/');
519 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
520 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
523 } else if (*sp == '/') {
526 } else { // '#' or '?'
527 // http://busybox.net?login=john@doe is a valid URL
528 // memmove converts to:
529 // http:/busybox.nett?login=john@doe...
530 memmove(h->host - 1, h->host, sp - h->host);
536 sp = strrchr(h->host, '@');
538 // URL-decode "user:password" string before base64-encoding:
539 // wget http://test:my%20pass@example.com should send
540 // Authorization: Basic dGVzdDpteSBwYXNz
541 // which decodes to "test:my pass".
542 // Standard wget and curl do this too.
545 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
548 /* else: h->user remains NULL, or as set by original request
549 * before redirect (if we are here after a redirect).
553 static char *gethdr(FILE *fp)
558 /* retrieve header line */
559 c = fgets_and_trim(fp, " %s\n");
561 /* end of the headers? */
562 if (G.wget_buf[0] == '\0')
565 /* convert the header name to lower case */
566 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
568 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
569 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
570 * "A-Z" maps to "a-z".
571 * "@[\]" can't occur in header names.
572 * "^_" maps to "~,DEL" (which is wrong).
573 * "^" was never seen yet, "_" was seen from web.archive.org
574 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
579 /* verify we are at the end of the header name */
581 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
583 /* locate the start of the header value */
585 hdrval = skip_whitespace(s);
588 /* Rats! The buffer isn't big enough to hold the entire header value */
589 while (c = getc(fp), c != EOF && c != '\n')
596 static void reset_beg_range_to_zero(void)
598 bb_error_msg("restart failed");
600 xlseek(G.output_fd, 0, SEEK_SET);
601 /* Done at the end instead: */
602 /* ftruncate(G.output_fd, 0); */
605 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
612 target->user = xstrdup("anonymous:busybox@");
614 sfp = open_socket(lsa);
615 if (ftpcmd(NULL, NULL, sfp) != 220)
616 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
619 * Splitting username:password pair,
622 str = strchr(target->user, ':');
625 switch (ftpcmd("USER ", target->user, sfp)) {
629 if (ftpcmd("PASS ", str, sfp) == 230)
631 /* fall through (failed login) */
633 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
636 ftpcmd("TYPE I", NULL, sfp);
641 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
642 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
643 if (G.content_len < 0 || errno) {
644 bb_error_msg_and_die("SIZE value is garbage");
650 * Entering passive mode
652 if (ftpcmd("PASV", NULL, sfp) != 227) {
654 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
656 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
657 // Server's IP is N1.N2.N3.N4 (we ignore it)
658 // Server's port for data connection is P1*256+P2
659 str = strrchr(G.wget_buf, ')');
660 if (str) str[0] = '\0';
661 str = strrchr(G.wget_buf, ',');
662 if (!str) goto pasv_error;
663 port = xatou_range(str+1, 0, 255);
665 str = strrchr(G.wget_buf, ',');
666 if (!str) goto pasv_error;
667 port += xatou_range(str+1, 0, 255) * 256;
668 set_nport(&lsa->u.sa, htons(port));
670 *dfpp = open_socket(lsa);
672 if (G.beg_range != 0) {
673 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
674 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
675 G.content_len -= G.beg_range;
677 reset_beg_range_to_zero();
680 if (ftpcmd("RETR ", target->path, sfp) > 150)
681 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
686 #if ENABLE_FEATURE_WGET_OPENSSL
687 static int spawn_https_helper_openssl(const char *host, unsigned port)
689 char *allocated = NULL;
693 IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)
695 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
696 /* Kernel can have AF_UNIX support disabled */
697 bb_perror_msg_and_die("socketpair");
699 if (!strchr(host, ':'))
700 host = allocated = xasprintf("%s:%u", host, port);
701 servername = xstrdup(host);
702 strrchr(servername, ':')[0] = '\0';
714 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
715 * It prints some debug stuff on stderr, don't know how to suppress it.
716 * Work around by dev-nulling stderr. We lose all error messages :(
719 xopen("/dev/null", O_RDWR);
720 memset(&argv, 0, sizeof(argv));
721 argv[0] = (char*)"openssl";
722 argv[1] = (char*)"s_client";
723 argv[2] = (char*)"-quiet";
724 argv[3] = (char*)"-connect";
725 argv[4] = (char*)host;
727 * Per RFC 6066 Section 3, the only permitted values in the
728 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
729 * IPv4 and IPv6 addresses, port numbers are not allowed.
731 if (!is_ip_address(servername)) {
732 argv[5] = (char*)"-servername";
733 argv[6] = (char*)servername;
736 BB_EXECVP(argv[0], argv);
738 # if ENABLE_FEATURE_WGET_HTTPS
742 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
751 # if ENABLE_FEATURE_WGET_HTTPS
761 #if ENABLE_FEATURE_WGET_HTTPS
762 static void spawn_ssl_client(const char *host, int network_fd)
766 char *servername, *p;
768 servername = xstrdup(host);
769 p = strrchr(servername, ':');
772 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
773 /* Kernel can have AF_UNIX support disabled */
774 bb_perror_msg_and_die("socketpair");
777 pid = BB_MMU ? xfork() : xvfork();
784 tls_state_t *tls = new_tls_state();
785 tls->ifd = tls->ofd = network_fd;
786 tls_handshake(tls, servername);
787 tls_run_copy_loop(tls);
791 xmove_fd(network_fd, 3);
792 argv[0] = (char*)"ssl_client";
793 argv[1] = (char*)"-s3";
794 //TODO: if (!is_ip_address(servername))...
795 argv[2] = (char*)"-n";
796 argv[3] = servername;
798 BB_EXECVP(argv[0], argv);
799 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
807 xmove_fd(sp[0], network_fd);
811 static void NOINLINE retrieve_file_data(FILE *dfp)
813 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
814 # if ENABLE_FEATURE_WGET_TIMEOUT
815 unsigned second_cnt = G.timeout_seconds;
817 struct pollfd polldata;
819 polldata.fd = fileno(dfp);
820 polldata.events = POLLIN | POLLPRI;
822 progress_meter(PROGRESS_START);
827 /* Loops only if chunked */
830 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
831 /* Must use nonblocking I/O, otherwise fread will loop
832 * and *block* until it reads full buffer,
833 * which messes up progress bar and/or timeout logic.
834 * Because of nonblocking I/O, we need to dance
835 * very carefully around EAGAIN. See explanation at
838 ndelay_on(polldata.fd);
844 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
845 /* fread internally uses read loop, which in our case
846 * is usually exited when we get EAGAIN.
847 * In this case, libc sets error marker on the stream.
848 * Need to clear it before next fread to avoid possible
849 * rare false positive ferror below. Rare because usually
850 * fread gets more than zero bytes, and we don't fall
851 * into if (n <= 0) ...
856 rdsz = sizeof(G.wget_buf);
858 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
859 if ((int)G.content_len <= 0)
861 rdsz = (unsigned)G.content_len;
864 n = fread(G.wget_buf, 1, rdsz, dfp);
867 xwrite(G.output_fd, G.wget_buf, n);
868 #if ENABLE_FEATURE_WGET_STATUSBAR
873 if (G.content_len == 0)
876 #if ENABLE_FEATURE_WGET_TIMEOUT
877 second_cnt = G.timeout_seconds;
884 * If error occurs, or EOF is reached, the return value
885 * is a short item count (or zero).
886 * fread does not distinguish between EOF and error.
888 if (errno != EAGAIN) {
890 progress_meter(PROGRESS_END);
891 bb_perror_msg_and_die(bb_msg_read_error);
893 break; /* EOF, not error */
896 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
897 /* It was EAGAIN. There is no data. Wait up to one second
898 * then abort if timed out, or update the bar and try reading again.
900 if (safe_poll(&polldata, 1, 1000) == 0) {
901 # if ENABLE_FEATURE_WGET_TIMEOUT
902 if (second_cnt != 0 && --second_cnt == 0) {
903 progress_meter(PROGRESS_END);
904 bb_error_msg_and_die("download timed out");
907 /* We used to loop back to poll here,
908 * but there is no great harm in letting fread
909 * try reading anyway.
914 /* Need to do it _every_ second for "stalled" indicator
915 * to be shown properly.
917 progress_meter(PROGRESS_BUMP);
918 } /* while (reading data) */
920 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
922 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
927 fgets_and_trim(dfp, NULL); /* Eat empty line */
929 fgets_and_trim(dfp, NULL);
930 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
931 /* FIXME: error check? */
932 if (G.content_len == 0)
933 break; /* all done! */
936 * Note that fgets may result in some data being buffered in dfp.
937 * We loop back to fread, which will retrieve this data.
938 * Also note that code has to be arranged so that fread
939 * is done _before_ one-second poll wait - poll doesn't know
940 * about stdio buffering and can result in spurious one second waits!
944 /* If -c failed, we restart from the beginning,
945 * but we do not truncate file then, we do it only now, at the end.
946 * This lets the user ^C if his 99% complete 10 GB file download
947 * failed to restart *without* losing the almost complete file.
950 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
951 if (pos != (off_t)-1)
952 ftruncate(G.output_fd, pos);
955 /* Draw full bar and free its resources */
956 G.chunked = 0; /* makes it show 100% even for chunked download */
957 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
958 progress_meter(PROGRESS_END);
961 static void download_one_url(const char *url)
963 bool use_proxy; /* Use proxies if env vars are set */
965 len_and_sockaddr *lsa;
966 FILE *sfp; /* socket to web/ftp server */
967 FILE *dfp; /* socket to ftp server (data) */
969 char *fname_out_alloc;
970 char *redirected_path = NULL;
971 struct host_info server;
972 struct host_info target;
974 server.allocated = NULL;
975 target.allocated = NULL;
979 parse_url(url, &target);
981 /* Use the proxy if necessary */
982 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
984 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
985 //FIXME: what if protocol is https? Ok to use http_proxy?
986 use_proxy = (proxy && proxy[0]);
988 parse_url(proxy, &server);
991 server.port = target.port;
992 if (ENABLE_FEATURE_IPV6) {
993 //free(server.allocated); - can't be non-NULL
994 server.host = server.allocated = xstrdup(target.host);
996 server.host = target.host;
1000 if (ENABLE_FEATURE_IPV6)
1001 strip_ipv6_scope_id(target.host);
1003 /* If there was no -O FILE, guess output filename */
1004 fname_out_alloc = NULL;
1005 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1006 G.fname_out = bb_get_last_path_component_nostrip(target.path);
1007 /* handle "wget http://kernel.org//" */
1008 if (G.fname_out[0] == '/' || !G.fname_out[0])
1009 G.fname_out = (char*)"index.html";
1010 /* -P DIR is considered only if there was no -O FILE */
1012 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
1014 /* redirects may free target.path later, need to make a copy */
1015 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
1018 #if ENABLE_FEATURE_WGET_STATUSBAR
1019 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
1022 /* Determine where to start transfer */
1024 if (option_mask32 & WGET_OPT_CONTINUE) {
1025 G.output_fd = open(G.fname_out, O_WRONLY);
1026 if (G.output_fd >= 0) {
1027 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
1029 /* File doesn't exist. We do not create file here yet.
1030 * We are not sure it exists on remote side */
1035 lsa = xhost2sockaddr(server.host, server.port);
1036 if (!(option_mask32 & WGET_OPT_QUIET)) {
1037 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1038 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1042 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1045 if (use_proxy || target.protocol != P_FTP) {
1052 /* Open socket to http(s) server */
1053 #if ENABLE_FEATURE_WGET_OPENSSL
1054 /* openssl (and maybe internal TLS) support is configured */
1055 if (target.protocol == P_HTTPS) {
1056 /* openssl-based helper
1057 * Inconvenient API since we can't give it an open fd
1059 int fd = spawn_https_helper_openssl(server.host, server.port);
1060 # if ENABLE_FEATURE_WGET_HTTPS
1061 if (fd < 0) { /* no openssl? try internal */
1062 sfp = open_socket(lsa);
1063 spawn_ssl_client(server.host, fileno(sfp));
1067 /* We don't check for exec("openssl") failure in this case */
1069 sfp = fdopen(fd, "r+");
1071 bb_perror_msg_and_die(bb_msg_memory_exhausted);
1074 sfp = open_socket(lsa);
1076 #elif ENABLE_FEATURE_WGET_HTTPS
1077 /* Only internal TLS support is configured */
1078 sfp = open_socket(lsa);
1079 if (target.protocol == P_HTTPS)
1080 spawn_ssl_client(server.host, fileno(sfp));
1082 /* ssl (https) support is not configured */
1083 sfp = open_socket(lsa);
1085 /* Send HTTP request */
1087 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1088 target.protocol, target.host,
1091 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1092 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1095 if (!USR_HEADER_HOST)
1096 SENDFMT(sfp, "Host: %s\r\n", target.host);
1097 if (!USR_HEADER_USER_AGENT)
1098 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1100 /* Ask server to close the connection as soon as we are done
1101 * (IOW: we do not intend to send more requests)
1103 SENDFMT(sfp, "Connection: close\r\n");
1105 #if ENABLE_FEATURE_WGET_AUTHENTICATION
1106 if (target.user && !USR_HEADER_AUTH) {
1107 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1108 base64enc(target.user));
1110 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1111 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1112 base64enc(server.user));
1116 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1117 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1119 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1120 if (G.extra_headers) {
1121 log_io(G.extra_headers);
1122 fputs(G.extra_headers, sfp);
1125 if (option_mask32 & WGET_OPT_POST_DATA) {
1127 "Content-Type: application/x-www-form-urlencoded\r\n"
1128 "Content-Length: %u\r\n"
1131 (int) strlen(G.post_data), G.post_data
1136 SENDFMT(sfp, "\r\n");
1141 /* Tried doing this unconditionally.
1142 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1145 if (target.protocol == P_HTTPS) {
1146 /* If we use SSL helper, keeping our end of the socket open for writing
1147 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1148 * even after child closes its copy of the fd.
1151 shutdown(fileno(sfp), SHUT_WR);
1156 * Retrieve HTTP response line and check for "200" status code.
1159 fgets_and_trim(sfp, " %s\n");
1162 str = skip_non_whitespace(str);
1163 str = skip_whitespace(str);
1164 // FIXME: no error check
1165 // xatou wouldn't work: "200 OK"
1170 while (gethdr(sfp) != NULL)
1171 /* eat all remaining headers */;
1174 /* Success responses */
1177 case 201: /* 201 Created */
1178 /* "The request has been fulfilled and resulted in a new resource being created" */
1179 /* Standard wget is reported to treat this as success */
1181 case 202: /* 202 Accepted */
1182 /* "The request has been accepted for processing, but the processing has not been completed" */
1183 /* Treat as success: fall through */
1184 case 203: /* 203 Non-Authoritative Information */
1185 /* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1187 case 204: /* 204 No Content */
1189 Response 204 doesn't say "null file", it says "metadata
1190 has changed but data didn't":
1192 "10.2.5 204 No Content
1193 The server has fulfilled the request but does not need to return
1194 an entity-body, and might want to return updated metainformation.
1195 The response MAY include new or updated metainformation in the form
1196 of entity-headers, which if present SHOULD be associated with
1197 the requested variant.
1199 If the client is a user agent, it SHOULD NOT change its document
1200 view from that which caused the request to be sent. This response
1201 is primarily intended to allow input for actions to take place
1202 without causing a change to the user agent's active document view,
1203 although any new or updated metainformation SHOULD be applied
1204 to the document currently in the user agent's active view.
1206 The 204 response MUST NOT include a message-body, and thus
1207 is always terminated by the first empty line after the header fields."
1209 However, in real world it was observed that some web servers
1210 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1212 if (G.beg_range != 0) {
1213 /* "Range:..." was not honored by the server.
1214 * Restart download from the beginning.
1216 reset_beg_range_to_zero();
1219 /* 205 Reset Content ?? what to do on this ?? */
1221 case 300: /* redirection */
1227 case 206: /* Partial Content */
1228 if (G.beg_range != 0)
1229 /* "Range:..." worked. Good. */
1231 /* Partial Content even though we did not ask for it??? */
1234 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1238 * Retrieve HTTP headers.
1240 while ((str = gethdr(sfp)) != NULL) {
1241 static const char keywords[] ALIGN1 =
1242 "content-length\0""transfer-encoding\0""location\0";
1244 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1248 /* gethdr converted "FOO:" string to lowercase */
1250 /* strip trailing whitespace */
1251 char *s = strchrnul(str, '\0') - 1;
1252 while (s >= str && (*s == ' ' || *s == '\t')) {
1256 key = index_in_strings(keywords, G.wget_buf) + 1;
1257 if (key == KEY_content_length) {
1258 G.content_len = BB_STRTOOFF(str, NULL, 10);
1259 if (G.content_len < 0 || errno) {
1260 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1265 if (key == KEY_transfer_encoding) {
1266 if (strcmp(str_tolower(str), "chunked") != 0)
1267 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1270 if (key == KEY_location && status >= 300) {
1271 if (--redir_limit == 0)
1272 bb_error_msg_and_die("too many redirections");
1274 if (str[0] == '/') {
1275 free(redirected_path);
1276 target.path = redirected_path = xstrdup(str+1);
1277 /* lsa stays the same: it's on the same server */
1279 parse_url(str, &target);
1281 /* server.user remains untouched */
1282 free(server.allocated);
1283 server.allocated = NULL;
1284 server.host = target.host;
1285 /* strip_ipv6_scope_id(target.host); - no! */
1286 /* we assume remote never gives us IPv6 addr with scope id */
1287 server.port = target.port;
1290 } /* else: lsa stays the same: we use proxy */
1292 goto establish_session;
1295 // if (status >= 300)
1296 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1298 /* For HTTP, data is pumped over the same connection */
1304 sfp = prepare_ftp_session(&dfp, &target, lsa);
1309 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1310 if (G.output_fd < 0)
1311 G.output_fd = xopen(G.fname_out, G.o_flags);
1312 retrieve_file_data(dfp);
1313 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1314 xclose(G.output_fd);
1320 /* It's ftp. Close data connection properly */
1322 if (ftpcmd(NULL, NULL, sfp) != 226)
1323 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1324 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1328 free(server.allocated);
1329 free(target.allocated);
1332 free(fname_out_alloc);
1333 free(redirected_path);
/*
 * wget applet entry point.
 *
 * argc is unused (UNUSED_PARAM); URLs are taken from argv.
 * The function fills in defaults in G (timeout, proxy use, User-Agent),
 * lets option arguments overwrite them, flattens any --header values
 * into G.extra_headers, chooses the open(2) flags for the output file,
 * hands URL arguments to download_one_url() and finally closes the
 * output descriptor if one is open.
 *
 * Returns EXIT_SUCCESS.
 */
1336 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1337 int wget_main(int argc UNUSED_PARAM, char **argv)
1339 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * Long option table. Each entry is the NUL-terminated long name,
 * a has-argument marker and a one-character value string.
 * --header, --post-data and --spider use the values \xff, \xfe and \xfd;
 * every accepted-but-ignored option shares the value \xf0.
 */
1340 static const char wget_longopts[] ALIGN1 =
1341 /* name, has_arg, val */
1342 "continue\0" No_argument "c"
1343 "quiet\0" No_argument "q"
1344 "server-response\0" No_argument "S"
1345 "output-document\0" Required_argument "O"
1346 "directory-prefix\0" Required_argument "P"
1347 "proxy\0" Required_argument "Y"
1348 "user-agent\0" Required_argument "U"
1349 IF_FEATURE_WGET_TIMEOUT(
1350 "timeout\0" Required_argument "T")
1352 IF_DESKTOP( "tries\0" Required_argument "t")
1353 "header\0" Required_argument "\xff"
1354 "post-data\0" Required_argument "\xfe"
1355 "spider\0" No_argument "\xfd"
1356 /* Ignored (we always use PASV): */
1357 IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
1358 /* Ignored (we don't do ssl) */
1359 IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
1360 /* Ignored (we don't support caching) */
1361 IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1362 IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1363 IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1364 IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1365 IF_DESKTOP( "no-parent\0" No_argument "\xf0")
/*
 * Select the option parser: getopt32long with the table above appended
 * through LONGOPTS, or plain getopt32 (presumably the alternative
 * preprocessor branch when long options are disabled).
 */
1367 # define GETOPT32 getopt32long
1368 # define LONGOPTS ,wget_longopts
1370 # define GETOPT32 getopt32
1374 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Collects the --header values; consumed and emptied further down */
1375 llist_t *headers_llist = NULL;
/*
 * Defaults. G.timeout_seconds, G.proxy_flag and G.user_agent are also
 * passed by address in the option-argument list below, so the matching
 * options can replace these values.
 */
1380 #if ENABLE_FEATURE_WGET_TIMEOUT
1381 G.timeout_seconds = 900;
1382 signal(SIGALRM, alarm_handler);
1384 G.proxy_flag = "on"; /* use proxies if env vars are set */
1385 G.user_agent = "Wget"; /* "User-Agent" header field */
1387 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1393 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1394 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1395 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1396 * -nH --no-host-directories: wget -r http://host/ won't create host/
1398 * "n::" above says that we accept -n[ARG].
1399 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1402 "-1" /* at least one URL */
1403 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
1405 , &G.fname_out, &G.dir_prefix,
1406 &G.proxy_flag, &G.user_agent,
1407 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1408 NULL, /* -t RETRIES */
1410 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1411 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1413 #if 0 /* option bits debug */
1414 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1415 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1416 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1417 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1418 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
/*
 * Flatten the --header list into a single buffer, G.extra_headers.
 * First pass: add up the space needed, strlen + 2 per header for the
 * trailing "\r\n"; one more byte is allocated for the final NUL.
 * Second pass: pop each header off the list and print it, followed by
 * "\r\n", into the buffer.
 */
1423 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1424 if (headers_llist) {
1427 llist_t *ll = headers_llist;
1429 size += strlen(ll->data) + 2;
1432 G.extra_headers = hdr = xmalloc(size + 1);
1433 while (headers_llist) {
1437 size = sprintf(hdr, "%s\r\n",
1438 (char*)llist_pop(&headers_llist));
1439 /* a bit like index_in_substrings but don't match full key */
/*
 * Walk the NUL-separated words in wget_user_headers.
 * strstr(hdr, words) == hdr is a prefix test: it holds only when the
 * header just written starts with the word. On a match the word's bit
 * is recorded in G.user_headers.
 */
1441 words = wget_user_headers;
1443 if (strstr(hdr, words) == hdr) {
1444 G.user_headers |= bit;
1448 words += strlen(words) + 1;
/*
 * Output open flags. The default includes O_EXCL, so creating the
 * output file fails if it already exists. With -O FILE the flag is
 * dropped and an existing file is truncated instead. With "-O -" the
 * continue option bit is cleared.
 */
1456 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1457 if (G.fname_out) { /* -O FILE ? */
1458 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1460 option_mask32 &= ~WGET_OPT_CONTINUE;
1462 /* compat with wget: -O FILE can overwrite */
1463 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
/* Fetch the URL argv currently points at, then advance argv past it */
1467 download_one_url(*argv++);
/* Close the output descriptor if one is still open (fd >= 0) */
1469 if (G.output_fd >= 0)
1470 xclose(G.output_fd);
/* The flattened header buffer is released only in clean-up builds */
1472 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1473 free(G.extra_headers);
1477 return EXIT_SUCCESS;