1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
16 //config: wget is a utility for non-interactive download of files from HTTP
17 //config: and FTP servers.
19 //config:config FEATURE_WGET_LONG_OPTIONS
20 //config: bool "Enable long options"
22 //config: depends on WGET && LONG_OPTS
24 //config:config FEATURE_WGET_STATUSBAR
25 //config: bool "Enable progress bar (+2k)"
27 //config: depends on WGET
29 //config:config FEATURE_WGET_AUTHENTICATION
30 //config: bool "Enable HTTP authentication"
32 //config: depends on WGET
34 //config: Support authenticated HTTP transfers.
36 //config:config FEATURE_WGET_TIMEOUT
37 //config: bool "Enable timeout option -T SEC"
39 //config: depends on WGET
41 //config: Supports network read and connect timeouts for wget,
42 //config: so that wget will give up and time out, through the -T
43 //config: command line option.
45 //config: Currently only connect and network data read timeout are
46 //config: supported (i.e., timeout is not applied to the DNS query). When
47 //config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
48 //config: will work in addition to -T.
50 //config:config FEATURE_WGET_OPENSSL
51 //config: bool "Try to connect to HTTPS using openssl"
53 //config: depends on WGET
55 //config: Choose how wget establishes SSL connection for https:// URLs.
57 //config: Busybox itself contains no SSL code. wget will spawn
58 //config: a helper program to talk over HTTPS.
60 //config: OpenSSL has a simple SSL client for debug purposes.
61 //config: If you select "openssl" helper, wget will effectively run:
62 //config: "openssl s_client -quiet -connect hostname:443
63 //config: -servername hostname 2>/dev/null" and pipe its data
64 //config: through it. -servername is not used if hostname is numeric.
65 //config: Note inconvenient API: host resolution is done twice,
66 //config: and there is no guarantee openssl's idea of IPv6 address
67 //config: format is the same as ours.
68 //config: Another problem is that s_client prints debug information
69 //config: to stderr, and it needs to be suppressed. This means
70 //config: all error messages get suppressed too.
71 //config: openssl is also a big binary, often dynamically linked
72 //config: against ~15 libraries.
74 //config:config FEATURE_WGET_SSL_HELPER
75 //config: bool "Try to connect to HTTPS using ssl_helper"
77 //config: depends on WGET
79 //config: Choose how wget establishes SSL connection for https:// URLs.
81 //config: Busybox itself contains no SSL code. wget will spawn
82 //config: a helper program to talk over HTTPS.
84 //config: ssl_helper is a tool which can be built statically
85 //config: from busybox sources against a small embedded SSL library.
86 //config: Please see networking/ssl_helper/README.
87 //config: It does not require double host resolution and emits
88 //config: error messages to stderr.
90 //config: Precompiled static binary may be available at
91 //config: http://busybox.net/downloads/binaries/
93 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
95 //kbuild:lib-$(CONFIG_WGET) += wget.o
97 //usage:#define wget_trivial_usage
98 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
99 //usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
100 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
101 /* Since we ignore these opts, we don't show them in --help */
102 /* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
103 /* //usage: " [-nv] [-nc] [-nH] [-np]" */
104 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
106 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
107 //usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
108 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
110 //usage:#define wget_full_usage "\n\n"
111 //usage: "Retrieve files via HTTP or FTP\n"
112 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
113 //usage: "\n --spider Spider mode - only check file existence"
115 //usage: "\n -c Continue retrieval of aborted transfer"
116 //usage: "\n -q Quiet"
117 //usage: "\n -P DIR Save to DIR (default .)"
118 //usage: IF_FEATURE_WGET_TIMEOUT(
119 //usage: "\n -T SEC Network read timeout is SEC seconds"
121 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
122 //usage: "\n -U STR Use STR for User-Agent header"
123 //usage: "\n -Y on/off Use proxy"
/*
 * I/O logging helpers. Two alternative definitions are visible here (the
 * preprocessor conditional selecting between them is not shown in this view):
 *  - logging variant: log_io() forwards its arguments to bb_error_msg(), and
 *    SENDFMT() first logs the formatted text prefixed with "> " and then
 *    writes the same text to fp with fprintf();
 *  - silent variant: log_io() expands to a no-op expression and SENDFMT()
 *    is a plain fprintf().
 */
128 # define log_io(...) bb_error_msg(__VA_ARGS__)
129 # define SENDFMT(fp, fmt, ...) \
131 log_io("> " fmt, ##__VA_ARGS__); \
132 fprintf(fp, fmt, ##__VA_ARGS__); \
135 # define log_io(...) ((void)0)
136 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
/* Nonzero when at least one of the two HTTPS helper features is enabled. */
140 #define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER)
146 const char *protocol;
/*
 * Canonical protocol name strings. parse_url() compares the URL scheme with
 * them using strcmp() and stores these exact pointers in host_info.protocol;
 * download_one_url() then tests the protocol by pointer identity
 * (e.g. target.protocol == P_FTP), not by string comparison.
 */
150 static const char P_FTP[] ALIGN1 = "ftp";
151 static const char P_HTTP[] ALIGN1 = "http";
153 static const char P_HTTPS[] ALIGN1 = "https";
/*
 * Bookkeeping for request headers supplied by the user (long options only).
 * Each HDR_* value is a bit in G.user_headers; the USR_HEADER_* macros test
 * those bits, and download_one_url() skips its built-in header when the
 * matching macro is nonzero. The authentication bits are multiplied by
 * ENABLE_FEATURE_WGET_AUTHENTICATION, so they are 0 when that feature is off.
 * Without long options every USR_HEADER_* macro is the constant 0.
 * NOTE(review): only part of the enum and of the wget_user_headers string
 * table is visible in this view.
 */
156 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
157 /* User-specified headers prevent using our corresponding built-in headers. */
160 HDR_USER_AGENT = (1<<1),
162 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
163 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
165 static const char wget_user_headers[] ALIGN1 =
169 # if ENABLE_FEATURE_WGET_AUTHENTICATION
171 "Proxy-Authorization:\0"
174 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
175 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
176 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
177 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
178 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
179 #else /* No long options, no user-headers :( */
180 # define USR_HEADER_HOST 0
181 # define USR_HEADER_USER_AGENT 0
182 # define USR_HEADER_RANGE 0
183 # define USR_HEADER_AUTH 0
184 # define USR_HEADER_PROXY_AUTH 0
189 off_t content_len; /* Content-length of the file */
190 off_t beg_range; /* Range at which continue begins */
191 #if ENABLE_FEATURE_WGET_STATUSBAR
192 off_t transferred; /* Number of bytes transferred so far */
193 const char *curfile; /* Name of current file being transferred */
197 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
200 unsigned char user_headers; /* Headers mentioned by the user */
202 char *fname_out; /* where to direct output (-O) */
203 const char *proxy_flag; /* Use proxies if env vars are set */
204 const char *user_agent; /* "User-Agent" header field */
205 #if ENABLE_FEATURE_WGET_TIMEOUT
206 unsigned timeout_seconds;
207 bool die_if_timed_out;
211 smallint chunked; /* chunked transfer encoding */
212 smallint got_clen; /* got content-length: from server */
213 /* Local downloads do benefit from big buffer.
214 * With 512 byte buffer, it was measured to be
215 * an order of magnitude slower than with big one.
217 uint64_t just_to_align_next_member;
218 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
/*
 * G is shorthand for the globals structure reached through ptr_to_globals.
 * INIT_G() points ptr_to_globals at a block of sizeof(G) bytes obtained from
 * xzalloc(); FINI_G() invokes FREE_PTR_TO_GLOBALS().
 * NOTE(review): any further statements inside these macros are not visible
 * in this view.
 */
220 #define G (*ptr_to_globals)
221 #define INIT_G() do { \
222 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
224 #define FINI_G() do { \
225 FREE_PTR_TO_GLOBALS(); \
/*
 * Bit flags tested against option_mask32, one bit per command-line option.
 * The last three flags (HEADER, POST_DATA, SPIDER) are multiplied by
 * ENABLE_FEATURE_WGET_LONG_OPTIONS, so they evaluate to 0 when long options
 * are disabled and any test against them is then constant false.
 */
229 /* Must match option string! */
231 WGET_OPT_CONTINUE = (1 << 0),
232 WGET_OPT_QUIET = (1 << 1),
233 WGET_OPT_OUTNAME = (1 << 2),
234 WGET_OPT_PREFIX = (1 << 3),
235 WGET_OPT_PROXY = (1 << 4),
236 WGET_OPT_USER_AGENT = (1 << 5),
237 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 6),
238 WGET_OPT_RETRIES = (1 << 7),
239 WGET_OPT_nsomething = (1 << 8),
240 WGET_OPT_HEADER = (1 << 9) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
241 WGET_OPT_POST_DATA = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
242 WGET_OPT_SPIDER = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
/*
 * Progress bar driver. The first definition is the real one (status bar
 * feature enabled); the empty ALWAYS_INLINE definition at the bottom is the
 * alternative used otherwise.
 *
 * flag: PROGRESS_START initializes the bar for G.curfile; PROGRESS_END frees
 *       the bar and prints a newline on stderr. Callers also pass
 *       PROGRESS_BUMP for periodic refreshes.
 *
 * The bar is fed through bb_progress_update(); the total size passed is 0
 * when it is unknown (chunked transfer, or no Content-Length received),
 * otherwise G.beg_range + G.transferred + G.content_len.
 * The WGET_OPT_QUIET test guards the function; the statement it controls is
 * not visible in this view (presumably an early return).
 */
250 #if ENABLE_FEATURE_WGET_STATUSBAR
251 static void progress_meter(int flag)
253 if (option_mask32 & WGET_OPT_QUIET)
256 if (flag == PROGRESS_START)
257 bb_progress_init(&G.pmt, G.curfile);
259 bb_progress_update(&G.pmt,
262 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
265 if (flag == PROGRESS_END) {
266 bb_progress_free(&G.pmt);
267 bb_putchar_stderr('\n');
272 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
276 /* IPv6 has scoped address types, i.e. link-local and site-local addresses.
277 * Link-local addresses can have a scope identifier to specify the
278 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
279 * identifier is only valid on a single node.
281 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
282 * unless all nodes agree on the semantics. Apache, for example, treats requests
283 * with zone identifiers in the Host header as invalid, see
284 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
/*
 * Removes an IPv6 zone (scope) identifier from a bracketed host string,
 * in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 *
 * host: modified in place. The text from '%' up to (not including) the
 *       closing ']' is cut out by copying the "]..." tail over it with
 *       overlapping_strcpy().
 *
 * Only "[addr]" and "[addr]:port" forms are accepted: after the closing ']'
 * there must be either ':' or the end of the string, anything else is
 * treated as malformed. The statements taken on the "not IPv6", "no scope"
 * and "malformed" paths are not all visible in this view.
 */
286 static void strip_ipv6_scope_id(char *host)
290 /* bbox wget actually handles IPv6 addresses without [], like
291 * wget "http://::1/xxx", but this is not standard.
292 * To save code, _here_ we do not support it. */
295 return; /* not IPv6 */
297 scope = strchr(host, '%');
301 /* Remove the IPv6 zone identifier from the host address */
302 cp = strchr(host, ']');
303 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
304 /* malformed address (not "[xx]:nn" or "[xx]") */
308 /* cp points to "]...", scope points to "%eth0]..." */
309 overlapping_strcpy(scope, cp);
/*
 * Encodes str with the base64 table into the shared buffer G.wget_buf
 * (via bb_uuencode). Only compiled with HTTP authentication support.
 *
 * Input longer than sizeof(G.wget_buf)/4*3 - 10 bytes is silently truncated
 * to that length before encoding, which bounds the encoded output to the
 * size of the buffer.
 * Because the output lives in G.wget_buf, it is overwritten by the next
 * user of that buffer.
 * NOTE(review): the return statement is not visible in this view; callers
 * pass the result as a %s argument, so it presumably returns G.wget_buf.
 */
312 #if ENABLE_FEATURE_WGET_AUTHENTICATION
313 /* Base64-encode character string. */
314 static char *base64enc(const char *str)
316 unsigned len = strlen(str);
317 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
318 len = sizeof(G.wget_buf)/4*3 - 10;
319 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
/*
 * Applied by callers to server- or user-supplied text right before it is
 * printed in an error message; the result is used as a %s argument.
 * The string is walked through an unsigned char pointer.
 * NOTE(review): the body is not visible in this view - presumably it
 * neutralizes non-printable bytes in place and returns s; confirm.
 */
324 static char* sanitize_string(char *s)
326 unsigned char *p = (void *) s;
/*
 * SIGALRM-based timeout support (timeout feature enabled), with no-op
 * fallbacks at the bottom for builds without it.
 *
 * set_alarm():    if G.timeout_seconds is nonzero, arms alarm() with that
 *                 value and sets G.die_if_timed_out.
 * clear_alarm():  only resets G.die_if_timed_out; it does not cancel the
 *                 pending alarm, it just makes the handler do nothing.
 * alarm_handler(): dies with "download timed out" if the flag is still set
 *                 when the signal arrives.
 */
333 #if ENABLE_FEATURE_WGET_TIMEOUT
334 static void alarm_handler(int sig UNUSED_PARAM)
336 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
337 if (G.die_if_timed_out)
338 bb_error_msg_and_die("download timed out");
340 static void set_alarm(void)
342 if (G.timeout_seconds) {
343 alarm(G.timeout_seconds);
344 G.die_if_timed_out = 1;
347 # define clear_alarm() ((void)(G.die_if_timed_out = 0))
349 # define set_alarm() ((void)0)
350 # define clear_alarm() ((void)0)
/*
 * Returns 1 if string is accepted by inet_pton() as a numeric IPv4 address
 * or, when IPv6 support is enabled, as a numeric IPv6 address; returns 0
 * otherwise. An inet_pton() result of -1 (error) is also reported as 0,
 * because only a result of exactly 1 counts as a match.
 * Used by the openssl helper to decide whether -servername may be passed.
 */
353 #if ENABLE_FEATURE_WGET_OPENSSL
355 * is_ip_address() attempts to verify whether or not a string
356 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
357 * of inet_pton() can be used to determine this.
359 * TODO add proper error checking when inet_pton() returns -1
360 * (some form of system error has occurred, and errno is set)
362 static int is_ip_address(const char *string)
364 struct sockaddr_in sa;
366 int result = inet_pton(AF_INET, string, &(sa.sin_addr));
367 # if ENABLE_FEATURE_IPV6
369 struct sockaddr_in6 sa6;
370 result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
373 return (result == 1);
/*
 * Connects a stream socket to lsa with xconnect_stream() and wraps the
 * descriptor in a read/write stdio stream (fdopen with "r+").
 * Dies with the memory-exhausted message when the stream cannot be created
 * (the check guarding that call is not visible in this view).
 * NOTE(review): the return statement is not visible; callers use the result
 * as the connected FILE pointer.
 */
377 static FILE *open_socket(len_and_sockaddr *lsa)
383 fd = xconnect_stream(lsa);
386 /* glibc 2.4 seems to try seeking on it - ??! */
387 /* hopefully it understands what ESPIPE means... */
388 fp = fdopen(fd, "r+");
390 bb_perror_msg_and_die(bb_msg_memory_exhausted);
/*
 * Reads one line from fp into the shared buffer G.wget_buf; fgets() is given
 * sizeof(G.wget_buf) - 1 as its size limit. Dies with
 * "error getting response" if fgets() returns NULL (EOF or read error).
 * The line is cut at the first newline / carriage return and then passed to
 * log_io() with a "< " prefix. The return value tells the caller whether the
 * whole line fit into the buffer (see the one-line summary below).
 */
395 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
396 static char fgets_and_trim(FILE *fp)
402 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
403 bb_perror_msg_and_die("error getting response");
406 buf_ptr = strchrnul(G.wget_buf, '\n');
409 buf_ptr = strchrnul(G.wget_buf, '\r');
412 log_io("< %s", G.wget_buf);
/*
 * Sends one FTP command and reads the reply.
 *
 * s1, s2: written to fp as s1 followed by s2 and CRLF, and logged via
 *         log_io(). Callers pass NULL for s1 to only read a reply; the guard
 *         for that case is not visible in this view.
 * fp:     control connection.
 *
 * Reply lines are read until one starts with a digit and has a space at
 * index 3, i.e. a "NNN text" line. The buffer is then cut after the three
 * status characters and the code is converted with xatoi_positive().
 * Callers compare the result with FTP reply codes and read the reply text
 * at G.wget_buf + 4.
 */
417 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
423 fprintf(fp, "%s%s\r\n", s1, s2);
425 log_io("> %s%s", s1, s2);
430 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
432 G.wget_buf[3] = '\0';
433 result = xatoi_positive(G.wget_buf);
/*
 * Splits src_url into the fields of *h.
 *
 * The function works on a private copy of the URL stored in h->allocated
 * (freed later by download_one_url), so the pointers it stores in *h point
 * into that copy.
 *
 *  - scheme: the text before "://" is compared with "ftp", "https" and
 *    "http"; the match selects h->port through bb_lookup_port() (fallback
 *    ports 21, 443 and 80) and h->protocol. Any other scheme is fatal
 *    ("not an http or ftp url").
 *  - path: begins at the first '/', '?' or '#' found after the host,
 *    whichever comes first. For '?' and '#' the host is shifted one byte to
 *    the left with memmove() to make room (see the comment at that spot).
 *  - credentials: if the host part contains '@', the text before the last
 *    '@' is percent-decoded in place and duplicated into h->user.
 *
 * NOTE(review): several statements (string termination, handling of URLs
 * without "://", the assignments of h->host and h->path) are not visible
 * in this view.
 */
438 static void parse_url(const char *src_url, struct host_info *h)
443 h->allocated = url = xstrdup(src_url);
446 p = strstr(url, "://");
450 if (strcmp(url, P_FTP) == 0) {
451 h->port = bb_lookup_port(P_FTP, "tcp", 21);
454 if (strcmp(url, P_HTTPS) == 0) {
455 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
456 h->protocol = P_HTTPS;
459 if (strcmp(url, P_HTTP) == 0) {
461 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
462 h->protocol = P_HTTP;
465 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
468 // GNU wget is user-friendly and falls back to http://
474 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
475 // 'GET /?var=a/b HTTP 1.0'
476 // and saves 'index.html?var=a%2Fb' (we save 'b')
477 // wget 'http://busybox.net?login=john@doe':
478 // request: 'GET /?login=john@doe HTTP/1.0'
479 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
480 // wget 'http://busybox.net#test/test':
481 // request: 'GET / HTTP/1.0'
482 // saves: 'index.html' (we save 'test')
484 // We also don't add unique .N suffix if file exists...
485 sp = strchr(h->host, '/');
486 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
487 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
490 } else if (*sp == '/') {
493 } else { // '#' or '?'
494 // http://busybox.net?login=john@doe is a valid URL
495 // memmove converts to:
496 // http:/busybox.nett?login=john@doe...
497 memmove(h->host - 1, h->host, sp - h->host);
503 sp = strrchr(h->host, '@');
505 // URL-decode "user:password" string before base64-encoding:
506 // wget http://test:my%20pass@example.com should send
507 // Authorization: Basic dGVzdDpteSBwYXNz
508 // which decodes to "test:my pass".
509 // Standard wget and curl do this too.
512 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
515 /* else: h->user remains NULL, or as set by original request
516 * before redirect (if we are here after a redirect).
/*
 * Reads one HTTP header line from fp into G.wget_buf and splits it into
 * name and value.
 *
 * An empty line marks the end of the headers; callers loop until the
 * function returns NULL. Otherwise the header name (letters, digits, '-',
 * '.', '_') is converted to lower case in place, a malformed name is fatal
 * ("bad header line"), and the value is located by skipping the whitespace
 * that follows the name. Callers look the name up in G.wget_buf and use
 * the returned pointer as the value.
 * If the line did not fit into the buffer, the rest of it up to the newline
 * (or EOF) is read and discarded.
 * NOTE(review): the return statements and the exact name terminator check
 * are not visible in this view.
 */
520 static char *gethdr(FILE *fp)
525 /* retrieve header line */
526 c = fgets_and_trim(fp);
528 /* end of the headers? */
529 if (G.wget_buf[0] == '\0')
532 /* convert the header name to lower case */
533 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
535 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
536 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
537 * "A-Z" maps to "a-z".
538 * "@[\]" can't occur in header names.
539 * "^_" maps to "~,DEL" (which is wrong).
540 * "^" was never seen yet, "_" was seen from web.archive.org
541 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
546 /* verify we are at the end of the header name */
548 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
550 /* locate the start of the header value */
552 hdrval = skip_whitespace(s);
555 /* Rats! The buffer isn't big enough to hold the entire header value */
556 while (c = getc(fp), c != EOF && c != '\n')
/*
 * Called when the server did not honor a resume request (HTTP "Range:" or
 * FTP "REST"): reports "restart failed" and rewinds the output file to
 * offset 0 so that the download starts over from the beginning.
 * The file is deliberately not truncated here; that happens at the end of
 * the transfer.
 * NOTE(review): the name implies G.beg_range is reset as well; that
 * statement is not visible in this view.
 */
563 static void reset_beg_range_to_zero(void)
565 bb_error_msg("restart failed");
567 xlseek(G.output_fd, 0, SEEK_SET);
568 /* Done at the end instead: */
569 /* ftruncate(G.output_fd, 0); */
/*
 * Logs in to the FTP server at lsa and starts a passive-mode download of
 * target->path.
 *
 * dfpp:   receives the data connection stream.
 * target: parsed URL; target->user is set to "anonymous:busybox@" when the
 *         URL carried no credentials (the guarding test is not visible in
 *         this view) and is split at ':' into user name and password.
 * lsa:    server address; its port is overwritten with the data port from
 *         the PASV reply before the data connection is opened.
 *
 * Sequence: expect greeting 220; USER (230 = logged in, 331 = send PASS,
 * which must yield 230); TYPE I; SIZE, whose value is stored in
 * G.content_len (negative or unparsable values are fatal); PASV, expecting
 * 227, with the port taken from the last two comma-separated numbers as
 * P1*256+P2; open the data connection; REST when resuming (350 reduces
 * G.content_len by G.beg_range, anything else restarts from zero); RETR,
 * where a reply code above 150 is fatal.
 * Any failed step terminates the program with the server reply text.
 * NOTE(review): the return statement is not visible; the caller stores the
 * result as the control connection stream.
 */
572 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
579 target->user = xstrdup("anonymous:busybox@");
581 sfp = open_socket(lsa);
582 if (ftpcmd(NULL, NULL, sfp) != 220)
583 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
586 * Splitting username:password pair,
589 str = strchr(target->user, ':');
592 switch (ftpcmd("USER ", target->user, sfp)) {
596 if (ftpcmd("PASS ", str, sfp) == 230)
598 /* fall through (failed login) */
600 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
603 ftpcmd("TYPE I", NULL, sfp);
608 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
609 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
610 if (G.content_len < 0 || errno) {
611 bb_error_msg_and_die("SIZE value is garbage");
617 * Entering passive mode
619 if (ftpcmd("PASV", NULL, sfp) != 227) {
621 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
623 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
624 // Server's IP is N1.N2.N3.N4 (we ignore it)
625 // Server's port for data connection is P1*256+P2
626 str = strrchr(G.wget_buf, ')');
627 if (str) str[0] = '\0';
628 str = strrchr(G.wget_buf, ',');
629 if (!str) goto pasv_error;
630 port = xatou_range(str+1, 0, 255);
632 str = strrchr(G.wget_buf, ',');
633 if (!str) goto pasv_error;
634 port += xatou_range(str+1, 0, 255) * 256;
635 set_nport(&lsa->u.sa, htons(port));
637 *dfpp = open_socket(lsa);
639 if (G.beg_range != 0) {
640 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
641 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
642 G.content_len -= G.beg_range;
644 reset_beg_range_to_zero();
647 if (ftpcmd("RETR ", target->path, sfp) > 150)
648 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
/*
 * Starts "openssl s_client -quiet -connect HOST:PORT [-servername HOST]" as
 * a helper process that speaks TLS on our behalf; we talk to it through an
 * AF_UNIX socketpair.
 *
 * host: server name or address; ":port" is appended unless host already
 *       contains a ':'.
 * port: port used for that suffix.
 *
 * servername is a copy of the host string with everything from the last ':'
 * removed; it is passed with -servername only when it is not a numeric IP
 * address (see is_ip_address). /dev/null is opened for the helper so that
 * its stderr chatter is discarded, which also hides its error messages.
 * If the helper cannot be executed and no ssl_helper fallback is compiled
 * in, the failure is fatal ("can't execute").
 * NOTE(review): the fork, the descriptor shuffling and the return
 * statements are not visible in this view; the caller treats a negative
 * result as "openssl unavailable, try ssl_helper" and otherwise uses the
 * result as a connected descriptor.
 */
653 #if ENABLE_FEATURE_WGET_OPENSSL
654 static int spawn_https_helper_openssl(const char *host, unsigned port)
656 char *allocated = NULL;
660 IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)
662 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
663 /* Kernel can have AF_UNIX support disabled */
664 bb_perror_msg_and_die("socketpair");
666 if (!strchr(host, ':'))
667 host = allocated = xasprintf("%s:%u", host, port);
668 servername = xstrdup(host);
669 strrchr(servername, ':')[0] = '\0';
681 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
682 * It prints some debug stuff on stderr, don't know how to suppress it.
683 * Work around by dev-nulling stderr. We lose all error messages :(
686 xopen("/dev/null", O_RDWR);
687 memset(&argv, 0, sizeof(argv));
688 argv[0] = (char*)"openssl";
689 argv[1] = (char*)"s_client";
690 argv[2] = (char*)"-quiet";
691 argv[3] = (char*)"-connect";
692 argv[4] = (char*)host;
694 * Per RFC 6066 Section 3, the only permitted values in the
695 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
696 * IPv4 and IPv6 addresses, port numbers are not allowed.
698 if (!is_ip_address(servername)) {
699 argv[5] = (char*)"-servername";
700 argv[6] = (char*)servername;
703 BB_EXECVP(argv[0], argv);
705 # if ENABLE_FEATURE_WGET_SSL_HELPER
709 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
718 # if ENABLE_FEATURE_WGET_SSL_HELPER
/*
 * Inserts the external "ssl_helper" program between us and an already
 * connected socket.
 *
 * network_fd: descriptor of the connected TCP socket. In the child it is
 *             moved to descriptor 3 and the helper is run as
 *             "ssl_helper -d3". In the parent, one end of a fresh AF_UNIX
 *             socketpair is moved onto network_fd, so code that keeps using
 *             this descriptor number now talks to the helper.
 *
 * The child is created with xfork() on MMU systems and xvfork() otherwise.
 * Failure to create the socketpair or to execute the helper is fatal.
 * NOTE(review): the setup of the child's other descriptors is not visible
 * in this view.
 */
728 /* See networking/ssl_helper/README how to build one */
729 #if ENABLE_FEATURE_WGET_SSL_HELPER
730 static void spawn_https_helper_small(int network_fd)
735 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
736 /* Kernel can have AF_UNIX support disabled */
737 bb_perror_msg_and_die("socketpair");
739 pid = BB_MMU ? xfork() : xvfork();
747 xmove_fd(network_fd, 3);
749 * A simple ssl/tls helper
751 argv[0] = (char*)"ssl_helper";
752 argv[1] = (char*)"-d3";
754 BB_EXECVP(argv[0], argv);
755 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
761 xmove_fd(sp[0], network_fd);
/*
 * Copies the response body from dfp to G.output_fd and drives the
 * progress bar.
 *
 *  - Each fread() asks for at most sizeof(G.wget_buf) bytes, and for no more
 *    than the remaining G.content_len when that is smaller; the data is
 *    written out with xwrite(). A remaining length of zero ends the
 *    current body or chunk.
 *  - When the status bar or the timeout feature is enabled, the descriptor
 *    is put in nonblocking mode while reading. If fread() returns nothing
 *    and errno is EAGAIN, the code waits up to one second in poll();
 *    second_cnt counts idle seconds down from G.timeout_seconds and is
 *    reloaded after every successful read, so only an uninterrupted stall of
 *    that length ends in "download timed out". Any other errno on a failed
 *    read is reported as a read error; otherwise it is treated as EOF.
 *  - The outer loop repeats only for chunked transfers: after each chunk the
 *    next chunk size is read as a hexadecimal number, and a size of 0 ends
 *    the download.
 *  - At the end the output file is cut at the current position (see the
 *    comment there; the guarding condition is not visible in this view),
 *    and G.chunked / G.got_clen are forced so the final bar shows 100%.
 */
765 static void NOINLINE retrieve_file_data(FILE *dfp)
767 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
768 # if ENABLE_FEATURE_WGET_TIMEOUT
769 unsigned second_cnt = G.timeout_seconds;
771 struct pollfd polldata;
773 polldata.fd = fileno(dfp);
774 polldata.events = POLLIN | POLLPRI;
776 progress_meter(PROGRESS_START);
781 /* Loops only if chunked */
784 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
785 /* Must use nonblocking I/O, otherwise fread will loop
786 * and *block* until it reads full buffer,
787 * which messes up progress bar and/or timeout logic.
788 * Because of nonblocking I/O, we need to dance
789 * very carefully around EAGAIN. See explanation at
792 ndelay_on(polldata.fd);
798 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
799 /* fread internally uses read loop, which in our case
800 * is usually exited when we get EAGAIN.
801 * In this case, libc sets error marker on the stream.
802 * Need to clear it before next fread to avoid possible
803 * rare false positive ferror below. Rare because usually
804 * fread gets more than zero bytes, and we don't fall
805 * into if (n <= 0) ...
810 rdsz = sizeof(G.wget_buf);
812 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
813 if ((int)G.content_len <= 0)
815 rdsz = (unsigned)G.content_len;
818 n = fread(G.wget_buf, 1, rdsz, dfp);
821 xwrite(G.output_fd, G.wget_buf, n);
822 #if ENABLE_FEATURE_WGET_STATUSBAR
827 if (G.content_len == 0)
830 #if ENABLE_FEATURE_WGET_TIMEOUT
831 second_cnt = G.timeout_seconds;
838 * If error occurs, or EOF is reached, the return value
839 * is a short item count (or zero).
840 * fread does not distinguish between EOF and error.
842 if (errno != EAGAIN) {
844 progress_meter(PROGRESS_END);
845 bb_perror_msg_and_die(bb_msg_read_error);
847 break; /* EOF, not error */
850 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
851 /* It was EAGAIN. There is no data. Wait up to one second
852 * then abort if timed out, or update the bar and try reading again.
854 if (safe_poll(&polldata, 1, 1000) == 0) {
855 # if ENABLE_FEATURE_WGET_TIMEOUT
856 if (second_cnt != 0 && --second_cnt == 0) {
857 progress_meter(PROGRESS_END);
858 bb_error_msg_and_die("download timed out");
861 /* We used to loop back to poll here,
862 * but there is no great harm in letting fread
863 * try reading anyway.
868 /* Need to do it _every_ second for "stalled" indicator
869 * to be shown properly.
871 progress_meter(PROGRESS_BUMP);
872 } /* while (reading data) */
874 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
876 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
881 fgets_and_trim(dfp); /* Eat empty line */
884 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
885 /* FIXME: error check? */
886 if (G.content_len == 0)
887 break; /* all done! */
890 * Note that fgets may result in some data being buffered in dfp.
891 * We loop back to fread, which will retrieve this data.
892 * Also note that code has to be arranged so that fread
893 * is done _before_ one-second poll wait - poll doesn't know
894 * about stdio buffering and can result in spurious one second waits!
898 /* If -c failed, we restart from the beginning,
899 * but we do not truncate file then, we do it only now, at the end.
900 * This lets the user ^C if his 99% complete 10 GB file download
901 * failed to restart *without* losing the almost complete file.
904 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
905 if (pos != (off_t)-1)
906 ftruncate(G.output_fd, pos);
909 /* Draw full bar and free its resources */
910 G.chunked = 0; /* makes it show 100% even for chunked download */
911 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
912 progress_meter(PROGRESS_END);
/*
 * Downloads a single URL into the output file (or only probes it when the
 * spider option is set). Outline, as far as it is visible in this view:
 *
 *  1. Parse the URL into target. Unless G.proxy_flag is "off", take a proxy
 *     from the ftp_proxy or http_proxy environment variable and parse it
 *     into server; without a proxy, server gets the host and port of target.
 *  2. Without -O, derive the output name from the last path component
 *     ("index.html" when that is empty or starts with '/'), optionally
 *     prefixed with the -P directory. The name is duplicated because
 *     redirects may free target.path.
 *  3. With -c, open the existing file and set G.beg_range to its size.
 *  4. Resolve server and, unless quiet, print "Connecting to ...".
 *  5. HTTP(S), or any protocol through a proxy: open the connection
 *     (directly, or through the openssl / ssl_helper helpers for https),
 *     send the request line and the headers Host, User-Agent,
 *     Connection: close, Basic Authorization / Proxy-Authorization,
 *     Range when resuming, user-supplied headers and POST data; for https
 *     the write side of the socket is shut down afterwards.
 *     Then read the status line and the headers: content-length sets
 *     G.content_len (garbage is fatal), transfer-encoding must be
 *     "chunked", and a location header with status >= 300 triggers a
 *     redirect, limited by redir_limit, via goto establish_session.
 *     A redirect starting with '/' keeps the same server; any other
 *     location is parsed as a new URL.
 *     Direct FTP: delegate to prepare_ftp_session().
 *  6. Unless spidering, open the output file if needed and call
 *     retrieve_file_data(); the descriptor is closed afterwards only when
 *     -O was not given.
 *  7. For FTP, expect reply 226 on the control connection. Finally free the
 *     URL copies, the output name and the redirected path.
 */
915 static void download_one_url(const char *url)
917 bool use_proxy; /* Use proxies if env vars are set */
919 len_and_sockaddr *lsa;
920 FILE *sfp; /* socket to web/ftp server */
921 FILE *dfp; /* socket to ftp server (data) */
923 char *fname_out_alloc;
924 char *redirected_path = NULL;
925 struct host_info server;
926 struct host_info target;
928 server.allocated = NULL;
929 target.allocated = NULL;
933 parse_url(url, &target);
935 /* Use the proxy if necessary */
936 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
938 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
939 //FIXME: what if protocol is https? Ok to use http_proxy?
940 use_proxy = (proxy && proxy[0]);
942 parse_url(proxy, &server);
945 server.port = target.port;
946 if (ENABLE_FEATURE_IPV6) {
947 //free(server.allocated); - can't be non-NULL
948 server.host = server.allocated = xstrdup(target.host);
950 server.host = target.host;
954 if (ENABLE_FEATURE_IPV6)
955 strip_ipv6_scope_id(target.host);
957 /* If there was no -O FILE, guess output filename */
958 fname_out_alloc = NULL;
959 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
960 G.fname_out = bb_get_last_path_component_nostrip(target.path);
961 /* handle "wget http://kernel.org//" */
962 if (G.fname_out[0] == '/' || !G.fname_out[0])
963 G.fname_out = (char*)"index.html";
964 /* -P DIR is considered only if there was no -O FILE */
966 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
968 /* redirects may free target.path later, need to make a copy */
969 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
972 #if ENABLE_FEATURE_WGET_STATUSBAR
973 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
976 /* Determine where to start transfer */
978 if (option_mask32 & WGET_OPT_CONTINUE) {
979 G.output_fd = open(G.fname_out, O_WRONLY);
980 if (G.output_fd >= 0) {
981 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
983 /* File doesn't exist. We do not create file here yet.
984 * We are not sure it exists on remote side */
989 lsa = xhost2sockaddr(server.host, server.port);
990 if (!(option_mask32 & WGET_OPT_QUIET)) {
991 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
992 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
996 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
999 if (use_proxy || target.protocol != P_FTP) {
1006 /* Open socket to http(s) server */
1007 #if ENABLE_FEATURE_WGET_OPENSSL
1008 /* openssl (and maybe ssl_helper) support is configured */
1009 if (target.protocol == P_HTTPS) {
1010 /* openssl-based helper
1011 * Inconvenient API since we can't give it an open fd
1013 int fd = spawn_https_helper_openssl(server.host, server.port);
1014 # if ENABLE_FEATURE_WGET_SSL_HELPER
1015 if (fd < 0) { /* no openssl? try ssl_helper */
1016 sfp = open_socket(lsa);
1017 spawn_https_helper_small(fileno(sfp));
1021 /* We don't check for exec("openssl") failure in this case */
1023 sfp = fdopen(fd, "r+");
1025 bb_perror_msg_and_die(bb_msg_memory_exhausted);
1028 sfp = open_socket(lsa);
1030 #elif ENABLE_FEATURE_WGET_SSL_HELPER
1031 /* Only ssl_helper support is configured */
1032 sfp = open_socket(lsa);
1033 if (target.protocol == P_HTTPS)
1034 spawn_https_helper_small(fileno(sfp));
1036 /* ssl (https) support is not configured */
1037 sfp = open_socket(lsa);
1039 /* Send HTTP request */
1041 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1042 target.protocol, target.host,
1045 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1046 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1049 if (!USR_HEADER_HOST)
1050 SENDFMT(sfp, "Host: %s\r\n", target.host);
1051 if (!USR_HEADER_USER_AGENT)
1052 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1054 /* Ask server to close the connection as soon as we are done
1055 * (IOW: we do not intend to send more requests)
1057 SENDFMT(sfp, "Connection: close\r\n");
1059 #if ENABLE_FEATURE_WGET_AUTHENTICATION
1060 if (target.user && !USR_HEADER_AUTH) {
1061 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1062 base64enc(target.user));
1064 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1065 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1066 base64enc(server.user));
1070 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1071 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1073 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1074 if (G.extra_headers) {
1075 log_io(G.extra_headers);
1076 fputs(G.extra_headers, sfp);
1079 if (option_mask32 & WGET_OPT_POST_DATA) {
1081 "Content-Type: application/x-www-form-urlencoded\r\n"
1082 "Content-Length: %u\r\n"
1085 (int) strlen(G.post_data), G.post_data
1090 SENDFMT(sfp, "\r\n");
1095 /* Tried doing this unconditionally.
1096 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1099 if (target.protocol == P_HTTPS) {
1100 /* If we use SSL helper, keeping our end of the socket open for writing
1101 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1102 * even after child closes its copy of the fd.
1105 shutdown(fileno(sfp), SHUT_WR);
1110 * Retrieve HTTP response line and check for "200" status code.
1113 fgets_and_trim(sfp);
1116 str = skip_non_whitespace(str);
1117 str = skip_whitespace(str);
1118 // FIXME: no error check
1119 // xatou wouldn't work: "200 OK"
1124 while (gethdr(sfp) != NULL)
1125 /* eat all remaining headers */;
1128 /* Success responses */
1131 case 201: /* 201 Created */
1132 /* "The request has been fulfilled and resulted in a new resource being created" */
1133 /* Standard wget is reported to treat this as success */
1135 case 202: /* 202 Accepted */
1136 /* "The request has been accepted for processing, but the processing has not been completed" */
1137 /* Treat as success: fall through */
1138 case 203: /* 203 Non-Authoritative Information */
1139 /* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1141 case 204: /* 204 No Content */
1143 Response 204 doesn't say "null file", it says "metadata
1144 has changed but data didn't":
1146 "10.2.5 204 No Content
1147 The server has fulfilled the request but does not need to return
1148 an entity-body, and might want to return updated metainformation.
1149 The response MAY include new or updated metainformation in the form
1150 of entity-headers, which if present SHOULD be associated with
1151 the requested variant.
1153 If the client is a user agent, it SHOULD NOT change its document
1154 view from that which caused the request to be sent. This response
1155 is primarily intended to allow input for actions to take place
1156 without causing a change to the user agent's active document view,
1157 although any new or updated metainformation SHOULD be applied
1158 to the document currently in the user agent's active view.
1160 The 204 response MUST NOT include a message-body, and thus
1161 is always terminated by the first empty line after the header fields."
1163 However, in real world it was observed that some web servers
1164 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1166 if (G.beg_range != 0) {
1167 /* "Range:..." was not honored by the server.
1168 * Restart download from the beginning.
1170 reset_beg_range_to_zero();
1173 /* 205 Reset Content ?? what to do on this ?? */
1175 case 300: /* redirection */
1181 case 206: /* Partial Content */
1182 if (G.beg_range != 0)
1183 /* "Range:..." worked. Good. */
1185 /* Partial Content even though we did not ask for it??? */
1188 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1192 * Retrieve HTTP headers.
1194 while ((str = gethdr(sfp)) != NULL) {
1195 static const char keywords[] ALIGN1 =
1196 "content-length\0""transfer-encoding\0""location\0";
1198 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1202 /* gethdr converted "FOO:" string to lowercase */
1204 /* strip trailing whitespace */
1205 char *s = strchrnul(str, '\0') - 1;
1206 while (s >= str && (*s == ' ' || *s == '\t')) {
1210 key = index_in_strings(keywords, G.wget_buf) + 1;
1211 if (key == KEY_content_length) {
1212 G.content_len = BB_STRTOOFF(str, NULL, 10);
1213 if (G.content_len < 0 || errno) {
1214 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1219 if (key == KEY_transfer_encoding) {
1220 if (strcmp(str_tolower(str), "chunked") != 0)
1221 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1224 if (key == KEY_location && status >= 300) {
1225 if (--redir_limit == 0)
1226 bb_error_msg_and_die("too many redirections");
1228 if (str[0] == '/') {
1229 free(redirected_path);
1230 target.path = redirected_path = xstrdup(str+1);
1231 /* lsa stays the same: it's on the same server */
1233 parse_url(str, &target);
1235 /* server.user remains untouched */
1236 free(server.allocated);
1237 server.allocated = NULL;
1238 server.host = target.host;
1239 /* strip_ipv6_scope_id(target.host); - no! */
1240 /* we assume remote never gives us IPv6 addr with scope id */
1241 server.port = target.port;
1244 } /* else: lsa stays the same: we use proxy */
1246 goto establish_session;
1249 // if (status >= 300)
1250 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1252 /* For HTTP, data is pumped over the same connection */
1258 sfp = prepare_ftp_session(&dfp, &target, lsa);
1263 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1264 if (G.output_fd < 0)
1265 G.output_fd = xopen(G.fname_out, G.o_flags);
1266 retrieve_file_data(dfp);
1267 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1268 xclose(G.output_fd);
1274 /* It's ftp. Close data connection properly */
1276 if (ftpcmd(NULL, NULL, sfp) != 226)
1277 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1278 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1282 free(server.allocated);
1283 free(target.allocated);
1286 free(fname_out_alloc);
1287 free(redirected_path);
/*
 * wget_main - entry point of the wget applet.
 *
 * argc is unused (UNUSED_PARAM). argv is handed to getopt32() and URL
 * arguments taken from it are passed to download_one_url().
 *
 * Visible steps: set defaults in G, parse options, join any --header
 * values into G.extra_headers, choose the open flags for the output
 * file, fetch, close G.output_fd if it is open. The only return
 * statement visible yields EXIT_SUCCESS.
 */
1290 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1291 int wget_main(int argc UNUSED_PARAM, char **argv)
1293 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * Long option table. Each entry is a NUL-terminated name, an argument
 * requirement, and the one-character value the option maps to. Options
 * with a short equivalent reuse its letter; header, post-data and spider
 * use the byte values \xff, \xfe and \xfd. Entries mapped to \xf0 are
 * the ones the comments below describe as ignored.
 */
1294 static const char wget_longopts[] ALIGN1 =
1295 /* name, has_arg, val */
1296 "continue\0" No_argument "c"
1297 "quiet\0" No_argument "q"
1298 "output-document\0" Required_argument "O"
1299 "directory-prefix\0" Required_argument "P"
1300 "proxy\0" Required_argument "Y"
1301 "user-agent\0" Required_argument "U"
1302 IF_FEATURE_WGET_TIMEOUT(
1303 "timeout\0" Required_argument "T")
1305 IF_DESKTOP( "tries\0" Required_argument "t")
1306 "header\0" Required_argument "\xff"
1307 "post-data\0" Required_argument "\xfe"
1308 "spider\0" No_argument "\xfd"
1309 /* Ignored (we always use PASV): */
1310 IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
1311 /* Ignored (we don't do ssl) */
1312 IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
1313 /* Ignored (we don't support caching) */
1314 IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1315 IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1316 IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1317 IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1318 IF_DESKTOP( "no-parent\0" No_argument "\xf0")
1322 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Receives the --header values; its address is passed to getopt32() below. */
1323 llist_t *headers_llist = NULL;
1328 #if ENABLE_FEATURE_WGET_TIMEOUT
/* Default for the -T SEC timeout; getopt32() is given its address and may overwrite it. */
1329 G.timeout_seconds = 900;
/* alarm_handler is installed for SIGALRM only when timeout support is compiled in. */
1330 signal(SIGALRM, alarm_handler);
1332 G.proxy_flag = "on"; /* use proxies if env vars are set */
1333 G.user_agent = "Wget"; /* "User-Agent" header field */
1335 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1336 applet_long_options = wget_longopts;
1338 opt_complementary = "-1" /* at least one URL */
1339 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
/*
 * Parse the command line. The pointer arguments after the option string
 * (fname_out, dir_prefix, proxy_flag, user_agent, timeout_seconds) appear
 * in the same order as O, P, Y, U and T in that string; the timeout slot
 * is NULL when FEATURE_WGET_TIMEOUT is off. headers_llist and post_data
 * are passed only when long options are enabled.
 * NOTE(review): the + after T: presumably requests a numeric argument;
 * confirm against the getopt32() documentation.
 */
1340 getopt32(argv, "cqO:P:Y:U:T:+"
1343 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1344 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1345 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1346 * -nH --no-host-directories: wget -r http://host/ won't create host/
1348 * "n::" above says that we accept -n[ARG].
1349 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1351 , &G.fname_out, &G.dir_prefix,
1352 &G.proxy_flag, &G.user_agent,
1353 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1354 NULL, /* -t RETRIES */
1356 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1357 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1359 #if 0 /* option bits debug */
/* Compiled out: emits one message per option bit that is set in option_mask32. */
1360 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1361 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1362 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1363 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1364 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1369 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * Join the --header values into one buffer, G.extra_headers, with each
 * value followed by CR LF.
 */
1370 if (headers_llist) {
1373 llist_t *ll = headers_llist;
/* Per header: its length plus 2 bytes for the CR LF appended below. */
1375 size += strlen(ll->data) + 2;
/* One extra byte for the terminating NUL. */
1378 G.extra_headers = hdr = xmalloc(size + 1);
1379 while (headers_llist) {
/* size is reused here for the number of characters sprintf() wrote. */
1383 size = sprintf(hdr, "%s\r\n",
1384 (char*)llist_pop(&headers_llist));
1385 /* a bit like index_in_substrings but don't match full key */
/*
 * wget_user_headers is walked as consecutive NUL-terminated strings.
 * strstr() == hdr holds only when the header text starts with the
 * current string; in that case bit is OR-ed into G.user_headers.
 */
1387 words = wget_user_headers;
1389 if (strstr(hdr, words) == hdr) {
1390 G.user_headers |= bit;
1394 words += strlen(words) + 1;
/* O_EXCL: by default, opening the output file fails if it already exists. */
1402 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1403 if (G.fname_out) { /* -O FILE ? */
1404 if (LONE_DASH(G.fname_out)) { /* -O - ? */
/* With -O - the continue option bit is cleared. */
1406 option_mask32 &= ~WGET_OPT_CONTINUE;
1408 /* compat with wget: -O FILE can overwrite */
1409 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
/* Fetch the URL that argv points at, then advance argv. */
1413 download_one_url(*argv++);
/* A negative G.output_fd is treated as no descriptor to close. */
1415 if (G.output_fd >= 0)
1416 xclose(G.output_fd);
/* Freed only in FEATURE_CLEAN_UP builds that also have long options. */
1418 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1419 free(G.extra_headers);
1423 return EXIT_SUCCESS;