1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
16 //config: wget is a utility for non-interactive download of files from HTTP
17 //config: and FTP servers.
19 //config:config FEATURE_WGET_LONG_OPTIONS
20 //config: bool "Enable long options"
22 //config: depends on WGET && LONG_OPTS
24 //config:config FEATURE_WGET_STATUSBAR
25 //config: bool "Enable progress bar (+2k)"
27 //config: depends on WGET
29 //config:config FEATURE_WGET_AUTHENTICATION
30 //config: bool "Enable HTTP authentication"
32 //config: depends on WGET
34 //config: Support authenticated HTTP transfers.
36 //config:config FEATURE_WGET_TIMEOUT
37 //config: bool "Enable timeout option -T SEC"
39 //config: depends on WGET
41 //config: Supports network read and connect timeouts for wget,
42 //config: so that wget will give up and time out, through the -T
43 //config: command line option.
45 //config: Currently only connect and network data read timeout are
46 //config: supported (i.e., timeout is not applied to the DNS query). When
47 //config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
48 //config: will work in addition to -T.
50 //config:config FEATURE_WGET_OPENSSL
51 //config: bool "Try to connect to HTTPS using openssl"
53 //config: depends on WGET
55 //config: Choose how wget establishes SSL connection for https:// URLs.
57 //config: Busybox itself contains no SSL code. wget will spawn
58 //config: a helper program to talk over HTTPS.
60 //config: OpenSSL has a simple SSL client for debug purposes.
61 //config: If you select "openssl" helper, wget will effectively run:
62 //config: "openssl s_client -quiet -connect hostname:443
63 //config: -servername hostname 2>/dev/null" and pipe its data
64 //config: through it. -servername is not used if hostname is numeric.
65 //config: Note inconvenient API: host resolution is done twice,
66 //config: and there is no guarantee openssl's idea of IPv6 address
67 //config: format is the same as ours.
68 //config: Another problem is that s_client prints debug information
69 //config: to stderr, and it needs to be suppressed. This means
70 //config: all error messages get suppressed too.
71 //config: openssl is also a big binary, often dynamically linked
72 //config: against ~15 libraries.
74 //config:config FEATURE_WGET_SSL_HELPER
75 //config: bool "Try to connect to HTTPS using ssl_helper"
77 //config: depends on WGET
79 //config: Choose how wget establishes SSL connection for https:// URLs.
81 //config: Busybox itself contains no SSL code. wget will spawn
82 //config: a helper program to talk over HTTPS.
84 //config: ssl_helper is a tool which can be built statically
85 //config: from busybox sources against a small embedded SSL library.
86 //config: Please see networking/ssl_helper/README.
87 //config: It does not require double host resolution and emits
88 //config: error messages to stderr.
90 //config: Precompiled static binary may be available at
91 //config: http://busybox.net/downloads/binaries/
93 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
95 //kbuild:lib-$(CONFIG_WGET) += wget.o
97 //usage:#define wget_trivial_usage
98 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
99 //usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
100 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
101 /* Since we ignore these opts, we don't show them in --help */
102 /* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
103 /* //usage: " [-nv] [-nc] [-nH] [-np]" */
104 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
106 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
107 //usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
108 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
110 //usage:#define wget_full_usage "\n\n"
111 //usage: "Retrieve files via HTTP or FTP\n"
112 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
113 //usage: "\n --spider Spider mode - only check file existence"
115 //usage: "\n -c Continue retrieval of aborted transfer"
116 //usage: "\n -q Quiet"
117 //usage: "\n -P DIR Save to DIR (default .)"
118 //usage: IF_FEATURE_WGET_TIMEOUT(
119 //usage: "\n -T SEC Network read timeout is SEC seconds"
121 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
122 //usage: "\n -U STR Use STR for User-Agent header"
123 //usage: "\n -Y on/off Use proxy"
128 # define log_io(...) bb_error_msg(__VA_ARGS__)
129 # define SENDFMT(fp, fmt, ...) \
131 log_io("> " fmt, ##__VA_ARGS__); \
132 fprintf(fp, fmt, ##__VA_ARGS__); \
135 # define log_io(...) ((void)0)
136 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
144 const char *protocol;
148 static const char P_FTP[] ALIGN1 = "ftp";
149 static const char P_HTTP[] ALIGN1 = "http";
150 #if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
151 static const char P_HTTPS[] ALIGN1 = "https";
154 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
155 /* User-specified headers prevent using our corresponding built-in headers. */
158 HDR_USER_AGENT = (1<<1),
160 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
161 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
163 static const char wget_user_headers[] ALIGN1 =
167 # if ENABLE_FEATURE_WGET_AUTHENTICATION
169 "Proxy-Authorization:\0"
172 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
173 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
174 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
175 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
176 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
177 #else /* No long options, no user-headers :( */
178 # define USR_HEADER_HOST 0
179 # define USR_HEADER_USER_AGENT 0
180 # define USR_HEADER_RANGE 0
181 # define USR_HEADER_AUTH 0
182 # define USR_HEADER_PROXY_AUTH 0
187 off_t content_len; /* Content-length of the file */
188 off_t beg_range; /* Range at which continue begins */
189 #if ENABLE_FEATURE_WGET_STATUSBAR
190 off_t transferred; /* Number of bytes transferred so far */
191 const char *curfile; /* Name of current file being transferred */
195 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
198 unsigned char user_headers; /* Headers mentioned by the user */
200 char *fname_out; /* where to direct output (-O) */
201 const char *proxy_flag; /* Use proxies if env vars are set */
202 const char *user_agent; /* "User-Agent" header field */
203 #if ENABLE_FEATURE_WGET_TIMEOUT
204 unsigned timeout_seconds;
205 bool die_if_timed_out;
209 smallint chunked; /* chunked transfer encoding */
210 smallint got_clen; /* got content-length: from server */
211 /* Local downloads do benefit from big buffer.
212 * With 512 byte buffer, it was measured to be
213 * an order of magnitude slower than with big one.
215 uint64_t just_to_align_next_member;
216 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
218 #define G (*ptr_to_globals)
219 #define INIT_G() do { \
220 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
222 #define FINI_G() do { \
223 FREE_PTR_TO_GLOBALS(); \
227 /* Must match option string! */
229 WGET_OPT_CONTINUE = (1 << 0),
230 WGET_OPT_QUIET = (1 << 1),
231 WGET_OPT_OUTNAME = (1 << 2),
232 WGET_OPT_PREFIX = (1 << 3),
233 WGET_OPT_PROXY = (1 << 4),
234 WGET_OPT_USER_AGENT = (1 << 5),
235 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 6),
236 WGET_OPT_RETRIES = (1 << 7),
237 WGET_OPT_nsomething = (1 << 8),
238 WGET_OPT_HEADER = (1 << 9) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
239 WGET_OPT_POST_DATA = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
240 WGET_OPT_SPIDER = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
// Progress bar glue. flag is one of the PROGRESS_* phases: PROGRESS_START
// initialises the meter for G.curfile, then the meter is updated, and
// PROGRESS_END releases it and ends the bar line on stderr.
// NOTE(review): brace-only and other short lines are absent from this view, so
// the action taken under the WGET_OPT_QUIET test is not visible (presumably an
// early return) - confirm against the full file.
248 #if ENABLE_FEATURE_WGET_STATUSBAR
249 static void progress_meter(int flag)
251 if (option_mask32 & WGET_OPT_QUIET)
254 if (flag == PROGRESS_START)
255 bb_progress_init(&G.pmt, G.curfile);
257 bb_progress_update(&G.pmt,
// Presumably the total-size argument: 0 when the size is unknown (chunked
// transfer or no content-length seen), otherwise the sum of the resume
// offset, the bytes transferred so far and G.content_len.
260 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
263 if (flag == PROGRESS_END) {
264 bb_progress_free(&G.pmt);
265 bb_putchar_stderr('\n');
// Empty stand-in for the other branch of the conditional (the #else line is
// not visible here); presumably used when the status bar is compiled out.
270 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
274 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
275 * local addresses can have a scope identifier to specify the
276 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
277 * identifier is only valid on a single node.
279 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
280 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
281 * in the Host header as invalid requests, see
282 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
// Removes an IPv6 zone id (the text from the percent sign up to the closing
// bracket) from host, editing the string in place.
// NOTE(review): the tests guarding the early returns sit on lines that are
// missing from this view; only what the visible lines show is described.
284 static void strip_ipv6_scope_id(char *host)
288 /* bbox wget actually handles IPv6 addresses without [], like
289 * wget "http://::1/xxx", but this is not standard.
290 * To save code, _here_ we do not support it. */
293 return; /* not IPv6 */
295 scope = strchr(host, '%');
299 /* Remove the IPv6 zone identifier from the host address */
300 cp = strchr(host, ']');
301 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
302 /* malformed address (not "[xx]:nn" or "[xx]") */
306 /* cp points to "]...", scope points to "%eth0]..." */
// Source and destination lie in the same string, hence overlapping_strcpy.
307 overlapping_strcpy(scope, cp);
// Base64-encodes str into the shared buffer G.wget_buf.
// Input longer than the clamp below is silently truncated so that the encoded
// form (4 output characters per 3 input bytes) fits in the buffer.
// NOTE(review): the return statement is not visible in this view; presumably
// G.wget_buf is returned, in which case the result is only valid until the
// buffer is reused - confirm.
310 #if ENABLE_FEATURE_WGET_AUTHENTICATION
311 /* Base64-encode character string. */
312 static char *base64enc(const char *str)
314 unsigned len = strlen(str);
315 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
316 len = sizeof(G.wget_buf)/4*3 - 10;
317 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
// Callers in this file pass server- or user-supplied text through this
// function before printing it in error messages.
// NOTE(review): only the signature and the first declaration are visible in
// this view; the loop that presumably rewrites bytes in place and the return
// statement are missing - confirm the exact filtering rule in the full file.
322 static char* sanitize_string(char *s)
// Bytes are walked as unsigned char.
324 unsigned char *p = (void *) s;
// SIGALRM plumbing for the -T timeout.
// alarm_handler: terminates the program with the timed-out message, but only
//   while G.die_if_timed_out is set.
// set_alarm: when G.timeout_seconds is non-zero, arms alarm() for that many
//   seconds and sets G.die_if_timed_out.
// clear_alarm: only clears G.die_if_timed_out; the visible macro does not
//   cancel the pending alarm, it just makes the handler do nothing.
// The last two defines are no-op variants; the #else line that selects them is
// not visible here, presumably the build without the timeout feature.
331 #if ENABLE_FEATURE_WGET_TIMEOUT
332 static void alarm_handler(int sig UNUSED_PARAM)
334 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
335 if (G.die_if_timed_out)
336 bb_error_msg_and_die("download timed out");
338 static void set_alarm(void)
340 if (G.timeout_seconds) {
341 alarm(G.timeout_seconds);
342 G.die_if_timed_out = 1;
345 # define clear_alarm() ((void)(G.die_if_timed_out = 0))
347 # define set_alarm() ((void)0)
348 # define clear_alarm() ((void)0)
// Returns 1 when the last inet_pton call made here reported a successful
// parse, otherwise 0 (a -1 error result also yields 0).
// NOTE(review): the condition under which the AF_INET6 attempt runs is on a
// line missing from this view; presumably it is tried only after the AF_INET
// parse did not succeed - confirm.
351 #if ENABLE_FEATURE_WGET_OPENSSL
353 * is_ip_address() attempts to verify whether or not a string
354 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
355 * of inet_pton() can be used to determine this.
357 * TODO add proper error checking when inet_pton() returns -1
358 * (some form of system error has occurred, and errno is set)
360 static int is_ip_address(const char *string)
362 struct sockaddr_in sa;
364 int result = inet_pton(AF_INET, string, &(sa.sin_addr));
365 # if ENABLE_FEATURE_IPV6
367 struct sockaddr_in6 sa6;
368 result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
371 return (result == 1);
// Connects to lsa with xconnect_stream and wraps the descriptor in a
// read/write stdio stream. A failing fdopen is reported with the memory
// exhausted message and is fatal.
// NOTE(review): the return statement is not visible in this view; callers
// assign the result to FILE pointers, so presumably fp is returned.
375 static FILE *open_socket(len_and_sockaddr *lsa)
381 fd = xconnect_stream(lsa);
384 /* glibc 2.4 seems to try seeking on it - ??! */
385 /* hopefully it understands what ESPIPE means... */
386 fp = fdopen(fd, "r+");
388 bb_perror_msg_and_die(bb_msg_memory_exhausted);
// Reads one line from fp into the shared buffer G.wget_buf; a failed read is
// fatal. Because fgets is given sizeof - 1, at most sizeof - 2 characters of
// the line are stored per call. The buffer is logged through log_io.
// NOTE(review): the statements that record the newline and cut the string at
// the positions found below are on lines missing from this view.
393 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
394 static char fgets_and_trim(FILE *fp)
400 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
401 bb_perror_msg_and_die("error getting response");
404 buf_ptr = strchrnul(G.wget_buf, '\n');
407 buf_ptr = strchrnul(G.wget_buf, '\r');
410 log_io("< %s", G.wget_buf);
// Sends one FTP command (s1 immediately followed by s2, then CRLF) on fp and
// reads the reply into G.wget_buf.
// NOTE(review): callers pass NULL for s1 and/or s2 (for example to only read
// a reply); the guards that handle NULL, the call that reads each reply line
// and the return statement are on lines missing from this view.
415 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
421 fprintf(fp, "%s%s\r\n", s1, s2);
423 log_io("> %s%s", s1, s2);
// Keep looping until a line has a digit first and a space at index 3.
428 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
// Cut after the first three characters and convert them; the message text
// stays available at G.wget_buf + 4, which callers print on errors.
430 G.wget_buf[3] = '\0';
431 result = xatoi_positive(G.wget_buf);
// Splits src_url into the fields of h.
// A private copy of the URL is made and remembered in h->allocated (callers
// free it); h->user is a separate xstrdup of the percent-decoded credentials.
// Schemes visible here: ftp, https (only when an SSL helper is configured)
// and http; each sets h->port via bb_lookup_port with the usual default.
// Any other scheme is fatal.
// NOTE(review): many short lines are missing from this view, including the
// assignments to h->host and h->path and the branch bodies after the
// separator search - presumably those fields point into the private copy;
// confirm against the full file.
436 static void parse_url(const char *src_url, struct host_info *h)
441 h->allocated = url = xstrdup(src_url);
444 p = strstr(url, "://");
448 if (strcmp(url, P_FTP) == 0) {
449 h->port = bb_lookup_port(P_FTP, "tcp", 21);
451 #if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
452 if (strcmp(url, P_HTTPS) == 0) {
453 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
454 h->protocol = P_HTTPS;
457 if (strcmp(url, P_HTTP) == 0) {
459 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
460 h->protocol = P_HTTP;
463 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
466 // GNU wget is user-friendly and falls back to http://
472 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
473 // 'GET /?var=a/b HTTP/1.0'
474 // and saves 'index.html?var=a%2Fb' (we save 'b')
475 // wget 'http://busybox.net?login=john@doe':
476 // request: 'GET /?login=john@doe HTTP/1.0'
477 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
478 // wget 'http://busybox.net#test/test':
479 // request: 'GET / HTTP/1.0'
480 // saves: 'index.html' (we save 'test')
482 // We also don't add unique .N suffix if file exists...
// sp ends up at the earliest of the three separators, or NULL if none occurs.
483 sp = strchr(h->host, '/');
484 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
485 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
488 } else if (*sp == '/') {
491 } else { // '#' or '?'
492 // http://busybox.net?login=john@doe is a valid URL
493 // memmove converts to:
494 // http:/busybox.nett?login=john@doe...
// Shifts the host one byte to the left; per the example above this leaves a
// spare byte in front of the separator.
495 memmove(h->host - 1, h->host, sp - h->host);
// The LAST at-sign is used as the credentials/host boundary.
501 sp = strrchr(h->host, '@');
503 // URL-decode "user:password" string before base64-encoding:
504 // wget http://test:my%20pass@example.com should send
505 // Authorization: Basic dGVzdDpteSBwYXNz
506 // which decodes to "test:my pass".
507 // Standard wget and curl do this too.
510 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
513 /* else: h->user remains NULL, or as set by original request
514 * before redirect (if we are here after a redirect).
// Reads one response header line from fp into G.wget_buf and splits it into
// name and value. The name is lower-cased in place and stays at the start of
// G.wget_buf (callers look it up there); the value starts after the
// whitespace following the name. Callers in this file use the returned
// pointer as the value and stop iterating when NULL comes back.
// If the line did not fit in the buffer, the remainder is read and discarded
// up to the next newline.
// NOTE(review): the return statements, the lower-casing statement itself and
// the test for the end of the name are on lines missing from this view.
518 static char *gethdr(FILE *fp)
523 /* retrieve header line */
524 c = fgets_and_trim(fp);
526 /* end of the headers? */
527 if (G.wget_buf[0] == '\0')
530 /* convert the header name to lower case */
531 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
533 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
534 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
535 * "A-Z" maps to "a-z".
536 * "@[\]" can't occur in header names.
537 * "^_" maps to "~,DEL" (which is wrong).
538 * "^" was never seen yet, "_" was seen from web.archive.org
539 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
544 /* verify we are at the end of the header name */
546 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
548 /* locate the start of the header value */
550 hdrval = skip_whitespace(s);
553 /* Rats! The buffer isn't big enough to hold the entire header value */
554 while (c = getc(fp), c != EOF && c != '\n')
// Used when a resume request was not honored: reports the failed restart and
// rewinds the output descriptor to offset 0. The file is deliberately not
// truncated here (see the commented-out ftruncate).
// NOTE(review): the statement that zeroes G.beg_range is presumably on a line
// missing from this view - confirm.
561 static void reset_beg_range_to_zero(void)
563 bb_error_msg("restart failed");
565 xlseek(G.output_fd, 0, SEEK_SET);
566 /* Done at the end instead: */
567 /* ftruncate(G.output_fd, 0); */
// Sets up an FTP download of target->path from the server at lsa:
//   1. opens the control connection and expects greeting code 220,
//   2. logs in with target->user split at the first colon (USER, then PASS),
//   3. switches to binary type and asks for the size (reply 213 fills
//      G.content_len),
//   4. enters passive mode, rewrites the port in lsa from the reply and opens
//      the data connection, stored through dfpp,
//   5. sends REST when resuming (G.beg_range != 0), then RETR.
// Any unexpected reply is fatal.
// NOTE(review): the return statement (presumably the control stream sfp), the
// condition around the default credentials, the case labels of the login
// switch and the pasv_error label are on lines missing from this view.
570 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
577 target->user = xstrdup("anonymous:busybox@");
579 sfp = open_socket(lsa);
580 if (ftpcmd(NULL, NULL, sfp) != 220)
581 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
584 * Splitting username:password pair,
587 str = strchr(target->user, ':');
590 switch (ftpcmd("USER ", target->user, sfp)) {
594 if (ftpcmd("PASS ", str, sfp) == 230)
596 /* fall through (failed login) */
598 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
601 ftpcmd("TYPE I", NULL, sfp);
606 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
607 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
608 if (G.content_len < 0 || errno) {
609 bb_error_msg_and_die("SIZE value is garbage");
615 * Entering passive mode
617 if (ftpcmd("PASV", NULL, sfp) != 227) {
619 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
621 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
622 // Server's IP is N1.N2.N3.N4 (we ignore it)
623 // Server's port for data connection is P1*256+P2
// Parsed from the right: cut at the last closing parenthesis, then take the
// field after the last comma as P2 and the one before it as P1.
624 str = strrchr(G.wget_buf, ')');
625 if (str) str[0] = '\0';
626 str = strrchr(G.wget_buf, ',');
627 if (!str) goto pasv_error;
628 port = xatou_range(str+1, 0, 255);
630 str = strrchr(G.wget_buf, ',');
631 if (!str) goto pasv_error;
632 port += xatou_range(str+1, 0, 255) * 256;
633 set_nport(&lsa->u.sa, htons(port));
635 *dfpp = open_socket(lsa);
637 if (G.beg_range != 0) {
638 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
// 350 means the server accepted the restart offset, so only the remainder
// is still expected.
639 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
640 G.content_len -= G.beg_range;
642 reset_beg_range_to_zero();
// Any reply code above 150 is treated as a failure.
645 if (ftpcmd("RETR ", target->path, sfp) > 150)
646 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
// Starts the openssl s_client helper for an HTTPS connection to host:port.
// A host without a colon gets the port appended; servername is a copy of host
// cut at its last colon, and is passed with -servername only when
// is_ip_address says it is not a numeric address. A socketpair failure and
// (in the visible branch) an exec failure are fatal.
// NOTE(review): the fork, the descriptor plumbing around the socketpair, the
// cleanup and the return statement are on lines missing from this view. The
// caller treats a negative return as helper unavailable when ssl_helper is
// also configured - confirm the exact return contract in the full file.
651 #if ENABLE_FEATURE_WGET_OPENSSL
652 static int spawn_https_helper_openssl(const char *host, unsigned port)
654 char *allocated = NULL;
658 IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)
660 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
661 /* Kernel can have AF_UNIX support disabled */
662 bb_perror_msg_and_die("socketpair");
664 if (!strchr(host, ':'))
665 host = allocated = xasprintf("%s:%u", host, port);
666 servername = xstrdup(host);
667 strrchr(servername, ':')[0] = '\0';
679 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
680 * It prints some debug stuff on stderr, don't know how to suppress it.
681 * Work around by dev-nulling stderr. We lose all error messages :(
684 xopen("/dev/null", O_RDWR);
685 memset(&argv, 0, sizeof(argv));
686 argv[0] = (char*)"openssl";
687 argv[1] = (char*)"s_client";
688 argv[2] = (char*)"-quiet";
689 argv[3] = (char*)"-connect";
690 argv[4] = (char*)host;
692 * Per RFC 6066 Section 3, the only permitted values in the
693 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
694 * IPv4 and IPv6 addresses, port numbers are not allowed.
696 if (!is_ip_address(servername)) {
697 argv[5] = (char*)"-servername";
698 argv[6] = (char*)servername;
701 BB_EXECVP(argv[0], argv);
703 # if ENABLE_FEATURE_WGET_SSL_HELPER
707 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
716 # if ENABLE_FEATURE_WGET_SSL_HELPER
// Starts the ssl_helper program for an already connected network_fd.
// The process is created with xfork, or xvfork when BB_MMU is 0. One branch
// moves network_fd to descriptor 3 and executes ssl_helper with -d3; the
// other moves sp[0] onto network_fd, so the caller keeps using the same
// descriptor number. A socketpair or exec failure is fatal.
// NOTE(review): the lines selecting child versus parent and the other
// descriptor moves or closes are missing from this view; presumably the exec
// branch is the child and -d3 names the descriptor - confirm.
726 /* See networking/ssl_helper/README how to build one */
727 #if ENABLE_FEATURE_WGET_SSL_HELPER
728 static void spawn_https_helper_small(int network_fd)
733 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
734 /* Kernel can have AF_UNIX support disabled */
735 bb_perror_msg_and_die("socketpair");
737 pid = BB_MMU ? xfork() : xvfork();
745 xmove_fd(network_fd, 3);
747 * A simple ssl/tls helper
749 argv[0] = (char*)"ssl_helper";
750 argv[1] = (char*)"-d3";
752 BB_EXECVP(argv[0], argv);
753 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
759 xmove_fd(sp[0], network_fd);
// Copies the response body from dfp to G.output_fd through G.wget_buf, at
// most sizeof(G.wget_buf) bytes per read.
// With the status bar or timeout feature, the descriptor is switched to
// non-blocking mode and a one second poll is done when no data is available;
// that poll drives the progress bar and the stall countdown (second_cnt,
// reloaded from G.timeout_seconds after data arrives).
// For chunked transfers each chunk size line is parsed as hex and a size of 0
// ends the download.
// At the end the file is truncated at the current offset and the progress bar
// is finalised.
// NOTE(review): the loop headers, the conditions selecting chunked or
// known-length handling and several branch bodies are on lines missing from
// this view.
763 static void NOINLINE retrieve_file_data(FILE *dfp)
765 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
766 # if ENABLE_FEATURE_WGET_TIMEOUT
767 unsigned second_cnt = G.timeout_seconds;
769 struct pollfd polldata;
771 polldata.fd = fileno(dfp);
772 polldata.events = POLLIN | POLLPRI;
774 progress_meter(PROGRESS_START);
779 /* Loops only if chunked */
782 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
783 /* Must use nonblocking I/O, otherwise fread will loop
784 * and *block* until it reads full buffer,
785 * which messes up progress bar and/or timeout logic.
786 * Because of nonblocking I/O, we need to dance
787 * very carefully around EAGAIN. See explanation at
790 ndelay_on(polldata.fd);
796 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
797 /* fread internally uses read loop, which in our case
798 * is usually exited when we get EAGAIN.
799 * In this case, libc sets error marker on the stream.
800 * Need to clear it before next fread to avoid possible
801 * rare false positive ferror below. Rare because usually
802 * fread gets more than zero bytes, and we don't fall
803 * into if (n <= 0) ...
808 rdsz = sizeof(G.wget_buf);
810 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
811 if ((int)G.content_len <= 0)
813 rdsz = (unsigned)G.content_len;
816 n = fread(G.wget_buf, 1, rdsz, dfp);
819 xwrite(G.output_fd, G.wget_buf, n);
820 #if ENABLE_FEATURE_WGET_STATUSBAR
825 if (G.content_len == 0)
828 #if ENABLE_FEATURE_WGET_TIMEOUT
829 second_cnt = G.timeout_seconds;
836 * If error occurs, or EOF is reached, the return value
837 * is a short item count (or zero).
838 * fread does not distinguish between EOF and error.
840 if (errno != EAGAIN) {
842 progress_meter(PROGRESS_END);
843 bb_perror_msg_and_die(bb_msg_read_error);
845 break; /* EOF, not error */
848 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
849 /* It was EAGAIN. There is no data. Wait up to one second
850 * then abort if timed out, or update the bar and try reading again.
852 if (safe_poll(&polldata, 1, 1000) == 0) {
853 # if ENABLE_FEATURE_WGET_TIMEOUT
854 if (second_cnt != 0 && --second_cnt == 0) {
855 progress_meter(PROGRESS_END);
856 bb_error_msg_and_die("download timed out");
859 /* We used to loop back to poll here,
860 * but there is no great harm in letting fread
861 * to try reading anyway.
866 /* Need to do it _every_ second for "stalled" indicator
867 * to be shown properly.
869 progress_meter(PROGRESS_BUMP);
870 } /* while (reading data) */
872 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
874 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
879 fgets_and_trim(dfp); /* Eat empty line */
882 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
883 /* FIXME: error check? */
884 if (G.content_len == 0)
885 break; /* all done! */
888 * Note that fgets may result in some data being buffered in dfp.
889 * We loop back to fread, which will retrieve this data.
890 * Also note that code has to be arranged so that fread
891 * is done _before_ one-second poll wait - poll doesn't know
892 * about stdio buffering and can result in spurious one second waits!
896 /* If -c failed, we restart from the beginning,
897 * but we do not truncate file then, we do it only now, at the end.
898 * This lets user to ^C if his 99% complete 10 GB file download
899 * failed to restart *without* losing the almost complete file.
902 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
903 if (pos != (off_t)-1)
904 ftruncate(G.output_fd, pos);
907 /* Draw full bar and free its resources */
908 G.chunked = 0; /* makes it show 100% even for chunked download */
909 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
910 progress_meter(PROGRESS_END);
// Downloads a single URL.
// Outline of what is visible here:
//   - parses the URL into target and picks the connection endpoint (server):
//     the proxy from ftp_proxy or http_proxy when proxies are enabled and the
//     variable is non-empty, otherwise the target itself;
//   - chooses the output name (last path component, index.html as fallback,
//     optionally prefixed with G.dir_prefix) unless -O was given;
//   - with -c, opens the existing file and resumes from its current size;
//   - for HTTP(S) or any proxied request: connects (through an SSL helper for
//     https), sends the request and headers, reads the status line and the
//     response headers, and follows Location redirects up to redir_limit;
//   - for direct FTP: delegates to prepare_ftp_session;
//   - unless --spider is set, opens the output file if needed and calls
//     retrieve_file_data; then frees the per-URL allocations.
// NOTE(review): many short lines (braces, else, labels, break statements,
// some call arguments) are missing from this view, so the exact nesting of the
// branches below cannot be confirmed from here.
913 static void download_one_url(const char *url)
915 bool use_proxy; /* Use proxies if env vars are set */
917 len_and_sockaddr *lsa;
918 FILE *sfp; /* socket to web/ftp server */
919 FILE *dfp; /* socket to ftp server (data) */
921 char *fname_out_alloc;
922 char *redirected_path = NULL;
923 struct host_info server;
924 struct host_info target;
926 server.allocated = NULL;
927 target.allocated = NULL;
931 parse_url(url, &target);
933 /* Use the proxy if necessary */
934 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
936 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
937 //FIXME: what if protocol is https? Ok to use http_proxy?
938 use_proxy = (proxy && proxy[0]);
940 parse_url(proxy, &server);
943 server.port = target.port;
944 if (ENABLE_FEATURE_IPV6) {
945 //free(server.allocated); - can't be non-NULL
946 server.host = server.allocated = xstrdup(target.host);
948 server.host = target.host;
952 if (ENABLE_FEATURE_IPV6)
953 strip_ipv6_scope_id(target.host);
955 /* If there was no -O FILE, guess output filename */
956 fname_out_alloc = NULL;
957 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
958 G.fname_out = bb_get_last_path_component_nostrip(target.path);
959 /* handle "wget http://kernel.org//" */
960 if (G.fname_out[0] == '/' || !G.fname_out[0])
961 G.fname_out = (char*)"index.html";
962 /* -P DIR is considered only if there was no -O FILE */
964 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
966 /* redirects may free target.path later, need to make a copy */
967 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
970 #if ENABLE_FEATURE_WGET_STATUSBAR
971 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
974 /* Determine where to start transfer */
976 if (option_mask32 & WGET_OPT_CONTINUE) {
977 G.output_fd = open(G.fname_out, O_WRONLY);
978 if (G.output_fd >= 0) {
979 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
981 /* File doesn't exist. We do not create file here yet.
982 * We are not sure it exists on remote side */
987 lsa = xhost2sockaddr(server.host, server.port);
988 if (!(option_mask32 & WGET_OPT_QUIET)) {
989 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
990 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
994 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
997 if (use_proxy || target.protocol != P_FTP) {
1004 /* Open socket to http(s) server */
1005 #if ENABLE_FEATURE_WGET_OPENSSL
1006 /* openssl (and maybe ssl_helper) support is configured */
1007 if (target.protocol == P_HTTPS) {
1008 /* openssl-based helper
1009 * Inconvenient API since we can't give it an open fd
1011 int fd = spawn_https_helper_openssl(server.host, server.port);
1012 # if ENABLE_FEATURE_WGET_SSL_HELPER
1013 if (fd < 0) { /* no openssl? try ssl_helper */
1014 sfp = open_socket(lsa);
1015 spawn_https_helper_small(fileno(sfp));
1019 /* We don't check for exec("openssl") failure in this case */
1021 sfp = fdopen(fd, "r+");
1023 bb_perror_msg_and_die(bb_msg_memory_exhausted);
1026 sfp = open_socket(lsa);
1028 #elif ENABLE_FEATURE_WGET_SSL_HELPER
1029 /* Only ssl_helper support is configured */
1030 sfp = open_socket(lsa);
1031 if (target.protocol == P_HTTPS)
1032 spawn_https_helper_small(fileno(sfp));
1034 /* ssl (https) support is not configured */
1035 sfp = open_socket(lsa);
1037 /* Send HTTP request */
// Two request-line forms: the first carries the full scheme://host/path URL,
// the second only the path; presumably the first is the proxied case (the
// selecting condition is on a line missing from this view).
1039 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1040 target.protocol, target.host,
1043 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1044 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
// Built-in headers are skipped when the user supplied the same header.
1047 if (!USR_HEADER_HOST)
1048 SENDFMT(sfp, "Host: %s\r\n", target.host);
1049 if (!USR_HEADER_USER_AGENT)
1050 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1052 /* Ask server to close the connection as soon as we are done
1053 * (IOW: we do not intend to send more requests)
1055 SENDFMT(sfp, "Connection: close\r\n");
1057 #if ENABLE_FEATURE_WGET_AUTHENTICATION
// The +6 skips the leading Proxy- of the literal, so this sends a plain
// Authorization header while sharing the string with the proxy case below.
1058 if (target.user && !USR_HEADER_AUTH) {
1059 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1060 base64enc(target.user));
1062 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1063 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1064 base64enc(server.user));
1068 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1069 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1071 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// NOTE(review): G.extra_headers is passed to log_io as the format string. In
// builds where log_io expands to bb_error_msg, a percent sign in a
// user-supplied header would be interpreted as a conversion - consider
// logging it through a %s format instead.
1072 if (G.extra_headers) {
1073 log_io(G.extra_headers);
1074 fputs(G.extra_headers, sfp);
// NOTE(review): the length below is cast to int but printed with %u.
1077 if (option_mask32 & WGET_OPT_POST_DATA) {
1079 "Content-Type: application/x-www-form-urlencoded\r\n"
1080 "Content-Length: %u\r\n"
1083 (int) strlen(G.post_data), G.post_data
1088 SENDFMT(sfp, "\r\n");
1092 /* If we use SSL helper, keeping our end of the socket open for writing
1093 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1094 * even after child closes its copy of the fd.
1097 shutdown(fileno(sfp), SHUT_WR);
1100 * Retrieve HTTP response line and check for "200" status code.
1103 fgets_and_trim(sfp);
// Skip the first token of the status line and the whitespace after it so that
// str points at the numeric status code.
1106 str = skip_non_whitespace(str);
1107 str = skip_whitespace(str);
1108 // FIXME: no error check
1109 // xatou wouldn't work: "200 OK"
1114 while (gethdr(sfp) != NULL)
1115 /* eat all remaining headers */;
1118 /* Success responses */
1121 case 201: /* 201 Created */
1122 /* "The request has been fulfilled and resulted in a new resource being created" */
1123 /* Standard wget is reported to treat this as success */
1125 case 202: /* 202 Accepted */
1126 /* "The request has been accepted for processing, but the processing has not been completed" */
1127 /* Treat as success: fall through */
1128 case 203: /* 203 Non-Authoritative Information */
1129 /* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1131 case 204: /* 204 No Content */
1133 Response 204 doesn't say "null file", it says "metadata
1134 has changed but data didn't":
1136 "10.2.5 204 No Content
1137 The server has fulfilled the request but does not need to return
1138 an entity-body, and might want to return updated metainformation.
1139 The response MAY include new or updated metainformation in the form
1140 of entity-headers, which if present SHOULD be associated with
1141 the requested variant.
1143 If the client is a user agent, it SHOULD NOT change its document
1144 view from that which caused the request to be sent. This response
1145 is primarily intended to allow input for actions to take place
1146 without causing a change to the user agent's active document view,
1147 although any new or updated metainformation SHOULD be applied
1148 to the document currently in the user agent's active view.
1150 The 204 response MUST NOT include a message-body, and thus
1151 is always terminated by the first empty line after the header fields."
1153 However, in real world it was observed that some web servers
1154 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1156 if (G.beg_range != 0) {
1157 /* "Range:..." was not honored by the server.
1158 * Restart download from the beginning.
1160 reset_beg_range_to_zero();
1163 /* 205 Reset Content ?? what to do on this ?? */
1165 case 300: /* redirection */
1171 case 206: /* Partial Content */
1172 if (G.beg_range != 0)
1173 /* "Range:..." worked. Good. */
1175 /* Partial Content even though we did not ask for it??? */
1178 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1182 * Retrieve HTTP headers.
1184 while ((str = gethdr(sfp)) != NULL) {
1185 static const char keywords[] ALIGN1 =
1186 "content-length\0""transfer-encoding\0""location\0";
1188 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1192 /* gethdr converted "FOO:" string to lowercase */
1194 /* strip trailing whitespace */
1195 char *s = strchrnul(str, '\0') - 1;
1196 while (s >= str && (*s == ' ' || *s == '\t')) {
// The header name is looked up in keywords; the + 1 makes the KEY_* values
// start at 1.
1200 key = index_in_strings(keywords, G.wget_buf) + 1;
1201 if (key == KEY_content_length) {
1202 G.content_len = BB_STRTOOFF(str, NULL, 10);
1203 if (G.content_len < 0 || errno) {
1204 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1209 if (key == KEY_transfer_encoding) {
1210 if (strcmp(str_tolower(str), "chunked") != 0)
1211 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1214 if (key == KEY_location && status >= 300) {
1215 if (--redir_limit == 0)
1216 bb_error_msg_and_die("too many redirections");
// A Location starting with a slash is a path on the same server; anything
// else is parsed as a full URL and replaces target.
1218 if (str[0] == '/') {
1219 free(redirected_path);
1220 target.path = redirected_path = xstrdup(str+1);
1221 /* lsa stays the same: it's on the same server */
1223 parse_url(str, &target);
1225 /* server.user remains untouched */
1226 free(server.allocated);
1227 server.allocated = NULL;
1228 server.host = target.host;
1229 /* strip_ipv6_scope_id(target.host); - no! */
1230 /* we assume remote never gives us IPv6 addr with scope id */
1231 server.port = target.port;
1234 } /* else: lsa stays the same: we use proxy */
1236 goto establish_session;
1239 // if (status >= 300)
1240 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1242 /* For HTTP, data is pumped over the same connection */
1248 sfp = prepare_ftp_session(&dfp, &target, lsa);
1253 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1254 if (G.output_fd < 0)
1255 G.output_fd = xopen(G.fname_out, G.o_flags);
1256 retrieve_file_data(dfp);
1257 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1258 xclose(G.output_fd);
1264 /* It's ftp. Close data connection properly */
1266 if (ftpcmd(NULL, NULL, sfp) != 226)
1267 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1268 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1272 free(server.allocated);
1273 free(target.allocated);
1276 free(fname_out_alloc);
1277 free(redirected_path);
1280 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1281 int wget_main(int argc UNUSED_PARAM, char **argv)
1283 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1284 static const char wget_longopts[] ALIGN1 =
1285 /* name, has_arg, val */
1286 "continue\0" No_argument "c"
1287 "quiet\0" No_argument "q"
1288 "output-document\0" Required_argument "O"
1289 "directory-prefix\0" Required_argument "P"
1290 "proxy\0" Required_argument "Y"
1291 "user-agent\0" Required_argument "U"
1292 IF_FEATURE_WGET_TIMEOUT(
1293 "timeout\0" Required_argument "T")
1295 IF_DESKTOP( "tries\0" Required_argument "t")
1296 "header\0" Required_argument "\xff"
1297 "post-data\0" Required_argument "\xfe"
1298 "spider\0" No_argument "\xfd"
1299 /* Ignored (we always use PASV): */
1300 IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
1301 /* Ignored (we don't do ssl) */
1302 IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
1303 /* Ignored (we don't support caching) */
1304 IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1305 IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1306 IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1307 IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1308 IF_DESKTOP( "no-parent\0" No_argument "\xf0")
1312 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1313 llist_t *headers_llist = NULL;
1318 #if ENABLE_FEATURE_WGET_TIMEOUT
1319 G.timeout_seconds = 900;
1320 signal(SIGALRM, alarm_handler);
1322 G.proxy_flag = "on"; /* use proxies if env vars are set */
1323 G.user_agent = "Wget"; /* "User-Agent" header field */
1325 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1326 applet_long_options = wget_longopts;
1328 opt_complementary = "-1" /* at least one URL */
1329 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
1330 getopt32(argv, "cqO:P:Y:U:T:+"
1333 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1334 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1335 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1336 * -nH --no-host-directories: wget -r http://host/ won't create host/
1338 * "n::" above says that we accept -n[ARG].
1339 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1341 , &G.fname_out, &G.dir_prefix,
1342 &G.proxy_flag, &G.user_agent,
1343 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1344 NULL, /* -t RETRIES */
1346 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1347 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1349 #if 0 /* option bits debug */
1350 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1351 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1352 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1353 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1354 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1359 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1360 if (headers_llist) {
1363 llist_t *ll = headers_llist;
1365 size += strlen(ll->data) + 2;
1368 G.extra_headers = hdr = xmalloc(size + 1);
1369 while (headers_llist) {
1373 size = sprintf(hdr, "%s\r\n",
1374 (char*)llist_pop(&headers_llist));
1375 /* a bit like index_in_substrings but don't match full key */
1377 words = wget_user_headers;
1379 if (strstr(hdr, words) == hdr) {
1380 G.user_headers |= bit;
1384 words += strlen(words) + 1;
1392 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1393 if (G.fname_out) { /* -O FILE ? */
1394 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1396 option_mask32 &= ~WGET_OPT_CONTINUE;
1398 /* compat with wget: -O FILE can overwrite */
1399 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1403 download_one_url(*argv++);
1405 if (G.output_fd >= 0)
1406 xclose(G.output_fd);
1408 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1409 free(G.extra_headers);
1413 return EXIT_SUCCESS;