1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
16 //config: wget is a utility for non-interactive download of files from HTTP
17 //config: and FTP servers.
19 //config:config FEATURE_WGET_STATUSBAR
20 //config: bool "Enable a nifty progress meter (+2k)"
22 //config: depends on WGET
24 //config: Enable the transfer progress bar for wget transfers.
26 //config:config FEATURE_WGET_AUTHENTICATION
27 //config: bool "Enable HTTP authentication"
29 //config: depends on WGET
31 //config: Support authenticated HTTP transfers.
33 //config:config FEATURE_WGET_LONG_OPTIONS
34 //config: bool "Enable long options"
36 //config: depends on WGET && LONG_OPTS
38 //config: Support long options for the wget applet.
40 //config:config FEATURE_WGET_TIMEOUT
41 //config: bool "Enable timeout option -T SEC"
43 //config: depends on WGET
45 //config: Supports network read and connect timeouts for wget,
46 //config: so that wget will give up and time out, through the -T
47 //config: command line option.
49 //config: Currently only connect and network data read timeout are
50 //config: supported (i.e., timeout is not applied to the DNS query). When
51 //config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
52 //config: will work in addition to -T.
54 //config:config FEATURE_WGET_OPENSSL
55 //config: bool "Try to connect to HTTPS using openssl"
57 //config: depends on WGET
59 //config: Choose how wget establishes SSL connection for https:// URLs.
61 //config: Busybox itself contains no SSL code. wget will spawn
62 //config: a helper program to talk over HTTPS.
64 //config: OpenSSL has a simple SSL client for debug purposes.
65 //config: If you select "openssl" helper, wget will effectively run:
66 //config: "openssl s_client -quiet -connect hostname:443
67 //config: -servername hostname 2>/dev/null" and pipe its data
68 //config: through it. -servername is not used if hostname is numeric.
69 //config: Note inconvenient API: host resolution is done twice,
70 //config: and there is no guarantee openssl's idea of IPv6 address
71 //config: format is the same as ours.
72 //config: Another problem is that s_client prints debug information
73 //config: to stderr, and it needs to be suppressed. This means
74 //config: all error messages get suppressed too.
75 //config: openssl is also a big binary, often dynamically linked
76 //config: against ~15 libraries.
78 //config:config FEATURE_WGET_SSL_HELPER
79 //config: bool "Try to connect to HTTPS using ssl_helper"
81 //config: depends on WGET
83 //config: Choose how wget establishes SSL connection for https:// URLs.
85 //config: Busybox itself contains no SSL code. wget will spawn
86 //config: a helper program to talk over HTTPS.
88 //config: ssl_helper is a tool which can be built statically
89 //config: from busybox sources against a small embedded SSL library.
90 //config: Please see networking/ssl_helper/README.
91 //config: It does not require double host resolution and emits
92 //config: error messages to stderr.
94 //config: Precompiled static binary may be available at
95 //config: http://busybox.net/downloads/binaries/
97 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
99 //kbuild:lib-$(CONFIG_WGET) += wget.o
101 //usage:#define wget_trivial_usage
102 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
103 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
104 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
105 /* Since we ignore these opts, we don't show them in --help */
106 /* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
107 /* //usage: " [-nv] [-nc] [-nH] [-np]" */
108 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
110 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
111 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
112 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
114 //usage:#define wget_full_usage "\n\n"
115 //usage: "Retrieve files via HTTP or FTP\n"
116 //usage: "\n -s Spider mode - only check file existence"
117 //usage: "\n -c Continue retrieval of aborted transfer"
118 //usage: "\n -q Quiet"
119 //usage: "\n -P DIR Save to DIR (default .)"
120 //usage: IF_FEATURE_WGET_TIMEOUT(
121 //usage: "\n -T SEC Network read timeout is SEC seconds"
123 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
124 //usage: "\n -U STR Use STR for User-Agent header"
125 //usage: "\n -Y Use proxy ('on' or 'off')"
// Request-logging helpers. Two alternative definition sets are visible here;
// the preprocessor conditional that selects between them is not part of this
// excerpt.
//  - First set: log_io() forwards its arguments to bb_error_msg(), and
//    SENDFMT() logs the formatted text with a leading greater-than marker and
//    then writes the same text to fp with fprintf().
//  - Second set: log_io() expands to a no-op and SENDFMT() is a plain
//    fprintf() to fp.
// NOTE(review): in the first set the first log_io() argument acts as a
// printf-style format string, so callers should not pass arbitrary text as
// the sole argument.
130 # define log_io(...) bb_error_msg(__VA_ARGS__)
131 # define SENDFMT(fp, fmt, ...) \
133 log_io("> " fmt, ##__VA_ARGS__); \
134 fprintf(fp, fmt, ##__VA_ARGS__); \
137 # define log_io(...) ((void)0)
138 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
// Scheme of a parsed URL. Presumably a member of struct host_info (the
// surrounding declaration is not visible in this excerpt): parse_url()
// assigns P_HTTP or P_HTTPS to h->protocol, and download_one_url() compares
// target.protocol against P_FTP and P_HTTPS by pointer.
146 const char *protocol;
// Scheme name strings. parse_url() compares the URL scheme against them with
// strcmp() and stores the pointer in the protocol field for http and https;
// later code identifies the scheme by pointer equality, not by string
// comparison. P_HTTPS is defined only when one of the two HTTPS helper
// features is enabled.
150 static const char P_FTP[] ALIGN1 = "ftp";
151 static const char P_HTTP[] ALIGN1 = "http";
152 #if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
153 static const char P_HTTPS[] ALIGN1 = "https";
// Bookkeeping for request headers supplied by the user through --header.
// Each HDR_* bit marks one built-in header that the user has overridden. The
// two authentication bits are multiplied by ENABLE_FEATURE_WGET_AUTHENTICATION
// and therefore evaluate to 0 when that feature is off.
// wget_user_headers is a list of NUL-terminated header names which wget_main()
// walks to set the matching bit in G.user_headers.
// The USR_HEADER_* macros test those bits. Without long options they are the
// constant 0, so the built-in headers guarded by them are always sent.
156 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
157 /* User-specified headers prevent using our corresponding built-in headers. */
160 HDR_USER_AGENT = (1<<1),
162 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
163 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
165 static const char wget_user_headers[] ALIGN1 =
169 # if ENABLE_FEATURE_WGET_AUTHENTICATION
171 "Proxy-Authorization:\0"
174 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
175 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
176 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
177 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
178 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
179 #else /* No long options, no user-headers :( */
180 # define USR_HEADER_HOST 0
181 # define USR_HEADER_USER_AGENT 0
182 # define USR_HEADER_RANGE 0
183 # define USR_HEADER_AUTH 0
184 # define USR_HEADER_PROXY_AUTH 0
// Fields of the applet-wide state that the rest of the file reaches through
// the G macro (the enclosing struct header is not visible in this excerpt).
// content_len is first loaded from the Content-Length header or the FTP SIZE
// reply; retrieve_file_data() later reloads it from each chunk-size line and
// uses it to cap the size of individual reads.
189 off_t content_len; /* Content-length of the file */
// beg_range is nonzero only when resuming (-c) from an existing output file.
190 off_t beg_range; /* Range at which continue begins */
191 #if ENABLE_FEATURE_WGET_STATUSBAR
192 off_t transferred; /* Number of bytes transferred so far */
193 const char *curfile; /* Name of current file being transferred */
197 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// Bit set of HDR_* flags filled in by wget_main() from the --header values.
200 unsigned char user_headers; /* Headers mentioned by the user */
202 char *fname_out; /* where to direct output (-O) */
203 const char *proxy_flag; /* Use proxies if env vars are set */
204 const char *user_agent; /* "User-Agent" header field */
205 #if ENABLE_FEATURE_WGET_TIMEOUT
// timeout_seconds defaults to 900 in wget_main() and is settable with -T.
// die_if_timed_out is raised by set_alarm(), lowered by clear_alarm() and
// consulted by alarm_handler().
206 unsigned timeout_seconds;
207 bool die_if_timed_out;
211 smallint chunked; /* chunked transfer encoding */
212 smallint got_clen; /* got content-length: from server */
213 /* Local downloads do benefit from big buffer.
214 * With 512 byte buffer, it was measured to be
215 * an order of magnitude slower than with big one.
// wget_buf is a shared scratch buffer: it holds response and header lines
// (fgets_and_trim), base64 output (base64enc), the FTP REST command, and the
// file data being copied (retrieve_file_data). Contents do not survive the
// next user.
217 uint64_t just_to_align_next_member;
218 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
// G gives access to the applet state through ptr_to_globals.
// INIT_G() allocates a zero-filled instance with xzalloc(), so every field
// starts out as 0 or NULL. FINI_G() releases it via FREE_PTR_TO_GLOBALS().
// The closing lines of both macro bodies are not visible in this excerpt.
220 #define G (*ptr_to_globals)
221 #define INIT_G() do { \
222 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
224 #define FINI_G() do { \
225 FREE_PTR_TO_GLOBALS(); \
229 /* Must match option string! */
// Flag bits tested against option_mask32 throughout the file.
// The first eight bits line up with the first eight letters of the getopt32()
// string in wget_main(): c, s, q, O, P, Y, U, T. The remainder of that string
// is not visible in this excerpt, so the mapping of the later bits could not
// be checked here.
// The last two flags are multiplied by ENABLE_FEATURE_WGET_LONG_OPTIONS and so
// become 0 when long options are compiled out.
231 WGET_OPT_CONTINUE = (1 << 0),
232 WGET_OPT_SPIDER = (1 << 1),
233 WGET_OPT_QUIET = (1 << 2),
234 WGET_OPT_OUTNAME = (1 << 3),
235 WGET_OPT_PREFIX = (1 << 4),
236 WGET_OPT_PROXY = (1 << 5),
237 WGET_OPT_USER_AGENT = (1 << 6),
238 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
239 WGET_OPT_RETRIES = (1 << 8),
240 WGET_OPT_PASSIVE = (1 << 9),
241 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
242 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
250 #if ENABLE_FEATURE_WGET_STATUSBAR
// Drive the transfer progress display.
// flag is PROGRESS_START, PROGRESS_BUMP or PROGRESS_END; retrieve_file_data()
// passes all three. The quiet option (WGET_OPT_QUIET) is checked first.
251 static void progress_meter(int flag)
253 if (option_mask32 & WGET_OPT_QUIET)
// First call for a transfer: set up the meter, labelled with G.curfile.
256 if (flag == PROGRESS_START)
257 bb_progress_init(&G.pmt, G.curfile);
259 bb_progress_update(&G.pmt,
// Last argument: 0 for chunked transfers or when no Content-Length was
// received, otherwise beg_range + transferred + content_len (presumably the
// expected total size - confirm against bb_progress_update).
262 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
// Final call: release the meter and end the line on stderr.
265 if (flag == PROGRESS_END) {
266 bb_progress_free(&G.pmt);
267 bb_putchar_stderr('\n');
// Status bar compiled out: the function is an empty inline stub.
272 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
276 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
277 * local addresses can have a scope identifier to specify the
278 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
279 * identifier is only valid on a single node.
281 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
282 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
283 * in the Host header as invalid requests, see
284 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
// Edits host in place. For a bracketed IPv6 literal that carries a percent
// sign, the text from the percent sign up to, but not including, the closing
// bracket is removed by copying the tail of the string over it.
// download_one_url() calls this on target.host when IPv6 support is enabled.
286 static void strip_ipv6_scope_id(char *host)
290 /* bbox wget actually handles IPv6 addresses without [], like
291 * wget "http://::1/xxx", but this is not standard.
292 * To save code, _here_ we do not support it. */
295 return; /* not IPv6 */
297 scope = strchr(host, '%');
301 /* Remove the IPv6 zone identifier from the host address */
302 cp = strchr(host, ']');
// The closing bracket must be followed by a colon (port) or end the string.
303 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
304 /* malformed address (not "[xx]:nn" or "[xx]") */
308 /* cp points to "]...", scope points to "%eth0]..." */
309 overlapping_strcpy(scope, cp);
312 #if ENABLE_FEATURE_WGET_AUTHENTICATION
313 /* Base64-encode character string. */
// The encoded text is written into the shared G.wget_buf, so it is only valid
// until the next user of that buffer. Input longer than about three quarters
// of the buffer is silently truncated before encoding, which keeps the larger
// encoded output inside the buffer.
// The return statement is not visible in this excerpt; callers pass the
// result straight to a %s conversion in SENDFMT().
314 static char *base64enc(const char *str)
316 unsigned len = strlen(str);
317 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
318 len = sizeof(G.wget_buf)/4*3 - 10;
319 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
// Only the signature and the first local are visible in this excerpt, so the
// exact transformation cannot be documented from here.
// Usage seen elsewhere in the file: the function wraps text that came from the
// URL or from the server before it is printed in an error message, and its
// return value is consumed as a %s argument.
324 static char* sanitize_string(char *s)
326 unsigned char *p = (void *) s;
333 #if ENABLE_FEATURE_WGET_TIMEOUT
// SIGALRM handler, installed by wget_main(). Terminates the program with a
// timeout message, but only while G.die_if_timed_out is set, i.e. between
// set_alarm() and clear_alarm().
334 static void alarm_handler(int sig UNUSED_PARAM)
336 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
337 if (G.die_if_timed_out)
338 bb_error_msg_and_die("download timed out");
// Arm the timeout: schedule SIGALRM after G.timeout_seconds and mark the
// handler as live. A timeout value of 0 leaves the mechanism off.
340 static void set_alarm(void)
342 if (G.timeout_seconds) {
343 alarm(G.timeout_seconds);
344 G.die_if_timed_out = 1;
// Disarm by clearing the flag only. The pending alarm is not cancelled; if it
// fires later the handler sees the cleared flag and does nothing.
347 # define clear_alarm() ((void)(G.die_if_timed_out = 0))
// Timeout feature compiled out: both operations expand to nothing.
349 # define set_alarm() ((void)0)
350 # define clear_alarm() ((void)0)
353 #if ENABLE_FEATURE_WGET_OPENSSL
355 * is_ip_address() attempts to verify whether or not a string
356 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
357 * of inet_pton() can be used to determine this.
359 * TODO add proper error checking when inet_pton() returns -1
360 * (some form of system error has occurred, and errno is set)
// Returns nonzero when the last inet_pton() attempt reported success (1).
// The IPv4 parse is always performed. The IPv6 parse is compiled in only with
// ENABLE_FEATURE_IPV6; the condition under which it runs is not visible in
// this excerpt.
// spawn_https_helper_openssl() uses the result to decide whether the host may
// be passed to openssl as a -servername value.
362 static int is_ip_address(const char *string)
364 struct sockaddr_in sa;
366 int result = inet_pton(AF_INET, string, &(sa.sin_addr));
367 # if ENABLE_FEATURE_IPV6
369 struct sockaddr_in6 sa6;
370 result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
373 return (result == 1);
// Connect a stream socket to lsa and wrap the descriptor in a stdio stream
// opened for both reading and writing.
// The bb_perror_msg_and_die() call presumably covers fdopen() failure; the
// condition guarding it and the return statement are not visible in this
// excerpt. Callers use the result as the control or data stream.
377 static FILE *open_socket(len_and_sockaddr *lsa)
383 fd = xconnect_stream(lsa);
386 /* glibc 2.4 seems to try seeking on it - ??! */
387 /* hopefully it understands what ESPIPE means... */
388 fp = fdopen(fd, "r+");
390 bb_perror_msg_and_die(bb_msg_memory_exhausted);
395 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
// Reads one line from fp into the shared G.wget_buf; a NULL result from
// fgets() (EOF or error) is fatal.
// A line longer than the buffer is delivered only in part, which is what the
// return value lets the caller detect: gethdr() then discards the remainder.
396 static char fgets_and_trim(FILE *fp)
402 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
403 bb_perror_msg_and_die("error getting response");
// Locate the newline first; the carriage return is searched for afterwards.
406 buf_ptr = strchrnul(G.wget_buf, '\n');
409 buf_ptr = strchrnul(G.wget_buf, '\r');
412 log_io("< %s", G.wget_buf);
// Send one FTP command on the control stream fp and collect the reply.
// The command is s1 immediately followed by s2, terminated by CR LF. Callers
// also pass NULL for both strings to read a reply without sending anything;
// the guard for that case is not visible in this excerpt.
// Reply lines are read until one begins with a digit and has a space as its
// fourth character. The buffer is then cut after the three-digit code, which
// is converted with xatoi_positive(); callers compare the function result
// with numeric reply codes. The reply text stays available at G.wget_buf + 4.
417 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
423 fprintf(fp, "%s%s\r\n", s1, s2);
425 log_io("> %s%s", s1, s2);
430 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
432 G.wget_buf[3] = '\0';
433 result = xatoi_positive(G.wget_buf);
// Split src_url into the fields of *h.
// A private copy of the URL is made and remembered in h->allocated. The copy
// is edited in place, and h->host is used as a pointer into it, so the copy
// has to stay alive for as long as h is in use.
// The scheme in front of the :// separator selects the default port handed to
// bb_lookup_port() (21, 443 or 80) and, for http and https, h->protocol.
// The bb_error_msg_and_die() call reports a scheme that is none of these; per
// the comment below it, a URL without a scheme falls back to http.
// h->user is assigned from the part of the host string in front of the last
// at-sign, after percent-decoding; the comment at the end states that it is
// otherwise left as it was.
438 static void parse_url(const char *src_url, struct host_info *h)
443 h->allocated = url = xstrdup(src_url);
446 p = strstr(url, "://");
450 if (strcmp(url, P_FTP) == 0) {
451 h->port = bb_lookup_port(P_FTP, "tcp", 21);
453 #if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
454 if (strcmp(url, P_HTTPS) == 0) {
455 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
456 h->protocol = P_HTTPS;
459 if (strcmp(url, P_HTTP) == 0) {
461 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
462 h->protocol = P_HTTP;
465 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
468 // GNU wget is user-friendly and falls back to http://
474 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
475 // 'GET /?var=a/b HTTP/1.0'
476 // and saves 'index.html?var=a%2Fb' (we save 'b')
477 // wget 'http://busybox.net?login=john@doe':
478 // request: 'GET /?login=john@doe HTTP/1.0'
479 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
480 // wget 'http://busybox.net#test/test':
481 // request: 'GET / HTTP/1.0'
482 // saves: 'index.html' (we save 'test')
484 // We also don't add unique .N suffix if file exists...
// After the next three lines sp points at whichever of slash, question mark
// and hash occurs first in the host string, or is NULL when none is present.
485 sp = strchr(h->host, '/');
486 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
487 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
490 } else if (*sp == '/') {
493 } else { // '#' or '?'
494 // http://busybox.net?login=john@doe is a valid URL
495 // memmove converts to:
496 // http:/busybox.nett?login=john@doe...
// The host name is shifted one byte towards the start of the copy, which
// frees the byte in front of the query or fragment character.
497 memmove(h->host - 1, h->host, sp - h->host);
// The last at-sign separates the user-info part from the host name.
503 sp = strrchr(h->host, '@');
505 // URL-decode "user:password" string before base64-encoding:
506 // wget http://test:my%20pass@example.com should send
507 // Authorization: Basic dGVzdDpteSBwYXNz
508 // which decodes to "test:my pass".
509 // Standard wget and curl do this too.
512 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
515 /* else: h->user remains NULL, or as set by original request
516 * before redirect (if we are here after a redirect).
// Read one response header line from fp into G.wget_buf.
// An empty line marks the end of the headers; callers loop until the function
// yields NULL. Otherwise the header name at the start of the buffer is
// lower-cased in place (see the note on the character ranges below) and the
// value is located by skipping the whitespace after the name.
// A line that does not have the expected shape is fatal.
// When fgets_and_trim() did not see a newline, the line was longer than the
// buffer; the rest of it is read and discarded character by character.
// The returned pointer and the header name both live in G.wget_buf and are
// overwritten by the next read.
520 static char *gethdr(FILE *fp)
525 /* retrieve header line */
526 c = fgets_and_trim(fp);
528 /* end of the headers? */
529 if (G.wget_buf[0] == '\0')
532 /* convert the header name to lower case */
533 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
535 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
536 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
537 * "A-Z" maps to "a-z".
538 * "@[\]" can't occur in header names.
539 * "^_" maps to "~,DEL" (which is wrong).
540 * "^" was never seen yet, "_" was seen from web.archive.org
541 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
546 /* verify we are at the end of the header name */
548 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
550 /* locate the start of the header value */
552 hdrval = skip_whitespace(s);
555 /* Rats! The buffer isn't big enough to hold the entire header value */
556 while (c = getc(fp), c != EOF && c != '\n')
// Called when the server did not honour a resume request (HTTP Range or FTP
// REST). Reports the failure and rewinds the output file to offset 0 so the
// download restarts from the beginning. The file is deliberately not
// truncated here; see the commented-out call below and retrieve_file_data().
563 static void reset_beg_range_to_zero(void)
565 bb_error_msg("restart failed");
567 xlseek(G.output_fd, 0, SEEK_SET);
568 /* Done at the end instead: */
569 /* ftruncate(G.output_fd, 0); */
// Set up an FTP download of target->path.
// Visible sequence on the control connection:
//   greeting (220 expected), USER and PASS, TYPE I, SIZE, PASV, optional REST,
//   RETR.
// The data connection is opened to the port announced in the PASV reply and
// stored through dfpp. lsa is modified in place: its port is replaced by the
// data port while the address is kept.
// Side effects on the globals: G.content_len is taken from a 213 reply to
// SIZE and reduced by G.beg_range when REST is accepted with 350; if REST is
// refused, reset_beg_range_to_zero() is called.
// Every unexpected reply is fatal. The function returns a FILE pointer; the
// return statement is not visible in this excerpt, and download_one_url()
// stores the result as its control stream.
572 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
// Default credentials; the condition guarding this line is not visible here.
579 target->user = xstrdup("anonymous:busybox@");
581 sfp = open_socket(lsa);
582 if (ftpcmd(NULL, NULL, sfp) != 220)
583 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
586 * Splitting username:password pair,
589 str = strchr(target->user, ':');
592 switch (ftpcmd("USER ", target->user, sfp)) {
596 if (ftpcmd("PASS ", str, sfp) == 230)
598 /* fall through (failed login) */
600 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
// The reply to TYPE I is not checked.
603 ftpcmd("TYPE I", NULL, sfp);
608 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
609 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
610 if (G.content_len < 0 || errno) {
611 bb_error_msg_and_die("SIZE value is garbage");
617 * Entering passive mode
619 if (ftpcmd("PASV", NULL, sfp) != 227) {
621 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
623 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
624 // Server's IP is N1.N2.N3.N4 (we ignore it)
625 // Server's port for data connection is P1*256+P2
// The reply is parsed from the right: cut at the last closing parenthesis,
// then take the number after the last comma as the low byte of the port.
626 str = strrchr(G.wget_buf, ')');
627 if (str) str[0] = '\0';
628 str = strrchr(G.wget_buf, ',');
629 if (!str) goto pasv_error;
630 port = xatou_range(str+1, 0, 255);
// Second pass for the high byte of the port.
632 str = strrchr(G.wget_buf, ',');
633 if (!str) goto pasv_error;
634 port += xatou_range(str+1, 0, 255) * 256;
635 set_nport(&lsa->u.sa, htons(port));
637 *dfpp = open_socket(lsa);
// Resume support: ask the server to start at G.beg_range.
639 if (G.beg_range != 0) {
640 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
641 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
642 G.content_len -= G.beg_range;
644 reset_beg_range_to_zero();
// NOTE(review): any reply code up to and including 150 is accepted here, not
// only 125 and 150 - confirm this is intended.
647 if (ftpcmd("RETR ", target->path, sfp) > 150)
648 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
653 #if ENABLE_FEATURE_WGET_OPENSSL
// Start the openssl s_client program as an HTTPS helper connected to us
// through an AF_UNIX socket pair, and return a descriptor for talking to it.
// host is extended to the host:port form when it contains no colon, and the
// result is handed to openssl after -connect. A second copy with the part
// from the last colon onwards cut off serves as the SNI name, and is passed
// with -servername only when is_ip_address() says it is not a numeric address.
// The helper has /dev/null opened for its error output, so its diagnostics
// are lost (see the comment in the body).
// download_one_url() treats a negative return value as the signal to try
// ssl_helper instead; that path exists only when FEATURE_WGET_SSL_HELPER is
// also enabled. The fork, the descriptor plumbing and the return statements
// are not visible in this excerpt.
654 static int spawn_https_helper_openssl(const char *host, unsigned port)
656 char *allocated = NULL;
660 IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)
662 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
663 /* Kernel can have AF_UNIX support disabled */
664 bb_perror_msg_and_die("socketpair");
// The strrchr() result two lines further down is dereferenced unchecked. That
// is safe because host is guaranteed to contain a colon at that point.
666 if (!strchr(host, ':'))
667 host = allocated = xasprintf("%s:%u", host, port);
668 servername = xstrdup(host);
669 strrchr(servername, ':')[0] = '\0';
681 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
682 * It prints some debug stuff on stderr, don't know how to suppress it.
683 * Work around by dev-nulling stderr. We lose all error messages :(
686 xopen("/dev/null", O_RDWR);
687 memset(&argv, 0, sizeof(argv));
688 argv[0] = (char*)"openssl";
689 argv[1] = (char*)"s_client";
690 argv[2] = (char*)"-quiet";
691 argv[3] = (char*)"-connect";
692 argv[4] = (char*)host;
694 * Per RFC 6066 Section 3, the only permitted values in the
695 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
696 * IPv4 and IPv6 addresses, port numbers are not allowed.
698 if (!is_ip_address(servername)) {
699 argv[5] = (char*)"-servername";
700 argv[6] = (char*)servername;
703 BB_EXECVP(argv[0], argv);
705 # if ENABLE_FEATURE_WGET_SSL_HELPER
709 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
718 # if ENABLE_FEATURE_WGET_SSL_HELPER
728 /* See networking/ssl_helper/README how to build one */
729 #if ENABLE_FEATURE_WGET_SSL_HELPER
// Insert the external ssl_helper program between us and the already connected
// socket network_fd.
// An AF_UNIX socket pair is created and a child process is started (fork on
// MMU systems, vfork otherwise). The child moves the network socket to
// descriptor 3 and executes ssl_helper with the argument -d3. The parent
// moves its end of the socket pair onto the number network_fd, so the
// caller's stdio stream keeps working on the same descriptor number but now
// talks to the helper. Failure to create the pair or to execute the helper is
// fatal. The remaining descriptor handling is not visible in this excerpt.
730 static void spawn_https_helper_small(int network_fd)
735 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
736 /* Kernel can have AF_UNIX support disabled */
737 bb_perror_msg_and_die("socketpair");
739 pid = BB_MMU ? xfork() : xvfork();
747 xmove_fd(network_fd, 3);
749 * A simple ssl/tls helper
751 argv[0] = (char*)"ssl_helper";
752 argv[1] = (char*)"-d3";
754 BB_EXECVP(argv[0], argv);
755 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
761 xmove_fd(sp[0], network_fd);
// Copy the response body from dfp to G.output_fd, using G.wget_buf as the
// transfer buffer.
// Each read asks for a full buffer, or for G.content_len bytes when that is
// smaller than the buffer size.
// When the status bar or the timeout feature is enabled, the descriptor is
// switched to non-blocking mode around the read loop. A read that is cut short
// with errno EAGAIN leads to a poll of up to one second, after which the
// progress display is bumped. With the timeout feature a countdown that
// starts at G.timeout_seconds is decremented for each poll that times out;
// reaching zero is fatal. The countdown is reloaded in the data path
// (apparently whenever data arrives - the enclosing condition is not visible).
// A short read with any other errno goes to the error check; the visible
// branches either die with a read error or leave the loop as end of file.
// For chunked responses the next chunk-size line is parsed as a hexadecimal
// number into G.content_len, and a size of 0 ends the transfer.
// At the end the output file may be truncated at the current position (the
// guarding condition is not visible here; the comment near the end explains
// why truncation is postponed until now), and the progress display is
// completed.
// NOTE(review): the chunk size is not validated (see the FIXME below) and the
// result of ftruncate() is ignored.
765 static void NOINLINE retrieve_file_data(FILE *dfp)
767 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
768 # if ENABLE_FEATURE_WGET_TIMEOUT
769 unsigned second_cnt = G.timeout_seconds;
771 struct pollfd polldata;
773 polldata.fd = fileno(dfp);
774 polldata.events = POLLIN | POLLPRI;
776 progress_meter(PROGRESS_START);
781 /* Loops only if chunked */
784 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
785 /* Must use nonblocking I/O, otherwise fread will loop
786 * and *block* until it reads full buffer,
787 * which messes up progress bar and/or timeout logic.
788 * Because of nonblocking I/O, we need to dance
789 * very carefully around EAGAIN. See explanation at
792 ndelay_on(polldata.fd);
798 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
799 /* fread internally uses read loop, which in our case
800 * is usually exited when we get EAGAIN.
801 * In this case, libc sets error marker on the stream.
802 * Need to clear it before next fread to avoid possible
803 * rare false positive ferror below. Rare because usually
804 * fread gets more than zero bytes, and we don't fall
805 * into if (n <= 0) ...
810 rdsz = sizeof(G.wget_buf);
812 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
813 if ((int)G.content_len <= 0)
815 rdsz = (unsigned)G.content_len;
818 n = fread(G.wget_buf, 1, rdsz, dfp);
821 xwrite(G.output_fd, G.wget_buf, n);
822 #if ENABLE_FEATURE_WGET_STATUSBAR
827 if (G.content_len == 0)
830 #if ENABLE_FEATURE_WGET_TIMEOUT
831 second_cnt = G.timeout_seconds;
838 * If error occurs, or EOF is reached, the return value
839 * is a short item count (or zero).
840 * fread does not distinguish between EOF and error.
842 if (errno != EAGAIN) {
844 progress_meter(PROGRESS_END);
845 bb_perror_msg_and_die(bb_msg_read_error);
847 break; /* EOF, not error */
850 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
851 /* It was EAGAIN. There is no data. Wait up to one second
852 * then abort if timed out, or update the bar and try reading again.
854 if (safe_poll(&polldata, 1, 1000) == 0) {
855 # if ENABLE_FEATURE_WGET_TIMEOUT
856 if (second_cnt != 0 && --second_cnt == 0) {
857 progress_meter(PROGRESS_END);
858 bb_error_msg_and_die("download timed out");
861 /* We used to loop back to poll here,
862 * but there is no great harm in letting fread
863 * to try reading anyway.
868 /* Need to do it _every_ second for "stalled" indicator
869 * to be shown properly.
871 progress_meter(PROGRESS_BUMP);
872 } /* while (reading data) */
874 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
876 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
881 fgets_and_trim(dfp); /* Eat empty line */
884 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
885 /* FIXME: error check? */
886 if (G.content_len == 0)
887 break; /* all done! */
890 * Note that fgets may result in some data being buffered in dfp.
891 * We loop back to fread, which will retrieve this data.
892 * Also note that code has to be arranged so that fread
893 * is done _before_ one-second poll wait - poll doesn't know
894 * about stdio buffering and can result in spurious one second waits!
898 /* If -c failed, we restart from the beginning,
899 * but we do not truncate file then, we do it only now, at the end.
900 * This lets user to ^C if his 99% complete 10 GB file download
901 * failed to restart *without* losing the almost complete file.
904 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
905 if (pos != (off_t)-1)
906 ftruncate(G.output_fd, pos);
909 /* Draw full bar and free its resources */
910 G.chunked = 0; /* makes it show 100% even for chunked download */
911 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
912 progress_meter(PROGRESS_END);
// Download a single URL.
// Steps visible in this excerpt, in order:
//  1. Parse the URL into target. Unless the proxy flag is the string off, look
//     up the ftp_proxy or http_proxy environment variable and, when it is set
//     and non-empty, parse it into server. Otherwise server takes the host
//     and port of target.
//  2. Unless -O was given, derive the output name from the last component of
//     the URL path (index.html when that component is empty), optionally
//     prefixed with the -P directory. A private copy of the name is kept
//     because redirects may free the path it points into.
//  3. With -c, open an existing output file for writing and take its size as
//     the resume offset G.beg_range.
//  4. Resolve and connect. HTTP and HTTPS, and any transfer through a proxy,
//     send a request with HTTP/1.1 in the request line and then process the
//     status line and the Content-Length, Transfer-Encoding and Location
//     headers. A Location header on a status of 300 or above restarts the
//     session, limited by redir_limit. Plain FTP is delegated to
//     prepare_ftp_session().
//  5. Unless in spider mode, open the output file if it is not open yet and
//     pass the data stream to retrieve_file_data().
//  6. Release the per-URL allocations.
// Errors are fatal: the helpers used here terminate the process.
915 static void download_one_url(const char *url)
917 bool use_proxy; /* Use proxies if env vars are set */
919 len_and_sockaddr *lsa;
920 FILE *sfp; /* socket to web/ftp server */
921 FILE *dfp; /* socket to ftp server (data) */
923 char *fname_out_alloc;
924 char *redirected_path = NULL;
925 struct host_info server;
926 struct host_info target;
928 server.allocated = NULL;
929 target.allocated = NULL;
933 parse_url(url, &target);
935 /* Use the proxy if necessary */
936 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
// Only ftp gets its own proxy variable; every other scheme uses http_proxy.
938 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
939 //FIXME: what if protocol is https? Ok to use http_proxy?
940 use_proxy = (proxy && proxy[0]);
942 parse_url(proxy, &server);
945 server.port = target.port;
// With IPv6 support the server host gets its own copy, because the scope id
// is stripped from target.host a few lines further down.
946 if (ENABLE_FEATURE_IPV6) {
947 //free(server.allocated); - can't be non-NULL
948 server.host = server.allocated = xstrdup(target.host);
950 server.host = target.host;
954 if (ENABLE_FEATURE_IPV6)
955 strip_ipv6_scope_id(target.host);
957 /* If there was no -O FILE, guess output filename */
958 fname_out_alloc = NULL;
959 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
960 G.fname_out = bb_get_last_path_component_nostrip(target.path);
961 /* handle "wget http://kernel.org//" */
962 if (G.fname_out[0] == '/' || !G.fname_out[0])
963 G.fname_out = (char*)"index.html";
964 /* -P DIR is considered only if there was no -O FILE */
966 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
968 /* redirects may free target.path later, need to make a copy */
969 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
972 #if ENABLE_FEATURE_WGET_STATUSBAR
973 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
976 /* Determine where to start transfer */
978 if (option_mask32 & WGET_OPT_CONTINUE) {
979 G.output_fd = open(G.fname_out, O_WRONLY);
980 if (G.output_fd >= 0) {
981 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
983 /* File doesn't exist. We do not create file here yet.
984 * We are not sure it exists on remote side */
989 lsa = xhost2sockaddr(server.host, server.port);
990 if (!(option_mask32 & WGET_OPT_QUIET)) {
991 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
992 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
996 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
// FTP through a proxy is spoken as HTTP to the proxy, hence the first test.
999 if (use_proxy || target.protocol != P_FTP) {
1006 /* Open socket to http(s) server */
1007 #if ENABLE_FEATURE_WGET_OPENSSL
1008 /* openssl (and maybe ssl_helper) support is configured */
1009 if (target.protocol == P_HTTPS) {
1010 /* openssl-based helper
1011 * Inconvenient API since we can't give it an open fd
1013 int fd = spawn_https_helper_openssl(server.host, server.port);
1014 # if ENABLE_FEATURE_WGET_SSL_HELPER
1015 if (fd < 0) { /* no openssl? try ssl_helper */
1016 sfp = open_socket(lsa);
1017 spawn_https_helper_small(fileno(sfp));
1021 /* We don't check for exec("openssl") failure in this case */
1023 sfp = fdopen(fd, "r+");
1025 bb_perror_msg_and_die(bb_msg_memory_exhausted);
1028 sfp = open_socket(lsa);
1030 #elif ENABLE_FEATURE_WGET_SSL_HELPER
1031 /* Only ssl_helper support is configured */
1032 sfp = open_socket(lsa);
1033 if (target.protocol == P_HTTPS)
1034 spawn_https_helper_small(fileno(sfp));
1036 /* ssl (https) support is not configured */
1037 sfp = open_socket(lsa);
1039 /* Send HTTP request */
// Two request-line forms: an absolute URL (the arguments shown are scheme and
// host; the rest are not visible) and a path-only form that becomes a POST
// when post data was given. The condition choosing between them is not
// visible in this excerpt.
1041 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1042 target.protocol, target.host,
1045 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1046 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
// Built-in headers are skipped when the user supplied the same header.
1049 if (!USR_HEADER_HOST)
1050 SENDFMT(sfp, "Host: %s\r\n", target.host);
1051 if (!USR_HEADER_USER_AGENT)
1052 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1054 /* Ask server to close the connection as soon as we are done
1055 * (IOW: we do not intend to send more requests)
1057 SENDFMT(sfp, "Connection: close\r\n");
1059 #if ENABLE_FEATURE_WGET_AUTHENTICATION
// The +6 skips the leading Proxy- of the literal, so the same string constant
// yields the plain Authorization header here and the proxy variant below.
1060 if (target.user && !USR_HEADER_AUTH) {
1061 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1062 base64enc(target.user));
1064 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1065 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1066 base64enc(server.user));
1070 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1071 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1073 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1074 if (G.extra_headers) {
// NOTE(review): the user-supplied header text is passed to log_io() as the
// format argument. With the logging variant of log_io() a percent sign in a
// --header value would be interpreted as a conversion; passing it through a
// %s format would avoid that. The fputs() below is not affected.
1075 log_io(G.extra_headers);
1076 fputs(G.extra_headers, sfp);
1079 if (option_mask32 & WGET_OPT_POST_DATA) {
// NOTE(review): the length is cast to int but printed with %u - the types do
// not match, although the value is the same for any realistic length.
1081 "Content-Type: application/x-www-form-urlencoded\r\n"
1082 "Content-Length: %u\r\n"
1085 (int) strlen(G.post_data), G.post_data
1090 SENDFMT(sfp, "\r\n");
1094 /* If we use SSL helper, keeping our end of the socket open for writing
1095 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1096 * even after child closes its copy of the fd.
1099 shutdown(fileno(sfp), SHUT_WR);
1102 * Retrieve HTTP response line and check for "200" status code.
1105 fgets_and_trim(sfp);
1108 str = skip_non_whitespace(str);
1109 str = skip_whitespace(str);
1110 // FIXME: no error check
1111 // xatou wouldn't work: "200 OK"
1116 while (gethdr(sfp) != NULL)
1117 /* eat all remaining headers */;
1121 Response 204 doesn't say "null file", it says "metadata
1122 has changed but data didn't":
1124 "10.2.5 204 No Content
1125 The server has fulfilled the request but does not need to return
1126 an entity-body, and might want to return updated metainformation.
1127 The response MAY include new or updated metainformation in the form
1128 of entity-headers, which if present SHOULD be associated with
1129 the requested variant.
1131 If the client is a user agent, it SHOULD NOT change its document
1132 view from that which caused the request to be sent. This response
1133 is primarily intended to allow input for actions to take place
1134 without causing a change to the user agent's active document view,
1135 although any new or updated metainformation SHOULD be applied
1136 to the document currently in the user agent's active view.
1138 The 204 response MUST NOT include a message-body, and thus
1139 is always terminated by the first empty line after the header fields."
1141 However, in real world it was observed that some web servers
1142 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1145 if (G.beg_range != 0) {
1146 /* "Range:..." was not honored by the server.
1147 * Restart download from the beginning.
1149 reset_beg_range_to_zero();
1152 case 300: /* redirection */
1157 case 206: /* Partial Content */
1158 if (G.beg_range != 0)
1159 /* "Range:..." worked. Good. */
1161 /* Partial Content even though we did not ask for it??? */
1164 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1168 * Retrieve HTTP headers.
1170 while ((str = gethdr(sfp)) != NULL) {
1171 static const char keywords[] ALIGN1 =
1172 "content-length\0""transfer-encoding\0""location\0";
1174 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1178 /* gethdr converted "FOO:" string to lowercase */
1180 /* strip trailing whitespace */
1181 char *s = strchrnul(str, '\0') - 1;
1182 while (s >= str && (*s == ' ' || *s == '\t')) {
1186 key = index_in_strings(keywords, G.wget_buf) + 1;
1187 if (key == KEY_content_length) {
1188 G.content_len = BB_STRTOOFF(str, NULL, 10);
1189 if (G.content_len < 0 || errno) {
1190 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1195 if (key == KEY_transfer_encoding) {
1196 if (strcmp(str_tolower(str), "chunked") != 0)
1197 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1200 if (key == KEY_location && status >= 300) {
1201 if (--redir_limit == 0)
1202 bb_error_msg_and_die("too many redirections");
1204 if (str[0] == '/') {
1205 free(redirected_path);
1206 target.path = redirected_path = xstrdup(str+1);
1207 /* lsa stays the same: it's on the same server */
1209 parse_url(str, &target);
1211 /* server.user remains untouched */
1212 free(server.allocated);
1213 server.allocated = NULL;
1214 server.host = target.host;
1215 /* strip_ipv6_scope_id(target.host); - no! */
1216 /* we assume remote never gives us IPv6 addr with scope id */
1217 server.port = target.port;
1220 } /* else: lsa stays the same: we use proxy */
1222 goto establish_session;
1225 // if (status >= 300)
1226 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1228 /* For HTTP, data is pumped over the same connection */
1234 sfp = prepare_ftp_session(&dfp, &target, lsa);
1239 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1240 if (G.output_fd < 0)
1241 G.output_fd = xopen(G.fname_out, G.o_flags);
1242 retrieve_file_data(dfp);
1243 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1244 xclose(G.output_fd);
1250 /* It's ftp. Close data connection properly */
1252 if (ftpcmd(NULL, NULL, sfp) != 226)
1253 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1254 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1258 free(server.allocated);
1259 free(target.allocated);
1262 free(fname_out_alloc);
1263 free(redirected_path);
// Applet entry point.
// Sets the defaults (a 900 second timeout when the timeout feature is
// enabled, proxy use on, user agent Wget) and parses the command line with
// getopt32(). All --header values are joined into one block of CR LF
// terminated lines in G.extra_headers, and G.user_headers records which
// built-in headers they replace. The open flags for the output file are then
// chosen and download_one_url() is called on argv entries (the enclosing loop
// is not visible in this excerpt).
// Returns EXIT_SUCCESS. Failures do not come back through the return value;
// the helpers terminate the process instead.
1266 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1267 int wget_main(int argc UNUSED_PARAM, char **argv)
1269 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1270 static const char wget_longopts[] ALIGN1 =
1271 /* name, has_arg, val */
1272 "continue\0" No_argument "c"
1273 //FIXME: -s isn't --spider, it's --save-headers!
1274 "spider\0" No_argument "s"
1275 "quiet\0" No_argument "q"
1276 "output-document\0" Required_argument "O"
1277 "directory-prefix\0" Required_argument "P"
1278 "proxy\0" Required_argument "Y"
1279 "user-agent\0" Required_argument "U"
1280 IF_FEATURE_WGET_TIMEOUT(
1281 "timeout\0" Required_argument "T")
1283 IF_DESKTOP( "tries\0" Required_argument "t")
1284 "header\0" Required_argument "\xff"
1285 "post-data\0" Required_argument "\xfe"
1286 /* Ignored (we always use PASV): */
1287 IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
1288 /* Ignored (we don't do ssl) */
1289 IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
1290 /* Ignored (we don't support caching) */
1291 IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1292 IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1293 IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1294 IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1295 IF_DESKTOP( "no-parent\0" No_argument "\xf0")
1299 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1300 llist_t *headers_llist = NULL;
1305 #if ENABLE_FEATURE_WGET_TIMEOUT
// Default read timeout; -T replaces it through getopt32() below.
1306 G.timeout_seconds = 900;
1307 signal(SIGALRM, alarm_handler);
1309 G.proxy_flag = "on"; /* use proxies if env vars are set */
1310 G.user_agent = "Wget"; /* "User-Agent" header field */
1312 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1313 applet_long_options = wget_longopts;
1315 opt_complementary = "-1" /* at least one URL */
1316 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
1317 getopt32(argv, "csqO:P:Y:U:T:+"
1320 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1321 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1322 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1323 * -nH --no-host-directories: wget -r http://host/ won't create host/
1325 * "n::" above says that we accept -n[ARG].
1326 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1328 , &G.fname_out, &G.dir_prefix,
1329 &G.proxy_flag, &G.user_agent,
1330 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1331 NULL, /* -t RETRIES */
1333 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1334 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1338 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1339 if (headers_llist) {
1342 llist_t *ll = headers_llist;
// First pass: add up the space needed. Each header takes its own length plus
// two bytes for CR LF; one more byte is allocated for the terminating NUL.
1344 size += strlen(ll->data) + 2;
1347 G.extra_headers = hdr = xmalloc(size + 1);
1348 while (headers_llist) {
1352 size = sprintf(hdr, "%s\r\n",
1353 (char*)llist_pop(&headers_llist));
1354 /* a bit like index_in_substrings but don't match full key */
1356 words = wget_user_headers;
// Prefix match: the header line has to begin with the known header name.
// NOTE(review): strstr() compares case-sensitively, so a user header written
// in a different letter case does not suppress the built-in one.
1358 if (strstr(hdr, words) == hdr) {
1359 G.user_headers |= bit;
1363 words += strlen(words) + 1;
// Default open mode includes O_EXCL, so an existing file is not overwritten.
1371 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1372 if (G.fname_out) { /* -O FILE ? */
1373 if (LONE_DASH(G.fname_out)) { /* -O - ? */
// Resuming makes no sense when the output name is a lone dash.
1375 option_mask32 &= ~WGET_OPT_CONTINUE;
1377 /* compat with wget: -O FILE can overwrite */
1378 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1382 download_one_url(*argv++);
1384 if (G.output_fd >= 0)
1385 xclose(G.output_fd);
1387 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1388 free(G.extra_headers);
1392 return EXIT_SUCCESS;