1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
16 //config: wget is a utility for non-interactive download of files from HTTP
17 //config: and FTP servers.
19 //config:config FEATURE_WGET_STATUSBAR
20 //config: bool "Enable a nifty progress meter (+2k)"
22 //config: depends on WGET
24 //config: Enable the transfer progress bar for wget transfers.
26 //config:config FEATURE_WGET_AUTHENTICATION
27 //config: bool "Enable HTTP authentication"
29 //config: depends on WGET
31 //config: Support authenticated HTTP transfers.
33 //config:config FEATURE_WGET_LONG_OPTIONS
34 //config: bool "Enable long options"
36 //config: depends on WGET && LONG_OPTS
38 //config: Support long options for the wget applet.
40 //config:config FEATURE_WGET_TIMEOUT
41 //config: bool "Enable timeout option -T SEC"
43 //config: depends on WGET
45 //config: Supports network read and connect timeouts for wget,
46 //config: so that wget will give up and time out, through the -T
47 //config: command line option.
49 //config: Currently only connect and network data read timeout are
50 //config: supported (i.e., timeout is not applied to the DNS query). When
51 //config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
52 //config: will work in addition to -T.
54 //config:config FEATURE_WGET_OPENSSL
55 //config: bool "Try to connect to HTTPS using openssl"
57 //config: depends on WGET
59 //config: Choose how wget establishes SSL connection for https:// URLs.
61 //config: Busybox itself contains no SSL code. wget will spawn
62 //config: a helper program to talk over HTTPS.
64 //config: OpenSSL has a simple SSL client for debug purposes.
65 //config: If you select "openssl" helper, wget will effectively call
66 //config: "openssl s_client -quiet -connect IP:443 2>/dev/null"
67 //config: and pipe its data through it.
68 //config: Note inconvenient API: host resolution is done twice,
69 //config: and there is no guarantee openssl's idea of IPv6 address
70 //config: format is the same as ours.
71 //config: Another problem is that s_client prints debug information
72 //config: to stderr, and it needs to be suppressed. This means
73 //config: all error messages get suppressed too.
74 //config: openssl is also a big binary, often dynamically linked
75 //config: against ~15 libraries.
77 //config:config FEATURE_WGET_SSL_HELPER
78 //config: bool "Try to connect to HTTPS using ssl_helper"
80 //config: depends on WGET
82 //config: Choose how wget establishes SSL connection for https:// URLs.
84 //config: Busybox itself contains no SSL code. wget will spawn
85 //config: a helper program to talk over HTTPS.
87 //config: ssl_helper is a tool which can be built statically
88 //config: from busybox sources against a small embedded SSL library.
89 //config: Please see networking/ssl_helper/README.
90 //config: It does not require double host resolution and emits
91 //config: error messages to stderr.
93 //config: Precompiled static binary may be available at
94 //config: http://busybox.net/downloads/binaries/
96 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
98 //kbuild:lib-$(CONFIG_WGET) += wget.o
100 //usage:#define wget_trivial_usage
101 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
102 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
103 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
104 /* Since we ignore these opts, we don't show them in --help */
105 /* //usage: " [--no-check-certificate] [--no-cache]" */
106 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
108 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
109 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
110 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
112 //usage:#define wget_full_usage "\n\n"
113 //usage: "Retrieve files via HTTP or FTP\n"
114 //usage: "\n -s Spider mode - only check file existence"
115 //usage: "\n -c Continue retrieval of aborted transfer"
116 //usage: "\n -q Quiet"
117 //usage: "\n -P DIR Save to DIR (default .)"
118 //usage: IF_FEATURE_WGET_TIMEOUT(
119 //usage: "\n -T SEC Network read timeout is SEC seconds"
121 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
122 //usage: "\n -U STR Use STR for User-Agent header"
123 //usage: "\n -Y Use proxy ('on' or 'off')"
128 # define log_io(...) bb_error_msg(__VA_ARGS__)
129 # define SENDFMT(fp, fmt, ...) \
131 log_io("> " fmt, ##__VA_ARGS__); \
132 fprintf(fp, fmt, ##__VA_ARGS__); \
135 # define log_io(...) ((void)0)
136 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
144 const char *protocol;
148 static const char P_FTP[] = "ftp";
149 static const char P_HTTP[] = "http";
150 #if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
151 static const char P_HTTPS[] = "https";
154 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
155 /* User-specified headers prevent using our corresponding built-in headers. */
158 HDR_USER_AGENT = (1<<1),
160 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
161 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
163 static const char wget_user_headers[] ALIGN1 =
167 # if ENABLE_FEATURE_WGET_AUTHENTICATION
169 "Proxy-Authorization:\0"
172 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
173 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
174 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
175 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
176 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
177 #else /* No long options, no user-headers :( */
178 # define USR_HEADER_HOST 0
179 # define USR_HEADER_USER_AGENT 0
180 # define USR_HEADER_RANGE 0
181 # define USR_HEADER_AUTH 0
182 # define USR_HEADER_PROXY_AUTH 0
187 off_t content_len; /* Content-length of the file */
188 off_t beg_range; /* Range at which continue begins */
189 #if ENABLE_FEATURE_WGET_STATUSBAR
190 off_t transferred; /* Number of bytes transferred so far */
191 const char *curfile; /* Name of current file being transferred */
195 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
198 unsigned char user_headers; /* Headers mentioned by the user */
200 char *fname_out; /* where to direct output (-O) */
201 const char *proxy_flag; /* Use proxies if env vars are set */
202 const char *user_agent; /* "User-Agent" header field */
203 #if ENABLE_FEATURE_WGET_TIMEOUT
204 unsigned timeout_seconds;
209 smallint chunked; /* chunked transfer encoding */
210 smallint got_clen; /* got content-length: from server */
211 /* Local downloads do benefit from big buffer.
212 * With 512 byte buffer, it was measured to be
213 * an order of magnitude slower than with big one.
215 uint64_t just_to_align_next_member;
216 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
218 #define G (*ptr_to_globals)
219 #define INIT_G() do { \
220 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
222 #define FINI_G() do { \
223 FREE_PTR_TO_GLOBALS(); \
227 /* Must match option string! */
229 WGET_OPT_CONTINUE = (1 << 0),
230 WGET_OPT_SPIDER = (1 << 1),
231 WGET_OPT_QUIET = (1 << 2),
232 WGET_OPT_OUTNAME = (1 << 3),
233 WGET_OPT_PREFIX = (1 << 4),
234 WGET_OPT_PROXY = (1 << 5),
235 WGET_OPT_USER_AGENT = (1 << 6),
236 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
237 WGET_OPT_RETRIES = (1 << 8),
238 WGET_OPT_PASSIVE = (1 << 9),
239 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
240 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
248 #if ENABLE_FEATURE_WGET_STATUSBAR
249 static void progress_meter(int flag)
251 if (option_mask32 & WGET_OPT_QUIET)
254 if (flag == PROGRESS_START)
255 bb_progress_init(&G.pmt, G.curfile);
257 bb_progress_update(&G.pmt,
260 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
263 if (flag == PROGRESS_END) {
264 bb_progress_free(&G.pmt);
265 bb_putchar_stderr('\n');
270 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
274 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
275 * local addresses can have a scope identifier to specify the
276 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
277 * identifier is only valid on a single node.
279 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
280 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
281 * in the Host header as invalid requests, see
282 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
284 static void strip_ipv6_scope_id(char *host)
288 /* bbox wget actually handles IPv6 addresses without [], like
289 * wget "http://::1/xxx", but this is not standard.
290 * To save code, _here_ we do not support it. */
293 return; /* not IPv6 */
295 scope = strchr(host, '%');
299 /* Remove the IPv6 zone identifier from the host address */
300 cp = strchr(host, ']');
301 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
302 /* malformed address (not "[xx]:nn" or "[xx]") */
306 /* cp points to "]...", scope points to "%eth0]..." */
307 overlapping_strcpy(scope, cp);
310 #if ENABLE_FEATURE_WGET_AUTHENTICATION
311 /* Base64-encode character string. */
312 static char *base64enc(const char *str)
314 unsigned len = strlen(str);
315 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
316 len = sizeof(G.wget_buf)/4*3 - 10;
317 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
322 static char* sanitize_string(char *s)
324 unsigned char *p = (void *) s;
331 #if ENABLE_FEATURE_WGET_TIMEOUT
332 static void alarm_handler(int sig UNUSED_PARAM)
334 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
336 bb_error_msg_and_die("download timed out");
340 static FILE *open_socket(len_and_sockaddr *lsa)
345 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
346 fd = xconnect_stream(lsa);
347 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
349 /* glibc 2.4 seems to try seeking on it - ??! */
350 /* hopefully it understands what ESPIPE means... */
351 fp = fdopen(fd, "r+");
353 bb_perror_msg_and_die(bb_msg_memory_exhausted);
358 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
359 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
360 static char fgets_and_trim(FILE *fp)
365 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
366 bb_perror_msg_and_die("error getting response");
368 buf_ptr = strchrnul(G.wget_buf, '\n');
371 buf_ptr = strchrnul(G.wget_buf, '\r');
374 log_io("< %s", G.wget_buf);
379 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
385 fprintf(fp, "%s%s\r\n", s1, s2);
387 log_io("> %s%s", s1, s2);
392 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
394 G.wget_buf[3] = '\0';
395 result = xatoi_positive(G.wget_buf);
400 static void parse_url(const char *src_url, struct host_info *h)
405 h->allocated = url = xstrdup(src_url);
408 p = strstr(url, "://");
412 if (strcmp(url, P_FTP) == 0) {
413 h->port = bb_lookup_port(P_FTP, "tcp", 21);
415 #if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
416 if (strcmp(url, P_HTTPS) == 0) {
417 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
418 h->protocol = P_HTTPS;
421 if (strcmp(url, P_HTTP) == 0) {
423 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
424 h->protocol = P_HTTP;
427 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
430 // GNU wget is user-friendly and falls back to http://
436 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
437 // 'GET /?var=a/b HTTP/1.0'
438 // and saves 'index.html?var=a%2Fb' (we save 'b')
439 // wget 'http://busybox.net?login=john@doe':
440 // request: 'GET /?login=john@doe HTTP/1.0'
441 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
442 // wget 'http://busybox.net#test/test':
443 // request: 'GET / HTTP/1.0'
444 // saves: 'index.html' (we save 'test')
446 // We also don't add unique .N suffix if file exists...
447 sp = strchr(h->host, '/');
448 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
449 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
452 } else if (*sp == '/') {
455 } else { // '#' or '?'
456 // http://busybox.net?login=john@doe is a valid URL
457 // memmove converts to:
458 // http:/busybox.nett?login=john@doe...
459 memmove(h->host - 1, h->host, sp - h->host);
465 sp = strrchr(h->host, '@');
467 // URL-decode "user:password" string before base64-encoding:
468 // wget http://test:my%20pass@example.com should send
469 // Authorization: Basic dGVzdDpteSBwYXNz
470 // which decodes to "test:my pass".
471 // Standard wget and curl do this too.
474 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
477 /* else: h->user remains NULL, or as set by original request
478 * before redirect (if we are here after a redirect).
482 static char *gethdr(FILE *fp)
487 /* retrieve header line */
488 c = fgets_and_trim(fp);
490 /* end of the headers? */
491 if (G.wget_buf[0] == '\0')
494 /* convert the header name to lower case */
495 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
497 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
498 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
499 * "A-Z" maps to "a-z".
500 * "@[\]" can't occur in header names.
501 * "^_" maps to "~,DEL" (which is wrong).
502 * "^" was never seen yet, "_" was seen from web.archive.org
503 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
508 /* verify we are at the end of the header name */
510 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
512 /* locate the start of the header value */
514 hdrval = skip_whitespace(s);
517 /* Rats! The buffer isn't big enough to hold the entire header value */
518 while (c = getc(fp), c != EOF && c != '\n')
525 static void reset_beg_range_to_zero(void)
527 bb_error_msg("restart failed");
529 xlseek(G.output_fd, 0, SEEK_SET);
530 /* Done at the end instead: */
531 /* ftruncate(G.output_fd, 0); */
534 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
541 target->user = xstrdup("anonymous:busybox@");
543 sfp = open_socket(lsa);
544 if (ftpcmd(NULL, NULL, sfp) != 220)
545 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
548 * Splitting username:password pair,
551 str = strchr(target->user, ':');
554 switch (ftpcmd("USER ", target->user, sfp)) {
558 if (ftpcmd("PASS ", str, sfp) == 230)
560 /* fall through (failed login) */
562 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
565 ftpcmd("TYPE I", NULL, sfp);
570 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
571 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
572 if (G.content_len < 0 || errno) {
573 bb_error_msg_and_die("SIZE value is garbage");
579 * Entering passive mode
581 if (ftpcmd("PASV", NULL, sfp) != 227) {
583 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
585 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
586 // Server's IP is N1.N2.N3.N4 (we ignore it)
587 // Server's port for data connection is P1*256+P2
588 str = strrchr(G.wget_buf, ')');
589 if (str) str[0] = '\0';
590 str = strrchr(G.wget_buf, ',');
591 if (!str) goto pasv_error;
592 port = xatou_range(str+1, 0, 255);
594 str = strrchr(G.wget_buf, ',');
595 if (!str) goto pasv_error;
596 port += xatou_range(str+1, 0, 255) * 256;
597 set_nport(&lsa->u.sa, htons(port));
599 *dfpp = open_socket(lsa);
601 if (G.beg_range != 0) {
602 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
603 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
604 G.content_len -= G.beg_range;
606 reset_beg_range_to_zero();
609 if (ftpcmd("RETR ", target->path, sfp) > 150)
610 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
615 #if ENABLE_FEATURE_WGET_OPENSSL
616 static int spawn_https_helper_openssl(const char *host, unsigned port)
618 char *allocated = NULL;
621 IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)
623 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
624 /* Kernel can have AF_UNIX support disabled */
625 bb_perror_msg_and_die("socketpair");
627 if (!strchr(host, ':'))
628 host = allocated = xasprintf("%s:%u", host, port);
640 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
641 * It prints some debug stuff on stderr, don't know how to suppress it.
642 * Work around by dev-nulling stderr. We lose all error messages :(
645 xopen("/dev/null", O_RDWR);
646 argv[0] = (char*)"openssl";
647 argv[1] = (char*)"s_client";
648 argv[2] = (char*)"-quiet";
649 argv[3] = (char*)"-connect";
650 argv[4] = (char*)host;
652 BB_EXECVP(argv[0], argv);
654 # if ENABLE_FEATURE_WGET_SSL_HELPER
658 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
666 # if ENABLE_FEATURE_WGET_SSL_HELPER
676 /* See networking/ssl_helper/README how to build one */
677 #if ENABLE_FEATURE_WGET_SSL_HELPER
678 static void spawn_https_helper_small(int network_fd)
683 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
684 /* Kernel can have AF_UNIX support disabled */
685 bb_perror_msg_and_die("socketpair");
687 pid = BB_MMU ? xfork() : xvfork();
695 xmove_fd(network_fd, 3);
697 * A simple ssl/tls helper
699 argv[0] = (char*)"ssl_helper";
700 argv[1] = (char*)"-d3";
702 BB_EXECVP(argv[0], argv);
703 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
709 xmove_fd(sp[0], network_fd);
713 static void NOINLINE retrieve_file_data(FILE *dfp)
715 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
716 # if ENABLE_FEATURE_WGET_TIMEOUT
717 unsigned second_cnt = G.timeout_seconds;
719 struct pollfd polldata;
721 polldata.fd = fileno(dfp);
722 polldata.events = POLLIN | POLLPRI;
724 progress_meter(PROGRESS_START);
729 /* Loops only if chunked */
732 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
733 /* Must use nonblocking I/O, otherwise fread will loop
734 * and *block* until it reads full buffer,
735 * which messes up progress bar and/or timeout logic.
736 * Because of nonblocking I/O, we need to dance
737 * very carefully around EAGAIN. See explanation at
740 ndelay_on(polldata.fd);
746 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
747 /* fread internally uses read loop, which in our case
748 * is usually exited when we get EAGAIN.
749 * In this case, libc sets error marker on the stream.
750 * Need to clear it before next fread to avoid possible
751 * rare false positive ferror below. Rare because usually
752 * fread gets more than zero bytes, and we don't fall
753 * into if (n <= 0) ...
758 rdsz = sizeof(G.wget_buf);
760 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
761 if ((int)G.content_len <= 0)
763 rdsz = (unsigned)G.content_len;
766 n = fread(G.wget_buf, 1, rdsz, dfp);
769 xwrite(G.output_fd, G.wget_buf, n);
770 #if ENABLE_FEATURE_WGET_STATUSBAR
775 if (G.content_len == 0)
778 #if ENABLE_FEATURE_WGET_TIMEOUT
779 second_cnt = G.timeout_seconds;
786 * If error occurs, or EOF is reached, the return value
787 * is a short item count (or zero).
788 * fread does not distinguish between EOF and error.
790 if (errno != EAGAIN) {
792 progress_meter(PROGRESS_END);
793 bb_perror_msg_and_die(bb_msg_read_error);
795 break; /* EOF, not error */
798 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
799 /* It was EAGAIN. There is no data. Wait up to one second
800 * then abort if timed out, or update the bar and try reading again.
802 if (safe_poll(&polldata, 1, 1000) == 0) {
803 # if ENABLE_FEATURE_WGET_TIMEOUT
804 if (second_cnt != 0 && --second_cnt == 0) {
805 progress_meter(PROGRESS_END);
806 bb_error_msg_and_die("download timed out");
809 /* We used to loop back to poll here,
810 * but there is no great harm in letting fread
811 * try reading anyway.
816 /* Need to do it _every_ second for "stalled" indicator
817 * to be shown properly.
819 progress_meter(PROGRESS_BUMP);
820 } /* while (reading data) */
822 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
824 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
829 fgets_and_trim(dfp); /* Eat empty line */
832 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
833 /* FIXME: error check? */
834 if (G.content_len == 0)
835 break; /* all done! */
838 * Note that fgets may result in some data being buffered in dfp.
839 * We loop back to fread, which will retrieve this data.
840 * Also note that code has to be arranged so that fread
841 * is done _before_ one-second poll wait - poll doesn't know
842 * about stdio buffering and can result in spurious one second waits!
846 /* If -c failed, we restart from the beginning,
847 * but we do not truncate file then, we do it only now, at the end.
848 * This lets the user ^C if their 99% complete 10 GB file download
849 * failed to restart *without* losing the almost complete file.
852 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
853 if (pos != (off_t)-1)
854 ftruncate(G.output_fd, pos);
857 /* Draw full bar and free its resources */
858 G.chunked = 0; /* makes it show 100% even for chunked download */
859 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
860 progress_meter(PROGRESS_END);
863 static void download_one_url(const char *url)
865 bool use_proxy; /* Use proxies if env vars are set */
867 len_and_sockaddr *lsa;
868 FILE *sfp; /* socket to web/ftp server */
869 FILE *dfp; /* socket to ftp server (data) */
871 char *fname_out_alloc;
872 char *redirected_path = NULL;
873 struct host_info server;
874 struct host_info target;
876 server.allocated = NULL;
877 target.allocated = NULL;
881 parse_url(url, &target);
883 /* Use the proxy if necessary */
884 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
886 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
887 //FIXME: what if protocol is https? Ok to use http_proxy?
888 use_proxy = (proxy && proxy[0]);
890 parse_url(proxy, &server);
893 server.port = target.port;
894 if (ENABLE_FEATURE_IPV6) {
895 //free(server.allocated); - can't be non-NULL
896 server.host = server.allocated = xstrdup(target.host);
898 server.host = target.host;
902 if (ENABLE_FEATURE_IPV6)
903 strip_ipv6_scope_id(target.host);
905 /* If there was no -O FILE, guess output filename */
906 fname_out_alloc = NULL;
907 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
908 G.fname_out = bb_get_last_path_component_nostrip(target.path);
909 /* handle "wget http://kernel.org//" */
910 if (G.fname_out[0] == '/' || !G.fname_out[0])
911 G.fname_out = (char*)"index.html";
912 /* -P DIR is considered only if there was no -O FILE */
914 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
916 /* redirects may free target.path later, need to make a copy */
917 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
920 #if ENABLE_FEATURE_WGET_STATUSBAR
921 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
924 /* Determine where to start transfer */
926 if (option_mask32 & WGET_OPT_CONTINUE) {
927 G.output_fd = open(G.fname_out, O_WRONLY);
928 if (G.output_fd >= 0) {
929 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
931 /* File doesn't exist. We do not create file here yet.
932 * We are not sure it exists on remote side */
937 lsa = xhost2sockaddr(server.host, server.port);
938 if (!(option_mask32 & WGET_OPT_QUIET)) {
939 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
940 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
944 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
947 if (use_proxy || target.protocol != P_FTP) {
954 /* Open socket to http(s) server */
955 #if ENABLE_FEATURE_WGET_OPENSSL
956 /* openssl (and maybe ssl_helper) support is configured */
957 if (target.protocol == P_HTTPS) {
958 /* openssl-based helper
959 * Inconvenient API since we can't give it an open fd
961 int fd = spawn_https_helper_openssl(server.host, server.port);
962 # if ENABLE_FEATURE_WGET_SSL_HELPER
963 if (fd < 0) { /* no openssl? try ssl_helper */
964 sfp = open_socket(lsa);
965 spawn_https_helper_small(fileno(sfp));
969 /* We don't check for exec("openssl") failure in this case */
971 sfp = fdopen(fd, "r+");
973 bb_perror_msg_and_die(bb_msg_memory_exhausted);
976 sfp = open_socket(lsa);
978 #elif ENABLE_FEATURE_WGET_SSL_HELPER
979 /* Only ssl_helper support is configured */
980 sfp = open_socket(lsa);
981 if (target.protocol == P_HTTPS)
982 spawn_https_helper_small(fileno(sfp));
984 /* ssl (https) support is not configured */
985 sfp = open_socket(lsa);
987 /* Send HTTP request */
989 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
990 target.protocol, target.host,
993 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
994 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
997 if (!USR_HEADER_HOST)
998 SENDFMT(sfp, "Host: %s\r\n", target.host);
999 if (!USR_HEADER_USER_AGENT)
1000 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1002 /* Ask server to close the connection as soon as we are done
1003 * (IOW: we do not intend to send more requests)
1005 SENDFMT(sfp, "Connection: close\r\n");
1007 #if ENABLE_FEATURE_WGET_AUTHENTICATION
1008 if (target.user && !USR_HEADER_AUTH) {
1009 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1010 base64enc(target.user));
1012 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1013 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1014 base64enc(server.user));
1018 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1019 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1021 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1022 if (G.extra_headers) {
1023 log_io(G.extra_headers);
1024 fputs(G.extra_headers, sfp);
1027 if (option_mask32 & WGET_OPT_POST_DATA) {
1029 "Content-Type: application/x-www-form-urlencoded\r\n"
1030 "Content-Length: %u\r\n"
1033 (int) strlen(G.post_data), G.post_data
1038 SENDFMT(sfp, "\r\n");
1044 * Retrieve HTTP response line and check for "200" status code.
1047 fgets_and_trim(sfp);
1050 str = skip_non_whitespace(str);
1051 str = skip_whitespace(str);
1052 // FIXME: no error check
1053 // xatou wouldn't work: "200 OK"
1058 while (gethdr(sfp) != NULL)
1059 /* eat all remaining headers */;
1063 Response 204 doesn't say "null file", it says "metadata
1064 has changed but data didn't":
1066 "10.2.5 204 No Content
1067 The server has fulfilled the request but does not need to return
1068 an entity-body, and might want to return updated metainformation.
1069 The response MAY include new or updated metainformation in the form
1070 of entity-headers, which if present SHOULD be associated with
1071 the requested variant.
1073 If the client is a user agent, it SHOULD NOT change its document
1074 view from that which caused the request to be sent. This response
1075 is primarily intended to allow input for actions to take place
1076 without causing a change to the user agent's active document view,
1077 although any new or updated metainformation SHOULD be applied
1078 to the document currently in the user agent's active view.
1080 The 204 response MUST NOT include a message-body, and thus
1081 is always terminated by the first empty line after the header fields."
1083 However, in real world it was observed that some web servers
1084 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1087 if (G.beg_range != 0) {
1088 /* "Range:..." was not honored by the server.
1089 * Restart download from the beginning.
1091 reset_beg_range_to_zero();
1094 case 300: /* redirection */
1099 case 206: /* Partial Content */
1100 if (G.beg_range != 0)
1101 /* "Range:..." worked. Good. */
1103 /* Partial Content even though we did not ask for it??? */
1106 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1110 * Retrieve HTTP headers.
1112 while ((str = gethdr(sfp)) != NULL) {
1113 static const char keywords[] ALIGN1 =
1114 "content-length\0""transfer-encoding\0""location\0";
1116 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1120 /* gethdr converted "FOO:" string to lowercase */
1122 /* strip trailing whitespace */
1123 char *s = strchrnul(str, '\0') - 1;
1124 while (s >= str && (*s == ' ' || *s == '\t')) {
1128 key = index_in_strings(keywords, G.wget_buf) + 1;
1129 if (key == KEY_content_length) {
1130 G.content_len = BB_STRTOOFF(str, NULL, 10);
1131 if (G.content_len < 0 || errno) {
1132 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1137 if (key == KEY_transfer_encoding) {
1138 if (strcmp(str_tolower(str), "chunked") != 0)
1139 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1142 if (key == KEY_location && status >= 300) {
1143 if (--redir_limit == 0)
1144 bb_error_msg_and_die("too many redirections");
1146 if (str[0] == '/') {
1147 free(redirected_path);
1148 target.path = redirected_path = xstrdup(str+1);
1149 /* lsa stays the same: it's on the same server */
1151 parse_url(str, &target);
1153 /* server.user remains untouched */
1154 free(server.allocated);
1155 server.allocated = NULL;
1156 server.host = target.host;
1157 /* strip_ipv6_scope_id(target.host); - no! */
1158 /* we assume remote never gives us IPv6 addr with scope id */
1159 server.port = target.port;
1162 } /* else: lsa stays the same: we use proxy */
1164 goto establish_session;
1167 // if (status >= 300)
1168 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1170 /* For HTTP, data is pumped over the same connection */
1176 sfp = prepare_ftp_session(&dfp, &target, lsa);
1181 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1182 if (G.output_fd < 0)
1183 G.output_fd = xopen(G.fname_out, G.o_flags);
1184 retrieve_file_data(dfp);
1185 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1186 xclose(G.output_fd);
1192 /* It's ftp. Close data connection properly */
1194 if (ftpcmd(NULL, NULL, sfp) != 226)
1195 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1196 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1200 free(server.allocated);
1201 free(target.allocated);
1204 free(fname_out_alloc);
1205 free(redirected_path);
1208 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1209 int wget_main(int argc UNUSED_PARAM, char **argv)
1211 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1212 static const char wget_longopts[] ALIGN1 =
1213 /* name, has_arg, val */
1214 "continue\0" No_argument "c"
1215 //FIXME: -s isn't --spider, it's --save-headers!
1216 "spider\0" No_argument "s"
1217 "quiet\0" No_argument "q"
1218 "output-document\0" Required_argument "O"
1219 "directory-prefix\0" Required_argument "P"
1220 "proxy\0" Required_argument "Y"
1221 "user-agent\0" Required_argument "U"
1222 #if ENABLE_FEATURE_WGET_TIMEOUT
1223 "timeout\0" Required_argument "T"
1226 // "tries\0" Required_argument "t"
1227 /* Ignored (we always use PASV): */
1228 "passive-ftp\0" No_argument "\xff"
1229 "header\0" Required_argument "\xfe"
1230 "post-data\0" Required_argument "\xfd"
1231 /* Ignored (we don't do ssl) */
1232 "no-check-certificate\0" No_argument "\xfc"
1233 /* Ignored (we don't support caching) */
1234 "no-cache\0" No_argument "\xfb"
1238 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1239 llist_t *headers_llist = NULL;
1244 #if ENABLE_FEATURE_WGET_TIMEOUT
1245 G.timeout_seconds = 900;
1246 signal(SIGALRM, alarm_handler);
1248 G.proxy_flag = "on"; /* use proxies if env vars are set */
1249 G.user_agent = "Wget"; /* "User-Agent" header field */
1251 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1252 applet_long_options = wget_longopts;
1254 opt_complementary = "-1"
1255 IF_FEATURE_WGET_TIMEOUT(":T+")
1256 IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
1257 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
1258 &G.fname_out, &G.dir_prefix,
1259 &G.proxy_flag, &G.user_agent,
1260 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1261 NULL /* -t RETRIES */
1262 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1263 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1267 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1268 if (headers_llist) {
1271 llist_t *ll = headers_llist;
1273 size += strlen(ll->data) + 2;
1276 G.extra_headers = hdr = xmalloc(size + 1);
1277 while (headers_llist) {
1281 size = sprintf(hdr, "%s\r\n",
1282 (char*)llist_pop(&headers_llist));
1283 /* a bit like index_in_substrings but don't match full key */
1285 words = wget_user_headers;
1287 if (strstr(hdr, words) == hdr) {
1288 G.user_headers |= bit;
1292 words += strlen(words) + 1;
1300 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1301 if (G.fname_out) { /* -O FILE ? */
1302 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1304 option_mask32 &= ~WGET_OPT_CONTINUE;
1306 /* compat with wget: -O FILE can overwrite */
1307 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1311 download_one_url(*argv++);
1313 if (G.output_fd >= 0)
1314 xclose(G.output_fd);
1316 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1317 free(G.extra_headers);
1321 return EXIT_SUCCESS;