1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
16 //config: wget is a utility for non-interactive download of files from HTTP
17 //config: and FTP servers.
19 //config:config FEATURE_WGET_STATUSBAR
20 //config: bool "Enable a nifty process meter (+2k)"
22 //config: depends on WGET
24 //config: Enable the transfer progress bar for wget transfers.
26 //config:config FEATURE_WGET_AUTHENTICATION
27 //config: bool "Enable HTTP authentication"
29 //config: depends on WGET
31 //config: Support authenticated HTTP transfers.
33 //config:config FEATURE_WGET_LONG_OPTIONS
34 //config: bool "Enable long options"
36 //config: depends on WGET && LONG_OPTS
38 //config: Support long options for the wget applet.
40 //config:config FEATURE_WGET_TIMEOUT
41 //config: bool "Enable timeout option -T SEC"
43 //config: depends on WGET
45 //config: Supports network read and connect timeouts for wget,
46 //config: so that wget will give up and time out, through the -T
47 //config: command line option.
49 //config: Currently only connect and network data read timeout are
50 //config: supported (i.e., timeout is not applied to the DNS query). When
51 //config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
52 //config: will work in addition to -T.
54 //config:config FEATURE_WGET_OPENSSL
55 //config: bool "Try to connect to HTTPS using openssl"
57 //config: depends on WGET
59 //config: Choose how wget establishes SSL connection for https:// URLs.
61 //config: Busybox itself contains no SSL code. wget will spawn
62 //config: a helper program to talk over HTTPS.
64 //config: OpenSSL has a simple SSL client for debug purposes.
65 //config: If you select "openssl" helper, wget will effectively call
66 //config: "openssl s_client -quiet -connect IP:443 2>/dev/null"
67 //config: and pipe its data through it.
68 //config: Note inconvenient API: host resolution is done twice,
69 //config: and there is no guarantee openssl's idea of IPv6 address
70 //config: format is the same as ours.
71 //config: Another problem is that s_client prints debug information
72 //config: to stderr, and it needs to be suppressed. This means
73 //config: all error messages get suppressed too.
74 //config: openssl is also a big binary, often dynamically linked
75 //config: against ~15 libraries.
77 //config:config FEATURE_WGET_SSL_HELPER
78 //config: bool "Try to connect to HTTPS using ssl_helper"
80 //config: depends on WGET
82 //config: Choose how wget establishes SSL connection for https:// URLs.
84 //config: Busybox itself contains no SSL code. wget will spawn
85 //config: a helper program to talk over HTTPS.
87 //config: ssl_helper is a tool which can be built statically
88 //config: from busybox sources against a small embedded SSL library.
89 //config: Please see networking/ssl_helper/README.
90 //config: It does not require double host resolution and emits
91 //config: error messages to stderr.
93 //config: Precompiled static binary may be available at
94 //config: http://busybox.net/downloads/binaries/
96 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
98 //kbuild:lib-$(CONFIG_WGET) += wget.o
100 //usage:#define wget_trivial_usage
101 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
102 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
103 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
104 /* Since we ignore these opts, we don't show them in --help */
105 /* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
106 /* //usage: " [-nv] [-nc] [-nH] [-np]" */
107 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
109 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
110 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
111 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
113 //usage:#define wget_full_usage "\n\n"
114 //usage: "Retrieve files via HTTP or FTP\n"
115 //usage: "\n -s Spider mode - only check file existence"
116 //usage: "\n -c Continue retrieval of aborted transfer"
117 //usage: "\n -q Quiet"
118 //usage: "\n -P DIR Save to DIR (default .)"
119 //usage: IF_FEATURE_WGET_TIMEOUT(
120 //usage: "\n -T SEC Network read timeout is SEC seconds"
122 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
123 //usage: "\n -U STR Use STR for User-Agent header"
124 //usage: "\n -Y Use proxy ('on' or 'off')"
129 # define log_io(...) bb_error_msg(__VA_ARGS__)
130 # define SENDFMT(fp, fmt, ...) \
132 log_io("> " fmt, ##__VA_ARGS__); \
133 fprintf(fp, fmt, ##__VA_ARGS__); \
136 # define log_io(...) ((void)0)
137 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
145 const char *protocol;
149 static const char P_FTP[] = "ftp";
150 static const char P_HTTP[] = "http";
151 #if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
152 static const char P_HTTPS[] = "https";
155 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
156 /* User-specified headers prevent using our corresponding built-in headers. */
159 HDR_USER_AGENT = (1<<1),
161 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
162 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
164 static const char wget_user_headers[] ALIGN1 =
168 # if ENABLE_FEATURE_WGET_AUTHENTICATION
170 "Proxy-Authorization:\0"
173 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
174 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
175 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
176 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
177 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
178 #else /* No long options, no user-headers :( */
179 # define USR_HEADER_HOST 0
180 # define USR_HEADER_USER_AGENT 0
181 # define USR_HEADER_RANGE 0
182 # define USR_HEADER_AUTH 0
183 # define USR_HEADER_PROXY_AUTH 0
188 off_t content_len; /* Content-length of the file */
189 off_t beg_range; /* Range at which continue begins */
190 #if ENABLE_FEATURE_WGET_STATUSBAR
191 off_t transferred; /* Number of bytes transferred so far */
192 const char *curfile; /* Name of current file being transferred */
196 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
199 unsigned char user_headers; /* Headers mentioned by the user */
201 char *fname_out; /* where to direct output (-O) */
202 const char *proxy_flag; /* Use proxies if env vars are set */
203 const char *user_agent; /* "User-Agent" header field */
204 #if ENABLE_FEATURE_WGET_TIMEOUT
205 unsigned timeout_seconds;
210 smallint chunked; /* chunked transfer encoding */
211 smallint got_clen; /* got content-length: from server */
212 /* Local downloads do benefit from big buffer.
213 * With 512 byte buffer, it was measured to be
214 * an order of magnitude slower than with big one.
216 uint64_t just_to_align_next_member;
217 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
219 #define G (*ptr_to_globals)
220 #define INIT_G() do { \
221 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
223 #define FINI_G() do { \
224 FREE_PTR_TO_GLOBALS(); \
228 /* Must match option string! */
230 WGET_OPT_CONTINUE = (1 << 0),
231 WGET_OPT_SPIDER = (1 << 1),
232 WGET_OPT_QUIET = (1 << 2),
233 WGET_OPT_OUTNAME = (1 << 3),
234 WGET_OPT_PREFIX = (1 << 4),
235 WGET_OPT_PROXY = (1 << 5),
236 WGET_OPT_USER_AGENT = (1 << 6),
237 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
238 WGET_OPT_RETRIES = (1 << 8),
239 WGET_OPT_PASSIVE = (1 << 9),
240 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
241 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
// Progress-bar driver. `flag` is compared against PROGRESS_START and
// PROGRESS_END below; retrieve_file_data also passes PROGRESS_BUMP for
// periodic refreshes.
// NOTE(review): braces and some statements of this function are not visible
// in this view; the comments describe only the lines that are shown.
249 #if ENABLE_FEATURE_WGET_STATUSBAR
250 static void progress_meter(int flag)
// -q is tested first. NOTE(review): the statement controlled by this test
// is not shown; presumably an early return so nothing is drawn.
252 if (option_mask32 & WGET_OPT_QUIET)
255 if (flag == PROGRESS_START)
256 bb_progress_init(&G.pmt, G.curfile);
258 bb_progress_update(&G.pmt,
// Visible argument: 0 when the transfer is chunked or no Content-Length was
// received, otherwise beg_range + transferred + content_len.
261 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
264 if (flag == PROGRESS_END) {
265 bb_progress_free(&G.pmt);
266 bb_putchar_stderr('\n');
// Empty inline variant with the same name. NOTE(review): the #else/#endif
// lines are not visible; presumably this is used when the status bar
// feature is disabled.
271 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
275 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
276 * local addresses can have a scope identifier to specify the
277 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
278 * identifier is only valid on a single node.
280 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
281 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
282 * in the Host header as invalid requests, see
283 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
// Removes an IPv6 zone identifier (the "%eth0" part of "[fe80::1%eth0]")
// from `host`, editing the string in place.
// NOTE(review): declarations, braces and some tests are not visible in this
// view; the comments describe only the lines that are shown.
285 static void strip_ipv6_scope_id(char *host)
289 /* bbox wget actually handles IPv6 addresses without [], like
290 * wget "http://::1/xxx", but this is not standard.
291 * To save code, _here_ we do not support it. */
294 return; /* not IPv6 */
// Find where the zone identifier starts, if there is one.
296 scope = strchr(host, '%');
300 /* Remove the IPv6 zone identifier from the host address */
301 cp = strchr(host, ']');
302 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
303 /* malformed address (not "[xx]:nn" or "[xx]") */
// Copy the "]..." tail over the '%', dropping everything in between.
307 /* cp points to "]...", scope points to "%eth0]..." */
308 overlapping_strcpy(scope, cp);
311 #if ENABLE_FEATURE_WGET_AUTHENTICATION
312 /* Base64-encode character string. */
// The encoded text is written into the shared scratch buffer G.wget_buf,
// so it is only valid until that buffer is reused.
// NOTE(review): the return statement is not visible; presumably the
// function returns G.wget_buf.
313 static char *base64enc(const char *str)
315 unsigned len = strlen(str);
// Clamp the input length so the encoded form fits in G.wget_buf.
// Longer input is truncated without any message.
316 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
317 len = sizeof(G.wget_buf)/4*3 - 10;
318 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
// Callers in this file pass the result straight to "%s" in error messages
// that quote server- or user-supplied text.
// NOTE(review): only the signature and the first declaration are visible;
// the loop over `p` and the return statement are not shown.
323 static char* sanitize_string(char *s)
// Bytes are examined through an unsigned char pointer.
325 unsigned char *p = (void *) s;
// SIGALRM handler. wget_main installs it with signal(), and open_socket
// arms alarm(G.timeout_seconds) before connecting.
// NOTE(review): the line between the comment and the die call is not
// visible. open_socket sets G.connecting around the connect call, so the
// handler presumably tests it before dying.
332 #if ENABLE_FEATURE_WGET_TIMEOUT
333 static void alarm_handler(int sig UNUSED_PARAM)
335 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
337 bb_error_msg_and_die("download timed out");
// Connects a stream socket to `lsa` and wraps the descriptor in a
// read/write ("r+") stdio stream.
// NOTE(review): declarations, braces and the return are not visible here.
341 static FILE *open_socket(len_and_sockaddr *lsa)
// With the timeout feature, an alarm of G.timeout_seconds is armed and
// G.connecting is set only for the duration of the connect call.
346 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
347 fd = xconnect_stream(lsa);
348 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
350 /* glibc 2.4 seems to try seeking on it - ??! */
351 /* hopefully it understands what ESPIPE means... */
352 fp = fdopen(fd, "r+");
// NOTE(review): the test guarding this call is not visible; presumably it
// runs when fdopen() returned NULL.
354 bb_perror_msg_and_die(bb_msg_memory_exhausted);
359 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
360 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
// The line is read into the shared buffer G.wget_buf; callers inspect that
// buffer after the call.
// NOTE(review): declarations and the statements that cut the string at
// buf_ptr are not visible in this view.
361 static char fgets_and_trim(FILE *fp)
// Dies when fgets returns NULL (end of file or read error).
366 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
367 bb_perror_msg_and_die("error getting response");
// strchrnul yields the terminating NUL when the character is absent, so
// buf_ptr always points inside the buffer.
369 buf_ptr = strchrnul(G.wget_buf, '\n');
372 buf_ptr = strchrnul(G.wget_buf, '\r');
375 log_io("< %s", G.wget_buf);
// Sends the command s1+s2 (CRLF-terminated) on `fp`, then reads the reply
// into G.wget_buf and converts its 3-digit code to an integer.
// Callers in this file pass NULL for both strings to read a reply without
// sending anything. NOTE(review): the send is therefore presumably guarded
// by a NULL check on a line not visible here; the return is not shown.
380 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
386 fprintf(fp, "%s%s\r\n", s1, s2);
388 log_io("> %s%s", s1, s2);
// Repeat until the line starts with a digit and has a space after the
// third character. NOTE(review): the loop body is not visible; presumably
// it reads one line per iteration.
393 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
// Cut the buffer after the code so it can be converted. Callers read the
// message text from G.wget_buf + 4.
395 G.wget_buf[3] = '\0';
396 result = xatoi_positive(G.wget_buf);
// Splits src_url into protocol, port, host, path and optional credentials,
// filling the fields of *h. The URL is duplicated first; the copy is kept
// in h->allocated (download_one_url frees that pointer when it is done).
// NOTE(review): many lines (declarations, braces, several assignments) are
// not visible in this view; comments describe only what is shown.
401 static void parse_url(const char *src_url, struct host_info *h)
406 h->allocated = url = xstrdup(src_url);
409 p = strstr(url, "://");
// The scheme is matched with strcmp on `url`. NOTE(review): this presumes
// the string was cut at the "://" found above, on a line not shown.
// h->protocol is set to one of the static P_* strings, which lets callers
// compare it by pointer (e.g. `target.protocol == P_FTP`).
413 if (strcmp(url, P_FTP) == 0) {
414 h->port = bb_lookup_port(P_FTP, "tcp", 21);
416 #if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
417 if (strcmp(url, P_HTTPS) == 0) {
418 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
419 h->protocol = P_HTTPS;
422 if (strcmp(url, P_HTTP) == 0) {
424 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
425 h->protocol = P_HTTP;
428 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
431 // GNU wget is user-friendly and falls back to http://
437 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
438 // 'GET /?var=a/b HTTP 1.0'
439 // and saves 'index.html?var=a%2Fb' (we save 'b')
440 // wget 'http://busybox.net?login=john@doe':
441 // request: 'GET /?login=john@doe HTTP/1.0'
442 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
443 // wget 'http://busybox.net#test/test':
444 // request: 'GET / HTTP/1.0'
445 // saves: 'index.html' (we save 'test')
447 // We also don't add unique .N suffix if file exists...
// After these three lines sp is the earliest '/', '?' or '#' following the
// host, or NULL when none of them occurs.
448 sp = strchr(h->host, '/');
449 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
450 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
453 } else if (*sp == '/') {
456 } else { // '#' or '?'
457 // http://busybox.net?login=john@doe is a valid URL
458 // memmove converts to:
459 // http:/busybox.nett?login=john@doe...
// The host text is shifted one byte towards the start of the buffer.
460 memmove(h->host - 1, h->host, sp - h->host);
// The last '@' is used. NOTE(review): presumably so that an '@' inside the
// credentials does not end the user part early.
466 sp = strrchr(h->host, '@');
468 // URL-decode "user:password" string before base64-encoding:
469 // wget http://test:my%20pass@example.com should send
470 // Authorization: Basic dGVzdDpteSBwYXNz
471 // which decodes to "test:my pass".
472 // Standard wget and curl do this too.
475 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
478 /* else: h->user remains NULL, or as set by original request
479 * before redirect (if we are here after a redirect).
// Reads one response header line into G.wget_buf. Callers in this file
// treat a NULL return as "no more headers"; otherwise they use the
// returned pointer as the header value and look the name up in G.wget_buf.
// NOTE(review): declarations, braces, the return statements and several
// tests are not visible in this view.
483 static char *gethdr(FILE *fp)
488 /* retrieve header line */
489 c = fgets_and_trim(fp);
491 /* end of the headers? */
492 if (G.wget_buf[0] == '\0')
495 /* convert the header name to lower case */
496 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
498 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
499 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
500 * "A-Z" maps to "a-z".
501 * "@[\]" can't occur in header names.
502 * "^_" maps to "~,DEL" (which is wrong).
503 * "^" was never seen yet, "_" was seen from web.archive.org
504 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
509 /* verify we are at the end of the header name */
510 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
513 /* locate the start of the header value */
515 hdrval = skip_whitespace(s);
518 /* Rats! The buffer isn't big enough to hold the entire header value */
// Consume the rest of the over-long line so the next read starts on a new
// header. NOTE(review): the test that decides whether to drain (presumably
// on `c` from fgets_and_trim) is not visible.
519 while (c = getc(fp), c != EOF && c != '\n')
// Used when a resume request could not be honored: reports the failure and
// rewinds the output file to offset 0. It is called from the FTP REST
// handling and from the HTTP status handling in this file.
// NOTE(review): the line between the message and the seek is not visible;
// presumably it resets G.beg_range, as the function name says.
526 static void reset_beg_range_to_zero(void)
528 bb_error_msg("restart failed");
530 xlseek(G.output_fd, 0, SEEK_SET);
531 /* Done at the end instead: */
532 /* ftruncate(G.output_fd, 0); */
// Sets up an FTP download on the server at `lsa`: greeting, login, binary
// mode, size query, passive-mode data connection (stored in *dfpp),
// optional restart offset, then RETR for target->path. `sfp` is the
// control connection.
// NOTE(review): declarations, braces, case labels and the return are not
// visible here; the caller assigns the result to its own `sfp`.
535 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
// Default credentials. NOTE(review): the guard is not visible; presumably
// this applies only when the URL carried no user part.
542 target->user = xstrdup("anonymous:busybox@");
544 sfp = open_socket(lsa);
// No command is sent here; this reads the greeting and expects code 220.
// The message text starts after the code, at G.wget_buf + 4.
545 if (ftpcmd(NULL, NULL, sfp) != 220)
546 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
549 * Splitting username:password pair,
552 str = strchr(target->user, ':');
555 switch (ftpcmd("USER ", target->user, sfp)) {
559 if (ftpcmd("PASS ", str, sfp) == 230)
561 /* fall through (failed login) */
563 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
// The reply to TYPE I is not checked.
566 ftpcmd("TYPE I", NULL, sfp);
// A 213 reply carries the size as decimal text after the code.
571 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
572 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
573 if (G.content_len < 0 || errno) {
574 bb_error_msg_and_die("SIZE value is garbage");
580 * Entering passive mode
582 if (ftpcmd("PASV", NULL, sfp) != 227) {
584 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
586 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
587 // Server's IP is N1.N2.N3.N4 (we ignore it)
588 // Server's port for data connection is P1*256+P2
589 str = strrchr(G.wget_buf, ')');
590 if (str) str[0] = '\0';
// P2: the number after the last comma.
591 str = strrchr(G.wget_buf, ',');
592 if (!str) goto pasv_error;
593 port = xatou_range(str+1, 0, 255);
// P1: NOTE(review): a line before this second search is not visible;
// presumably it cuts the string at the previous comma so that this
// strrchr finds the one before it.
595 str = strrchr(G.wget_buf, ',');
596 if (!str) goto pasv_error;
597 port += xatou_range(str+1, 0, 255) * 256;
// The same address structure is reused with the data port substituted.
598 set_nport(&lsa->u.sa, htons(port));
600 *dfpp = open_socket(lsa);
// Resume: ask the server to restart at beg_range. On a 350 reply the
// expected length is reduced by the part already present.
// NOTE(review): the `else` before reset_beg_range_to_zero() is not shown.
602 if (G.beg_range != 0) {
603 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
604 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
605 G.content_len -= G.beg_range;
607 reset_beg_range_to_zero();
// Any reply code above 150 to RETR is treated as a failure.
610 if (ftpcmd("RETR ", target->path, sfp) > 150)
611 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
// Starts "openssl s_client -quiet -connect HOST" as a helper process, with
// an AF_UNIX socketpair set up for communication. The caller in this file
// treats a negative return as "openssl could not be used" and otherwise
// passes the returned descriptor to fdopen().
// NOTE(review): the fork, the descriptor plumbing, the parent branch and
// the return are not visible in this view.
616 #if ENABLE_FEATURE_WGET_OPENSSL
617 static int spawn_https_helper_openssl(const char *host, unsigned port)
619 char *allocated = NULL;
// Declared only when the ssl_helper fallback is also configured.
// NOTE(review): where it is set and read is on lines not shown.
622 IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)
624 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
625 /* Kernel can have AF_UNIX support disabled */
626 bb_perror_msg_and_die("socketpair");
// ":port" is appended only when `host` contains no ':'.
// NOTE(review): a bare IPv6 literal also contains ':', so it would be
// passed on without a port; confirm this is intended.
628 if (!strchr(host, ':'))
629 host = allocated = xasprintf("%s:%u", host, port);
641 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
642 * It prints some debug stuff on stderr, don't know how to suppress it.
643 * Work around by dev-nulling stderr. We lose all error messages :(
646 xopen("/dev/null", O_RDWR);
647 argv[0] = (char*)"openssl";
648 argv[1] = (char*)"s_client";
649 argv[2] = (char*)"-quiet";
650 argv[3] = (char*)"-connect";
651 argv[4] = (char*)host;
653 BB_EXECVP(argv[0], argv);
655 # if ENABLE_FEATURE_WGET_SSL_HELPER
659 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
667 # if ENABLE_FEATURE_WGET_SSL_HELPER
677 /* See networking/ssl_helper/README how to build one */
// Runs "ssl_helper -d3" with the connected network socket moved to
// descriptor 3, and moves one end of a socketpair onto network_fd so that
// a stream already opened on that descriptor talks to the helper instead.
// NOTE(review): the child/parent branching, the remaining descriptor
// handling and the closing lines are not visible in this view.
678 #if ENABLE_FEATURE_WGET_SSL_HELPER
679 static void spawn_https_helper_small(int network_fd)
684 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
685 /* Kernel can have AF_UNIX support disabled */
686 bb_perror_msg_and_die("socketpair");
// fork on MMU systems, vfork otherwise.
688 pid = BB_MMU ? xfork() : xvfork();
696 xmove_fd(network_fd, 3);
698 * A simple ssl/tls helper
700 argv[0] = (char*)"ssl_helper";
701 argv[1] = (char*)"-d3";
703 BB_EXECVP(argv[0], argv);
704 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
710 xmove_fd(sp[0], network_fd);
// Copies the response body from `dfp` to G.output_fd through G.wget_buf,
// driving the progress meter and the -T inactivity timeout on the way.
// NOTE(review): a large part of the loop structure (braces, loop headers,
// #endif lines, several statements) is not visible in this view; the added
// comments describe only the lines that are shown.
714 static void NOINLINE retrieve_file_data(FILE *dfp)
716 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
717 # if ENABLE_FEATURE_WGET_TIMEOUT
// Idle-second countdown; it is reloaded from G.timeout_seconds further
// down, after data has been written.
718 unsigned second_cnt = G.timeout_seconds;
720 struct pollfd polldata;
722 polldata.fd = fileno(dfp);
723 polldata.events = POLLIN | POLLPRI;
725 progress_meter(PROGRESS_START);
730 /* Loops only if chunked */
733 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
734 /* Must use nonblocking I/O, otherwise fread will loop
735 * and *block* until it reads full buffer,
736 * which messes up progress bar and/or timeout logic.
737 * Because of nonblocking I/O, we need to dance
738 * very carefully around EAGAIN. See explanation at
741 ndelay_on(polldata.fd);
747 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
748 /* fread internally uses read loop, which in our case
749 * is usually exited when we get EAGAIN.
750 * In this case, libc sets error marker on the stream.
751 * Need to clear it before next fread to avoid possible
752 * rare false positive ferror below. Rare because usually
753 * fread gets more than zero bytes, and we don't fall
754 * into if (n <= 0) ...
// Read a full buffer by default; when G.content_len is smaller than the
// buffer, read only that many bytes.
// NOTE(review): the statement under the `<= 0` test is not visible.
759 rdsz = sizeof(G.wget_buf);
761 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
762 if ((int)G.content_len <= 0)
764 rdsz = (unsigned)G.content_len;
767 n = fread(G.wget_buf, 1, rdsz, dfp);
770 xwrite(G.output_fd, G.wget_buf, n);
771 #if ENABLE_FEATURE_WGET_STATUSBAR
776 if (G.content_len == 0)
779 #if ENABLE_FEATURE_WGET_TIMEOUT
780 second_cnt = G.timeout_seconds;
787 * If error occurs, or EOF is reached, the return value
788 * is a short item count (or zero).
789 * fread does not distinguish between EOF and error.
791 if (errno != EAGAIN) {
793 progress_meter(PROGRESS_END);
794 bb_perror_msg_and_die(bb_msg_read_error);
796 break; /* EOF, not error */
799 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
800 /* It was EAGAIN. There is no data. Wait up to one second
801 * then abort if timed out, or update the bar and try reading again.
// A zero result means the 1000 ms wait expired. The countdown is only
// decremented when non-zero, so a timeout value of 0 never aborts.
803 if (safe_poll(&polldata, 1, 1000) == 0) {
804 # if ENABLE_FEATURE_WGET_TIMEOUT
805 if (second_cnt != 0 && --second_cnt == 0) {
806 progress_meter(PROGRESS_END);
807 bb_error_msg_and_die("download timed out");
810 /* We used to loop back to poll here,
811 * but there is no great harm in letting fread
812 * to try reading anyway.
817 /* Need to do it _every_ second for "stalled" indicator
818 * to be shown properly.
820 progress_meter(PROGRESS_BUMP);
821 } /* while (reading data) */
823 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
825 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
// The next chunk size is parsed as a base-16 number from the line just
// read; a size of 0 ends the download.
830 fgets_and_trim(dfp); /* Eat empty line */
833 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
834 /* FIXME: error check? */
835 if (G.content_len == 0)
836 break; /* all done! */
839 * Note that fgets may result in some data being buffered in dfp.
840 * We loop back to fread, which will retrieve this data.
841 * Also note that code has to be arranged so that fread
842 * is done _before_ one-second poll wait - poll doesn't know
843 * about stdio buffering and can result in spurious one second waits!
847 /* If -c failed, we restart from the beginning,
848 * but we do not truncate file then, we do it only now, at the end.
849 * This lets user to ^C if his 99% complete 10 GB file download
850 * failed to restart *without* losing the almost complete file.
853 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
854 if (pos != (off_t)-1)
855 ftruncate(G.output_fd, pos);
858 /* Draw full bar and free its resources */
859 G.chunked = 0; /* makes it show 100% even for chunked download */
860 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
861 progress_meter(PROGRESS_END);
// Downloads a single URL: parses it, chooses between a proxy and the
// target server, picks the output file name, works out the resume offset
// for -c, then either speaks HTTP(S) (following redirects) or sets up an
// FTP session, and finally streams the body with retrieve_file_data.
// NOTE(review): many lines of this function (braces, labels, else
// branches, declarations, #endif lines) are not visible in this view; the
// added comments describe only the lines that are shown.
864 static void download_one_url(const char *url)
866 bool use_proxy; /* Use proxies if env vars are set */
868 len_and_sockaddr *lsa;
869 FILE *sfp; /* socket to web/ftp server */
870 FILE *dfp; /* socket to ftp server (data) */
872 char *fname_out_alloc;
873 char *redirected_path = NULL;
874 struct host_info server;
875 struct host_info target;
877 server.allocated = NULL;
878 target.allocated = NULL;
882 parse_url(url, &target);
884 /* Use the proxy if necessary */
// "-Y off" disables proxy use. Otherwise the proxy URL is taken from the
// ftp_proxy or http_proxy environment variable, depending on the target
// protocol, and is used only when it is set and non-empty.
885 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
887 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
888 //FIXME: what if protocol is https? Ok to use http_proxy?
889 use_proxy = (proxy && proxy[0]);
891 parse_url(proxy, &server);
894 server.port = target.port;
895 if (ENABLE_FEATURE_IPV6) {
896 //free(server.allocated); - can't be non-NULL
897 server.host = server.allocated = xstrdup(target.host);
899 server.host = target.host;
// With IPv6 enabled, server.host above is a separate copy of the host
// string, while the scope id is stripped only from target.host here.
903 if (ENABLE_FEATURE_IPV6)
904 strip_ipv6_scope_id(target.host);
906 /* If there was no -O FILE, guess output filename */
907 fname_out_alloc = NULL;
908 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
909 G.fname_out = bb_get_last_path_component_nostrip(target.path);
910 /* handle "wget http://kernel.org//" */
911 if (G.fname_out[0] == '/' || !G.fname_out[0])
912 G.fname_out = (char*)"index.html";
913 /* -P DIR is considered only if there was no -O FILE */
915 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
917 /* redirects may free target.path later, need to make a copy */
918 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
921 #if ENABLE_FEATURE_WGET_STATUSBAR
922 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
925 /* Determine where to start transfer */
// For -c, an existing output file is opened without O_CREAT and its
// current size becomes the resume offset.
927 if (option_mask32 & WGET_OPT_CONTINUE) {
928 G.output_fd = open(G.fname_out, O_WRONLY);
929 if (G.output_fd >= 0) {
930 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
932 /* File doesn't exist. We do not create file here yet.
933 * We are not sure it exists on remote side */
938 lsa = xhost2sockaddr(server.host, server.port);
939 if (!(option_mask32 & WGET_OPT_QUIET)) {
940 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
941 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
945 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
// The HTTP code path is used for every non-FTP target, and for FTP targets
// as well when a proxy is in use.
948 if (use_proxy || target.protocol != P_FTP) {
955 /* Open socket to http(s) server */
956 #if ENABLE_FEATURE_WGET_OPENSSL
957 /* openssl (and maybe ssl_helper) support is configured */
958 if (target.protocol == P_HTTPS) {
959 /* openssl-based helper
960 * Inconvenient API since we can't give it an open fd
962 int fd = spawn_https_helper_openssl(server.host, server.port);
963 # if ENABLE_FEATURE_WGET_SSL_HELPER
964 if (fd < 0) { /* no openssl? try ssl_helper */
965 sfp = open_socket(lsa);
966 spawn_https_helper_small(fileno(sfp));
970 /* We don't check for exec("openssl") failure in this case */
972 sfp = fdopen(fd, "r+");
974 bb_perror_msg_and_die(bb_msg_memory_exhausted);
977 sfp = open_socket(lsa);
979 #elif ENABLE_FEATURE_WGET_SSL_HELPER
980 /* Only ssl_helper support is configured */
981 sfp = open_socket(lsa);
982 if (target.protocol == P_HTTPS)
983 spawn_https_helper_small(fileno(sfp));
985 /* ssl (https) support is not configured */
986 sfp = open_socket(lsa);
988 /* Send HTTP request */
// Two request-line forms are visible: a full "scheme://host/path" form and
// a path-only form whose method is POST or GET.
// NOTE(review): the condition choosing between them is not shown;
// presumably the full form is the one sent to a proxy.
990 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
991 target.protocol, target.host,
994 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
995 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
// Built-in headers are skipped when the user supplied the same header
// with --header (the USR_HEADER_* tests).
998 if (!USR_HEADER_HOST)
999 SENDFMT(sfp, "Host: %s\r\n", target.host);
1000 if (!USR_HEADER_USER_AGENT)
1001 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1003 /* Ask server to close the connection as soon as we are done
1004 * (IOW: we do not intend to send more requests)
1006 SENDFMT(sfp, "Connection: close\r\n");
1008 #if ENABLE_FEATURE_WGET_AUTHENTICATION
// The "+6" skips the leading "Proxy-" of the literal, so this sends an
// "Authorization: Basic ..." header built from the same string constant.
1009 if (target.user && !USR_HEADER_AUTH) {
1010 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1011 base64enc(target.user));
1013 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1014 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1015 base64enc(server.user));
1019 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1020 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1022 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// NOTE(review): G.extra_headers is user-supplied --header text and is
// handed to log_io() as its first argument. In the logging variant of
// log_io (bb_error_msg(__VA_ARGS__)) that makes it the format string;
// consider log_io("%s", ...) there.
1023 if (G.extra_headers) {
1024 log_io(G.extra_headers);
1025 fputs(G.extra_headers, sfp);
// NOTE(review): "%u" below is paired with an (int) argument.
1028 if (option_mask32 & WGET_OPT_POST_DATA) {
1030 "Content-Type: application/x-www-form-urlencoded\r\n"
1031 "Content-Length: %u\r\n"
1034 (int) strlen(G.post_data), G.post_data
1039 SENDFMT(sfp, "\r\n");
1045 * Retrieve HTTP response line and check for "200" status code.
1048 fgets_and_trim(sfp);
1051 str = skip_non_whitespace(str);
1052 str = skip_whitespace(str);
1053 // FIXME: no error check
1054 // xatou wouldn't work: "200 OK"
1059 while (gethdr(sfp) != NULL)
1060 /* eat all remaining headers */;
1064 Response 204 doesn't say "null file", it says "metadata
1065 has changed but data didn't":
1067 "10.2.5 204 No Content
1068 The server has fulfilled the request but does not need to return
1069 an entity-body, and might want to return updated metainformation.
1070 The response MAY include new or updated metainformation in the form
1071 of entity-headers, which if present SHOULD be associated with
1072 the requested variant.
1074 If the client is a user agent, it SHOULD NOT change its document
1075 view from that which caused the request to be sent. This response
1076 is primarily intended to allow input for actions to take place
1077 without causing a change to the user agent's active document view,
1078 although any new or updated metainformation SHOULD be applied
1079 to the document currently in the user agent's active view.
1081 The 204 response MUST NOT include a message-body, and thus
1082 is always terminated by the first empty line after the header fields."
1084 However, in real world it was observed that some web servers
1085 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1088 if (G.beg_range != 0) {
1089 /* "Range:..." was not honored by the server.
1090 * Restart download from the beginning.
1092 reset_beg_range_to_zero();
1095 case 300: /* redirection */
1100 case 206: /* Partial Content */
1101 if (G.beg_range != 0)
1102 /* "Range:..." worked. Good. */
1104 /* Partial Content even though we did not ask for it??? */
1107 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1111 * Retrieve HTTP headers.
1113 while ((str = gethdr(sfp)) != NULL) {
1114 static const char keywords[] ALIGN1 =
1115 "content-length\0""transfer-encoding\0""location\0";
1117 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1121 /* gethdr converted "FOO:" string to lowercase */
1123 /* strip trailing whitespace */
1124 char *s = strchrnul(str, '\0') - 1;
1125 while (s >= str && (*s == ' ' || *s == '\t')) {
1129 key = index_in_strings(keywords, G.wget_buf) + 1;
1130 if (key == KEY_content_length) {
1131 G.content_len = BB_STRTOOFF(str, NULL, 10);
1132 if (G.content_len < 0 || errno) {
1133 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1138 if (key == KEY_transfer_encoding) {
1139 if (strcmp(str_tolower(str), "chunked") != 0)
1140 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1143 if (key == KEY_location && status >= 300) {
1144 if (--redir_limit == 0)
1145 bb_error_msg_and_die("too many redirections");
1147 if (str[0] == '/') {
1148 free(redirected_path);
1149 target.path = redirected_path = xstrdup(str+1);
1150 /* lsa stays the same: it's on the same server */
1152 parse_url(str, &target);
1154 /* server.user remains untouched */
1155 free(server.allocated);
1156 server.allocated = NULL;
1157 server.host = target.host;
1158 /* strip_ipv6_scope_id(target.host); - no! */
1159 /* we assume remote never gives us IPv6 addr with scope id */
1160 server.port = target.port;
1163 } /* else: lsa stays the same: we use proxy */
1165 goto establish_session;
1168 // if (status >= 300)
1169 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1171 /* For HTTP, data is pumped over the same connection */
1177 sfp = prepare_ftp_session(&dfp, &target, lsa);
1182 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1183 if (G.output_fd < 0)
1184 G.output_fd = xopen(G.fname_out, G.o_flags);
1185 retrieve_file_data(dfp);
1186 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1187 xclose(G.output_fd);
1193 /* It's ftp. Close data connection properly */
1195 if (ftpcmd(NULL, NULL, sfp) != 226)
1196 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1197 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1201 free(server.allocated);
1202 free(target.allocated);
1205 free(fname_out_alloc);
1206 free(redirected_path);
// Applet entry point: sets defaults, parses the command line, joins all
// --header values into G.extra_headers, then calls download_one_url for
// the URL arguments and returns EXIT_SUCCESS.
// NOTE(review): several lines (braces, loop headers, INIT_G, part of the
// option string, #endif lines) are not visible in this view; the added
// comments describe only the lines that are shown.
1209 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1210 int wget_main(int argc UNUSED_PARAM, char **argv)
1212 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// Each entry maps a long option name to the short option character given
// as its last string; --header and --post-data use "\xff" and "\xfe", and
// the ignored options share "\xf0".
1213 static const char wget_longopts[] ALIGN1 =
1214 /* name, has_arg, val */
1215 "continue\0" No_argument "c"
1216 //FIXME: -s isn't --spider, it's --save-headers!
1217 "spider\0" No_argument "s"
1218 "quiet\0" No_argument "q"
1219 "output-document\0" Required_argument "O"
1220 "directory-prefix\0" Required_argument "P"
1221 "proxy\0" Required_argument "Y"
1222 "user-agent\0" Required_argument "U"
1223 IF_FEATURE_WGET_TIMEOUT(
1224 "timeout\0" Required_argument "T")
1226 IF_DESKTOP( "tries\0" Required_argument "t")
1227 "header\0" Required_argument "\xff"
1228 "post-data\0" Required_argument "\xfe"
1229 /* Ignored (we always use PASV): */
1230 IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
1231 /* Ignored (we don't do ssl) */
1232 IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
1233 /* Ignored (we don't support caching) */
1234 IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1235 IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1236 IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1237 IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1238 IF_DESKTOP( "no-parent\0" No_argument "\xf0")
1242 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1243 llist_t *headers_llist = NULL;
1248 #if ENABLE_FEATURE_WGET_TIMEOUT
// Default timeout of 900 seconds; -T stores its value into the same field
// through getopt32 below.
1249 G.timeout_seconds = 900;
1250 signal(SIGALRM, alarm_handler);
1252 G.proxy_flag = "on"; /* use proxies if env vars are set */
1253 G.user_agent = "Wget"; /* "User-Agent" header field */
1255 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1256 applet_long_options = wget_longopts;
1258 opt_complementary = "-1" /* at least one URL */
1259 IF_FEATURE_WGET_TIMEOUT(":T+") /* -T NUM */
1260 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
1261 getopt32(argv, "csqO:P:Y:U:T:"
1264 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1265 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1266 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1267 * -nH --no-host-directories: wget -r http://host/ won't create host/
1269 * "n::" above says that we accept -n[ARG].
1270 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1272 , &G.fname_out, &G.dir_prefix,
1273 &G.proxy_flag, &G.user_agent,
1274 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1275 NULL, /* -t RETRIES */
1277 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1278 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1282 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1283 if (headers_llist) {
1286 llist_t *ll = headers_llist;
// Two extra bytes per header cover the "\r\n" appended to each one below;
// the final +1 in the allocation covers the terminating NUL.
1288 size += strlen(ll->data) + 2;
1291 G.extra_headers = hdr = xmalloc(size + 1);
1292 while (headers_llist) {
1296 size = sprintf(hdr, "%s\r\n",
1297 (char*)llist_pop(&headers_llist));
1298 /* a bit like index_in_substrings but don't match full key */
1300 words = wget_user_headers;
// strstr(...) == hdr is a prefix test: the user's header begins with this
// built-in header name, so the matching bit is recorded in
// G.user_headers. `words` then advances to the next NUL-separated name.
1302 if (strstr(hdr, words) == hdr) {
1303 G.user_headers |= bit;
1307 words += strlen(words) + 1;
// Default open flags include O_EXCL, so an existing file is not
// overwritten; with -O FILE the flags are replaced by a set without it.
1315 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1316 if (G.fname_out) { /* -O FILE ? */
1317 if (LONE_DASH(G.fname_out)) { /* -O - ? */
// -c is switched off when output goes to "-".
1319 option_mask32 &= ~WGET_OPT_CONTINUE;
1321 /* compat with wget: -O FILE can overwrite */
1322 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1326 download_one_url(*argv++);
1328 if (G.output_fd >= 0)
1329 xclose(G.output_fd);
1331 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1332 free(G.extra_headers);
1336 return EXIT_SUCCESS;