1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
16 //config: wget is a utility for non-interactive download of files from HTTP
17 //config: and FTP servers.
19 //config:config FEATURE_WGET_STATUSBAR
20 //config: bool "Enable a nifty progress meter (+2k)"
22 //config: depends on WGET
24 //config: Enable the transfer progress bar for wget transfers.
26 //config:config FEATURE_WGET_AUTHENTICATION
27 //config: bool "Enable HTTP authentication"
29 //config: depends on WGET
31 //config: Support authenticated HTTP transfers.
33 //config:config FEATURE_WGET_LONG_OPTIONS
34 //config: bool "Enable long options"
36 //config: depends on WGET && LONG_OPTS
38 //config: Support long options for the wget applet.
40 //config:config FEATURE_WGET_TIMEOUT
41 //config: bool "Enable timeout option -T SEC"
43 //config: depends on WGET
45 //config: Supports network read and connect timeouts for wget,
46 //config: so that wget will give up and time out, through the -T
47 //config: command line option.
49 //config: Currently only connect and network data read timeout are
50 //config: supported (i.e., timeout is not applied to the DNS query). When
51 //config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
52 //config: will work in addition to -T.
54 //config:config FEATURE_WGET_OPENSSL
55 //config: bool "Try to connect to HTTPS using openssl"
57 //config: depends on WGET
59 //config: Choose how wget establishes SSL connection for https:// URLs.
61 //config: Busybox itself contains no SSL code. wget will spawn
62 //config: a helper program to talk over HTTPS.
64 //config: OpenSSL has a simple SSL client for debug purposes.
65 //config: If you select "openssl" helper, wget will effectively call
66 //config: "openssl s_client -quiet -connect IP:443 2>/dev/null"
67 //config: and pipe its data through it.
68 //config: Note inconvenient API: host resolution is done twice,
69 //config: and there is no guarantee openssl's idea of IPv6 address
70 //config: format is the same as ours.
71 //config: Another problem is that s_client prints debug information
72 //config: to stderr, and it needs to be suppressed. This means
73 //config: all error messages get suppressed too.
74 //config: openssl is also a big binary, often dynamically linked
75 //config: against ~15 libraries.
77 //config:config FEATURE_WGET_SSL_HELPER
78 //config: bool "Try to connect to HTTPS using ssl_helper"
80 //config: depends on WGET
82 //config: Choose how wget establishes SSL connection for https:// URLs.
84 //config: Busybox itself contains no SSL code. wget will spawn
85 //config: a helper program to talk over HTTPS.
87 //config: ssl_helper is a tool which can be built statically
88 //config: from busybox sources against a small embedded SSL library.
89 //config: Please see networking/ssl_helper/README.
90 //config: It does not require double host resolution and emits
91 //config: error messages to stderr.
93 //config: Precompiled static binary may be available at
94 //config: http://busybox.net/downloads/binaries/
96 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
98 //kbuild:lib-$(CONFIG_WGET) += wget.o
100 //usage:#define wget_trivial_usage
101 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
102 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
103 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
104 /* Since we ignore these opts, we don't show them in --help */
105 /* //usage: " [--no-check-certificate] [--no-cache]" */
106 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
108 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
109 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
110 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
112 //usage:#define wget_full_usage "\n\n"
113 //usage: "Retrieve files via HTTP or FTP\n"
114 //usage: "\n -s Spider mode - only check file existence"
115 //usage: "\n -c Continue retrieval of aborted transfer"
116 //usage: "\n -q Quiet"
117 //usage: "\n -P DIR Save to DIR (default .)"
118 //usage: IF_FEATURE_WGET_TIMEOUT(
119 //usage: "\n -T SEC Network read timeout is SEC seconds"
121 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
122 //usage: "\n -U STR Use STR for User-Agent header"
123 //usage: "\n -Y Use proxy ('on' or 'off')"
128 # define log_io(...) bb_error_msg(__VA_ARGS__)
129 # define SENDFMT(fp, fmt, ...) \
131 log_io("> " fmt, ##__VA_ARGS__); \
132 fprintf(fp, fmt, ##__VA_ARGS__); \
135 # define log_io(...) ((void)0)
136 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
144 const char *protocol;
148 static const char P_FTP[] = "ftp";
149 static const char P_HTTP[] = "http";
150 static const char P_HTTPS[] = "https";
152 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
153 /* User-specified headers prevent using our corresponding built-in headers. */
156 HDR_USER_AGENT = (1<<1),
158 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
159 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
161 static const char wget_user_headers[] ALIGN1 =
165 # if ENABLE_FEATURE_WGET_AUTHENTICATION
167 "Proxy-Authorization:\0"
170 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
171 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
172 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
173 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
174 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
175 #else /* No long options, no user-headers :( */
176 # define USR_HEADER_HOST 0
177 # define USR_HEADER_USER_AGENT 0
178 # define USR_HEADER_RANGE 0
179 # define USR_HEADER_AUTH 0
180 # define USR_HEADER_PROXY_AUTH 0
185 off_t content_len; /* Content-length of the file */
186 off_t beg_range; /* Range at which continue begins */
187 #if ENABLE_FEATURE_WGET_STATUSBAR
188 off_t transferred; /* Number of bytes transferred so far */
189 const char *curfile; /* Name of current file being transferred */
193 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
196 unsigned char user_headers; /* Headers mentioned by the user */
198 char *fname_out; /* where to direct output (-O) */
199 const char *proxy_flag; /* Use proxies if env vars are set */
200 const char *user_agent; /* "User-Agent" header field */
201 #if ENABLE_FEATURE_WGET_TIMEOUT
202 unsigned timeout_seconds;
207 smallint chunked; /* chunked transfer encoding */
208 smallint got_clen; /* got content-length: from server */
209 /* Local downloads do benefit from a big buffer.
210 * With a 512-byte buffer, it was measured to be
211 * an order of magnitude slower than with a big one.
213 uint64_t just_to_align_next_member;
214 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
216 #define G (*ptr_to_globals)
217 #define INIT_G() do { \
218 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
220 #define FINI_G() do { \
221 FREE_PTR_TO_GLOBALS(); \
225 /* Must match option string! */
227 WGET_OPT_CONTINUE = (1 << 0),
228 WGET_OPT_SPIDER = (1 << 1),
229 WGET_OPT_QUIET = (1 << 2),
230 WGET_OPT_OUTNAME = (1 << 3),
231 WGET_OPT_PREFIX = (1 << 4),
232 WGET_OPT_PROXY = (1 << 5),
233 WGET_OPT_USER_AGENT = (1 << 6),
234 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
235 WGET_OPT_RETRIES = (1 << 8),
236 WGET_OPT_PASSIVE = (1 << 9),
237 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
238 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
246 #if ENABLE_FEATURE_WGET_STATUSBAR
247 static void progress_meter(int flag)
249 if (option_mask32 & WGET_OPT_QUIET)
252 if (flag == PROGRESS_START)
253 bb_progress_init(&G.pmt, G.curfile);
255 bb_progress_update(&G.pmt,
258 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
261 if (flag == PROGRESS_END) {
262 bb_progress_free(&G.pmt);
263 bb_putchar_stderr('\n');
268 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
272 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
273 * local addresses can have a scope identifier to specify the
274 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
275 * identifier is only valid on a single node.
277 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
278 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
279 * in the Host header as invalid requests, see
280 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
282 static void strip_ipv6_scope_id(char *host)
286 /* bbox wget actually handles IPv6 addresses without [], like
287 * wget "http://::1/xxx", but this is not standard.
288 * To save code, _here_ we do not support it. */
291 return; /* not IPv6 */
293 scope = strchr(host, '%');
297 /* Remove the IPv6 zone identifier from the host address */
298 cp = strchr(host, ']');
299 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
300 /* malformed address (not "[xx]:nn" or "[xx]") */
304 /* cp points to "]...", scope points to "%eth0]..." */
305 overlapping_strcpy(scope, cp);
308 #if ENABLE_FEATURE_WGET_AUTHENTICATION
309 /* Base64-encode character string. */
310 static char *base64enc(const char *str)
312 unsigned len = strlen(str);
313 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
314 len = sizeof(G.wget_buf)/4*3 - 10;
315 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
320 static char* sanitize_string(char *s)
322 unsigned char *p = (void *) s;
329 #if ENABLE_FEATURE_WGET_TIMEOUT
330 static void alarm_handler(int sig UNUSED_PARAM)
332 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
334 bb_error_msg_and_die("download timed out");
338 static FILE *open_socket(len_and_sockaddr *lsa)
343 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
344 fd = xconnect_stream(lsa);
345 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
347 /* glibc 2.4 seems to try seeking on it - ??! */
348 /* hopefully it understands what ESPIPE means... */
349 fp = fdopen(fd, "r+");
351 bb_perror_msg_and_die(bb_msg_memory_exhausted);
356 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
357 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
358 static char fgets_and_trim(FILE *fp)
363 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
364 bb_perror_msg_and_die("error getting response");
366 buf_ptr = strchrnul(G.wget_buf, '\n');
369 buf_ptr = strchrnul(G.wget_buf, '\r');
372 log_io("< %s", G.wget_buf);
377 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
383 fprintf(fp, "%s%s\r\n", s1, s2);
385 log_io("> %s%s", s1, s2);
390 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
392 G.wget_buf[3] = '\0';
393 result = xatoi_positive(G.wget_buf);
398 static void parse_url(const char *src_url, struct host_info *h)
403 h->allocated = url = xstrdup(src_url);
406 p = strstr(url, "://");
410 if (strcmp(url, P_FTP) == 0) {
411 h->port = bb_lookup_port(P_FTP, "tcp", 21);
413 if (strcmp(url, P_HTTPS) == 0) {
414 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
415 h->protocol = P_HTTPS;
417 if (strcmp(url, P_HTTP) == 0) {
419 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
420 h->protocol = P_HTTP;
423 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
426 // GNU wget is user-friendly and falls back to http://
432 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
433 // 'GET /?var=a/b HTTP/1.0'
434 // and saves 'index.html?var=a%2Fb' (we save 'b')
435 // wget 'http://busybox.net?login=john@doe':
436 // request: 'GET /?login=john@doe HTTP/1.0'
437 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
438 // wget 'http://busybox.net#test/test':
439 // request: 'GET / HTTP/1.0'
440 // saves: 'index.html' (we save 'test')
442 // We also don't add unique .N suffix if file exists...
443 sp = strchr(h->host, '/');
444 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
445 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
448 } else if (*sp == '/') {
451 } else { // '#' or '?'
452 // http://busybox.net?login=john@doe is a valid URL
453 // memmove converts to:
454 // http:/busybox.nett?login=john@doe...
455 memmove(h->host - 1, h->host, sp - h->host);
461 sp = strrchr(h->host, '@');
463 // URL-decode "user:password" string before base64-encoding:
464 // wget http://test:my%20pass@example.com should send
465 // Authorization: Basic dGVzdDpteSBwYXNz
466 // which decodes to "test:my pass".
467 // Standard wget and curl do this too.
470 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
473 /* else: h->user remains NULL, or as set by original request
474 * before redirect (if we are here after a redirect).
478 static char *gethdr(FILE *fp)
483 /* retrieve header line */
484 c = fgets_and_trim(fp);
486 /* end of the headers? */
487 if (G.wget_buf[0] == '\0')
490 /* convert the header name to lower case */
491 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
493 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
494 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
495 * "A-Z" maps to "a-z".
496 * "@[\]" can't occur in header names.
497 * "^_" maps to "~,DEL" (which is wrong).
498 * "^" was never seen yet, "_" was seen from web.archive.org
499 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
504 /* verify we are at the end of the header name */
506 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
508 /* locate the start of the header value */
510 hdrval = skip_whitespace(s);
513 /* Rats! The buffer isn't big enough to hold the entire header value */
514 while (c = getc(fp), c != EOF && c != '\n')
521 static void reset_beg_range_to_zero(void)
523 bb_error_msg("restart failed");
525 xlseek(G.output_fd, 0, SEEK_SET);
526 /* Done at the end instead: */
527 /* ftruncate(G.output_fd, 0); */
530 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
537 target->user = xstrdup("anonymous:busybox@");
539 sfp = open_socket(lsa);
540 if (ftpcmd(NULL, NULL, sfp) != 220)
541 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
544 * Splitting username:password pair,
547 str = strchr(target->user, ':');
550 switch (ftpcmd("USER ", target->user, sfp)) {
554 if (ftpcmd("PASS ", str, sfp) == 230)
556 /* fall through (failed login) */
558 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
561 ftpcmd("TYPE I", NULL, sfp);
566 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
567 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
568 if (G.content_len < 0 || errno) {
569 bb_error_msg_and_die("SIZE value is garbage");
575 * Entering passive mode
577 if (ftpcmd("PASV", NULL, sfp) != 227) {
579 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
581 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
582 // Server's IP is N1.N2.N3.N4 (we ignore it)
583 // Server's port for data connection is P1*256+P2
584 str = strrchr(G.wget_buf, ')');
585 if (str) str[0] = '\0';
586 str = strrchr(G.wget_buf, ',');
587 if (!str) goto pasv_error;
588 port = xatou_range(str+1, 0, 255);
590 str = strrchr(G.wget_buf, ',');
591 if (!str) goto pasv_error;
592 port += xatou_range(str+1, 0, 255) * 256;
593 set_nport(&lsa->u.sa, htons(port));
595 *dfpp = open_socket(lsa);
597 if (G.beg_range != 0) {
598 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
599 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
600 G.content_len -= G.beg_range;
602 reset_beg_range_to_zero();
605 if (ftpcmd("RETR ", target->path, sfp) > 150)
606 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
611 #if ENABLE_FEATURE_WGET_OPENSSL
612 static int spawn_https_helper_openssl(const char *host, unsigned port)
614 char *allocated = NULL;
617 IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)
619 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
620 /* Kernel can have AF_UNIX support disabled */
621 bb_perror_msg_and_die("socketpair");
623 if (!strchr(host, ':'))
624 host = allocated = xasprintf("%s:%u", host, port);
636 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
637 * It prints some debug stuff on stderr, don't know how to suppress it.
638 * Work around by dev-nulling stderr. We lose all error messages :(
641 xopen("/dev/null", O_RDWR);
642 argv[0] = (char*)"openssl";
643 argv[1] = (char*)"s_client";
644 argv[2] = (char*)"-quiet";
645 argv[3] = (char*)"-connect";
646 argv[4] = (char*)host;
648 BB_EXECVP(argv[0], argv);
650 # if ENABLE_FEATURE_WGET_SSL_HELPER
654 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
662 # if ENABLE_FEATURE_WGET_SSL_HELPER
672 /* See networking/ssl_helper/README how to build one */
673 #if ENABLE_FEATURE_WGET_SSL_HELPER
674 static void spawn_https_helper_small(int network_fd)
679 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
680 /* Kernel can have AF_UNIX support disabled */
681 bb_perror_msg_and_die("socketpair");
683 pid = BB_MMU ? xfork() : xvfork();
691 xmove_fd(network_fd, 3);
693 * A simple ssl/tls helper
695 argv[0] = (char*)"ssl_helper";
696 argv[1] = (char*)"-d3";
698 BB_EXECVP(argv[0], argv);
699 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
705 xmove_fd(sp[0], network_fd);
709 static void NOINLINE retrieve_file_data(FILE *dfp)
711 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
712 # if ENABLE_FEATURE_WGET_TIMEOUT
713 unsigned second_cnt = G.timeout_seconds;
715 struct pollfd polldata;
717 polldata.fd = fileno(dfp);
718 polldata.events = POLLIN | POLLPRI;
720 progress_meter(PROGRESS_START);
725 /* Loops only if chunked */
728 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
729 /* Must use nonblocking I/O, otherwise fread will loop
730 * and *block* until it reads full buffer,
731 * which messes up progress bar and/or timeout logic.
732 * Because of nonblocking I/O, we need to dance
733 * very carefully around EAGAIN. See explanation at
736 ndelay_on(polldata.fd);
742 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
743 /* fread internally uses read loop, which in our case
744 * is usually exited when we get EAGAIN.
745 * In this case, libc sets error marker on the stream.
746 * Need to clear it before next fread to avoid possible
747 * rare false positive ferror below. Rare because usually
748 * fread gets more than zero bytes, and we don't fall
749 * into if (n <= 0) ...
754 rdsz = sizeof(G.wget_buf);
756 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
757 if ((int)G.content_len <= 0)
759 rdsz = (unsigned)G.content_len;
762 n = fread(G.wget_buf, 1, rdsz, dfp);
765 xwrite(G.output_fd, G.wget_buf, n);
766 #if ENABLE_FEATURE_WGET_STATUSBAR
771 if (G.content_len == 0)
774 #if ENABLE_FEATURE_WGET_TIMEOUT
775 second_cnt = G.timeout_seconds;
782 * If error occurs, or EOF is reached, the return value
783 * is a short item count (or zero).
784 * fread does not distinguish between EOF and error.
786 if (errno != EAGAIN) {
788 progress_meter(PROGRESS_END);
789 bb_perror_msg_and_die(bb_msg_read_error);
791 break; /* EOF, not error */
794 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
795 /* It was EAGAIN. There is no data. Wait up to one second
796 * then abort if timed out, or update the bar and try reading again.
798 if (safe_poll(&polldata, 1, 1000) == 0) {
799 # if ENABLE_FEATURE_WGET_TIMEOUT
800 if (second_cnt != 0 && --second_cnt == 0) {
801 progress_meter(PROGRESS_END);
802 bb_error_msg_and_die("download timed out");
805 /* We used to loop back to poll here,
806 * but there is no great harm in letting fread
807 * try reading anyway.
812 /* Need to do it _every_ second for "stalled" indicator
813 * to be shown properly.
815 progress_meter(PROGRESS_BUMP);
816 } /* while (reading data) */
818 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
820 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
825 fgets_and_trim(dfp); /* Eat empty line */
828 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
829 /* FIXME: error check? */
830 if (G.content_len == 0)
831 break; /* all done! */
834 * Note that fgets may result in some data being buffered in dfp.
835 * We loop back to fread, which will retrieve this data.
836 * Also note that code has to be arranged so that fread
837 * is done _before_ one-second poll wait - poll doesn't know
838 * about stdio buffering and can result in spurious one second waits!
842 /* If -c failed, we restart from the beginning,
843 * but we do not truncate file then, we do it only now, at the end.
844 * This lets the user hit ^C if a 99% complete 10 GB file download
845 * fails to restart, *without* losing the almost complete file.
848 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
849 if (pos != (off_t)-1)
850 ftruncate(G.output_fd, pos);
853 /* Draw full bar and free its resources */
854 G.chunked = 0; /* makes it show 100% even for chunked download */
855 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
856 progress_meter(PROGRESS_END);
859 static void download_one_url(const char *url)
861 bool use_proxy; /* Use proxies if env vars are set */
863 len_and_sockaddr *lsa;
864 FILE *sfp; /* socket to web/ftp server */
865 FILE *dfp; /* socket to ftp server (data) */
867 char *fname_out_alloc;
868 char *redirected_path = NULL;
869 struct host_info server;
870 struct host_info target;
872 server.allocated = NULL;
873 target.allocated = NULL;
877 parse_url(url, &target);
879 /* Use the proxy if necessary */
880 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
882 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
883 //FIXME: what if protocol is https? Ok to use http_proxy?
884 use_proxy = (proxy && proxy[0]);
886 parse_url(proxy, &server);
889 server.port = target.port;
890 if (ENABLE_FEATURE_IPV6) {
891 //free(server.allocated); - can't be non-NULL
892 server.host = server.allocated = xstrdup(target.host);
894 server.host = target.host;
898 if (ENABLE_FEATURE_IPV6)
899 strip_ipv6_scope_id(target.host);
901 /* If there was no -O FILE, guess output filename */
902 fname_out_alloc = NULL;
903 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
904 G.fname_out = bb_get_last_path_component_nostrip(target.path);
905 /* handle "wget http://kernel.org//" */
906 if (G.fname_out[0] == '/' || !G.fname_out[0])
907 G.fname_out = (char*)"index.html";
908 /* -P DIR is considered only if there was no -O FILE */
910 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
912 /* redirects may free target.path later, need to make a copy */
913 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
916 #if ENABLE_FEATURE_WGET_STATUSBAR
917 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
920 /* Determine where to start transfer */
922 if (option_mask32 & WGET_OPT_CONTINUE) {
923 G.output_fd = open(G.fname_out, O_WRONLY);
924 if (G.output_fd >= 0) {
925 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
927 /* File doesn't exist. We do not create file here yet.
928 * We are not sure it exists on remote side */
933 lsa = xhost2sockaddr(server.host, server.port);
934 if (!(option_mask32 & WGET_OPT_QUIET)) {
935 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
936 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
940 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
943 if (use_proxy || target.protocol != P_FTP) {
950 /* Open socket to http(s) server */
951 #if ENABLE_FEATURE_WGET_OPENSSL
952 /* openssl (and maybe ssl_helper) support is configured */
953 if (target.protocol == P_HTTPS) {
954 /* openssl-based helper
955 * Inconvenient API since we can't give it an open fd
957 int fd = spawn_https_helper_openssl(server.host, server.port);
958 # if ENABLE_FEATURE_WGET_SSL_HELPER
959 if (fd < 0) { /* no openssl? try ssl_helper */
960 sfp = open_socket(lsa);
961 spawn_https_helper_small(fileno(sfp));
965 /* We don't check for exec("openssl") failure in this case */
967 sfp = fdopen(fd, "r+");
969 bb_perror_msg_and_die(bb_msg_memory_exhausted);
972 sfp = open_socket(lsa);
974 #elif ENABLE_FEATURE_WGET_SSL_HELPER
975 /* Only ssl_helper support is configured */
976 sfp = open_socket(lsa);
977 if (target.protocol == P_HTTPS)
978 spawn_https_helper_small(fileno(sfp));
980 /* ssl (https) support is not configured */
981 sfp = open_socket(lsa);
983 /* Send HTTP request */
985 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
986 target.protocol, target.host,
989 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
990 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
993 if (!USR_HEADER_HOST)
994 SENDFMT(sfp, "Host: %s\r\n", target.host);
995 if (!USR_HEADER_USER_AGENT)
996 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
998 /* Ask server to close the connection as soon as we are done
999 * (IOW: we do not intend to send more requests)
1001 SENDFMT(sfp, "Connection: close\r\n");
1003 #if ENABLE_FEATURE_WGET_AUTHENTICATION
1004 if (target.user && !USR_HEADER_AUTH) {
1005 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1006 base64enc(target.user));
1008 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1009 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1010 base64enc(server.user));
1014 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1015 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1017 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1018 if (G.extra_headers) {
1019 log_io(G.extra_headers);
1020 fputs(G.extra_headers, sfp);
1023 if (option_mask32 & WGET_OPT_POST_DATA) {
1025 "Content-Type: application/x-www-form-urlencoded\r\n"
1026 "Content-Length: %u\r\n"
1029 (int) strlen(G.post_data), G.post_data
1034 SENDFMT(sfp, "\r\n");
1040 * Retrieve HTTP response line and check for "200" status code.
1043 fgets_and_trim(sfp);
1046 str = skip_non_whitespace(str);
1047 str = skip_whitespace(str);
1048 // FIXME: no error check
1049 // xatou wouldn't work: "200 OK"
1054 while (gethdr(sfp) != NULL)
1055 /* eat all remaining headers */;
1059 Response 204 doesn't say "null file", it says "metadata
1060 has changed but data didn't":
1062 "10.2.5 204 No Content
1063 The server has fulfilled the request but does not need to return
1064 an entity-body, and might want to return updated metainformation.
1065 The response MAY include new or updated metainformation in the form
1066 of entity-headers, which if present SHOULD be associated with
1067 the requested variant.
1069 If the client is a user agent, it SHOULD NOT change its document
1070 view from that which caused the request to be sent. This response
1071 is primarily intended to allow input for actions to take place
1072 without causing a change to the user agent's active document view,
1073 although any new or updated metainformation SHOULD be applied
1074 to the document currently in the user agent's active view.
1076 The 204 response MUST NOT include a message-body, and thus
1077 is always terminated by the first empty line after the header fields."
1079 However, in the real world it was observed that some web servers
1080 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1083 if (G.beg_range != 0) {
1084 /* "Range:..." was not honored by the server.
1085 * Restart download from the beginning.
1087 reset_beg_range_to_zero();
1090 case 300: /* redirection */
1095 case 206: /* Partial Content */
1096 if (G.beg_range != 0)
1097 /* "Range:..." worked. Good. */
1099 /* Partial Content even though we did not ask for it??? */
1102 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1106 * Retrieve HTTP headers.
1108 while ((str = gethdr(sfp)) != NULL) {
1109 static const char keywords[] ALIGN1 =
1110 "content-length\0""transfer-encoding\0""location\0";
1112 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1116 /* gethdr converted "FOO:" string to lowercase */
1118 /* strip trailing whitespace */
1119 char *s = strchrnul(str, '\0') - 1;
1120 while (s >= str && (*s == ' ' || *s == '\t')) {
1124 key = index_in_strings(keywords, G.wget_buf) + 1;
1125 if (key == KEY_content_length) {
1126 G.content_len = BB_STRTOOFF(str, NULL, 10);
1127 if (G.content_len < 0 || errno) {
1128 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1133 if (key == KEY_transfer_encoding) {
1134 if (strcmp(str_tolower(str), "chunked") != 0)
1135 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1138 if (key == KEY_location && status >= 300) {
1139 if (--redir_limit == 0)
1140 bb_error_msg_and_die("too many redirections");
1142 if (str[0] == '/') {
1143 free(redirected_path);
1144 target.path = redirected_path = xstrdup(str+1);
1145 /* lsa stays the same: it's on the same server */
1147 parse_url(str, &target);
1149 /* server.user remains untouched */
1150 free(server.allocated);
1151 server.allocated = NULL;
1152 server.host = target.host;
1153 /* strip_ipv6_scope_id(target.host); - no! */
1154 /* we assume remote never gives us IPv6 addr with scope id */
1155 server.port = target.port;
1158 } /* else: lsa stays the same: we use proxy */
1160 goto establish_session;
1163 // if (status >= 300)
1164 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1166 /* For HTTP, data is pumped over the same connection */
1172 sfp = prepare_ftp_session(&dfp, &target, lsa);
1177 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1178 if (G.output_fd < 0)
1179 G.output_fd = xopen(G.fname_out, G.o_flags);
1180 retrieve_file_data(dfp);
1181 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1182 xclose(G.output_fd);
1188 /* It's ftp. Close data connection properly */
1190 if (ftpcmd(NULL, NULL, sfp) != 226)
1191 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1192 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1196 free(server.allocated);
1197 free(target.allocated);
1200 free(fname_out_alloc);
1201 free(redirected_path);
1204 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1205 int wget_main(int argc UNUSED_PARAM, char **argv)
1207 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1208 static const char wget_longopts[] ALIGN1 =
1209 /* name, has_arg, val */
1210 "continue\0" No_argument "c"
1211 //FIXME: -s isn't --spider, it's --save-headers!
1212 "spider\0" No_argument "s"
1213 "quiet\0" No_argument "q"
1214 "output-document\0" Required_argument "O"
1215 "directory-prefix\0" Required_argument "P"
1216 "proxy\0" Required_argument "Y"
1217 "user-agent\0" Required_argument "U"
1218 #if ENABLE_FEATURE_WGET_TIMEOUT
1219 "timeout\0" Required_argument "T"
1222 // "tries\0" Required_argument "t"
1223 /* Ignored (we always use PASV): */
1224 "passive-ftp\0" No_argument "\xff"
1225 "header\0" Required_argument "\xfe"
1226 "post-data\0" Required_argument "\xfd"
1227 /* Ignored (we don't do ssl) */
1228 "no-check-certificate\0" No_argument "\xfc"
1229 /* Ignored (we don't support caching) */
1230 "no-cache\0" No_argument "\xfb"
1234 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1235 llist_t *headers_llist = NULL;
1240 #if ENABLE_FEATURE_WGET_TIMEOUT
1241 G.timeout_seconds = 900;
1242 signal(SIGALRM, alarm_handler);
1244 G.proxy_flag = "on"; /* use proxies if env vars are set */
1245 G.user_agent = "Wget"; /* "User-Agent" header field */
1247 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1248 applet_long_options = wget_longopts;
1250 opt_complementary = "-1"
1251 IF_FEATURE_WGET_TIMEOUT(":T+")
1252 IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
1253 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
1254 &G.fname_out, &G.dir_prefix,
1255 &G.proxy_flag, &G.user_agent,
1256 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1257 NULL /* -t RETRIES */
1258 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1259 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1263 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1264 if (headers_llist) {
1267 llist_t *ll = headers_llist;
1269 size += strlen(ll->data) + 2;
1272 G.extra_headers = hdr = xmalloc(size + 1);
1273 while (headers_llist) {
1277 size = sprintf(hdr, "%s\r\n",
1278 (char*)llist_pop(&headers_llist));
1279 /* a bit like index_in_substrings but don't match full key */
1281 words = wget_user_headers;
1283 if (strstr(hdr, words) == hdr) {
1284 G.user_headers |= bit;
1288 words += strlen(words) + 1;
1296 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1297 if (G.fname_out) { /* -O FILE ? */
1298 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1300 option_mask32 &= ~WGET_OPT_CONTINUE;
1302 /* compat with wget: -O FILE can overwrite */
1303 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1307 download_one_url(*argv++);
1309 if (G.output_fd >= 0)
1310 xclose(G.output_fd);
1312 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1313 free(G.extra_headers);
1317 return EXIT_SUCCESS;