1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //usage:#define wget_trivial_usage
13 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage: " [--no-check-certificate] [--no-cache]" */
18 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
20 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
24 //usage:#define wget_full_usage "\n\n"
25 //usage: "Retrieve files via HTTP or FTP\n"
26 //usage: "\n -s Spider mode - only check file existence"
27 //usage: "\n -c Continue retrieval of aborted transfer"
28 //usage: "\n -q Quiet"
29 //usage: "\n -P DIR Save to DIR (default .)"
30 //usage: IF_FEATURE_WGET_TIMEOUT(
31 //usage: "\n -T SEC Network read timeout is SEC seconds"
33 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
34 //usage: "\n -U STR Use STR for User-Agent header"
35 //usage: "\n -Y Use proxy ('on' or 'off')"
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
42 # define log_io(...) ((void)0)
// Protocol name strings. parse_url() compares the URL scheme against these
// with strcmp() and stores one of these pointers in h->protocol;
// download_one_url() then tests target.protocol by pointer identity
// (target.protocol == P_FTP, == P_HTTPS), not by string comparison.
54 static const char P_FTP[] = "ftp";
55 static const char P_HTTP[] = "http";
56 static const char P_HTTPS[] = "https";
61 off_t content_len; /* Content-length of the file */
62 off_t beg_range; /* Range at which continue begins */
63 #if ENABLE_FEATURE_WGET_STATUSBAR
64 off_t transferred; /* Number of bytes transferred so far */
65 const char *curfile; /* Name of current file being transferred */
69 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
73 char *fname_out; /* where to direct output (-O) */
74 const char *proxy_flag; /* Use proxies if env vars are set */
75 const char *user_agent; /* "User-Agent" header field */
76 #if ENABLE_FEATURE_WGET_TIMEOUT
77 unsigned timeout_seconds;
82 smallint chunked; /* chunked transfer encoding */
83 smallint got_clen; /* got content-length: from server */
84 /* Local downloads do benefit from big buffer.
85 * With 512 byte buffer, it was measured to be
86 * an order of magnitude slower than with big one.
88 uint64_t just_to_align_next_member;
89 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
91 #define G (*ptr_to_globals)
92 #define INIT_G() do { \
93 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
95 #define FINI_G() do { \
96 FREE_PTR_TO_GLOBALS(); \
100 /* Must match option string! */
102 WGET_OPT_CONTINUE = (1 << 0),
103 WGET_OPT_SPIDER = (1 << 1),
104 WGET_OPT_QUIET = (1 << 2),
105 WGET_OPT_OUTNAME = (1 << 3),
106 WGET_OPT_PREFIX = (1 << 4),
107 WGET_OPT_PROXY = (1 << 5),
108 WGET_OPT_USER_AGENT = (1 << 6),
109 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
110 WGET_OPT_RETRIES = (1 << 8),
111 WGET_OPT_PASSIVE = (1 << 9),
112 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
113 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
121 #if ENABLE_FEATURE_WGET_STATUSBAR
// Drive the transfer progress display for the file named by G.curfile.
// flag is PROGRESS_START, PROGRESS_BUMP or PROGRESS_END (the values passed
// by retrieve_file_data). The quiet option (WGET_OPT_QUIET) is checked first.
// NOTE(review): the statement guarded by the quiet check is not visible
// here; presumably an early return - confirm.
122 static void progress_meter(int flag)
124 if (option_mask32 & WGET_OPT_QUIET)
127 if (flag == PROGRESS_START)
128 bb_progress_init(&G.pmt, G.curfile);
130 bb_progress_update(&G.pmt,
// Total size argument: 0 is passed when the transfer is chunked or no
// content length was obtained; otherwise the sum of the resume offset,
// the bytes transferred so far and G.content_len.
133 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
136 if (flag == PROGRESS_END) {
137 bb_progress_free(&G.pmt);
138 bb_putchar_stderr('\n');
// Empty variant of progress_meter: calls compile to nothing.
// NOTE(review): presumably the alternative branch of the
// ENABLE_FEATURE_WGET_STATUSBAR conditional - confirm.
143 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
147 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
148 * local addresses can have a scope identifier to specify the
149 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
150 * identifier is only valid on a single node.
152 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
153 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
154 * in the Host header as invalid requests, see
155 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
// Remove a %scope zone identifier from a bracketed IPv6 host string,
// editing host in place: the text from the first % up to the closing
// bracket is dropped by copying the bracket-and-tail over it, so
// [fe80::1%eth0]:80 becomes [fe80::1]:80.
// The string is left alone when the closing bracket is missing or is
// followed by anything other than a colon or the end of the string.
157 static void strip_ipv6_scope_id(char *host)
161 /* bbox wget actually handles IPv6 addresses without [], like
162 * wget "http://::1/xxx", but this is not standard.
163 * To save code, _here_ we do not support it. */
166 return; /* not IPv6 */
168 scope = strchr(host, '%');
172 /* Remove the IPv6 zone identifier from the host address */
173 cp = strchr(host, ']');
174 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
175 /* malformed address (not "[xx]:nn" or "[xx]") */
179 /* cp points to "]...", scope points to "%eth0]..." */
180 overlapping_strcpy(scope, cp);
183 #if ENABLE_FEATURE_WGET_AUTHENTICATION
184 /* Base64-encode character string. */
// The encoded text is written into the shared G.wget_buf, so it only stays
// valid until the next user of that buffer; callers in this file print it
// immediately with fprintf.
// Input longer than sizeof(G.wget_buf)/4*3 - 10 bytes is silently cut to
// that length before encoding.
185 static char *base64enc(const char *str)
187 unsigned len = strlen(str);
188 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
189 len = sizeof(G.wget_buf)/4*3 - 10;
190 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
// Callers in this file wrap server- or user-supplied text in this function
// before passing it to bb_error_msg_and_die() as a %s argument.
// NOTE(review): only the first body line is visible here; the filtering
// rule and the return value are not shown - confirm before relying on them.
195 static char* sanitize_string(char *s)
197 unsigned char *p = (void *) s;
204 #if ENABLE_FEATURE_WGET_TIMEOUT
// SIGALRM handler, installed by wget_main via signal(). open_socket arms
// alarm() with G.timeout_seconds before connecting.
205 static void alarm_handler(int sig UNUSED_PARAM)
207 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
// NOTE(review): a guarding condition may precede this call (the line is
// not visible here) - confirm.
209 bb_error_msg_and_die("download timed out");
// Connect to lsa with xconnect_stream() and wrap the connected descriptor
// in a read/write ("r+") stdio stream.
// With FEATURE_WGET_TIMEOUT, alarm() is armed with G.timeout_seconds and
// G.connecting is set to 1 only around the connect call.
// The bb_perror_msg_and_die() call below is the fdopen failure path.
// NOTE(review): the NULL check guarding it and the return statement are
// not visible here.
213 static FILE *open_socket(len_and_sockaddr *lsa)
218 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
219 fd = xconnect_stream(lsa);
220 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
222 /* glibc 2.4 seems to try seeking on it - ??! */
223 /* hopefully it understands what ESPIPE means... */
224 fp = fdopen(fd, "r+");
226 bb_perror_msg_and_die(bb_msg_memory_exhausted);
231 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
232 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
// Reads one line from fp into the shared G.wget_buf. fgets is given
// sizeof(G.wget_buf) - 1 as its size, so at most sizeof(G.wget_buf) - 2
// characters of the line are stored; a longer line is left partly unread.
// Dies with an error message when fgets returns NULL (EOF or read error).
// The trimmed line is passed to log_io() with a leading "< ".
233 static char fgets_and_trim(FILE *fp)
238 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
239 bb_perror_msg_and_die("error getting response");
241 buf_ptr = strchrnul(G.wget_buf, '\n');
244 buf_ptr = strchrnul(G.wget_buf, '\r');
247 log_io("< %s", G.wget_buf);
// Send the command s1 followed by s2 and CRLF on the FTP control stream fp,
// then read reply lines into G.wget_buf until one starts with a digit and
// has a space as its fourth character. The three characters before that
// space are parsed as the numeric reply code, which is the result.
// Callers pass s1 == NULL to read a reply without sending anything, and
// read the reply text from G.wget_buf + 4 for error messages.
// NOTE(review): the NULL handling for s1/s2 and the return statement are
// on lines not visible here.
252 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
258 fprintf(fp, "%s%s\r\n", s1, s2);
260 log_io("> %s%s", s1, s2);
265 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
267 G.wget_buf[3] = '\0';
268 result = xatoi_positive(G.wget_buf);
// Split src_url into the fields of *h. The URL is duplicated into
// h->allocated and parsed in place; h->host points into that copy.
// Scheme ftp, https or http selects h->port via bb_lookup_port() with the
// defaults 21, 443 and 80; any other scheme is fatal.
// The host part ends at the first slash, question mark or hash.
// A user:password prefix before the last @ in the host part is
// percent-decoded and duplicated into h->user.
// NOTE(review): the assignments of h->path and the handling of a URL with
// no scheme separator are on lines not visible here.
273 static void parse_url(const char *src_url, struct host_info *h)
278 h->allocated = url = xstrdup(src_url);
281 p = strstr(url, "://");
285 if (strcmp(url, P_FTP) == 0) {
286 h->port = bb_lookup_port(P_FTP, "tcp", 21);
288 if (strcmp(url, P_HTTPS) == 0) {
289 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
290 h->protocol = P_HTTPS;
292 if (strcmp(url, P_HTTP) == 0) {
294 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
295 h->protocol = P_HTTP;
298 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
301 // GNU wget is user-friendly and falls back to http://
307 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
308 // 'GET /?var=a/b HTTP/1.0'
309 // and saves 'index.html?var=a%2Fb' (we save 'b')
310 // wget 'http://busybox.net?login=john@doe':
311 // request: 'GET /?login=john@doe HTTP/1.0'
312 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
313 // wget 'http://busybox.net#test/test':
314 // request: 'GET / HTTP/1.0'
315 // saves: 'index.html' (we save 'test')
317 // We also don't add unique .N suffix if file exists...
318 sp = strchr(h->host, '/');
319 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
320 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
323 } else if (*sp == '/') {
326 } else { // '#' or '?'
327 // http://busybox.net?login=john@doe is a valid URL
328 // memmove converts to:
329 // http:/busybox.nett?login=john@doe...
330 memmove(h->host - 1, h->host, sp - h->host);
336 sp = strrchr(h->host, '@');
338 // URL-decode "user:password" string before base64-encoding:
339 // wget http://test:my%20pass@example.com should send
340 // Authorization: Basic dGVzdDpteSBwYXNz
341 // which decodes to "test:my pass".
342 // Standard wget and curl do this too.
345 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
348 /* else: h->user remains NULL, or as set by original request
349 * before redirect (if we are here after a redirect).
// Read one response header line from fp into G.wget_buf via
// fgets_and_trim(), lowercase the header name in place and locate the
// header value after it. An empty line marks the end of the headers.
// Callers loop until this returns NULL, use the returned pointer as the
// header value and match G.wget_buf against lowercase header names.
// A malformed header line is fatal.
// When the line did not fit into the buffer, the rest of it is read and
// discarded character by character.
353 static char *gethdr(FILE *fp)
358 /* retrieve header line */
359 c = fgets_and_trim(fp);
361 /* end of the headers? */
362 if (G.wget_buf[0] == '\0')
365 /* convert the header name to lower case */
366 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
368 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
369 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
370 * "A-Z" maps to "a-z".
371 * "@[\]" can't occur in header names.
372 * "^_" maps to "~,DEL" (which is wrong).
373 * "^" was never seen yet, "_" was seen from web.archive.org
374 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
379 /* verify we are at the end of the header name */
381 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
383 /* locate the start of the header value */
385 hdrval = skip_whitespace(s);
388 /* Rats! The buffer isn't big enough to hold the entire header value */
389 while (c = getc(fp), c != EOF && c != '\n')
// Called when the server did not honor a resume request (after a failed
// REST in prepare_ftp_session, or an ignored Range: header in
// download_one_url): reports the failure and rewinds G.output_fd to
// offset 0. The file is not truncated here; retrieve_file_data does that
// at the end of the download.
// NOTE(review): going by its name this also resets G.beg_range, but that
// line is not visible here - confirm.
396 static void reset_beg_range_to_zero(void)
398 bb_error_msg("restart failed");
400 xlseek(G.output_fd, 0, SEEK_SET);
401 /* Done at the end instead: */
402 /* ftruncate(G.output_fd, 0); */
// Set up an FTP download of target->path from the server at lsa:
//  - open the control connection and expect greeting code 220;
//  - log in with target->user (user:password, default anonymous:busybox@);
//  - send TYPE I (its reply is not checked);
//  - ask SIZE; reply 213 supplies G.content_len, any other reply is ignored;
//  - send PASV, take the last two comma-separated numbers of the reply as
//    the data port (P1*256+P2) and overwrite the port in lsa with it;
//  - open the data connection and store its stream in *dfpp;
//  - when resuming (G.beg_range != 0) send REST, falling back to a full
//    download if the reply is not 350;
//  - send RETR.
// Greeting, login, PASV and RETR failures are fatal.
// The caller assigns the return value to its control-stream variable.
// Side effects: target->user is split in place at the first colon, and
// G.wget_buf is reused for replies and for building the REST command.
405 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
412 target->user = xstrdup("anonymous:busybox@");
414 sfp = open_socket(lsa);
415 if (ftpcmd(NULL, NULL, sfp) != 220)
416 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
419 * Splitting username:password pair,
422 str = strchr(target->user, ':');
425 switch (ftpcmd("USER ", target->user, sfp)) {
429 if (ftpcmd("PASS ", str, sfp) == 230)
431 /* fall through (failed login) */
433 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
436 ftpcmd("TYPE I", NULL, sfp);
441 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
442 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
443 if (G.content_len < 0 || errno) {
444 bb_error_msg_and_die("SIZE value is garbage");
450 * Entering passive mode
452 if (ftpcmd("PASV", NULL, sfp) != 227) {
454 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
456 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
457 // Server's IP is N1.N2.N3.N4 (we ignore it)
458 // Server's port for data connection is P1*256+P2
459 str = strrchr(G.wget_buf, ')');
460 if (str) str[0] = '\0';
461 str = strrchr(G.wget_buf, ',');
462 if (!str) goto pasv_error;
463 port = xatou_range(str+1, 0, 255);
465 str = strrchr(G.wget_buf, ',');
466 if (!str) goto pasv_error;
467 port += xatou_range(str+1, 0, 255) * 256;
468 set_nport(&lsa->u.sa, htons(port));
470 *dfpp = open_socket(lsa);
472 if (G.beg_range != 0) {
473 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
474 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
475 G.content_len -= G.beg_range;
477 reset_beg_range_to_zero();
// Any reply code above 150 to RETR is treated as a failure.
480 if (ftpcmd("RETR ", target->path, sfp) > 150)
481 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
// HTTPS transport through an external openssl process.
// Creates an AF_UNIX stream socketpair, forks (vfork on non-MMU builds)
// and runs: openssl s_client -quiet -connect HOST:PORT
// host gets :port appended unless it already contains a colon.
// /dev/null is opened before the exec (see the existing note on silencing
// stderr). download_one_url fdopen()s the returned descriptor and uses it
// as the server stream.
// NOTE(review): the descriptor shuffling between socketpair and exec, the
// parent-side cleanup and the return statement are on lines not visible
// here.
486 static int spawn_https_helper(const char *host, unsigned port)
488 char *allocated = NULL;
492 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
493 /* Kernel can have AF_UNIX support disabled */
494 bb_perror_msg_and_die("socketpair");
496 if (!strchr(host, ':'))
497 host = allocated = xasprintf("%s:%u", host, port);
499 pid = BB_MMU ? xfork() : xvfork();
508 * TODO: develop a tiny ssl/tls helper (using matrixssl?),
509 * try to exec it here before falling back to big fat openssl.
512 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
513 * It prints some debug stuff on stderr, don't know how to suppress it.
514 * Work around by dev-nulling stderr. We lose all error messages :(
517 xopen("/dev/null", O_RDWR);
518 argv[0] = (char*)"openssl";
519 argv[1] = (char*)"s_client";
520 argv[2] = (char*)"-quiet";
521 argv[3] = (char*)"-connect";
522 argv[4] = (char*)host;
524 BB_EXECVP(argv[0], argv);
526 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
536 /* See networking/ssl_helper/README */
// Alternative HTTPS transport: runs the external program ssl_helper with
// the argument -d3 on an already connected socket.
// Creates an AF_UNIX stream socketpair and forks (vfork on non-MMU builds).
// The connected socket is moved to descriptor 3 before the exec, and
// sp[0] is moved onto network_fd so the existing stream of the caller
// talks to the helper instead of the network.
// NOTE(review): which of these steps run in the child and which in the
// parent is decided on lines not visible here.
540 static void spawn_https_helper1(int network_fd)
545 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
546 /* Kernel can have AF_UNIX support disabled */
547 bb_perror_msg_and_die("socketpair");
549 pid = BB_MMU ? xfork() : xvfork();
557 xmove_fd(network_fd, 3);
559 * A simple ssl/tls helper
561 argv[0] = (char*)"ssl_helper";
562 argv[1] = (char*)"-d3";
564 BB_EXECVP(argv[0], argv);
565 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
571 xmove_fd(sp[0], network_fd);
// Copy the response body from dfp to G.output_fd through G.wget_buf.
// Each fread is limited to the buffer size, or to the remaining
// G.content_len when that is smaller.
// With the status bar or timeout feature enabled, the descriptor is put in
// non-blocking mode; when no data is available the code polls for up to one
// second at a time, dies after G.timeout_seconds consecutive idle seconds,
// and bumps the progress meter.
// For chunked transfers the next chunk size is parsed as a hexadecimal
// number from a line read with fgets_and_trim(); a size of 0 ends the
// download.
// At the end the output file is truncated at the current position and the
// progress meter is finished.
// NOTE(review): the conditions guarding several of these steps are on lines
// not visible here.
575 static void NOINLINE retrieve_file_data(FILE *dfp)
577 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
578 # if ENABLE_FEATURE_WGET_TIMEOUT
579 unsigned second_cnt = G.timeout_seconds;
581 struct pollfd polldata;
583 polldata.fd = fileno(dfp);
584 polldata.events = POLLIN | POLLPRI;
586 progress_meter(PROGRESS_START);
591 /* Loops only if chunked */
594 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
595 /* Must use nonblocking I/O, otherwise fread will loop
596 * and *block* until it reads full buffer,
597 * which messes up progress bar and/or timeout logic.
598 * Because of nonblocking I/O, we need to dance
599 * very carefully around EAGAIN. See explanation at
602 ndelay_on(polldata.fd);
608 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
609 /* fread internally uses read loop, which in our case
610 * is usually exited when we get EAGAIN.
611 * In this case, libc sets error marker on the stream.
612 * Need to clear it before next fread to avoid possible
613 * rare false positive ferror below. Rare because usually
614 * fread gets more than zero bytes, and we don't fall
615 * into if (n <= 0) ...
620 rdsz = sizeof(G.wget_buf);
622 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
623 if ((int)G.content_len <= 0)
625 rdsz = (unsigned)G.content_len;
628 n = fread(G.wget_buf, 1, rdsz, dfp);
631 xwrite(G.output_fd, G.wget_buf, n);
632 #if ENABLE_FEATURE_WGET_STATUSBAR
637 if (G.content_len == 0)
640 #if ENABLE_FEATURE_WGET_TIMEOUT
641 second_cnt = G.timeout_seconds;
648 * If error occurs, or EOF is reached, the return value
649 * is a short item count (or zero).
650 * fread does not distinguish between EOF and error.
652 if (errno != EAGAIN) {
654 progress_meter(PROGRESS_END);
655 bb_perror_msg_and_die(bb_msg_read_error);
657 break; /* EOF, not error */
660 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
661 /* It was EAGAIN. There is no data. Wait up to one second
662 * then abort if timed out, or update the bar and try reading again.
664 if (safe_poll(&polldata, 1, 1000) == 0) {
665 # if ENABLE_FEATURE_WGET_TIMEOUT
666 if (second_cnt != 0 && --second_cnt == 0) {
667 progress_meter(PROGRESS_END);
668 bb_error_msg_and_die("download timed out");
671 /* We used to loop back to poll here,
672 * but there is no great harm in letting fread
673 * try reading anyway.
678 /* Need to do it _every_ second for "stalled" indicator
679 * to be shown properly.
681 progress_meter(PROGRESS_BUMP);
682 } /* while (reading data) */
684 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
686 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
691 fgets_and_trim(dfp); /* Eat empty line */
694 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
695 /* FIXME: error check? */
696 if (G.content_len == 0)
697 break; /* all done! */
700 * Note that fgets may result in some data being buffered in dfp.
701 * We loop back to fread, which will retrieve this data.
702 * Also note that code has to be arranged so that fread
703 * is done _before_ one-second poll wait - poll doesn't know
704 * about stdio buffering and can result in spurious one second waits!
708 /* If -c failed, we restart from the beginning,
709 * but we do not truncate file then, we do it only now, at the end.
710 * This lets the user ^C if their 99% complete 10 GB file download
711 * failed to restart *without* losing the almost complete file.
714 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
715 if (pos != (off_t)-1)
716 ftruncate(G.output_fd, pos);
719 /* Draw full bar and free its resources */
720 G.chunked = 0; /* makes it show 100% even for chunked download */
721 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
722 progress_meter(PROGRESS_END);
// Download a single URL.
//  1. parse_url() fills target. Unless the proxy flag is off, the proxy URL
//     is taken from the ftp_proxy or http_proxy environment variable and
//     parsed into server; without a proxy, server mirrors the host and port
//     of target.
//  2. Without -O, the output name is the last path component of the URL
//     (index.html when that is empty), optionally prefixed with the -P
//     directory.
//  3. With -c, an existing output file is opened and its size becomes the
//     resume offset G.beg_range.
//  4. For HTTP(S), or any protocol through a proxy: send the request, read
//     the status line and headers (content-length, transfer-encoding,
//     location), following redirects up to a limit. Otherwise run the FTP
//     session via prepare_ftp_session().
//  5. Unless --spider was given, open the output file if needed and stream
//     the body with retrieve_file_data().
// NOTE(review): several conditions, labels and the switch on the HTTP
// status are on lines not visible here.
725 static void download_one_url(const char *url)
727 bool use_proxy; /* Use proxies if env vars are set */
729 len_and_sockaddr *lsa;
730 FILE *sfp; /* socket to web/ftp server */
731 FILE *dfp; /* socket to ftp server (data) */
733 char *fname_out_alloc;
734 char *redirected_path = NULL;
735 struct host_info server;
736 struct host_info target;
738 server.allocated = NULL;
739 target.allocated = NULL;
743 parse_url(url, &target);
745 /* Use the proxy if necessary */
746 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
748 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
749 //FIXME: what if protocol is https? Ok to use http_proxy?
750 use_proxy = (proxy && proxy[0]);
752 parse_url(proxy, &server);
755 server.port = target.port;
756 if (ENABLE_FEATURE_IPV6) {
757 //free(server.allocated); - can't be non-NULL
758 server.host = server.allocated = xstrdup(target.host);
760 server.host = target.host;
764 if (ENABLE_FEATURE_IPV6)
765 strip_ipv6_scope_id(target.host);
767 /* If there was no -O FILE, guess output filename */
768 fname_out_alloc = NULL;
769 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
770 G.fname_out = bb_get_last_path_component_nostrip(target.path);
771 /* handle "wget http://kernel.org//" */
772 if (G.fname_out[0] == '/' || !G.fname_out[0])
773 G.fname_out = (char*)"index.html";
774 /* -P DIR is considered only if there was no -O FILE */
776 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
778 /* redirects may free target.path later, need to make a copy */
779 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
782 #if ENABLE_FEATURE_WGET_STATUSBAR
783 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
786 /* Determine where to start transfer */
788 if (option_mask32 & WGET_OPT_CONTINUE) {
789 G.output_fd = open(G.fname_out, O_WRONLY);
790 if (G.output_fd >= 0) {
791 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
793 /* File doesn't exist. We do not create file here yet.
794 * We are not sure it exists on remote side */
799 lsa = xhost2sockaddr(server.host, server.port);
800 if (!(option_mask32 & WGET_OPT_QUIET)) {
801 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
802 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
806 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
809 if (use_proxy || target.protocol != P_FTP) {
816 /* Open socket to http(s) server */
817 if (target.protocol == P_HTTPS) {
818 /* openssl-based helper
819 * Inconvenient API since we can't give it an open fd
821 int fd = spawn_https_helper(server.host, server.port);
822 sfp = fdopen(fd, "r+");
824 bb_perror_msg_and_die(bb_msg_memory_exhausted);
826 sfp = open_socket(lsa);
828 if (target.protocol == P_HTTPS)
829 spawn_https_helper1(fileno(sfp));
831 /* Send HTTP request */
833 fprintf(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
834 target.protocol, target.host,
837 fprintf(sfp, "%s /%s HTTP/1.1\r\n",
838 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
842 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
843 target.host, G.user_agent);
845 /* Ask server to close the connection as soon as we are done
846 * (IOW: we do not intend to send more requests)
848 fprintf(sfp, "Connection: close\r\n");
850 #if ENABLE_FEATURE_WGET_AUTHENTICATION
// The +6 offset skips the leading Proxy- prefix of the format string, so
// this call emits an Authorization: Basic header carrying the target
// credentials, sharing one string literal with the proxy case below.
852 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
853 base64enc(target.user));
855 if (use_proxy && server.user) {
856 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
857 base64enc(server.user));
861 if (G.beg_range != 0)
862 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
864 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
866 fputs(G.extra_headers, sfp);
868 if (option_mask32 & WGET_OPT_POST_DATA) {
870 "Content-Type: application/x-www-form-urlencoded\r\n"
871 "Content-Length: %u\r\n"
// NOTE(review): the %u conversion above receives an (int) value here; an
// (unsigned) cast would match the conversion.
874 (int) strlen(G.post_data), G.post_data
879 fprintf(sfp, "\r\n");
885 * Retrieve HTTP response line and check for "200" status code.
891 str = skip_non_whitespace(str);
892 str = skip_whitespace(str);
893 // FIXME: no error check
894 // xatou wouldn't work: "200 OK"
899 while (gethdr(sfp) != NULL)
900 /* eat all remaining headers */;
904 Response 204 doesn't say "null file", it says "metadata
905 has changed but data didn't":
907 "10.2.5 204 No Content
908 The server has fulfilled the request but does not need to return
909 an entity-body, and might want to return updated metainformation.
910 The response MAY include new or updated metainformation in the form
911 of entity-headers, which if present SHOULD be associated with
912 the requested variant.
914 If the client is a user agent, it SHOULD NOT change its document
915 view from that which caused the request to be sent. This response
916 is primarily intended to allow input for actions to take place
917 without causing a change to the user agent's active document view,
918 although any new or updated metainformation SHOULD be applied
919 to the document currently in the user agent's active view.
921 The 204 response MUST NOT include a message-body, and thus
922 is always terminated by the first empty line after the header fields."
924 However, in real world it was observed that some web servers
925 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
928 if (G.beg_range != 0) {
929 /* "Range:..." was not honored by the server.
930 * Restart download from the beginning.
932 reset_beg_range_to_zero();
935 case 300: /* redirection */
940 case 206: /* Partial Content */
941 if (G.beg_range != 0)
942 /* "Range:..." worked. Good. */
944 /* Partial Content even though we did not ask for it??? */
947 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
951 * Retrieve HTTP headers.
953 while ((str = gethdr(sfp)) != NULL) {
954 static const char keywords[] ALIGN1 =
955 "content-length\0""transfer-encoding\0""location\0";
957 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
961 /* gethdr converted "FOO:" string to lowercase */
963 /* strip trailing whitespace */
964 char *s = strchrnul(str, '\0') - 1;
965 while (s >= str && (*s == ' ' || *s == '\t')) {
969 key = index_in_strings(keywords, G.wget_buf) + 1;
970 if (key == KEY_content_length) {
971 G.content_len = BB_STRTOOFF(str, NULL, 10);
972 if (G.content_len < 0 || errno) {
973 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
978 if (key == KEY_transfer_encoding) {
979 if (strcmp(str_tolower(str), "chunked") != 0)
980 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
983 if (key == KEY_location && status >= 300) {
984 if (--redir_limit == 0)
985 bb_error_msg_and_die("too many redirections");
988 free(redirected_path);
989 target.path = redirected_path = xstrdup(str+1);
990 /* lsa stays the same: it's on the same server */
992 parse_url(str, &target);
994 /* server.user remains untouched */
995 free(server.allocated);
996 server.allocated = NULL;
997 server.host = target.host;
998 /* strip_ipv6_scope_id(target.host); - no! */
999 /* we assume remote never gives us IPv6 addr with scope id */
1000 server.port = target.port;
1003 } /* else: lsa stays the same: we use proxy */
1005 goto establish_session;
1008 // if (status >= 300)
1009 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1011 /* For HTTP, data is pumped over the same connection */
1018 sfp = prepare_ftp_session(&dfp, &target, lsa);
1023 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1024 if (G.output_fd < 0)
1025 G.output_fd = xopen(G.fname_out, G.o_flags);
1026 retrieve_file_data(dfp);
1027 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1028 xclose(G.output_fd);
1034 /* It's ftp. Close data connection properly */
1036 if (ftpcmd(NULL, NULL, sfp) != 226)
1037 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1038 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1042 free(server.allocated);
1043 free(target.allocated);
1046 free(fname_out_alloc);
1047 free(redirected_path);
// Applet entry point.
// Sets defaults (900 second timeout and SIGALRM handler when the timeout
// feature is enabled, proxy flag on, User-Agent Wget), parses options with
// getopt32(), joins all --header values into G.extra_headers with a CRLF
// after each one, chooses the open flags for the output file, and calls
// download_one_url() on the URL arguments.
// The order of the options in the getopt32 string must match the bit
// order of the WGET_OPT_* enum values.
1050 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1051 int wget_main(int argc UNUSED_PARAM, char **argv)
1053 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1054 static const char wget_longopts[] ALIGN1 =
1055 /* name, has_arg, val */
1056 "continue\0" No_argument "c"
1057 //FIXME: -s isn't --spider, it's --save-headers!
1058 "spider\0" No_argument "s"
1059 "quiet\0" No_argument "q"
1060 "output-document\0" Required_argument "O"
1061 "directory-prefix\0" Required_argument "P"
1062 "proxy\0" Required_argument "Y"
1063 "user-agent\0" Required_argument "U"
1064 #if ENABLE_FEATURE_WGET_TIMEOUT
1065 "timeout\0" Required_argument "T"
1068 // "tries\0" Required_argument "t"
1069 /* Ignored (we always use PASV): */
1070 "passive-ftp\0" No_argument "\xff"
1071 "header\0" Required_argument "\xfe"
1072 "post-data\0" Required_argument "\xfd"
1073 /* Ignored (we don't do ssl) */
1074 "no-check-certificate\0" No_argument "\xfc"
1075 /* Ignored (we don't support caching) */
1076 "no-cache\0" No_argument "\xfb"
1080 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1081 llist_t *headers_llist = NULL;
1086 #if ENABLE_FEATURE_WGET_TIMEOUT
1087 G.timeout_seconds = 900;
1088 signal(SIGALRM, alarm_handler);
1090 G.proxy_flag = "on"; /* use proxies if env vars are set */
1091 G.user_agent = "Wget"; /* "User-Agent" header field */
1093 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1094 applet_long_options = wget_longopts;
1096 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
1097 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
1098 &G.fname_out, &G.dir_prefix,
1099 &G.proxy_flag, &G.user_agent,
1100 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1101 NULL /* -t RETRIES */
1102 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1103 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1107 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1108 if (headers_llist) {
1111 llist_t *ll = headers_llist;
1113 size += strlen(ll->data) + 2;
1116 G.extra_headers = cp = xmalloc(size);
1117 while (headers_llist) {
1118 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
// Default open flags include O_EXCL, so an existing output file makes the
// open fail; with -O FILE the flags are replaced below by a set without
// O_EXCL, which allows overwriting.
1124 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1125 if (G.fname_out) { /* -O FILE ? */
1126 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1128 option_mask32 &= ~WGET_OPT_CONTINUE;
1130 /* compat with wget: -O FILE can overwrite */
1131 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1135 download_one_url(*argv++);
1137 if (G.output_fd >= 0)
1138 xclose(G.output_fd);
1140 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1141 free(G.extra_headers);
1145 return EXIT_SUCCESS;