1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
14 // May be used if we ever will want to free() all xstrdup()s...
15 /* char *allocated; */
/* Fields of the applet-wide state. The rest of the file reaches them through
 * the G macro defined below (G.content_len, G.beg_range, ...).
 * NOTE(review): the line that opens this struct, the matching #endif lines
 * and the closing brace are not visible in this listing. */
26 off_t content_len; /* Content-length of the file */
27 off_t beg_range; /* Range at which continue begins */
28 #if ENABLE_FEATURE_WGET_STATUSBAR
29 off_t transferred; /* Number of bytes transferred so far */
30 const char *curfile; /* Name of current file being transferred */
33 #if ENABLE_FEATURE_WGET_TIMEOUT
34 unsigned timeout_seconds; /* copied into the countdown used while waiting for data */
36 smallint chunked; /* chunked transfer encoding */
37 smallint got_clen; /* got content-length: from server */
/* G reinterprets the storage at bb_common_bufsiz1 as a struct globals. */
39 #define G (*(struct globals*)&bb_common_bufsiz1)
/* Compile-time size check: the array size evaluates to -1, which is an
 * error, when sizeof(G) is larger than COMMON_BUFSIZE. */
40 struct BUG_G_too_big {
41 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
/* INIT_G() assigns 900 to G.timeout_seconds. The assignment is wrapped in
 * IF_FEATURE_WGET_TIMEOUT(), presumably so that it only exists when that
 * feature is enabled - confirm against the macro definition. */
43 #define INIT_G() do { \
44 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
/* Bit flags describing the parsed command line. The code below tests them
 * against option_mask32 or the local "opt" value returned by getopt32().
 * NOTE(review): the line that opens this enum is not visible in this listing. */
48 /* Must match option string! */
50 WGET_OPT_CONTINUE = (1 << 0),
51 WGET_OPT_SPIDER = (1 << 1),
52 WGET_OPT_QUIET = (1 << 2),
53 WGET_OPT_OUTNAME = (1 << 3),
54 WGET_OPT_PREFIX = (1 << 4),
55 WGET_OPT_PROXY = (1 << 5),
56 WGET_OPT_USER_AGENT = (1 << 6),
57 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
58 WGET_OPT_RETRIES = (1 << 8),
59 WGET_OPT_PASSIVE = (1 << 9),
/* The next two bits are multiplied by ENABLE_FEATURE_WGET_LONG_OPTIONS, so
 * they evaluate to 0 (a test against them is never true) when that macro is
 * 0 - presumably it is always 0 or 1; confirm. */
60 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
61 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
/* progress_meter(flag): refreshes the transfer progress display.
 * flag is compared with PROGRESS_START and PROGRESS_END below; callers in
 * this file also pass PROGRESS_BUMP.
 * A second, empty ALWAYS_INLINE definition follows the first one; presumably
 * it is the variant used when ENABLE_FEATURE_WGET_STATUSBAR is off.
 * NOTE(review): braces, the #else/#endif lines and some statements are not
 * visible in this listing. */
69 #if ENABLE_FEATURE_WGET_STATUSBAR
70 static void progress_meter(int flag)
/* Quiet-mode test. NOTE(review): the statement guarded by this "if" is not
 * visible here - presumably an early return; confirm. */
72 if (option_mask32 & WGET_OPT_QUIET)
75 if (flag == PROGRESS_START)
76 bb_progress_init(&G.pmt);
/* The last argument is 0 for chunked transfers, otherwise
 * beg_range + transferred + content_len (presumably the expected total size;
 * confirm against bb_progress_update). */
78 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
79 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
/* At the end of a transfer a newline is written to stderr. */
81 if (flag == PROGRESS_END) {
82 bb_putchar_stderr('\n');
87 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * strip_ipv6_scope_id(host) removes the "%zone" part from a bracketed IPv6
 * host string, in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * The string is left untouched when it does not start with '[', contains
 * no '%', is not of the form "[xx]" or "[xx]:nn", or has its '%' outside
 * the brackets.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */
	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (scope > cp) {
		/* The '%' sits after the closing bracket (e.g. "[::1]:%80").
		 * There is no zone id to strip, and moving "]..." forward
		 * would make the string longer than its buffer. */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]...".
	 * Close the gap; the regions overlap, hence memmove. */
	memmove(scope, cp, strlen(cp) + 1);
}
/* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
 * and a short count if an eof or non-interrupt error is encountered.
 *
 * A read that is cut short by a signal (stream error with errno == EINTR)
 * is resumed where it stopped, so the caller only sees a short count on
 * end of file or on a real error.
 */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	size_t ret;
	char *p = (char*)ptr;

	do {
		/* The error indicator is sticky and errno may be stale:
		 * reset both so the loop condition reflects this attempt only */
		clearerr(stream);
		errno = 0;
		ret = fread(p, 1, nmemb, stream);
		p += ret;
		nmemb -= ret;
	} while (nmemb && ferror(stream) && errno == EINTR);

	return p - (char*)ptr;
}
/* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
 * Returns S, or NULL if an eof or non-interrupt error is encountered.
 *
 * A call that fails only because a signal interrupted it (stream error
 * with errno == EINTR) is retried.
 */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	char *ret;

	do {
		/* Start each attempt with a clean error indicator and errno */
		clearerr(stream);
		errno = 0;
		ret = fgets(s, size, stream);
	} while (ret == NULL && ferror(stream) && errno == EINTR);

	return ret;
}
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode character string. buf is assumed to be char buf[512].
 *
 * At most 512/4*3 - 10 input bytes are encoded; a longer str is silently
 * cut to that length so the encoded text cannot overrun buf.
 * Returns buf, so the call can be used directly as a printf argument.
 */
static char *base64enc_512(char buf[512], const char *str)
{
	unsigned len = strlen(str);
	if (len > 512/4*3 - 10) /* paranoia */
		len = 512/4*3 - 10;
	bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
	return buf;
}
#endif
/* Make a string received from the network safe to print in a message:
 * the string is cut, in place, at the first byte below ' ' (any control
 * character such as CR or LF). Bytes with the high bit set are kept,
 * which is why the scan goes through an unsigned char pointer.
 * Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *p = (void *) s;

	while (*p >= ' ')
		p++;
	*p = '\0';
	return s;
}
181 static FILE *open_socket(len_and_sockaddr *lsa)
185 /* glibc 2.4 seems to try seeking on it - ??! */
186 /* hopefully it understands what ESPIPE means... */
187 fp = fdopen(xconnect_stream(lsa), "r+");
189 bb_perror_msg_and_die("fdopen");
/* Send one FTP command and read the reply.
 *
 * s1, s2: the command is s1 followed by s2 and CRLF. A NULL s2 stands for
 *         an empty argument. When s1 is NULL nothing is sent and only a
 *         reply is read (used for the greeting and for the final status).
 * fp:     control connection.
 * buf:    receives the last reply line, without its CRLF; lines are read
 *         with a limit of 510 bytes, so buf must be at least that large.
 *         s1 may point into buf: the command is written out before buf
 *         is overwritten.
 *
 * Lines are read until one starts with a digit and has a space in its
 * fourth column, which skips the "NNN-text" lines of a multi-line reply.
 * Returns the numeric reply code; on return buf holds "NNN text", so
 * callers print the text part as buf+4.
 * A read failure terminates the program.
 */
static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
{
	int result;

	if (s1) {
		if (!s2)
			s2 = "";
		fprintf(fp, "%s%s\r\n", s1, s2);
		fflush(fp);
	}

	do {
		char *buf_ptr;

		if (fgets(buf, 510, fp) == NULL) {
			bb_perror_msg_and_die("error getting response");
		}
		buf_ptr = strstr(buf, "\r\n");
		if (buf_ptr)
			*buf_ptr = '\0';
		/* buf[1] and buf[2] are tested first so that buf[3] is never
		 * read beyond the terminator of a short line, where it would
		 * hold stale bytes from an earlier reply */
	} while (!isdigit((unsigned char)buf[0])
	      || buf[1] == '\0' || buf[2] == '\0'
	      || buf[3] != ' ');

	/* Terminate after the three digits for the conversion, then put
	 * the space back so buf reads "NNN text" again */
	buf[3] = '\0';
	result = xatoi_positive(buf);
	buf[3] = ' ';
	return result;
}
/* parse_url(src_url, h): splits a URL into the fields of *h.
 * The work is done on a private xstrdup() copy of src_url (url).
 * Visible steps:
 *  - a "http://" or "ftp://" prefix selects the port through
 *    bb_lookup_port(), with 80 and 21 as the fallback values; any other
 *    URL terminates the program with "not an http or ftp url";
 *  - the first of '/', '?' or '#' found from h->host onward marks where
 *    the host part ends;
 *  - the last '@' from h->host onward is located.
 * NOTE(review): braces, local declarations and the statements that assign
 * h->host, h->path, h->is_ftp and h->user are not visible in this listing;
 * the notes below cover only what the visible lines show. */
221 static void parse_url(char *src_url, struct host_info *h)
225 /* h->allocated = */ url = xstrdup(src_url);
/* 7 and 6 are the lengths of the literal prefixes being compared. */
227 if (strncmp(url, "http://", 7) == 0) {
228 h->port = bb_lookup_port("http", "tcp", 80);
231 } else if (strncmp(url, "ftp://", 6) == 0) {
232 h->port = bb_lookup_port("ftp", "tcp", 21);
236 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
239 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
240 // 'GET /?var=a/b HTTP 1.0'
241 // and saves 'index.html?var=a%2Fb' (we save 'b')
242 // wget 'http://busybox.net?login=john@doe':
243 // request: 'GET /?login=john@doe HTTP/1.0'
244 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
245 // wget 'http://busybox.net#test/test':
246 // request: 'GET / HTTP/1.0'
247 // saves: 'index.html' (we save 'test')
249 // We also don't add unique .N suffix if file exists...
/* sp ends up at whichever of '/', '?', '#' occurs first, or NULL if none
 * of them is present. */
250 sp = strchr(h->host, '/');
251 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
252 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
255 } else if (*sp == '/') {
258 } else { // '#' or '?'
259 // http://busybox.net?login=john@doe is a valid URL
260 // memmove converts to:
261 // http:/busybox.nett?login=john@doe...
/* The host text is moved one byte to the left, over the byte in front of
 * h->host, so that the '?' or '#' can stay in place. */
262 memmove(h->host - 1, h->host, sp - h->host);
268 // We used to set h->user to NULL here, but this interferes
269 // with handling of code 302 ("object was moved")
/* strrchr: the LAST '@' is taken - presumably as the end of the
 * user[:password] part; the statements that use sp are not visible. */
271 sp = strrchr(h->host, '@');
/* gethdr(buf, bufsiz, fp): reads one response header line from fp into buf.
 * Visible steps:
 *  - the line is read with fgets() (at most bufsiz-1 bytes);
 *  - leading '\r' characters are skipped to detect the end of the headers;
 *  - the header name - alphanumerics, '-' and '.' - is walked for the
 *    in-place lower-casing described by the comments below;
 *  - a line that fails the header-name check terminates the program with
 *    "bad header line";
 *  - hdrval is set to the first non-blank character of the value;
 *  - if the line did not fit into buf, the rest of it, up to the next
 *    newline or EOF, is read and discarded.
 * Callers in this file stop at a NULL return, pass the non-NULL return on as
 * the header value, and read the lower-cased name from buf.
 * NOTE(review): braces, declarations and the return statements are not
 * visible in this listing - presumably hdrval is what gets returned. */
281 static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
288 /* retrieve header line */
289 if (fgets(buf, bufsiz, fp) == NULL)
292 /* see if we are at the end of the headers */
293 for (s = buf; *s == '\r'; ++s)
298 /* convert the header name to lower case */
299 for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
300 /* tolower for "A-Z", no-op for "0-9a-z-." */
304 /* verify we are at the end of the header name */
306 bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
308 /* locate the start of the header value */
310 hdrval = skip_whitespace(s);
312 /* locate the end of header */
313 while (*s && *s != '\r' && *s != '\n')
316 /* end of header found */
322 /* Rats! The buffer isn't big enough to hold the entire header value */
/* Drain the unread tail of the over-long line, one character at a time. */
323 while (c = getc(fp), c != EOF && c != '\n')
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Percent-encode str for use as form data.
 *
 * ASCII letters, digits and the characters !&'()*-.=_~ are copied as they
 * are; every other byte becomes %XX with upper-case hex digits.
 * Returns a newly allocated string. The buffer is sized for the worst
 * case of three output bytes per input byte plus the terminator.
 */
static char *URL_escape(const char *str)
{
	/* URL encode, see RFC 2396 */
	char *dst;
	char *res = dst = xmalloc(strlen(str) * 3 + 1);
	unsigned char c;

	while (1) {
		c = *str++;
		if (c == '\0'
		/* || strchr("!&'()*-.=_~", c) - more code */
		 || c == '!'
		 || c == '&'
		 || c == '\''
		 || c == '('
		 || c == ')'
		 || c == '*'
		 || c == '-'
		 || c == '.'
		 || c == '='
		 || c == '_'
		 || c == '~'
		 || (c >= '0' && c <= '9')
		 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
		) {
			/* The terminator is copied through this branch too */
			*dst++ = c;
			if (c == '\0')
				return res;
		} else {
			*dst++ = '%';
			*dst++ = bb_hexdigits_upcase[c >> 4];
			*dst++ = bb_hexdigits_upcase[c & 0xf];
		}
	}
}
#endif
/* prepare_ftp_session(dfpp, target, lsa): performs the FTP dialogue up to
 * the point where file data can be read.
 * Visible steps, in order:
 *  - target->user is set to "anonymous:busybox@" (the condition guarding
 *    this assignment is not visible; presumably "no user given");
 *  - the control connection is opened and a 220 greeting is required;
 *  - target->user is searched for ':' to separate the password, then USER
 *    and PASS are sent; PASS must be answered with 230;
 *  - "TYPE I" is sent and its reply code is ignored;
 *  - a 213 reply to SIZE is parsed as decimal into G.content_len;
 *  - PASV must be answered with 227; the data port is assembled from the
 *    last two comma-separated numbers of the reply (P1*256+P2) and stored
 *    in lsa with set_nport(), so the data connection goes to the address
 *    already in lsa with only the port replaced;
 *  - the data connection is opened and stored through dfpp;
 *  - "REST <G.beg_range>" is sent; on a 350 reply G.content_len is reduced
 *    by G.beg_range;
 *  - RETR is sent; a reply code above 150 terminates the program.
 * Every failure path visible here ends in bb_error_msg_and_die().
 * NOTE(review): braces, declarations, case labels, the pasv_error label and
 * the return statement are not visible in this listing. */
367 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
375 target->user = xstrdup("anonymous:busybox@");
377 sfp = open_socket(lsa);
/* NULL command: nothing is sent, only the server greeting is read. */
378 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
379 bb_error_msg_and_die("%s", sanitize_string(buf+4));
382 * Splitting username:password pair,
385 str = strchr(target->user, ':');
388 switch (ftpcmd("USER ", target->user, sfp, buf)) {
392 if (ftpcmd("PASS ", str, sfp, buf) == 230)
394 /* fall through (failed login) */
396 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
399 ftpcmd("TYPE I", NULL, sfp, buf);
/* buf+4 skips the three-digit reply code and the space after it.
 * NOTE(review): the errno test below relies on errno being reset by, or
 * before, BB_STRTOOFF - not visible here; confirm. */
404 if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
405 G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
406 if (G.content_len < 0 || errno) {
407 bb_error_msg_and_die("SIZE value is garbage");
413 * Entering passive mode
415 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
417 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
419 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
420 // Server's IP is N1.N2.N3.N4 (we ignore it)
421 // Server's port for data connection is P1*256+P2
422 str = strrchr(buf, ')');
423 if (str) str[0] = '\0';
424 str = strrchr(buf, ',');
425 if (!str) goto pasv_error;
426 port = xatou_range(str+1, 0, 255);
428 str = strrchr(buf, ',');
429 if (!str) goto pasv_error;
430 port += xatou_range(str+1, 0, 255) * 256;
431 set_nport(lsa, htons(port));
433 *dfpp = open_socket(lsa);
436 sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
437 if (ftpcmd(buf, NULL, sfp, buf) == 350)
438 G.content_len -= G.beg_range;
441 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
442 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
/* retrieve_file_data(dfp, output_fd): copies the response body from dfp to
 * output_fd, reporting progress through progress_meter().
 * Visible behaviour:
 *  - with the status bar or the timeout feature compiled in, the descriptor
 *    behind dfp is waited on with safe_poll() in slices of 1000 ms;
 *  - with the timeout feature, a countdown loaded from G.timeout_seconds is
 *    decremented after every slice without activity, and the program
 *    terminates with "download timed out" when it reaches zero; a value of
 *    0 never triggers, because of the "second_cnt != 0" test;
 *  - each read is limited to the smaller of the buffer size and
 *    G.content_len, and the data is written out with xwrite();
 *  - for chunked transfers the line after a chunk is consumed, the next
 *    line is parsed as a hexadecimal chunk size into G.content_len, and a
 *    size of 0 ends the transfer.
 * NOTE(review): braces, declarations, the loop headers and the statements
 * that update the byte counters are not visible in this listing. */
447 static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
450 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
451 # if ENABLE_FEATURE_WGET_TIMEOUT
454 struct pollfd polldata;
456 polldata.fd = fileno(dfp);
457 polldata.events = POLLIN | POLLPRI;
/* ndelay_on() presumably switches the descriptor to non-blocking mode, so
 * that the waiting happens in safe_poll() below - confirm. */
458 ndelay_on(polldata.fd);
460 progress_meter(PROGRESS_START);
465 /* Loops only if chunked */
/* Read size for this round: the whole remainder when it is smaller than
 * the buffer. */
473 if (G.content_len < (off_t)sizeof(buf)) {
474 if ((int)G.content_len <= 0)
476 rdsz = (unsigned)G.content_len;
479 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
480 # if ENABLE_FEATURE_WGET_TIMEOUT
/* The countdown is reloaded here, i.e. it measures time without activity
 * for one read, not the duration of the whole download. */
481 second_cnt = G.timeout_seconds;
484 if (safe_poll(&polldata, 1, 1000) != 0)
485 break; /* error, EOF, or data is available */
486 # if ENABLE_FEATURE_WGET_TIMEOUT
487 if (second_cnt != 0 && --second_cnt == 0) {
488 progress_meter(PROGRESS_END);
/* NOTE(review): this is the perror-style variant although a timeout is not
 * an errno failure, so the text presumably gets an unrelated errno
 * description appended; bb_error_msg_and_die may be the better fit. */
489 bb_perror_msg_and_die("download timed out");
492 /* Needed for "stalled" indicator */
493 progress_meter(PROGRESS_BUMP);
496 n = safe_fread(buf, rdsz, dfp);
499 /* perror will not work: ferror doesn't set errno */
500 bb_error_msg_and_die(bb_msg_read_error);
504 xwrite(output_fd, buf, n);
505 #if ENABLE_FEATURE_WGET_STATUSBAR
507 progress_meter(PROGRESS_BUMP);
516 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
/* Next chunk header: its size is a hexadecimal number (base 16). */
518 safe_fgets(buf, sizeof(buf), dfp);
519 G.content_len = STRTOOFF(buf, NULL, 16);
520 /* FIXME: error check? */
521 if (G.content_len == 0)
522 break; /* all done! */
526 progress_meter(PROGRESS_END);
/* wget_main: entry point of the applet.
 * Visible steps, in order:
 *  - options are parsed with getopt32(); with long options enabled, all
 *    --header values are joined into one string, each followed by CRLF;
 *  - argv[optind] is parsed into "target"; a proxy URL is read from the
 *    ftp_proxy or http_proxy environment variable and parsed into "server";
 *    use_proxy is false when the -Y argument is "off";
 *  - without -O, the output name is the last path component of the URL, or
 *    "index.html" when that is empty, optionally prefixed with the -P dir;
 *  - with -c, an existing output file is opened and its size becomes
 *    G.beg_range;
 *  - the server address is resolved and, unless -q, reported on stderr;
 *  - HTTP (also used for FTP through a proxy): the request is written, the
 *    status line and the headers are read, content-length,
 *    transfer-encoding and location are acted on, and a redirect jumps
 *    back via "goto establish_session";
 *  - plain FTP: prepare_ftp_session() sets up the control and data streams;
 *  - the body is copied with retrieve_file_data(); for FTP a final 226
 *    reply is required.
 * NOTE(review): this listing lacks many lines of the function (braces,
 * declarations, labels, case labels, short statements), and the quoted RFC
 * text further down has lost its comment delimiters. */
529 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
530 int wget_main(int argc UNUSED_PARAM, char **argv)
533 struct host_info server, target;
534 len_and_sockaddr *lsa;
538 char *dir_prefix = NULL;
539 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
541 char *extra_headers = NULL;
542 llist_t *headers_llist = NULL;
544 FILE *sfp; /* socket to web/ftp server */
545 FILE *dfp; /* socket to ftp server (data) */
546 char *fname_out; /* where to direct output (-O) */
548 bool use_proxy; /* Use proxies if env vars are set */
549 const char *proxy_flag = "on"; /* -Y/--proxy argument; "off" disables proxy use */
550 const char *user_agent = "Wget";/* "User-Agent" header field */
/* Header names and values recognized below. The KEY_ constants are 1-based
 * positions in this string list (index_in_strings() result + 1). */
552 static const char keywords[] ALIGN1 =
553 "content-length\0""transfer-encoding\0""chunked\0""location\0";
555 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
557 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
558 static const char wget_longopts[] ALIGN1 =
559 /* name, has_arg, val */
560 "continue\0" No_argument "c"
561 "spider\0" No_argument "s"
562 "quiet\0" No_argument "q"
563 "output-document\0" Required_argument "O"
564 "directory-prefix\0" Required_argument "P"
565 "proxy\0" Required_argument "Y"
566 "user-agent\0" Required_argument "U"
567 #if ENABLE_FEATURE_WGET_TIMEOUT
568 "timeout\0" Required_argument "T"
571 // "tries\0" Required_argument "t"
572 /* Ignored (we always use PASV): */
573 "passive-ftp\0" No_argument "\xff"
574 "header\0" Required_argument "\xfe"
575 "post-data\0" Required_argument "\xfd"
576 /* Ignored (we don't do ssl) */
577 "no-check-certificate\0" No_argument "\xfc"
583 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
584 applet_long_options = wget_longopts;
586 /* server.allocated = target.allocated = NULL; */
587 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
588 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
589 &fname_out, &dir_prefix,
590 &proxy_flag, &user_agent,
591 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
592 NULL /* -t RETRIES */
593 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
594 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
596 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Join the --header values: size grows by each string's length plus 2 for
 * the CRLF that is appended when the list is popped below. */
600 llist_t *ll = headers_llist;
602 size += strlen(ll->data) + 2;
605 extra_headers = cp = xmalloc(size);
606 while (headers_llist) {
607 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
612 /* TODO: compat issue: should handle "wget URL1 URL2..." */
615 parse_url(argv[optind], &target);
617 /* Use the proxy if necessary */
618 use_proxy = (strcmp(proxy_flag, "off") != 0);
620 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
621 if (proxy && proxy[0]) {
623 parse_url(proxy, &server);
/* Direct connection: "server" takes the target's port and host. With IPv6
 * enabled the host string is duplicated, because strip_ipv6_scope_id()
 * below edits target.host in place. */
629 server.port = target.port;
630 if (ENABLE_FEATURE_IPV6) {
631 server.host = xstrdup(target.host);
633 server.host = target.host;
637 if (ENABLE_FEATURE_IPV6)
638 strip_ipv6_scope_id(target.host);
640 /* Guess an output filename, if there was no -O FILE */
641 if (!(opt & WGET_OPT_OUTNAME)) {
642 fname_out = bb_get_last_path_component_nostrip(target.path);
643 /* handle "wget http://kernel.org//" */
644 if (fname_out[0] == '/' || !fname_out[0])
645 fname_out = (char*)"index.html";
646 /* -P DIR is considered only if there was no -O FILE */
648 fname_out = concat_path_file(dir_prefix, fname_out);
/* Output name "-": the -c flag is cleared. The other statements of this
 * branch are not visible here. */
650 if (LONE_DASH(fname_out)) {
653 opt &= ~WGET_OPT_CONTINUE;
656 #if ENABLE_FEATURE_WGET_STATUSBAR
657 G.curfile = bb_get_last_path_component_nostrip(fname_out);
/* NOTE(review): fname_out has already been used as a string above, so this
 * NULL test looks unreachable - it may belong to a disabled block whose
 * delimiters are missing from this listing; confirm. */
661 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
662 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
665 /* Determine where to start transfer */
666 if (opt & WGET_OPT_CONTINUE) {
667 output_fd = open(fname_out, O_WRONLY);
668 if (output_fd >= 0) {
669 G.beg_range = xlseek(output_fd, 0, SEEK_END);
671 /* File doesn't exist. We do not create file here yet.
672 * We are not sure it exists on remote side */
677 lsa = xhost2sockaddr(server.host, server.port);
678 if (!(opt & WGET_OPT_QUIET)) {
679 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
680 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
/* HTTP is spoken both for http URLs and for ftp URLs fetched via a proxy. */
684 if (use_proxy || !target.is_ftp) {
691 /* Open socket to http server */
692 sfp = open_socket(lsa);
694 /* Send HTTP request */
/* Request line with the absolute URL (scheme://host/path); "f" or "ht" is
 * prepended to the literal "tp". */
696 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
697 target.is_ftp ? "f" : "ht", target.host,
700 if (opt & WGET_OPT_POST_DATA)
701 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
703 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
706 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
707 target.host, user_agent);
709 #if ENABLE_FEATURE_WGET_AUTHENTICATION
/* The +6 skips the six characters of "Proxy-", so this prints an
 * "Authorization: Basic ..." header from the same format text as the
 * proxy header below. */
711 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
712 base64enc_512(buf, target.user));
714 if (use_proxy && server.user) {
715 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
716 base64enc_512(buf, server.user));
/* Resume: ask for the bytes from G.beg_range to the end. */
721 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
722 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
724 fputs(extra_headers, sfp);
/* POST: the data is percent-encoded and sent as the body, right after the
 * blank line that ends the headers.
 * NOTE(review): "%u" is given an int argument below. */
726 if (opt & WGET_OPT_POST_DATA) {
727 char *estr = URL_escape(post_data);
728 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
729 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
730 (int) strlen(estr), estr);
731 /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
732 /*fprintf(sfp, "%s\r\n", estr);*/
736 { /* If "Connection:" is needed, document why */
737 fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
743 * Retrieve HTTP response line and check for "200" status code.
746 if (fgets(buf, sizeof(buf), sfp) == NULL)
747 bb_error_msg_and_die("no response from server");
750 str = skip_non_whitespace(str);
751 str = skip_whitespace(str);
752 // FIXME: no error check
753 // xatou wouldn't work: "200 OK"
758 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
759 /* eat all remaining headers */;
763 Response 204 doesn't say "null file", it says "metadata
764 has changed but data didn't":
766 "10.2.5 204 No Content
767 The server has fulfilled the request but does not need to return
768 an entity-body, and might want to return updated metainformation.
769 The response MAY include new or updated metainformation in the form
770 of entity-headers, which if present SHOULD be associated with
771 the requested variant.
773 If the client is a user agent, it SHOULD NOT change its document
774 view from that which caused the request to be sent. This response
775 is primarily intended to allow input for actions to take place
776 without causing a change to the user agent's active document view,
777 although any new or updated metainformation SHOULD be applied
778 to the document currently in the user agent's active view.
780 The 204 response MUST NOT include a message-body, and thus
781 is always terminated by the first empty line after the header fields."
783 However, in real world it was observed that some web servers
784 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
788 case 300: /* redirection */
798 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
802 * Retrieve HTTP headers.
804 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
805 /* gethdr converted "FOO:" string to lowercase */
807 /* strip trailing whitespace */
808 char *s = strchrnul(str, '\0') - 1;
809 while (s >= str && (*s == ' ' || *s == '\t')) {
813 key = index_in_strings(keywords, buf) + 1;
814 if (key == KEY_content_length) {
815 G.content_len = BB_STRTOOFF(str, NULL, 10);
816 if (G.content_len < 0 || errno) {
817 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
822 if (key == KEY_transfer_encoding) {
823 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
824 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
825 G.chunked = G.got_clen = 1;
827 if (key == KEY_location && status >= 300) {
828 if (--redir_limit == 0)
829 bb_error_msg_and_die("too many redirections");
834 /* free(target.allocated); */
835 target.path = /* target.allocated = */ xstrdup(str+1);
836 /* lsa stays the same: it's on the same server */
838 parse_url(str, &target);
840 server.host = target.host;
841 /* strip_ipv6_scope_id(target.host); - no! */
842 /* we assume remote never gives us IPv6 addr with scope id */
843 server.port = target.port;
846 } /* else: lsa stays the same: we use proxy */
848 goto establish_session;
851 // if (status >= 300)
852 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
854 /* For HTTP, data is pumped over the same connection */
861 sfp = prepare_ftp_session(&dfp, &target, lsa);
864 if (opt & WGET_OPT_SPIDER) {
865 if (ENABLE_FEATURE_CLEAN_UP)
871 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
872 /* compat with wget: -O FILE can overwrite */
873 if (opt & WGET_OPT_OUTNAME)
874 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
875 output_fd = xopen(fname_out, o_flags);
878 retrieve_file_data(dfp, output_fd);
882 /* It's ftp. Close it properly */
884 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
885 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
886 /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */