1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this tarball for details.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
14 // May be used if we ever want to free() all xstrdup()s...
15 /* char *allocated; */
26 off_t content_len; /* Content-length of the file */
27 off_t beg_range; /* Range at which continue begins */
28 #if ENABLE_FEATURE_WGET_STATUSBAR
29 off_t transferred; /* Number of bytes transferred so far */
30 const char *curfile; /* Name of current file being transferred */
33 #if ENABLE_FEATURE_WGET_TIMEOUT
34 unsigned timeout_seconds;
36 smallint chunked; /* chunked transfer encoding */
37 smallint got_clen; /* got content-length: from server */
39 #define G (*(struct globals*)&bb_common_bufsiz1)
40 struct BUG_G_too_big {
41 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
43 #define INIT_G() do { \
44 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
48 /* Must match option string! */
50 WGET_OPT_CONTINUE = (1 << 0),
51 WGET_OPT_SPIDER = (1 << 1),
52 WGET_OPT_QUIET = (1 << 2),
53 WGET_OPT_OUTNAME = (1 << 3),
54 WGET_OPT_PREFIX = (1 << 4),
55 WGET_OPT_PROXY = (1 << 5),
56 WGET_OPT_USER_AGENT = (1 << 6),
57 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
58 WGET_OPT_RETRIES = (1 << 8),
59 WGET_OPT_PASSIVE = (1 << 9),
60 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
61 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
69 #if ENABLE_FEATURE_WGET_STATUSBAR
70 static void progress_meter(int flag)
72 if (option_mask32 & WGET_OPT_QUIET)
75 if (flag == PROGRESS_START)
76 bb_progress_init(&G.pmt);
78 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
79 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
81 if (flag == PROGRESS_END) {
82 bb_putchar_stderr('\n');
87 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
91 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
92 * local addresses can have a scope identifier to specify the
93 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
94 * identifier is only valid on a single node.
96 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
97 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
98 * in the Host header as invalid requests, see
99 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
101 static void strip_ipv6_scope_id(char *host)
105 /* bbox wget actually handles IPv6 addresses without [], like
106 * wget "http://::1/xxx", but this is not standard.
107 * To save code, _here_ we do not support it. */
110 return; /* not IPv6 */
112 scope = strchr(host, '%');
116 /* Remove the IPv6 zone identifier from the host address */
117 cp = strchr(host, ']');
118 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
119 /* malformed address (not "[xx]:nn" or "[xx]") */
123 /* cp points to "]...", scope points to "%eth0]..." */
124 overlapping_strcpy(scope, cp);
127 /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
128 * and a short count if an eof or non-interrupt error is encountered. */
129 static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
132 char *p = (char*)ptr;
137 ret = fread(p, 1, nmemb, stream);
140 } while (nmemb && ferror(stream) && errno == EINTR);
142 return p - (char*)ptr;
145 /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
146 * Returns S, or NULL if an eof or non-interrupt error is encountered. */
147 static char *safe_fgets(char *s, int size, FILE *stream)
154 ret = fgets(s, size, stream);
155 } while (ret == NULL && ferror(stream) && errno == EINTR);
160 #if ENABLE_FEATURE_WGET_AUTHENTICATION
161 /* Base64-encode character string. buf is assumed to be char buf[512]. */
162 static char *base64enc_512(char buf[512], const char *str)
164 unsigned len = strlen(str);
165 if (len > 512/4*3 - 10) /* paranoia */
167 bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
172 static char* sanitize_string(char *s)
174 unsigned char *p = (void *) s;
181 static FILE *open_socket(len_and_sockaddr *lsa)
185 /* glibc 2.4 seems to try seeking on it - ??! */
186 /* hopefully it understands what ESPIPE means... */
187 fp = fdopen(xconnect_stream(lsa), "r+");
189 bb_perror_msg_and_die("fdopen");
194 static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
199 fprintf(fp, "%s%s\r\n", s1, s2);
206 if (fgets(buf, 510, fp) == NULL) {
207 bb_perror_msg_and_die("error getting response");
209 buf_ptr = strstr(buf, "\r\n");
213 } while (!isdigit(buf[0]) || buf[3] != ' ');
216 result = xatoi_u(buf);
221 static void parse_url(char *src_url, struct host_info *h)
225 /* h->allocated = */ url = xstrdup(src_url);
227 if (strncmp(url, "http://", 7) == 0) {
228 h->port = bb_lookup_port("http", "tcp", 80);
231 } else if (strncmp(url, "ftp://", 6) == 0) {
232 h->port = bb_lookup_port("ftp", "tcp", 21);
236 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
239 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
240 // 'GET /?var=a/b HTTP/1.0'
241 // and saves 'index.html?var=a%2Fb' (we save 'b')
242 // wget 'http://busybox.net?login=john@doe':
243 // request: 'GET /?login=john@doe HTTP/1.0'
244 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
245 // wget 'http://busybox.net#test/test':
246 // request: 'GET / HTTP/1.0'
247 // saves: 'index.html' (we save 'test')
249 // We also don't add unique .N suffix if file exists...
250 sp = strchr(h->host, '/');
251 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
252 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
255 } else if (*sp == '/') {
258 } else { // '#' or '?'
259 // http://busybox.net?login=john@doe is a valid URL
260 // memmove converts to:
261 // http:/busybox.nett?login=john@doe...
262 memmove(h->host - 1, h->host, sp - h->host);
268 // We used to set h->user to NULL here, but this interferes
269 // with handling of code 302 ("object was moved")
271 sp = strrchr(h->host, '@');
281 static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
288 /* retrieve header line */
289 if (fgets(buf, bufsiz, fp) == NULL)
292 /* see if we are at the end of the headers */
293 for (s = buf; *s == '\r'; ++s)
298 /* convert the header name to lower case */
299 for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
300 /* tolower for "A-Z", no-op for "0-9a-z-." */
304 /* verify we are at the end of the header name */
306 bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
308 /* locate the start of the header value */
310 hdrval = skip_whitespace(s);
312 /* locate the end of header */
313 while (*s && *s != '\r' && *s != '\n')
316 /* end of header found */
322 /* Rats! The buffer isn't big enough to hold the entire header value */
323 while (c = getc(fp), c != EOF && c != '\n')
329 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
330 static char *URL_escape(const char *str)
332 /* URL encode, see RFC 2396 */
334 char *res = dst = xmalloc(strlen(str) * 3 + 1);
340 /* || strchr("!&'()*-.=_~", c) - more code */
352 || (c >= '0' && c <= '9')
353 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
360 *dst++ = bb_hexdigits_upcase[c >> 4];
361 *dst++ = bb_hexdigits_upcase[c & 0xf];
367 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
375 target->user = xstrdup("anonymous:busybox@");
377 sfp = open_socket(lsa);
378 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
379 bb_error_msg_and_die("%s", sanitize_string(buf+4));
382 * Splitting username:password pair,
385 str = strchr(target->user, ':');
388 switch (ftpcmd("USER ", target->user, sfp, buf)) {
392 if (ftpcmd("PASS ", str, sfp, buf) == 230)
394 /* fall through (failed login) */
396 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
399 ftpcmd("TYPE I", NULL, sfp, buf);
404 if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
405 G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
406 if (G.content_len < 0 || errno) {
407 bb_error_msg_and_die("SIZE value is garbage");
413 * Entering passive mode
415 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
417 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
419 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
420 // Server's IP is N1.N2.N3.N4 (we ignore it)
421 // Server's port for data connection is P1*256+P2
422 str = strrchr(buf, ')');
423 if (str) str[0] = '\0';
424 str = strrchr(buf, ',');
425 if (!str) goto pasv_error;
426 port = xatou_range(str+1, 0, 255);
428 str = strrchr(buf, ',');
429 if (!str) goto pasv_error;
430 port += xatou_range(str+1, 0, 255) * 256;
431 set_nport(lsa, htons(port));
433 *dfpp = open_socket(lsa);
436 sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
437 if (ftpcmd(buf, NULL, sfp, buf) == 350)
438 G.content_len -= G.beg_range;
441 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
442 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
447 static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
450 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
451 # if ENABLE_FEATURE_WGET_TIMEOUT
454 struct pollfd polldata;
456 polldata.fd = fileno(dfp);
457 polldata.events = POLLIN | POLLPRI;
460 progress_meter(PROGRESS_START);
465 /* Loops only if chunked */
473 if (G.content_len < (off_t)sizeof(buf)) {
474 if ((int)G.content_len <= 0)
476 rdsz = (unsigned)G.content_len;
479 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
480 # if ENABLE_FEATURE_WGET_TIMEOUT
481 second_cnt = G.timeout_seconds;
484 if (safe_poll(&polldata, 1, 1000) != 0)
485 break; /* error, EOF, or data is available */
486 # if ENABLE_FEATURE_WGET_TIMEOUT
487 if (second_cnt != 0 && --second_cnt == 0) {
488 progress_meter(PROGRESS_END);
489 bb_perror_msg_and_die("download timed out");
492 /* Needed for "stalled" indicator */
493 progress_meter(PROGRESS_BUMP);
496 n = safe_fread(buf, rdsz, dfp);
499 /* perror will not work: ferror doesn't set errno */
500 bb_error_msg_and_die(bb_msg_read_error);
504 xwrite(output_fd, buf, n);
505 #if ENABLE_FEATURE_WGET_STATUSBAR
507 progress_meter(PROGRESS_BUMP);
516 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
518 safe_fgets(buf, sizeof(buf), dfp);
519 G.content_len = STRTOOFF(buf, NULL, 16);
520 /* FIXME: error check? */
521 if (G.content_len == 0)
522 break; /* all done! */
526 progress_meter(PROGRESS_END);
529 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
530 int wget_main(int argc UNUSED_PARAM, char **argv)
533 struct host_info server, target;
534 len_and_sockaddr *lsa;
538 char *dir_prefix = NULL;
539 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
541 char *extra_headers = NULL;
542 llist_t *headers_llist = NULL;
544 FILE *sfp; /* socket to web/ftp server */
545 FILE *dfp; /* socket to ftp server (data) */
546 char *fname_out; /* where to direct output (-O) */
548 bool use_proxy; /* Use proxies if env vars are set */
549 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
550 const char *user_agent = "Wget";/* "User-Agent" header field */
552 static const char keywords[] ALIGN1 =
553 "content-length\0""transfer-encoding\0""chunked\0""location\0";
555 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
557 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
558 static const char wget_longopts[] ALIGN1 =
559 /* name, has_arg, val */
560 "continue\0" No_argument "c"
561 "spider\0" No_argument "s"
562 "quiet\0" No_argument "q"
563 "output-document\0" Required_argument "O"
564 "directory-prefix\0" Required_argument "P"
565 "proxy\0" Required_argument "Y"
566 "user-agent\0" Required_argument "U"
567 #if ENABLE_FEATURE_WGET_TIMEOUT
568 "timeout\0" Required_argument "T"
571 // "tries\0" Required_argument "t"
572 /* Ignored (we always use PASV): */
573 "passive-ftp\0" No_argument "\xff"
574 "header\0" Required_argument "\xfe"
575 "post-data\0" Required_argument "\xfd"
576 /* Ignored (we don't do ssl) */
577 "no-check-certificate\0" No_argument "\xfc"
583 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
584 applet_long_options = wget_longopts;
586 /* server.allocated = target.allocated = NULL; */
587 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
588 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
589 &fname_out, &dir_prefix,
590 &proxy_flag, &user_agent,
591 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
592 NULL /* -t RETRIES */
593 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
594 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
596 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
600 llist_t *ll = headers_llist;
602 size += strlen(ll->data) + 2;
605 extra_headers = cp = xmalloc(size);
606 while (headers_llist) {
607 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
612 /* TODO: compat issue: should handle "wget URL1 URL2..." */
615 parse_url(argv[optind], &target);
617 /* Use the proxy if necessary */
618 use_proxy = (strcmp(proxy_flag, "off") != 0);
620 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
621 if (proxy && proxy[0]) {
623 parse_url(proxy, &server);
629 server.port = target.port;
630 if (ENABLE_FEATURE_IPV6) {
631 server.host = xstrdup(target.host);
633 server.host = target.host;
637 if (ENABLE_FEATURE_IPV6)
638 strip_ipv6_scope_id(target.host);
640 /* Guess an output filename, if there was no -O FILE */
641 if (!(opt & WGET_OPT_OUTNAME)) {
642 fname_out = bb_get_last_path_component_nostrip(target.path);
643 /* handle "wget http://kernel.org//" */
644 if (fname_out[0] == '/' || !fname_out[0])
645 fname_out = (char*)"index.html";
646 /* -P DIR is considered only if there was no -O FILE */
648 fname_out = concat_path_file(dir_prefix, fname_out);
650 if (LONE_DASH(fname_out)) {
653 opt &= ~WGET_OPT_CONTINUE;
656 #if ENABLE_FEATURE_WGET_STATUSBAR
657 G.curfile = bb_get_last_path_component_nostrip(fname_out);
661 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
662 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
665 /* Determine where to start transfer */
666 if (opt & WGET_OPT_CONTINUE) {
667 output_fd = open(fname_out, O_WRONLY);
668 if (output_fd >= 0) {
669 G.beg_range = xlseek(output_fd, 0, SEEK_END);
671 /* File doesn't exist. We do not create file here yet.
672 * We are not sure it exists on the remote side */
677 lsa = xhost2sockaddr(server.host, server.port);
678 if (!(opt & WGET_OPT_QUIET)) {
679 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
680 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
684 if (use_proxy || !target.is_ftp) {
691 /* Open socket to http server */
692 sfp = open_socket(lsa);
694 /* Send HTTP request */
696 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
697 target.is_ftp ? "f" : "ht", target.host,
700 if (opt & WGET_OPT_POST_DATA)
701 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
703 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
706 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
707 target.host, user_agent);
709 #if ENABLE_FEATURE_WGET_AUTHENTICATION
711 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
712 base64enc_512(buf, target.user));
714 if (use_proxy && server.user) {
715 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
716 base64enc_512(buf, server.user));
721 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
722 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
724 fputs(extra_headers, sfp);
726 if (opt & WGET_OPT_POST_DATA) {
727 char *estr = URL_escape(post_data);
728 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
729 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
730 (int) strlen(estr), estr);
731 /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
732 /*fprintf(sfp, "%s\r\n", estr);*/
736 { /* If "Connection:" is needed, document why */
737 fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
741 * Retrieve HTTP response line and check for "200" status code.
744 if (fgets(buf, sizeof(buf), sfp) == NULL)
745 bb_error_msg_and_die("no response from server");
748 str = skip_non_whitespace(str);
749 str = skip_whitespace(str);
750 // FIXME: no error check
751 // xatou wouldn't work: "200 OK"
756 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
757 /* eat all remaining headers */;
761 Response 204 doesn't say "null file", it says "metadata
762 has changed but data didn't":
764 "10.2.5 204 No Content
765 The server has fulfilled the request but does not need to return
766 an entity-body, and might want to return updated metainformation.
767 The response MAY include new or updated metainformation in the form
768 of entity-headers, which if present SHOULD be associated with
769 the requested variant.
771 If the client is a user agent, it SHOULD NOT change its document
772 view from that which caused the request to be sent. This response
773 is primarily intended to allow input for actions to take place
774 without causing a change to the user agent's active document view,
775 although any new or updated metainformation SHOULD be applied
776 to the document currently in the user agent's active view.
778 The 204 response MUST NOT include a message-body, and thus
779 is always terminated by the first empty line after the header fields."
781 However, in the real world it was observed that some web servers
782 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
786 case 300: /* redirection */
796 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
800 * Retrieve HTTP headers.
802 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
803 /* gethdr converted "FOO:" string to lowercase */
805 /* strip trailing whitespace */
806 char *s = strchrnul(str, '\0') - 1;
807 while (s >= str && (*s == ' ' || *s == '\t')) {
811 key = index_in_strings(keywords, buf) + 1;
812 if (key == KEY_content_length) {
813 G.content_len = BB_STRTOOFF(str, NULL, 10);
814 if (G.content_len < 0 || errno) {
815 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
820 if (key == KEY_transfer_encoding) {
821 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
822 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
823 G.chunked = G.got_clen = 1;
825 if (key == KEY_location && status >= 300) {
826 if (--redir_limit == 0)
827 bb_error_msg_and_die("too many redirections");
832 /* free(target.allocated); */
833 target.path = /* target.allocated = */ xstrdup(str+1);
834 /* lsa stays the same: it's on the same server */
836 parse_url(str, &target);
838 server.host = target.host;
839 /* strip_ipv6_scope_id(target.host); - no! */
840 /* we assume remote never gives us IPv6 addr with scope id */
841 server.port = target.port;
844 } /* else: lsa stays the same: we use proxy */
846 goto establish_session;
849 // if (status >= 300)
850 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
852 /* For HTTP, data is pumped over the same connection */
859 sfp = prepare_ftp_session(&dfp, &target, lsa);
862 if (opt & WGET_OPT_SPIDER) {
863 if (ENABLE_FEATURE_CLEAN_UP)
869 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
870 /* compat with wget: -O FILE can overwrite */
871 if (opt & WGET_OPT_OUTNAME)
872 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
873 output_fd = xopen(fname_out, o_flags);
876 retrieve_file_data(dfp, output_fd);
880 /* It's ftp. Close it properly */
882 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
883 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
884 /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */