1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
7 * Licensed under GPLv2, see file LICENSE in this tarball for details.
13 // May be used if we ever want to free() all xstrdup()s...
14 /* char *allocated; */
23 /* Globals (can be accessed from signal handlers) */
/* All of the fields below are reached through the G macro, i.e. they live
 * in the shared bb_common_bufsiz1 buffer rather than in separate statics. */
25 off_t content_len; /* Content-length of the file */
26 off_t beg_range; /* Range at which continue begins */
27 #if ENABLE_FEATURE_WGET_STATUSBAR
30 off_t transferred; /* Number of bytes transferred so far */
31 const char *curfile; /* Name of current file being transferred */
32 unsigned lastupdate_sec; /* monotonic_sec() value when progress was last seen */
35 smallint chunked; /* chunked transfer encoding */
/* G reinterprets the bb_common_bufsiz1 buffer as a struct globals */
37 #define G (*(struct globals*)&bb_common_bufsiz1)
/* Build-time size check: the array length evaluates to -1 (a compile
 * error) if struct globals does not fit into COMMON_BUFSIZE bytes. */
38 struct BUG_G_too_big {
39 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
/* Shorthands so the rest of the file can use the bare field names */
41 #define content_len (G.content_len )
42 #define beg_range (G.beg_range )
43 #define lastsize (G.lastsize )
44 #define totalsize (G.totalsize )
45 #define transferred (G.transferred )
46 #define curfile (G.curfile )
47 #define lastupdate_sec (G.lastupdate_sec )
48 #define start_sec (G.start_sec )
49 #define chunked (G.chunked )
/* INIT_G() expands to an empty statement: nothing is initialized here */
50 #define INIT_G() do { } while (0)
53 #if ENABLE_FEATURE_WGET_STATUSBAR
55 STALLTIME = 5 /* Seconds when xfer considered "stalled" */
58 static unsigned int get_tty2_width(void)
61 get_terminal_width_height(2, &width, NULL);
/* Redraw the one-line transfer status on stderr: file name, percentage,
 * bar, abbreviated byte count, then either " - stalled -" or an ETA.
 * flag == -1 marks the first call: the start time is recorded and this
 * function is installed as the SIGALRM handler. Because it can run from
 * a signal handler, errno is saved on entry. */
65 static void progress_meter(int flag)
67 /* We can be called from signal handler */
68 int save_errno = errno;
70 unsigned since_last_update, elapsed;
74 if (flag == -1) { /* first call to progress_meter */
75 start_sec = monotonic_sec();
76 lastupdate_sec = start_sec;
78 totalsize = content_len + beg_range; /* as content_len changes.. */
/* A percentage is only computed for a known, non-chunked total size */
82 if (totalsize != 0 && !chunked) {
83 /* long long helps to have it working even if !LFS */
/* Bytes skipped by a resumed download (beg_range) count as done */
84 ratio = (unsigned) (100ULL * (transferred+beg_range) / totalsize);
85 if (ratio > 100) ratio = 100;
/* "\r" returns to column 0 so the line is overwritten in place;
 * the file name is padded/truncated to exactly 20 columns */
88 fprintf(stderr, "\r%-20.20s%4d%% ", curfile, ratio);
/* NOTE(review): 49 is presumably the width taken by the other fields
 * of the status line - confirm */
90 barlength = get_tty2_width() - 49;
92 /* god bless gcc for variable arrays :) */
/* i = number of bar cells that correspond to the completed part */
93 i = barlength * ratio / 100;
/* The filled part (buf) is followed by barlength - i blanks */
98 fprintf(stderr, "|%s%*s|", buf, barlength - i, "");
102 abbrevsize = transferred + beg_range;
/* Values of 100000 and above are scaled down; i then indexes the
 * unit suffix in " kMGTPEZY" below */
103 while (abbrevsize >= 100000) {
107 /* see http://en.wikipedia.org/wiki/Tera */
108 fprintf(stderr, "%6d%c ", (int)abbrevsize, " kMGTPEZY"[i]);
110 // Nuts! Ain't it easier to update progress meter ONLY when we transferred++?
112 elapsed = monotonic_sec();
113 since_last_update = elapsed - lastupdate_sec;
/* New data has arrived since the previous call */
114 if (transferred > lastsize) {
115 lastupdate_sec = elapsed;
116 lastsize = transferred;
117 if (since_last_update >= STALLTIME) {
118 /* We "cut off" these seconds from elapsed time
119 * by adjusting start time */
120 start_sec += since_last_update;
122 since_last_update = 0; /* we are un-stalled now */
124 elapsed -= start_sec; /* now it's "elapsed since start" */
/* No new data for STALLTIME seconds or more: report a stall */
126 if (since_last_update >= STALLTIME) {
127 fprintf(stderr, " - stalled -");
129 off_t to_download = totalsize - beg_range;
/* No ETA when nothing was received yet, no time has elapsed, more was
 * received than expected, or the size is unknown (chunked) */
130 if (transferred <= 0 || (int)elapsed <= 0 || transferred > to_download || chunked) {
131 fprintf(stderr, "--:--:-- ETA");
133 /* to_download / (transferred/elapsed) - elapsed: */
134 int eta = (int) ((unsigned long long)to_download*elapsed/transferred - elapsed);
135 /* (long long helps to have working ETA even if !LFS) */
/* NOTE(review): i presumably holds eta % 3600 at this point - confirm */
137 fprintf(stderr, "%02d:%02d:%02d ETA", eta / 3600, i / 60, i % 60);
142 /* last call to progress_meter */
147 if (flag == -1) { /* first call to progress_meter */
/* From now on SIGALRM invokes progress_meter itself */
148 signal_SA_RESTART_empty_mask(SIGALRM, progress_meter);
155 /* Original copyright notice which applies to the CONFIG_FEATURE_WGET_STATUSBAR stuff,
156 * much of which was blatantly stolen from openssh. */
158 * Copyright (c) 1992, 1993
159 * The Regents of the University of California. All rights reserved.
161 * Redistribution and use in source and binary forms, with or without
162 * modification, are permitted provided that the following conditions
164 * 1. Redistributions of source code must retain the above copyright
165 * notice, this list of conditions and the following disclaimer.
166 * 2. Redistributions in binary form must reproduce the above copyright
167 * notice, this list of conditions and the following disclaimer in the
168 * documentation and/or other materials provided with the distribution.
170 * 3. <BSD Advertising Clause omitted per the July 22, 1999 licensing change
171 * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change>
173 * 4. Neither the name of the University nor the names of its contributors
174 * may be used to endorse or promote products derived from this software
175 * without specific prior written permission.
177 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
178 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
179 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
180 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
181 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
182 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
183 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
184 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
185 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
186 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
190 #else /* FEATURE_WGET_STATUSBAR */
/* Status bar support is compiled out (this is the #else branch of
 * FEATURE_WGET_STATUSBAR): progress_meter() is an empty inline stub so
 * callers do not need their own #if guards. */
192 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
197 /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
198 * and a short count if an eof or non-interrupt error is encountered. */
/* fread() is retried for as long as bytes remain to be read and the
 * stream error was caused by an interrupted call (errno == EINTR). */
199 static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
202 char *p = (char*)ptr;
/* Element size is 1, so fread's return value is a byte count */
207 ret = fread(p, 1, nmemb, stream);
/* Any other stream error, or eof, ends the loop with a short count */
210 } while (nmemb && ferror(stream) && errno == EINTR);
/* Result is the distance between p and the start of the buffer */
212 return p - (char*)ptr;
215 /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
216 * Returns S, or NULL if an eof or non-interrupt error is encountered. */
/* fgets() is simply called again when it fails with a stream error
 * and errno == EINTR, i.e. when the read was interrupted. */
217 static char *safe_fgets(char *s, int size, FILE *stream)
224 ret = fgets(s, size, stream);
/* A NULL result without a stream error (eof) or with any errno other
 * than EINTR is not retried */
225 } while (ret == NULL && ferror(stream) && errno == EINTR);
230 #if ENABLE_FEATURE_WGET_AUTHENTICATION
231 /* Base64-encode character string. buf is assumed to be char buf[512]. */
/* The encoding itself is done by bb_uuencode() with the base64 table.
 * Base64 emits 4 output characters per 3 input bytes, so 512/4*3 = 384
 * input bytes would fill all 512 bytes of buf; the length test below
 * keeps a further 10 bytes of slack (limit 374). */
232 static char *base64enc_512(char buf[512], const char *str)
234 unsigned len = strlen(str);
235 if (len > 512/4*3 - 10) /* paranoia */
237 bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
/* Wrap the descriptor obtained from xconnect_stream(lsa) in a stdio
 * stream opened for both reading and writing ("r+").
 * bb_perror_msg_and_die("fdopen") is the failure path for fdopen().
 * NOTE(review): xconnect_stream() presumably connects a stream socket to
 * lsa and dies on failure by itself - confirm. */
243 static FILE *open_socket(len_and_sockaddr *lsa)
247 /* glibc 2.4 seems to try seeking on it - ??! */
248 /* hopefully it understands what ESPIPE means... */
249 fp = fdopen(xconnect_stream(lsa), "r+");
251 bb_perror_msg_and_die("fdopen");
/* Send one FTP command on fp and read the server's reply into buf.
 * The command is s1 immediately followed by s2 and a CRLF.
 * Reply lines are read until one starts with a digit and has a space at
 * buf[3] (a three-digit code followed by a space); the number at the
 * start of that line is then parsed with xatoi_u().
 * Each fgets() call stores at most 510 bytes (including the NUL), so buf
 * must be at least that large. A failed read is fatal.
 * NOTE(review): result is presumably what gets returned - callers compare
 * the return value against reply codes such as 220 or 230. */
257 static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
262 fprintf(fp, "%s%s\r\n", s1, s2);
269 if (fgets(buf, 510, fp) == NULL) {
270 bb_perror_msg_and_die("error getting response");
/* NOTE(review): presumably the trailing CRLF is cut off here - confirm */
272 buf_ptr = strstr(buf, "\r\n");
276 } while (!isdigit(buf[0]) || buf[3] != ' ');
279 result = xatoi_u(buf);
/* Split src_url into the fields of *h. The work is done on an
 * xstrdup()ed copy of src_url, so the caller's string is not modified.
 * Only "http://" and "ftp://" URLs are accepted (default ports 80 and 21,
 * looked up through bb_lookup_port()); anything else is a fatal error. */
285 static void parse_url(char *src_url, struct host_info *h)
289 /* h->allocated = */ url = xstrdup(src_url);
291 if (strncmp(url, "http://", 7) == 0) {
292 h->port = bb_lookup_port("http", "tcp", 80);
295 } else if (strncmp(url, "ftp://", 6) == 0) {
296 h->port = bb_lookup_port("ftp", "tcp", 21);
300 bb_error_msg_and_die("not an http or ftp url: %s", url);
303 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
304 // 'GET /?var=a/b HTTP/1.0'
305 // and saves 'index.html?var=a%2Fb' (we save 'b')
306 // wget 'http://busybox.net?login=john@doe':
307 // request: 'GET /?login=john@doe HTTP/1.0'
308 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
309 // wget 'http://busybox.net#test/test':
310 // request: 'GET / HTTP/1.0'
311 // saves: 'index.html' (we save 'test')
313 // We also don't add unique .N suffix if file exists...
/* After these three lines sp points at whichever of '/', '?' or '#'
 * comes first after the scheme, or is NULL if none of them occurs */
314 sp = strchr(h->host, '/');
315 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
316 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
319 } else if (*sp == '/') {
322 } else { // '#' or '?'
323 // http://busybox.net?login=john@doe is a valid URL
324 // memmove converts to:
325 // http:/busybox.nett?login=john@doe...
/* The host name is shifted one byte to the left, over the last byte
 * of the scheme prefix.
 * NOTE(review): presumably this frees one byte for a terminator
 * between host and the '?'/'#' part - confirm */
326 memmove(h->host - 1, h->host, sp - h->host);
/* The last '@' in the host part is looked up here.
 * NOTE(review): presumably it separates "user[:password]" from the
 * host name - confirm */
332 sp = strrchr(h->host, '@');
/* Read one header line from fp into buf (at most bufsiz - 1 characters).
 * The header name at the start of buf is converted to lower case in
 * place, and the start of the value is found by skipping whitespace.
 * A line without a proper header name is a fatal error
 * ("bad header line"). If the line did not fit into buf, the rest of it
 * is read and thrown away up to the next '\n'.
 * Callers loop until NULL is returned and use the non-NULL result as the
 * header's value. */
344 static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
351 /* retrieve header line */
352 if (fgets(buf, bufsiz, fp) == NULL)
355 /* see if we are at the end of the headers */
356 for (s = buf; *s == '\r'; ++s)
361 /* convert the header name to lower case */
/* Name characters are letters, digits, '-' and '.' */
362 for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s)
365 /* verify we are at the end of the header name */
367 bb_error_msg_and_die("bad header line: %s", buf);
369 /* locate the start of the header value */
371 hdrval = skip_whitespace(s);
373 /* locate the end of header */
374 while (*s && *s != '\r' && *s != '\n')
377 /* end of header found */
383 /* Rats! The buffer isn't big enough to hold the entire header value. */
/* Drain the remainder of the over-long line from the stream */
384 while (c = getc(fp), c != EOF && c != '\n')
391 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
392 int wget_main(int argc UNUSED_PARAM, char **argv)
395 struct host_info server, target;
396 len_and_sockaddr *lsa;
403 char *dir_prefix = NULL;
404 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
405 char *extra_headers = NULL;
406 llist_t *headers_llist = NULL;
408 FILE *sfp = NULL; /* socket to web/ftp server */
409 FILE *dfp; /* socket to ftp server (data) */
410 char *fname_out; /* where to direct output (-O) */
411 bool got_clen = 0; /* got content-length: from server */
413 bool use_proxy = 1; /* Use proxies if env vars are set */
414 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
415 const char *user_agent = "Wget";/* "User-Agent" header field */
417 static const char keywords[] ALIGN1 =
418 "content-length\0""transfer-encoding\0""chunked\0""location\0";
420 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
423 WGET_OPT_CONTINUE = 0x1,
424 WGET_OPT_SPIDER = 0x2,
425 WGET_OPT_QUIET = 0x4,
426 WGET_OPT_OUTNAME = 0x8,
427 WGET_OPT_PREFIX = 0x10,
428 WGET_OPT_PROXY = 0x20,
429 WGET_OPT_USER_AGENT = 0x40,
430 WGET_OPT_PASSIVE = 0x80,
431 WGET_OPT_HEADER = 0x100,
433 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
434 static const char wget_longopts[] ALIGN1 =
435 /* name, has_arg, val */
436 "continue\0" No_argument "c"
437 "spider\0" No_argument "s"
438 "quiet\0" No_argument "q"
439 "output-document\0" Required_argument "O"
440 "directory-prefix\0" Required_argument "P"
441 "proxy\0" Required_argument "Y"
442 "user-agent\0" Required_argument "U"
443 "passive-ftp\0" No_argument "\xff"
444 "header\0" Required_argument "\xfe"
450 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
451 applet_long_options = wget_longopts;
453 /* server.allocated = target.allocated = NULL; */
454 opt_complementary = "-1" USE_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
455 opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:",
456 &fname_out, &dir_prefix,
457 &proxy_flag, &user_agent,
458 NULL, /* -t RETRIES */
459 NULL /* -T NETWORK_READ_TIMEOUT */
460 USE_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
462 if (strcmp(proxy_flag, "off") == 0) {
463 /* -Y off (--proxy off): do not use a proxy */
466 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
470 llist_t *ll = headers_llist;
472 size += strlen(ll->data) + 2;
475 extra_headers = cp = xmalloc(size);
476 while (headers_llist) {
477 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
482 parse_url(argv[optind], &target);
483 server.host = target.host;
484 server.port = target.port;
486 /* Use the proxy if necessary */
488 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
489 if (proxy && *proxy) {
490 parse_url(proxy, &server);
496 /* Guess an output filename, if there was no -O FILE */
497 if (!(opt & WGET_OPT_OUTNAME)) {
498 fname_out = bb_get_last_path_component_nostrip(target.path);
499 /* handle "wget http://kernel.org//" */
500 if (fname_out[0] == '/' || !fname_out[0])
501 fname_out = (char*)"index.html";
502 /* -P DIR is considered only if there was no -O FILE */
504 fname_out = concat_path_file(dir_prefix, fname_out);
506 if (LONE_DASH(fname_out)) {
509 opt &= ~WGET_OPT_CONTINUE;
512 #if ENABLE_FEATURE_WGET_STATUSBAR
513 curfile = bb_get_last_path_component_nostrip(fname_out);
517 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
518 bb_error_msg_and_die("cannot specify continue (-c) without a filename (-O)"); */
520 /* Determine where to start transfer */
521 if (opt & WGET_OPT_CONTINUE) {
522 output_fd = open(fname_out, O_WRONLY);
523 if (output_fd >= 0) {
524 beg_range = xlseek(output_fd, 0, SEEK_END);
526 /* File doesn't exist. We do not create file here yet.
527 We are not sure it exists on remote side
530 /* We want to do exactly _one_ DNS lookup, since some
531 * sites (i.e. ftp.us.debian.org) use round-robin DNS
532 * and we want to connect to only one IP... */
533 lsa = xhost2sockaddr(server.host, server.port);
534 if (!(opt & WGET_OPT_QUIET)) {
535 fprintf(stderr, "Connecting to %s (%s)\n", server.host,
536 xmalloc_sockaddr2dotted(&lsa->u.sa));
537 /* We leak result of xmalloc_sockaddr2dotted */
540 if (use_proxy || !target.is_ftp) {
549 bb_error_msg_and_die("too many redirections");
551 /* Open socket to http server */
552 if (sfp) fclose(sfp);
553 sfp = open_socket(lsa);
555 /* Send HTTP request. */
557 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
558 target.is_ftp ? "f" : "ht", target.host,
561 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
564 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
565 target.host, user_agent);
567 #if ENABLE_FEATURE_WGET_AUTHENTICATION
569 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
570 base64enc_512(buf, target.user));
572 if (use_proxy && server.user) {
573 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
574 base64enc_512(buf, server.user));
579 fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
580 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
582 fputs(extra_headers, sfp);
584 fprintf(sfp, "Connection: close\r\n\r\n");
587 * Retrieve HTTP response line and check for "200" status code.
590 if (fgets(buf, sizeof(buf), sfp) == NULL)
591 bb_error_msg_and_die("no response from server");
594 str = skip_non_whitespace(str);
595 str = skip_whitespace(str);
596 // FIXME: no error check
597 // xatou wouldn't work: "200 OK"
602 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
603 /* eat all remaining headers */;
607 Response 204 doesn't say "null file", it says "metadata
608 has changed but data didn't":
610 "10.2.5 204 No Content
611 The server has fulfilled the request but does not need to return
612 an entity-body, and might want to return updated metainformation.
613 The response MAY include new or updated metainformation in the form
614 of entity-headers, which if present SHOULD be associated with
615 the requested variant.
617 If the client is a user agent, it SHOULD NOT change its document
618 view from that which caused the request to be sent. This response
619 is primarily intended to allow input for actions to take place
620 without causing a change to the user agent's active document view,
621 although any new or updated metainformation SHOULD be applied
622 to the document currently in the user agent's active view.
624 The 204 response MUST NOT include a message-body, and thus
625 is always terminated by the first empty line after the header fields."
627 However, in real world it was observed that some web servers
628 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
632 case 300: /* redirection */
642 /* Show first line only and kill any ESC tricks */
643 buf[strcspn(buf, "\n\r\x1b")] = '\0';
644 bb_error_msg_and_die("server returned error: %s", buf);
648 * Retrieve HTTP headers.
650 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
651 /* gethdr did already convert the "FOO:" string to lowercase */
652 smalluint key = index_in_strings(keywords, *&buf) + 1;
653 if (key == KEY_content_length) {
654 content_len = BB_STRTOOFF(str, NULL, 10);
655 if (errno || content_len < 0) {
656 bb_error_msg_and_die("content-length %s is garbage", str);
661 if (key == KEY_transfer_encoding) {
662 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
663 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
664 chunked = got_clen = 1;
666 if (key == KEY_location) {
668 /* free(target.allocated); */
669 target.path = /* target.allocated = */ xstrdup(str+1);
671 parse_url(str, &target);
672 if (use_proxy == 0) {
673 server.host = target.host;
674 server.port = target.port;
677 lsa = xhost2sockaddr(server.host, server.port);
682 } while (status >= 300);
692 target.user = xstrdup("anonymous:busybox@");
694 sfp = open_socket(lsa);
695 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
696 bb_error_msg_and_die("%s", buf+4);
699 * Splitting username:password pair,
702 str = strchr(target.user, ':');
705 switch (ftpcmd("USER ", target.user, sfp, buf)) {
709 if (ftpcmd("PASS ", str, sfp, buf) == 230)
711 /* fall through (failed login) */
713 bb_error_msg_and_die("ftp login: %s", buf+4);
716 ftpcmd("TYPE I", NULL, sfp, buf);
721 if (ftpcmd("SIZE ", target.path, sfp, buf) == 213) {
722 content_len = BB_STRTOOFF(buf+4, NULL, 10);
723 if (errno || content_len < 0) {
724 bb_error_msg_and_die("SIZE value is garbage");
730 * Entering passive mode
732 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
734 bb_error_msg_and_die("bad response to %s: %s", "PASV", buf);
736 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
737 // Server's IP is N1.N2.N3.N4 (we ignore it)
738 // Server's port for data connection is P1*256+P2
739 str = strrchr(buf, ')');
740 if (str) str[0] = '\0';
741 str = strrchr(buf, ',');
742 if (!str) goto pasv_error;
743 port = xatou_range(str+1, 0, 255);
745 str = strrchr(buf, ',');
746 if (!str) goto pasv_error;
747 port += xatou_range(str+1, 0, 255) * 256;
748 set_nport(lsa, htons(port));
749 dfp = open_socket(lsa);
752 sprintf(buf, "REST %"OFF_FMT"d", beg_range);
753 if (ftpcmd(buf, NULL, sfp, buf) == 350)
754 content_len -= beg_range;
757 if (ftpcmd("RETR ", target.path, sfp, buf) > 150)
758 bb_error_msg_and_die("bad response to %s: %s", "RETR", buf);
761 if (opt & WGET_OPT_SPIDER) {
762 if (ENABLE_FEATURE_CLEAN_UP)
771 /* Do it before progress_meter (want to have nice error message) */
773 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
774 /* compat with wget: -O FILE can overwrite */
775 if (opt & WGET_OPT_OUTNAME)
776 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
777 output_fd = xopen(fname_out, o_flags);
780 if (!(opt & WGET_OPT_QUIET))
786 /* Loops only if chunked */
788 while (content_len > 0 || !got_clen) {
790 unsigned rdsz = sizeof(buf);
792 if (content_len < sizeof(buf) && (chunked || got_clen))
793 rdsz = (unsigned)content_len;
794 n = safe_fread(buf, rdsz, dfp);
797 /* perror will not work: ferror doesn't set errno */
798 bb_error_msg_and_die(bb_msg_read_error);
802 xwrite(output_fd, buf, n);
803 #if ENABLE_FEATURE_WGET_STATUSBAR
813 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
815 safe_fgets(buf, sizeof(buf), dfp);
816 content_len = STRTOOFF(buf, NULL, 16);
817 /* FIXME: error check? */
818 if (content_len == 0)
819 break; /* all done! */
822 if (!(opt & WGET_OPT_QUIET))
825 if ((use_proxy == 0) && target.is_ftp) {
827 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
828 bb_error_msg_and_die("ftp error: %s", buf+4);
829 ftpcmd("QUIT", NULL, sfp, buf);