1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
9 #include <getopt.h> /* for struct option */
13 // May be used if we ever want to free() all xstrdup()s...
14 /* char *allocated; */
23 /* Globals (can be accessed from signal handlers) */
25 off_t content_len; /* Content-length of the file */
26 off_t beg_range; /* Range at which continue begins */
27 #if ENABLE_FEATURE_WGET_STATUSBAR
30 off_t transferred; /* Number of bytes transferred so far */
31 const char *curfile; /* Name of current file being transferred */
32 unsigned lastupdate_sec;
35 smallint chunked; /* chunked transfer encoding */
37 #define G (*(struct globals*)&bb_common_bufsiz1)
38 struct BUG_G_too_big {
39 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
41 #define content_len (G.content_len )
42 #define beg_range (G.beg_range )
43 #define lastsize (G.lastsize )
44 #define totalsize (G.totalsize )
45 #define transferred (G.transferred )
46 #define curfile (G.curfile )
47 #define lastupdate_sec (G.lastupdate_sec )
48 #define start_sec (G.start_sec )
49 #define chunked (G.chunked )
50 #define INIT_G() do { } while (0)
53 #if ENABLE_FEATURE_WGET_STATUSBAR
55 STALLTIME = 5 /* Seconds when xfer considered "stalled" */
58 static int getttywidth(void)
61 get_terminal_width_height(0, &width, NULL);
65 static void progressmeter(int flag)
67 /* We can be called from signal handler */
68 int save_errno = errno;
70 unsigned since_last_update, elapsed;
74 if (flag == -1) { /* first call to progressmeter */
75 start_sec = monotonic_sec();
76 lastupdate_sec = start_sec;
78 totalsize = content_len + beg_range; /* as content_len changes.. */
82 if (totalsize != 0 && !chunked) {
83 /* long long helps to have it working even if !LFS */
84 ratio = (unsigned) (100ULL * (transferred+beg_range) / totalsize);
85 if (ratio > 100) ratio = 100;
88 fprintf(stderr, "\r%-20.20s%4d%% ", curfile, ratio);
90 barlength = getttywidth() - 49;
92 /* god bless gcc for variable arrays :) */
93 i = barlength * ratio / 100;
98 fprintf(stderr, "|%s%*s|", buf, barlength - i, "");
102 abbrevsize = transferred + beg_range;
103 while (abbrevsize >= 100000) {
107 /* see http://en.wikipedia.org/wiki/Tera */
108 fprintf(stderr, "%6d%c ", (int)abbrevsize, " kMGTPEZY"[i]);
110 // Nuts! Ain't it easier to update progress meter ONLY when we transferred++?
112 elapsed = monotonic_sec();
113 since_last_update = elapsed - lastupdate_sec;
114 if (transferred > lastsize) {
115 lastupdate_sec = elapsed;
116 lastsize = transferred;
117 if (since_last_update >= STALLTIME) {
118 /* We "cut off" these seconds from elapsed time
119 * by adjusting start time */
120 start_sec += since_last_update;
122 since_last_update = 0; /* we are un-stalled now */
124 elapsed -= start_sec; /* now it's "elapsed since start" */
126 if (since_last_update >= STALLTIME) {
127 fprintf(stderr, " - stalled -");
129 off_t to_download = totalsize - beg_range;
130 if (transferred <= 0 || (int)elapsed <= 0 || transferred > to_download || chunked) {
131 fprintf(stderr, "--:--:-- ETA");
133 /* to_download / (transferred/elapsed) - elapsed: */
134 int eta = (int) ((unsigned long long)to_download*elapsed/transferred - elapsed);
135 /* (long long helps to have working ETA even if !LFS) */
137 fprintf(stderr, "%02d:%02d:%02d ETA", eta / 3600, i / 60, i % 60);
142 /* last call to progressmeter */
147 if (flag == -1) { /* first call to progressmeter */
148 signal_SA_RESTART_empty_mask(SIGALRM, progressmeter);
155 /* Original copyright notice which applies to the CONFIG_FEATURE_WGET_STATUSBAR stuff,
156 * much of which was blatantly stolen from openssh. */
158 * Copyright (c) 1992, 1993
159 * The Regents of the University of California. All rights reserved.
161 * Redistribution and use in source and binary forms, with or without
162 * modification, are permitted provided that the following conditions
164 * 1. Redistributions of source code must retain the above copyright
165 * notice, this list of conditions and the following disclaimer.
166 * 2. Redistributions in binary form must reproduce the above copyright
167 * notice, this list of conditions and the following disclaimer in the
168 * documentation and/or other materials provided with the distribution.
170 * 3. <BSD Advertising Clause omitted per the July 22, 1999 licensing change
171 * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change>
173 * 4. Neither the name of the University nor the names of its contributors
174 * may be used to endorse or promote products derived from this software
175 * without specific prior written permission.
177 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
178 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
179 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
180 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
181 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
182 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
183 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
184 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
185 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
186 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
190 #else /* FEATURE_WGET_STATUSBAR */
192 static ALWAYS_INLINE void progressmeter(int flag ATTRIBUTE_UNUSED) { }
197 /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
198 * which is a short count if EOF or a non-interrupt error is encountered. */
199 static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
202 char *p = (char*)ptr;
206 ret = fread(p, 1, nmemb, stream);
209 } while (nmemb && ferror(stream) && errno == EINTR);
211 return p - (char*)ptr;
214 /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
215 * Returns S, or NULL if an eof or non-interrupt error is encountered. */
216 static char *safe_fgets(char *s, int size, FILE *stream)
222 ret = fgets(s, size, stream);
223 } while (ret == NULL && ferror(stream) && errno == EINTR);
228 #if ENABLE_FEATURE_WGET_AUTHENTICATION
229 /* Base64-encode character string. buf is assumed to be char buf[512]. */
230 static char *base64enc_512(char buf[512], const char *str)
232 unsigned len = strlen(str);
233 if (len > 512/4*3 - 10) /* paranoia */
235 bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
241 static FILE *open_socket(len_and_sockaddr *lsa)
245 /* glibc 2.4 seems to try seeking on it - ??! */
246 /* hopefully it understands what ESPIPE means... */
247 fp = fdopen(xconnect_stream(lsa), "r+");
249 bb_perror_msg_and_die("fdopen");
255 static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
260 fprintf(fp, "%s%s\r\n", s1, s2);
267 if (fgets(buf, 510, fp) == NULL) {
268 bb_perror_msg_and_die("error getting response");
270 buf_ptr = strstr(buf, "\r\n");
274 } while (!isdigit(buf[0]) || buf[3] != ' ');
277 result = xatoi_u(buf);
283 static void parse_url(char *src_url, struct host_info *h)
287 /* h->allocated = */ url = xstrdup(src_url);
289 if (strncmp(url, "http://", 7) == 0) {
290 h->port = bb_lookup_port("http", "tcp", 80);
293 } else if (strncmp(url, "ftp://", 6) == 0) {
294 h->port = bb_lookup_port("ftp", "tcp", 21);
298 bb_error_msg_and_die("not an http or ftp url: %s", url);
301 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
302 // 'GET /?var=a/b HTTP 1.0'
303 // and saves 'index.html?var=a%2Fb' (we save 'b')
304 // wget 'http://busybox.net?login=john@doe':
305 // request: 'GET /?login=john@doe HTTP/1.0'
306 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
307 // wget 'http://busybox.net#test/test':
308 // request: 'GET / HTTP/1.0'
309 // saves: 'index.html' (we save 'test')
311 // We also don't add unique .N suffix if file exists...
312 sp = strchr(h->host, '/');
313 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
314 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
317 } else if (*sp == '/') {
320 } else { // '#' or '?'
321 // http://busybox.net?login=john@doe is a valid URL
322 // memmove converts to:
323 // http:/busybox.nett?login=john@doe...
324 memmove(h->host - 1, h->host, sp - h->host);
330 sp = strrchr(h->host, '@');
342 static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
349 /* retrieve header line */
350 if (fgets(buf, bufsiz, fp) == NULL)
353 /* see if we are at the end of the headers */
354 for (s = buf; *s == '\r'; ++s)
359 /* convert the header name to lower case */
360 for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s)
363 /* verify we are at the end of the header name */
365 bb_error_msg_and_die("bad header line: %s", buf);
367 /* locate the start of the header value */
369 hdrval = skip_whitespace(s);
371 /* locate the end of header */
372 while (*s && *s != '\r' && *s != '\n')
375 /* end of header found */
381 /* Rats! The buffer isn't big enough to hold the entire header value. */
382 while (c = getc(fp), c != EOF && c != '\n')
389 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
390 int wget_main(int argc ATTRIBUTE_UNUSED, char **argv)
393 struct host_info server, target;
394 len_and_sockaddr *lsa;
401 char *dir_prefix = NULL;
402 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
403 char *extra_headers = NULL;
404 llist_t *headers_llist = NULL;
406 FILE *sfp = NULL; /* socket to web/ftp server */
407 FILE *dfp; /* socket to ftp server (data) */
408 char *fname_out; /* where to direct output (-O) */
409 bool got_clen = 0; /* got content-length: from server */
411 bool use_proxy = 1; /* Use proxies if env vars are set */
412 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
413 const char *user_agent = "Wget";/* "User-Agent" header field */
415 static const char keywords[] ALIGN1 =
416 "content-length\0""transfer-encoding\0""chunked\0""location\0";
418 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
421 WGET_OPT_CONTINUE = 0x1,
422 WGET_OPT_SPIDER = 0x2,
423 WGET_OPT_QUIET = 0x4,
424 WGET_OPT_OUTNAME = 0x8,
425 WGET_OPT_PREFIX = 0x10,
426 WGET_OPT_PROXY = 0x20,
427 WGET_OPT_USER_AGENT = 0x40,
428 WGET_OPT_PASSIVE = 0x80,
429 WGET_OPT_HEADER = 0x100,
431 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
432 static const char wget_longopts[] ALIGN1 =
433 /* name, has_arg, val */
434 "continue\0" No_argument "c"
435 "spider\0" No_argument "s"
436 "quiet\0" No_argument "q"
437 "output-document\0" Required_argument "O"
438 "directory-prefix\0" Required_argument "P"
439 "proxy\0" Required_argument "Y"
440 "user-agent\0" Required_argument "U"
441 "passive-ftp\0" No_argument "\xff"
442 "header\0" Required_argument "\xfe"
448 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
449 applet_long_options = wget_longopts;
451 /* server.allocated = target.allocated = NULL; */
452 opt_complementary = "-1" USE_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
453 opt = getopt32(argv, "csqO:P:Y:U:",
454 &fname_out, &dir_prefix,
455 &proxy_flag, &user_agent
456 USE_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
458 if (strcmp(proxy_flag, "off") == 0) {
459 /* proxy flag (-Y) was given as "off" */
462 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
466 llist_t *ll = headers_llist;
468 size += strlen(ll->data) + 2;
471 extra_headers = cp = xmalloc(size);
472 while (headers_llist) {
473 cp += sprintf(cp, "%s\r\n", headers_llist->data);
474 headers_llist = headers_llist->link;
479 parse_url(argv[optind], &target);
480 server.host = target.host;
481 server.port = target.port;
483 /* Use the proxy if necessary */
485 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
486 if (proxy && *proxy) {
487 parse_url(proxy, &server);
493 /* Guess an output filename, if there was no -O FILE */
494 if (!(opt & WGET_OPT_OUTNAME)) {
495 fname_out = bb_get_last_path_component_nostrip(target.path);
496 /* handle "wget http://kernel.org//" */
497 if (fname_out[0] == '/' || !fname_out[0])
498 fname_out = (char*)"index.html";
499 /* -P DIR is considered only if there was no -O FILE */
501 fname_out = concat_path_file(dir_prefix, fname_out);
503 if (LONE_DASH(fname_out)) {
506 opt &= ~WGET_OPT_CONTINUE;
509 #if ENABLE_FEATURE_WGET_STATUSBAR
510 curfile = bb_get_last_path_component_nostrip(fname_out);
514 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
515 bb_error_msg_and_die("cannot specify continue (-c) without a filename (-O)"); */
517 /* Determine where to start transfer */
518 if (opt & WGET_OPT_CONTINUE) {
519 output_fd = open(fname_out, O_WRONLY);
520 if (output_fd >= 0) {
521 beg_range = xlseek(output_fd, 0, SEEK_END);
523 /* File doesn't exist. We do not create file here yet.
524 We are not sure it exists on remote side
527 /* We want to do exactly _one_ DNS lookup, since some
528 * sites (e.g. ftp.us.debian.org) use round-robin DNS
529 * and we want to connect to only one IP... */
530 lsa = xhost2sockaddr(server.host, server.port);
531 if (!(opt & WGET_OPT_QUIET)) {
532 fprintf(stderr, "Connecting to %s (%s)\n", server.host,
533 xmalloc_sockaddr2dotted(&lsa->u.sa));
534 /* We leak result of xmalloc_sockaddr2dotted */
537 if (use_proxy || !target.is_ftp) {
546 bb_error_msg_and_die("too many redirections");
548 /* Open socket to http server */
549 if (sfp) fclose(sfp);
550 sfp = open_socket(lsa);
552 /* Send HTTP request. */
554 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
555 target.is_ftp ? "f" : "ht", target.host,
558 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
561 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
562 target.host, user_agent);
564 #if ENABLE_FEATURE_WGET_AUTHENTICATION
566 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
567 base64enc_512(buf, target.user));
569 if (use_proxy && server.user) {
570 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
571 base64enc_512(buf, server.user));
576 fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
577 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
579 fputs(extra_headers, sfp);
581 fprintf(sfp, "Connection: close\r\n\r\n");
584 * Retrieve HTTP response line and check for "200" status code.
587 if (fgets(buf, sizeof(buf), sfp) == NULL)
588 bb_error_msg_and_die("no response from server");
591 str = skip_non_whitespace(str);
592 str = skip_whitespace(str);
593 // FIXME: no error check
594 // xatou wouldn't work: "200 OK"
599 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
600 /* eat all remaining headers */;
604 case 300: /* redirection */
614 /* Show first line only and kill any ESC tricks */
615 buf[strcspn(buf, "\n\r\x1b")] = '\0';
616 bb_error_msg_and_die("server returned error: %s", buf);
620 * Retrieve HTTP headers.
622 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
623 /* gethdr did already convert the "FOO:" string to lowercase */
624 smalluint key = index_in_strings(keywords, *&buf) + 1;
625 if (key == KEY_content_length) {
626 content_len = BB_STRTOOFF(str, NULL, 10);
627 if (errno || content_len < 0) {
628 bb_error_msg_and_die("content-length %s is garbage", str);
633 if (key == KEY_transfer_encoding) {
634 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
635 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
636 chunked = got_clen = 1;
638 if (key == KEY_location) {
640 /* free(target.allocated); */
641 target.path = /* target.allocated = */ xstrdup(str+1);
643 parse_url(str, &target);
644 if (use_proxy == 0) {
645 server.host = target.host;
646 server.port = target.port;
649 lsa = xhost2sockaddr(server.host, server.port);
654 } while (status >= 300);
664 target.user = xstrdup("anonymous:busybox@");
666 sfp = open_socket(lsa);
667 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
668 bb_error_msg_and_die("%s", buf+4);
671 * Splitting username:password pair,
674 str = strchr(target.user, ':');
677 switch (ftpcmd("USER ", target.user, sfp, buf)) {
681 if (ftpcmd("PASS ", str, sfp, buf) == 230)
683 /* fall through (failed login) */
685 bb_error_msg_and_die("ftp login: %s", buf+4);
688 ftpcmd("TYPE I", NULL, sfp, buf);
693 if (ftpcmd("SIZE ", target.path, sfp, buf) == 213) {
694 content_len = BB_STRTOOFF(buf+4, NULL, 10);
695 if (errno || content_len < 0) {
696 bb_error_msg_and_die("SIZE value is garbage");
702 * Entering passive mode
704 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
706 bb_error_msg_and_die("bad response to %s: %s", "PASV", buf);
708 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
709 // Server's IP is N1.N2.N3.N4 (we ignore it)
710 // Server's port for data connection is P1*256+P2
711 str = strrchr(buf, ')');
712 if (str) str[0] = '\0';
713 str = strrchr(buf, ',');
714 if (!str) goto pasv_error;
715 port = xatou_range(str+1, 0, 255);
717 str = strrchr(buf, ',');
718 if (!str) goto pasv_error;
719 port += xatou_range(str+1, 0, 255) * 256;
720 set_nport(lsa, htons(port));
721 dfp = open_socket(lsa);
724 sprintf(buf, "REST %"OFF_FMT"d", beg_range);
725 if (ftpcmd(buf, NULL, sfp, buf) == 350)
726 content_len -= beg_range;
729 if (ftpcmd("RETR ", target.path, sfp, buf) > 150)
730 bb_error_msg_and_die("bad response to %s: %s", "RETR", buf);
733 if (opt & WGET_OPT_SPIDER) {
734 if (ENABLE_FEATURE_CLEAN_UP)
743 /* Do it before progressmeter (want to have nice error message) */
745 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
746 /* compat with wget: -O FILE can overwrite */
747 if (opt & WGET_OPT_OUTNAME)
748 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
749 output_fd = xopen(fname_out, o_flags);
752 if (!(opt & WGET_OPT_QUIET))
758 /* Loops only if chunked */
760 while (content_len > 0 || !got_clen) {
762 unsigned rdsz = sizeof(buf);
764 if (content_len < sizeof(buf) && (chunked || got_clen))
765 rdsz = (unsigned)content_len;
766 n = safe_fread(buf, rdsz, dfp);
769 /* perror will not work: ferror doesn't set errno */
770 bb_error_msg_and_die(bb_msg_read_error);
774 xwrite(output_fd, buf, n);
775 #if ENABLE_FEATURE_WGET_STATUSBAR
785 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
787 safe_fgets(buf, sizeof(buf), dfp);
788 content_len = STRTOOFF(buf, NULL, 16);
789 /* FIXME: error check? */
790 if (content_len == 0)
791 break; /* all done! */
794 if (!(opt & WGET_OPT_QUIET))
797 if ((use_proxy == 0) && target.is_ftp) {
799 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
800 bb_error_msg_and_die("ftp error: %s", buf+4);
801 ftpcmd("QUIT", NULL, sfp, buf);