1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
9 #include <getopt.h> /* for struct option */
13 // May be used if we ever want to free() all xstrdup()s...
14 /* char *allocated; */
23 /* Globals (can be accessed from signal handlers) */
25 off_t content_len; /* Content-length of the file */
26 off_t beg_range; /* Range at which continue begins */
27 #if ENABLE_FEATURE_WGET_STATUSBAR
30 off_t transferred; /* Number of bytes transferred so far */
31 const char *curfile; /* Name of current file being transferred */
32 unsigned lastupdate_sec;
35 smallint chunked; /* chunked transfer encoding */
37 #define G (*(struct globals*)&bb_common_bufsiz1)
38 struct BUG_G_too_big {
39 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
41 #define content_len (G.content_len )
42 #define beg_range (G.beg_range )
43 #define lastsize (G.lastsize )
44 #define totalsize (G.totalsize )
45 #define transferred (G.transferred )
46 #define curfile (G.curfile )
47 #define lastupdate_sec (G.lastupdate_sec )
48 #define start_sec (G.start_sec )
49 #define chunked (G.chunked )
50 #define INIT_G() do { } while (0)
53 #if ENABLE_FEATURE_WGET_STATUSBAR
55 STALLTIME = 5 /* Seconds when xfer considered "stalled" */
58 static int getttywidth(void)
61 get_terminal_width_height(0, &width, NULL);
65 static void progressmeter(int flag)
67 /* We can be called from signal handler */
68 int save_errno = errno;
70 unsigned since_last_update, elapsed;
74 if (flag == -1) { /* first call to progressmeter */
75 start_sec = monotonic_sec();
76 lastupdate_sec = start_sec;
78 totalsize = content_len + beg_range; /* as content_len changes.. */
82 if (totalsize != 0 && !chunked) {
83 /* long long helps to have it working even if !LFS */
84 ratio = (unsigned) (100ULL * (transferred+beg_range) / totalsize);
85 if (ratio > 100) ratio = 100;
88 fprintf(stderr, "\r%-20.20s%4d%% ", curfile, ratio);
90 barlength = getttywidth() - 49;
92 /* god bless gcc for variable arrays :) */
93 i = barlength * ratio / 100;
98 fprintf(stderr, "|%s%*s|", buf, barlength - i, "");
102 abbrevsize = transferred + beg_range;
103 while (abbrevsize >= 100000) {
107 /* see http://en.wikipedia.org/wiki/Tera */
108 fprintf(stderr, "%6d%c ", (int)abbrevsize, " kMGTPEZY"[i]);
110 // Nuts! Ain't it easier to update progress meter ONLY when we transferred++?
112 elapsed = monotonic_sec();
113 since_last_update = elapsed - lastupdate_sec;
114 if (transferred > lastsize) {
115 lastupdate_sec = elapsed;
116 lastsize = transferred;
117 if (since_last_update >= STALLTIME) {
118 /* We "cut off" these seconds from elapsed time
119 * by adjusting start time */
120 start_sec += since_last_update;
122 since_last_update = 0; /* we are un-stalled now */
124 elapsed -= start_sec; /* now it's "elapsed since start" */
126 if (since_last_update >= STALLTIME) {
127 fprintf(stderr, " - stalled -");
129 off_t to_download = totalsize - beg_range;
130 if (transferred <= 0 || (int)elapsed <= 0 || transferred > to_download || chunked) {
131 fprintf(stderr, "--:--:-- ETA");
133 /* to_download / (transferred/elapsed) - elapsed: */
134 int eta = (int) ((unsigned long long)to_download*elapsed/transferred - elapsed);
135 /* (long long helps to have working ETA even if !LFS) */
137 fprintf(stderr, "%02d:%02d:%02d ETA", eta / 3600, i / 60, i % 60);
142 /* last call to progressmeter */
148 /* first call to progressmeter */
150 sa.sa_handler = progressmeter;
151 sigemptyset(&sa.sa_mask);
152 sa.sa_flags = SA_RESTART;
153 sigaction(SIGALRM, &sa, NULL);
160 /* Original copyright notice which applies to the CONFIG_FEATURE_WGET_STATUSBAR stuff,
161 * much of which was blatantly stolen from openssh. */
163 * Copyright (c) 1992, 1993
164 * The Regents of the University of California. All rights reserved.
166 * Redistribution and use in source and binary forms, with or without
167 * modification, are permitted provided that the following conditions
169 * 1. Redistributions of source code must retain the above copyright
170 * notice, this list of conditions and the following disclaimer.
171 * 2. Redistributions in binary form must reproduce the above copyright
172 * notice, this list of conditions and the following disclaimer in the
173 * documentation and/or other materials provided with the distribution.
175 * 3. <BSD Advertising Clause omitted per the July 22, 1999 licensing change
176 * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change>
178 * 4. Neither the name of the University nor the names of its contributors
179 * may be used to endorse or promote products derived from this software
180 * without specific prior written permission.
182 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
183 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
184 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
185 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
186 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
187 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
188 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
189 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
190 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
191 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
195 #else /* FEATURE_WGET_STATUSBAR */
197 static ALWAYS_INLINE void progressmeter(int flag) { }
202 /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
203 * and a short count if an eof or non-interrupt error is encountered. */
204 static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
207 char *p = (char*)ptr;
211 ret = fread(p, 1, nmemb, stream);
214 } while (nmemb && ferror(stream) && errno == EINTR);
216 return p - (char*)ptr;
219 /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
220 * Returns S, or NULL if an eof or non-interrupt error is encountered. */
221 static char *safe_fgets(char *s, int size, FILE *stream)
227 ret = fgets(s, size, stream);
228 } while (ret == NULL && ferror(stream) && errno == EINTR);
233 #if ENABLE_FEATURE_WGET_AUTHENTICATION
234 /* Base64-encode character string. buf is assumed to be char buf[512]. */
235 static char *base64enc_512(char buf[512], const char *str)
237 unsigned len = strlen(str);
238 if (len > 512/4*3 - 10) /* paranoia */
240 bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
246 static FILE *open_socket(len_and_sockaddr *lsa)
250 /* glibc 2.4 seems to try seeking on it - ??! */
251 /* hopefully it understands what ESPIPE means... */
252 fp = fdopen(xconnect_stream(lsa), "r+");
254 bb_perror_msg_and_die("fdopen");
260 static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
265 fprintf(fp, "%s%s\r\n", s1, s2);
272 if (fgets(buf, 510, fp) == NULL) {
273 bb_perror_msg_and_die("error getting response");
275 buf_ptr = strstr(buf, "\r\n");
279 } while (!isdigit(buf[0]) || buf[3] != ' ');
282 result = xatoi_u(buf);
288 static void parse_url(char *src_url, struct host_info *h)
292 /* h->allocated = */ url = xstrdup(src_url);
294 if (strncmp(url, "http://", 7) == 0) {
295 h->port = bb_lookup_port("http", "tcp", 80);
298 } else if (strncmp(url, "ftp://", 6) == 0) {
299 h->port = bb_lookup_port("ftp", "tcp", 21);
303 bb_error_msg_and_die("not an http or ftp url: %s", url);
306 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
307 // 'GET /?var=a/b HTTP/1.0'
308 // and saves 'index.html?var=a%2Fb' (we save 'b')
309 // wget 'http://busybox.net?login=john@doe':
310 // request: 'GET /?login=john@doe HTTP/1.0'
311 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
312 // wget 'http://busybox.net#test/test':
313 // request: 'GET / HTTP/1.0'
314 // saves: 'index.html' (we save 'test')
316 // We also don't add unique .N suffix if file exists...
317 sp = strchr(h->host, '/');
318 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
319 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
322 } else if (*sp == '/') {
325 } else { // '#' or '?'
326 // http://busybox.net?login=john@doe is a valid URL
327 // memmove converts to:
328 // http:/busybox.nett?login=john@doe...
329 memmove(h->host - 1, h->host, sp - h->host);
335 sp = strrchr(h->host, '@');
347 static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
354 /* retrieve header line */
355 if (fgets(buf, bufsiz, fp) == NULL)
358 /* see if we are at the end of the headers */
359 for (s = buf; *s == '\r'; ++s)
364 /* convert the header name to lower case */
365 for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s)
368 /* verify we are at the end of the header name */
370 bb_error_msg_and_die("bad header line: %s", buf);
372 /* locate the start of the header value */
374 hdrval = skip_whitespace(s);
376 /* locate the end of header */
377 while (*s && *s != '\r' && *s != '\n')
380 /* end of header found */
386 /* Rats! The buffer isn't big enough to hold the entire header value. */
387 while (c = getc(fp), c != EOF && c != '\n')
394 int wget_main(int argc, char **argv);
395 int wget_main(int argc, char **argv)
398 struct host_info server, target;
399 len_and_sockaddr *lsa;
406 char *dir_prefix = NULL;
407 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
408 char *extra_headers = NULL;
409 llist_t *headers_llist = NULL;
411 FILE *sfp = NULL; /* socket to web/ftp server */
412 FILE *dfp = NULL; /* socket to ftp server (data) */
413 char *fname_out = NULL; /* where to direct output (-O) */
414 bool got_clen = 0; /* got content-length: from server */
416 bool use_proxy = 1; /* Use proxies if env vars are set */
417 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
418 const char *user_agent = "Wget";/* "User-Agent" header field */
420 static const char keywords[] ALIGN1 =
421 "content-length\0""transfer-encoding\0""chunked\0""location\0";
423 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
426 WGET_OPT_CONTINUE = 0x1,
427 WGET_OPT_SPIDER = 0x2,
428 WGET_OPT_QUIET = 0x4,
429 WGET_OPT_OUTNAME = 0x8,
430 WGET_OPT_PREFIX = 0x10,
431 WGET_OPT_PROXY = 0x20,
432 WGET_OPT_USER_AGENT = 0x40,
433 WGET_OPT_PASSIVE = 0x80,
434 WGET_OPT_HEADER = 0x100,
436 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
437 static const char wget_longopts[] ALIGN1 =
438 /* name, has_arg, val */
439 "continue\0" No_argument "c"
440 "spider\0" No_argument "s"
441 "quiet\0" No_argument "q"
442 "output-document\0" Required_argument "O"
443 "directory-prefix\0" Required_argument "P"
444 "proxy\0" Required_argument "Y"
445 "user-agent\0" Required_argument "U"
446 "passive-ftp\0" No_argument "\xff"
447 "header\0" Required_argument "\xfe"
453 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
454 applet_long_options = wget_longopts;
456 /* server.allocated = target.allocated = NULL; */
457 opt_complementary = "-1" USE_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
458 opt = getopt32(argv, "csqO:P:Y:U:",
459 &fname_out, &dir_prefix,
460 &proxy_flag, &user_agent
461 USE_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
463 if (strcmp(proxy_flag, "off") == 0) {
464 /* Use the proxy if necessary */
467 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
471 llist_t *ll = headers_llist;
473 size += strlen(ll->data) + 2;
476 extra_headers = cp = xmalloc(size);
477 while (headers_llist) {
478 cp += sprintf(cp, "%s\r\n", headers_llist->data);
479 headers_llist = headers_llist->link;
484 parse_url(argv[optind], &target);
485 server.host = target.host;
486 server.port = target.port;
488 /* Use the proxy if necessary */
490 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
491 if (proxy && *proxy) {
492 parse_url(proxy, &server);
498 /* Guess an output filename, if there was no -O FILE */
500 fname_out = bb_get_last_path_component_nostrip(target.path);
501 /* handle "wget http://kernel.org//" */
502 if (fname_out[0] == '/' || !fname_out[0])
503 fname_out = (char*)"index.html";
504 /* -P DIR is considered only if there was no -O FILE */
506 fname_out = concat_path_file(dir_prefix, fname_out);
508 #if ENABLE_FEATURE_WGET_STATUSBAR
509 curfile = bb_get_last_path_component_nostrip(fname_out);
513 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
514 bb_error_msg_and_die("cannot specify continue (-c) without a filename (-O)"); */
516 /* Determine where to start transfer */
517 if (LONE_DASH(fname_out)) {
519 opt &= ~WGET_OPT_CONTINUE;
521 if (opt & WGET_OPT_CONTINUE) {
522 output_fd = open(fname_out, O_WRONLY);
523 if (output_fd >= 0) {
524 beg_range = xlseek(output_fd, 0, SEEK_END);
526 /* File doesn't exist. We do not create file here yet.
527 We are not sure it exists on remote side */
530 /* We want to do exactly _one_ DNS lookup, since some
531 * sites (e.g. ftp.us.debian.org) use round-robin DNS
532 * and we want to connect to only one IP... */
533 lsa = xhost2sockaddr(server.host, server.port);
534 if (!(opt & WGET_OPT_QUIET)) {
535 fprintf(stderr, "Connecting to %s (%s)\n", server.host,
536 xmalloc_sockaddr2dotted(&lsa->sa));
537 /* We leak result of xmalloc_sockaddr2dotted */
540 if (use_proxy || !target.is_ftp) {
549 bb_error_msg_and_die("too many redirections");
551 /* Open socket to http server */
552 if (sfp) fclose(sfp);
553 sfp = open_socket(lsa);
555 /* Send HTTP request. */
557 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
558 target.is_ftp ? "f" : "ht", target.host,
561 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
564 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
565 target.host, user_agent);
567 #if ENABLE_FEATURE_WGET_AUTHENTICATION
569 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
570 base64enc_512(buf, target.user));
572 if (use_proxy && server.user) {
573 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
574 base64enc_512(buf, server.user));
579 fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
580 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
582 fputs(extra_headers, sfp);
584 fprintf(sfp, "Connection: close\r\n\r\n");
587 * Retrieve HTTP response line and check for "200" status code.
590 if (fgets(buf, sizeof(buf), sfp) == NULL)
591 bb_error_msg_and_die("no response from server");
594 str = skip_non_whitespace(str);
595 str = skip_whitespace(str);
596 // FIXME: no error check
597 // xatou wouldn't work: "200 OK"
602 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
603 /* eat all remaining headers */;
607 case 300: /* redirection */
617 /* Show first line only and kill any ESC tricks */
618 buf[strcspn(buf, "\n\r\x1b")] = '\0';
619 bb_error_msg_and_die("server returned error: %s", buf);
623 * Retrieve HTTP headers.
625 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
626 /* gethdr has already converted the "FOO:" string to lowercase */
627 smalluint key = index_in_strings(keywords, *&buf) + 1;
628 if (key == KEY_content_length) {
629 content_len = BB_STRTOOFF(str, NULL, 10);
630 if (errno || content_len < 0) {
631 bb_error_msg_and_die("content-length %s is garbage", str);
636 if (key == KEY_transfer_encoding) {
637 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
638 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
639 chunked = got_clen = 1;
641 if (key == KEY_location) {
643 /* free(target.allocated); */
644 target.path = /* target.allocated = */ xstrdup(str+1);
646 parse_url(str, &target);
647 if (use_proxy == 0) {
648 server.host = target.host;
649 server.port = target.port;
652 lsa = xhost2sockaddr(server.host, server.port);
657 } while (status >= 300);
667 target.user = xstrdup("anonymous:busybox@");
669 sfp = open_socket(lsa);
670 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
671 bb_error_msg_and_die("%s", buf+4);
674 * Splitting username:password pair,
677 str = strchr(target.user, ':');
680 switch (ftpcmd("USER ", target.user, sfp, buf)) {
684 if (ftpcmd("PASS ", str, sfp, buf) == 230)
686 /* FALLTHRU (failed login) */
688 bb_error_msg_and_die("ftp login: %s", buf+4);
691 ftpcmd("TYPE I", NULL, sfp, buf);
696 if (ftpcmd("SIZE ", target.path, sfp, buf) == 213) {
697 content_len = BB_STRTOOFF(buf+4, NULL, 10);
698 if (errno || content_len < 0) {
699 bb_error_msg_and_die("SIZE value is garbage");
705 * Entering passive mode
707 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
709 bb_error_msg_and_die("bad response to %s: %s", "PASV", buf);
711 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
712 // Server's IP is N1.N2.N3.N4 (we ignore it)
713 // Server's port for data connection is P1*256+P2
714 str = strrchr(buf, ')');
715 if (str) str[0] = '\0';
716 str = strrchr(buf, ',');
717 if (!str) goto pasv_error;
718 port = xatou_range(str+1, 0, 255);
720 str = strrchr(buf, ',');
721 if (!str) goto pasv_error;
722 port += xatou_range(str+1, 0, 255) * 256;
723 set_nport(lsa, htons(port));
724 dfp = open_socket(lsa);
727 sprintf(buf, "REST %"OFF_FMT"d", beg_range);
728 if (ftpcmd(buf, NULL, sfp, buf) == 350)
729 content_len -= beg_range;
732 if (ftpcmd("RETR ", target.path, sfp, buf) > 150)
733 bb_error_msg_and_die("bad response to %s: %s", "RETR", buf);
736 if (opt & WGET_OPT_SPIDER) {
737 if (ENABLE_FEATURE_CLEAN_UP)
746 /* Do it before progressmeter (want to have nice error message) */
748 output_fd = xopen(fname_out,
749 O_WRONLY|O_CREAT|O_EXCL|O_TRUNC);
751 if (!(opt & WGET_OPT_QUIET))
757 /* Loops only if chunked */
759 while (content_len > 0 || !got_clen) {
761 unsigned rdsz = sizeof(buf);
763 if (content_len < sizeof(buf) && (chunked || got_clen))
764 rdsz = (unsigned)content_len;
765 n = safe_fread(buf, rdsz, dfp);
768 /* perror will not work: ferror doesn't set errno */
769 bb_error_msg_and_die(bb_msg_read_error);
773 xwrite(output_fd, buf, n);
774 #if ENABLE_FEATURE_WGET_STATUSBAR
784 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
786 safe_fgets(buf, sizeof(buf), dfp);
787 content_len = STRTOOFF(buf, NULL, 16);
788 /* FIXME: error check? */
789 if (content_len == 0)
790 break; /* all done! */
793 if (!(opt & WGET_OPT_QUIET))
796 if ((use_proxy == 0) && target.is_ftp) {
798 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
799 bb_error_msg_and_die("ftp error: %s", buf+4);
800 ftpcmd("QUIT", NULL, sfp, buf);