1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
12 // May be used if we ever want to free() all xstrdup()s...
13 /* char *allocated; */
22 /* Globals (can be accessed from signal handlers) */
24 off_t content_len; /* Content-length of the file */
25 off_t beg_range; /* Range at which continue begins */
26 #if ENABLE_FEATURE_WGET_STATUSBAR
29 off_t transferred; /* Number of bytes transferred so far */
30 const char *curfile; /* Name of current file being transferred */
31 unsigned lastupdate_sec;
34 smallint chunked; /* chunked transfer encoding */
36 #define G (*(struct globals*)&bb_common_bufsiz1)
37 struct BUG_G_too_big {
38 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
40 #define content_len (G.content_len )
41 #define beg_range (G.beg_range )
42 #define lastsize (G.lastsize )
43 #define totalsize (G.totalsize )
44 #define transferred (G.transferred )
45 #define curfile (G.curfile )
46 #define lastupdate_sec (G.lastupdate_sec )
47 #define start_sec (G.start_sec )
48 #define chunked (G.chunked )
49 #define INIT_G() do { } while (0)
52 #if ENABLE_FEATURE_WGET_STATUSBAR
54 STALLTIME = 5 /* Seconds when xfer considered "stalled" */
57 static unsigned int get_tty2_width(void)
60 get_terminal_width_height(2, &width, NULL);
64 static void progress_meter(int flag)
66 /* We can be called from signal handler */
67 int save_errno = errno;
69 unsigned since_last_update, elapsed;
73 if (flag == -1) { /* first call to progress_meter */
74 start_sec = monotonic_sec();
75 lastupdate_sec = start_sec;
77 totalsize = content_len + beg_range; /* as content_len changes.. */
81 if (totalsize != 0 && !chunked) {
82 /* long long helps to have it working even if !LFS */
83 ratio = (unsigned) (100ULL * (transferred+beg_range) / totalsize);
84 if (ratio > 100) ratio = 100;
87 fprintf(stderr, "\r%-20.20s%4d%% ", curfile, ratio);
89 barlength = get_tty2_width() - 49;
91 /* god bless gcc for variable arrays :) */
92 i = barlength * ratio / 100;
97 fprintf(stderr, "|%s%*s|", buf, barlength - i, "");
101 abbrevsize = transferred + beg_range;
102 while (abbrevsize >= 100000) {
106 /* see http://en.wikipedia.org/wiki/Tera */
107 fprintf(stderr, "%6d%c ", (int)abbrevsize, " kMGTPEZY"[i]);
109 // Nuts! Ain't it easier to update progress meter ONLY when we transferred++?
111 elapsed = monotonic_sec();
112 since_last_update = elapsed - lastupdate_sec;
113 if (transferred > lastsize) {
114 lastupdate_sec = elapsed;
115 lastsize = transferred;
116 if (since_last_update >= STALLTIME) {
117 /* We "cut off" these seconds from elapsed time
118 * by adjusting start time */
119 start_sec += since_last_update;
121 since_last_update = 0; /* we are un-stalled now */
123 elapsed -= start_sec; /* now it's "elapsed since start" */
125 if (since_last_update >= STALLTIME) {
126 fprintf(stderr, " - stalled -");
128 off_t to_download = totalsize - beg_range;
129 if (transferred <= 0 || (int)elapsed <= 0 || transferred > to_download || chunked) {
130 fprintf(stderr, "--:--:-- ETA");
132 /* to_download / (transferred/elapsed) - elapsed: */
133 int eta = (int) ((unsigned long long)to_download*elapsed/transferred - elapsed);
134 /* (long long helps to have working ETA even if !LFS) */
136 fprintf(stderr, "%02d:%02d:%02d ETA", eta / 3600, i / 60, i % 60);
141 /* last call to progress_meter */
146 if (flag == -1) { /* first call to progress_meter */
147 signal_SA_RESTART_empty_mask(SIGALRM, progress_meter);
154 /* Original copyright notice which applies to the CONFIG_FEATURE_WGET_STATUSBAR stuff,
155 * much of which was blatantly stolen from openssh. */
157 * Copyright (c) 1992, 1993
158 * The Regents of the University of California. All rights reserved.
160 * Redistribution and use in source and binary forms, with or without
161 * modification, are permitted provided that the following conditions
163 * 1. Redistributions of source code must retain the above copyright
164 * notice, this list of conditions and the following disclaimer.
165 * 2. Redistributions in binary form must reproduce the above copyright
166 * notice, this list of conditions and the following disclaimer in the
167 * documentation and/or other materials provided with the distribution.
169 * 3. <BSD Advertising Clause omitted per the July 22, 1999 licensing change
170 * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change>
172 * 4. Neither the name of the University nor the names of its contributors
173 * may be used to endorse or promote products derived from this software
174 * without specific prior written permission.
176 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
177 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
178 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
179 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
180 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
181 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
182 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
183 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
184 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
185 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
189 #else /* FEATURE_WGET_STATUSBAR */
191 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
196 /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
197 * and a short count if an eof or non-interrupt error is encountered. */
198 static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
201 char *p = (char*)ptr;
206 ret = fread(p, 1, nmemb, stream);
209 } while (nmemb && ferror(stream) && errno == EINTR);
211 return p - (char*)ptr;
214 /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
215 * Returns S, or NULL if an eof or non-interrupt error is encountered. */
216 static char *safe_fgets(char *s, int size, FILE *stream)
223 ret = fgets(s, size, stream);
224 } while (ret == NULL && ferror(stream) && errno == EINTR);
229 #if ENABLE_FEATURE_WGET_AUTHENTICATION
230 /* Base64-encode character string. buf is assumed to be char buf[512]. */
231 static char *base64enc_512(char buf[512], const char *str)
233 unsigned len = strlen(str);
234 if (len > 512/4*3 - 10) /* paranoia */
236 bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
242 static FILE *open_socket(len_and_sockaddr *lsa)
246 /* glibc 2.4 seems to try seeking on it - ??! */
247 /* hopefully it understands what ESPIPE means... */
248 fp = fdopen(xconnect_stream(lsa), "r+");
250 bb_perror_msg_and_die("fdopen");
256 static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
261 fprintf(fp, "%s%s\r\n", s1, s2);
268 if (fgets(buf, 510, fp) == NULL) {
269 bb_perror_msg_and_die("error getting response");
271 buf_ptr = strstr(buf, "\r\n");
275 } while (!isdigit(buf[0]) || buf[3] != ' ');
278 result = xatoi_u(buf);
284 static void parse_url(char *src_url, struct host_info *h)
288 /* h->allocated = */ url = xstrdup(src_url);
290 if (strncmp(url, "http://", 7) == 0) {
291 h->port = bb_lookup_port("http", "tcp", 80);
294 } else if (strncmp(url, "ftp://", 6) == 0) {
295 h->port = bb_lookup_port("ftp", "tcp", 21);
299 bb_error_msg_and_die("not an http or ftp url: %s", url);
302 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
303 // 'GET /?var=a/b HTTP/1.0'
304 // and saves 'index.html?var=a%2Fb' (we save 'b')
305 // wget 'http://busybox.net?login=john@doe':
306 // request: 'GET /?login=john@doe HTTP/1.0'
307 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
308 // wget 'http://busybox.net#test/test':
309 // request: 'GET / HTTP/1.0'
310 // saves: 'index.html' (we save 'test')
312 // We also don't add unique .N suffix if file exists...
313 sp = strchr(h->host, '/');
314 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
315 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
318 } else if (*sp == '/') {
321 } else { // '#' or '?'
322 // http://busybox.net?login=john@doe is a valid URL
323 // memmove converts to:
324 // http:/busybox.nett?login=john@doe...
325 memmove(h->host - 1, h->host, sp - h->host);
331 sp = strrchr(h->host, '@');
343 static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
350 /* retrieve header line */
351 if (fgets(buf, bufsiz, fp) == NULL)
354 /* see if we are at the end of the headers */
355 for (s = buf; *s == '\r'; ++s)
360 /* convert the header name to lower case */
361 for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s)
364 /* verify we are at the end of the header name */
366 bb_error_msg_and_die("bad header line: %s", buf);
368 /* locate the start of the header value */
370 hdrval = skip_whitespace(s);
372 /* locate the end of header */
373 while (*s && *s != '\r' && *s != '\n')
376 /* end of header found */
382 /* Rats! The buffer isn't big enough to hold the entire header value. */
383 while (c = getc(fp), c != EOF && c != '\n')
390 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
391 int wget_main(int argc UNUSED_PARAM, char **argv)
394 struct host_info server, target;
395 len_and_sockaddr *lsa;
402 char *dir_prefix = NULL;
403 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
404 char *extra_headers = NULL;
405 llist_t *headers_llist = NULL;
407 FILE *sfp = NULL; /* socket to web/ftp server */
408 FILE *dfp; /* socket to ftp server (data) */
409 char *fname_out; /* where to direct output (-O) */
410 bool got_clen = 0; /* got content-length: from server */
412 bool use_proxy = 1; /* Use proxies if env vars are set */
413 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
414 const char *user_agent = "Wget";/* "User-Agent" header field */
416 static const char keywords[] ALIGN1 =
417 "content-length\0""transfer-encoding\0""chunked\0""location\0";
419 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
422 WGET_OPT_CONTINUE = 0x1,
423 WGET_OPT_SPIDER = 0x2,
424 WGET_OPT_QUIET = 0x4,
425 WGET_OPT_OUTNAME = 0x8,
426 WGET_OPT_PREFIX = 0x10,
427 WGET_OPT_PROXY = 0x20,
428 WGET_OPT_USER_AGENT = 0x40,
429 WGET_OPT_PASSIVE = 0x80,
430 WGET_OPT_HEADER = 0x100,
432 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
433 static const char wget_longopts[] ALIGN1 =
434 /* name, has_arg, val */
435 "continue\0" No_argument "c"
436 "spider\0" No_argument "s"
437 "quiet\0" No_argument "q"
438 "output-document\0" Required_argument "O"
439 "directory-prefix\0" Required_argument "P"
440 "proxy\0" Required_argument "Y"
441 "user-agent\0" Required_argument "U"
442 "passive-ftp\0" No_argument "\xff"
443 "header\0" Required_argument "\xfe"
449 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
450 applet_long_options = wget_longopts;
452 /* server.allocated = target.allocated = NULL; */
453 opt_complementary = "-1" USE_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
454 opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:",
455 &fname_out, &dir_prefix,
456 &proxy_flag, &user_agent,
457 NULL, /* -t RETRIES */
458 NULL /* -T NETWORK_READ_TIMEOUT */
459 USE_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
461 if (strcmp(proxy_flag, "off") == 0) {
462 /* Use the proxy if necessary */
465 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
469 llist_t *ll = headers_llist;
471 size += strlen(ll->data) + 2;
474 extra_headers = cp = xmalloc(size);
475 while (headers_llist) {
476 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
481 parse_url(argv[optind], &target);
482 server.host = target.host;
483 server.port = target.port;
485 /* Use the proxy if necessary */
487 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
488 if (proxy && *proxy) {
489 parse_url(proxy, &server);
495 /* Guess an output filename, if there was no -O FILE */
496 if (!(opt & WGET_OPT_OUTNAME)) {
497 fname_out = bb_get_last_path_component_nostrip(target.path);
498 /* handle "wget http://kernel.org//" */
499 if (fname_out[0] == '/' || !fname_out[0])
500 fname_out = (char*)"index.html";
501 /* -P DIR is considered only if there was no -O FILE */
503 fname_out = concat_path_file(dir_prefix, fname_out);
505 if (LONE_DASH(fname_out)) {
508 opt &= ~WGET_OPT_CONTINUE;
511 #if ENABLE_FEATURE_WGET_STATUSBAR
512 curfile = bb_get_last_path_component_nostrip(fname_out);
516 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
517 bb_error_msg_and_die("cannot specify continue (-c) without a filename (-O)"); */
519 /* Determine where to start transfer */
520 if (opt & WGET_OPT_CONTINUE) {
521 output_fd = open(fname_out, O_WRONLY);
522 if (output_fd >= 0) {
523 beg_range = xlseek(output_fd, 0, SEEK_END);
525 /* File doesn't exist. We do not create file here yet.
526 We are not sure it exists on the remote side */
529 /* We want to do exactly _one_ DNS lookup, since some
530 * sites (e.g. ftp.us.debian.org) use round-robin DNS
531 * and we want to connect to only one IP... */
532 lsa = xhost2sockaddr(server.host, server.port);
533 if (!(opt & WGET_OPT_QUIET)) {
534 fprintf(stderr, "Connecting to %s (%s)\n", server.host,
535 xmalloc_sockaddr2dotted(&lsa->u.sa));
536 /* We leak result of xmalloc_sockaddr2dotted */
539 if (use_proxy || !target.is_ftp) {
548 bb_error_msg_and_die("too many redirections");
550 /* Open socket to http server */
551 if (sfp) fclose(sfp);
552 sfp = open_socket(lsa);
554 /* Send HTTP request. */
556 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
557 target.is_ftp ? "f" : "ht", target.host,
560 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
563 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
564 target.host, user_agent);
566 #if ENABLE_FEATURE_WGET_AUTHENTICATION
568 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
569 base64enc_512(buf, target.user));
571 if (use_proxy && server.user) {
572 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
573 base64enc_512(buf, server.user));
578 fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
579 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
581 fputs(extra_headers, sfp);
583 fprintf(sfp, "Connection: close\r\n\r\n");
586 * Retrieve HTTP response line and check for "200" status code.
589 if (fgets(buf, sizeof(buf), sfp) == NULL)
590 bb_error_msg_and_die("no response from server");
593 str = skip_non_whitespace(str);
594 str = skip_whitespace(str);
595 // FIXME: no error check
596 // xatou wouldn't work: "200 OK"
601 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
602 /* eat all remaining headers */;
606 Response 204 doesn't say "null file", it says "metadata
607 has changed but data didn't":
609 "10.2.5 204 No Content
610 The server has fulfilled the request but does not need to return
611 an entity-body, and might want to return updated metainformation.
612 The response MAY include new or updated metainformation in the form
613 of entity-headers, which if present SHOULD be associated with
614 the requested variant.
616 If the client is a user agent, it SHOULD NOT change its document
617 view from that which caused the request to be sent. This response
618 is primarily intended to allow input for actions to take place
619 without causing a change to the user agent's active document view,
620 although any new or updated metainformation SHOULD be applied
621 to the document currently in the user agent's active view.
623 The 204 response MUST NOT include a message-body, and thus
624 is always terminated by the first empty line after the header fields."
626 However, in real world it was observed that some web servers
627 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
631 case 300: /* redirection */
641 /* Show first line only and kill any ESC tricks */
642 buf[strcspn(buf, "\n\r\x1b")] = '\0';
643 bb_error_msg_and_die("server returned error: %s", buf);
647 * Retrieve HTTP headers.
649 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
650 /* gethdr has already converted the "FOO:" string to lowercase */
651 smalluint key = index_in_strings(keywords, *&buf) + 1;
652 if (key == KEY_content_length) {
653 content_len = BB_STRTOOFF(str, NULL, 10);
654 if (errno || content_len < 0) {
655 bb_error_msg_and_die("content-length %s is garbage", str);
660 if (key == KEY_transfer_encoding) {
661 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
662 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
663 chunked = got_clen = 1;
665 if (key == KEY_location) {
667 /* free(target.allocated); */
668 target.path = /* target.allocated = */ xstrdup(str+1);
670 parse_url(str, &target);
671 if (use_proxy == 0) {
672 server.host = target.host;
673 server.port = target.port;
676 lsa = xhost2sockaddr(server.host, server.port);
681 } while (status >= 300);
691 target.user = xstrdup("anonymous:busybox@");
693 sfp = open_socket(lsa);
694 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
695 bb_error_msg_and_die("%s", buf+4);
698 * Splitting username:password pair,
701 str = strchr(target.user, ':');
704 switch (ftpcmd("USER ", target.user, sfp, buf)) {
708 if (ftpcmd("PASS ", str, sfp, buf) == 230)
710 /* fall through (failed login) */
712 bb_error_msg_and_die("ftp login: %s", buf+4);
715 ftpcmd("TYPE I", NULL, sfp, buf);
720 if (ftpcmd("SIZE ", target.path, sfp, buf) == 213) {
721 content_len = BB_STRTOOFF(buf+4, NULL, 10);
722 if (errno || content_len < 0) {
723 bb_error_msg_and_die("SIZE value is garbage");
729 * Entering passive mode
731 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
733 bb_error_msg_and_die("bad response to %s: %s", "PASV", buf);
735 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
736 // Server's IP is N1.N2.N3.N4 (we ignore it)
737 // Server's port for data connection is P1*256+P2
738 str = strrchr(buf, ')');
739 if (str) str[0] = '\0';
740 str = strrchr(buf, ',');
741 if (!str) goto pasv_error;
742 port = xatou_range(str+1, 0, 255);
744 str = strrchr(buf, ',');
745 if (!str) goto pasv_error;
746 port += xatou_range(str+1, 0, 255) * 256;
747 set_nport(lsa, htons(port));
748 dfp = open_socket(lsa);
751 sprintf(buf, "REST %"OFF_FMT"d", beg_range);
752 if (ftpcmd(buf, NULL, sfp, buf) == 350)
753 content_len -= beg_range;
756 if (ftpcmd("RETR ", target.path, sfp, buf) > 150)
757 bb_error_msg_and_die("bad response to %s: %s", "RETR", buf);
760 if (opt & WGET_OPT_SPIDER) {
761 if (ENABLE_FEATURE_CLEAN_UP)
770 /* Do it before progress_meter (want to have nice error message) */
772 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
773 /* compat with wget: -O FILE can overwrite */
774 if (opt & WGET_OPT_OUTNAME)
775 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
776 output_fd = xopen(fname_out, o_flags);
779 if (!(opt & WGET_OPT_QUIET))
785 /* Loops only if chunked */
787 while (content_len > 0 || !got_clen) {
789 unsigned rdsz = sizeof(buf);
791 if (content_len < sizeof(buf) && (chunked || got_clen))
792 rdsz = (unsigned)content_len;
793 n = safe_fread(buf, rdsz, dfp);
796 /* perror will not work: ferror doesn't set errno */
797 bb_error_msg_and_die(bb_msg_read_error);
801 xwrite(output_fd, buf, n);
802 #if ENABLE_FEATURE_WGET_STATUSBAR
812 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
814 safe_fgets(buf, sizeof(buf), dfp);
815 content_len = STRTOOFF(buf, NULL, 16);
816 /* FIXME: error check? */
817 if (content_len == 0)
818 break; /* all done! */
821 if (!(opt & WGET_OPT_QUIET))
824 if ((use_proxy == 0) && target.is_ftp) {
826 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
827 bb_error_msg_and_die("ftp error: %s", buf+4);
828 ftpcmd("QUIT", NULL, sfp, buf);