1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
12 // May be used if we ever want to free() all xstrdup()s...
13 /* char *allocated; */
22 /* Globals (can be accessed from signal handlers) */
24 off_t content_len; /* Content-length of the file */
25 off_t beg_range; /* Range at which continue begins */
26 #if ENABLE_FEATURE_WGET_STATUSBAR
29 off_t transferred; /* Number of bytes transferred so far */
30 const char *curfile; /* Name of current file being transferred */
31 unsigned lastupdate_sec;
34 smallint chunked; /* chunked transfer encoding */
36 #define G (*(struct globals*)&bb_common_bufsiz1)
37 struct BUG_G_too_big {
38 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
40 #define content_len (G.content_len )
41 #define beg_range (G.beg_range )
42 #define lastsize (G.lastsize )
43 #define totalsize (G.totalsize )
44 #define transferred (G.transferred )
45 #define curfile (G.curfile )
46 #define lastupdate_sec (G.lastupdate_sec )
47 #define start_sec (G.start_sec )
48 #define chunked (G.chunked )
49 #define INIT_G() do { } while (0)
52 #if ENABLE_FEATURE_WGET_STATUSBAR
54 STALLTIME = 5 /* Seconds when xfer considered "stalled" */
57 static unsigned int getttywidth(void)
60 get_terminal_width_height(0, &width, NULL);
64 static void progressmeter(int flag)
66 /* We can be called from signal handler */
67 int save_errno = errno;
69 unsigned since_last_update, elapsed;
73 if (flag == -1) { /* first call to progressmeter */
74 start_sec = monotonic_sec();
75 lastupdate_sec = start_sec;
77 totalsize = content_len + beg_range; /* as content_len changes.. */
81 if (totalsize != 0 && !chunked) {
82 /* long long helps to have it working even if !LFS */
83 ratio = (unsigned) (100ULL * (transferred+beg_range) / totalsize);
84 if (ratio > 100) ratio = 100;
87 fprintf(stderr, "\r%-20.20s%4d%% ", curfile, ratio);
89 barlength = getttywidth() - 49;
91 /* god bless gcc for variable arrays :) */
92 i = barlength * ratio / 100;
97 fprintf(stderr, "|%s%*s|", buf, barlength - i, "");
101 abbrevsize = transferred + beg_range;
102 while (abbrevsize >= 100000) {
106 /* see http://en.wikipedia.org/wiki/Tera */
107 fprintf(stderr, "%6d%c ", (int)abbrevsize, " kMGTPEZY"[i]);
109 // Nuts! Ain't it easier to update progress meter ONLY when we transferred++?
111 elapsed = monotonic_sec();
112 since_last_update = elapsed - lastupdate_sec;
113 if (transferred > lastsize) {
114 lastupdate_sec = elapsed;
115 lastsize = transferred;
116 if (since_last_update >= STALLTIME) {
117 /* We "cut off" these seconds from elapsed time
118 * by adjusting start time */
119 start_sec += since_last_update;
121 since_last_update = 0; /* we are un-stalled now */
123 elapsed -= start_sec; /* now it's "elapsed since start" */
125 if (since_last_update >= STALLTIME) {
126 fprintf(stderr, " - stalled -");
128 off_t to_download = totalsize - beg_range;
129 if (transferred <= 0 || (int)elapsed <= 0 || transferred > to_download || chunked) {
130 fprintf(stderr, "--:--:-- ETA");
132 /* to_download / (transferred/elapsed) - elapsed: */
133 int eta = (int) ((unsigned long long)to_download*elapsed/transferred - elapsed);
134 /* (long long helps to have working ETA even if !LFS) */
136 fprintf(stderr, "%02d:%02d:%02d ETA", eta / 3600, i / 60, i % 60);
141 /* last call to progressmeter */
146 if (flag == -1) { /* first call to progressmeter */
147 signal_SA_RESTART_empty_mask(SIGALRM, progressmeter);
154 /* Original copyright notice which applies to the CONFIG_FEATURE_WGET_STATUSBAR stuff,
155 * much of which was blatantly stolen from openssh. */
157 * Copyright (c) 1992, 1993
158 * The Regents of the University of California. All rights reserved.
160 * Redistribution and use in source and binary forms, with or without
161 * modification, are permitted provided that the following conditions
163 * 1. Redistributions of source code must retain the above copyright
164 * notice, this list of conditions and the following disclaimer.
165 * 2. Redistributions in binary form must reproduce the above copyright
166 * notice, this list of conditions and the following disclaimer in the
167 * documentation and/or other materials provided with the distribution.
169 * 3. <BSD Advertising Clause omitted per the July 22, 1999 licensing change
170 * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change>
172 * 4. Neither the name of the University nor the names of its contributors
173 * may be used to endorse or promote products derived from this software
174 * without specific prior written permission.
176 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
177 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
178 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
179 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
180 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
181 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
182 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
183 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
184 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
185 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
189 #else /* FEATURE_WGET_STATUSBAR */
191 static ALWAYS_INLINE void progressmeter(int flag ATTRIBUTE_UNUSED) { }
196 /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
197 * and a short count if an eof or non-interrupt error is encountered. */
198 static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
201 char *p = (char*)ptr;
205 ret = fread(p, 1, nmemb, stream);
208 } while (nmemb && ferror(stream) && errno == EINTR);
210 return p - (char*)ptr;
213 /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
214 * Returns S, or NULL if an eof or non-interrupt error is encountered. */
215 static char *safe_fgets(char *s, int size, FILE *stream)
221 ret = fgets(s, size, stream);
222 } while (ret == NULL && ferror(stream) && errno == EINTR);
227 #if ENABLE_FEATURE_WGET_AUTHENTICATION
228 /* Base64-encode character string. buf is assumed to be char buf[512]. */
229 static char *base64enc_512(char buf[512], const char *str)
231 unsigned len = strlen(str);
232 if (len > 512/4*3 - 10) /* paranoia */
234 bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
240 static FILE *open_socket(len_and_sockaddr *lsa)
244 /* glibc 2.4 seems to try seeking on it - ??! */
245 /* hopefully it understands what ESPIPE means... */
246 fp = fdopen(xconnect_stream(lsa), "r+");
248 bb_perror_msg_and_die("fdopen");
254 static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
259 fprintf(fp, "%s%s\r\n", s1, s2);
266 if (fgets(buf, 510, fp) == NULL) {
267 bb_perror_msg_and_die("error getting response");
269 buf_ptr = strstr(buf, "\r\n");
273 } while (!isdigit(buf[0]) || buf[3] != ' ');
276 result = xatoi_u(buf);
282 static void parse_url(char *src_url, struct host_info *h)
286 /* h->allocated = */ url = xstrdup(src_url);
288 if (strncmp(url, "http://", 7) == 0) {
289 h->port = bb_lookup_port("http", "tcp", 80);
292 } else if (strncmp(url, "ftp://", 6) == 0) {
293 h->port = bb_lookup_port("ftp", "tcp", 21);
297 bb_error_msg_and_die("not an http or ftp url: %s", url);
300 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
301 // 'GET /?var=a/b HTTP 1.0'
302 // and saves 'index.html?var=a%2Fb' (we save 'b')
303 // wget 'http://busybox.net?login=john@doe':
304 // request: 'GET /?login=john@doe HTTP/1.0'
305 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
306 // wget 'http://busybox.net#test/test':
307 // request: 'GET / HTTP/1.0'
308 // saves: 'index.html' (we save 'test')
310 // We also don't add unique .N suffix if file exists...
311 sp = strchr(h->host, '/');
312 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
313 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
316 } else if (*sp == '/') {
319 } else { // '#' or '?'
320 // http://busybox.net?login=john@doe is a valid URL
321 // memmove converts to:
322 // http:/busybox.nett?login=john@doe...
323 memmove(h->host - 1, h->host, sp - h->host);
329 sp = strrchr(h->host, '@');
341 static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
348 /* retrieve header line */
349 if (fgets(buf, bufsiz, fp) == NULL)
352 /* see if we are at the end of the headers */
353 for (s = buf; *s == '\r'; ++s)
358 /* convert the header name to lower case */
359 for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s)
362 /* verify we are at the end of the header name */
364 bb_error_msg_and_die("bad header line: %s", buf);
366 /* locate the start of the header value */
368 hdrval = skip_whitespace(s);
370 /* locate the end of header */
371 while (*s && *s != '\r' && *s != '\n')
374 /* end of header found */
380 /* Rats! The buffer isn't big enough to hold the entire header value. */
381 while (c = getc(fp), c != EOF && c != '\n')
388 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
389 int wget_main(int argc ATTRIBUTE_UNUSED, char **argv)
392 struct host_info server, target;
393 len_and_sockaddr *lsa;
400 char *dir_prefix = NULL;
401 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
402 char *extra_headers = NULL;
403 llist_t *headers_llist = NULL;
405 FILE *sfp = NULL; /* socket to web/ftp server */
406 FILE *dfp; /* socket to ftp server (data) */
407 char *fname_out; /* where to direct output (-O) */
408 bool got_clen = 0; /* got content-length: from server */
410 bool use_proxy = 1; /* Use proxies if env vars are set */
411 const char *proxy_flag = "on"; /* Value of the -Y/--proxy option; compared against "off" */
412 const char *user_agent = "Wget";/* "User-Agent" header field */
414 static const char keywords[] ALIGN1 =
415 "content-length\0""transfer-encoding\0""chunked\0""location\0";
417 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
420 WGET_OPT_CONTINUE = 0x1,
421 WGET_OPT_SPIDER = 0x2,
422 WGET_OPT_QUIET = 0x4,
423 WGET_OPT_OUTNAME = 0x8,
424 WGET_OPT_PREFIX = 0x10,
425 WGET_OPT_PROXY = 0x20,
426 WGET_OPT_USER_AGENT = 0x40,
427 WGET_OPT_PASSIVE = 0x80,
428 WGET_OPT_HEADER = 0x100,
430 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
431 static const char wget_longopts[] ALIGN1 =
432 /* name, has_arg, val */
433 "continue\0" No_argument "c"
434 "spider\0" No_argument "s"
435 "quiet\0" No_argument "q"
436 "output-document\0" Required_argument "O"
437 "directory-prefix\0" Required_argument "P"
438 "proxy\0" Required_argument "Y"
439 "user-agent\0" Required_argument "U"
440 "passive-ftp\0" No_argument "\xff"
441 "header\0" Required_argument "\xfe"
447 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
448 applet_long_options = wget_longopts;
450 /* server.allocated = target.allocated = NULL; */
451 opt_complementary = "-1" USE_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
452 opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:",
453 &fname_out, &dir_prefix,
454 &proxy_flag, &user_agent,
455 NULL, /* -t RETRIES */
456 NULL /* -T NETWORK_READ_TIMEOUT */
457 USE_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
459 if (strcmp(proxy_flag, "off") == 0) {
460 /* Use the proxy if necessary */
463 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
467 llist_t *ll = headers_llist;
469 size += strlen(ll->data) + 2;
472 extra_headers = cp = xmalloc(size);
473 while (headers_llist) {
474 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
479 parse_url(argv[optind], &target);
480 server.host = target.host;
481 server.port = target.port;
483 /* Use the proxy if necessary */
485 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
486 if (proxy && *proxy) {
487 parse_url(proxy, &server);
493 /* Guess an output filename, if there was no -O FILE */
494 if (!(opt & WGET_OPT_OUTNAME)) {
495 fname_out = bb_get_last_path_component_nostrip(target.path);
496 /* handle "wget http://kernel.org//" */
497 if (fname_out[0] == '/' || !fname_out[0])
498 fname_out = (char*)"index.html";
499 /* -P DIR is considered only if there was no -O FILE */
501 fname_out = concat_path_file(dir_prefix, fname_out);
503 if (LONE_DASH(fname_out)) {
506 opt &= ~WGET_OPT_CONTINUE;
509 #if ENABLE_FEATURE_WGET_STATUSBAR
510 curfile = bb_get_last_path_component_nostrip(fname_out);
514 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
515 bb_error_msg_and_die("cannot specify continue (-c) without a filename (-O)"); */
517 /* Determine where to start transfer */
518 if (opt & WGET_OPT_CONTINUE) {
519 output_fd = open(fname_out, O_WRONLY);
520 if (output_fd >= 0) {
521 beg_range = xlseek(output_fd, 0, SEEK_END);
523 /* File doesn't exist. We do not create file here yet.
524 We are not sure it exists on remote side
527 /* We want to do exactly _one_ DNS lookup, since some
528 * sites (i.e. ftp.us.debian.org) use round-robin DNS
529 * and we want to connect to only one IP... */
530 lsa = xhost2sockaddr(server.host, server.port);
531 if (!(opt & WGET_OPT_QUIET)) {
532 fprintf(stderr, "Connecting to %s (%s)\n", server.host,
533 xmalloc_sockaddr2dotted(&lsa->u.sa));
534 /* We leak result of xmalloc_sockaddr2dotted */
537 if (use_proxy || !target.is_ftp) {
546 bb_error_msg_and_die("too many redirections");
548 /* Open socket to http server */
549 if (sfp) fclose(sfp);
550 sfp = open_socket(lsa);
552 /* Send HTTP request. */
554 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
555 target.is_ftp ? "f" : "ht", target.host,
558 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
561 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
562 target.host, user_agent);
564 #if ENABLE_FEATURE_WGET_AUTHENTICATION
566 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
567 base64enc_512(buf, target.user));
569 if (use_proxy && server.user) {
570 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
571 base64enc_512(buf, server.user));
576 fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
577 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
579 fputs(extra_headers, sfp);
581 fprintf(sfp, "Connection: close\r\n\r\n");
584 * Retrieve HTTP response line and check for "200" status code.
587 if (fgets(buf, sizeof(buf), sfp) == NULL)
588 bb_error_msg_and_die("no response from server");
591 str = skip_non_whitespace(str);
592 str = skip_whitespace(str);
593 // FIXME: no error check
594 // xatou wouldn't work: "200 OK"
599 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
600 /* eat all remaining headers */;
604 Response 204 doesn't say "null file", it says "metadata
605 has changed but data didn't":
607 "10.2.5 204 No Content
608 The server has fulfilled the request but does not need to return
609 an entity-body, and might want to return updated metainformation.
610 The response MAY include new or updated metainformation in the form
611 of entity-headers, which if present SHOULD be associated with
612 the requested variant.
614 If the client is a user agent, it SHOULD NOT change its document
615 view from that which caused the request to be sent. This response
616 is primarily intended to allow input for actions to take place
617 without causing a change to the user agent's active document view,
618 although any new or updated metainformation SHOULD be applied
619 to the document currently in the user agent's active view.
621 The 204 response MUST NOT include a message-body, and thus
622 is always terminated by the first empty line after the header fields."
624 However, in the real world it was observed that some web servers
625 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
629 case 300: /* redirection */
639 /* Show first line only and kill any ESC tricks */
640 buf[strcspn(buf, "\n\r\x1b")] = '\0';
641 bb_error_msg_and_die("server returned error: %s", buf);
645 * Retrieve HTTP headers.
647 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
648 /* gethdr has already converted the "FOO:" string to lowercase */
649 smalluint key = index_in_strings(keywords, *&buf) + 1;
650 if (key == KEY_content_length) {
651 content_len = BB_STRTOOFF(str, NULL, 10);
652 if (errno || content_len < 0) {
653 bb_error_msg_and_die("content-length %s is garbage", str);
658 if (key == KEY_transfer_encoding) {
659 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
660 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
661 chunked = got_clen = 1;
663 if (key == KEY_location) {
665 /* free(target.allocated); */
666 target.path = /* target.allocated = */ xstrdup(str+1);
668 parse_url(str, &target);
669 if (use_proxy == 0) {
670 server.host = target.host;
671 server.port = target.port;
674 lsa = xhost2sockaddr(server.host, server.port);
679 } while (status >= 300);
689 target.user = xstrdup("anonymous:busybox@");
691 sfp = open_socket(lsa);
692 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
693 bb_error_msg_and_die("%s", buf+4);
696 * Splitting username:password pair,
699 str = strchr(target.user, ':');
702 switch (ftpcmd("USER ", target.user, sfp, buf)) {
706 if (ftpcmd("PASS ", str, sfp, buf) == 230)
708 /* fall through (failed login) */
710 bb_error_msg_and_die("ftp login: %s", buf+4);
713 ftpcmd("TYPE I", NULL, sfp, buf);
718 if (ftpcmd("SIZE ", target.path, sfp, buf) == 213) {
719 content_len = BB_STRTOOFF(buf+4, NULL, 10);
720 if (errno || content_len < 0) {
721 bb_error_msg_and_die("SIZE value is garbage");
727 * Entering passive mode
729 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
731 bb_error_msg_and_die("bad response to %s: %s", "PASV", buf);
733 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
734 // Server's IP is N1.N2.N3.N4 (we ignore it)
735 // Server's port for data connection is P1*256+P2
736 str = strrchr(buf, ')');
737 if (str) str[0] = '\0';
738 str = strrchr(buf, ',');
739 if (!str) goto pasv_error;
740 port = xatou_range(str+1, 0, 255);
742 str = strrchr(buf, ',');
743 if (!str) goto pasv_error;
744 port += xatou_range(str+1, 0, 255) * 256;
745 set_nport(lsa, htons(port));
746 dfp = open_socket(lsa);
749 sprintf(buf, "REST %"OFF_FMT"d", beg_range);
750 if (ftpcmd(buf, NULL, sfp, buf) == 350)
751 content_len -= beg_range;
754 if (ftpcmd("RETR ", target.path, sfp, buf) > 150)
755 bb_error_msg_and_die("bad response to %s: %s", "RETR", buf);
758 if (opt & WGET_OPT_SPIDER) {
759 if (ENABLE_FEATURE_CLEAN_UP)
768 /* Do it before progressmeter (want to have nice error message) */
770 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
771 /* compat with wget: -O FILE can overwrite */
772 if (opt & WGET_OPT_OUTNAME)
773 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
774 output_fd = xopen(fname_out, o_flags);
777 if (!(opt & WGET_OPT_QUIET))
783 /* Loops only if chunked */
785 while (content_len > 0 || !got_clen) {
787 unsigned rdsz = sizeof(buf);
789 if (content_len < sizeof(buf) && (chunked || got_clen))
790 rdsz = (unsigned)content_len;
791 n = safe_fread(buf, rdsz, dfp);
794 /* perror will not work: ferror doesn't set errno */
795 bb_error_msg_and_die(bb_msg_read_error);
799 xwrite(output_fd, buf, n);
800 #if ENABLE_FEATURE_WGET_STATUSBAR
810 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
812 safe_fgets(buf, sizeof(buf), dfp);
813 content_len = STRTOOFF(buf, NULL, 16);
814 /* FIXME: error check? */
815 if (content_len == 0)
816 break; /* all done! */
819 if (!(opt & WGET_OPT_QUIET))
822 if ((use_proxy == 0) && target.is_ftp) {
824 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
825 bb_error_msg_and_die("ftp error: %s", buf+4);
826 ftpcmd("QUIT", NULL, sfp, buf);