1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
7 * Licensed under GPLv2, see file LICENSE in this tarball for details.
12 // May be used if we ever want to free() all xstrdup()s...
13 /* char *allocated; */
22 /* Globals (can be accessed from signal handlers) */
// The fields below are the applet-wide transfer state. The rest of this file
// reaches them through the G macro (G.content_len, G.beg_range, G.chunked, ...).
24 off_t content_len; /* Content-length of the file */
25 off_t beg_range; /* Range at which continue begins */
26 #if ENABLE_FEATURE_WGET_STATUSBAR
27 off_t transferred; /* Number of bytes transferred so far */
28 const char *curfile; /* Name of current file being transferred */
31 smallint chunked; /* chunked transfer encoding */
32 smallint got_clen; /* got content-length: from server */
// G reinterprets the buffer bb_common_bufsiz1 as a struct globals, so the
// state is accessed in place inside that buffer.
34 #define G (*(struct globals*)&bb_common_bufsiz1)
// Compile-time size check: the array length evaluates to -1 (a compile error)
// when sizeof(G) exceeds COMMON_BUFSIZE.
35 struct BUG_G_too_big {
36 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
// INIT_G() expands to an empty statement: no run-time setup is performed here.
38 #define INIT_G() do { } while (0)
41 #if ENABLE_FEATURE_WGET_STATUSBAR
// Update the transfer status display.
// flag == -1 marks the first call: the progress state in G.pmt is initialized
// and, further down, this same function is installed as the SIGALRM handler.
// The visible comments also mention a "last call" case; its flag value and
// handling are not shown in this listing.
43 static void progress_meter(int flag)
45 /* We can be called from signal handler */
// errno is captured on entry; presumably it is restored before returning so
// that interrupted code does not observe a changed errno (restore not shown).
46 int save_errno = errno;
48 if (flag == -1) { /* first call to progress_meter */
49 bb_progress_init(&G.pmt);
// The last argument is 0 for chunked transfers, otherwise the expected
// content length plus the resume offset (presumably the total file size).
52 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
53 G.chunked ? 0 : G.content_len + G.beg_range);
56 /* last call to progress_meter */
61 if (flag == -1) { /* first call to progress_meter */
62 signal_SA_RESTART_empty_mask(SIGALRM, progress_meter);
70 #else /* FEATURE_WGET_STATUSBAR */
// Status bar disabled: calls to progress_meter() compile to an empty inline stub.
72 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
77 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
78 * local addresses can have a scope identifier to specify the
79 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
80 * identifier is only valid on a single node.
82 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
83 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
84 * in the Host header as invalid requests, see
85 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
// Remove an IPv6 zone identifier ("%zone") from a bracketed host string, in place.
// host: NUL-terminated string, modified by this call.
// Only the "[addr%zone]" and "[addr%zone]:port" shapes are rewritten. A string
// whose closing bracket is missing, or is followed by anything other than a
// colon or the terminating NUL, is classified as malformed (what happens then
// is not shown here; presumably the string is left untouched).
87 static void strip_ipv6_scope_id(char *host)
91 /* bbox wget actually handles IPv6 addresses without [], like
92 * wget "http://::1/xxx", but this is not standard.
93 * To save code, _here_ we do not support it. */
96 return; /* not IPv6 */
// scope: position of the percent sign that starts the zone identifier, if any.
98 scope = strchr(host, '%');
102 /* Remove the IPv6 zone identifier from the host address */
103 cp = strchr(host, ']');
104 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
105 /* malformed address (not "[xx]:nn" or "[xx]") */
109 /* cp points to "]...", scope points to "%eth0]..." */
// Slide the tail that starts at the closing bracket down over the zone text.
// Source and destination overlap, hence the overlap-tolerant copy helper.
110 overlapping_strcpy(scope, cp);
113 /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
114 * and a short count if an eof or non-interrupt error is encountered. */
// The read is issued with an element size of 1, so fread() reports bytes.
// The return value is the distance the cursor p has moved from ptr; the
// statements that advance p and shrink nmemb are not shown in this listing.
115 static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
118 char *p = (char*)ptr;
123 ret = fread(p, 1, nmemb, stream);
// Try again only while bytes are still wanted AND the stream is in an error
// state AND that error was an interrupted call (EINTR).
126 } while (nmemb && ferror(stream) && errno == EINTR);
128 return p - (char*)ptr;
131 /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
132 * Returns S, or NULL if an eof or non-interrupt error is encountered. */
// fgets() wrapper that repeats the call when it fails because of EINTR.
133 static char *safe_fgets(char *s, int size, FILE *stream)
140 ret = fgets(s, size, stream);
// Loop only for a NULL result caused by a stream error with errno == EINTR;
// end-of-file or any other error ends the loop.
141 } while (ret == NULL && ferror(stream) && errno == EINTR);
146 #if ENABLE_FEATURE_WGET_AUTHENTICATION
147 /* Base64-encode character string. buf is assumed to be char buf[512]. */
// str: NUL-terminated input (callers in this file pass "user:password" text).
// buf: caller-supplied output buffer that receives the encoding.
// The result is passed straight to a "%s" format by callers, so the function
// presumably returns buf (return statement not shown).
148 static char *base64enc_512(char buf[512], const char *str)
150 unsigned len = strlen(str);
// 512/4*3 - 10 == 374: inputs longer than this take the paranoia branch
// (body not shown; presumably the length is limited so the output fits in buf).
151 if (len > 512/4*3 - 10) /* paranoia */
153 bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
// Filter a string before it is printed. Every visible caller in this file
// passes server- or user-supplied text through this function on its way into
// an error message. The bytes are walked as unsigned char; the filtering rule
// itself is not shown in this listing.
158 static char* sanitize_string(char *s)
160 unsigned char *p = (void *) s;
// Connect to the address in lsa and wrap the resulting descriptor in a stdio
// stream opened for both reading and writing ("r+").
// Callers use the result as a FILE pointer for fprintf/fgets on the socket.
167 static FILE *open_socket(len_and_sockaddr *lsa)
171 /* glibc 2.4 seems to try seeking on it - ??! */
172 /* hopefully it understands what ESPIPE means... */
173 fp = fdopen(xconnect_stream(lsa), "r+");
// Fatal error path (guarding condition not shown; presumably fp == NULL).
175 bb_perror_msg_and_die("fdopen");
// Send one FTP command on the control stream and read the server reply.
// s1, s2: command text, sent as s1 followed by s2 and CRLF. Callers also pass
//         NULL for s1 and/or s2 (for example to only read the greeting), so the
//         send is presumably guarded by a check that is not shown here.
// fp:     control-connection stream.
// buf:    receives the reply line; each read is limited to 510 bytes.
// Returns the numeric reply code parsed from the start of buf.
180 static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
185 fprintf(fp, "%s%s\r\n", s1, s2);
192 if (fgets(buf, 510, fp) == NULL) {
193 bb_perror_msg_and_die("error getting response");
// Locate the CRLF line terminator in the reply.
195 buf_ptr = strstr(buf, "\r\n");
// Keep reading until a line starts with a digit and has a space at index 3,
// i.e. skip every line that does not have that shape.
199 } while (!isdigit(buf[0]) || buf[3] != ' ');
202 result = xatoi_u(buf);
// Split src_url into the fields of *h (host, port, and more).
// A private copy of the URL is created with xstrdup() and is not freed here;
// presumably the fields of h point into that copy.
// Only "http://" and "ftp://" prefixes are accepted; any other URL is fatal.
207 static void parse_url(char *src_url, struct host_info *h)
211 /* h->allocated = */ url = xstrdup(src_url);
// The port comes from bb_lookup_port(), called with 80 (http) or 21 (ftp) as
// its last argument.
213 if (strncmp(url, "http://", 7) == 0) {
214 h->port = bb_lookup_port("http", "tcp", 80);
217 } else if (strncmp(url, "ftp://", 6) == 0) {
218 h->port = bb_lookup_port("ftp", "tcp", 21);
222 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
225 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
226 // 'GET /?var=a/b HTTP/1.0'
227 // and saves 'index.html?var=a%2Fb' (we save 'b')
228 // wget 'http://busybox.net?login=john@doe':
229 // request: 'GET /?login=john@doe HTTP/1.0'
230 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
231 // wget 'http://busybox.net#test/test':
232 // request: 'GET / HTTP/1.0'
233 // saves: 'index.html' (we save 'test')
235 // We also don't add unique .N suffix if file exists...
// After these three lines sp points at whichever of the slash, question mark
// or hash character comes first in h->host, or is NULL when none is present.
236 sp = strchr(h->host, '/');
237 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
238 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
241 } else if (*sp == '/') {
244 } else { // '#' or '?'
245 // http://busybox.net?login=john@doe is a valid URL
246 // memmove converts to:
247 // http:/busybox.nett?login=john@doe...
// The host name is shifted one byte towards the start of the string, which
// frees the byte just before the delimiter (presumably for a terminating NUL).
248 memmove(h->host - 1, h->host, sp - h->host);
254 // We used to set h->user to NULL here, but this interferes
255 // with handling of code 302 ("object was moved")
// Find the last at-sign in the host part (presumably the end of a
// "user:password@" prefix; the code that uses sp is not shown).
257 sp = strrchr(h->host, '@');
// Read one header line from fp into buf (fgets with a limit of bufsiz).
// The header name at the start of buf is converted to lower case in place.
// Callers in this file loop until the result is NULL and otherwise treat it
// as a pointer to the header value, with the name still readable from buf.
// A malformed header name is a fatal error.
267 static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
274 /* retrieve header line */
275 if (fgets(buf, bufsiz, fp) == NULL)
278 /* see if we are at the end of the headers */
// Skip any carriage returns at the start of the line.
279 for (s = buf; *s == '\r'; ++s)
284 /* convert the header name to lower case */
// Characters accepted in a header name: alphanumerics, dash and dot.
285 for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s)
288 /* verify we are at the end of the header name */
290 bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
292 /* locate the start of the header value */
294 hdrval = skip_whitespace(s);
296 /* locate the end of header */
297 while (*s && *s != '\r' && *s != '\n')
300 /* end of header found */
306 /* Rats! The buffer isn't big enough to hold the entire header value */
// Consume the remainder of the over-long line, up to newline or end of file.
307 while (c = getc(fp), c != EOF && c != '\n')
313 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// Build a URL-encoded copy of str in freshly allocated memory.
// wget_main uses the result as the POST body and does not free it in the
// visible code.
314 static char *URL_escape(const char *str)
316 /* URL encode, see RFC 2396 */
// Three output bytes per input byte plus the NUL: sized for the case where
// every input byte is expanded (presumably into a percent sign and two digits).
318 char *res = dst = xmalloc(strlen(str) * 3 + 1);
324 /* || strchr("!&'()*-.=_~", c) - more code */
// Digits and ASCII letters are part of the pass-through test; OR-ing with
// 0x20 folds upper case onto lower case so one range check covers both.
336 || (c >= '0' && c <= '9')
337 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
// Emit the byte as two upper-case hex digits: high nibble first, then low.
344 *dst++ = bb_hexdigits_upcase[c >> 4];
345 *dst++ = bb_hexdigits_upcase[c & 0xf];
// Open an FTP control connection to lsa, log in, and start a passive-mode
// retrieval of target->path.
// dfpp:   receives the data-connection stream.
// target: supplies user and path; user is replaced by an anonymous login
//         string in the visible code (condition not shown; presumably only
//         when no user was given).
// lsa:    server address; its port is overwritten with the PASV data port.
// The caller assigns the result to its control stream, so the function
// presumably returns sfp (return statement not shown).
// Every unexpected server reply on the paths shown below is fatal.
351 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
359 target->user = xstrdup("anonymous:busybox@");
361 sfp = open_socket(lsa);
// No command is sent here: only the server greeting is read; it must be 220.
362 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
363 bb_error_msg_and_die("%s", sanitize_string(buf+4));
366 * Splitting username:password pair,
369 str = strchr(target->user, ':');
372 switch (ftpcmd("USER ", target->user, sfp, buf)) {
// A 230 reply to PASS is the success case on this path.
376 if (ftpcmd("PASS ", str, sfp, buf) == 230)
378 /* fall through (failed login) */
380 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
// The reply to TYPE I is not checked.
383 ftpcmd("TYPE I", NULL, sfp, buf);
// A 213 reply carries the file size in decimal after the code; it becomes the
// expected content length. A negative or unparsable value is fatal.
388 if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
389 G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
390 if (G.content_len < 0 || errno) {
391 bb_error_msg_and_die("SIZE value is garbage");
397 * Entering passive mode
399 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
401 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
403 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
404 // Server's IP is N1.N2.N3.N4 (we ignore it)
405 // Server's port for data connection is P1*256+P2
// Parse from the right: cut at the last closing parenthesis, read P2 after the
// last comma, then P1 after the previous comma (the step that truncates buf
// between the two searches is not shown).
406 str = strrchr(buf, ')');
407 if (str) str[0] = '\0';
408 str = strrchr(buf, ',');
409 if (!str) goto pasv_error;
410 port = xatou_range(str+1, 0, 255);
412 str = strrchr(buf, ',');
413 if (!str) goto pasv_error;
414 port += xatou_range(str+1, 0, 255) * 256;
415 set_nport(lsa, htons(port));
// The data connection goes to the same address with the port replaced above.
417 *dfpp = open_socket(lsa);
// Ask the server to restart at the resume offset. On a 350 reply the
// expected length is reduced by the bytes that will not be sent again.
// buf is used both as the command text and as the reply buffer.
420 sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
421 if (ftpcmd(buf, NULL, sfp, buf) == 350)
422 G.content_len -= G.beg_range;
// Any reply code above 150 to RETR is treated as a failure.
425 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
426 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
431 /* Must match option string! */
// Bit N corresponds to the N-th option letter in the getopt32() string used by
// wget_main ("csqO:P:Y:U:" followed by "t:T:"); the remaining bits belong to
// the long-only options.
433 WGET_OPT_CONTINUE = (1 << 0),
434 WGET_OPT_SPIDER = (1 << 1),
435 WGET_OPT_QUIET = (1 << 2),
436 WGET_OPT_OUTNAME = (1 << 3),
437 WGET_OPT_PREFIX = (1 << 4),
438 WGET_OPT_PROXY = (1 << 5),
439 WGET_OPT_USER_AGENT = (1 << 6),
440 WGET_OPT_RETRIES = (1 << 7),
441 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
442 WGET_OPT_PASSIVE = (1 << 9),
// Multiplying by ENABLE_FEATURE_WGET_LONG_OPTIONS turns these two masks into 0
// when long options are compiled out, so tests against them are always false.
443 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
444 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
// Copy the response body from dfp to output_fd.
// G.content_len is used as the number of bytes still expected: it limits the
// size of each read below. For chunked transfers the outer loop runs once per
// chunk and reloads G.content_len from each chunk-size line.
// A read error is fatal; writes go through xwrite().
447 static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
// Progress reporting is skipped in quiet mode (the guarded call is not shown).
451 if (!(option_mask32 & WGET_OPT_QUIET))
457 /* Loops only if chunked */
// When fewer bytes remain than the buffer holds, read only the remainder.
// NOTE(review): the test below casts the off_t length to int.
465 if (G.content_len < (off_t)sizeof(buf)) {
466 if ((int)G.content_len <= 0)
468 rdsz = (unsigned)G.content_len;
471 n = safe_fread(buf, rdsz, dfp);
474 /* perror will not work: ferror doesn't set errno */
475 bb_error_msg_and_die(bb_msg_read_error);
479 xwrite(output_fd, buf, n);
480 #if ENABLE_FEATURE_WGET_STATUSBAR
// Chunked mode: consume the line break that follows the chunk data, then read
// the next chunk-size line and parse it as hexadecimal. The return values of
// these two reads are not checked.
490 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
492 safe_fgets(buf, sizeof(buf), dfp);
493 G.content_len = STRTOOFF(buf, NULL, 16);
494 /* FIXME: error check? */
495 if (G.content_len == 0)
496 break; /* all done! */
500 if (!(option_mask32 & WGET_OPT_QUIET))
// Applet entry point. Visible flow:
//  1. parse options (-c -s -q -O -P -Y -U; -t and -T are accepted but ignored);
//  2. parse the URL in argv[optind] into target;
//  3. unless -Y off was given, look up a proxy in the ftp_proxy / http_proxy
//     environment variables; otherwise the server is the target itself;
//  4. choose the output file name and, for -c, the resume offset;
//  5. fetch over HTTP (also used for proxied FTP) or over a direct FTP session;
//  6. copy the body with retrieve_file_data() unless --spider was given.
504 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
505 int wget_main(int argc UNUSED_PARAM, char **argv)
508 struct host_info server, target;
509 len_and_sockaddr *lsa;
513 char *dir_prefix = NULL;
514 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
516 char *extra_headers = NULL;
517 llist_t *headers_llist = NULL;
519 FILE *sfp; /* socket to web/ftp server */
520 FILE *dfp; /* socket to ftp server (data) */
521 char *fname_out; /* where to direct output (-O) */
523 bool use_proxy; /* Use proxies if env vars are set */
524 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
525 const char *user_agent = "Wget";/* "User-Agent" header field */
// Header names and values recognized while parsing the response; the KEY_*
// constants below are 1-based positions in this list.
527 static const char keywords[] ALIGN1 =
528 "content-length\0""transfer-encoding\0""chunked\0""location\0";
530 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
532 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
533 static const char wget_longopts[] ALIGN1 =
534 /* name, has_arg, val */
535 "continue\0" No_argument "c"
536 "spider\0" No_argument "s"
537 "quiet\0" No_argument "q"
538 "output-document\0" Required_argument "O"
539 "directory-prefix\0" Required_argument "P"
540 "proxy\0" Required_argument "Y"
541 "user-agent\0" Required_argument "U"
543 // "tries\0" Required_argument "t"
544 // "timeout\0" Required_argument "T"
545 /* Ignored (we always use PASV): */
546 "passive-ftp\0" No_argument "\xff"
547 "header\0" Required_argument "\xfe"
548 "post-data\0" Required_argument "\xfd"
549 /* Ignored (we don't do ssl) */
550 "no-check-certificate\0" No_argument "\xfc"
556 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
557 applet_long_options = wget_longopts;
559 /* server.allocated = target.allocated = NULL; */
560 opt_complementary = "-1" IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
561 opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:",
562 &fname_out, &dir_prefix,
563 &proxy_flag, &user_agent,
564 NULL, /* -t RETRIES */
565 NULL /* -T NETWORK_READ_TIMEOUT */
566 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
567 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
569 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// Join all --header values into one string. The size pass adds 2 bytes per
// entry for the CRLF that the copy pass appends after each value.
573 llist_t *ll = headers_llist;
575 size += strlen(ll->data) + 2;
578 extra_headers = cp = xmalloc(size);
579 while (headers_llist) {
580 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
585 /* TODO: compat issue: should handle "wget URL1 URL2..." */
588 parse_url(argv[optind], &target);
590 /* Use the proxy if necessary */
// Any -Y value other than the exact string "off" leaves proxy use enabled.
591 use_proxy = (strcmp(proxy_flag, "off") != 0);
593 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
594 if (proxy && proxy[0]) {
595 parse_url(proxy, &server);
// Here the server is the target itself. With IPv6 support the host string is
// duplicated, presumably because strip_ipv6_scope_id() below edits
// target.host in place while server.host must keep the scope id.
601 server.port = target.port;
602 if (ENABLE_FEATURE_IPV6) {
603 server.host = xstrdup(target.host);
605 server.host = target.host;
609 if (ENABLE_FEATURE_IPV6)
610 strip_ipv6_scope_id(target.host);
612 /* Guess an output filename, if there was no -O FILE */
613 if (!(opt & WGET_OPT_OUTNAME)) {
614 fname_out = bb_get_last_path_component_nostrip(target.path);
615 /* handle "wget http://kernel.org//" */
616 if (fname_out[0] == '/' || !fname_out[0])
617 fname_out = (char*)"index.html";
618 /* -P DIR is considered only if there was no -O FILE */
620 fname_out = concat_path_file(dir_prefix, fname_out);
// A lone dash as the -O argument: resuming (-c) is switched off for it.
622 if (LONE_DASH(fname_out)) {
625 opt &= ~WGET_OPT_CONTINUE;
628 #if ENABLE_FEATURE_WGET_STATUSBAR
629 G.curfile = bb_get_last_path_component_nostrip(fname_out);
633 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
634 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
637 /* Determine where to start transfer */
638 if (opt & WGET_OPT_CONTINUE) {
639 output_fd = open(fname_out, O_WRONLY);
640 if (output_fd >= 0) {
// The resume offset is the current size of the existing file.
641 G.beg_range = xlseek(output_fd, 0, SEEK_END);
643 /* File doesn't exist. We do not create file here yet.
644 * We are not sure it exists on remote side */
649 lsa = xhost2sockaddr(server.host, server.port);
650 if (!(opt & WGET_OPT_QUIET)) {
651 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
652 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
// HTTP is spoken both for http targets and for ftp targets fetched via proxy.
656 if (use_proxy || !target.is_ftp) {
663 /* Open socket to http server */
664 sfp = open_socket(lsa);
666 /* Send HTTP request */
// Absolute-URL request form (presumably the proxy case; the condition is not
// shown). The scheme is assembled as "f" or "ht" followed by "tp://".
668 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
669 target.is_ftp ? "f" : "ht", target.host,
672 if (opt & WGET_OPT_POST_DATA)
673 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
675 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
678 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
679 target.host, user_agent);
681 #if ENABLE_FEATURE_WGET_AUTHENTICATION
// Adding 6 to the format string skips its leading "Proxy-", so this call emits
// a plain "Authorization: Basic ..." header for the target credentials.
683 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
684 base64enc_512(buf, target.user));
686 if (use_proxy && server.user) {
687 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
688 base64enc_512(buf, server.user));
// Open-ended byte range starting at the resume offset.
693 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
694 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
696 fputs(extra_headers, sfp);
698 if (opt & WGET_OPT_POST_DATA) {
// The POST body is the URL-escaped --post-data value; it follows the blank
// line that ends the headers.
// NOTE(review): %u is paired with an (int) argument in the call below.
699 char *estr = URL_escape(post_data);
700 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
701 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
702 (int) strlen(estr), estr);
703 /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
704 /*fprintf(sfp, "%s\r\n", estr);*/
708 { /* If "Connection:" is needed, document why */
709 fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
713 * Retrieve HTTP response line and check for "200" status code.
716 if (fgets(buf, sizeof(buf), sfp) == NULL)
717 bb_error_msg_and_die("no response from server");
720 str = skip_non_whitespace(str);
721 str = skip_whitespace(str);
722 // FIXME: no error check
723 // xatou wouldn't work: "200 OK"
728 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
729 /* eat all remaining headers */;
733 Response 204 doesn't say "null file", it says "metadata
734 has changed but data didn't":
736 "10.2.5 204 No Content
737 The server has fulfilled the request but does not need to return
738 an entity-body, and might want to return updated metainformation.
739 The response MAY include new or updated metainformation in the form
740 of entity-headers, which if present SHOULD be associated with
741 the requested variant.
743 If the client is a user agent, it SHOULD NOT change its document
744 view from that which caused the request to be sent. This response
745 is primarily intended to allow input for actions to take place
746 without causing a change to the user agent's active document view,
747 although any new or updated metainformation SHOULD be applied
748 to the document currently in the user agent's active view.
750 The 204 response MUST NOT include a message-body, and thus
751 is always terminated by the first empty line after the header fields."
753 However, in real world it was observed that some web servers
754 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
758 case 300: /* redirection */
768 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
772 * Retrieve HTTP headers.
774 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
775 /* gethdr converted "FOO:" string to lowercase */
777 /* strip trailing whitespace */
778 char *s = strchrnul(str, '\0') - 1;
779 while (s >= str && (*s == ' ' || *s == '\t')) {
// The +1 lines the lookup result up with the KEY_* constants, which start at 1.
783 key = index_in_strings(keywords, buf) + 1;
784 if (key == KEY_content_length) {
785 G.content_len = BB_STRTOOFF(str, NULL, 10);
786 if (G.content_len < 0 || errno) {
787 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
// Only the "chunked" transfer encoding is supported; anything else is fatal.
792 if (key == KEY_transfer_encoding) {
793 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
794 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
795 G.chunked = G.got_clen = 1;
// A Location header is followed only for status codes of 300 and above, and
// only while the redirect budget lasts.
797 if (key == KEY_location && status >= 300) {
798 if (--redir_limit == 0)
799 bb_error_msg_and_die("too many redirections");
804 /* free(target.allocated); */
805 target.path = /* target.allocated = */ xstrdup(str+1);
806 /* lsa stays the same: it's on the same server */
808 parse_url(str, &target);
810 server.host = target.host;
811 /* strip_ipv6_scope_id(target.host); - no! */
812 /* we assume remote never gives us IPv6 addr with scope id */
813 server.port = target.port;
816 } /* else: lsa stays the same: we use proxy */
818 goto establish_session;
821 // if (status >= 300)
822 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
824 /* For HTTP, data is pumped over the same connection */
831 sfp = prepare_ftp_session(&dfp, &target, lsa);
834 if (opt & WGET_OPT_SPIDER) {
835 if (ENABLE_FEATURE_CLEAN_UP)
// Without -O the flags include O_EXCL, so an existing file is not overwritten.
841 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
842 /* compat with wget: -O FILE can overwrite */
843 if (opt & WGET_OPT_OUTNAME)
844 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
845 output_fd = xopen(fname_out, o_flags);
848 retrieve_file_data(dfp, output_fd);
852 /* It's ftp. Close it properly */
// After the data transfer the control connection must report 226.
854 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
855 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
856 /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */