1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
7 * Licensed under GPLv2, see file LICENSE in this tarball for details.
12 // May be used if we ever want to free() all xstrdup()s...
13 /* char *allocated; */
/* Shared state of the wget applet. The fields below are reached through the
 * G macro, which overlays struct globals on the storage of bb_common_bufsiz1.
 * NOTE(review): the struct header and some members are not visible in this
 * listing; only the visible members are described. */
22 /* Globals (can be accessed from signal handlers) */
24 off_t content_len; /* Content-length of the file */
25 off_t beg_range; /* Range at which continue begins */
26 #if ENABLE_FEATURE_WGET_STATUSBAR
27 off_t transferred; /* Number of bytes transferred so far */
28 const char *curfile; /* Name of current file being transferred */
31 smallint chunked; /* chunked transfer encoding */
32 smallint got_clen; /* got content-length: from server */
/* G reinterprets the memory at bb_common_bufsiz1 as a struct globals. */
34 #define G (*(struct globals*)&bb_common_bufsiz1)
/* Compile-time guard: the array size evaluates to -1 (a compile error)
 * whenever sizeof(G) is larger than COMMON_BUFSIZE. */
35 struct BUG_G_too_big {
36 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
/* Shorthands so the rest of the file can use bare names for these G fields. */
38 #define content_len (G.content_len )
39 #define beg_range (G.beg_range )
40 #define transferred (G.transferred )
41 #define curfile (G.curfile )
/* INIT_G() expands to an empty statement: no run-time setup is done here. */
42 #define INIT_G() do { } while (0)
/* Progress display. The real implementation is compiled only when
 * ENABLE_FEATURE_WGET_STATUSBAR is set; otherwise an empty stub is used.
 * flag == -1 marks the first call (see the two checks below).
 * NOTE(review): several lines of this function are missing from this
 * listing; only the visible statements are described. */
45 #if ENABLE_FEATURE_WGET_STATUSBAR
47 static void progress_meter(int flag)
49 /* We can be called from signal handler */
/* errno is captured on entry.
 * NOTE(review): presumably it is restored before returning - the restore is
 * not visible here, confirm. */
50 int save_errno = errno;
52 if (flag == -1) { /* first call to progress_meter */
53 bb_progress_init(&G.pmt);
/* Report progress for curfile. The last argument is 0 for chunked
 * transfers, otherwise content_len + beg_range. */
56 bb_progress_update(&G.pmt, curfile, beg_range, transferred,
57 G.chunked ? 0 : content_len + beg_range);
60 /* last call to progress_meter */
65 if (flag == -1) { /* first call to progress_meter */
/* On the first call this function installs itself as the SIGALRM handler. */
66 signal_SA_RESTART_empty_mask(SIGALRM, progress_meter);
74 #else /* FEATURE_WGET_STATUSBAR */
/* Status bar disabled: progress_meter() is an empty inline stub. */
76 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
81 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
82 * local addresses can have a scope identifier to specify the
83 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
84 * identifier is only valid on a single node.
86 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
88 * unless all nodes agree on the semantics. Apache, e.g., regards zone identifiers
88 * in the Host header as invalid requests, see
89 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
/* Remove the "%zone" part from a bracketed IPv6 host string, in place.
 * host: writable host string; it is modified only when a '%' is found and
 * the closing ']' is followed by ':' or by the end of the string.
 * NOTE(review): the declarations and some tests (e.g. the check that skips
 * non-bracketed hosts) are not visible in this listing. */
91 static void strip_ipv6_scope_id(char *host)
95 /* bbox wget actually handles IPv6 addresses without [], like
96 * wget "http://::1/xxx", but this is not standard.
97 * To save code, _here_ we do not support it. */
100 return; /* not IPv6 */
/* Locate the start of the zone identifier, if any. */
102 scope = strchr(host, '%');
106 /* Remove the IPv6 zone identifier from the host address */
107 cp = strchr(host, ']');
/* Accept only "]" followed by ':' (port) or end of string. */
108 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
109 /* malformed address (not "[xx]:nn" or "[xx]") */
113 /* cp points to "]...", scope points to "%eth0]..." */
/* Move the tail starting at ']' over the '%': this drops the zone id. */
114 overlapping_strcpy(scope, cp);
117 /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read,
118 * and a short count if an eof or non-interrupt error is encountered. */
/* The read is retried while bytes remain, the stream reports an error and
 * errno is EINTR (see the loop condition).
 * NOTE(review): the lines that advance p, decrease nmemb and reset
 * errno/the stream error flag are not visible in this listing - confirm. */
119 static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
122 char *p = (char*)ptr;
/* Element size is 1, so fread()'s return value is a byte count. */
127 ret = fread(p, 1, nmemb, stream);
130 } while (nmemb && ferror(stream) && errno == EINTR);
/* Bytes stored so far = distance p has moved from the start of the buffer. */
132 return p - (char*)ptr;
135 /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
136 * Returns S, or NULL if an eof or non-interrupt error is encountered. */
/* fgets() is repeated only when it returned NULL, the stream reports an
 * error and errno is EINTR.
 * NOTE(review): the reset of errno / the stream error flag before each
 * attempt and the return statement are not visible in this listing. */
137 static char *safe_fgets(char *s, int size, FILE *stream)
144 ret = fgets(s, size, stream);
145 } while (ret == NULL && ferror(stream) && errno == EINTR);
/* Compiled only when ENABLE_FEATURE_WGET_AUTHENTICATION is set. */
150 #if ENABLE_FEATURE_WGET_AUTHENTICATION
151 /* Base64-encode character string. buf is assumed to be char buf[512]. */
/* str: NUL-terminated input; the encoded text is written into buf.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably buf is returned. */
152 static char *base64enc_512(char buf[512], const char *str)
154 unsigned len = strlen(str);
/* 512/4*3 - 10 == 374: inputs longer than this would not fit in the
 * 512-byte buffer once encoded (4 output bytes per 3 input bytes).
 * NOTE(review): the action taken here is not visible - presumably len is
 * clamped; confirm. */
155 if (len > 512/4*3 - 10) /* paranoia */
157 bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
/* Takes a writable string and returns a char pointer; callers in this file
 * pass server- or user-supplied text through it before printing it in
 * error messages.
 * NOTE(review): the body is not visible in this listing; presumably it
 * rewrites or truncates at non-printable bytes in place - confirm. */
162 static char* sanitize_string(char *s)
/* Bytes are examined as unsigned char. */
164 unsigned char *p = (void *) s;
/* Connect to the address in lsa and wrap the resulting descriptor in a
 * read/write stdio stream ("r+"). Exits with "fdopen" via
 * bb_perror_msg_and_die() on the failure path shown below.
 * NOTE(review): the NULL test guarding the die call and the return
 * statement are not visible in this listing. */
171 static FILE *open_socket(len_and_sockaddr *lsa)
175 /* glibc 2.4 seems to try seeking on it - ??! */
176 /* hopefully it understands what ESPIPE means... */
177 fp = fdopen(xconnect_stream(lsa), "r+");
179 bb_perror_msg_and_die("fdopen");
/* Send an FTP command (s1 immediately followed by s2, then CRLF) on fp and
 * read the reply into buf; the numeric reply code parsed from the start of
 * buf is stored in result.
 * Callers in this file pass s1 == NULL to only read a reply, and pass
 * s2 == NULL for commands without an argument.
 * NOTE(review): the guards for NULL s1/s2 are not visible in this listing;
 * passing NULL to "%s" would be undefined behaviour - confirm they exist.
 * NOTE(review): the return statement is not visible; presumably result is
 * returned. */
184 static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
189 fprintf(fp, "%s%s\r\n", s1, s2);
/* Reads at most 509 characters per reply line. */
196 if (fgets(buf, 510, fp) == NULL) {
197 bb_perror_msg_and_die("error getting response");
/* Find the CRLF that ends the reply line. */
199 buf_ptr = strstr(buf, "\r\n");
/* Keep reading until a line starts with a digit and has a space in the
 * fourth column; other lines are skipped. */
203 } while (!isdigit(buf[0]) || buf[3] != ' ');
206 result = xatoi_u(buf);
/* Split src_url into the fields of *h. Works on a private xstrdup() copy of
 * the URL. Only "http://" and "ftp://" are accepted; anything else is fatal.
 * h->port is set from the service database with fallbacks 80 (http) and
 * 21 (ftp).
 * NOTE(review): the assignments of h->host, h->path, h->user and h->is_ftp
 * are not visible in this listing; the comments below describe only the
 * visible statements. */
211 static void parse_url(char *src_url, struct host_info *h)
/* The copy is never freed here (see the commented-out "allocated" field). */
215 /* h->allocated = */ url = xstrdup(src_url);
217 if (strncmp(url, "http://", 7) == 0) {
218 h->port = bb_lookup_port("http", "tcp", 80);
221 } else if (strncmp(url, "ftp://", 6) == 0) {
222 h->port = bb_lookup_port("ftp", "tcp", 21);
226 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
229 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
230 // 'GET /?var=a/b HTTP 1.0'
231 // and saves 'index.html?var=a%2Fb' (we save 'b')
232 // wget 'http://busybox.net?login=john@doe':
233 // request: 'GET /?login=john@doe HTTP/1.0'
234 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
235 // wget 'http://busybox.net#test/test':
236 // request: 'GET / HTTP/1.0'
237 // saves: 'index.html' (we save 'test')
239 // We also don't add unique .N suffix if file exists...
/* sp ends up at the earliest of '/', '?' or '#' after the host start,
 * or NULL when none of them is present. */
240 sp = strchr(h->host, '/');
241 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
242 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
245 } else if (*sp == '/') {
248 } else { // '#' or '?'
249 // http://busybox.net?login=john@doe is a valid URL
250 // memmove converts to:
251 // http:/busybox.nett?login=john@doe...
/* Shift the host text one byte to the left; this frees the byte just
 * before sp (see the example above). */
252 memmove(h->host - 1, h->host, sp - h->host);
258 // We used to set h->user to NULL here, but this interferes
259 // with handling of code 302 ("object was moved")
/* Look for the last '@' (separator between credentials and host). */
261 sp = strrchr(h->host, '@');
/* Read one header line from fp into buf (capacity bufsiz).
 * The header name at the start of buf is lower-cased in place; callers in
 * this file compare buf against lower-case keywords and use the returned
 * pointer as the header value, with NULL meaning "no more headers".
 * A malformed header name is fatal ("bad header line").
 * NOTE(review): the return statements and several tests are not visible in
 * this listing. */
271 static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
278 /* retrieve header line */
279 if (fgets(buf, bufsiz, fp) == NULL)
282 /* see if we are at the end of the headers */
/* Skip any leading '\r' characters. */
283 for (s = buf; *s == '\r'; ++s)
288 /* convert the header name to lower case */
/* NOTE(review): isalnum() receives a plain char here; for bytes >= 0x80 on
 * signed-char platforms that is undefined - consider an unsigned char cast. */
289 for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s)
292 /* verify we are at the end of the header name */
294 bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
296 /* locate the start of the header value */
298 hdrval = skip_whitespace(s);
300 /* locate the end of header */
301 while (*s && *s != '\r' && *s != '\n')
304 /* end of header found */
310 /* Rats! The buffer isn't big enough to hold the entire header value */
/* Discard the rest of the over-long line, up to newline or EOF. */
311 while (c = getc(fp), c != EOF && c != '\n')
/* Compiled only when ENABLE_FEATURE_WGET_LONG_OPTIONS is set. */
317 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Build a newly allocated (xmalloc) escaped copy of str.
 * Visible rules: ASCII digits and letters are accepted by the test below;
 * escaped bytes are emitted as two upper-case hex digits.
 * NOTE(review): the loop, the '%' prefix, other unescaped characters and the
 * return statement are not visible in this listing. */
318 static char *URL_escape(const char *str)
320 /* URL encode, see RFC 2396 */
/* Worst case: every input byte expands to three output bytes, plus NUL. */
322 char *res = dst = xmalloc(strlen(str) * 3 + 1);
328 /* || strchr("!&'()*-.=_~", c) - more code */
340 || (c >= '0' && c <= '9')
/* (c|0x20) folds ASCII upper case onto lower case for the range test. */
341 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
/* High nibble, then low nibble. */
348 *dst++ = bb_hexdigits_upcase[c >> 4];
349 *dst++ = bb_hexdigits_upcase[c & 0xf];
/* Set up an FTP download: open the control connection, log in, switch to
 * binary mode, query the size, enter passive mode, open the data connection
 * (stored in *dfpp), optionally resume at beg_range, and issue RETR.
 * Any unexpected reply terminates the program with an error message.
 * Side effects visible here: may set target->user, sets content_len, and
 * changes the port stored in lsa to the passive data port.
 * NOTE(review): declarations, case labels and the return statement are not
 * visible in this listing; presumably the control stream sfp is returned. */
355 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
/* NOTE(review): presumably the default login when no user was given - the
 * guarding condition is not visible, confirm. */
363 target->user = xstrdup("anonymous:busybox@");
/* Control connection; the server greeting must be 220. */
365 sfp = open_socket(lsa);
366 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
367 bb_error_msg_and_die("%s", sanitize_string(buf+4));
370 * Splitting username:password pair,
373 str = strchr(target->user, ':');
376 switch (ftpcmd("USER ", target->user, sfp, buf)) {
/* Password step: 230 means the login succeeded. */
380 if (ftpcmd("PASS ", str, sfp, buf) == 230)
382 /* fall through (failed login) */
384 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
/* Binary transfer type; the reply code is not checked. */
387 ftpcmd("TYPE I", NULL, sfp, buf);
/* Ask for the file size; 213 carries the size after the reply code. */
392 if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
393 content_len = BB_STRTOOFF(buf+4, NULL, 10);
/* NOTE(review): relies on errno being meaningful right after BB_STRTOOFF;
 * presumably the macro/helper resets errno first - confirm. */
394 if (errno || content_len < 0) {
395 bb_error_msg_and_die("SIZE value is garbage");
401 * Entering passive mode
403 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
405 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
407 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
408 // Server's IP is N1.N2.N3.N4 (we ignore it)
409 // Server's port for data connection is P1*256+P2
/* Cut the reply at the last ')' so that P2 is the final field. */
410 str = strrchr(buf, ')');
411 if (str) str[0] = '\0';
/* Low byte of the port (P2): the number after the last comma. */
412 str = strrchr(buf, ',');
413 if (!str) goto pasv_error;
414 port = xatou_range(str+1, 0, 255);
/* High byte of the port (P1).
 * NOTE(review): the statement that truncates buf at the previous comma is
 * not visible in this listing; without it this would find the same comma. */
416 str = strrchr(buf, ',');
417 if (!str) goto pasv_error;
418 port += xatou_range(str+1, 0, 255) * 256;
/* Reuse lsa (same host) with the data port, then open the data stream. */
419 set_nport(lsa, htons(port));
421 *dfpp = open_socket(lsa);
/* Resume support: a 350 reply to REST means the server will start at
 * beg_range, so that many bytes are subtracted from the expected length. */
424 sprintf(buf, "REST %"OFF_FMT"d", beg_range);
425 if (ftpcmd(buf, NULL, sfp, buf) == 350)
426 content_len -= beg_range;
/* NOTE(review): any reply code <= 150 is accepted here - confirm intent. */
429 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
430 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
/* Bit flags for the parsed command-line options; bit N corresponds to the
 * N-th option of the getopt32() option string used in wget_main(). */
435 /* Must match option string! */
437 WGET_OPT_CONTINUE = (1 << 0),
438 WGET_OPT_SPIDER = (1 << 1),
439 WGET_OPT_QUIET = (1 << 2),
440 WGET_OPT_OUTNAME = (1 << 3),
441 WGET_OPT_PREFIX = (1 << 4),
442 WGET_OPT_PROXY = (1 << 5),
443 WGET_OPT_USER_AGENT = (1 << 6),
444 WGET_OPT_RETRIES = (1 << 7),
445 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
446 WGET_OPT_PASSIVE = (1 << 9),
/* The two flags below evaluate to 0 when ENABLE_FEATURE_WGET_LONG_OPTIONS
 * is 0, so tests against them are then always false. */
447 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
448 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
/* Copy the response body from dfp to output_fd.
 * Uses the globals content_len, G.got_clen and G.chunked to decide how much
 * to read; for chunked transfers the next chunk size is parsed after each
 * chunk and a size of 0 ends the transfer. A read failure is fatal.
 * NOTE(review): the progress-meter calls, the buffer declaration and parts
 * of the loop are not visible in this listing. */
451 static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
455 if (!(option_mask32 & WGET_OPT_QUIET))
461 /* Loops only if chunked */
/* Keep reading while bytes remain, or while no length is known at all. */
463 while (content_len > 0 || !G.got_clen) {
465 unsigned rdsz = sizeof(buf);
/* Read a full buffer unless a known remaining length is smaller.
 * NOTE(review): compares off_t with size_t; content_len is expected to be
 * non-negative here when a length is known. */
467 if (content_len < sizeof(buf) && (G.chunked || G.got_clen))
468 rdsz = (unsigned)content_len;
469 n = safe_fread(buf, rdsz, dfp);
472 /* perror will not work: ferror doesn't set errno */
473 bb_error_msg_and_die(bb_msg_read_error);
477 xwrite(output_fd, buf, n);
478 #if ENABLE_FEATURE_WGET_STATUSBAR
488 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
/* Next chunk header: its size is parsed as a hexadecimal number. */
490 safe_fgets(buf, sizeof(buf), dfp);
491 content_len = STRTOOFF(buf, NULL, 16);
492 /* FIXME: error check? */
493 if (content_len == 0)
494 break; /* all done! */
497 if (!(option_mask32 & WGET_OPT_QUIET))
/* Entry point of the wget applet.
 * Visible flow: parse options, parse the target URL (argv[optind]), pick a
 * proxy from the environment unless disabled, choose the output file name,
 * work out the resume offset for -c, resolve and connect to the server, then
 * either talk HTTP (also used for proxied FTP) or set up an FTP session,
 * and finally copy the data with retrieve_file_data().
 * NOTE(review): many lines of this function (braces, declarations, some
 * statements and comment delimiters) are missing from this listing; the
 * added comments describe only statements that are visible. */
501 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
502 int wget_main(int argc UNUSED_PARAM, char **argv)
/* server: host actually connected to (proxy or target); target: requested URL. */
505 struct host_info server, target;
506 len_and_sockaddr *lsa;
510 char *dir_prefix = NULL;
511 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
513 char *extra_headers = NULL;
514 llist_t *headers_llist = NULL;
516 FILE *sfp; /* socket to web/ftp server */
517 FILE *dfp; /* socket to ftp server (data) */
518 char *fname_out; /* where to direct output (-O) */
520 bool use_proxy; /* Use proxies if env vars are set */
521 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
522 const char *user_agent = "Wget";/* "User-Agent" header field */
/* Header names recognised in the response; the KEY_* values below are the
 * 1-based positions of these strings. */
524 static const char keywords[] ALIGN1 =
525 "content-length\0""transfer-encoding\0""chunked\0""location\0";
527 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
529 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
530 static const char wget_longopts[] ALIGN1 =
531 /* name, has_arg, val */
532 "continue\0" No_argument "c"
533 "spider\0" No_argument "s"
534 "quiet\0" No_argument "q"
535 "output-document\0" Required_argument "O"
536 "directory-prefix\0" Required_argument "P"
537 "proxy\0" Required_argument "Y"
538 "user-agent\0" Required_argument "U"
540 // "tries\0" Required_argument "t"
541 // "timeout\0" Required_argument "T"
542 /* Ignored (we always use PASV): */
543 "passive-ftp\0" No_argument "\xff"
544 "header\0" Required_argument "\xfe"
545 "post-data\0" Required_argument "\xfd"
551 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
552 applet_long_options = wget_longopts;
554 /* server.allocated = target.allocated = NULL; */
/* Option parsing: -t and -T take an argument but their values are discarded
 * (NULL destinations); --header values are collected in headers_llist. */
555 opt_complementary = "-1" IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
556 opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:",
557 &fname_out, &dir_prefix,
558 &proxy_flag, &user_agent,
559 NULL, /* -t RETRIES */
560 NULL /* -T NETWORK_READ_TIMEOUT */
561 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
562 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
564 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Join all --header values into one string, each followed by CRLF:
 * first add up the space needed (length + 2 per header), then append. */
568 llist_t *ll = headers_llist;
570 size += strlen(ll->data) + 2;
573 extra_headers = cp = xmalloc(size);
574 while (headers_llist) {
575 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
580 /* TODO: compat issue: should handle "wget URL1 URL2..." */
583 parse_url(argv[optind], &target);
585 /* Use the proxy if necessary */
/* Proxying stays enabled unless the -Y argument is exactly "off". */
586 use_proxy = (strcmp(proxy_flag, "off") != 0);
/* The proxy URL comes from ftp_proxy or http_proxy, depending on the scheme. */
588 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
589 if (proxy && proxy[0]) {
590 parse_url(proxy, &server);
/* No proxy: connect directly to the target host and port. */
596 server.port = target.port;
/* With IPv6 enabled the server gets its own copy of the host string,
 * because target.host has its scope id stripped just below. */
597 if (ENABLE_FEATURE_IPV6) {
598 server.host = xstrdup(target.host);
600 server.host = target.host;
604 if (ENABLE_FEATURE_IPV6)
605 strip_ipv6_scope_id(target.host);
607 /* Guess an output filename, if there was no -O FILE */
608 if (!(opt & WGET_OPT_OUTNAME)) {
609 fname_out = bb_get_last_path_component_nostrip(target.path);
610 /* handle "wget http://kernel.org//" */
611 if (fname_out[0] == '/' || !fname_out[0])
612 fname_out = (char*)"index.html";
613 /* -P DIR is considered only if there was no -O FILE */
615 fname_out = concat_path_file(dir_prefix, fname_out);
/* "-O -" case: resuming (-c) is switched off for it. */
617 if (LONE_DASH(fname_out)) {
620 opt &= ~WGET_OPT_CONTINUE;
623 #if ENABLE_FEATURE_WGET_STATUSBAR
624 curfile = bb_get_last_path_component_nostrip(fname_out);
628 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
629 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
632 /* Determine where to start transfer */
/* For -c, an existing output file is opened and its current size becomes
 * the resume offset. */
633 if (opt & WGET_OPT_CONTINUE) {
634 output_fd = open(fname_out, O_WRONLY);
635 if (output_fd >= 0) {
636 beg_range = xlseek(output_fd, 0, SEEK_END);
638 /* File doesn't exist. We do not create file here yet.
639 * We are not sure it exists on remote side */
/* Resolve the server address; announce it unless -q was given. */
644 lsa = xhost2sockaddr(server.host, server.port);
645 if (!(opt & WGET_OPT_QUIET)) {
646 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
647 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
/* HTTP is spoken for http:// targets and for anything fetched via a proxy. */
651 if (use_proxy || !target.is_ftp) {
658 /* Open socket to http server */
659 sfp = open_socket(lsa);
661 /* Send HTTP request */
/* This request line carries the full URL (scheme, host and path);
 * the ones further down carry only the path. */
663 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
664 target.is_ftp ? "f" : "ht", target.host,
667 if (opt & WGET_OPT_POST_DATA)
668 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
670 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
673 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
674 target.host, user_agent);
676 #if ENABLE_FEATURE_WGET_AUTHENTICATION
/* The +6 skips the leading "Proxy-" of the literal, so the header sent
 * for the target credentials is "Authorization: Basic ...". */
678 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
679 base64enc_512(buf, target.user));
681 if (use_proxy && server.user) {
682 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
683 base64enc_512(buf, server.user));
/* Ask the server to start at the resume offset. */
688 fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
689 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
691 fputs(extra_headers, sfp);
/* POST: the data is URL-escaped and sent as the request body, right after
 * the blank line that ends the headers.
 * NOTE(review): "%u" is paired with an int argument below; the conversion
 * and the cast should agree (e.g. cast to unsigned) - confirm. */
693 if (opt & WGET_OPT_POST_DATA) {
694 char *estr = URL_escape(post_data);
695 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
696 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
697 (int) strlen(estr), estr);
698 /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
699 /*fprintf(sfp, "%s\r\n", estr);*/
703 { /* If "Connection:" is needed, document why */
704 fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
708 * Retrieve HTTP response line and check for "200" status code.
711 if (fgets(buf, sizeof(buf), sfp) == NULL)
712 bb_error_msg_and_die("no response from server");
715 str = skip_non_whitespace(str);
716 str = skip_whitespace(str);
717 // FIXME: no error check
718 // xatou wouldn't work: "200 OK"
723 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
724 /* eat all remaining headers */;
728 Response 204 doesn't say "null file", it says "metadata
729 has changed but data didn't":
731 "10.2.5 204 No Content
732 The server has fulfilled the request but does not need to return
733 an entity-body, and might want to return updated metainformation.
734 The response MAY include new or updated metainformation in the form
735 of entity-headers, which if present SHOULD be associated with
736 the requested variant.
738 If the client is a user agent, it SHOULD NOT change its document
739 view from that which caused the request to be sent. This response
740 is primarily intended to allow input for actions to take place
741 without causing a change to the user agent's active document view,
742 although any new or updated metainformation SHOULD be applied
743 to the document currently in the user agent's active view.
745 The 204 response MUST NOT include a message-body, and thus
746 is always terminated by the first empty line after the header fields."
748 However, in real world it was observed that some web servers
749 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
753 case 300: /* redirection */
763 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
767 * Retrieve HTTP headers.
769 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
770 /* gethdr converted "FOO:" string to lowercase */
772 /* strip trailing whitespace */
773 char *s = strchrnul(str, '\0') - 1;
774 while (s >= str && (*s == ' ' || *s == '\t')) {
778 key = index_in_strings(keywords, buf) + 1;
779 if (key == KEY_content_length) {
780 content_len = BB_STRTOOFF(str, NULL, 10);
781 if (errno || content_len < 0) {
782 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
787 if (key == KEY_transfer_encoding) {
788 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
789 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
790 G.chunked = G.got_clen = 1;
792 if (key == KEY_location && status >= 300) {
793 if (--redir_limit == 0)
794 bb_error_msg_and_die("too many redirections");
799 /* free(target.allocated); */
800 target.path = /* target.allocated = */ xstrdup(str+1);
801 /* lsa stays the same: it's on the same server */
803 parse_url(str, &target);
805 server.host = target.host;
806 /* strip_ipv6_scope_id(target.host); - no! */
807 /* we assume remote never gives us IPv6 addr with scope id */
808 server.port = target.port;
811 } /* else: lsa stays the same: we use proxy */
813 goto establish_session;
816 // if (status >= 300)
817 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
819 /* For HTTP, data is pumped over the same connection */
826 sfp = prepare_ftp_session(&dfp, &target, lsa);
829 if (opt & WGET_OPT_SPIDER) {
830 if (ENABLE_FEATURE_CLEAN_UP)
836 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
837 /* compat with wget: -O FILE can overwrite */
838 if (opt & WGET_OPT_OUTNAME)
839 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
840 output_fd = xopen(fname_out, o_flags);
843 retrieve_file_data(dfp, output_fd);
846 /* It's ftp. Close it properly */
848 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
849 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
850 ftpcmd("QUIT", NULL, sfp, buf);