1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //usage:#define wget_trivial_usage
13 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage: " [--no-check-certificate] [--no-cache]" */
18 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
20 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
24 //usage:#define wget_full_usage "\n\n"
25 //usage: "Retrieve files via HTTP or FTP\n"
26 //usage: "\n -s Spider mode - only check file existence"
27 //usage: "\n -c Continue retrieval of aborted transfer"
28 //usage: "\n -q Quiet"
29 //usage: "\n -P DIR Save to DIR (default .)"
30 //usage: IF_FEATURE_WGET_TIMEOUT(
31 //usage: "\n -T SEC Network read timeout is SEC seconds"
33 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
34 //usage: "\n -U STR Use STR for User-Agent header"
35 //usage: "\n -Y Use proxy ('on' or 'off')"
// Two alternative definitions of the I/O logging helpers follow; the
// preprocessor condition that selects between them is not visible in this view.
// First variant: log_io forwards to bb_error_msg, and SENDFMT logs the
// formatted text behind a leading > marker and then writes the same text to fp.
// Second variant: log_io expands to a no-op and SENDFMT is a plain fprintf.
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
41 # define SENDFMT(fp, fmt, ...) \
43 log_io("> " fmt, ##__VA_ARGS__); \
44 fprintf(fp, fmt, ##__VA_ARGS__); \
47 # define log_io(...) ((void)0)
48 # define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
// Scheme names. parse_url stores one of these pointers in h->protocol, and
// download_one_url later tests the protocol by comparing pointers against
// P_FTP and P_HTTPS rather than by comparing string contents.
60 static const char P_FTP[] = "ftp";
61 static const char P_HTTP[] = "http";
62 static const char P_HTTPS[] = "https";
64 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
65 /* User-specified headers prevent using our corresponding built-in headers. */
// One bit per overridable header. wget_main ORs a bit into G.user_headers
// when a user-supplied header begins with the matching name from
// wget_user_headers. Some enumerators are not visible in this view.
// The two authentication bits are multiplied by
// ENABLE_FEATURE_WGET_AUTHENTICATION, so they are 0 when that macro is 0.
68 HDR_USER_AGENT = (1<<1),
70 HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
71 HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
// NUL-separated list of header names that wget_main walks entry by entry.
73 static const char wget_user_headers[] ALIGN1 =
77 # if ENABLE_FEATURE_WGET_AUTHENTICATION
79 "Proxy-Authorization:\0"
// Each macro is nonzero when the user supplied that header; download_one_url
// tests them before emitting its own version of the header.
82 # define USR_HEADER_HOST (G.user_headers & HDR_HOST)
83 # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
84 # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
85 # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH)
86 # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
87 #else /* No long options, no user-headers :( */
// Without long options every test is the constant 0, so the built-in
// headers are always sent.
88 # define USR_HEADER_HOST 0
89 # define USR_HEADER_USER_AGENT 0
90 # define USR_HEADER_RANGE 0
91 # define USR_HEADER_AUTH 0
92 # define USR_HEADER_PROXY_AUTH 0
// Members of the applet state that the rest of the file reaches through the
// G macro. The line that opens the enclosing declaration is not visible in
// this view, and some members are missing from it as well.
97 off_t content_len; /* Content-length of the file */
98 off_t beg_range; /* Range at which continue begins */
99 #if ENABLE_FEATURE_WGET_STATUSBAR
100 off_t transferred; /* Number of bytes transferred so far */
101 const char *curfile; /* Name of current file being transferred */
105 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
108 unsigned char user_headers; /* Headers mentioned by the user */
110 char *fname_out; /* where to direct output (-O) */
111 const char *proxy_flag; /* Use proxies if env vars are set */
112 const char *user_agent; /* "User-Agent" header field */
113 #if ENABLE_FEATURE_WGET_TIMEOUT
// Idle limit in seconds. wget_main defaults it to 900 and -T overrides it;
// retrieve_file_data treats a value of 0 as no timeout.
114 unsigned timeout_seconds;
119 smallint chunked; /* chunked transfer encoding */
120 smallint got_clen; /* got content-length: from server */
// wget_buf is shared: it holds protocol reply lines read by fgets_and_trim,
// base64 output from base64enc, and file data copied by retrieve_file_data.
121 /* Local downloads do benefit from big buffer.
122 * With 512 byte buffer, it was measured to be
123 * an order of magnitude slower than with big one.
125 uint64_t just_to_align_next_member;
126 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
// G dereferences ptr_to_globals. INIT_G points it at an allocation of
// sizeof(G) bytes obtained from xzalloc, and FINI_G releases it through
// FREE_PTR_TO_GLOBALS. The closing lines of both macros are not visible in
// this view.
128 #define G (*ptr_to_globals)
129 #define INIT_G() do { \
130 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
132 #define FINI_G() do { \
133 FREE_PTR_TO_GLOBALS(); \
137 /* Must match option string! */
// Bits tested in option_mask32. Bits 0 to 8 follow the order of the letters
// in the getopt32 string used by wget_main (c s q O P Y U T t); bits 9 to 11
// belong to options that exist only in long form.
// HEADER and POST_DATA are multiplied by ENABLE_FEATURE_WGET_LONG_OPTIONS,
// so tests against them are constant 0 when that macro is 0.
139 WGET_OPT_CONTINUE = (1 << 0),
140 WGET_OPT_SPIDER = (1 << 1),
141 WGET_OPT_QUIET = (1 << 2),
142 WGET_OPT_OUTNAME = (1 << 3),
143 WGET_OPT_PREFIX = (1 << 4),
144 WGET_OPT_PROXY = (1 << 5),
145 WGET_OPT_USER_AGENT = (1 << 6),
146 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
147 WGET_OPT_RETRIES = (1 << 8),
148 WGET_OPT_PASSIVE = (1 << 9),
149 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
150 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
158 #if ENABLE_FEATURE_WGET_STATUSBAR
// Updates the progress display. Callers in this file pass PROGRESS_START,
// PROGRESS_BUMP or PROGRESS_END as flag.
159 static void progress_meter(int flag)
// Quiet mode is tested first; the statement it guards is not visible in
// this view.
161 if (option_mask32 & WGET_OPT_QUIET)
// At the start of a transfer the meter is initialised with the file name.
164 if (flag == PROGRESS_START)
165 bb_progress_init(&G.pmt, G.curfile);
// The final argument is 0 when the transfer is chunked or no content length
// was received; otherwise it is the sum of the resume offset, the bytes
// transferred so far and G.content_len.
// NOTE(review): that sum equals the total size only if G.content_len holds
// the bytes still outstanding at this point - confirm against
// retrieve_file_data.
167 bb_progress_update(&G.pmt,
170 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
// At the end of a transfer the meter is released and a newline is written
// to stderr.
173 if (flag == PROGRESS_END) {
174 bb_progress_free(&G.pmt);
175 bb_putchar_stderr('\n');
// Empty stub; presumably the variant used when the status bar feature is
// compiled out (the preprocessor branch line is not visible here).
180 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
184 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
185 * local addresses can have a scope identifier to specify the
186 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
187 * identifier is only valid on a single node.
189 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
190 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
191 * in the Host header as invalid requests, see
192 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
// Edits host in place. It looks for the first percent sign and the first
// closing bracket, then copies the text starting at the bracket over the
// text starting at the percent sign, which removes the zone identifier that
// sat between them.
// Several guard lines are not visible in this view, including the test that
// leads to the early return for non-IPv6 hosts.
194 static void strip_ipv6_scope_id(char *host)
198 /* bbox wget actually handles IPv6 addresses without [], like
199 * wget "http://::1/xxx", but this is not standard.
200 * To save code, _here_ we do not support it. */
203 return; /* not IPv6 */
205 scope = strchr(host, '%');
209 /* Remove the IPv6 zone identifier from the host address */
210 cp = strchr(host, ']');
// The bracket must exist and be followed by a colon or the end of the
// string; anything else is classed as malformed (the action taken for that
// case is not visible here).
211 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
212 /* malformed address (not "[xx]:nn" or "[xx]") */
216 /* cp points to "]...", scope points to "%eth0]..." */
// Source and destination overlap, hence the overlap-aware copy helper.
217 overlapping_strcpy(scope, cp);
220 #if ENABLE_FEATURE_WGET_AUTHENTICATION
221 /* Base64-encode character string. */
// The encoded text is written into the shared G.wget_buf, so it is only
// valid until the next user of that buffer. Callers in this file pass the
// result directly to SENDFMT; the return statement is not visible in this
// view, presumably it returns G.wget_buf.
222 static char *base64enc(const char *str)
224 unsigned len = strlen(str);
// Base64 emits 4 output characters per 3 input bytes, so the input is
// silently capped at three quarters of the buffer size, less a 10 byte
// safety margin, to keep the output inside G.wget_buf.
225 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
226 len = sizeof(G.wget_buf)/4*3 - 10;
227 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
// Callers in this file wrap server replies, header text and URLs in this
// function before printing them in error messages.
// NOTE(review): only the signature and the first declaration are visible in
// this view; the scanning logic and the return value must be confirmed
// against the full file. p views the bytes of s as unsigned char.
232 static char* sanitize_string(char *s)
234 unsigned char *p = (void *) s;
241 #if ENABLE_FEATURE_WGET_TIMEOUT
// SIGALRM handler; wget_main installs it with signal(). open_socket arms the
// alarm with G.timeout_seconds around the connect call and sets G.connecting.
// Any condition guarding the fatal message below is not visible in this view.
242 static void alarm_handler(int sig UNUSED_PARAM)
244 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
246 bb_error_msg_and_die("download timed out");
// Connects a stream socket to the address in lsa and wraps the descriptor in
// a stdio stream opened for reading and writing. Callers use the result as
// the control or data stream.
250 static FILE *open_socket(len_and_sockaddr *lsa)
// With the timeout feature an alarm of G.timeout_seconds is armed, and
// G.connecting is set only for the duration of the connect call.
255 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
256 fd = xconnect_stream(lsa);
257 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
259 /* glibc 2.4 seems to try seeking on it - ??! */
260 /* hopefully it understands what ESPIPE means... */
261 fp = fdopen(fd, "r+");
// A failed fdopen is reported as memory exhaustion and is fatal (the test
// on fp is not visible in this view).
263 bb_perror_msg_and_die(bb_msg_memory_exhausted);
268 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
269 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
// Reads one line from fp into the shared G.wget_buf. The line is afterwards
// available to callers in that buffer and is also passed to log_io.
270 static char fgets_and_trim(FILE *fp)
// fgets is given one byte less than the buffer size. A NULL result (read
// error or end of file before any data) is fatal.
275 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
276 bb_perror_msg_and_die("error getting response");
// strchrnul returns a pointer to the terminator when the character is
// absent, so buf_ptr is never NULL. The stores that cut the string at these
// positions are not visible in this view.
278 buf_ptr = strchrnul(G.wget_buf, '\n');
281 buf_ptr = strchrnul(G.wget_buf, '\r');
284 log_io("< %s", G.wget_buf);
// Sends an FTP command on fp (s1 immediately followed by s2, then CRLF) and
// reads the reply. Callers pass NULL for s1 when they only want to read a
// reply, such as the server greeting; the guard around the send is not
// visible in this view.
// The numeric reply code is parsed into result, which callers compare
// against expected FTP reply codes. The reply text is left in G.wget_buf;
// callers print it from there, usually starting at offset 4.
289 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
295 fprintf(fp, "%s%s\r\n", s1, s2);
297 log_io("> %s%s", s1, s2);
// Keep reading lines until one begins with a digit and has a space at
// index 3; other lines are skipped.
302 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
// Cut the buffer after the three character code so it parses as a number.
304 G.wget_buf[3] = '\0';
305 result = xatoi_positive(G.wget_buf);
// Splits src_url into the fields of h. A private copy of the URL is made and
// stored in h->allocated; the fields set afterwards are derived from that
// copy. Recognised schemes are ftp, https and http; any other scheme is fatal.
// Several assignments (host, path, terminators) are not visible in this view.
310 static void parse_url(const char *src_url, struct host_info *h)
315 h->allocated = url = xstrdup(src_url);
318 p = strstr(url, "://");
// These comparisons test the scheme alone, which presumes the copy was cut
// at the separator on a line not visible here. Each scheme gets its port
// from bb_lookup_port with the usual default (21, 443 or 80) as third argument.
322 if (strcmp(url, P_FTP) == 0) {
323 h->port = bb_lookup_port(P_FTP, "tcp", 21);
325 if (strcmp(url, P_HTTPS) == 0) {
326 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
327 h->protocol = P_HTTPS;
329 if (strcmp(url, P_HTTP) == 0) {
331 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
332 h->protocol = P_HTTP;
335 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
338 // GNU wget is user-friendly and falls back to http://
344 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
345 // 'GET /?var=a/b HTTP 1.0'
346 // and saves 'index.html?var=a%2Fb' (we save 'b')
347 // wget 'http://busybox.net?login=john@doe':
348 // request: 'GET /?login=john@doe HTTP/1.0'
349 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
350 // wget 'http://busybox.net#test/test':
351 // request: 'GET / HTTP/1.0'
352 // saves: 'index.html' (we save 'test')
354 // We also don't add unique .N suffix if file exists...
// After these three lines sp points at whichever of slash, question mark or
// hash occurs first in the host part, or is NULL when none is present.
355 sp = strchr(h->host, '/');
356 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
357 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
360 } else if (*sp == '/') {
363 } else { // '#' or '?'
364 // http://busybox.net?login=john@doe is a valid URL
365 // memmove converts to:
366 // http:/busybox.nett?login=john@doe...
// The host text is shifted one byte to the left, presumably so that a
// terminator can be stored in front of the query or fragment without
// overwriting its first character.
367 memmove(h->host - 1, h->host, sp - h->host);
// The last at-sign in the host part separates user information from the
// host name.
373 sp = strrchr(h->host, '@');
375 // URL-decode "user:password" string before base64-encoding:
376 // wget http://test:my%20pass@example.com should send
377 // Authorization: Basic dGVzdDpteSBwYXNz
378 // which decodes to "test:my pass".
379 // Standard wget and curl do this too.
// h->user receives its own heap copy of the decoded text.
382 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
385 /* else: h->user remains NULL, or as set by original request
386 * before redirect (if we are here after a redirect).
// Reads one response header line through fgets_and_trim, leaving the header
// name at the start of G.wget_buf and locating the value in hdrval.
// Callers loop until the result is NULL, which marks the end of the headers;
// the return statements themselves are not visible in this view.
390 static char *gethdr(FILE *fp)
395 /* retrieve header line */
// c records whether the line ended with a newline, that is, whether it fit
// into the buffer.
396 c = fgets_and_trim(fp);
398 /* end of the headers? */
399 if (G.wget_buf[0] == '\0')
402 /* convert the header name to lower case */
// The name may contain letters, digits, dash, dot and underscore. The loop
// body that performs the conversion is not visible in this view.
403 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
405 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
406 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
407 * "A-Z" maps to "a-z".
408 * "@[\]" can't occur in header names.
409 * "^_" maps to "~,DEL" (which is wrong).
410 * "^" was never seen yet, "_" was seen from web.archive.org
411 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
416 /* verify we are at the end of the header name */
418 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
420 /* locate the start of the header value */
422 hdrval = skip_whitespace(s);
425 /* Rats! The buffer isn't big enough to hold the entire header value */
// Discard the rest of an over-long line one character at a time, up to the
// newline or end of file, so the next call starts on a fresh line.
426 while (c = getc(fp), c != EOF && c != '\n')
// Called when the server did not honour a resume request (after a failed
// FTP REST, and from the HTTP status handling). Reports the failure and
// rewinds the output file to offset 0.
// The file is deliberately not truncated here; retrieve_file_data truncates
// it at the final write position once the download has finished.
// NOTE(review): the statement that clears G.beg_range is not visible in
// this view - confirm it exists in the full file.
433 static void reset_beg_range_to_zero(void)
435 bb_error_msg("restart failed");
437 xlseek(G.output_fd, 0, SEEK_SET);
438 /* Done at the end instead: */
439 /* ftruncate(G.output_fd, 0); */
// Sets up an FTP download of target->path from the server at lsa: greeting,
// login, binary type, size query, passive mode, data connection, optional
// restart offset, and finally the retrieve command.
// The data stream is stored through dfpp. The caller assigns the return
// value to its control stream variable; the return statement is not visible
// in this view. Any unexpected reply is fatal.
// Side effect: the port in lsa is overwritten with the passive data port.
442 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
// Default credentials; the guard that applies them only when no user was
// given is not visible in this view.
449 target->user = xstrdup("anonymous:busybox@");
451 sfp = open_socket(lsa);
// The server greeting must carry code 220.
452 if (ftpcmd(NULL, NULL, sfp) != 220)
453 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
456 * Splitting username:password pair,
459 str = strchr(target->user, ':');
// After USER, the password is sent in one of the reply cases (labels not
// visible here) and must be answered with 230; anything else is a failed
// login.
462 switch (ftpcmd("USER ", target->user, sfp)) {
466 if (ftpcmd("PASS ", str, sfp) == 230)
468 /* fall through (failed login) */
470 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
// Switch to binary transfer type; the reply code is not checked.
473 ftpcmd("TYPE I", NULL, sfp);
// A 213 reply to SIZE carries the file length after the code and a space.
// A negative value or a set errno is rejected.
478 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
479 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
480 if (G.content_len < 0 || errno) {
481 bb_error_msg_and_die("SIZE value is garbage");
487 * Entering passive mode
489 if (ftpcmd("PASV", NULL, sfp) != 227) {
491 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
493 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
494 // Server's IP is N1.N2.N3.N4 (we ignore it)
495 // Server's port for data connection is P1*256+P2
// The reply is parsed from the right: text from the last closing
// parenthesis onward is dropped, then the number after the last comma is
// the low byte of the port and the number after the comma before it is the
// high byte. The line that cuts the buffer between the two lookups is not
// visible in this view.
496 str = strrchr(G.wget_buf, ')');
497 if (str) str[0] = '\0';
498 str = strrchr(G.wget_buf, ',');
499 if (!str) goto pasv_error;
500 port = xatou_range(str+1, 0, 255);
502 str = strrchr(G.wget_buf, ',');
503 if (!str) goto pasv_error;
504 port += xatou_range(str+1, 0, 255) * 256;
505 set_nport(&lsa->u.sa, htons(port));
// Same host as the control connection, new port.
507 *dfpp = open_socket(lsa);
// Resuming: if the server accepts REST with 350, only the remainder will be
// sent, so the expected length shrinks by the resume offset. Otherwise the
// download restarts from the beginning.
509 if (G.beg_range != 0) {
510 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
511 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
512 G.content_len -= G.beg_range;
514 reset_beg_range_to_zero();
// Any reply code above 150 to RETR is treated as a failure.
517 if (ftpcmd("RETR ", target->path, sfp) > 150)
518 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
// Starts an external openssl s_client process to carry the TLS connection
// to host and port. A socket pair links this process to the helper.
// The caller wraps the returned descriptor with fdopen; which end of the
// pair is returned, and how the child wires its descriptors, is not visible
// in this view.
523 static int spawn_https_helper(const char *host, unsigned port)
525 char *allocated = NULL;
529 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
530 /* Kernel can have AF_UNIX support disabled */
531 bb_perror_msg_and_die("socketpair");
// A host without a colon gets the port appended, giving the host:port form
// passed to the -connect argument below.
// NOTE(review): a bare IPv6 literal already contains colons and would skip
// this step - confirm how such hosts reach this function.
533 if (!strchr(host, ':'))
534 host = allocated = xasprintf("%s:%u", host, port);
// fork where an MMU is available, vfork otherwise.
536 pid = BB_MMU ? xfork() : xvfork();
545 * TODO: develop a tiny ssl/tls helper (using matrixssl?),
546 * try to exec it here before falling back to big fat openssl.
549 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
550 * It prints some debug stuff on stderr, don't know how to suppress it.
551 * Work around by dev-nulling stderr. We lose all error messages :(
554 xopen("/dev/null", O_RDWR);
555 argv[0] = (char*)"openssl";
556 argv[1] = (char*)"s_client";
557 argv[2] = (char*)"-quiet";
558 argv[3] = (char*)"-connect";
559 argv[4] = (char*)host;
561 BB_EXECVP(argv[0], argv);
// Reached only if the exec call returned, which means it failed.
563 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
573 /* See networking/ssl_helper/README */
// Alternative TLS helper: runs the ssl_helper program with the argument
// -d3 after moving the already connected network descriptor to descriptor 3.
// Afterwards one end of the socket pair is moved onto network_fd, so the
// caller keeps using the same descriptor number but now talks to the helper.
// Lines separating the child and parent paths are not visible in this view.
577 static void spawn_https_helper1(int network_fd)
582 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
583 /* Kernel can have AF_UNIX support disabled */
584 bb_perror_msg_and_die("socketpair");
586 pid = BB_MMU ? xfork() : xvfork();
594 xmove_fd(network_fd, 3);
596 * A simple ssl/tls helper
598 argv[0] = (char*)"ssl_helper";
599 argv[1] = (char*)"-d3";
601 BB_EXECVP(argv[0], argv);
// Reached only if the exec call returned, which means it failed.
602 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
608 xmove_fd(sp[0], network_fd);
// Copies the response body from dfp to G.output_fd through G.wget_buf.
// When the status bar or timeout feature is enabled, the descriptor is
// switched to non-blocking mode and the loop waits in poll for at most one
// second at a time, so the meter can be refreshed and idle seconds counted.
// For chunked transfers the next chunk size line is parsed as hexadecimal
// after each chunk; a size of 0 ends the download.
// At the end the output file is truncated at the current position and the
// meter is drawn as complete.
// Many control-flow lines (loop heads, braces, breaks) are not visible in
// this view.
612 static void NOINLINE retrieve_file_data(FILE *dfp)
614 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
615 # if ENABLE_FEATURE_WGET_TIMEOUT
// Countdown of idle seconds remaining before the download is aborted.
616 unsigned second_cnt = G.timeout_seconds;
618 struct pollfd polldata;
620 polldata.fd = fileno(dfp);
621 polldata.events = POLLIN | POLLPRI;
623 progress_meter(PROGRESS_START);
628 /* Loops only if chunked */
631 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
632 /* Must use nonblocking I/O, otherwise fread will loop
633 * and *block* until it reads full buffer,
634 * which messes up progress bar and/or timeout logic.
635 * Because of nonblocking I/O, we need to dance
636 * very carefully around EAGAIN. See explanation at
639 ndelay_on(polldata.fd);
645 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
646 /* fread internally uses read loop, which in our case
647 * is usually exited when we get EAGAIN.
648 * In this case, libc sets error marker on the stream.
649 * Need to clear it before next fread to avoid possible
650 * rare false positive ferror below. Rare because usually
651 * fread gets more than zero bytes, and we don't fall
652 * into if (n <= 0) ...
// Ask for a full buffer unless fewer bytes are expected, in which case the
// request is capped at G.content_len.
657 rdsz = sizeof(G.wget_buf);
659 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
660 if ((int)G.content_len <= 0)
662 rdsz = (unsigned)G.content_len;
665 n = fread(G.wget_buf, 1, rdsz, dfp);
668 xwrite(G.output_fd, G.wget_buf, n);
669 #if ENABLE_FEATURE_WGET_STATUSBAR
674 if (G.content_len == 0)
677 #if ENABLE_FEATURE_WGET_TIMEOUT
// Restart the idle countdown.
678 second_cnt = G.timeout_seconds;
685 * If error occurs, or EOF is reached, the return value
686 * is a short item count (or zero).
687 * fread does not distinguish between EOF and error.
// Any errno other than EAGAIN is on the path to the fatal read error below;
// the stream-error test that separates it from plain end of file is not
// visible in this view.
689 if (errno != EAGAIN) {
691 progress_meter(PROGRESS_END);
692 bb_perror_msg_and_die(bb_msg_read_error);
694 break; /* EOF, not error */
697 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
698 /* It was EAGAIN. There is no data. Wait up to one second
699 * then abort if timed out, or update the bar and try reading again.
// poll returning 0 means a full second passed without data.
701 if (safe_poll(&polldata, 1, 1000) == 0) {
702 # if ENABLE_FEATURE_WGET_TIMEOUT
// A countdown of 0 means no timeout; otherwise abort when it reaches 0.
703 if (second_cnt != 0 && --second_cnt == 0) {
704 progress_meter(PROGRESS_END);
705 bb_error_msg_and_die("download timed out");
708 /* We used to loop back to poll here,
709 * but there is no great harm in letting fread
710 * to try reading anyway.
715 /* Need to do it _every_ second for "stalled" indicator
716 * to be shown properly.
718 progress_meter(PROGRESS_BUMP);
719 } /* while (reading data) */
721 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
723 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
728 fgets_and_trim(dfp); /* Eat empty line */
// Chunk sizes are hexadecimal, hence base 16.
731 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
732 /* FIXME: error check? */
733 if (G.content_len == 0)
734 break; /* all done! */
737 * Note that fgets may result in some data being buffered in dfp.
738 * We loop back to fread, which will retrieve this data.
739 * Also note that code has to be arranged so that fread
740 * is done _before_ one-second poll wait - poll doesn't know
741 * about stdio buffering and can result in spurious one second waits!
745 /* If -c failed, we restart from the beginning,
746 * but we do not truncate file then, we do it only now, at the end.
747 * This lets user to ^C if his 99% complete 10 GB file download
748 * failed to restart *without* losing the almost complete file.
// Truncate at the current write position. A failed lseek skips the
// truncation, and the result of ftruncate is not checked.
751 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
752 if (pos != (off_t)-1)
753 ftruncate(G.output_fd, pos);
756 /* Draw full bar and free its resources */
757 G.chunked = 0; /* makes it show 100% even for chunked download */
758 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
759 progress_meter(PROGRESS_END);
// Downloads a single URL. The visible steps are: parse the URL, pick a proxy
// from the environment if allowed, derive the output file name, resolve the
// server address, run the HTTP(S) request and response exchange (following
// redirects) or set up an FTP session, then hand the data stream to
// retrieve_file_data unless spider mode is on, and finally free what was
// allocated here.
// Many control-flow lines (labels, else branches, switch heads, braces) are
// not visible in this view.
762 static void download_one_url(const char *url)
764 bool use_proxy; /* Use proxies if env vars are set */
766 len_and_sockaddr *lsa;
767 FILE *sfp; /* socket to web/ftp server */
768 FILE *dfp; /* socket to ftp server (data) */
770 char *fname_out_alloc;
771 char *redirected_path = NULL;
772 struct host_info server;
773 struct host_info target;
775 server.allocated = NULL;
776 target.allocated = NULL;
780 parse_url(url, &target);
782 /* Use the proxy if necessary */
// Proxy use is allowed unless the proxy flag is the string off, and is then
// enabled only if the matching environment variable is set and non-empty.
783 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
785 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
786 //FIXME: what if protocol is https? Ok to use http_proxy?
787 use_proxy = (proxy && proxy[0]);
789 parse_url(proxy, &server);
// Presumably the branch taken without a proxy: the connection goes straight
// to the target. With IPv6 support the host is duplicated, because
// target.host has its scope id stripped below while the copy keeps it.
792 server.port = target.port;
793 if (ENABLE_FEATURE_IPV6) {
794 //free(server.allocated); - can't be non-NULL
795 server.host = server.allocated = xstrdup(target.host);
797 server.host = target.host;
801 if (ENABLE_FEATURE_IPV6)
802 strip_ipv6_scope_id(target.host);
804 /* If there was no -O FILE, guess output filename */
805 fname_out_alloc = NULL;
806 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
807 G.fname_out = bb_get_last_path_component_nostrip(target.path);
808 /* handle "wget http://kernel.org//" */
809 if (G.fname_out[0] == '/' || !G.fname_out[0])
810 G.fname_out = (char*)"index.html";
811 /* -P DIR is considered only if there was no -O FILE */
813 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
815 /* redirects may free target.path later, need to make a copy */
816 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
819 #if ENABLE_FEATURE_WGET_STATUSBAR
820 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
823 /* Determine where to start transfer */
// With -c an existing output file is opened write-only and its current size
// becomes the resume offset.
825 if (option_mask32 & WGET_OPT_CONTINUE) {
826 G.output_fd = open(G.fname_out, O_WRONLY);
827 if (G.output_fd >= 0) {
828 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
830 /* File doesn't exist. We do not create file here yet.
831 * We are not sure it exists on remote side */
836 lsa = xhost2sockaddr(server.host, server.port);
837 if (!(option_mask32 & WGET_OPT_QUIET)) {
838 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
839 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
843 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
// HTTP handling is used for http and https targets and for any target
// fetched through a proxy; only direct ftp goes to the FTP code.
846 if (use_proxy || target.protocol != P_FTP) {
853 /* Open socket to http(s) server */
854 if (target.protocol == P_HTTPS) {
855 /* openssl-based helper
856 * Inconvenient API since we can't give it an open fd
858 int fd = spawn_https_helper(server.host, server.port);
859 sfp = fdopen(fd, "r+");
861 bb_perror_msg_and_die(bb_msg_memory_exhausted);
863 sfp = open_socket(lsa);
865 if (target.protocol == P_HTTPS)
866 spawn_https_helper1(fileno(sfp));
868 /* Send HTTP request */
// Two request line forms: the first carries the absolute URL (presumably
// used when going through a proxy; the condition is not visible here), the
// second carries only the path and selects POST when post data was given.
870 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
871 target.protocol, target.host,
874 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
875 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
// Built-in headers are skipped when the user supplied their own.
878 if (!USR_HEADER_HOST)
879 SENDFMT(sfp, "Host: %s\r\n", target.host);
880 if (!USR_HEADER_USER_AGENT)
881 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
883 /* Ask server to close the connection as soon as we are done
884 * (IOW: we do not intend to send more requests)
886 SENDFMT(sfp, "Connection: close\r\n");
888 #if ENABLE_FEATURE_WGET_AUTHENTICATION
// The +6 skips the six characters of the Proxy- prefix, so this sends a
// plain Authorization header while sharing the literal with the proxy case.
889 if (target.user && !USR_HEADER_AUTH) {
890 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
891 base64enc(target.user));
893 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
894 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
895 base64enc(server.user));
899 if (G.beg_range != 0 && !USR_HEADER_RANGE)
900 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
902 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// NOTE(review): G.extra_headers is user-supplied text and is passed to
// log_io as the format argument. In the variant where log_io forwards to
// bb_error_msg, a percent sign in a header would be read as a conversion -
// confirm, and consider passing the text as an argument to a fixed format.
903 if (G.extra_headers) {
904 log_io(G.extra_headers);
905 fputs(G.extra_headers, sfp);
// NOTE(review): the Content-Length conversion below is unsigned while its
// argument is cast to int - confirm whether the mismatch is intended.
908 if (option_mask32 & WGET_OPT_POST_DATA) {
910 "Content-Type: application/x-www-form-urlencoded\r\n"
911 "Content-Length: %u\r\n"
914 (int) strlen(G.post_data), G.post_data
919 SENDFMT(sfp, "\r\n");
925 * Retrieve HTTP response line and check for "200" status code.
// Skip the first token of the status line and the whitespace after it to
// reach the numeric status code.
931 str = skip_non_whitespace(str);
932 str = skip_whitespace(str);
933 // FIXME: no error check
934 // xatou wouldn't work: "200 OK"
939 while (gethdr(sfp) != NULL)
940 /* eat all remaining headers */;
944 Response 204 doesn't say "null file", it says "metadata
945 has changed but data didn't":
947 "10.2.5 204 No Content
948 The server has fulfilled the request but does not need to return
949 an entity-body, and might want to return updated metainformation.
950 The response MAY include new or updated metainformation in the form
951 of entity-headers, which if present SHOULD be associated with
952 the requested variant.
954 If the client is a user agent, it SHOULD NOT change its document
955 view from that which caused the request to be sent. This response
956 is primarily intended to allow input for actions to take place
957 without causing a change to the user agent's active document view,
958 although any new or updated metainformation SHOULD be applied
959 to the document currently in the user agent's active view.
961 The 204 response MUST NOT include a message-body, and thus
962 is always terminated by the first empty line after the header fields."
964 However, in real world it was observed that some web servers
965 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
968 if (G.beg_range != 0) {
969 /* "Range:..." was not honored by the server.
970 * Restart download from the beginning.
972 reset_beg_range_to_zero();
975 case 300: /* redirection */
980 case 206: /* Partial Content */
981 if (G.beg_range != 0)
982 /* "Range:..." worked. Good. */
984 /* Partial Content even though we did not ask for it??? */
987 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
991 * Retrieve HTTP headers.
// Only three headers are acted upon; the key is the 1-based position of
// the lower-cased header name in the keywords list, or 0 when the name is
// not listed.
993 while ((str = gethdr(sfp)) != NULL) {
994 static const char keywords[] ALIGN1 =
995 "content-length\0""transfer-encoding\0""location\0";
997 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1001 /* gethdr converted "FOO:" string to lowercase */
1003 /* strip trailing whitespace */
1004 char *s = strchrnul(str, '\0') - 1;
1005 while (s >= str && (*s == ' ' || *s == '\t')) {
1009 key = index_in_strings(keywords, G.wget_buf) + 1;
1010 if (key == KEY_content_length) {
1011 G.content_len = BB_STRTOOFF(str, NULL, 10);
1012 if (G.content_len < 0 || errno) {
1013 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
// The only transfer encoding accepted is chunked, compared without regard
// to case.
1018 if (key == KEY_transfer_encoding) {
1019 if (strcmp(str_tolower(str), "chunked") != 0)
1020 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
// A Location header is followed only for status 300 and above, and
// redir_limit bounds the number of hops. A value starting with a slash is a
// new path on the same server; anything else is parsed as a full URL.
1023 if (key == KEY_location && status >= 300) {
1024 if (--redir_limit == 0)
1025 bb_error_msg_and_die("too many redirections");
1027 if (str[0] == '/') {
1028 free(redirected_path);
1029 target.path = redirected_path = xstrdup(str+1);
1030 /* lsa stays the same: it's on the same server */
1032 parse_url(str, &target);
1034 /* server.user remains untouched */
1035 free(server.allocated);
1036 server.allocated = NULL;
1037 server.host = target.host;
1038 /* strip_ipv6_scope_id(target.host); - no! */
1039 /* we assume remote never gives us IPv6 addr with scope id */
1040 server.port = target.port;
1043 } /* else: lsa stays the same: we use proxy */
// Start over with a fresh connection for the redirected request.
1045 goto establish_session;
1048 // if (status >= 300)
1049 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1051 /* For HTTP, data is pumped over the same connection */
1057 sfp = prepare_ftp_session(&dfp, &target, lsa);
// Spider mode skips the data transfer. The output file is created only
// here, once the remote side has answered, unless -c already opened it.
1062 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1063 if (G.output_fd < 0)
1064 G.output_fd = xopen(G.fname_out, G.o_flags);
1065 retrieve_file_data(dfp);
// Without -O each URL has its own output file, so it is closed here.
1066 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1067 xclose(G.output_fd);
1073 /* It's ftp. Close data connection properly */
// After the data transfer the control connection must report 226.
1075 if (ftpcmd(NULL, NULL, sfp) != 226)
1076 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1077 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1081 free(server.allocated);
1082 free(target.allocated);
1085 free(fname_out_alloc);
1086 free(redirected_path);
// Applet entry point. Sets defaults (timeout 900 seconds, proxy flag on,
// user agent Wget), parses the options with getopt32, joins user-supplied
// headers into G.extra_headers while flagging which built-in headers they
// replace, chooses the output open flags, and calls download_one_url on the
// URL arguments. Returns EXIT_SUCCESS; failures exit through the fatal
// helpers in the called functions.
// Several lines (loop heads, braces, declarations) are not visible in this view.
1089 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1090 int wget_main(int argc UNUSED_PARAM, char **argv)
1092 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// Long option table: name, argument kind, then the short option character.
// Options without a short form use the byte values 0xff down to 0xfb.
1093 static const char wget_longopts[] ALIGN1 =
1094 /* name, has_arg, val */
1095 "continue\0" No_argument "c"
1096 //FIXME: -s isn't --spider, it's --save-headers!
1097 "spider\0" No_argument "s"
1098 "quiet\0" No_argument "q"
1099 "output-document\0" Required_argument "O"
1100 "directory-prefix\0" Required_argument "P"
1101 "proxy\0" Required_argument "Y"
1102 "user-agent\0" Required_argument "U"
1103 #if ENABLE_FEATURE_WGET_TIMEOUT
1104 "timeout\0" Required_argument "T"
1107 // "tries\0" Required_argument "t"
1108 /* Ignored (we always use PASV): */
1109 "passive-ftp\0" No_argument "\xff"
1110 "header\0" Required_argument "\xfe"
1111 "post-data\0" Required_argument "\xfd"
1112 /* Ignored (we don't do ssl) */
1113 "no-check-certificate\0" No_argument "\xfc"
1114 /* Ignored (we don't support caching) */
1115 "no-cache\0" No_argument "\xfb"
1119 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1120 llist_t *headers_llist = NULL;
1125 #if ENABLE_FEATURE_WGET_TIMEOUT
1126 G.timeout_seconds = 900;
1127 signal(SIGALRM, alarm_handler);
1129 G.proxy_flag = "on"; /* use proxies if env vars are set */
1130 G.user_agent = "Wget"; /* "User-Agent" header field */
1132 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1133 applet_long_options = wget_longopts;
// The output pointers passed to getopt32 are listed in option-string order:
// O, P, Y, U, T, t, then the header list and the post data.
1135 opt_complementary = "-1"
1136 IF_FEATURE_WGET_TIMEOUT(":T+")
1137 IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
1138 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
1139 &G.fname_out, &G.dir_prefix,
1140 &G.proxy_flag, &G.user_agent,
1141 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1142 NULL /* -t RETRIES */
1143 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1144 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1148 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// First pass: total the space needed, which is each header text plus two
// bytes for CRLF, with one more byte added for the terminator at allocation.
1149 if (headers_llist) {
1152 llist_t *ll = headers_llist;
1154 size += strlen(ll->data) + 2;
1157 G.extra_headers = hdr = xmalloc(size + 1);
// Second pass: append each header followed by CRLF and check it against
// the known header names.
1158 while (headers_llist) {
1162 size = sprintf(hdr, "%s\r\n",
1163 (char*)llist_pop(&headers_llist));
1164 /* a bit like index_in_substrings but don't match full key */
1166 words = wget_user_headers;
// strstr returning hdr itself is a prefix test: the header must begin with
// the known name.
// NOTE(review): the match is case-sensitive, so a user header written in a
// different case does not suppress the built-in one - confirm intended.
1168 if (strstr(hdr, words) == hdr) {
1169 G.user_headers |= bit;
1173 words += strlen(words) + 1;
// Default open flags refuse to overwrite an existing file (O_EXCL). With -O
// the file may be overwritten, and when the name is a lone dash the
// continue option is switched off.
1181 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1182 if (G.fname_out) { /* -O FILE ? */
1183 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1185 option_mask32 &= ~WGET_OPT_CONTINUE;
1187 /* compat with wget: -O FILE can overwrite */
1188 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
// Passes the current argument and advances argv; the loop around this call
// is not visible in this view.
1192 download_one_url(*argv++);
1194 if (G.output_fd >= 0)
1195 xclose(G.output_fd);
1197 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1198 free(G.extra_headers);
1202 return EXIT_SUCCESS;