1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //usage:#define wget_trivial_usage
13 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage: " [--no-check-certificate] [--no-cache]" */
18 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
20 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
24 //usage:#define wget_full_usage "\n\n"
25 //usage: "Retrieve files via HTTP or FTP\n"
26 //usage: "\n -s Spider mode - only check file existence"
27 //usage: "\n -c Continue retrieval of aborted transfer"
28 //usage: "\n -q Quiet"
29 //usage: "\n -P DIR Save to DIR (default .)"
30 //usage: IF_FEATURE_WGET_TIMEOUT(
31 //usage: "\n -T SEC Network read timeout is SEC seconds"
33 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
34 //usage: "\n -U STR Use STR for User-Agent header"
35 //usage: "\n -Y Use proxy ('on' or 'off')"
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
42 # define log_io(...) ((void)0)
// Protocol name strings. parse_url stores P_HTTPS or P_HTTP in h->protocol
// (the P_FTP assignment is presumably on a line missing from this listing),
// and download_one_url tests target.protocol against these with == , i.e. by
// pointer identity, so the protocol field must only ever hold these objects.
54 static const char P_FTP[] = "ftp";
55 static const char P_HTTP[] = "http";
56 static const char P_HTTPS[] = "https";
// Members of the applet state reached through the G macro (the struct header
// and several members are missing from this listing).
// wget_buf is a shared scratch area: response lines (fgets_and_trim), FTP
// replies, base64 output (base64enc) and file data (retrieve_file_data) all
// pass through it, so its content is only valid until the next such call.
// content_len is only trusted when got_clen is set and chunked is clear; see
// the total computed in progress_meter.
61 off_t content_len; /* Content-length of the file */
62 off_t beg_range; /* Range at which continue begins */
63 #if ENABLE_FEATURE_WGET_STATUSBAR
64 off_t transferred; /* Number of bytes transferred so far */
65 const char *curfile; /* Name of current file being transferred */
69 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
73 char *fname_out; /* where to direct output (-O) */
74 const char *proxy_flag; /* Use proxies if env vars are set */
75 const char *user_agent; /* "User-Agent" header field */
76 #if ENABLE_FEATURE_WGET_TIMEOUT
// wget_main sets this to 900 before option parsing; -T overrides it.
77 unsigned timeout_seconds;
82 smallint chunked; /* chunked transfer encoding */
83 smallint got_clen; /* got content-length: from server */
84 /* Local downloads do benefit from big buffer.
85 * With 512 byte buffer, it was measured to be
86 * an order of magnitude slower than with big one.
88 uint64_t just_to_align_next_member;
89 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
// G is the dereferenced ptr_to_globals, so every G.field in this file reads or
// writes the applet state. INIT_G installs an xzalloc allocation of sizeof(G)
// (presumably zero-filled - confirm against xzalloc); FINI_G releases it via
// FREE_PTR_TO_GLOBALS. The closing lines of both macros are not in this listing.
91 #define G (*ptr_to_globals)
92 #define INIT_G() do { \
93 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
95 #define FINI_G() do { \
96 FREE_PTR_TO_GLOBALS(); \
100 /* Must match option string! */
// Option bits tested against option_mask32. Bit positions follow the order of
// the option string given to getopt32 in wget_main: c s q O P Y U T t occupy
// bits 0..8, then the long-only options passive-ftp, header and post-data
// take bits 9..11.
102 WGET_OPT_CONTINUE = (1 << 0),
103 WGET_OPT_SPIDER = (1 << 1),
104 WGET_OPT_QUIET = (1 << 2),
105 WGET_OPT_OUTNAME = (1 << 3),
106 WGET_OPT_PREFIX = (1 << 4),
107 WGET_OPT_PROXY = (1 << 5),
108 WGET_OPT_USER_AGENT = (1 << 6),
109 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
110 WGET_OPT_RETRIES = (1 << 8),
111 WGET_OPT_PASSIVE = (1 << 9),
// Multiplying by the feature macro turns the mask into 0 when the macro is 0,
// so tests such as option_mask32 & WGET_OPT_POST_DATA are then always false.
112 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
113 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
121 #if ENABLE_FEATURE_WGET_STATUSBAR
122 static void progress_meter(int flag)
124 if (option_mask32 & WGET_OPT_QUIET)
127 if (flag == PROGRESS_START)
128 bb_progress_init(&G.pmt, G.curfile);
130 bb_progress_update(&G.pmt,
133 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
136 if (flag == PROGRESS_END) {
137 bb_progress_free(&G.pmt);
138 bb_putchar_stderr('\n');
143 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
147 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
148 * local addresses can have a scope identifier to specify the
149 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
150 * identifier is only valid on a single node.
152 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
153 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
154 * in the Host header as invalid requests, see
155 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
// Edits host in place; nothing is allocated. download_one_url calls this on
// target.host only when ENABLE_FEATURE_IPV6 is set.
157 static void strip_ipv6_scope_id(char *host)
161 /* bbox wget actually handles IPv6 addresses without [], like
162 * wget "http://::1/xxx", but this is not standard.
163 * To save code, _here_ we do not support it. */
166 return; /* not IPv6 */
// The zone id, if any, starts at the first percent sign.
168 scope = strchr(host, '%');
172 /* Remove the IPv6 zone identifier from the host address */
173 cp = strchr(host, ']');
// The closing bracket must be followed by a colon (port) or end the string.
174 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
175 /* malformed address (not "[xx]:nn" or "[xx]") */
179 /* cp points to "]...", scope points to "%eth0]..." */
// Moves the tail that starts at the closing bracket down over the zone id,
// e.g. [fe80::1%eth0]:80 becomes [fe80::1]:80. Source and destination overlap,
// hence the dedicated helper (presumably an overlap-tolerant strcpy).
180 overlapping_strcpy(scope, cp);
183 #if ENABLE_FEATURE_WGET_AUTHENTICATION
184 /* Base64-encode character string. */
// Encodes str into the shared G.wget_buf using bb_uuencode with the base64
// table. The return statement is not in this listing; callers print the result
// with %s, so it is presumably G.wget_buf itself. The result is therefore
// overwritten by the next user of G.wget_buf.
185 static char *base64enc(const char *str)
187 unsigned len = strlen(str);
// Input longer than this bound is silently truncated so that the encoded
// output (4 bytes out per 3 bytes in) cannot overrun G.wget_buf.
188 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
189 len = sizeof(G.wget_buf)/4*3 - 10;
190 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
// Every caller in this file passes server-supplied or user-supplied text and
// feeds the result straight into a %s of an error message.
// NOTE(review): the loop over p is missing from this listing; presumably it
// replaces or cuts unprintable bytes in place and returns s - confirm.
195 static char* sanitize_string(char *s)
// Bytes are walked as unsigned char, which avoids sign issues for bytes >= 0x80.
197 unsigned char *p = (void *) s;
204 #if ENABLE_FEATURE_WGET_TIMEOUT
// SIGALRM handler; wget_main installs it with signal(). open_socket arms the
// alarm with G.timeout_seconds and sets G.connecting around the connect call.
// NOTE(review): the condition guarding the die call is missing from this
// listing; presumably it tests G.connecting - confirm.
205 static void alarm_handler(int sig UNUSED_PARAM)
207 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
209 bb_error_msg_and_die("download timed out");
// Connects a stream socket to lsa and wraps it in a read/write stdio stream.
// Callers use the result as a FILE pointer, so fp is presumably returned on a
// line missing from this listing.
213 static FILE *open_socket(len_and_sockaddr *lsa)
// With the timeout feature the alarm is armed before connecting and
// G.connecting marks the window in which the connect call is running.
// NOTE(review): no alarm(0) is visible after the connect - confirm whether
// the pending alarm is meant to stay armed.
218 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
219 fd = xconnect_stream(lsa);
220 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
222 /* glibc 2.4 seems to try seeking on it - ??! */
223 /* hopefully it understands what ESPIPE means... */
224 fp = fdopen(fd, "r+");
// fdopen failure is reported with the generic out-of-memory message.
226 bb_perror_msg_and_die(bb_msg_memory_exhausted);
231 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
232 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
// Reads one line from fp into the shared G.wget_buf; EOF or a read error is
// fatal. The lines that store the return value and write the terminating NUL
// are missing from this listing.
233 static char fgets_and_trim(FILE *fp)
// At most sizeof(G.wget_buf) - 2 characters are stored, so a longer line is
// returned without its newline; gethdr relies on that to detect overflow.
238 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
239 bb_perror_msg_and_die("error getting response");
// strchrnul yields the position of the character or of the terminating NUL,
// so buf_ptr is never NULL here.
241 buf_ptr = strchrnul(G.wget_buf, '\n');
244 buf_ptr = strchrnul(G.wget_buf, '\r');
247 log_io("< %s", G.wget_buf);
// Sends one FTP command (s1 followed by s2 and CRLF) on fp and reads the
// reply. Callers pass NULL for both strings when they only want to read a
// reply; the guard for that case is not in this listing.
// The reply code is parsed into result, which is presumably returned; callers
// compare the return value with FTP reply codes.
252 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
258 fprintf(fp, "%s%s\r\n", s1, s2);
260 log_io("> %s%s", s1, s2);
// Keep reading until a line starts with a digit and has a space at index 3:
// this skips the continuation lines of a multi-line reply.
265 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
// Cut the buffer after the three-digit code. The reply text then starts at
// G.wget_buf + 4, which is what callers print in error messages.
267 G.wget_buf[3] = '\0';
268 result = xatoi_positive(G.wget_buf);
// Splits src_url into the fields of h. All pointers stored in h point into a
// private copy kept in h->allocated (download_one_url frees it), except
// h->user, which gets its own xstrdup copy.
// Several assignments (among them h->path) are on lines missing from this
// listing.
273 static void parse_url(const char *src_url, struct host_info *h)
278 h->allocated = url = xstrdup(src_url);
281 p = strstr(url, "://");
// The scheme is compared exactly and case-sensitively. The default port is
// only a fallback handed to bb_lookup_port.
285 if (strcmp(url, P_FTP) == 0) {
286 h->port = bb_lookup_port(P_FTP, "tcp", 21);
288 if (strcmp(url, P_HTTPS) == 0) {
289 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
290 h->protocol = P_HTTPS;
292 if (strcmp(url, P_HTTP) == 0) {
294 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
295 h->protocol = P_HTTP;
298 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
301 // GNU wget is user-friendly and falls back to http://
307 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
308 // 'GET /?var=a/b HTTP 1.0'
309 // and saves 'index.html?var=a%2Fb' (we save 'b')
310 // wget 'http://busybox.net?login=john@doe':
311 // request: 'GET /?login=john@doe HTTP/1.0'
312 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
313 // wget 'http://busybox.net#test/test':
314 // request: 'GET / HTTP/1.0'
315 // saves: 'index.html' (we save 'test')
317 // We also don't add unique .N suffix if file exists...
// After these three lookups sp points at whichever of slash, question mark
// or hash comes first after the host, or is NULL if none is present.
318 sp = strchr(h->host, '/');
319 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
320 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
323 } else if (*sp == '/') {
326 } else { // '#' or '?'
327 // http://busybox.net?login=john@doe is a valid URL
328 // memmove converts to:
329 // http:/busybox.nett?login=john@doe...
// The host text is shifted one byte to the left, into the space of the
// scheme separator, which frees one byte in front of the query or fragment.
330 memmove(h->host - 1, h->host, sp - h->host);
// strrchr: the LAST at-sign separates credentials from the host, so an
// at-sign inside the user:password part is tolerated.
336 sp = strrchr(h->host, '@');
338 // URL-decode "user:password" string before base64-encoding:
339 // wget http://test:my%20pass@example.com should send
340 // Authorization: Basic dGVzdDpteSBwYXNz
341 // which decodes to "test:my pass".
342 // Standard wget and curl do this too.
345 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
348 /* else: h->user remains NULL, or as set by original request
349 * before redirect (if we are here after a redirect).
// Reads one header line into the shared G.wget_buf. Callers loop until the
// result is NULL, which is presumably what the end-of-headers branch returns.
// Otherwise the header name is lowercased in place at the start of
// G.wget_buf (download_one_url looks it up there) and the value, with leading
// whitespace skipped, is in hdrval (presumably the return value).
353 static char *gethdr(FILE *fp)
358 /* retrieve header line */
// c keeps the result of fgets_and_trim, documented there as the newline
// character if one was seen and NUL otherwise.
359 c = fgets_and_trim(fp);
361 /* end of the headers? */
362 if (G.wget_buf[0] == '\0')
365 /* convert the header name to lower case */
366 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
368 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
369 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
370 * "A-Z" maps to "a-z".
371 * "@[\]" can't occur in header names.
372 * "^_" maps to "~,DEL" (which is wrong).
373 * "^" was never seen yet, "_" was seen from web.archive.org
374 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
379 /* verify we are at the end of the header name */
381 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
383 /* locate the start of the header value */
385 hdrval = skip_whitespace(s);
388 /* Rats! The buffer isn't big enough to hold the entire header value */
// Reads and discards the rest of an over-long line, up to its newline or EOF
// (the guard on c and the loop body are not in this listing).
389 while (c = getc(fp), c != EOF && c != '\n')
// Called when the server did not accept the resume offset (FTP REST refused,
// or an HTTP reply that ignored the Range header). Reports the failure and
// rewinds the output file so the download is written from the start.
// The line that clears G.beg_range is presumably among those missing here.
396 static void reset_beg_range_to_zero(void)
398 bb_error_msg("restart failed");
400 xlseek(G.output_fd, 0, SEEK_SET);
401 /* Done at the end instead: */
402 /* ftruncate(G.output_fd, 0); */
// Runs the FTP control dialogue up to the start of the transfer: greeting,
// login, binary type, SIZE, PASV, optional REST, RETR.
// The data connection stream is stored in *dfpp. download_one_url assigns the
// return value to its control stream, so sfp is presumably what is returned.
// lsa is modified: its port is replaced by the passive-mode data port.
405 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
// Default credentials; the guard (presumably a test for a missing
// target->user) is not in this listing.
412 target->user = xstrdup("anonymous:busybox@");
414 sfp = open_socket(lsa);
// NULL, NULL: only read the server greeting, which must be 220.
415 if (ftpcmd(NULL, NULL, sfp) != 220)
416 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
419 * Splitting username:password pair,
// str points at the first colon of user:password (the split itself is on
// missing lines); the first colon wins if the password contains another one.
422 str = strchr(target->user, ':');
425 switch (ftpcmd("USER ", target->user, sfp)) {
429 if (ftpcmd("PASS ", str, sfp) == 230)
431 /* fall through (failed login) */
433 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
// Binary mode; the reply code is not checked.
436 ftpcmd("TYPE I", NULL, sfp);
// A 213 reply carries the file size in its text part. Any other reply is not
// treated as an error here.
441 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
442 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
443 if (G.content_len < 0 || errno) {
444 bb_error_msg_and_die("SIZE value is garbage");
450 * Entering passive mode
452 if (ftpcmd("PASV", NULL, sfp) != 227) {
454 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
456 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
457 // Server's IP is N1.N2.N3.N4 (we ignore it)
458 // Server's port for data connection is P1*256+P2
// Parsed from the right: cut at the last closing parenthesis, take P2 after
// the last comma, then (after a cut on a missing line) P1 after the new last
// comma. Each part must be within 0..255.
459 str = strrchr(G.wget_buf, ')');
460 if (str) str[0] = '\0';
461 str = strrchr(G.wget_buf, ',');
462 if (!str) goto pasv_error;
463 port = xatou_range(str+1, 0, 255);
465 str = strrchr(G.wget_buf, ',');
466 if (!str) goto pasv_error;
467 port += xatou_range(str+1, 0, 255) * 256;
// Same address as the control connection, new port.
468 set_nport(&lsa->u.sa, htons(port));
470 *dfpp = open_socket(lsa);
472 if (G.beg_range != 0) {
473 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
// 350 means the server accepted the restart offset, so only the remainder of
// the file will arrive; otherwise fall back to a full download.
474 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
475 G.content_len -= G.beg_range;
477 reset_beg_range_to_zero();
// Any reply code above 150 is treated as a failure of RETR.
480 if (ftpcmd("RETR ", target->path, sfp) > 150)
481 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
// Starts an external openssl s_client process connected to host:port and
// talks to it over a socketpair. download_one_url passes the return value to
// fdopen, so it is presumably the parent end of the pair (the return is on a
// missing line).
486 static int spawn_https_helper(const char *host, unsigned port)
488 char *allocated = NULL;
492 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
493 /* Kernel can have AF_UNIX support disabled */
494 bb_perror_msg_and_die("socketpair");
// Append the port unless the host text already contains a colon.
// NOTE(review): an IPv6 literal without a port also contains colons, so no
// port is appended for it - confirm this is acceptable.
496 if (!strchr(host, ':'))
497 host = allocated = xasprintf("%s:%u", host, port);
// vfork is used on targets where BB_MMU is 0.
499 pid = BB_MMU ? xfork() : xvfork();
508 * TODO: develop a tiny ssl/tls helper (using matrixssl?),
509 * try to exec it here before falling back to big fat openssl.
512 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
513 * It prints some debug stuff on stderr, don't know how to suppress it.
514 * Work around by dev-nulling stderr. We lose all error messages :(
// Presumably lands on descriptor 2 because stderr was closed on a preceding
// line that is missing from this listing.
517 xopen("/dev/null", O_RDWR);
518 argv[0] = (char*)"openssl";
519 argv[1] = (char*)"s_client";
520 argv[2] = (char*)"-quiet";
521 argv[3] = (char*)"-connect";
522 argv[4] = (char*)host;
524 BB_EXECVP(argv[0], argv);
// Only reached when the exec call failed.
526 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
536 /* See networking/ssl_helper/README */
// Alternative TLS path: hands an already connected socket to an external
// ssl_helper process. download_one_url calls it with fileno(sfp).
540 static void spawn_https_helper1(int network_fd)
545 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
546 /* Kernel can have AF_UNIX support disabled */
547 bb_perror_msg_and_die("socketpair");
549 pid = BB_MMU ? xfork() : xvfork();
// Child side: the network socket becomes descriptor 3, matching the -d3
// argument given to the helper below.
557 xmove_fd(network_fd, 3);
559 * A simple ssl/tls helper
561 argv[0] = (char*)"ssl_helper";
562 argv[1] = (char*)"-d3";
564 BB_EXECVP(argv[0], argv);
565 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
// Parent side: the socketpair end takes over the descriptor number of the
// network socket, so the stream the caller already holds now talks to the
// helper instead of the raw socket.
571 xmove_fd(sp[0], network_fd);
// Copies the response body from dfp to G.output_fd through G.wget_buf.
// When the status bar or the timeout feature is enabled, the descriptor is
// switched to nonblocking mode for the read loop and switched back afterwards;
// a read that yields nothing with errno EAGAIN is followed by a poll of up to
// one second.
// For chunked replies the outer loop parses each chunk-size line and stops at
// a chunk of size zero.
// At the end the output file is cut at the current write position and the
// progress meter is forced to show a complete download.
575 static void NOINLINE retrieve_file_data(FILE *dfp)
577 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
578 # if ENABLE_FEATURE_WGET_TIMEOUT
// Countdown of idle seconds; reloaded from G.timeout_seconds after each
// successful read further below.
579 unsigned second_cnt = G.timeout_seconds;
581 struct pollfd polldata;
583 polldata.fd = fileno(dfp);
584 polldata.events = POLLIN | POLLPRI;
586 progress_meter(PROGRESS_START);
591 /* Loops only if chunked */
594 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
595 /* Must use nonblocking I/O, otherwise fread will loop
596 * and *block* until it reads full buffer,
597 * which messes up progress bar and/or timeout logic.
598 * Because of nonblocking I/O, we need to dance
599 * very carefully around EAGAIN. See explanation at
602 ndelay_on(polldata.fd);
608 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
609 /* fread internally uses read loop, which in our case
610 * is usually exited when we get EAGAIN.
611 * In this case, libc sets error marker on the stream.
612 * Need to clear it before next fread to avoid possible
613 * rare false positive ferror below. Rare because usually
614 * fread gets more than zero bytes, and we don't fall
615 * into if (n <= 0) ...
620 rdsz = sizeof(G.wget_buf);
// Cap the request at the remaining length when that is smaller than the
// buffer (the enclosing guard is not in this listing). A remaining length
// that is <= 0 when viewed as int takes a branch whose statement is missing
// here - presumably it leaves the read loop.
622 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
623 if ((int)G.content_len <= 0)
625 rdsz = (unsigned)G.content_len;
628 n = fread(G.wget_buf, 1, rdsz, dfp);
631 xwrite(G.output_fd, G.wget_buf, n);
632 #if ENABLE_FEATURE_WGET_STATUSBAR
637 if (G.content_len == 0)
640 #if ENABLE_FEATURE_WGET_TIMEOUT
641 second_cnt = G.timeout_seconds;
648 * If error occurs, or EOF is reached, the return value
649 * is a short item count (or zero).
650 * fread does not distinguish between EOF and error.
// Anything other than EAGAIN ends the loop: either a fatal read error or
// a plain EOF (the test that separates the two is on a missing line).
652 if (errno != EAGAIN) {
654 progress_meter(PROGRESS_END);
655 bb_perror_msg_and_die(bb_msg_read_error);
657 break; /* EOF, not error */
660 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
661 /* It was EAGAIN. There is no data. Wait up to one second
662 * then abort if timed out, or update the bar and try reading again.
664 if (safe_poll(&polldata, 1, 1000) == 0) {
665 # if ENABLE_FEATURE_WGET_TIMEOUT
// second_cnt is tested for non-zero before the decrement, so a timeout value
// of 0 never expires.
666 if (second_cnt != 0 && --second_cnt == 0) {
667 progress_meter(PROGRESS_END);
668 bb_error_msg_and_die("download timed out");
671 /* We used to loop back to poll here,
672 * but there is no great harm in letting fread
673 * to try reading anyway.
676 /* Need to do it _every_ second for "stalled" indicator
677 * to be shown properly.
679 progress_meter(PROGRESS_BUMP);
681 } /* while (reading data) */
683 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
685 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
690 fgets_and_trim(dfp); /* Eat empty line */
// The chunk-size line is parsed as a base 16 number.
693 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
694 /* FIXME: error check? */
695 if (G.content_len == 0)
696 break; /* all done! */
699 * Note that fgets may result in some data being buffered in dfp.
700 * We loop back to fread, which will retrieve this data.
701 * Also note that code has to be arranged so that fread
702 * is done _before_ one-second poll wait - poll doesn't know
703 * about stdio buffering and can result in spurious one second waits!
707 /* If -c failed, we restart from the beginning,
708 * but we do not truncate file then, we do it only now, at the end.
709 * This lets user to ^C if his 99% complete 10 GB file download
710 * failed to restart *without* losing the almost complete file.
// lseek failure is tolerated: the truncation is simply skipped. The result of
// ftruncate is not checked.
713 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
714 if (pos != (off_t)-1)
715 ftruncate(G.output_fd, pos);
718 /* Draw full bar and free its resources */
719 G.chunked = 0; /* makes it show 100% even for chunked download */
720 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
721 progress_meter(PROGRESS_END);
// Downloads one URL: parses it, selects a proxy if enabled, chooses the output
// file name, connects, then either speaks HTTP (also used for any protocol
// when a proxy is in use) or runs an FTP session, and finally copies the data
// with retrieve_file_data unless spider mode is on.
// Redirects jump back to the establish_session label (the label itself is on a
// line missing from this listing).
// server is the host actually connected to (proxy or origin); target is the
// host named in the URL.
724 static void download_one_url(const char *url)
726 bool use_proxy; /* Use proxies if env vars are set */
728 len_and_sockaddr *lsa;
729 FILE *sfp; /* socket to web/ftp server */
730 FILE *dfp; /* socket to ftp server (data) */
732 char *fname_out_alloc;
733 char *redirected_path = NULL;
734 struct host_info server;
735 struct host_info target;
737 server.allocated = NULL;
738 target.allocated = NULL;
742 parse_url(url, &target);
744 /* Use the proxy if necessary */
// Any -Y value other than the exact lowercase text off leaves proxies enabled.
745 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
// Protocol is compared by pointer identity with the P_FTP object.
747 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
748 //FIXME: what if protocol is https? Ok to use http_proxy?
// An unset or empty environment variable turns the proxy off again.
749 use_proxy = (proxy && proxy[0]);
751 parse_url(proxy, &server);
754 server.port = target.port;
755 if (ENABLE_FEATURE_IPV6) {
756 //free(server.allocated); - can't be non-NULL
// Separate copy: target.host is edited by strip_ipv6_scope_id below, while
// server.host keeps the zone id (presumably needed for the local connect).
757 server.host = server.allocated = xstrdup(target.host);
759 server.host = target.host;
763 if (ENABLE_FEATURE_IPV6)
764 strip_ipv6_scope_id(target.host);
766 /* If there was no -O FILE, guess output filename */
767 fname_out_alloc = NULL;
768 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
769 G.fname_out = bb_get_last_path_component_nostrip(target.path);
770 /* handle "wget http://kernel.org//" */
771 if (G.fname_out[0] == '/' || !G.fname_out[0])
772 G.fname_out = (char*)"index.html";
773 /* -P DIR is considered only if there was no -O FILE */
775 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
777 /* redirects may free target.path later, need to make a copy */
778 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
781 #if ENABLE_FEATURE_WGET_STATUSBAR
782 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
785 /* Determine where to start transfer */
787 if (option_mask32 & WGET_OPT_CONTINUE) {
// Opened without O_CREAT: only an existing file can be continued. Its current
// size becomes the resume offset.
788 G.output_fd = open(G.fname_out, O_WRONLY);
789 if (G.output_fd >= 0) {
790 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
792 /* File doesn't exist. We do not create file here yet.
793 * We are not sure it exists on remote side */
798 lsa = xhost2sockaddr(server.host, server.port);
799 if (!(option_mask32 & WGET_OPT_QUIET)) {
800 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
801 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
805 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
// HTTP dialogue: used for http and https targets, and for ftp targets as well
// when they are fetched through a proxy.
808 if (use_proxy || target.protocol != P_FTP) {
815 /* Open socket to http(s) server */
816 if (target.protocol == P_HTTPS) {
817 /* openssl-based helper
818 * Inconvenient API since we can't give it an open fd,
820 int fd = spawn_https_helper(server.host, server.port);
821 sfp = fdopen(fd, "r+");
823 bb_perror_msg_and_die(bb_msg_memory_exhausted);
825 sfp = open_socket(lsa);
827 if (target.protocol == P_HTTPS)
828 spawn_https_helper1(fileno(sfp));
830 /* Send HTTP request */
// Two request-line forms: an absolute URL (presumably the proxy case; the
// guard is not in this listing) and a path-only form for direct connections.
832 fprintf(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
833 target.protocol, target.host,
836 fprintf(sfp, "%s /%s HTTP/1.1\r\n",
837 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
841 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
842 target.host, G.user_agent);
844 /* Ask server to close the connection as soon as we are done
845 * (IOW: we do not intend to send more requests)
847 fprintf(sfp, "Connection: close\r\n");
849 #if ENABLE_FEATURE_WGET_AUTHENTICATION
// The +6 skips the first six characters of the literal (the Proxy- prefix), so
// the same string constant yields the plain Authorization header here.
851 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
852 base64enc(target.user));
854 if (use_proxy && server.user) {
855 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
856 base64enc(server.user));
860 if (G.beg_range != 0)
861 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
863 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// extra_headers is built in wget_main with a CRLF after every header, so it
// can be written verbatim.
865 fputs(G.extra_headers, sfp);
// NOTE(review): the length below is printed with an unsigned conversion but
// the argument is cast to int - confirm the mismatch is intentional.
867 if (option_mask32 & WGET_OPT_POST_DATA) {
869 "Content-Type: application/x-www-form-urlencoded\r\n"
870 "Content-Length: %u\r\n"
873 (int) strlen(G.post_data), G.post_data
878 fprintf(sfp, "\r\n");
884 * Retrieve HTTP response line and check for "200" status code.
// The status code is the second whitespace-separated word of the status line.
890 str = skip_non_whitespace(str);
891 str = skip_whitespace(str);
892 // FIXME: no error check
893 // xatou wouldn't work: "200 OK"
898 while (gethdr(sfp) != NULL)
899 /* eat all remaining headers */;
903 Response 204 doesn't say "null file", it says "metadata
904 has changed but data didn't":
906 "10.2.5 204 No Content
907 The server has fulfilled the request but does not need to return
908 an entity-body, and might want to return updated metainformation.
909 The response MAY include new or updated metainformation in the form
910 of entity-headers, which if present SHOULD be associated with
911 the requested variant.
913 If the client is a user agent, it SHOULD NOT change its document
914 view from that which caused the request to be sent. This response
915 is primarily intended to allow input for actions to take place
916 without causing a change to the user agent's active document view,
917 although any new or updated metainformation SHOULD be applied
918 to the document currently in the user agent's active view.
920 The 204 response MUST NOT include a message-body, and thus
921 is always terminated by the first empty line after the header fields."
923 However, in real world it was observed that some web servers
924 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
927 if (G.beg_range != 0) {
928 /* "Range:..." was not honored by the server.
929 * Restart download from the beginning.
931 reset_beg_range_to_zero();
934 case 300: /* redirection */
939 case 206: /* Partial Content */
940 if (G.beg_range != 0)
941 /* "Range:..." worked. Good. */
943 /* Partial Content even though we did not ask for it??? */
946 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
950 * Retrieve HTTP headers.
952 while ((str = gethdr(sfp)) != NULL) {
953 static const char keywords[] ALIGN1 =
954 "content-length\0""transfer-encoding\0""location\0";
956 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
960 /* gethdr converted "FOO:" string to lowercase */
962 /* strip trailing whitespace */
963 char *s = strchrnul(str, '\0') - 1;
964 while (s >= str && (*s == ' ' || *s == '\t')) {
// The enum starts at 1 so that key equals the lookup result plus one
// (presumably 0 then means the header is not one of the three keywords).
968 key = index_in_strings(keywords, G.wget_buf) + 1;
969 if (key == KEY_content_length) {
970 G.content_len = BB_STRTOOFF(str, NULL, 10);
971 if (G.content_len < 0 || errno) {
972 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
977 if (key == KEY_transfer_encoding) {
// chunked is the only accepted transfer encoding; the value is compared after
// lowercasing.
978 if (strcmp(str_tolower(str), "chunked") != 0)
979 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
// A Location header is honoured only when the status code is 300 or above.
982 if (key == KEY_location && status >= 300) {
983 if (--redir_limit == 0)
984 bb_error_msg_and_die("too many redirections");
// Relative redirect branch (its guard is on a missing line): the first
// character of the value is skipped - presumably the leading slash, since the
// request line adds its own slash before the path.
987 free(redirected_path);
988 target.path = redirected_path = xstrdup(str+1);
989 /* lsa stays the same: it's on the same server */
// Absolute redirect: target is parsed afresh from the header value.
991 parse_url(str, &target);
993 /* server.user remains untouched */
994 free(server.allocated);
995 server.allocated = NULL;
996 server.host = target.host;
997 /* strip_ipv6_scope_id(target.host); - no! */
998 /* we assume remote never gives us IPv6 addr with scope id */
999 server.port = target.port;
1002 } /* else: lsa stays the same: we use proxy */
1004 goto establish_session;
1007 // if (status >= 300)
1008 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1010 /* For HTTP, data is pumped over the same connection */
1017 sfp = prepare_ftp_session(&dfp, &target, lsa);
1022 if (!(option_mask32 & WGET_OPT_SPIDER)) {
// The output file is created only now, after the server side succeeded, unless
// it was already opened for -c above.
1023 if (G.output_fd < 0)
1024 G.output_fd = xopen(G.fname_out, G.o_flags);
1025 retrieve_file_data(dfp);
// With -O the descriptor is left open here; wget_main closes it after the last
// URL.
1026 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1027 xclose(G.output_fd);
1033 /* It's ftp. Close data connection properly */
// NULL, NULL: only read the final reply on the control connection, which must
// be 226.
1035 if (ftpcmd(NULL, NULL, sfp) != 226)
1036 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1037 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1041 free(server.allocated);
1042 free(target.allocated);
1045 free(fname_out_alloc);
1046 free(redirected_path);
// Applet entry point: sets defaults, parses options, builds the extra header
// block, chooses the output open flags and then calls download_one_url for
// the URL arguments (the loop line around that call is not in this listing).
1049 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1050 int wget_main(int argc UNUSED_PARAM, char **argv)
1052 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1053 static const char wget_longopts[] ALIGN1 =
1054 /* name, has_arg, val */
1055 "continue\0" No_argument "c"
1056 //FIXME: -s isn't --spider, it's --save-headers!
1057 "spider\0" No_argument "s"
1058 "quiet\0" No_argument "q"
1059 "output-document\0" Required_argument "O"
1060 "directory-prefix\0" Required_argument "P"
1061 "proxy\0" Required_argument "Y"
1062 "user-agent\0" Required_argument "U"
1063 #if ENABLE_FEATURE_WGET_TIMEOUT
1064 "timeout\0" Required_argument "T"
1067 // "tries\0" Required_argument "t"
1068 /* Ignored (we always use PASV): */
1069 "passive-ftp\0" No_argument "\xff"
1070 "header\0" Required_argument "\xfe"
1071 "post-data\0" Required_argument "\xfd"
1072 /* Ignored (we don't do ssl) */
1073 "no-check-certificate\0" No_argument "\xfc"
1074 /* Ignored (we don't support caching) */
1075 "no-cache\0" No_argument "\xfb"
1079 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1080 llist_t *headers_llist = NULL;
1085 #if ENABLE_FEATURE_WGET_TIMEOUT
// Default network timeout in seconds; -T replaces it during option parsing.
1086 G.timeout_seconds = 900;
1087 signal(SIGALRM, alarm_handler);
1089 G.proxy_flag = "on"; /* use proxies if env vars are set */
1090 G.user_agent = "Wget"; /* "User-Agent" header field */
1092 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1093 applet_long_options = wget_longopts;
1095 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
// The order of the option letters fixes the bit positions of the WGET_OPT
// constants, and the pointer arguments follow the options that take a value,
// in the same order.
1096 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
1097 &G.fname_out, &G.dir_prefix,
1098 &G.proxy_flag, &G.user_agent,
1099 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1100 NULL /* -t RETRIES */
1101 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1102 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1106 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// All --header values are joined into one string, each followed by CRLF.
1107 if (headers_llist) {
1110 llist_t *ll = headers_llist;
// The 2 extra bytes per header are for the CRLF appended below (the initial
// value of size is on a line missing from this listing).
1112 size += strlen(ll->data) + 2;
1115 G.extra_headers = cp = xmalloc(size);
1116 while (headers_llist) {
1117 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
// Default flags include O_EXCL, so without -O an existing file makes the open
// call fail instead of being overwritten.
1123 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1124 if (G.fname_out) { /* -O FILE ? */
1125 if (LONE_DASH(G.fname_out)) { /* -O - ? */
// With -O - the continue option is switched off.
1127 option_mask32 &= ~WGET_OPT_CONTINUE;
1129 /* compat with wget: -O FILE can overwrite */
1130 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1134 download_one_url(*argv++);
// Closes the descriptor that download_one_url leaves open in -O mode.
1136 if (G.output_fd >= 0)
1137 xclose(G.output_fd);
1139 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1140 free(G.extra_headers);
1144 return EXIT_SUCCESS;