1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //usage:#define wget_trivial_usage
13 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage: " [--no-check-certificate] [--no-cache]" */
18 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
20 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
24 //usage:#define wget_full_usage "\n\n"
25 //usage: "Retrieve files via HTTP or FTP\n"
26 //usage: "\n -s Spider mode - only check file existence"
27 //usage: "\n -c Continue retrieval of aborted transfer"
28 //usage: "\n -q Quiet"
29 //usage: "\n -P DIR Save to DIR (default .)"
30 //usage: IF_FEATURE_WGET_TIMEOUT(
31 //usage: "\n -T SEC Network read timeout is SEC seconds"
33 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
34 //usage: "\n -U STR Use STR for User-Agent header"
35 //usage: "\n -Y Use proxy ('on' or 'off')"
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
42 # define log_io(...) ((void)0)
58 off_t content_len; /* Content-length of the file */
59 off_t beg_range; /* Range at which continue begins */
60 #if ENABLE_FEATURE_WGET_STATUSBAR
61 off_t transferred; /* Number of bytes transferred so far */
62 const char *curfile; /* Name of current file being transferred */
66 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
70 char *fname_out; /* where to direct output (-O) */
71 const char *proxy_flag; /* Use proxies if env vars are set */
72 const char *user_agent; /* "User-Agent" header field */
73 #if ENABLE_FEATURE_WGET_TIMEOUT
74 unsigned timeout_seconds;
78 smallint chunked; /* chunked transfer encoding */
79 smallint got_clen; /* got content-length: from server */
80 /* Local downloads do benefit from a big buffer.
81 * With a 512-byte buffer, it was measured to be
82 * an order of magnitude slower than with a big one.
84 uint64_t just_to_align_next_member;
85 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
87 #define G (*ptr_to_globals)
88 #define INIT_G() do { \
89 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
90 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
94 /* Must match option string! */
96 WGET_OPT_CONTINUE = (1 << 0),
97 WGET_OPT_SPIDER = (1 << 1),
98 WGET_OPT_QUIET = (1 << 2),
99 WGET_OPT_OUTNAME = (1 << 3),
100 WGET_OPT_PREFIX = (1 << 4),
101 WGET_OPT_PROXY = (1 << 5),
102 WGET_OPT_USER_AGENT = (1 << 6),
103 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
104 WGET_OPT_RETRIES = (1 << 8),
105 WGET_OPT_PASSIVE = (1 << 9),
106 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
107 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
115 #if ENABLE_FEATURE_WGET_STATUSBAR
116 static void progress_meter(int flag)
118 if (option_mask32 & WGET_OPT_QUIET)
121 if (flag == PROGRESS_START)
122 bb_progress_init(&G.pmt, G.curfile);
124 bb_progress_update(&G.pmt,
127 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
130 if (flag == PROGRESS_END) {
131 bb_progress_free(&G.pmt);
132 bb_putchar_stderr('\n');
137 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
141 /* IPv6 has scoped address types, i.e. link-local and site-local addresses.
142 * Link-local addresses can have a scope identifier to specify the
143 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
144 * identifier is only valid on a single node.
146 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
147 * unless all nodes agree on the semantics. Apache, for example, treats requests
148 * with zone identifiers in the Host header as invalid, see
149 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
151 static void strip_ipv6_scope_id(char *host)
155 /* bbox wget actually handles IPv6 addresses without [], like
156 * wget "http://::1/xxx", but this is not standard.
157 * To save code, _here_ we do not support it. */
160 return; /* not IPv6 */
162 scope = strchr(host, '%');
166 /* Remove the IPv6 zone identifier from the host address */
167 cp = strchr(host, ']');
168 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
169 /* malformed address (not "[xx]:nn" or "[xx]") */
173 /* cp points to "]...", scope points to "%eth0]..." */
174 overlapping_strcpy(scope, cp);
177 #if ENABLE_FEATURE_WGET_AUTHENTICATION
178 /* Base64-encode character string. */
179 static char *base64enc(const char *str)
181 unsigned len = strlen(str);
182 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
183 len = sizeof(G.wget_buf)/4*3 - 10;
184 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
189 static char* sanitize_string(char *s)
191 unsigned char *p = (void *) s;
198 static FILE *open_socket(len_and_sockaddr *lsa)
202 /* glibc 2.4 seems to try seeking on it - ??! */
203 /* hopefully it understands what ESPIPE means... */
204 fp = fdopen(xconnect_stream(lsa), "r+");
206 bb_perror_msg_and_die(bb_msg_memory_exhausted);
211 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
212 static char fgets_and_trim(FILE *fp)
217 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
218 bb_perror_msg_and_die("error getting response");
220 buf_ptr = strchrnul(G.wget_buf, '\n');
223 buf_ptr = strchrnul(G.wget_buf, '\r');
226 log_io("< %s", G.wget_buf);
231 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
237 fprintf(fp, "%s%s\r\n", s1, s2);
239 log_io("> %s%s", s1, s2);
244 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
246 G.wget_buf[3] = '\0';
247 result = xatoi_positive(G.wget_buf);
252 static void parse_url(const char *src_url, struct host_info *h)
257 h->allocated = url = xstrdup(src_url);
259 if (strncmp(url, "http://", 7) == 0) {
260 h->port = bb_lookup_port("http", "tcp", 80);
263 } else if (strncmp(url, "ftp://", 6) == 0) {
264 h->port = bb_lookup_port("ftp", "tcp", 21);
268 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
271 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
272 // 'GET /?var=a/b HTTP/1.0'
273 // and saves 'index.html?var=a%2Fb' (we save 'b')
274 // wget 'http://busybox.net?login=john@doe':
275 // request: 'GET /?login=john@doe HTTP/1.0'
276 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
277 // wget 'http://busybox.net#test/test':
278 // request: 'GET / HTTP/1.0'
279 // saves: 'index.html' (we save 'test')
281 // We also don't add unique .N suffix if file exists...
282 sp = strchr(h->host, '/');
283 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
284 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
287 } else if (*sp == '/') {
290 } else { // '#' or '?'
291 // http://busybox.net?login=john@doe is a valid URL
292 // memmove converts to:
293 // http:/busybox.nett?login=john@doe...
294 memmove(h->host - 1, h->host, sp - h->host);
300 // We used to set h->user to NULL here, but this interferes
301 // with handling of code 302 ("object was moved")
303 sp = strrchr(h->host, '@');
305 // URL-decode "user:password" string before base64-encoding:
306 // wget http://test:my%20pass@example.com should send
307 // Authorization: Basic dGVzdDpteSBwYXNz
308 // which decodes to "test:my pass".
309 // Standard wget and curl do this too.
311 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
318 static char *gethdr(FILE *fp)
323 /* retrieve header line */
324 c = fgets_and_trim(fp);
326 /* end of the headers? */
327 if (G.wget_buf[0] == '\0')
330 /* convert the header name to lower case */
331 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
333 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
334 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
335 * "A-Z" maps to "a-z".
336 * "@[\]" can't occur in header names.
337 * "^_" maps to "~,DEL" (which is wrong).
338 * "^" was never seen yet, "_" was seen from web.archive.org
339 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
344 /* verify we are at the end of the header name */
346 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
348 /* locate the start of the header value */
350 hdrval = skip_whitespace(s);
353 /* Rats! The buffer isn't big enough to hold the entire header value */
354 while (c = getc(fp), c != EOF && c != '\n')
361 static void reset_beg_range_to_zero(void)
363 bb_error_msg("restart failed");
365 xlseek(G.output_fd, 0, SEEK_SET);
366 /* Done at the end instead: */
367 /* ftruncate(G.output_fd, 0); */
370 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
377 target->user = xstrdup("anonymous:busybox@");
379 sfp = open_socket(lsa);
380 if (ftpcmd(NULL, NULL, sfp) != 220)
381 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
384 * Splitting username:password pair,
387 str = strchr(target->user, ':');
390 switch (ftpcmd("USER ", target->user, sfp)) {
394 if (ftpcmd("PASS ", str, sfp) == 230)
396 /* fall through (failed login) */
398 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
401 ftpcmd("TYPE I", NULL, sfp);
406 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
407 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
408 if (G.content_len < 0 || errno) {
409 bb_error_msg_and_die("SIZE value is garbage");
415 * Entering passive mode
417 if (ftpcmd("PASV", NULL, sfp) != 227) {
419 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
421 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]"
422 // Server's IP is N1.N2.N3.N4 (we ignore it)
423 // Server's port for data connection is P1*256+P2
424 str = strrchr(G.wget_buf, ')');
425 if (str) str[0] = '\0';
426 str = strrchr(G.wget_buf, ',');
427 if (!str) goto pasv_error;
428 port = xatou_range(str+1, 0, 255);
430 str = strrchr(G.wget_buf, ',');
431 if (!str) goto pasv_error;
432 port += xatou_range(str+1, 0, 255) * 256;
433 set_nport(&lsa->u.sa, htons(port));
435 *dfpp = open_socket(lsa);
437 if (G.beg_range != 0) {
438 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
439 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
440 G.content_len -= G.beg_range;
442 reset_beg_range_to_zero();
445 if (ftpcmd("RETR ", target->path, sfp) > 150)
446 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
451 static void NOINLINE retrieve_file_data(FILE *dfp)
453 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
454 # if ENABLE_FEATURE_WGET_TIMEOUT
455 unsigned second_cnt = G.timeout_seconds;
457 struct pollfd polldata;
459 polldata.fd = fileno(dfp);
460 polldata.events = POLLIN | POLLPRI;
462 progress_meter(PROGRESS_START);
467 /* Loops only if chunked */
470 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
471 /* Must use nonblocking I/O, otherwise fread will loop
472 * and *block* until it reads full buffer,
473 * which messes up progress bar and/or timeout logic.
474 * Because of nonblocking I/O, we need to dance
475 * very carefully around EAGAIN. See explanation at
478 ndelay_on(polldata.fd);
484 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
485 /* fread internally uses read loop, which in our case
486 * is usually exited when we get EAGAIN.
487 * In this case, libc sets error marker on the stream.
488 * Need to clear it before next fread to avoid possible
489 * rare false positive ferror below. Rare because usually
490 * fread gets more than zero bytes, and we don't fall
491 * into if (n <= 0) ...
496 rdsz = sizeof(G.wget_buf);
498 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
499 if ((int)G.content_len <= 0)
501 rdsz = (unsigned)G.content_len;
504 n = fread(G.wget_buf, 1, rdsz, dfp);
507 xwrite(G.output_fd, G.wget_buf, n);
508 #if ENABLE_FEATURE_WGET_STATUSBAR
513 if (G.content_len == 0)
516 #if ENABLE_FEATURE_WGET_TIMEOUT
517 second_cnt = G.timeout_seconds;
524 * If error occurs, or EOF is reached, the return value
525 * is a short item count (or zero).
526 * fread does not distinguish between EOF and error.
528 if (errno != EAGAIN) {
530 progress_meter(PROGRESS_END);
531 bb_perror_msg_and_die(bb_msg_read_error);
533 break; /* EOF, not error */
536 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
537 /* It was EAGAIN. There is no data. Wait up to one second
538 * then abort if timed out, or update the bar and try reading again.
540 if (safe_poll(&polldata, 1, 1000) == 0) {
541 # if ENABLE_FEATURE_WGET_TIMEOUT
542 if (second_cnt != 0 && --second_cnt == 0) {
543 progress_meter(PROGRESS_END);
544 bb_error_msg_and_die("download timed out");
547 /* We used to loop back to poll here,
548 * but there is no great harm in letting fread
549 * try reading anyway.
552 /* Need to do it _every_ second for "stalled" indicator
553 * to be shown properly.
555 progress_meter(PROGRESS_BUMP);
557 } /* while (reading data) */
559 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
561 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
566 fgets_and_trim(dfp); /* Eat empty line */
569 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
570 /* FIXME: error check? */
571 if (G.content_len == 0)
572 break; /* all done! */
575 * Note that fgets may result in some data being buffered in dfp.
576 * We loop back to fread, which will retrieve this data.
577 * Also note that code has to be arranged so that fread
578 * is done _before_ one-second poll wait - poll doesn't know
579 * about stdio buffering and can result in spurious one second waits!
583 /* If -c failed, we restart from the beginning,
584 * but we do not truncate the file then; we do it only now, at the end.
585 * This lets the user ^C if their 99% complete 10 GB file download
586 * failed to restart, *without* losing the almost complete file.
589 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
590 if (pos != (off_t)-1)
591 ftruncate(G.output_fd, pos);
594 /* Draw full bar and free its resources */
595 G.chunked = 0; /* makes it show 100% even for chunked download */
596 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
597 progress_meter(PROGRESS_END);
600 static void download_one_url(const char *url)
602 bool use_proxy; /* Use proxies if env vars are set */
604 len_and_sockaddr *lsa;
605 FILE *sfp; /* socket to web/ftp server */
606 FILE *dfp; /* socket to ftp server (data) */
608 char *fname_out_alloc;
609 char *redirected_path = NULL;
610 struct host_info server;
611 struct host_info target;
613 server.allocated = NULL;
614 target.allocated = NULL;
618 parse_url(url, &target);
620 /* Use the proxy if necessary */
621 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
623 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
624 use_proxy = (proxy && proxy[0]);
626 parse_url(proxy, &server);
629 server.port = target.port;
630 if (ENABLE_FEATURE_IPV6) {
631 //free(server.allocated); - can't be non-NULL
632 server.host = server.allocated = xstrdup(target.host);
634 server.host = target.host;
638 if (ENABLE_FEATURE_IPV6)
639 strip_ipv6_scope_id(target.host);
641 /* If there was no -O FILE, guess output filename */
642 fname_out_alloc = NULL;
643 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
644 G.fname_out = bb_get_last_path_component_nostrip(target.path);
645 /* handle "wget http://kernel.org//" */
646 if (G.fname_out[0] == '/' || !G.fname_out[0])
647 G.fname_out = (char*)"index.html";
648 /* -P DIR is considered only if there was no -O FILE */
650 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
652 /* redirects may free target.path later, need to make a copy */
653 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
656 #if ENABLE_FEATURE_WGET_STATUSBAR
657 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
660 /* Determine where to start transfer */
662 if (option_mask32 & WGET_OPT_CONTINUE) {
663 G.output_fd = open(G.fname_out, O_WRONLY);
664 if (G.output_fd >= 0) {
665 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
667 /* File doesn't exist. We do not create the file here yet:
668 * we are not sure it exists on the remote side */
673 lsa = xhost2sockaddr(server.host, server.port);
674 if (!(option_mask32 & WGET_OPT_QUIET)) {
675 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
676 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
680 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
683 if (use_proxy || !target.is_ftp) {
691 /* Open socket to http server */
692 sfp = open_socket(lsa);
694 /* Send HTTP request */
696 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
697 target.is_ftp ? "f" : "ht", target.host,
700 if (option_mask32 & WGET_OPT_POST_DATA)
701 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
703 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
706 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
707 target.host, G.user_agent);
709 /* Ask server to close the connection as soon as we are done
710 * (IOW: we do not intend to send more requests)
712 fprintf(sfp, "Connection: close\r\n");
714 #if ENABLE_FEATURE_WGET_AUTHENTICATION
716 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
717 base64enc(target.user));
719 if (use_proxy && server.user) {
720 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
721 base64enc(server.user));
725 if (G.beg_range != 0)
726 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
728 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
730 fputs(G.extra_headers, sfp);
732 if (option_mask32 & WGET_OPT_POST_DATA) {
734 "Content-Type: application/x-www-form-urlencoded\r\n"
735 "Content-Length: %u\r\n"
738 (int) strlen(G.post_data), G.post_data
743 fprintf(sfp, "\r\n");
749 * Retrieve HTTP response line and check for "200" status code.
755 str = skip_non_whitespace(str);
756 str = skip_whitespace(str);
757 // FIXME: no error check
758 // xatou wouldn't work: "200 OK"
763 while (gethdr(sfp) != NULL)
764 /* eat all remaining headers */;
768 Response 204 doesn't say "null file", it says "metadata
769 has changed but data didn't":
771 "10.2.5 204 No Content
772 The server has fulfilled the request but does not need to return
773 an entity-body, and might want to return updated metainformation.
774 The response MAY include new or updated metainformation in the form
775 of entity-headers, which if present SHOULD be associated with
776 the requested variant.
778 If the client is a user agent, it SHOULD NOT change its document
779 view from that which caused the request to be sent. This response
780 is primarily intended to allow input for actions to take place
781 without causing a change to the user agent's active document view,
782 although any new or updated metainformation SHOULD be applied
783 to the document currently in the user agent's active view.
785 The 204 response MUST NOT include a message-body, and thus
786 is always terminated by the first empty line after the header fields."
788 However, in real world it was observed that some web servers
789 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
792 if (G.beg_range != 0) {
793 /* "Range:..." was not honored by the server.
794 * Restart download from the beginning.
796 reset_beg_range_to_zero();
799 case 300: /* redirection */
804 case 206: /* Partial Content */
805 if (G.beg_range != 0)
806 /* "Range:..." worked. Good. */
808 /* Partial Content even though we did not ask for it??? */
811 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
815 * Retrieve HTTP headers.
817 while ((str = gethdr(sfp)) != NULL) {
818 static const char keywords[] ALIGN1 =
819 "content-length\0""transfer-encoding\0""location\0";
821 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
825 /* gethdr converted "FOO:" string to lowercase */
827 /* strip trailing whitespace */
828 char *s = strchrnul(str, '\0') - 1;
829 while (s >= str && (*s == ' ' || *s == '\t')) {
833 key = index_in_strings(keywords, G.wget_buf) + 1;
834 if (key == KEY_content_length) {
835 G.content_len = BB_STRTOOFF(str, NULL, 10);
836 if (G.content_len < 0 || errno) {
837 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
842 if (key == KEY_transfer_encoding) {
843 if (strcmp(str_tolower(str), "chunked") != 0)
844 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
847 if (key == KEY_location && status >= 300) {
848 if (--redir_limit == 0)
849 bb_error_msg_and_die("too many redirections");
852 free(redirected_path);
853 target.path = redirected_path = xstrdup(str+1);
854 /* lsa stays the same: it's on the same server */
856 parse_url(str, &target);
858 free(server.allocated);
859 server.allocated = NULL;
860 server.host = target.host;
861 /* strip_ipv6_scope_id(target.host); - no! */
862 /* we assume remote never gives us IPv6 addr with scope id */
863 server.port = target.port;
866 } /* else: lsa stays the same: we use proxy */
868 goto establish_session;
871 // if (status >= 300)
872 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
874 /* For HTTP, data is pumped over the same connection */
881 sfp = prepare_ftp_session(&dfp, &target, lsa);
886 if (!(option_mask32 & WGET_OPT_SPIDER)) {
888 G.output_fd = xopen(G.fname_out, G.o_flags);
889 retrieve_file_data(dfp);
890 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
897 /* It's ftp. Close data connection properly */
899 if (ftpcmd(NULL, NULL, sfp) != 226)
900 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
901 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
905 free(server.allocated);
906 free(target.allocated);
907 free(fname_out_alloc);
908 free(redirected_path);
911 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
912 int wget_main(int argc UNUSED_PARAM, char **argv)
914 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
915 static const char wget_longopts[] ALIGN1 =
916 /* name, has_arg, val */
917 "continue\0" No_argument "c"
918 //FIXME: -s isn't --spider, it's --save-headers!
919 "spider\0" No_argument "s"
920 "quiet\0" No_argument "q"
921 "output-document\0" Required_argument "O"
922 "directory-prefix\0" Required_argument "P"
923 "proxy\0" Required_argument "Y"
924 "user-agent\0" Required_argument "U"
925 #if ENABLE_FEATURE_WGET_TIMEOUT
926 "timeout\0" Required_argument "T"
929 // "tries\0" Required_argument "t"
930 /* Ignored (we always use PASV): */
931 "passive-ftp\0" No_argument "\xff"
932 "header\0" Required_argument "\xfe"
933 "post-data\0" Required_argument "\xfd"
934 /* Ignored (we don't do ssl) */
935 "no-check-certificate\0" No_argument "\xfc"
936 /* Ignored (we don't support caching) */
937 "no-cache\0" No_argument "\xfb"
941 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
942 llist_t *headers_llist = NULL;
947 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
948 G.proxy_flag = "on"; /* use proxies if env vars are set */
949 G.user_agent = "Wget"; /* "User-Agent" header field */
951 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
952 applet_long_options = wget_longopts;
954 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
955 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
956 &G.fname_out, &G.dir_prefix,
957 &G.proxy_flag, &G.user_agent,
958 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
959 NULL /* -t RETRIES */
960 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
961 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
965 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
969 llist_t *ll = headers_llist;
971 size += strlen(ll->data) + 2;
974 G.extra_headers = cp = xmalloc(size);
975 while (headers_llist) {
976 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
982 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
983 if (G.fname_out) { /* -O FILE ? */
984 if (LONE_DASH(G.fname_out)) { /* -O - ? */
986 option_mask32 &= ~WGET_OPT_CONTINUE;
988 /* compat with wget: -O FILE can overwrite */
989 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
993 download_one_url(*argv++);
995 if (G.output_fd >= 0)