1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //usage:#define wget_trivial_usage
13 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage: " [--no-check-certificate] [--no-cache]" */
18 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
20 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
24 //usage:#define wget_full_usage "\n\n"
25 //usage: "Retrieve files via HTTP or FTP\n"
26 //usage: "\n -s Spider mode - only check file existence"
27 //usage: "\n -c Continue retrieval of aborted transfer"
28 //usage: "\n -q Quiet"
29 //usage: "\n -P DIR Save to DIR (default .)"
30 //usage: IF_FEATURE_WGET_TIMEOUT(
31 //usage: "\n -T SEC Network read timeout is SEC seconds"
33 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
34 //usage: "\n -U STR Use STR for User-Agent header"
35 //usage: "\n -Y Use proxy ('on' or 'off')"
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
42 # define log_io(...) ((void)0)
58 off_t content_len; /* Content-length of the file */
59 off_t beg_range; /* Range at which continue begins */
60 #if ENABLE_FEATURE_WGET_STATUSBAR
61 off_t transferred; /* Number of bytes transferred so far */
62 const char *curfile; /* Name of current file being transferred */
66 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
70 char *fname_out; /* where to direct output (-O) */
71 const char *proxy_flag; /* Use proxies if env vars are set */
72 const char *user_agent; /* "User-Agent" header field */
73 #if ENABLE_FEATURE_WGET_TIMEOUT
74 unsigned timeout_seconds;
79 smallint chunked; /* chunked transfer encoding */
80 smallint got_clen; /* got content-length: from server */
81 /* Local downloads do benefit from big buffer.
82 * With 512 byte buffer, it was measured to be
83 * an order of magnitude slower than with big one.
85 uint64_t just_to_align_next_member;
86 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
88 #define G (*ptr_to_globals)
89 #define INIT_G() do { \
90 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
94 /* Must match option string! */
/* Each flag is a single bit tested against option_mask32. Bits 0..8 follow
 * the order of the short option letters handed to getopt32 in wget_main
 * ("csqO:P:Y:U:T:" followed by "t:"); bits 9..11 belong to options that
 * only have a long form (passive-ftp, header, post-data). */
96 WGET_OPT_CONTINUE = (1 << 0),
97 WGET_OPT_SPIDER = (1 << 1),
98 WGET_OPT_QUIET = (1 << 2),
99 WGET_OPT_OUTNAME = (1 << 3),
100 WGET_OPT_PREFIX = (1 << 4),
101 WGET_OPT_PROXY = (1 << 5),
102 WGET_OPT_USER_AGENT = (1 << 6),
103 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
104 WGET_OPT_RETRIES = (1 << 8),
105 WGET_OPT_PASSIVE = (1 << 9),
/* Scaled by the ENABLE_ macro: the flag value becomes 0 when long options
 * are compiled out (assuming the macro expands to 0 or 1 - TODO confirm),
 * so a test against it is then always false. */
106 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
107 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
115 #if ENABLE_FEATURE_WGET_STATUSBAR
/* Drive the download progress display.
 * flag is PROGRESS_START, PROGRESS_END or another value (callers in this
 * file also pass PROGRESS_BUMP). The -q flag (WGET_OPT_QUIET) is tested
 * before anything is drawn. */
116 static void progress_meter(int flag)
118 if (option_mask32 & WGET_OPT_QUIET)
121 if (flag == PROGRESS_START)
122 bb_progress_init(&G.pmt, G.curfile);
/* The size expression passed below is 0 when the body is chunked or no
 * Content-Length was received; otherwise it is
 * beg_range + transferred + content_len
 * (presumably content_len holds the bytes still expected - confirm). */
124 bb_progress_update(&G.pmt,
127 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
130 if (flag == PROGRESS_END) {
131 bb_progress_free(&G.pmt);
132 bb_putchar_stderr('\n');
/* Empty variant (presumably selected when the status bar is not built) */
137 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
141 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
142 * local addresses can have a scope identifier to specify the
143 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
144 * identifier is only valid on a single node.
146 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
147 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
148 * in the Host header as invalid requests, see
149 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * strip_ipv6_scope_id() edits host in place: it looks for '%' and for the
 * closing ']', and drops the text between them by copying the tail that
 * starts at ']' over the '%'. Only the forms "[xx]:nn" and "[xx]" are
 * accepted as well-formed.
151 static void strip_ipv6_scope_id(char *host)
155 /* bbox wget actually handles IPv6 addresses without [], like
156 * wget "http://::1/xxx", but this is not standard.
157 * To save code, _here_ we do not support it. */
160 return; /* not IPv6 */
162 scope = strchr(host, '%');
166 /* Remove the IPv6 zone identifier from the host address */
167 cp = strchr(host, ']');
168 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
169 /* malformed address (not "[xx]:nn" or "[xx]") */
173 /* cp points to "]...", scope points to "%eth0]..." */
174 overlapping_strcpy(scope, cp);
177 #if ENABLE_FEATURE_WGET_AUTHENTICATION
178 /* Base64-encode character string. */
/* The encoded text is written into the shared G.wget_buf. Input longer
 * than sizeof(G.wget_buf)/4*3 - 10 bytes is silently cut to that length
 * before encoding.
 * NOTE(review): presumably returns G.wget_buf, so the result is only valid
 * until that buffer is reused - confirm. */
179 static char *base64enc(const char *str)
181 unsigned len = strlen(str);
182 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
183 len = sizeof(G.wget_buf)/4*3 - 10;
184 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
/* Applied in this file to URL text and to server-supplied text right before
 * it is printed in an error message. Walks the bytes of s through an
 * unsigned char pointer.
 * NOTE(review): presumably rewrites unprintable bytes of s in place and
 * returns s - confirm. */
189 static char* sanitize_string(char *s)
191 unsigned char *p = (void *) s;
198 #if ENABLE_FEATURE_WGET_TIMEOUT
/* SIGALRM handler (installed by wget_main). Reports "download timed out"
 * and terminates the program.
 * NOTE(review): open_socket sets G.connecting only around the connect call;
 * presumably this handler acts only while that flag is set - confirm. */
199 static void alarm_handler(int sig UNUSED_PARAM)
201 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
203 bb_error_msg_and_die("download timed out");
/* Connect a stream socket to lsa and wrap the descriptor in a stdio stream
 * opened for reading and writing ("r+").
 * With the timeout feature, an alarm of G.timeout_seconds is armed before
 * the connect and G.connecting is raised for the duration of the call.
 * A failing fdopen is fatal. */
207 static FILE *open_socket(len_and_sockaddr *lsa)
212 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
213 fd = xconnect_stream(lsa);
214 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
216 /* glibc 2.4 seems to try seeking on it - ??! */
217 /* hopefully it understands what ESPIPE means... */
218 fp = fdopen(fd, "r+");
220 bb_perror_msg_and_die(bb_msg_memory_exhausted);
225 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
226 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
/* Reads one line from fp into the shared G.wget_buf with fgets (the size
 * passed is sizeof(G.wget_buf) - 1). EOF or a read error is fatal:
 * "error getting response". The trimmed line is passed to log_io. */
227 static char fgets_and_trim(FILE *fp)
232 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
233 bb_perror_msg_and_die("error getting response");
235 buf_ptr = strchrnul(G.wget_buf, '\n');
238 buf_ptr = strchrnul(G.wget_buf, '\r');
241 log_io("< %s", G.wget_buf);
/* Send one FTP command on fp and return the numeric reply code.
 * The command is s1 immediately followed by s2, terminated by CRLF.
 * Reply lines are read until one starts with a digit and has a space at
 * index 3. That line stays in G.wget_buf, so callers print the reply text
 * from G.wget_buf + 4.
 * NOTE(review): callers pass NULL for s1/s2 to only read a reply;
 * presumably the send is skipped in that case - confirm. */
246 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
252 fprintf(fp, "%s%s\r\n", s1, s2);
254 log_io("> %s%s", s1, s2);
259 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
/* Cut the line after the three-character code so it parses as a number */
261 G.wget_buf[3] = '\0';
262 result = xatoi_positive(G.wget_buf);
// Split src_url into the fields of *h.
// The URL is duplicated (the copy is remembered in h->allocated) and then
// cut up in place. Only "http://" and "ftp://" prefixes are accepted, any
// other URL is fatal. h->port comes from bb_lookup_port (80 and 21 are
// presumably the fallbacks when the service lookup fails - confirm).
// The host part ends at the first '/', '?' or '#'. If the host part
// contains '@', the text before the last '@' is percent-decoded and
// stored as h->user.
267 static void parse_url(const char *src_url, struct host_info *h)
272 h->allocated = url = xstrdup(src_url);
274 if (strncmp(url, "http://", 7) == 0) {
275 h->port = bb_lookup_port("http", "tcp", 80);
278 } else if (strncmp(url, "ftp://", 6) == 0) {
279 h->port = bb_lookup_port("ftp", "tcp", 21);
283 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
286 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
287 // 'GET /?var=a/b HTTP 1.0'
288 // and saves 'index.html?var=a%2Fb' (we save 'b')
289 // wget 'http://busybox.net?login=john@doe':
290 // request: 'GET /?login=john@doe HTTP/1.0'
291 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
292 // wget 'http://busybox.net#test/test':
293 // request: 'GET / HTTP/1.0'
294 // saves: 'index.html' (we save 'test')
296 // We also don't add unique .N suffix if file exists...
// After the next three lines sp points at whichever of '/', '?', '#'
// comes first in the host part, or is NULL when none of them is present.
297 sp = strchr(h->host, '/');
298 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
299 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
302 } else if (*sp == '/') {
305 } else { // '#' or '?'
306 // http://busybox.net?login=john@doe is a valid URL
307 // memmove converts to:
308 // http:/busybox.nett?login=john@doe...
309 memmove(h->host - 1, h->host, sp - h->host);
315 // We used to set h->user to NULL here, but this interferes
316 // with handling of code 302 ("object was moved")
// The LAST '@' is used as the end of the credentials
318 sp = strrchr(h->host, '@');
320 // URL-decode "user:password" string before base64-encoding:
321 // wget http://test:my%20pass@example.com should send
322 // Authorization: Basic dGVzdDpteSBwYXNz
323 // which decodes to "test:my pass".
324 // Standard wget and curl do this too.
326 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
/* Read one header line from fp into the shared G.wget_buf and split it.
 * An empty line marks the end of the headers (callers loop until this
 * function returns NULL). Otherwise the header name, made of alphanumerics
 * and '-', '.', '_', is converted to lower case in place; a line whose
 * name is not properly terminated is fatal ("bad header line"). The value
 * is located by skipping whitespace after the name.
 * If the line was longer than the buffer, the rest of it is read from fp
 * and thrown away.
 * NOTE(review): presumably returns the pointer to the value, with the name
 * left NUL-terminated at the start of G.wget_buf - confirm. */
333 static char *gethdr(FILE *fp)
338 /* retrieve header line */
339 c = fgets_and_trim(fp);
341 /* end of the headers? */
342 if (G.wget_buf[0] == '\0')
345 /* convert the header name to lower case */
346 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
348 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
349 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
350 * "A-Z" maps to "a-z".
351 * "@[\]" can't occur in header names.
352 * "^_" maps to "~,DEL" (which is wrong).
353 * "^" was never seen yet, "_" was seen from web.archive.org
354 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
359 /* verify we are at the end of the header name */
361 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
363 /* locate the start of the header value */
365 hdrval = skip_whitespace(s);
368 /* Rats! The buffer isn't big enough to hold the entire header value */
369 while (c = getc(fp), c != EOF && c != '\n')
/* Called when the server did not accept resuming at G.beg_range:
 * reports "restart failed" and rewinds the output descriptor to offset 0.
 * The file is deliberately not truncated here.
 * NOTE(review): going by the name, G.beg_range is presumably also reset
 * to 0 - confirm. */
376 static void reset_beg_range_to_zero(void)
378 bb_error_msg("restart failed");
380 xlseek(G.output_fd, 0, SEEK_SET);
381 /* Done at the end instead: */
382 /* ftruncate(G.output_fd, 0); */
/* Open an FTP control connection to lsa, log in and start a passive-mode
 * download of target->path.
 *
 * - Credentials come from target->user ("user:password"); the string
 *   "anonymous:busybox@" is stored there as the default.
 * - The greeting must be 220; USER/PASS must end in 230, otherwise the
 *   server's message is printed and the program exits.
 * - A 213 reply to SIZE is parsed into G.content_len; a negative or
 *   unparsable value is fatal.
 * - PASV must return 227. The data port is taken from the last two
 *   comma-separated numbers of the reply (P1*256 + P2) and written into
 *   lsa, so the caller's lsa is modified. The data connection is then
 *   opened and stored in *dfpp.
 * - When resuming (G.beg_range != 0), REST is sent: on 350 the expected
 *   length is reduced by beg_range, otherwise reset_beg_range_to_zero()
 *   is called.
 * - A RETR reply code above 150 is fatal.
 *
 * NOTE(review): presumably returns the control stream (the caller assigns
 * the result to its sfp) - confirm. */
385 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
392 target->user = xstrdup("anonymous:busybox@");
394 sfp = open_socket(lsa);
395 if (ftpcmd(NULL, NULL, sfp) != 220)
396 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
399 * Splitting username:password pair,
402 str = strchr(target->user, ':');
405 switch (ftpcmd("USER ", target->user, sfp)) {
409 if (ftpcmd("PASS ", str, sfp) == 230)
411 /* fall through (failed login) */
413 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
416 ftpcmd("TYPE I", NULL, sfp);
421 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
422 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
423 if (G.content_len < 0 || errno) {
424 bb_error_msg_and_die("SIZE value is garbage");
430 * Entering passive mode
432 if (ftpcmd("PASV", NULL, sfp) != 227) {
434 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
436 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
437 // Server's IP is N1.N2.N3.N4 (we ignore it)
438 // Server's port for data connection is P1*256+P2
439 str = strrchr(G.wget_buf, ')');
440 if (str) str[0] = '\0';
441 str = strrchr(G.wget_buf, ',');
442 if (!str) goto pasv_error;
443 port = xatou_range(str+1, 0, 255);
445 str = strrchr(G.wget_buf, ',');
446 if (!str) goto pasv_error;
447 port += xatou_range(str+1, 0, 255) * 256;
448 set_nport(&lsa->u.sa, htons(port));
450 *dfpp = open_socket(lsa);
452 if (G.beg_range != 0) {
453 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
454 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
455 G.content_len -= G.beg_range;
457 reset_beg_range_to_zero();
460 if (ftpcmd("RETR ", target->path, sfp) > 150)
461 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
/* Copy the response body from dfp to G.output_fd through the shared
 * G.wget_buf, driving the progress meter along the way.
 *
 * - With the status bar or the timeout feature built in, the descriptor is
 *   switched to non-blocking mode for the copy and switched back after it.
 *   When fread returns nothing because of EAGAIN, the code polls the
 *   descriptor for up to one second at a time.
 * - With the timeout feature, a counter starts at G.timeout_seconds, is
 *   reloaded every time data arrives and is decremented on every poll that
 *   times out; reaching zero is fatal ("download timed out"). A value of 0
 *   disables the check.
 * - A read that fails with an errno other than EAGAIN is fatal; otherwise
 *   a short read is treated as end of data.
 * - At most G.content_len bytes are requested per read once that is less
 *   than the buffer size.
 * - For chunked bodies, after each chunk the next size line is parsed as a
 *   hexadecimal number; size 0 ends the transfer.
 * - Afterwards the output file may be truncated at the current offset (see
 *   the comment about failed -c restarts), and the progress bar is forced
 *   to its final state by clearing G.chunked and setting G.got_clen. */
466 static void NOINLINE retrieve_file_data(FILE *dfp)
468 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
469 # if ENABLE_FEATURE_WGET_TIMEOUT
470 unsigned second_cnt = G.timeout_seconds;
472 struct pollfd polldata;
474 polldata.fd = fileno(dfp);
475 polldata.events = POLLIN | POLLPRI;
477 progress_meter(PROGRESS_START);
482 /* Loops only if chunked */
485 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
486 /* Must use nonblocking I/O, otherwise fread will loop
487 * and *block* until it reads full buffer,
488 * which messes up progress bar and/or timeout logic.
489 * Because of nonblocking I/O, we need to dance
490 * very carefully around EAGAIN. See explanation at
493 ndelay_on(polldata.fd);
499 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
500 /* fread internally uses read loop, which in our case
501 * is usually exited when we get EAGAIN.
502 * In this case, libc sets error marker on the stream.
503 * Need to clear it before next fread to avoid possible
504 * rare false positive ferror below. Rare because usually
505 * fread gets more than zero bytes, and we don't fall
506 * into if (n <= 0) ...
511 rdsz = sizeof(G.wget_buf);
513 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
514 if ((int)G.content_len <= 0)
516 rdsz = (unsigned)G.content_len;
519 n = fread(G.wget_buf, 1, rdsz, dfp);
522 xwrite(G.output_fd, G.wget_buf, n);
523 #if ENABLE_FEATURE_WGET_STATUSBAR
528 if (G.content_len == 0)
531 #if ENABLE_FEATURE_WGET_TIMEOUT
532 second_cnt = G.timeout_seconds;
539 * If error occurs, or EOF is reached, the return value
540 * is a short item count (or zero).
541 * fread does not distinguish between EOF and error.
543 if (errno != EAGAIN) {
545 progress_meter(PROGRESS_END);
546 bb_perror_msg_and_die(bb_msg_read_error);
548 break; /* EOF, not error */
551 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
552 /* It was EAGAIN. There is no data. Wait up to one second
553 * then abort if timed out, or update the bar and try reading again.
555 if (safe_poll(&polldata, 1, 1000) == 0) {
556 # if ENABLE_FEATURE_WGET_TIMEOUT
557 if (second_cnt != 0 && --second_cnt == 0) {
558 progress_meter(PROGRESS_END);
559 bb_error_msg_and_die("download timed out");
562 /* We used to loop back to poll here,
563 * but there is no great harm in letting fread
564 * to try reading anyway.
567 /* Need to do it _every_ second for "stalled" indicator
568 * to be shown properly.
570 progress_meter(PROGRESS_BUMP);
572 } /* while (reading data) */
574 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
576 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
581 fgets_and_trim(dfp); /* Eat empty line */
584 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
585 /* FIXME: error check? */
586 if (G.content_len == 0)
587 break; /* all done! */
590 * Note that fgets may result in some data being buffered in dfp.
591 * We loop back to fread, which will retrieve this data.
592 * Also note that code has to be arranged so that fread
593 * is done _before_ one-second poll wait - poll doesn't know
594 * about stdio buffering and can result in spurious one second waits!
598 /* If -c failed, we restart from the beginning,
599 * but we do not truncate file then, we do it only now, at the end.
600 * This lets user to ^C if his 99% complete 10 GB file download
601 * failed to restart *without* losing the almost complete file.
604 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
605 if (pos != (off_t)-1)
606 ftruncate(G.output_fd, pos);
609 /* Draw full bar and free its resources */
610 G.chunked = 0; /* makes it show 100% even for chunked download */
611 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
612 progress_meter(PROGRESS_END);
/* Download a single URL.
 *
 * 1. The URL is parsed into `target`. Unless the proxy flag is "off", the
 *    ftp_proxy / http_proxy environment variable (chosen by target.is_ftp)
 *    is consulted; a non-empty value is parsed into `server`. Without a
 *    proxy, `server` takes the target's host and port.
 * 2. Without -O, the output name is the last path component of the URL
 *    ("index.html" if that is empty or starts with '/'), with the -P
 *    directory prefix applied when one is set.
 * 3. With -c, an existing output file is opened and its size becomes
 *    G.beg_range.
 * 4. For HTTP, and for FTP through a proxy, an HTTP/1.1 request is written
 *    ("Connection: close", optional authorization, Range, extra headers and
 *    POST data), then the status line and headers are read. The headers
 *    content-length, transfer-encoding (only "chunked" is accepted) and
 *    location are acted upon; a Location header with status >= 300 restarts
 *    the session, and redir_limit bounds how often that may happen.
 *    Otherwise prepare_ftp_session() sets up the transfer.
 * 5. Unless -s (spider) was given, the body is copied by
 *    retrieve_file_data(). For FTP the final reply must be 226.
 * 6. Strings allocated here are freed before returning. */
615 static void download_one_url(const char *url)
617 bool use_proxy; /* Use proxies if env vars are set */
619 len_and_sockaddr *lsa;
620 FILE *sfp; /* socket to web/ftp server */
621 FILE *dfp; /* socket to ftp server (data) */
623 char *fname_out_alloc;
624 char *redirected_path = NULL;
625 struct host_info server;
626 struct host_info target;
628 server.allocated = NULL;
629 target.allocated = NULL;
633 parse_url(url, &target);
635 /* Use the proxy if necessary */
636 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
638 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
639 use_proxy = (proxy && proxy[0]);
641 parse_url(proxy, &server);
644 server.port = target.port;
645 if (ENABLE_FEATURE_IPV6) {
646 //free(server.allocated); - can't be non-NULL
647 server.host = server.allocated = xstrdup(target.host);
649 server.host = target.host;
653 if (ENABLE_FEATURE_IPV6)
654 strip_ipv6_scope_id(target.host);
656 /* If there was no -O FILE, guess output filename */
657 fname_out_alloc = NULL;
658 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
659 G.fname_out = bb_get_last_path_component_nostrip(target.path);
660 /* handle "wget http://kernel.org//" */
661 if (G.fname_out[0] == '/' || !G.fname_out[0])
662 G.fname_out = (char*)"index.html";
663 /* -P DIR is considered only if there was no -O FILE */
665 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
667 /* redirects may free target.path later, need to make a copy */
668 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
671 #if ENABLE_FEATURE_WGET_STATUSBAR
672 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
675 /* Determine where to start transfer */
677 if (option_mask32 & WGET_OPT_CONTINUE) {
678 G.output_fd = open(G.fname_out, O_WRONLY);
679 if (G.output_fd >= 0) {
680 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
682 /* File doesn't exist. We do not create file here yet.
683 * We are not sure it exists on remote side */
688 lsa = xhost2sockaddr(server.host, server.port);
689 if (!(option_mask32 & WGET_OPT_QUIET)) {
690 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
691 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
695 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
698 if (use_proxy || !target.is_ftp) {
706 /* Open socket to http server */
707 sfp = open_socket(lsa);
709 /* Send HTTP request */
711 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
712 target.is_ftp ? "f" : "ht", target.host,
715 if (option_mask32 & WGET_OPT_POST_DATA)
716 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
718 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
721 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
722 target.host, G.user_agent);
724 /* Ask server to close the connection as soon as we are done
725 * (IOW: we do not intend to send more requests)
727 fprintf(sfp, "Connection: close\r\n");
729 #if ENABLE_FEATURE_WGET_AUTHENTICATION
731 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
732 base64enc(target.user));
734 if (use_proxy && server.user) {
735 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
736 base64enc(server.user));
740 if (G.beg_range != 0)
741 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
743 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
745 fputs(G.extra_headers, sfp);
747 if (option_mask32 & WGET_OPT_POST_DATA) {
749 "Content-Type: application/x-www-form-urlencoded\r\n"
750 "Content-Length: %u\r\n"
753 (int) strlen(G.post_data), G.post_data
758 fprintf(sfp, "\r\n");
764 * Retrieve HTTP response line and check for "200" status code.
770 str = skip_non_whitespace(str);
771 str = skip_whitespace(str);
772 // FIXME: no error check
773 // xatou wouldn't work: "200 OK"
778 while (gethdr(sfp) != NULL)
779 /* eat all remaining headers */;
783 Response 204 doesn't say "null file", it says "metadata
784 has changed but data didn't":
786 "10.2.5 204 No Content
787 The server has fulfilled the request but does not need to return
788 an entity-body, and might want to return updated metainformation.
789 The response MAY include new or updated metainformation in the form
790 of entity-headers, which if present SHOULD be associated with
791 the requested variant.
793 If the client is a user agent, it SHOULD NOT change its document
794 view from that which caused the request to be sent. This response
795 is primarily intended to allow input for actions to take place
796 without causing a change to the user agent's active document view,
797 although any new or updated metainformation SHOULD be applied
798 to the document currently in the user agent's active view.
800 The 204 response MUST NOT include a message-body, and thus
801 is always terminated by the first empty line after the header fields."
803 However, in real world it was observed that some web servers
804 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
807 if (G.beg_range != 0) {
808 /* "Range:..." was not honored by the server.
809 * Restart download from the beginning.
811 reset_beg_range_to_zero();
814 case 300: /* redirection */
819 case 206: /* Partial Content */
820 if (G.beg_range != 0)
821 /* "Range:..." worked. Good. */
823 /* Partial Content even though we did not ask for it??? */
826 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
830 * Retrieve HTTP headers.
832 while ((str = gethdr(sfp)) != NULL) {
833 static const char keywords[] ALIGN1 =
834 "content-length\0""transfer-encoding\0""location\0";
836 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
840 /* gethdr converted "FOO:" string to lowercase */
842 /* strip trailing whitespace */
843 char *s = strchrnul(str, '\0') - 1;
844 while (s >= str && (*s == ' ' || *s == '\t')) {
848 key = index_in_strings(keywords, G.wget_buf) + 1;
849 if (key == KEY_content_length) {
850 G.content_len = BB_STRTOOFF(str, NULL, 10);
851 if (G.content_len < 0 || errno) {
852 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
857 if (key == KEY_transfer_encoding) {
858 if (strcmp(str_tolower(str), "chunked") != 0)
859 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
862 if (key == KEY_location && status >= 300) {
863 if (--redir_limit == 0)
864 bb_error_msg_and_die("too many redirections");
867 free(redirected_path);
868 target.path = redirected_path = xstrdup(str+1);
869 /* lsa stays the same: it's on the same server */
871 parse_url(str, &target);
873 free(server.allocated);
874 server.allocated = NULL;
875 server.host = target.host;
876 /* strip_ipv6_scope_id(target.host); - no! */
877 /* we assume remote never gives us IPv6 addr with scope id */
878 server.port = target.port;
881 } /* else: lsa stays the same: we use proxy */
883 goto establish_session;
886 // if (status >= 300)
887 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
889 /* For HTTP, data is pumped over the same connection */
896 sfp = prepare_ftp_session(&dfp, &target, lsa);
901 if (!(option_mask32 & WGET_OPT_SPIDER)) {
903 G.output_fd = xopen(G.fname_out, G.o_flags);
904 retrieve_file_data(dfp);
905 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
912 /* It's ftp. Close data connection properly */
914 if (ftpcmd(NULL, NULL, sfp) != 226)
915 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
916 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
920 free(server.allocated);
921 free(target.allocated);
922 free(fname_out_alloc);
923 free(redirected_path);
/* Applet entry point.
 * Sets the defaults, parses the command line with getopt32 (long option
 * names are registered when that feature is enabled), joins all --header
 * values into G.extra_headers, selects the open(2) flags for the output
 * file, hands URL arguments to download_one_url(), closes the output
 * descriptor if one is open and returns EXIT_SUCCESS. */
926 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
927 int wget_main(int argc UNUSED_PARAM, char **argv)
929 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
930 static const char wget_longopts[] ALIGN1 =
931 /* name, has_arg, val */
932 "continue\0" No_argument "c"
933 //FIXME: -s isn't --spider, it's --save-headers!
934 "spider\0" No_argument "s"
935 "quiet\0" No_argument "q"
936 "output-document\0" Required_argument "O"
937 "directory-prefix\0" Required_argument "P"
938 "proxy\0" Required_argument "Y"
939 "user-agent\0" Required_argument "U"
940 #if ENABLE_FEATURE_WGET_TIMEOUT
941 "timeout\0" Required_argument "T"
944 // "tries\0" Required_argument "t"
945 /* Ignored (we always use PASV): */
946 "passive-ftp\0" No_argument "\xff"
947 "header\0" Required_argument "\xfe"
948 "post-data\0" Required_argument "\xfd"
949 /* Ignored (we don't do ssl) */
950 "no-check-certificate\0" No_argument "\xfc"
951 /* Ignored (we don't support caching) */
952 "no-cache\0" No_argument "\xfb"
956 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
957 llist_t *headers_llist = NULL;
962 #if ENABLE_FEATURE_WGET_TIMEOUT
/* Default network timeout is 900 seconds; -T overrides it */
963 G.timeout_seconds = 900;
964 signal(SIGALRM, alarm_handler);
966 G.proxy_flag = "on"; /* use proxies if env vars are set */
967 G.user_agent = "Wget"; /* "User-Agent" header field */
969 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
970 applet_long_options = wget_longopts;
972 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
973 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
974 &G.fname_out, &G.dir_prefix,
975 &G.proxy_flag, &G.user_agent,
976 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
977 NULL /* -t RETRIES */
978 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
979 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
983 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Join all --header values into one string. Each header contributes its
 * length plus 2 bytes for the "\r\n" appended when it is copied. */
987 llist_t *ll = headers_llist;
989 size += strlen(ll->data) + 2;
992 G.extra_headers = cp = xmalloc(size);
993 while (headers_llist) {
994 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
/* Default open flags include O_EXCL, i.e. an existing file is not reused */
1000 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1001 if (G.fname_out) { /* -O FILE ? */
1002 if (LONE_DASH(G.fname_out)) { /* -O - ? */
/* With "-O -" the -c (continue) flag is dropped */
1004 option_mask32 &= ~WGET_OPT_CONTINUE;
1006 /* compat with wget: -O FILE can overwrite */
1007 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1011 download_one_url(*argv++);
1013 if (G.output_fd >= 0)
1014 xclose(G.output_fd);
1016 return EXIT_SUCCESS;