1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //usage:#define wget_trivial_usage
13 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage: " [--no-check-certificate] [--no-cache]" */
18 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
20 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
24 //usage:#define wget_full_usage "\n\n"
25 //usage: "Retrieve files via HTTP or FTP\n"
26 //usage: "\n -s Spider mode - only check file existence"
27 //usage: "\n -c Continue retrieval of aborted transfer"
28 //usage: "\n -q Quiet"
29 //usage: "\n -P DIR Save to DIR (default .)"
30 //usage: IF_FEATURE_WGET_TIMEOUT(
31 //usage: "\n -T SEC Network read timeout is SEC seconds"
33 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
34 //usage: "\n -U STR Use STR for User-Agent header"
35 //usage: "\n -Y Use proxy ('on' or 'off')"
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
42 # define log_io(...) ((void)0)
58 off_t content_len; /* Content-length of the file */
59 off_t beg_range; /* Range at which continue begins */
60 #if ENABLE_FEATURE_WGET_STATUSBAR
61 off_t transferred; /* Number of bytes transferred so far */
62 const char *curfile; /* Name of current file being transferred */
66 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
70 char *fname_out; /* where to direct output (-O) */
71 const char *proxy_flag; /* Use proxies if env vars are set */
72 const char *user_agent; /* "User-Agent" header field */
73 #if ENABLE_FEATURE_WGET_TIMEOUT
74 unsigned timeout_seconds;
79 smallint chunked; /* chunked transfer encoding */
80 smallint got_clen; /* got content-length: from server */
81 /* Local downloads do benefit from big buffer.
82 * With 512 byte buffer, it was measured to be
83 * an order of magnitude slower than with big one.
85 uint64_t just_to_align_next_member;
86 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
88 #define G (*ptr_to_globals)
89 #define INIT_G() do { \
90 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
92 #define FINI_G() do { \
93 FREE_PTR_TO_GLOBALS(); \
97 /* Must match option string! */
99 WGET_OPT_CONTINUE = (1 << 0),
100 WGET_OPT_SPIDER = (1 << 1),
101 WGET_OPT_QUIET = (1 << 2),
102 WGET_OPT_OUTNAME = (1 << 3),
103 WGET_OPT_PREFIX = (1 << 4),
104 WGET_OPT_PROXY = (1 << 5),
105 WGET_OPT_USER_AGENT = (1 << 6),
106 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
107 WGET_OPT_RETRIES = (1 << 8),
108 WGET_OPT_PASSIVE = (1 << 9),
109 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
110 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
118 #if ENABLE_FEATURE_WGET_STATUSBAR
// Progress display helper for builds with ENABLE_FEATURE_WGET_STATUSBAR.
// flag: compared below against PROGRESS_START and PROGRESS_END.
// NOTE(review): some original lines are absent from this listing; the
// comments describe only the statements that are visible.
119 static void progress_meter(int flag)
// Quiet mode is tested before anything is drawn.
121 if (option_mask32 & WGET_OPT_QUIET)
// First call of a transfer: set up the meter with the current file name.
124 if (flag == PROGRESS_START)
125 bb_progress_init(&G.pmt, G.curfile);
127 bb_progress_update(&G.pmt,
// Last argument: 0 when the transfer is chunked or no length was received,
// otherwise start offset + bytes transferred + G.content_len.
130 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
// Final call: release the meter and emit a newline on stderr.
133 if (flag == PROGRESS_END) {
134 bb_progress_free(&G.pmt);
135 bb_putchar_stderr('\n');
// No-op variant of progress_meter: empty body, flag is unused.
// NOTE(review): presumably selected when ENABLE_FEATURE_WGET_STATUSBAR is off;
// the preprocessor line that selects it is not visible here - confirm.
140 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
144 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
145 * local addresses can have a scope identifier to specify the
146 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
147 * identifier is only valid on a single node.
149 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
150 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
151 * in the Host header as invalid requests, see
152 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
// Removes an IPv6 zone (scope) identifier from a bracketed host string, in place.
// host: writable string; the tail starting at the closing bracket is copied
//       over the scope part with overlapping_strcpy.
// NOTE(review): some original lines (declarations, tests, returns) are absent
// from this listing; the comments describe only the visible statements.
154 static void strip_ipv6_scope_id(char *host)
158 /* bbox wget actually handles IPv6 addresses without [], like
159 * wget "http://::1/xxx", but this is not standard.
160 * To save code, _here_ we do not support it. */
163 return; /* not IPv6 */
// The scope identifier, if any, begins at the first percent sign.
165 scope = strchr(host, '%');
169 /* Remove the IPv6 zone identifier from the host address */
170 cp = strchr(host, ']');
// Anything other than end of string or a colon after the closing bracket
// is treated as malformed.
171 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
172 /* malformed address (not "[xx]:nn" or "[xx]") */
176 /* cp points to "]...", scope points to "%eth0]..." */
177 overlapping_strcpy(scope, cp);
180 #if ENABLE_FEATURE_WGET_AUTHENTICATION
181 /* Base64-encode character string. */
// str: NUL-terminated input.
// The encoded text is written into the shared buffer G.wget_buf, so each call
// overwrites whatever that buffer held before.
// Input longer than sizeof(G.wget_buf)/4*3 - 10 bytes is truncated to that
// length before encoding.
182 static char *base64enc(const char *str)
184 unsigned len = strlen(str);
185 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
186 len = sizeof(G.wget_buf)/4*3 - 10;
187 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
// Callers in this file pass the result straight to error messages as a %s argument.
// NOTE(review): only the signature and the first local are visible in this
// listing; the loop body and return statement are absent, so the exact
// transformation is not documented here.
192 static char* sanitize_string(char *s)
// p views the same bytes as s, as unsigned char.
194 unsigned char *p = (void *) s;
201 #if ENABLE_FEATURE_WGET_TIMEOUT
// Signal handler; wget_main installs it for SIGALRM. The signal number is unused.
// NOTE(review): a line between the comment and the call below is absent from
// this listing, so any condition guarding the call is not visible - confirm.
202 static void alarm_handler(int sig UNUSED_PARAM)
204 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
206 bb_error_msg_and_die("download timed out");
// Connects to the address in lsa and wraps the connected descriptor in a
// read/write stdio stream.
// lsa: address handed to xconnect_stream.
// NOTE(review): declarations, the fdopen result test and the return statement
// are absent from this listing.
210 static FILE *open_socket(len_and_sockaddr *lsa)
// With FEATURE_WGET_TIMEOUT: arm an alarm of G.timeout_seconds and mark the
// connect phase in G.connecting; the flag is cleared right after the connect.
215 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
216 fd = xconnect_stream(lsa);
217 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
219 /* glibc 2.4 seems to try seeking on it - ??! */
220 /* hopefully it understands what ESPIPE means... */
221 fp = fdopen(fd, "r+");
// NOTE(review): presumably reached only when fdopen failed - confirm.
223 bb_perror_msg_and_die(bb_msg_memory_exhausted);
228 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
229 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
// Reads one line from fp into the shared buffer G.wget_buf.
// Dies with an error message when fgets returns NULL.
// NOTE(review): the statements that store the terminators and the return
// statement are absent from this listing.
230 static char fgets_and_trim(FILE *fp)
235 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
236 bb_perror_msg_and_die("error getting response");
// strchrnul yields either the newline or the end of the string.
238 buf_ptr = strchrnul(G.wget_buf, '\n');
// Same search for a carriage return.
241 buf_ptr = strchrnul(G.wget_buf, '\r');
// log_io is a macro; whether it prints depends on which definition is compiled in.
244 log_io("< %s", G.wget_buf);
// Sends one FTP command on the control stream and reads the reply.
// s1, s2: command text and argument, written as s1 followed by s2 and CRLF.
//         Callers in this file also pass NULL for both just to read a reply;
//         the guard for that case is not visible in this listing.
// fp:     control connection stream.
// Callers compare the return value against FTP reply codes (220, 230, 227, ...).
// The reply line itself stays in G.wget_buf for the caller to inspect.
249 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
255 fprintf(fp, "%s%s\r\n", s1, s2);
257 log_io("> %s%s", s1, s2);
// Keep reading until a line starts with a digit and has a space at index 3.
262 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
// Cut the line after the first three characters so they parse as a number.
264 G.wget_buf[3] = '\0';
265 result = xatoi_positive(G.wget_buf);
// Splits src_url into the fields of h.
// src_url: URL text; it is duplicated, and the copy (kept in h->allocated) is
//          what gets examined and edited.
// h:       receives port, host and user; other field assignments are on
//          lines absent from this listing.
// Recognised prefixes are ftp:// and http://. A string without a double slash
// is documented below as falling back to http. Anything else is reported as
// not an http or ftp url and the program dies.
// NOTE(review): several assignments and branch lines are absent from this
// listing; the comments describe only the visible statements.
270 static void parse_url(const char *src_url, struct host_info *h)
275 h->allocated = url = xstrdup(src_url);
277 if (strncmp(url, "ftp://", 6) == 0) {
278 h->port = bb_lookup_port("ftp", "tcp", 21);
282 if (strncmp(url, "http://", 7) == 0) {
285 h->port = bb_lookup_port("http", "tcp", 80);
288 if (!strstr(url, "//")) {
289 // GNU wget is user-friendly and falls back to http://
293 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
296 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
297 // 'GET /?var=a/b HTTP 1.0'
298 // and saves 'index.html?var=a%2Fb' (we save 'b')
299 // wget 'http://busybox.net?login=john@doe':
300 // request: 'GET /?login=john@doe HTTP/1.0'
301 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
302 // wget 'http://busybox.net#test/test':
303 // request: 'GET / HTTP/1.0'
304 // saves: 'index.html' (we save 'test')
306 // We also don't add unique .N suffix if file exists...
// sp ends up at the earliest of the first slash, question mark or hash
// after the host start, or NULL when none is present.
307 sp = strchr(h->host, '/');
308 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
309 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
312 } else if (*sp == '/') {
315 } else { // '#' or '?'
316 // http://busybox.net?login=john@doe is a valid URL
317 // memmove converts to:
318 // http:/busybox.nett?login=john@doe...
// Shift the host text one byte to the left inside the private copy.
319 memmove(h->host - 1, h->host, sp - h->host);
325 // We used to set h->user to NULL here, but this interferes
326 // with handling of code 302 ("object was moved")
// The last at-sign in the host part separates credentials from the host name.
328 sp = strrchr(h->host, '@');
330 // URL-decode "user:password" string before base64-encoding:
331 // wget http://test:my%20pass@example.com should send
332 // Authorization: Basic dGVzdDpteSBwYXNz
333 // which decodes to "test:my pass".
334 // Standard wget and curl do this too.
336 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
// Reads one header line from fp into G.wget_buf and locates its value.
// fp: stream positioned at the start of a header line.
// Callers loop until the result is NULL and otherwise use the returned string
// as the header value, with the header name left at the start of G.wget_buf.
// Dies with a bad-header-line message when the name is malformed.
// NOTE(review): the return statements and a few tests are absent from this
// listing; the comments describe only the visible statements.
343 static char *gethdr(FILE *fp)
348 /* retrieve header line */
349 c = fgets_and_trim(fp);
351 /* end of the headers? */
352 if (G.wget_buf[0] == '\0')
355 /* convert the header name to lower case */
// The name may consist of alphanumerics, dash, dot and underscore.
356 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
358 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
359 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
360 * "A-Z" maps to "a-z".
361 * "@[\]" can't occur in header names.
362 * "^_" maps to "~,DEL" (which is wrong).
363 * "^" was never seen yet, "_" was seen from web.archive.org
364 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
369 /* verify we are at the end of the header name */
371 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
373 /* locate the start of the header value */
375 hdrval = skip_whitespace(s);
378 /* Rats! The buffer isn't big enough to hold the entire header value */
// Discard the rest of an over-long line, up to newline or EOF.
379 while (c = getc(fp), c != EOF && c != '\n')
// Called from the FTP REST path and from the HTTP status handling when a
// resume request was not honored.
// Reports the failure and rewinds the output file to offset 0; truncation is
// deliberately left for the end of the download.
// NOTE(review): at least one statement of this function is absent from this listing.
386 static void reset_beg_range_to_zero(void)
388 bb_error_msg("restart failed");
390 xlseek(G.output_fd, 0, SEEK_SET);
391 /* Done at the end instead: */
392 /* ftruncate(G.output_fd, 0); */
// Logs in to an FTP server and prepares a passive-mode data connection.
// dfpp:   receives the data connection stream.
// target: supplies the credentials (user) and the remote path.
// lsa:    server address; its port is overwritten with the data port taken
//         from the PASV reply before the data connection is opened.
// Sequence visible below: greeting (220), USER and PASS, TYPE I, SIZE, PASV,
// optional REST, RETR. Unexpected replies end the program with an error.
// NOTE(review): declarations, case labels, the return statement and some
// assignments are absent from this listing.
395 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
// Default credentials. NOTE(review): the test guarding this line is not visible.
402 target->user = xstrdup("anonymous:busybox@");
404 sfp = open_socket(lsa);
// Passing NULL, NULL only reads the server greeting.
405 if (ftpcmd(NULL, NULL, sfp) != 220)
406 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
409 * Splitting username:password pair,
412 str = strchr(target->user, ':');
415 switch (ftpcmd("USER ", target->user, sfp)) {
419 if (ftpcmd("PASS ", str, sfp) == 230)
421 /* fall through (failed login) */
423 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
426 ftpcmd("TYPE I", NULL, sfp);
// A 213 reply carries the file size, parsed as decimal after the reply code.
431 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
432 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
433 if (G.content_len < 0 || errno) {
434 bb_error_msg_and_die("SIZE value is garbage");
440 * Entering passive mode
442 if (ftpcmd("PASV", NULL, sfp) != 227) {
444 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
446 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
447 // Server's IP is N1.N2.N3.N4 (we ignore it)
448 // Server's port for data connection is P1*256+P2
// The reply is parsed from the right: drop a trailing parenthesis, then read
// the last two comma-separated numbers (each limited to 0..255).
449 str = strrchr(G.wget_buf, ')');
450 if (str) str[0] = '\0';
451 str = strrchr(G.wget_buf, ',');
452 if (!str) goto pasv_error;
453 port = xatou_range(str+1, 0, 255);
455 str = strrchr(G.wget_buf, ',');
456 if (!str) goto pasv_error;
457 port += xatou_range(str+1, 0, 255) * 256;
458 set_nport(&lsa->u.sa, htons(port));
460 *dfpp = open_socket(lsa);
// Resume: ask the server to restart at G.beg_range. On a 350 reply the
// remaining length shrinks accordingly.
462 if (G.beg_range != 0) {
463 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
464 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
465 G.content_len -= G.beg_range;
467 reset_beg_range_to_zero();
// Any reply code above 150 to RETR is treated as failure.
470 if (ftpcmd("RETR ", target->path, sfp) > 150)
471 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
// Copies the response body from dfp to G.output_fd through G.wget_buf.
// dfp: data stream (HTTP connection or FTP data connection).
// When the statusbar or timeout feature is enabled, the descriptor is switched
// to nonblocking mode and poll is used with a one second timeout, so that the
// progress meter can be refreshed and the timeout counted.
// For chunked transfers the chunk size lines are read between data runs.
// At the end the output file is truncated at the current position and the
// progress meter is finished.
// NOTE(review): loop headers, several tests, break statements and preprocessor
// lines are absent from this listing; the comments describe only what is visible.
476 static void NOINLINE retrieve_file_data(FILE *dfp)
478 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
479 # if ENABLE_FEATURE_WGET_TIMEOUT
// Countdown of idle one-second poll intervals.
480 unsigned second_cnt = G.timeout_seconds;
482 struct pollfd polldata;
484 polldata.fd = fileno(dfp);
485 polldata.events = POLLIN | POLLPRI;
487 progress_meter(PROGRESS_START);
492 /* Loops only if chunked */
495 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
496 /* Must use nonblocking I/O, otherwise fread will loop
497 * and *block* until it reads full buffer,
498 * which messes up progress bar and/or timeout logic.
499 * Because of nonblocking I/O, we need to dance
500 * very carefully around EAGAIN. See explanation at
503 ndelay_on(polldata.fd);
509 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
510 /* fread internally uses read loop, which in our case
511 * is usually exited when we get EAGAIN.
512 * In this case, libc sets error marker on the stream.
513 * Need to clear it before next fread to avoid possible
514 * rare false positive ferror below. Rare because usually
515 * fread gets more than zero bytes, and we don't fall
516 * into if (n <= 0) ...
// Read at most one buffer; when fewer bytes than that remain according to
// G.content_len, read only the remainder.
521 rdsz = sizeof(G.wget_buf);
523 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
524 if ((int)G.content_len <= 0)
526 rdsz = (unsigned)G.content_len;
529 n = fread(G.wget_buf, 1, rdsz, dfp);
532 xwrite(G.output_fd, G.wget_buf, n);
533 #if ENABLE_FEATURE_WGET_STATUSBAR
538 if (G.content_len == 0)
541 #if ENABLE_FEATURE_WGET_TIMEOUT
// Re-arm the idle countdown.
// NOTE(review): presumably only after data arrived; the enclosing test is not visible.
542 second_cnt = G.timeout_seconds;
549 * If error occurs, or EOF is reached, the return value
550 * is a short item count (or zero).
551 * fread does not distinguish between EOF and error.
// Anything other than EAGAIN is a real read error.
553 if (errno != EAGAIN) {
555 progress_meter(PROGRESS_END);
556 bb_perror_msg_and_die(bb_msg_read_error);
558 break; /* EOF, not error */
561 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
562 /* It was EAGAIN. There is no data. Wait up to one second
563 * then abort if timed out, or update the bar and try reading again.
565 if (safe_poll(&polldata, 1, 1000) == 0) {
566 # if ENABLE_FEATURE_WGET_TIMEOUT
// A starting value of 0 never triggers the timeout.
567 if (second_cnt != 0 && --second_cnt == 0) {
568 progress_meter(PROGRESS_END);
569 bb_error_msg_and_die("download timed out");
572 /* We used to loop back to poll here,
573 * but there is no great harm in letting fread
574 * to try reading anyway.
577 /* Need to do it _every_ second for "stalled" indicator
578 * to be shown properly.
580 progress_meter(PROGRESS_BUMP);
582 } /* while (reading data) */
584 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
586 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
591 fgets_and_trim(dfp); /* Eat empty line */
// Next chunk size: the line in G.wget_buf is parsed as hexadecimal.
594 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
595 /* FIXME: error check? */
596 if (G.content_len == 0)
597 break; /* all done! */
600 * Note that fgets may result in some data being buffered in dfp.
601 * We loop back to fread, which will retrieve this data.
602 * Also note that code has to be arranged so that fread
603 * is done _before_ one-second poll wait - poll doesn't know
604 * about stdio buffering and can result in spurious one second waits!
608 /* If -c failed, we restart from the beginning,
609 * but we do not truncate file then, we do it only now, at the end.
610 * This lets user to ^C if his 99% complete 10 GB file download
611 * failed to restart *without* losing the almost complete file.
// Cut the file at the current write position; skipped when lseek fails.
614 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
615 if (pos != (off_t)-1)
616 ftruncate(G.output_fd, pos);
619 /* Draw full bar and free its resources */
620 G.chunked = 0; /* makes it show 100% even for chunked download */
621 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
622 progress_meter(PROGRESS_END);
// Downloads a single URL.
// url: URL text as given on the command line.
// Visible steps: parse the URL; choose a proxy (from the ftp_proxy or
// http_proxy environment variable) or the target itself as the server; pick
// the output file name; find the resume offset for -c; resolve and connect;
// then either talk HTTP (request line, headers, status code, response headers,
// redirects) or set up an FTP session; finally hand the data stream to
// retrieve_file_data unless spider mode is on, and free per-URL allocations.
// NOTE(review): many lines (labels, braces, else branches, case labels, some
// assignments) are absent from this listing; the comments describe only the
// visible statements.
625 static void download_one_url(const char *url)
627 bool use_proxy; /* Use proxies if env vars are set */
629 len_and_sockaddr *lsa;
630 FILE *sfp; /* socket to web/ftp server */
631 FILE *dfp; /* socket to ftp server (data) */
633 char *fname_out_alloc;
634 char *redirected_path = NULL;
635 struct host_info server;
636 struct host_info target;
638 server.allocated = NULL;
639 target.allocated = NULL;
643 parse_url(url, &target);
645 /* Use the proxy if necessary */
// Any proxy flag value other than the literal off enables the proxy lookup.
646 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
648 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
// An unset or empty variable means no proxy.
649 use_proxy = (proxy && proxy[0]);
651 parse_url(proxy, &server);
654 server.port = target.port;
655 if (ENABLE_FEATURE_IPV6) {
656 //free(server.allocated); - can't be non-NULL
// Separate copy of the host name; target.host has its scope id stripped below.
657 server.host = server.allocated = xstrdup(target.host);
659 server.host = target.host;
663 if (ENABLE_FEATURE_IPV6)
664 strip_ipv6_scope_id(target.host);
666 /* If there was no -O FILE, guess output filename */
667 fname_out_alloc = NULL;
668 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
669 G.fname_out = bb_get_last_path_component_nostrip(target.path);
670 /* handle "wget http://kernel.org//" */
671 if (G.fname_out[0] == '/' || !G.fname_out[0])
672 G.fname_out = (char*)"index.html";
673 /* -P DIR is considered only if there was no -O FILE */
675 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
677 /* redirects may free target.path later, need to make a copy */
678 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
681 #if ENABLE_FEATURE_WGET_STATUSBAR
682 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
685 /* Determine where to start transfer */
687 if (option_mask32 & WGET_OPT_CONTINUE) {
688 G.output_fd = open(G.fname_out, O_WRONLY);
689 if (G.output_fd >= 0) {
// Resume offset is the current size of the existing file.
690 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
692 /* File doesn't exist. We do not create file here yet.
693 * We are not sure it exists on remote side */
698 lsa = xhost2sockaddr(server.host, server.port);
699 if (!(option_mask32 & WGET_OPT_QUIET)) {
700 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
701 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
705 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
// HTTP is spoken to a proxy or to an http target; the FTP session further
// below handles the remaining case.
708 if (use_proxy || !target.is_ftp) {
716 /* Open socket to http server */
717 sfp = open_socket(lsa);
719 /* Send HTTP request */
// Absolute-URI request line, rebuilt with the ftp or http scheme.
// NOTE(review): presumably the proxy branch - the test is not visible.
721 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
722 target.is_ftp ? "f" : "ht", target.host,
725 if (option_mask32 & WGET_OPT_POST_DATA)
726 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
728 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
731 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
732 target.host, G.user_agent);
734 /* Ask server to close the connection as soon as we are done
735 * (IOW: we do not intend to send more requests)
737 fprintf(sfp, "Connection: close\r\n");
739 #if ENABLE_FEATURE_WGET_AUTHENTICATION
// The +6 skips the first six characters of the literal, so the header sent
// for the target credentials starts at Authorization.
741 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
742 base64enc(target.user));
744 if (use_proxy && server.user) {
745 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
746 base64enc(server.user));
750 if (G.beg_range != 0)
751 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
753 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
755 fputs(G.extra_headers, sfp);
757 if (option_mask32 & WGET_OPT_POST_DATA) {
759 "Content-Type: application/x-www-form-urlencoded\r\n"
760 "Content-Length: %u\r\n"
763 (int) strlen(G.post_data), G.post_data
768 fprintf(sfp, "\r\n");
774 * Retrieve HTTP response line and check for "200" status code.
// Skip the first word of the status line and the blanks after it, leaving
// str at the status code.
780 str = skip_non_whitespace(str);
781 str = skip_whitespace(str);
782 // FIXME: no error check
783 // xatou wouldn't work: "200 OK"
788 while (gethdr(sfp) != NULL)
789 /* eat all remaining headers */;
793 Response 204 doesn't say "null file", it says "metadata
794 has changed but data didn't":
796 "10.2.5 204 No Content
797 The server has fulfilled the request but does not need to return
798 an entity-body, and might want to return updated metainformation.
799 The response MAY include new or updated metainformation in the form
800 of entity-headers, which if present SHOULD be associated with
801 the requested variant.
803 If the client is a user agent, it SHOULD NOT change its document
804 view from that which caused the request to be sent. This response
805 is primarily intended to allow input for actions to take place
806 without causing a change to the user agent's active document view,
807 although any new or updated metainformation SHOULD be applied
808 to the document currently in the user agent's active view.
810 The 204 response MUST NOT include a message-body, and thus
811 is always terminated by the first empty line after the header fields."
813 However, in real world it was observed that some web servers
814 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
817 if (G.beg_range != 0) {
818 /* "Range:..." was not honored by the server.
819 * Restart download from the beginning.
821 reset_beg_range_to_zero();
824 case 300: /* redirection */
829 case 206: /* Partial Content */
830 if (G.beg_range != 0)
831 /* "Range:..." worked. Good. */
833 /* Partial Content even though we did not ask for it??? */
836 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
840 * Retrieve HTTP headers.
842 while ((str = gethdr(sfp)) != NULL) {
843 static const char keywords[] ALIGN1 =
844 "content-length\0""transfer-encoding\0""location\0";
846 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
850 /* gethdr converted "FOO:" string to lowercase */
852 /* strip trailing whitespace */
853 char *s = strchrnul(str, '\0') - 1;
854 while (s >= str && (*s == ' ' || *s == '\t')) {
// The header name sits at the start of G.wget_buf; the key is its 1-based
// position in the keywords table.
858 key = index_in_strings(keywords, G.wget_buf) + 1;
859 if (key == KEY_content_length) {
860 G.content_len = BB_STRTOOFF(str, NULL, 10);
861 if (G.content_len < 0 || errno) {
862 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
867 if (key == KEY_transfer_encoding) {
// Only the chunked transfer encoding is accepted.
868 if (strcmp(str_tolower(str), "chunked") != 0)
869 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
// A Location header is followed only for status codes of 300 and above,
// and only a limited number of times.
872 if (key == KEY_location && status >= 300) {
873 if (--redir_limit == 0)
874 bb_error_msg_and_die("too many redirections");
877 free(redirected_path);
878 target.path = redirected_path = xstrdup(str+1);
879 /* lsa stays the same: it's on the same server */
881 parse_url(str, &target);
883 free(server.allocated);
884 server.allocated = NULL;
885 server.host = target.host;
886 /* strip_ipv6_scope_id(target.host); - no! */
887 /* we assume remote never gives us IPv6 addr with scope id */
888 server.port = target.port;
891 } /* else: lsa stays the same: we use proxy */
893 goto establish_session;
896 // if (status >= 300)
897 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
899 /* For HTTP, data is pumped over the same connection */
906 sfp = prepare_ftp_session(&dfp, &target, lsa);
911 if (!(option_mask32 & WGET_OPT_SPIDER)) {
// Open the output file with the flags chosen in wget_main.
// NOTE(review): presumably only when -c has not already opened it - the
// guarding test is not visible.
913 G.output_fd = xopen(G.fname_out, G.o_flags);
914 retrieve_file_data(dfp);
915 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
922 /* It's ftp. Close data connection properly */
// After the data connection is done the control connection must answer 226.
924 if (ftpcmd(NULL, NULL, sfp) != 226)
925 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
926 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
930 free(server.allocated);
931 free(target.allocated);
932 free(fname_out_alloc);
933 free(redirected_path);
// Applet entry point.
// argc is unused; argv is handed to getopt32 and then consumed one URL at a time.
// Visible steps: set defaults (timeout 900 seconds, proxy flag on, user agent
// Wget), parse options, join the collected header option values into
// G.extra_headers with CRLF after each, choose the output open flags, call
// download_one_url on the URL arguments, close the output descriptor and
// return EXIT_SUCCESS.
// NOTE(review): several lines (loop headers, braces, preprocessor lines, some
// assignments) are absent from this listing; the comments describe only the
// visible statements.
936 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
937 int wget_main(int argc UNUSED_PARAM, char **argv)
939 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
940 static const char wget_longopts[] ALIGN1 =
941 /* name, has_arg, val */
942 "continue\0" No_argument "c"
943 //FIXME: -s isn't --spider, it's --save-headers!
944 "spider\0" No_argument "s"
945 "quiet\0" No_argument "q"
946 "output-document\0" Required_argument "O"
947 "directory-prefix\0" Required_argument "P"
948 "proxy\0" Required_argument "Y"
949 "user-agent\0" Required_argument "U"
950 #if ENABLE_FEATURE_WGET_TIMEOUT
951 "timeout\0" Required_argument "T"
954 // "tries\0" Required_argument "t"
955 /* Ignored (we always use PASV): */
956 "passive-ftp\0" No_argument "\xff"
957 "header\0" Required_argument "\xfe"
958 "post-data\0" Required_argument "\xfd"
959 /* Ignored (we don't do ssl) */
960 "no-check-certificate\0" No_argument "\xfc"
961 /* Ignored (we don't support caching) */
962 "no-cache\0" No_argument "\xfb"
966 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
967 llist_t *headers_llist = NULL;
972 #if ENABLE_FEATURE_WGET_TIMEOUT
973 G.timeout_seconds = 900;
974 signal(SIGALRM, alarm_handler);
976 G.proxy_flag = "on"; /* use proxies if env vars are set */
977 G.user_agent = "Wget"; /* "User-Agent" header field */
979 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
980 applet_long_options = wget_longopts;
982 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
// The pointer arguments follow the order of the option letters that take a value.
983 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
984 &G.fname_out, &G.dir_prefix,
985 &G.proxy_flag, &G.user_agent,
986 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
987 NULL /* -t RETRIES */
988 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
989 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
993 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
997 llist_t *ll = headers_llist;
// Each header contributes its length plus two bytes for the CRLF.
999 size += strlen(ll->data) + 2;
1002 G.extra_headers = cp = xmalloc(size);
// Pop the headers one by one and append each followed by CRLF.
1003 while (headers_llist) {
1004 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
// Default open flags include O_EXCL, so the open fails if the file already exists.
1010 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1011 if (G.fname_out) { /* -O FILE ? */
1012 if (LONE_DASH(G.fname_out)) { /* -O - ? */
// Writing to stdout turns the continue option off.
1014 option_mask32 &= ~WGET_OPT_CONTINUE;
1016 /* compat with wget: -O FILE can overwrite */
1017 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1021 download_one_url(*argv++);
1023 if (G.output_fd >= 0)
1024 xclose(G.output_fd);
1026 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1027 free(G.extra_headers);
1031 return EXIT_SUCCESS;