wget: move help text to .c file
[oweals/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 //usage:       "        [--no-check-certificate] [-U|--user-agent AGENT]"
17 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
18 //usage:        )
19 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
20 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
21 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
22 //usage:        )
23 //usage:#define wget_full_usage "\n\n"
24 //usage:       "Retrieve files via HTTP or FTP\n"
25 //usage:     "\nOptions:"
26 //usage:     "\n        -s      Spider mode - only check file existence"
27 //usage:     "\n        -c      Continue retrieval of aborted transfer"
28 //usage:     "\n        -q      Quiet"
29 //usage:     "\n        -P DIR  Save to DIR (default .)"
30 //usage:        IF_FEATURE_WGET_TIMEOUT(
31 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
32 //usage:        )
33 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
34 //usage:     "\n        -U STR  Use STR for User-Agent header"
35 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
36
37 #include "libbb.h"
38
/* Protocol trace hook used by fgets_and_trim() and ftpcmd().
 * The active definition discards its arguments; enabling the commented-out
 * variant instead routes every traced line through bb_error_msg(). */
39 //#define log_io(...) bb_error_msg(__VA_ARGS__)
40 #define log_io(...) ((void)0)
41
42
/*
 * Components of one URL, as filled in by parse_url().
 * host, path and user normally point into the 'allocated' copy of the URL,
 * so they stay valid only until that buffer is freed or replaced.
 */
43 struct host_info {
44         char *allocated;   /* private copy of the URL; parse_url() frees the previous one */
45         const char *path;  /* text after the host: no leading '/', may start with '?' or '#'; "" if absent */
46         const char *user;  /* text before the last '@' of the host part; parse_url() never resets it */
47         char       *host;  /* host part of the URL (after any "user@") */
48         int         port;  /* from bb_lookup_port(): "http" (default 80) or "ftp" (default 21) */
49         smallint    is_ftp; /* 1 for "ftp://" URLs, 0 for "http://" */
50 };
51
52
53 /* Globals */
/*
 * All mutable state of the applet. One zero-filled instance is allocated by
 * INIT_G() and accessed through the G macro.
 */
54 struct globals {
55         off_t content_len;        /* Bytes still expected (Content-Length, FTP SIZE or chunk size); counted down while reading */
56         off_t beg_range;          /* Range at which continue begins */
57 #if ENABLE_FEATURE_WGET_STATUSBAR
58         off_t transferred;        /* Number of bytes transferred so far */
59         const char *curfile;      /* Name of current file being transferred */
60         bb_progress_t pmt;        /* state handed to the bb_progress_*() helpers */
61 #endif
62         char *dir_prefix;         /* directory prepended to a guessed output name (used only without -O) */
63 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
64         char *post_data;          /* body for a POST request; URL-escaped before sending */
65         char *extra_headers;      /* extra header text written verbatim into the HTTP request */
66 #endif
67         char *fname_out;        /* where to direct output (-O) */
68         const char *proxy_flag; /* Use proxies if env vars are set */
69         const char *user_agent; /* "User-Agent" header field */
70 #if ENABLE_FEATURE_WGET_TIMEOUT
71         unsigned timeout_seconds; /* seconds without data before giving up; INIT_G() sets 900, 0 disables the check */
72 #endif
73         int output_fd;            /* descriptor the downloaded data is written to */
74         int o_flags;              /* NOTE(review): not used in the visible part of the file - presumably open() flags for the output file */
75         smallint chunked;         /* chunked transfer encoding */
76         smallint got_clen;        /* got content-length: from server  */
77         /* Local downloads do benefit from big buffer.
78          * With 512 byte buffer, it was measured to be
79          * an order of magnitude slower than with big one.
80          */
81         uint64_t just_to_align_next_member; /* never read; as the name says, only forces alignment of wget_buf */
82         char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024]; /* shared scratch buffer: response lines, base64 output, file data */
83 } FIX_ALIASING;
/* G gives field access to the single struct globals instance.
 * INIT_G() allocates that instance zero-filled (xzalloc) and, when the
 * timeout feature is compiled in, sets the default read timeout to 900 s. */
84 #define G (*ptr_to_globals)
85 #define INIT_G() do { \
86         SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
87         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
88 } while (0)
89
90
91 /* Must match option string! */
/* Option bits, tested against option_mask32 throughout this file.
 * The bit positions have to follow the order of the option string, which is
 * outside the part of the file shown here. */
92 enum {
93         WGET_OPT_CONTINUE   = (1 << 0), /* resume: start at the size of the existing output file */
94         WGET_OPT_SPIDER     = (1 << 1),
95         WGET_OPT_QUIET      = (1 << 2), /* suppresses the progress meter and "Connecting to" message */
96         WGET_OPT_OUTNAME    = (1 << 3), /* -O FILE given: output name is not guessed from the URL */
97         WGET_OPT_PREFIX     = (1 << 4),
98         WGET_OPT_PROXY      = (1 << 5),
99         WGET_OPT_USER_AGENT = (1 << 6),
100         WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
101         WGET_OPT_RETRIES    = (1 << 8),
102         WGET_OPT_PASSIVE    = (1 << 9),
        /* The two long-option-only bits are multiplied by the feature macro;
         * presumably that macro is 0 or 1, making them 0 when the feature is off. */
103         WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
104         WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS, /* send POST instead of GET */
105 };
106
/* Values for the 'flag' argument of progress_meter(). */
107 enum {
108         PROGRESS_START = -1, /* initialize the meter, then draw it */
109         PROGRESS_END   = 0,  /* draw it a last time, free it and print a newline */
110         PROGRESS_BUMP  = 1,  /* plain redraw */
111 };
112 #if ENABLE_FEATURE_WGET_STATUSBAR
113 static void progress_meter(int flag)
114 {
115         if (option_mask32 & WGET_OPT_QUIET)
116                 return;
117
118         if (flag == PROGRESS_START)
119                 bb_progress_init(&G.pmt, G.curfile);
120
121         bb_progress_update(&G.pmt,
122                         G.beg_range,
123                         G.transferred,
124                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
125         );
126
127         if (flag == PROGRESS_END) {
128                 bb_progress_free(&G.pmt);
129                 bb_putchar_stderr('\n');
130                 G.transferred = 0;
131         }
132 }
133 #else
134 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
135 #endif
136
137
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits 'host' in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Strings that are not of the form "[addr%zone]" or "[addr%zone]:port"
 * are left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
        char *scope, *cp;

        /* bbox wget actually handles IPv6 addresses without [], like
         * wget "http://::1/xxx", but this is not standard.
         * To save code, _here_ we do not support it. */

        if (host[0] != '[')
                return; /* not IPv6 */

        scope = strchr(host, '%');
        if (!scope)
                return;

        /* Remove the IPv6 zone identifier from the host address */
        cp = strchr(host, ']');
        if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
                /* malformed address (not "[xx]:nn" or "[xx]") */
                return;
        }

        /* A '%' found after the closing bracket (e.g. "[::1]:80%x") is not
         * a zone identifier. Copying "]..." upwards onto it would make the
         * string longer and write past its end, so leave such input alone. */
        if (scope > cp)
                return;

        /* cp points to "]...", scope points to "%eth0]...":
         * slide the tail (including its NUL) down over the zone id. */
        memmove(scope, cp, strlen(cp) + 1);
}
173
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode 'str' into the shared buffer G.wget_buf and return that
 * buffer. The result is therefore only valid until G.wget_buf is reused.
 * Over-long input is silently cut so that the encoded form fits.
 */
static char *base64enc(const char *str)
{
        /* paranoia: longest input we are willing to encode */
        const size_t limit = sizeof(G.wget_buf)/4*3 - 10;
        unsigned n = strlen(str);

        if (n > limit)
                n = limit;

        bb_uuencode(G.wget_buf, str, n, bb_uuenc_tbl_base64);
        return G.wget_buf;
}
#endif
185
/*
 * Cut 's' at its first control character (any byte below ' ') and return 's'.
 * Bytes with the high bit set are kept: the scan works on unsigned chars.
 * Used before echoing server- or user-supplied text in messages.
 */
static char* sanitize_string(char *s)
{
        unsigned char *end;

        for (end = (unsigned char *)s; *end >= ' '; end++)
                continue;
        *end = '\0';

        return s;
}
194
195 static FILE *open_socket(len_and_sockaddr *lsa)
196 {
197         FILE *fp;
198
199         /* glibc 2.4 seems to try seeking on it - ??! */
200         /* hopefully it understands what ESPIPE means... */
201         fp = fdopen(xconnect_stream(lsa), "r+");
202         if (fp == NULL)
203                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
204
205         return fp;
206 }
207
208 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
209 static char fgets_and_trim(FILE *fp)
210 {
211         char c;
212         char *buf_ptr;
213
214         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
215                 bb_perror_msg_and_die("error getting response");
216
217         buf_ptr = strchrnul(G.wget_buf, '\n');
218         c = *buf_ptr;
219         *buf_ptr = '\0';
220         buf_ptr = strchrnul(G.wget_buf, '\r');
221         *buf_ptr = '\0';
222
223         log_io("< %s", G.wget_buf);
224
225         return c;
226 }
227
228 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
229 {
230         int result;
231         if (s1) {
232                 if (!s2)
233                         s2 = "";
234                 fprintf(fp, "%s%s\r\n", s1, s2);
235                 fflush(fp);
236                 log_io("> %s%s", s1, s2);
237         }
238
239         do {
240                 fgets_and_trim(fp);
241         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
242
243         G.wget_buf[3] = '\0';
244         result = xatoi_positive(G.wget_buf);
245         G.wget_buf[3] = ' ';
246         return result;
247 }
248
249 static void parse_url(const char *src_url, struct host_info *h)
250 {
251         char *url, *p, *sp;
252
253         free(h->allocated);
254         h->allocated = url = xstrdup(src_url);
255
256         if (strncmp(url, "http://", 7) == 0) {
257                 h->port = bb_lookup_port("http", "tcp", 80);
258                 h->host = url + 7;
259                 h->is_ftp = 0;
260         } else if (strncmp(url, "ftp://", 6) == 0) {
261                 h->port = bb_lookup_port("ftp", "tcp", 21);
262                 h->host = url + 6;
263                 h->is_ftp = 1;
264         } else
265                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
266
267         // FYI:
268         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
269         //   'GET /?var=a/b HTTP 1.0'
270         //   and saves 'index.html?var=a%2Fb' (we save 'b')
271         // wget 'http://busybox.net?login=john@doe':
272         //   request: 'GET /?login=john@doe HTTP/1.0'
273         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
274         // wget 'http://busybox.net#test/test':
275         //   request: 'GET / HTTP/1.0'
276         //   saves: 'index.html' (we save 'test')
277         //
278         // We also don't add unique .N suffix if file exists...
279         sp = strchr(h->host, '/');
280         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
281         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
282         if (!sp) {
283                 h->path = "";
284         } else if (*sp == '/') {
285                 *sp = '\0';
286                 h->path = sp + 1;
287         } else { // '#' or '?'
288                 // http://busybox.net?login=john@doe is a valid URL
289                 // memmove converts to:
290                 // http:/busybox.nett?login=john@doe...
291                 memmove(h->host - 1, h->host, sp - h->host);
292                 h->host--;
293                 sp[-1] = '\0';
294                 h->path = sp;
295         }
296
297         // We used to set h->user to NULL here, but this interferes
298         // with handling of code 302 ("object was moved")
299
300         sp = strrchr(h->host, '@');
301         if (sp != NULL) {
302                 h->user = h->host;
303                 *sp = '\0';
304                 h->host = sp + 1;
305         }
306
307         sp = h->host;
308 }
309
310 static char *gethdr(FILE *fp)
311 {
312         char *s, *hdrval;
313         int c;
314
315         /* *istrunc = 0; */
316
317         /* retrieve header line */
318         c = fgets_and_trim(fp);
319
320         /* end of the headers? */
321         if (G.wget_buf[0] == '\0')
322                 return NULL;
323
324         /* convert the header name to lower case */
325         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
326                 /* tolower for "A-Z", no-op for "0-9a-z-." */
327                 *s |= 0x20;
328         }
329
330         /* verify we are at the end of the header name */
331         if (*s != ':')
332                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
333
334         /* locate the start of the header value */
335         *s++ = '\0';
336         hdrval = skip_whitespace(s);
337
338         if (c != '\n') {
339                 /* Rats! The buffer isn't big enough to hold the entire header value */
340                 while (c = getc(fp), c != EOF && c != '\n')
341                         continue;
342         }
343
344         return hdrval;
345 }
346
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * Return a newly allocated, URL-encoded copy of 'str' (see RFC 2396).
 * ASCII letters, digits and the characters !&'()*-.=_~ are copied as is;
 * every other byte becomes "%XX" with upper-case hex digits.
 * The caller frees the result.
 */
static char *URL_escape(const char *str)
{
        char *out;
        char *cursor;
        unsigned char ch;

        /* worst case: every byte expands to three */
        cursor = out = xmalloc(strlen(str) * 3 + 1);

        for (; (ch = *str) != '\0'; str++) {
                unsigned char lower = ch | 0x20;
                int keep = (ch >= '0' && ch <= '9') || (lower >= 'a' && lower <= 'z');

                if (!keep) {
                        switch (ch) {
                        case '!': case '&': case '\'': case '(':
                        case ')': case '*': case '-': case '.':
                        case '=': case '_': case '~':
                                keep = 1;
                                break;
                        default:
                                break;
                        }
                }

                if (keep) {
                        *cursor++ = ch;
                        continue;
                }
                *cursor++ = '%';
                *cursor++ = bb_hexdigits_upcase[ch >> 4];
                *cursor++ = bb_hexdigits_upcase[ch & 0xf];
        }
        *cursor = '\0';

        return out;
}
#endif
384
385 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
386 {
387         FILE *sfp;
388         char *str;
389         int port;
390
391         if (!target->user)
392                 target->user = xstrdup("anonymous:busybox@");
393
394         sfp = open_socket(lsa);
395         if (ftpcmd(NULL, NULL, sfp) != 220)
396                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
397
398         /*
399          * Splitting username:password pair,
400          * trying to log in
401          */
402         str = strchr(target->user, ':');
403         if (str)
404                 *str++ = '\0';
405         switch (ftpcmd("USER ", target->user, sfp)) {
406         case 230:
407                 break;
408         case 331:
409                 if (ftpcmd("PASS ", str, sfp) == 230)
410                         break;
411                 /* fall through (failed login) */
412         default:
413                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
414         }
415
416         ftpcmd("TYPE I", NULL, sfp);
417
418         /*
419          * Querying file size
420          */
421         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
422                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
423                 if (G.content_len < 0 || errno) {
424                         bb_error_msg_and_die("SIZE value is garbage");
425                 }
426                 G.got_clen = 1;
427         }
428
429         /*
430          * Entering passive mode
431          */
432         if (ftpcmd("PASV", NULL, sfp) != 227) {
433  pasv_error:
434                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
435         }
436         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
437         // Server's IP is N1.N2.N3.N4 (we ignore it)
438         // Server's port for data connection is P1*256+P2
439         str = strrchr(G.wget_buf, ')');
440         if (str) str[0] = '\0';
441         str = strrchr(G.wget_buf, ',');
442         if (!str) goto pasv_error;
443         port = xatou_range(str+1, 0, 255);
444         *str = '\0';
445         str = strrchr(G.wget_buf, ',');
446         if (!str) goto pasv_error;
447         port += xatou_range(str+1, 0, 255) * 256;
448         set_nport(lsa, htons(port));
449
450         *dfpp = open_socket(lsa);
451
452         if (G.beg_range) {
453                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
454                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
455                         G.content_len -= G.beg_range;
456         }
457
458         if (ftpcmd("RETR ", target->path, sfp) > 150)
459                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
460
461         return sfp;
462 }
463
464 static void NOINLINE retrieve_file_data(FILE *dfp)
465 {
466 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
467 # if ENABLE_FEATURE_WGET_TIMEOUT
468         unsigned second_cnt;
469 # endif
470         struct pollfd polldata;
471
472         polldata.fd = fileno(dfp);
473         polldata.events = POLLIN | POLLPRI;
474 #endif
475         progress_meter(PROGRESS_START);
476
477         if (G.chunked)
478                 goto get_clen;
479
480         /* Loops only if chunked */
481         while (1) {
482
483 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
484                 /* Must use nonblocking I/O, otherwise fread will loop
485                  * and *block* until it reads full buffer,
486                  * which messes up progress bar and/or timeout logic.
487                  * Because of nonblocking I/O, we need to dance
488                  * very carefully around EAGAIN. See explanation at
489                  * clearerr() call.
490                  */
491                 ndelay_on(polldata.fd);
492 #endif
493                 while (1) {
494                         int n;
495                         unsigned rdsz;
496
497                         rdsz = sizeof(G.wget_buf);
498                         if (G.got_clen) {
499                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
500                                         if ((int)G.content_len <= 0)
501                                                 break;
502                                         rdsz = (unsigned)G.content_len;
503                                 }
504                         }
505
506 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
507 # if ENABLE_FEATURE_WGET_TIMEOUT
508                         second_cnt = G.timeout_seconds;
509 # endif
510                         while (1) {
511                                 if (safe_poll(&polldata, 1, 1000) != 0)
512                                         break; /* error, EOF, or data is available */
513 # if ENABLE_FEATURE_WGET_TIMEOUT
514                                 if (second_cnt != 0 && --second_cnt == 0) {
515                                         progress_meter(PROGRESS_END);
516                                         bb_error_msg_and_die("download timed out");
517                                 }
518 # endif
519                                 /* Needed for "stalled" indicator */
520                                 progress_meter(PROGRESS_BUMP);
521                         }
522
523                         /* fread internally uses read loop, which in our case
524                          * is usually exited when we get EAGAIN.
525                          * In this case, libc sets error marker on the stream.
526                          * Need to clear it before next fread to avoid possible
527                          * rare false positive ferror below. Rare because usually
528                          * fread gets more than zero bytes, and we don't fall
529                          * into if (n <= 0) ...
530                          */
531                         clearerr(dfp);
532                         errno = 0;
533 #endif
534                         n = fread(G.wget_buf, 1, rdsz, dfp);
535                         /* man fread:
536                          * If error occurs, or EOF is reached, the return value
537                          * is a short item count (or zero).
538                          * fread does not distinguish between EOF and error.
539                          */
540                         if (n <= 0) {
541 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
542                                 if (errno == EAGAIN) /* poll lied, there is no data? */
543                                         continue; /* yes */
544 #endif
545                                 if (ferror(dfp))
546                                         bb_perror_msg_and_die(bb_msg_read_error);
547                                 break; /* EOF, not error */
548                         }
549
550                         xwrite(G.output_fd, G.wget_buf, n);
551
552 #if ENABLE_FEATURE_WGET_STATUSBAR
553                         G.transferred += n;
554                         progress_meter(PROGRESS_BUMP);
555 #endif
556                         if (G.got_clen) {
557                                 G.content_len -= n;
558                                 if (G.content_len == 0)
559                                         break;
560                         }
561                 }
562 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
563                 clearerr(dfp);
564                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
565 #endif
566                 if (!G.chunked)
567                         break;
568
569                 fgets_and_trim(dfp); /* Eat empty line */
570  get_clen:
571                 fgets_and_trim(dfp);
572                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
573                 /* FIXME: error check? */
574                 if (G.content_len == 0)
575                         break; /* all done! */
576                 G.got_clen = 1;
577         }
578
579         /* Draw full bar and free its resources */
580         G.chunked = 0;  /* makes it show 100% even for chunked download */
581         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
582         progress_meter(PROGRESS_END);
583 }
584
585 static void download_one_url(const char *url)
586 {
587         bool use_proxy;                 /* Use proxies if env vars are set  */
588         int redir_limit;
589         len_and_sockaddr *lsa;
590         FILE *sfp;                      /* socket to web/ftp server         */
591         FILE *dfp;                      /* socket to ftp server (data)      */
592         char *proxy = NULL;
593         char *fname_out_alloc;
594         struct host_info server;
595         struct host_info target;
596
597         server.allocated = NULL;
598         target.allocated = NULL;
599         server.user = NULL;
600         target.user = NULL;
601
602         parse_url(url, &target);
603
604         /* Use the proxy if necessary */
605         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
606         if (use_proxy) {
607                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
608                 use_proxy = (proxy && proxy[0]);
609                 if (use_proxy)
610                         parse_url(proxy, &server);
611         }
612         if (!use_proxy) {
613                 server.port = target.port;
614                 if (ENABLE_FEATURE_IPV6) {
615                         //free(server.allocated); - can't be non-NULL
616                         server.host = server.allocated = xstrdup(target.host);
617                 } else {
618                         server.host = target.host;
619                 }
620         }
621
622         if (ENABLE_FEATURE_IPV6)
623                 strip_ipv6_scope_id(target.host);
624
625         /* If there was no -O FILE, guess output filename */
626         fname_out_alloc = NULL;
627         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
628                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
629                 /* handle "wget http://kernel.org//" */
630                 if (G.fname_out[0] == '/' || !G.fname_out[0])
631                         G.fname_out = (char*)"index.html";
632                 /* -P DIR is considered only if there was no -O FILE */
633                 else {
634                         if (G.dir_prefix)
635                                 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
636                         else {
637                                 /* redirects may free target.path later, need to make a copy */
638                                 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
639                         }
640                 }
641         }
642 #if ENABLE_FEATURE_WGET_STATUSBAR
643         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
644 #endif
645
646         /* Determine where to start transfer */
647         G.beg_range = 0;
648         if (option_mask32 & WGET_OPT_CONTINUE) {
649                 G.output_fd = open(G.fname_out, O_WRONLY);
650                 if (G.output_fd >= 0) {
651                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
652                 }
653                 /* File doesn't exist. We do not create file here yet.
654                  * We are not sure it exists on remote side */
655         }
656
657         redir_limit = 5;
658  resolve_lsa:
659         lsa = xhost2sockaddr(server.host, server.port);
660         if (!(option_mask32 & WGET_OPT_QUIET)) {
661                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
662                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
663                 free(s);
664         }
665  establish_session:
666         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
667         G.got_clen = 0;
668         G.chunked = 0;
669         if (use_proxy || !target.is_ftp) {
670                 /*
671                  *  HTTP session
672                  */
673                 char *str;
674                 int status;
675
676
677                 /* Open socket to http server */
678                 sfp = open_socket(lsa);
679
680                 /* Send HTTP request */
681                 if (use_proxy) {
682                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
683                                 target.is_ftp ? "f" : "ht", target.host,
684                                 target.path);
685                 } else {
686                         if (option_mask32 & WGET_OPT_POST_DATA)
687                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
688                         else
689                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
690                 }
691
692                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
693                         target.host, G.user_agent);
694
695                 /* Ask server to close the connection as soon as we are done
696                  * (IOW: we do not intend to send more requests)
697                  */
698                 fprintf(sfp, "Connection: close\r\n");
699
700 #if ENABLE_FEATURE_WGET_AUTHENTICATION
701                 if (target.user) {
702                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
703                                 base64enc(target.user));
704                 }
705                 if (use_proxy && server.user) {
706                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
707                                 base64enc(server.user));
708                 }
709 #endif
710
711                 if (G.beg_range)
712                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
713
714 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
715                 if (G.extra_headers)
716                         fputs(G.extra_headers, sfp);
717
718                 if (option_mask32 & WGET_OPT_POST_DATA) {
719                         char *estr = URL_escape(G.post_data);
720                         fprintf(sfp,
721                                 "Content-Type: application/x-www-form-urlencoded\r\n"
722                                 "Content-Length: %u\r\n"
723                                 "\r\n"
724                                 "%s",
725                                 (int) strlen(estr), estr
726                         );
727                         free(estr);
728                 } else
729 #endif
730                 {
731                         fprintf(sfp, "\r\n");
732                 }
733
734                 fflush(sfp);
735
736                 /*
737                  * Retrieve HTTP response line and check for "200" status code.
738                  */
739  read_response:
740                 fgets_and_trim(sfp);
741
742                 str = G.wget_buf;
743                 str = skip_non_whitespace(str);
744                 str = skip_whitespace(str);
745                 // FIXME: no error check
746                 // xatou wouldn't work: "200 OK"
747                 status = atoi(str);
748                 switch (status) {
749                 case 0:
750                 case 100:
751                         while (gethdr(sfp) != NULL)
752                                 /* eat all remaining headers */;
753                         goto read_response;
754                 case 200:
755 /*
756 Response 204 doesn't say "null file", it says "metadata
757 has changed but data didn't":
758
759 "10.2.5 204 No Content
760 The server has fulfilled the request but does not need to return
761 an entity-body, and might want to return updated metainformation.
762 The response MAY include new or updated metainformation in the form
763 of entity-headers, which if present SHOULD be associated with
764 the requested variant.
765
766 If the client is a user agent, it SHOULD NOT change its document
767 view from that which caused the request to be sent. This response
768 is primarily intended to allow input for actions to take place
769 without causing a change to the user agent's active document view,
770 although any new or updated metainformation SHOULD be applied
771 to the document currently in the user agent's active view.
772
773 The 204 response MUST NOT include a message-body, and thus
774 is always terminated by the first empty line after the header fields."
775
776 However, in real world it was observed that some web servers
777 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
778 */
779                 case 204:
780                         break;
781                 case 300:  /* redirection */
782                 case 301:
783                 case 302:
784                 case 303:
785                         break;
786                 case 206:
787                         if (G.beg_range)
788                                 break;
789                         /* fall through */
790                 default:
791                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
792                 }
793
794                 /*
795                  * Retrieve HTTP headers.
796                  */
797                 while ((str = gethdr(sfp)) != NULL) {
798                         static const char keywords[] ALIGN1 =
799                                 "content-length\0""transfer-encoding\0""location\0";
800                         enum {
801                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
802                         };
803                         smalluint key;
804
805                         /* gethdr converted "FOO:" string to lowercase */
806
807                         /* strip trailing whitespace */
808                         char *s = strchrnul(str, '\0') - 1;
809                         while (s >= str && (*s == ' ' || *s == '\t')) {
810                                 *s = '\0';
811                                 s--;
812                         }
813                         key = index_in_strings(keywords, G.wget_buf) + 1;
814                         if (key == KEY_content_length) {
815                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
816                                 if (G.content_len < 0 || errno) {
817                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
818                                 }
819                                 G.got_clen = 1;
820                                 continue;
821                         }
822                         if (key == KEY_transfer_encoding) {
823                                 if (strcmp(str_tolower(str), "chunked") != 0)
824                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
825                                 G.chunked = 1;
826                         }
827                         if (key == KEY_location && status >= 300) {
828                                 if (--redir_limit == 0)
829                                         bb_error_msg_and_die("too many redirections");
830                                 fclose(sfp);
831                                 if (str[0] == '/') {
832                                         free(target.allocated);
833                                         target.path = target.allocated = xstrdup(str+1);
834                                         /* lsa stays the same: it's on the same server */
835                                 } else {
836                                         parse_url(str, &target);
837                                         if (!use_proxy) {
838                                                 free(server.allocated);
839                                                 server.allocated = NULL;
840                                                 server.host = target.host;
841                                                 /* strip_ipv6_scope_id(target.host); - no! */
842                                                 /* we assume remote never gives us IPv6 addr with scope id */
843                                                 server.port = target.port;
844                                                 free(lsa);
845                                                 goto resolve_lsa;
846                                         } /* else: lsa stays the same: we use proxy */
847                                 }
848                                 goto establish_session;
849                         }
850                 }
851 //              if (status >= 300)
852 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
853
854                 /* For HTTP, data is pumped over the same connection */
855                 dfp = sfp;
856
857         } else {
858                 /*
859                  *  FTP session
860                  */
861                 sfp = prepare_ftp_session(&dfp, &target, lsa);
862         }
863
864         free(lsa);
865
866         if (!(option_mask32 & WGET_OPT_SPIDER)) {
867                 if (G.output_fd < 0)
868                         G.output_fd = xopen(G.fname_out, G.o_flags);
869                 retrieve_file_data(dfp);
870                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
871                         xclose(G.output_fd);
872                         G.output_fd = -1;
873                 }
874         }
875
876         if (dfp != sfp) {
877                 /* It's ftp. Close data connection properly */
878                 fclose(dfp);
879                 if (ftpcmd(NULL, NULL, sfp) != 226)
880                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
881                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
882         }
883         fclose(sfp);
884
885         free(server.allocated);
886         free(target.allocated);
887         free(fname_out_alloc);
888 }
889
890 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
891 int wget_main(int argc UNUSED_PARAM, char **argv)
892 {
893 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
894         static const char wget_longopts[] ALIGN1 =
895                 /* name, has_arg, val */
896                 "continue\0"         No_argument       "c"
897 //FIXME: -s isn't --spider, it's --save-headers!
898                 "spider\0"           No_argument       "s"
899                 "quiet\0"            No_argument       "q"
900                 "output-document\0"  Required_argument "O"
901                 "directory-prefix\0" Required_argument "P"
902                 "proxy\0"            Required_argument "Y"
903                 "user-agent\0"       Required_argument "U"
904 #if ENABLE_FEATURE_WGET_TIMEOUT
905                 "timeout\0"          Required_argument "T"
906 #endif
907                 /* Ignored: */
908                 // "tries\0"            Required_argument "t"
909                 /* Ignored (we always use PASV): */
910                 "passive-ftp\0"      No_argument       "\xff"
911                 "header\0"           Required_argument "\xfe"
912                 "post-data\0"        Required_argument "\xfd"
913                 /* Ignored (we don't do ssl) */
914                 "no-check-certificate\0" No_argument   "\xfc"
915                 ;
916 #endif
917
918 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
919         llist_t *headers_llist = NULL;
920 #endif
921
922         INIT_G();
923
924         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
925         G.proxy_flag = "on";   /* use proxies if env vars are set */
926         G.user_agent = "Wget"; /* "User-Agent" header field */
927
928 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
929         applet_long_options = wget_longopts;
930 #endif
931         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
932         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
933                 &G.fname_out, &G.dir_prefix,
934                 &G.proxy_flag, &G.user_agent,
935                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
936                 NULL /* -t RETRIES */
937                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
938                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
939         );
940         argv += optind;
941
942 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
943         if (headers_llist) {
944                 int size = 1;
945                 char *cp;
946                 llist_t *ll = headers_llist;
947                 while (ll) {
948                         size += strlen(ll->data) + 2;
949                         ll = ll->link;
950                 }
951                 G.extra_headers = cp = xmalloc(size);
952                 while (headers_llist) {
953                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
954                 }
955         }
956 #endif
957
958         G.output_fd = -1;
959         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
960         if (G.fname_out) { /* -O FILE ? */
961                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
962                         G.output_fd = 1;
963                         option_mask32 &= ~WGET_OPT_CONTINUE;
964                 }
965                 /* compat with wget: -O FILE can overwrite */
966                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
967         }
968
969         while (*argv)
970                 download_one_url(*argv++);
971
972         if (G.output_fd >= 0)
973                 xclose(G.output_fd);
974
975         return EXIT_SUCCESS;
976 }