5d58450198a87c054f4e475c6890b21d261e8404
[oweals/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage:    "        [--no-check-certificate] [--no-cache]" */
18 //usage:       "        [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
19 //usage:        )
20 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23 //usage:        )
24 //usage:#define wget_full_usage "\n\n"
25 //usage:       "Retrieve files via HTTP or FTP\n"
26 //usage:     "\n        -s      Spider mode - only check file existence"
27 //usage:     "\n        -c      Continue retrieval of aborted transfer"
28 //usage:     "\n        -q      Quiet"
29 //usage:     "\n        -P DIR  Save to DIR (default .)"
30 //usage:        IF_FEATURE_WGET_TIMEOUT(
31 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
32 //usage:        )
33 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
34 //usage:     "\n        -U STR  Use STR for User-Agent header"
35 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
36
37 #include "libbb.h"
38
39 #if 0
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
41 #else
42 # define log_io(...) ((void)0)
43 #endif
44
45
/* Pieces of one URL as split up by parse_url() */
struct host_info {
        char *allocated;  /* private copy of the URL made by parse_url(); freed there on the next call */
        const char *path; /* text after the host part; "" when the URL has none */
        const char *user; /* percent-decoded text before the last '@' of the host part; untouched by parse_url() if there is no '@' */
        char       *host; /* host part; parse_url() does not split off a ":port" suffix */
        int         port; /* value bb_lookup_port() returned for the URL scheme */
        smallint    is_ftp; /* 1 for "ftp://" URLs, 0 for "http://" */
};
54
55
/* Globals */
/* All applet state lives in one heap block reached through the G macro below */
struct globals {
        off_t content_len;        /* Content-length of the file */
        off_t beg_range;          /* Range at which continue begins */
#if ENABLE_FEATURE_WGET_STATUSBAR
        off_t transferred;        /* Number of bytes transferred so far */
        const char *curfile;      /* Name of current file being transferred */
        bb_progress_t pmt;
#endif
        char *dir_prefix;       /* directory prepended to a guessed output name (-P) */
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
        char *post_data;        /* request body sent when WGET_OPT_POST_DATA is set */
        char *extra_headers;    /* extra request header text, written to the server as is */
#endif
        char *fname_out;        /* where to direct output (-O) */
        const char *proxy_flag; /* Use proxies if env vars are set */
        const char *user_agent; /* "User-Agent" header field */
#if ENABLE_FEATURE_WGET_TIMEOUT
        /* Number of consecutive one-second polls without data after which
         * the download is aborted; 0 disables the check */
        unsigned timeout_seconds;
#endif
        int output_fd;            /* descriptor the downloaded data is written to */
        int o_flags;
        smallint chunked;         /* chunked transfer encoding */
        smallint got_clen;        /* got content-length: from server  */
        /* Local downloads do benefit from big buffer.
         * With 512 byte buffer, it was measured to be
         * an order of magnitude slower than with big one.
         */
        uint64_t just_to_align_next_member;
        /* Shared scratch buffer: holds response lines, base64 output
         * and the data being copied to the output file */
        char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
} FIX_ALIASING;
#define G (*ptr_to_globals)
/* Allocate zero-filled globals; when timeout support is compiled in,
 * the network read timeout starts out at 900 seconds */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
        IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
} while (0)
92
93
/* Must match option string! */
/* Bits tested against option_mask32 */
enum {
        WGET_OPT_CONTINUE   = (1 << 0), /* resume: start at the end of an existing output file */
        WGET_OPT_SPIDER     = (1 << 1),
        WGET_OPT_QUIET      = (1 << 2), /* suppresses the progress meter and the "Connecting to" message */
        WGET_OPT_OUTNAME    = (1 << 3), /* -O FILE was given: output name is not guessed from the URL */
        WGET_OPT_PREFIX     = (1 << 4),
        WGET_OPT_PROXY      = (1 << 5),
        WGET_OPT_USER_AGENT = (1 << 6),
        WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
        WGET_OPT_RETRIES    = (1 << 8),
        WGET_OPT_PASSIVE    = (1 << 9),
        /* The two bits below are multiplied by the feature macro, so they
         * become 0 when it is 0 and tests against them can never succeed */
        WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
        WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
};
109
/* Values accepted by progress_meter() */
enum {
        PROGRESS_START = -1, /* initialize the progress state, then draw */
        PROGRESS_END   = 0,  /* draw, then free the progress state and end the line */
        PROGRESS_BUMP  = 1,  /* just redraw */
};
115 #if ENABLE_FEATURE_WGET_STATUSBAR
116 static void progress_meter(int flag)
117 {
118         if (option_mask32 & WGET_OPT_QUIET)
119                 return;
120
121         if (flag == PROGRESS_START)
122                 bb_progress_init(&G.pmt, G.curfile);
123
124         bb_progress_update(&G.pmt,
125                         G.beg_range,
126                         G.transferred,
127                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
128         );
129
130         if (flag == PROGRESS_END) {
131                 bb_progress_free(&G.pmt);
132                 bb_putchar_stderr('\n');
133                 G.transferred = 0;
134         }
135 }
136 #else
137 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
138 #endif
139
140
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * The string is left untouched when it does not start with '[', has no '%',
 * is not of the form "[xx]" or "[xx]:nn", or has its first '%' after the ']'.
 */
static void strip_ipv6_scope_id(char *host)
{
        char *scope, *cp;

        /* bbox wget actually handles IPv6 addresses without [], like
         * wget "http://::1/xxx", but this is not standard.
         * To save code, _here_ we do not support it. */

        if (host[0] != '[')
                return; /* not IPv6 */

        scope = strchr(host, '%');
        if (!scope)
                return;

        /* Remove the IPv6 zone identifier from the host address */
        cp = strchr(host, ']');
        if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
                /* malformed address (not "[xx]:nn" or "[xx]") */
                return;
        }

        /* A '%' that follows the ']' (as in "[::1]:8%30") is not a zone id.
         * Copying "]..." forward over itself would never see the
         * terminating NUL and would run off the end of the buffer. */
        if (scope > cp)
                return;

        /* cp points to "]...", scope points to "%eth0]..." */
        memmove(scope, cp, strlen(cp) + 1);
}
176
177 #if ENABLE_FEATURE_WGET_AUTHENTICATION
178 /* Base64-encode character string. */
179 static char *base64enc(const char *str)
180 {
181         unsigned len = strlen(str);
182         if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
183                 len = sizeof(G.wget_buf)/4*3 - 10;
184         bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
185         return G.wget_buf;
186 }
187 #endif
188
/*
 * Cut s at its first byte below ' ' (control characters; the terminating
 * NUL counts too, so a clean string is left as it was). Bytes are compared
 * as unsigned values, so bytes 0x80 and above are kept.
 * Modifies s in place and returns it.
 */
static char* sanitize_string(char *s)
{
        unsigned char *cursor;

        for (cursor = (unsigned char *) s; *cursor >= ' '; cursor++)
                continue;
        *cursor = '\0';

        return s;
}
197
198 static FILE *open_socket(len_and_sockaddr *lsa)
199 {
200         FILE *fp;
201
202         /* glibc 2.4 seems to try seeking on it - ??! */
203         /* hopefully it understands what ESPIPE means... */
204         fp = fdopen(xconnect_stream(lsa), "r+");
205         if (fp == NULL)
206                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
207
208         return fp;
209 }
210
211 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
212 static char fgets_and_trim(FILE *fp)
213 {
214         char c;
215         char *buf_ptr;
216
217         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
218                 bb_perror_msg_and_die("error getting response");
219
220         buf_ptr = strchrnul(G.wget_buf, '\n');
221         c = *buf_ptr;
222         *buf_ptr = '\0';
223         buf_ptr = strchrnul(G.wget_buf, '\r');
224         *buf_ptr = '\0';
225
226         log_io("< %s", G.wget_buf);
227
228         return c;
229 }
230
231 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
232 {
233         int result;
234         if (s1) {
235                 if (!s2)
236                         s2 = "";
237                 fprintf(fp, "%s%s\r\n", s1, s2);
238                 fflush(fp);
239                 log_io("> %s%s", s1, s2);
240         }
241
242         do {
243                 fgets_and_trim(fp);
244         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
245
246         G.wget_buf[3] = '\0';
247         result = xatoi_positive(G.wget_buf);
248         G.wget_buf[3] = ' ';
249         return result;
250 }
251
252 static void parse_url(const char *src_url, struct host_info *h)
253 {
254         char *url, *p, *sp;
255
256         free(h->allocated);
257         h->allocated = url = xstrdup(src_url);
258
259         if (strncmp(url, "http://", 7) == 0) {
260                 h->port = bb_lookup_port("http", "tcp", 80);
261                 h->host = url + 7;
262                 h->is_ftp = 0;
263         } else if (strncmp(url, "ftp://", 6) == 0) {
264                 h->port = bb_lookup_port("ftp", "tcp", 21);
265                 h->host = url + 6;
266                 h->is_ftp = 1;
267         } else
268                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
269
270         // FYI:
271         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
272         //   'GET /?var=a/b HTTP 1.0'
273         //   and saves 'index.html?var=a%2Fb' (we save 'b')
274         // wget 'http://busybox.net?login=john@doe':
275         //   request: 'GET /?login=john@doe HTTP/1.0'
276         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
277         // wget 'http://busybox.net#test/test':
278         //   request: 'GET / HTTP/1.0'
279         //   saves: 'index.html' (we save 'test')
280         //
281         // We also don't add unique .N suffix if file exists...
282         sp = strchr(h->host, '/');
283         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
284         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
285         if (!sp) {
286                 h->path = "";
287         } else if (*sp == '/') {
288                 *sp = '\0';
289                 h->path = sp + 1;
290         } else { // '#' or '?'
291                 // http://busybox.net?login=john@doe is a valid URL
292                 // memmove converts to:
293                 // http:/busybox.nett?login=john@doe...
294                 memmove(h->host - 1, h->host, sp - h->host);
295                 h->host--;
296                 sp[-1] = '\0';
297                 h->path = sp;
298         }
299
300         // We used to set h->user to NULL here, but this interferes
301         // with handling of code 302 ("object was moved")
302
303         sp = strrchr(h->host, '@');
304         if (sp != NULL) {
305                 // URL-decode "user:password" string before base64-encoding:
306                 // wget http://test:my%20pass@example.com should send
307                 // Authorization: Basic dGVzdDpteSBwYXNz
308                 // which decodes to "test:my pass".
309                 // Standard wget and curl do this too.
310                 *sp = '\0';
311                 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
312                 h->host = sp + 1;
313         }
314
315         sp = h->host;
316 }
317
318 static char *gethdr(FILE *fp)
319 {
320         char *s, *hdrval;
321         int c;
322
323         /* retrieve header line */
324         c = fgets_and_trim(fp);
325
326         /* end of the headers? */
327         if (G.wget_buf[0] == '\0')
328                 return NULL;
329
330         /* convert the header name to lower case */
331         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
332                 /* tolower for "A-Z", no-op for "0-9a-z-." */
333                 *s |= 0x20;
334         }
335
336         /* verify we are at the end of the header name */
337         if (*s != ':')
338                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
339
340         /* locate the start of the header value */
341         *s++ = '\0';
342         hdrval = skip_whitespace(s);
343
344         if (c != '\n') {
345                 /* Rats! The buffer isn't big enough to hold the entire header value */
346                 while (c = getc(fp), c != EOF && c != '\n')
347                         continue;
348         }
349
350         return hdrval;
351 }
352
353 static void reset_beg_range_to_zero(void)
354 {
355         bb_error_msg("restart failed");
356         G.beg_range = 0;
357         xlseek(G.output_fd, 0, SEEK_SET);
358         /* Done at the end instead: */
359         /* ftruncate(G.output_fd, 0); */
360 }
361
362 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
363 {
364         FILE *sfp;
365         char *str;
366         int port;
367
368         if (!target->user)
369                 target->user = xstrdup("anonymous:busybox@");
370
371         sfp = open_socket(lsa);
372         if (ftpcmd(NULL, NULL, sfp) != 220)
373                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
374
375         /*
376          * Splitting username:password pair,
377          * trying to log in
378          */
379         str = strchr(target->user, ':');
380         if (str)
381                 *str++ = '\0';
382         switch (ftpcmd("USER ", target->user, sfp)) {
383         case 230:
384                 break;
385         case 331:
386                 if (ftpcmd("PASS ", str, sfp) == 230)
387                         break;
388                 /* fall through (failed login) */
389         default:
390                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
391         }
392
393         ftpcmd("TYPE I", NULL, sfp);
394
395         /*
396          * Querying file size
397          */
398         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
399                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
400                 if (G.content_len < 0 || errno) {
401                         bb_error_msg_and_die("SIZE value is garbage");
402                 }
403                 G.got_clen = 1;
404         }
405
406         /*
407          * Entering passive mode
408          */
409         if (ftpcmd("PASV", NULL, sfp) != 227) {
410  pasv_error:
411                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
412         }
413         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
414         // Server's IP is N1.N2.N3.N4 (we ignore it)
415         // Server's port for data connection is P1*256+P2
416         str = strrchr(G.wget_buf, ')');
417         if (str) str[0] = '\0';
418         str = strrchr(G.wget_buf, ',');
419         if (!str) goto pasv_error;
420         port = xatou_range(str+1, 0, 255);
421         *str = '\0';
422         str = strrchr(G.wget_buf, ',');
423         if (!str) goto pasv_error;
424         port += xatou_range(str+1, 0, 255) * 256;
425         set_nport(&lsa->u.sa, htons(port));
426
427         *dfpp = open_socket(lsa);
428
429         if (G.beg_range != 0) {
430                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
431                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
432                         G.content_len -= G.beg_range;
433                 else
434                         reset_beg_range_to_zero();
435         }
436
437         if (ftpcmd("RETR ", target->path, sfp) > 150)
438                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
439
440         return sfp;
441 }
442
443 static void NOINLINE retrieve_file_data(FILE *dfp)
444 {
445 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
446 # if ENABLE_FEATURE_WGET_TIMEOUT
447         unsigned second_cnt;
448 # endif
449         struct pollfd polldata;
450
451         polldata.fd = fileno(dfp);
452         polldata.events = POLLIN | POLLPRI;
453 #endif
454         progress_meter(PROGRESS_START);
455
456         if (G.chunked)
457                 goto get_clen;
458
459         /* Loops only if chunked */
460         while (1) {
461
462 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
463                 /* Must use nonblocking I/O, otherwise fread will loop
464                  * and *block* until it reads full buffer,
465                  * which messes up progress bar and/or timeout logic.
466                  * Because of nonblocking I/O, we need to dance
467                  * very carefully around EAGAIN. See explanation at
468                  * clearerr() call.
469                  */
470                 ndelay_on(polldata.fd);
471 #endif
472                 while (1) {
473                         int n;
474                         unsigned rdsz;
475
476                         rdsz = sizeof(G.wget_buf);
477                         if (G.got_clen) {
478                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
479                                         if ((int)G.content_len <= 0)
480                                                 break;
481                                         rdsz = (unsigned)G.content_len;
482                                 }
483                         }
484
485 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
486 # if ENABLE_FEATURE_WGET_TIMEOUT
487                         second_cnt = G.timeout_seconds;
488 # endif
489                         while (1) {
490                                 if (safe_poll(&polldata, 1, 1000) != 0)
491                                         break; /* error, EOF, or data is available */
492 # if ENABLE_FEATURE_WGET_TIMEOUT
493                                 if (second_cnt != 0 && --second_cnt == 0) {
494                                         progress_meter(PROGRESS_END);
495                                         bb_error_msg_and_die("download timed out");
496                                 }
497 # endif
498                                 /* Needed for "stalled" indicator */
499                                 progress_meter(PROGRESS_BUMP);
500                         }
501
502                         /* fread internally uses read loop, which in our case
503                          * is usually exited when we get EAGAIN.
504                          * In this case, libc sets error marker on the stream.
505                          * Need to clear it before next fread to avoid possible
506                          * rare false positive ferror below. Rare because usually
507                          * fread gets more than zero bytes, and we don't fall
508                          * into if (n <= 0) ...
509                          */
510                         clearerr(dfp);
511                         errno = 0;
512 #endif
513                         n = fread(G.wget_buf, 1, rdsz, dfp);
514                         /* man fread:
515                          * If error occurs, or EOF is reached, the return value
516                          * is a short item count (or zero).
517                          * fread does not distinguish between EOF and error.
518                          */
519                         if (n <= 0) {
520 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
521                                 if (errno == EAGAIN) /* poll lied, there is no data? */
522                                         continue; /* yes */
523 #endif
524                                 if (ferror(dfp))
525                                         bb_perror_msg_and_die(bb_msg_read_error);
526                                 break; /* EOF, not error */
527                         }
528
529                         xwrite(G.output_fd, G.wget_buf, n);
530
531 #if ENABLE_FEATURE_WGET_STATUSBAR
532                         G.transferred += n;
533                         progress_meter(PROGRESS_BUMP);
534 #endif
535                         if (G.got_clen) {
536                                 G.content_len -= n;
537                                 if (G.content_len == 0)
538                                         break;
539                         }
540                 }
541 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
542                 clearerr(dfp);
543                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
544 #endif
545                 if (!G.chunked)
546                         break;
547
548                 fgets_and_trim(dfp); /* Eat empty line */
549  get_clen:
550                 fgets_and_trim(dfp);
551                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
552                 /* FIXME: error check? */
553                 if (G.content_len == 0)
554                         break; /* all done! */
555                 G.got_clen = 1;
556         }
557
558         /* If -c failed, we restart from the beginning,
559          * but we do not truncate file then, we do it only now, at the end.
560          * This lets user to ^C if his 99% complete 10 GB file download
561          * failed to restart *without* losing the almost complete file.
562          */
563         {
564                 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
565                 if (pos != (off_t)-1)
566                         ftruncate(G.output_fd, pos);
567         }
568
569         /* Draw full bar and free its resources */
570         G.chunked = 0;  /* makes it show 100% even for chunked download */
571         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
572         progress_meter(PROGRESS_END);
573 }
574
575 static void download_one_url(const char *url)
576 {
577         bool use_proxy;                 /* Use proxies if env vars are set  */
578         int redir_limit;
579         len_and_sockaddr *lsa;
580         FILE *sfp;                      /* socket to web/ftp server         */
581         FILE *dfp;                      /* socket to ftp server (data)      */
582         char *proxy = NULL;
583         char *fname_out_alloc;
584         char *redirected_path = NULL;
585         struct host_info server;
586         struct host_info target;
587
588         server.allocated = NULL;
589         target.allocated = NULL;
590         server.user = NULL;
591         target.user = NULL;
592
593         parse_url(url, &target);
594
595         /* Use the proxy if necessary */
596         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
597         if (use_proxy) {
598                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
599                 use_proxy = (proxy && proxy[0]);
600                 if (use_proxy)
601                         parse_url(proxy, &server);
602         }
603         if (!use_proxy) {
604                 server.port = target.port;
605                 if (ENABLE_FEATURE_IPV6) {
606                         //free(server.allocated); - can't be non-NULL
607                         server.host = server.allocated = xstrdup(target.host);
608                 } else {
609                         server.host = target.host;
610                 }
611         }
612
613         if (ENABLE_FEATURE_IPV6)
614                 strip_ipv6_scope_id(target.host);
615
616         /* If there was no -O FILE, guess output filename */
617         fname_out_alloc = NULL;
618         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
619                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
620                 /* handle "wget http://kernel.org//" */
621                 if (G.fname_out[0] == '/' || !G.fname_out[0])
622                         G.fname_out = (char*)"index.html";
623                 /* -P DIR is considered only if there was no -O FILE */
624                 else {
625                         if (G.dir_prefix)
626                                 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
627                         else {
628                                 /* redirects may free target.path later, need to make a copy */
629                                 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
630                         }
631                 }
632         }
633 #if ENABLE_FEATURE_WGET_STATUSBAR
634         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
635 #endif
636
637         /* Determine where to start transfer */
638         G.beg_range = 0;
639         if (option_mask32 & WGET_OPT_CONTINUE) {
640                 G.output_fd = open(G.fname_out, O_WRONLY);
641                 if (G.output_fd >= 0) {
642                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
643                 }
644                 /* File doesn't exist. We do not create file here yet.
645                  * We are not sure it exists on remote side */
646         }
647
648         redir_limit = 5;
649  resolve_lsa:
650         lsa = xhost2sockaddr(server.host, server.port);
651         if (!(option_mask32 & WGET_OPT_QUIET)) {
652                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
653                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
654                 free(s);
655         }
656  establish_session:
657         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
658         G.got_clen = 0;
659         G.chunked = 0;
660         if (use_proxy || !target.is_ftp) {
661                 /*
662                  *  HTTP session
663                  */
664                 char *str;
665                 int status;
666
667
668                 /* Open socket to http server */
669                 sfp = open_socket(lsa);
670
671                 /* Send HTTP request */
672                 if (use_proxy) {
673                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
674                                 target.is_ftp ? "f" : "ht", target.host,
675                                 target.path);
676                 } else {
677                         if (option_mask32 & WGET_OPT_POST_DATA)
678                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
679                         else
680                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
681                 }
682
683                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
684                         target.host, G.user_agent);
685
686                 /* Ask server to close the connection as soon as we are done
687                  * (IOW: we do not intend to send more requests)
688                  */
689                 fprintf(sfp, "Connection: close\r\n");
690
691 #if ENABLE_FEATURE_WGET_AUTHENTICATION
692                 if (target.user) {
693                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
694                                 base64enc(target.user));
695                 }
696                 if (use_proxy && server.user) {
697                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
698                                 base64enc(server.user));
699                 }
700 #endif
701
702                 if (G.beg_range != 0)
703                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
704
705 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
706                 if (G.extra_headers)
707                         fputs(G.extra_headers, sfp);
708
709                 if (option_mask32 & WGET_OPT_POST_DATA) {
710                         fprintf(sfp,
711                                 "Content-Type: application/x-www-form-urlencoded\r\n"
712                                 "Content-Length: %u\r\n"
713                                 "\r\n"
714                                 "%s",
715                                 (int) strlen(G.post_data), G.post_data
716                         );
717                 } else
718 #endif
719                 {
720                         fprintf(sfp, "\r\n");
721                 }
722
723                 fflush(sfp);
724
725                 /*
726                  * Retrieve HTTP response line and check for "200" status code.
727                  */
728  read_response:
729                 fgets_and_trim(sfp);
730
731                 str = G.wget_buf;
732                 str = skip_non_whitespace(str);
733                 str = skip_whitespace(str);
734                 // FIXME: no error check
735                 // xatou wouldn't work: "200 OK"
736                 status = atoi(str);
737                 switch (status) {
738                 case 0:
739                 case 100:
740                         while (gethdr(sfp) != NULL)
741                                 /* eat all remaining headers */;
742                         goto read_response;
743                 case 200:
744 /*
745 Response 204 doesn't say "null file", it says "metadata
746 has changed but data didn't":
747
748 "10.2.5 204 No Content
749 The server has fulfilled the request but does not need to return
750 an entity-body, and might want to return updated metainformation.
751 The response MAY include new or updated metainformation in the form
752 of entity-headers, which if present SHOULD be associated with
753 the requested variant.
754
755 If the client is a user agent, it SHOULD NOT change its document
756 view from that which caused the request to be sent. This response
757 is primarily intended to allow input for actions to take place
758 without causing a change to the user agent's active document view,
759 although any new or updated metainformation SHOULD be applied
760 to the document currently in the user agent's active view.
761
762 The 204 response MUST NOT include a message-body, and thus
763 is always terminated by the first empty line after the header fields."
764
765 However, in real world it was observed that some web servers
766 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
767 */
768                 case 204:
769                         if (G.beg_range != 0) {
770                                 /* "Range:..." was not honored by the server.
771                                  * Restart download from the beginning.
772                                  */
773                                 reset_beg_range_to_zero();
774                         }
775                         break;
776                 case 300:  /* redirection */
777                 case 301:
778                 case 302:
779                 case 303:
780                         break;
781                 case 206: /* Partial Content */
782                         if (G.beg_range != 0)
783                                 /* "Range:..." worked. Good. */
784                                 break;
785                         /* Partial Content even though we did not ask for it??? */
786                         /* fall through */
787                 default:
788                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
789                 }
790
791                 /*
792                  * Retrieve HTTP headers.
793                  */
794                 while ((str = gethdr(sfp)) != NULL) {
795                         static const char keywords[] ALIGN1 =
796                                 "content-length\0""transfer-encoding\0""location\0";
797                         enum {
798                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
799                         };
800                         smalluint key;
801
802                         /* gethdr converted "FOO:" string to lowercase */
803
804                         /* strip trailing whitespace */
805                         char *s = strchrnul(str, '\0') - 1;
806                         while (s >= str && (*s == ' ' || *s == '\t')) {
807                                 *s = '\0';
808                                 s--;
809                         }
810                         key = index_in_strings(keywords, G.wget_buf) + 1;
811                         if (key == KEY_content_length) {
812                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
813                                 if (G.content_len < 0 || errno) {
814                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
815                                 }
816                                 G.got_clen = 1;
817                                 continue;
818                         }
819                         if (key == KEY_transfer_encoding) {
820                                 if (strcmp(str_tolower(str), "chunked") != 0)
821                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
822                                 G.chunked = 1;
823                         }
824                         if (key == KEY_location && status >= 300) {
825                                 if (--redir_limit == 0)
826                                         bb_error_msg_and_die("too many redirections");
827                                 fclose(sfp);
828                                 if (str[0] == '/') {
829                                         free(redirected_path);
830                                         target.path = redirected_path = xstrdup(str+1);
831                                         /* lsa stays the same: it's on the same server */
832                                 } else {
833                                         parse_url(str, &target);
834                                         if (!use_proxy) {
835                                                 free(server.allocated);
836                                                 server.allocated = NULL;
837                                                 server.host = target.host;
838                                                 /* strip_ipv6_scope_id(target.host); - no! */
839                                                 /* we assume remote never gives us IPv6 addr with scope id */
840                                                 server.port = target.port;
841                                                 free(lsa);
842                                                 goto resolve_lsa;
843                                         } /* else: lsa stays the same: we use proxy */
844                                 }
845                                 goto establish_session;
846                         }
847                 }
848 //              if (status >= 300)
849 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
850
851                 /* For HTTP, data is pumped over the same connection */
852                 dfp = sfp;
853
854         } else {
855                 /*
856                  *  FTP session
857                  */
858                 sfp = prepare_ftp_session(&dfp, &target, lsa);
859         }
860
861         free(lsa);
862
863         if (!(option_mask32 & WGET_OPT_SPIDER)) {
864                 if (G.output_fd < 0)
865                         G.output_fd = xopen(G.fname_out, G.o_flags);
866                 retrieve_file_data(dfp);
867                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
868                         xclose(G.output_fd);
869                         G.output_fd = -1;
870                 }
871         }
872
873         if (dfp != sfp) {
874                 /* It's ftp. Close data connection properly */
875                 fclose(dfp);
876                 if (ftpcmd(NULL, NULL, sfp) != 226)
877                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
878                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
879         }
880         fclose(sfp);
881
882         free(server.allocated);
883         free(target.allocated);
884         free(fname_out_alloc);
885         free(redirected_path);
886 }
887
888 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
889 int wget_main(int argc UNUSED_PARAM, char **argv)
890 {
891 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
892         static const char wget_longopts[] ALIGN1 =
893                 /* name, has_arg, val */
894                 "continue\0"         No_argument       "c"
895 //FIXME: -s isn't --spider, it's --save-headers!
896                 "spider\0"           No_argument       "s"
897                 "quiet\0"            No_argument       "q"
898                 "output-document\0"  Required_argument "O"
899                 "directory-prefix\0" Required_argument "P"
900                 "proxy\0"            Required_argument "Y"
901                 "user-agent\0"       Required_argument "U"
902 #if ENABLE_FEATURE_WGET_TIMEOUT
903                 "timeout\0"          Required_argument "T"
904 #endif
905                 /* Ignored: */
906                 // "tries\0"            Required_argument "t"
907                 /* Ignored (we always use PASV): */
908                 "passive-ftp\0"      No_argument       "\xff"
909                 "header\0"           Required_argument "\xfe"
910                 "post-data\0"        Required_argument "\xfd"
911                 /* Ignored (we don't do ssl) */
912                 "no-check-certificate\0" No_argument   "\xfc"
913                 /* Ignored (we don't support caching) */
914                 "no-cache\0"         No_argument       "\xfb"
915                 ;
916 #endif
917
918 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
919         llist_t *headers_llist = NULL;
920 #endif
921
922         INIT_G();
923
924         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
925         G.proxy_flag = "on";   /* use proxies if env vars are set */
926         G.user_agent = "Wget"; /* "User-Agent" header field */
927
928 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
929         applet_long_options = wget_longopts;
930 #endif
931         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
932         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
933                 &G.fname_out, &G.dir_prefix,
934                 &G.proxy_flag, &G.user_agent,
935                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
936                 NULL /* -t RETRIES */
937                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
938                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
939         );
940         argv += optind;
941
942 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
943         if (headers_llist) {
944                 int size = 1;
945                 char *cp;
946                 llist_t *ll = headers_llist;
947                 while (ll) {
948                         size += strlen(ll->data) + 2;
949                         ll = ll->link;
950                 }
951                 G.extra_headers = cp = xmalloc(size);
952                 while (headers_llist) {
953                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
954                 }
955         }
956 #endif
957
958         G.output_fd = -1;
959         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
960         if (G.fname_out) { /* -O FILE ? */
961                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
962                         G.output_fd = 1;
963                         option_mask32 &= ~WGET_OPT_CONTINUE;
964                 }
965                 /* compat with wget: -O FILE can overwrite */
966                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
967         }
968
969         while (*argv)
970                 download_one_url(*argv++);
971
972         if (G.output_fd >= 0)
973                 xclose(G.output_fd);
974
975         return EXIT_SUCCESS;
976 }