013b87801de6bf28de0d6965bbe8bc522573f337
[oweals/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage:    "        [--no-check-certificate] [--no-cache]" */
18 //usage:       "        [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
19 //usage:        )
20 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23 //usage:        )
24 //usage:#define wget_full_usage "\n\n"
25 //usage:       "Retrieve files via HTTP or FTP\n"
26 //usage:     "\n        -s      Spider mode - only check file existence"
27 //usage:     "\n        -c      Continue retrieval of aborted transfer"
28 //usage:     "\n        -q      Quiet"
29 //usage:     "\n        -P DIR  Save to DIR (default .)"
30 //usage:        IF_FEATURE_WGET_TIMEOUT(
31 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
32 //usage:        )
33 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
34 //usage:     "\n        -U STR  Use STR for User-Agent header"
35 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
36
37 #include "libbb.h"
38
/* Protocol tracing hook: with "#if 0" log_io() expands to nothing;
 * flip it to "#if 1" to forward every log_io() call to bb_error_msg().
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
#else
# define log_io(...) ((void)0)
#endif
44
45
/* Pieces of a URL as split up by parse_url(). */
struct host_info {
        char *allocated;        /* xstrdup'ed copy of the URL; parse_url() frees the previous one */
        const char *path;       /* text after the host: past the '/', or starting at '?'/'#'; "" if none */
        const char *user;       /* percent-decoded text found before '@'; parse_url() leaves it alone when there is no '@' */
        char       *host;       /* host part, points into 'allocated' after parse_url() */
        int         port;       /* bb_lookup_port() result for the scheme (fallback 80 for http, 21 for ftp) */
        smallint    is_ftp;     /* 1 for "ftp://" URLs, 0 for "http://" */
};
54
55
/* Globals */
/* All applet state lives in one heap block reached through ptr_to_globals (see G below). */
struct globals {
        off_t content_len;        /* Content-length of the file; counted down as data arrives */
        off_t beg_range;          /* Range at which continue begins */
#if ENABLE_FEATURE_WGET_STATUSBAR
        off_t transferred;        /* Number of bytes transferred so far */
        const char *curfile;      /* Name of current file being transferred */
        bb_progress_t pmt;        /* progress bar state passed to the bb_progress_*() calls */
#endif
        char *dir_prefix;         /* prepended to the guessed output name (-P DIR) */
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
        char *post_data;          /* request body sent when WGET_OPT_POST_DATA is set */
        char *extra_headers;      /* written verbatim into the HTTP request when non-NULL */
#endif
        char *fname_out;        /* where to direct output (-O) */
        const char *proxy_flag; /* Use proxies if env vars are set */
        const char *user_agent; /* "User-Agent" header field */
#if ENABLE_FEATURE_WGET_TIMEOUT
        unsigned timeout_seconds; /* idle seconds tolerated while reading; INIT_G() sets 900 */
#endif
        int output_fd;            /* descriptor the downloaded data is written to */
        int o_flags;              /* NOTE(review): not referenced in the visible part of the file */
        smallint chunked;         /* chunked transfer encoding */
        smallint got_clen;        /* got content-length: from server  */
        /* Local downloads do benefit from big buffer.
         * With 512 byte buffer, it was measured to be
         * an order of magnitude slower than with big one.
         */
        uint64_t just_to_align_next_member;
        char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024]; /* shared scratch: response lines, encoded credentials, file data */
} FIX_ALIASING;
/* G names the globals block itself, so fields are accessed as G.field */
#define G (*ptr_to_globals)
/* Allocate the zero-filled globals block and apply the non-zero defaults */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
        IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
} while (0)
92
93
/* Must match option string! */
/* One bit per command line option; the code tests these against option_mask32.
 * The last two are multiplied by ENABLE_FEATURE_WGET_LONG_OPTIONS, so they
 * become 0 (tests on them are always false) when long options are disabled.
 */
enum {
        WGET_OPT_CONTINUE   = (1 << 0),
        WGET_OPT_SPIDER     = (1 << 1),
        WGET_OPT_QUIET      = (1 << 2),
        WGET_OPT_OUTNAME    = (1 << 3),
        WGET_OPT_PREFIX     = (1 << 4),
        WGET_OPT_PROXY      = (1 << 5),
        WGET_OPT_USER_AGENT = (1 << 6),
        WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
        WGET_OPT_RETRIES    = (1 << 8),
        WGET_OPT_PASSIVE    = (1 << 9),
        WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
        WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
};
109
/* Actions understood by progress_meter() */
enum {
        PROGRESS_START = -1,    /* initialize the bar, then draw it */
        PROGRESS_END,           /* == 0: draw one last time, then release the bar */
        PROGRESS_BUMP,          /* == 1: just redraw */
};
115 #if ENABLE_FEATURE_WGET_STATUSBAR
116 static void progress_meter(int flag)
117 {
118         if (option_mask32 & WGET_OPT_QUIET)
119                 return;
120
121         if (flag == PROGRESS_START)
122                 bb_progress_init(&G.pmt, G.curfile);
123
124         bb_progress_update(&G.pmt,
125                         G.beg_range,
126                         G.transferred,
127                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
128         );
129
130         if (flag == PROGRESS_END) {
131                 bb_progress_free(&G.pmt);
132                 bb_putchar_stderr('\n');
133                 G.transferred = 0;
134         }
135 }
136 #else
137 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
138 #endif
139
140
/* Remove an IPv6 zone (scope) identifier from a bracketed host string.
 *
 * Link-local and site-local IPv6 addresses may carry a scope identifier
 * naming the interface/link they are valid on (e.g. fe80::1%eth0). That
 * identifier only has meaning on the local node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache, for one, treats zone
 * identifiers in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * "[fe80::1%eth0]:80" is rewritten in place to "[fe80::1]:80".
 * Strings that do not start with '[', have no '%', or do not look like
 * "[xx]" / "[xx]:nn" are left unchanged.
 */
static void strip_ipv6_scope_id(char *host)
{
        char *zone, *bracket;

        /* bbox wget does cope with unbracketed IPv6 addresses such as
         * wget "http://::1/xxx", but that form is not standard and,
         * to save code, is not supported _here_. */
        if (host[0] != '[')
                return; /* not IPv6 */

        zone = strchr(host, '%');
        if (zone == NULL)
                return; /* no scope id present */

        bracket = strchr(host, ']');
        /* Only well-formed "[xx]" or "[xx]:nn" is edited */
        if (bracket != NULL && (bracket[1] == ':' || bracket[1] == '\0')) {
                /* zone points to "%eth0]...", bracket points to "]...":
                 * slide the tail down over the zone identifier */
                overlapping_strcpy(zone, bracket);
        }
}
176
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode str into the shared G.wget_buf and return that buffer.
 * The result is overwritten by the next user of G.wget_buf.
 * Over-long input is silently cut to the limit below.
 */
static char *base64enc(const char *str)
{
        /* paranoia: cap the input length relative to the buffer size */
        const size_t limit = sizeof(G.wget_buf)/4*3 - 10;
        unsigned len = strlen(str);

        if (len > limit)
                len = limit;
        bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
        return G.wget_buf;
}
#endif
188
/* Cut s at its first control character (any byte below ' ', which also
 * covers the terminating NUL) and return s. Bytes >= 0x80 are kept
 * because the comparison is done on unsigned char.
 */
static char* sanitize_string(char *s)
{
        unsigned char *cur;

        for (cur = (unsigned char *) s; *cur >= ' '; cur++)
                continue;
        *cur = '\0';
        return s;
}
197
198 static FILE *open_socket(len_and_sockaddr *lsa)
199 {
200         FILE *fp;
201
202         /* glibc 2.4 seems to try seeking on it - ??! */
203         /* hopefully it understands what ESPIPE means... */
204         fp = fdopen(xconnect_stream(lsa), "r+");
205         if (fp == NULL)
206                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
207
208         return fp;
209 }
210
211 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
212 static char fgets_and_trim(FILE *fp)
213 {
214         char c;
215         char *buf_ptr;
216
217         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
218                 bb_perror_msg_and_die("error getting response");
219
220         buf_ptr = strchrnul(G.wget_buf, '\n');
221         c = *buf_ptr;
222         *buf_ptr = '\0';
223         buf_ptr = strchrnul(G.wget_buf, '\r');
224         *buf_ptr = '\0';
225
226         log_io("< %s", G.wget_buf);
227
228         return c;
229 }
230
231 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
232 {
233         int result;
234         if (s1) {
235                 if (!s2)
236                         s2 = "";
237                 fprintf(fp, "%s%s\r\n", s1, s2);
238                 fflush(fp);
239                 log_io("> %s%s", s1, s2);
240         }
241
242         do {
243                 fgets_and_trim(fp);
244         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
245
246         G.wget_buf[3] = '\0';
247         result = xatoi_positive(G.wget_buf);
248         G.wget_buf[3] = ' ';
249         return result;
250 }
251
252 static void parse_url(const char *src_url, struct host_info *h)
253 {
254         char *url, *p, *sp;
255
256         free(h->allocated);
257         h->allocated = url = xstrdup(src_url);
258
259         if (strncmp(url, "http://", 7) == 0) {
260                 h->port = bb_lookup_port("http", "tcp", 80);
261                 h->host = url + 7;
262                 h->is_ftp = 0;
263         } else if (strncmp(url, "ftp://", 6) == 0) {
264                 h->port = bb_lookup_port("ftp", "tcp", 21);
265                 h->host = url + 6;
266                 h->is_ftp = 1;
267         } else
268                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
269
270         // FYI:
271         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
272         //   'GET /?var=a/b HTTP 1.0'
273         //   and saves 'index.html?var=a%2Fb' (we save 'b')
274         // wget 'http://busybox.net?login=john@doe':
275         //   request: 'GET /?login=john@doe HTTP/1.0'
276         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
277         // wget 'http://busybox.net#test/test':
278         //   request: 'GET / HTTP/1.0'
279         //   saves: 'index.html' (we save 'test')
280         //
281         // We also don't add unique .N suffix if file exists...
282         sp = strchr(h->host, '/');
283         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
284         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
285         if (!sp) {
286                 h->path = "";
287         } else if (*sp == '/') {
288                 *sp = '\0';
289                 h->path = sp + 1;
290         } else { // '#' or '?'
291                 // http://busybox.net?login=john@doe is a valid URL
292                 // memmove converts to:
293                 // http:/busybox.nett?login=john@doe...
294                 memmove(h->host - 1, h->host, sp - h->host);
295                 h->host--;
296                 sp[-1] = '\0';
297                 h->path = sp;
298         }
299
300         // We used to set h->user to NULL here, but this interferes
301         // with handling of code 302 ("object was moved")
302
303         sp = strrchr(h->host, '@');
304         if (sp != NULL) {
305                 // URL-decode "user:password" string before base64-encoding:
306                 // wget http://test:my%20pass@example.com should send
307                 // Authorization: Basic dGVzdDpteSBwYXNz
308                 // which decodes to "test:my pass".
309                 // Standard wget and curl do this too.
310                 *sp = '\0';
311                 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
312                 h->host = sp + 1;
313         }
314
315         sp = h->host;
316 }
317
318 static char *gethdr(FILE *fp)
319 {
320         char *s, *hdrval;
321         int c;
322
323         /* retrieve header line */
324         c = fgets_and_trim(fp);
325
326         /* end of the headers? */
327         if (G.wget_buf[0] == '\0')
328                 return NULL;
329
330         /* convert the header name to lower case */
331         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
332                 /* tolower for "A-Z", no-op for "0-9a-z-." */
333                 *s |= 0x20;
334         }
335
336         /* verify we are at the end of the header name */
337         if (*s != ':')
338                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
339
340         /* locate the start of the header value */
341         *s++ = '\0';
342         hdrval = skip_whitespace(s);
343
344         if (c != '\n') {
345                 /* Rats! The buffer isn't big enough to hold the entire header value */
346                 while (c = getc(fp), c != EOF && c != '\n')
347                         continue;
348         }
349
350         return hdrval;
351 }
352
353 static void reset_beg_range_to_zero(void)
354 {
355         //bb_error_msg("restart failed");
356         G.beg_range = 0;
357         xlseek(G.output_fd, 0, SEEK_SET);
358         ftruncate(G.output_fd, 0);
359 }
360
361 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
362 {
363         FILE *sfp;
364         char *str;
365         int port;
366
367         if (!target->user)
368                 target->user = xstrdup("anonymous:busybox@");
369
370         sfp = open_socket(lsa);
371         if (ftpcmd(NULL, NULL, sfp) != 220)
372                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
373
374         /*
375          * Splitting username:password pair,
376          * trying to log in
377          */
378         str = strchr(target->user, ':');
379         if (str)
380                 *str++ = '\0';
381         switch (ftpcmd("USER ", target->user, sfp)) {
382         case 230:
383                 break;
384         case 331:
385                 if (ftpcmd("PASS ", str, sfp) == 230)
386                         break;
387                 /* fall through (failed login) */
388         default:
389                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
390         }
391
392         ftpcmd("TYPE I", NULL, sfp);
393
394         /*
395          * Querying file size
396          */
397         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
398                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
399                 if (G.content_len < 0 || errno) {
400                         bb_error_msg_and_die("SIZE value is garbage");
401                 }
402                 G.got_clen = 1;
403         }
404
405         /*
406          * Entering passive mode
407          */
408         if (ftpcmd("PASV", NULL, sfp) != 227) {
409  pasv_error:
410                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
411         }
412         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
413         // Server's IP is N1.N2.N3.N4 (we ignore it)
414         // Server's port for data connection is P1*256+P2
415         str = strrchr(G.wget_buf, ')');
416         if (str) str[0] = '\0';
417         str = strrchr(G.wget_buf, ',');
418         if (!str) goto pasv_error;
419         port = xatou_range(str+1, 0, 255);
420         *str = '\0';
421         str = strrchr(G.wget_buf, ',');
422         if (!str) goto pasv_error;
423         port += xatou_range(str+1, 0, 255) * 256;
424         set_nport(&lsa->u.sa, htons(port));
425
426         *dfpp = open_socket(lsa);
427
428         if (G.beg_range != 0) {
429                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
430                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
431                         G.content_len -= G.beg_range;
432                 else
433                         reset_beg_range_to_zero();
434         }
435
436         if (ftpcmd("RETR ", target->path, sfp) > 150)
437                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
438
439         return sfp;
440 }
441
442 static void NOINLINE retrieve_file_data(FILE *dfp)
443 {
444 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
445 # if ENABLE_FEATURE_WGET_TIMEOUT
446         unsigned second_cnt;
447 # endif
448         struct pollfd polldata;
449
450         polldata.fd = fileno(dfp);
451         polldata.events = POLLIN | POLLPRI;
452 #endif
453         progress_meter(PROGRESS_START);
454
455         if (G.chunked)
456                 goto get_clen;
457
458         /* Loops only if chunked */
459         while (1) {
460
461 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
462                 /* Must use nonblocking I/O, otherwise fread will loop
463                  * and *block* until it reads full buffer,
464                  * which messes up progress bar and/or timeout logic.
465                  * Because of nonblocking I/O, we need to dance
466                  * very carefully around EAGAIN. See explanation at
467                  * clearerr() call.
468                  */
469                 ndelay_on(polldata.fd);
470 #endif
471                 while (1) {
472                         int n;
473                         unsigned rdsz;
474
475                         rdsz = sizeof(G.wget_buf);
476                         if (G.got_clen) {
477                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
478                                         if ((int)G.content_len <= 0)
479                                                 break;
480                                         rdsz = (unsigned)G.content_len;
481                                 }
482                         }
483
484 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
485 # if ENABLE_FEATURE_WGET_TIMEOUT
486                         second_cnt = G.timeout_seconds;
487 # endif
488                         while (1) {
489                                 if (safe_poll(&polldata, 1, 1000) != 0)
490                                         break; /* error, EOF, or data is available */
491 # if ENABLE_FEATURE_WGET_TIMEOUT
492                                 if (second_cnt != 0 && --second_cnt == 0) {
493                                         progress_meter(PROGRESS_END);
494                                         bb_error_msg_and_die("download timed out");
495                                 }
496 # endif
497                                 /* Needed for "stalled" indicator */
498                                 progress_meter(PROGRESS_BUMP);
499                         }
500
501                         /* fread internally uses read loop, which in our case
502                          * is usually exited when we get EAGAIN.
503                          * In this case, libc sets error marker on the stream.
504                          * Need to clear it before next fread to avoid possible
505                          * rare false positive ferror below. Rare because usually
506                          * fread gets more than zero bytes, and we don't fall
507                          * into if (n <= 0) ...
508                          */
509                         clearerr(dfp);
510                         errno = 0;
511 #endif
512                         n = fread(G.wget_buf, 1, rdsz, dfp);
513                         /* man fread:
514                          * If error occurs, or EOF is reached, the return value
515                          * is a short item count (or zero).
516                          * fread does not distinguish between EOF and error.
517                          */
518                         if (n <= 0) {
519 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
520                                 if (errno == EAGAIN) /* poll lied, there is no data? */
521                                         continue; /* yes */
522 #endif
523                                 if (ferror(dfp))
524                                         bb_perror_msg_and_die(bb_msg_read_error);
525                                 break; /* EOF, not error */
526                         }
527
528                         xwrite(G.output_fd, G.wget_buf, n);
529
530 #if ENABLE_FEATURE_WGET_STATUSBAR
531                         G.transferred += n;
532                         progress_meter(PROGRESS_BUMP);
533 #endif
534                         if (G.got_clen) {
535                                 G.content_len -= n;
536                                 if (G.content_len == 0)
537                                         break;
538                         }
539                 }
540 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
541                 clearerr(dfp);
542                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
543 #endif
544                 if (!G.chunked)
545                         break;
546
547                 fgets_and_trim(dfp); /* Eat empty line */
548  get_clen:
549                 fgets_and_trim(dfp);
550                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
551                 /* FIXME: error check? */
552                 if (G.content_len == 0)
553                         break; /* all done! */
554                 G.got_clen = 1;
555         }
556
557         /* Draw full bar and free its resources */
558         G.chunked = 0;  /* makes it show 100% even for chunked download */
559         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
560         progress_meter(PROGRESS_END);
561 }
562
563 static void download_one_url(const char *url)
564 {
565         bool use_proxy;                 /* Use proxies if env vars are set  */
566         int redir_limit;
567         len_and_sockaddr *lsa;
568         FILE *sfp;                      /* socket to web/ftp server         */
569         FILE *dfp;                      /* socket to ftp server (data)      */
570         char *proxy = NULL;
571         char *fname_out_alloc;
572         char *redirected_path = NULL;
573         struct host_info server;
574         struct host_info target;
575
576         server.allocated = NULL;
577         target.allocated = NULL;
578         server.user = NULL;
579         target.user = NULL;
580
581         parse_url(url, &target);
582
583         /* Use the proxy if necessary */
584         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
585         if (use_proxy) {
586                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
587                 use_proxy = (proxy && proxy[0]);
588                 if (use_proxy)
589                         parse_url(proxy, &server);
590         }
591         if (!use_proxy) {
592                 server.port = target.port;
593                 if (ENABLE_FEATURE_IPV6) {
594                         //free(server.allocated); - can't be non-NULL
595                         server.host = server.allocated = xstrdup(target.host);
596                 } else {
597                         server.host = target.host;
598                 }
599         }
600
601         if (ENABLE_FEATURE_IPV6)
602                 strip_ipv6_scope_id(target.host);
603
604         /* If there was no -O FILE, guess output filename */
605         fname_out_alloc = NULL;
606         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
607                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
608                 /* handle "wget http://kernel.org//" */
609                 if (G.fname_out[0] == '/' || !G.fname_out[0])
610                         G.fname_out = (char*)"index.html";
611                 /* -P DIR is considered only if there was no -O FILE */
612                 else {
613                         if (G.dir_prefix)
614                                 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
615                         else {
616                                 /* redirects may free target.path later, need to make a copy */
617                                 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
618                         }
619                 }
620         }
621 #if ENABLE_FEATURE_WGET_STATUSBAR
622         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
623 #endif
624
625         /* Determine where to start transfer */
626         G.beg_range = 0;
627         if (option_mask32 & WGET_OPT_CONTINUE) {
628                 G.output_fd = open(G.fname_out, O_WRONLY);
629                 if (G.output_fd >= 0) {
630                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
631                 }
632                 /* File doesn't exist. We do not create file here yet.
633                  * We are not sure it exists on remote side */
634         }
635
636         redir_limit = 5;
637  resolve_lsa:
638         lsa = xhost2sockaddr(server.host, server.port);
639         if (!(option_mask32 & WGET_OPT_QUIET)) {
640                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
641                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
642                 free(s);
643         }
644  establish_session:
645         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
646         G.got_clen = 0;
647         G.chunked = 0;
648         if (use_proxy || !target.is_ftp) {
649                 /*
650                  *  HTTP session
651                  */
652                 char *str;
653                 int status;
654
655
656                 /* Open socket to http server */
657                 sfp = open_socket(lsa);
658
659                 /* Send HTTP request */
660                 if (use_proxy) {
661                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
662                                 target.is_ftp ? "f" : "ht", target.host,
663                                 target.path);
664                 } else {
665                         if (option_mask32 & WGET_OPT_POST_DATA)
666                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
667                         else
668                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
669                 }
670
671                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
672                         target.host, G.user_agent);
673
674                 /* Ask server to close the connection as soon as we are done
675                  * (IOW: we do not intend to send more requests)
676                  */
677                 fprintf(sfp, "Connection: close\r\n");
678
679 #if ENABLE_FEATURE_WGET_AUTHENTICATION
680                 if (target.user) {
681                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
682                                 base64enc(target.user));
683                 }
684                 if (use_proxy && server.user) {
685                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
686                                 base64enc(server.user));
687                 }
688 #endif
689
690                 if (G.beg_range != 0)
691                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
692
693 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
694                 if (G.extra_headers)
695                         fputs(G.extra_headers, sfp);
696
697                 if (option_mask32 & WGET_OPT_POST_DATA) {
698                         fprintf(sfp,
699                                 "Content-Type: application/x-www-form-urlencoded\r\n"
700                                 "Content-Length: %u\r\n"
701                                 "\r\n"
702                                 "%s",
703                                 (int) strlen(G.post_data), G.post_data
704                         );
705                 } else
706 #endif
707                 {
708                         fprintf(sfp, "\r\n");
709                 }
710
711                 fflush(sfp);
712
713                 /*
714                  * Retrieve HTTP response line and check for "200" status code.
715                  */
716  read_response:
717                 fgets_and_trim(sfp);
718
719                 str = G.wget_buf;
720                 str = skip_non_whitespace(str);
721                 str = skip_whitespace(str);
722                 // FIXME: no error check
723                 // xatou wouldn't work: "200 OK"
724                 status = atoi(str);
725                 switch (status) {
726                 case 0:
727                 case 100:
728                         while (gethdr(sfp) != NULL)
729                                 /* eat all remaining headers */;
730                         goto read_response;
731                 case 200:
732 /*
733 Response 204 doesn't say "null file", it says "metadata
734 has changed but data didn't":
735
736 "10.2.5 204 No Content
737 The server has fulfilled the request but does not need to return
738 an entity-body, and might want to return updated metainformation.
739 The response MAY include new or updated metainformation in the form
740 of entity-headers, which if present SHOULD be associated with
741 the requested variant.
742
743 If the client is a user agent, it SHOULD NOT change its document
744 view from that which caused the request to be sent. This response
745 is primarily intended to allow input for actions to take place
746 without causing a change to the user agent's active document view,
747 although any new or updated metainformation SHOULD be applied
748 to the document currently in the user agent's active view.
749
750 The 204 response MUST NOT include a message-body, and thus
751 is always terminated by the first empty line after the header fields."
752
753 However, in real world it was observed that some web servers
754 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
755 */
756                 case 204:
757                         if (G.beg_range != 0) {
758                                 /* "Range:..." was not honored by the server.
759                                  * Restart download from the beginning.
760                                  */
761                                 reset_beg_range_to_zero();
762                         }
763                         break;
764                 case 300:  /* redirection */
765                 case 301:
766                 case 302:
767                 case 303:
768                         break;
769                 case 206: /* Partial Content */
770                         if (G.beg_range != 0)
771                                 /* "Range:..." worked. Good. */
772                                 break;
773                         /* Partial Content even though we did not ask for it??? */
774                         /* fall through */
775                 default:
776                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
777                 }
778
779                 /*
780                  * Retrieve HTTP headers.
781                  */
782                 while ((str = gethdr(sfp)) != NULL) {
783                         static const char keywords[] ALIGN1 =
784                                 "content-length\0""transfer-encoding\0""location\0";
785                         enum {
786                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
787                         };
788                         smalluint key;
789
790                         /* gethdr converted "FOO:" string to lowercase */
791
792                         /* strip trailing whitespace */
793                         char *s = strchrnul(str, '\0') - 1;
794                         while (s >= str && (*s == ' ' || *s == '\t')) {
795                                 *s = '\0';
796                                 s--;
797                         }
798                         key = index_in_strings(keywords, G.wget_buf) + 1;
799                         if (key == KEY_content_length) {
800                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
801                                 if (G.content_len < 0 || errno) {
802                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
803                                 }
804                                 G.got_clen = 1;
805                                 continue;
806                         }
807                         if (key == KEY_transfer_encoding) {
808                                 if (strcmp(str_tolower(str), "chunked") != 0)
809                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
810                                 G.chunked = 1;
811                         }
812                         if (key == KEY_location && status >= 300) {
813                                 if (--redir_limit == 0)
814                                         bb_error_msg_and_die("too many redirections");
815                                 fclose(sfp);
816                                 if (str[0] == '/') {
817                                         free(redirected_path);
818                                         target.path = redirected_path = xstrdup(str+1);
819                                         /* lsa stays the same: it's on the same server */
820                                 } else {
821                                         parse_url(str, &target);
822                                         if (!use_proxy) {
823                                                 free(server.allocated);
824                                                 server.allocated = NULL;
825                                                 server.host = target.host;
826                                                 /* strip_ipv6_scope_id(target.host); - no! */
827                                                 /* we assume remote never gives us IPv6 addr with scope id */
828                                                 server.port = target.port;
829                                                 free(lsa);
830                                                 goto resolve_lsa;
831                                         } /* else: lsa stays the same: we use proxy */
832                                 }
833                                 goto establish_session;
834                         }
835                 }
836 //              if (status >= 300)
837 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
838
839                 /* For HTTP, data is pumped over the same connection */
840                 dfp = sfp;
841
842         } else {
843                 /*
844                  *  FTP session
845                  */
846                 sfp = prepare_ftp_session(&dfp, &target, lsa);
847         }
848
849         free(lsa);
850
851         if (!(option_mask32 & WGET_OPT_SPIDER)) {
852                 if (G.output_fd < 0)
853                         G.output_fd = xopen(G.fname_out, G.o_flags);
854                 retrieve_file_data(dfp);
855                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
856                         xclose(G.output_fd);
857                         G.output_fd = -1;
858                 }
859         }
860
861         if (dfp != sfp) {
862                 /* It's ftp. Close data connection properly */
863                 fclose(dfp);
864                 if (ftpcmd(NULL, NULL, sfp) != 226)
865                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
866                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
867         }
868         fclose(sfp);
869
870         free(server.allocated);
871         free(target.allocated);
872         free(fname_out_alloc);
873         free(redirected_path);
874 }
875
876 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
877 int wget_main(int argc UNUSED_PARAM, char **argv)
878 {
879 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
880         static const char wget_longopts[] ALIGN1 =
881                 /* name, has_arg, val */
882                 "continue\0"         No_argument       "c"
883 //FIXME: -s isn't --spider, it's --save-headers!
884                 "spider\0"           No_argument       "s"
885                 "quiet\0"            No_argument       "q"
886                 "output-document\0"  Required_argument "O"
887                 "directory-prefix\0" Required_argument "P"
888                 "proxy\0"            Required_argument "Y"
889                 "user-agent\0"       Required_argument "U"
890 #if ENABLE_FEATURE_WGET_TIMEOUT
891                 "timeout\0"          Required_argument "T"
892 #endif
893                 /* Ignored: */
894                 // "tries\0"            Required_argument "t"
895                 /* Ignored (we always use PASV): */
896                 "passive-ftp\0"      No_argument       "\xff"
897                 "header\0"           Required_argument "\xfe"
898                 "post-data\0"        Required_argument "\xfd"
899                 /* Ignored (we don't do ssl) */
900                 "no-check-certificate\0" No_argument   "\xfc"
901                 /* Ignored (we don't support caching) */
902                 "no-cache\0"         No_argument       "\xfb"
903                 ;
904 #endif
905
906 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
907         llist_t *headers_llist = NULL;
908 #endif
909
910         INIT_G();
911
912         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
913         G.proxy_flag = "on";   /* use proxies if env vars are set */
914         G.user_agent = "Wget"; /* "User-Agent" header field */
915
916 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
917         applet_long_options = wget_longopts;
918 #endif
919         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
920         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
921                 &G.fname_out, &G.dir_prefix,
922                 &G.proxy_flag, &G.user_agent,
923                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
924                 NULL /* -t RETRIES */
925                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
926                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
927         );
928         argv += optind;
929
930 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
931         if (headers_llist) {
932                 int size = 1;
933                 char *cp;
934                 llist_t *ll = headers_llist;
935                 while (ll) {
936                         size += strlen(ll->data) + 2;
937                         ll = ll->link;
938                 }
939                 G.extra_headers = cp = xmalloc(size);
940                 while (headers_llist) {
941                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
942                 }
943         }
944 #endif
945
946         G.output_fd = -1;
947         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
948         if (G.fname_out) { /* -O FILE ? */
949                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
950                         G.output_fd = 1;
951                         option_mask32 &= ~WGET_OPT_CONTINUE;
952                 }
953                 /* compat with wget: -O FILE can overwrite */
954                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
955         }
956
957         while (*argv)
958                 download_one_url(*argv++);
959
960         if (G.output_fd >= 0)
961                 xclose(G.output_fd);
962
963         return EXIT_SUCCESS;
964 }