wget: add a note about bug 3625
[oweals/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 //usage:       "        [--no-check-certificate] [-U|--user-agent AGENT]"
17 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
18 //usage:        )
19 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
20 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
21 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
22 //usage:        )
23 //usage:#define wget_full_usage "\n\n"
24 //usage:       "Retrieve files via HTTP or FTP\n"
25 //usage:     "\nOptions:"
26 //usage:     "\n        -s      Spider mode - only check file existence"
27 //usage:     "\n        -c      Continue retrieval of aborted transfer"
28 //usage:     "\n        -q      Quiet"
29 //usage:     "\n        -P DIR  Save to DIR (default .)"
30 //usage:        IF_FEATURE_WGET_TIMEOUT(
31 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
32 //usage:        )
33 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
34 //usage:     "\n        -U STR  Use STR for User-Agent header"
35 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
36
37 #include "libbb.h"
38
39 //#define log_io(...) bb_error_msg(__VA_ARGS__)
40 #define log_io(...) ((void)0)
41
42
/*
 * Components of one URL as split up by parse_url().
 * When set by parse_url(), host, path and user point into the
 * 'allocated' buffer (path may also be the literal "").
 */
struct host_info {
        char *allocated;        /* xstrdup'ed URL copy; parse_url() frees the old one on each call */
        const char *path;       /* what follows the host: text after the first '/', or from '?' / '#' on */
        const char *user;       /* text before the last '@' of the host part; left as is if URL has no '@' */
        char       *host;       /* host part of the URL */
        int         port;       /* result of bb_lookup_port() for "http" (default 80) or "ftp" (default 21) */
        smallint    is_ftp;     /* 1 for "ftp://" URLs, 0 for "http://" */
};
51
52
/* Globals: one zero-initialized instance is allocated by INIT_G() and accessed through G */
struct globals {
        off_t content_len;        /* Content-length of the file (counted down as data is received) */
        off_t beg_range;          /* Range at which continue begins */
#if ENABLE_FEATURE_WGET_STATUSBAR
        off_t transferred;        /* Number of bytes transferred so far */
        const char *curfile;      /* Name of current file being transferred */
        bb_progress_t pmt;        /* progress bar state passed to the bb_progress_* functions */
#endif
        char *dir_prefix;         /* directory the guessed output name is placed in (-P DIR) */
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
        char *post_data;          /* request body sent when WGET_OPT_POST_DATA is set */
        char *extra_headers;      /* additional header text written verbatim into the HTTP request */
#endif
        char *fname_out;        /* where to direct output (-O) */
        const char *proxy_flag; /* Use proxies if env vars are set */
        const char *user_agent; /* "User-Agent" header field */
#if ENABLE_FEATURE_WGET_TIMEOUT
        unsigned timeout_seconds; /* network read timeout; INIT_G() sets it to 900 */
#endif
        int output_fd;            /* descriptor the downloaded data is written to */
        int o_flags;
        smallint chunked;         /* chunked transfer encoding */
        smallint got_clen;        /* got content-length: from server  */
        /* Local downloads do benefit from big buffer.
         * With 512 byte buffer, it was measured to be
         * an order of magnitude slower than with big one.
         */
        uint64_t just_to_align_next_member;
        /* Shared scratch buffer: response lines, base64 output and file data all go through it */
        char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
} FIX_ALIASING;
#define G (*ptr_to_globals)
/* Allocate the zeroed globals and, if timeouts are compiled in, set the default timeout */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
        IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
} while (0)
89
90
/* Must match option string! */
/* One bit per command line option; the bits are tested against option_mask32. */
enum {
        WGET_OPT_CONTINUE   = (1 << 0),
        WGET_OPT_SPIDER     = (1 << 1),
        WGET_OPT_QUIET      = (1 << 2),
        WGET_OPT_OUTNAME    = (1 << 3),
        WGET_OPT_PREFIX     = (1 << 4),
        WGET_OPT_PROXY      = (1 << 5),
        WGET_OPT_USER_AGENT = (1 << 6),
        WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
        WGET_OPT_RETRIES    = (1 << 8),
        WGET_OPT_PASSIVE    = (1 << 9),
        /* The next two become 0 when ENABLE_FEATURE_WGET_LONG_OPTIONS is 0,
         * so "option_mask32 & WGET_OPT_xxx" is then always false */
        WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
        WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
};
106
/* Values for the 'flag' argument of progress_meter() */
enum {
        PROGRESS_START = -1,    /* initialize the progress bar before drawing it */
        PROGRESS_END   = 0,     /* draw it one last time, then free it */
        PROGRESS_BUMP  = 1,     /* just redraw */
};
112 #if ENABLE_FEATURE_WGET_STATUSBAR
113 static void progress_meter(int flag)
114 {
115         if (option_mask32 & WGET_OPT_QUIET)
116                 return;
117
118         if (flag == PROGRESS_START)
119                 bb_progress_init(&G.pmt, G.curfile);
120
121         bb_progress_update(&G.pmt,
122                         G.beg_range,
123                         G.transferred,
124                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
125         );
126
127         if (flag == PROGRESS_END) {
128                 bb_progress_free(&G.pmt);
129                 bb_putchar_stderr('\n');
130                 G.transferred = 0;
131         }
132 }
133 #else
134 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
135 #endif
136
137
/* IPv6 has scoped addresses (link-local and site-local). A link-local
 * address may carry a scope identifier naming the interface/link it is
 * valid on, e.g. fe80::1%eth0. Such an identifier only has meaning on
 * the node that uses it.
 *
 * RFC 4007 says the scope identifier MUST NOT be sent across the wire
 * unless all nodes agree on its semantics. Apache, for one, treats a zone
 * identifier in the Host header as an invalid request, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * This function removes "%scope" from a bracketed address in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80". Anything else is left alone.
 */
static void strip_ipv6_scope_id(char *host)
{
        char *percent, *bracket;

        /* bbox wget does cope with unbracketed IPv6 addresses, such as
         * wget "http://::1/xxx", but that form is not standard.
         * To save code, it is not supported _here_. */
        if (host[0] == '[' && (percent = strchr(host, '%')) != NULL) {
                bracket = strchr(host, ']');
                /* Only well-formed "[xx]:nn" or "[xx]" is touched */
                if (bracket && (bracket[1] == ':' || bracket[1] == '\0')) {
                        /* bracket is at "]...", percent is at "%eth0]...":
                         * slide the tail down over the zone identifier */
                        overlapping_strcpy(percent, bracket);
                }
        }
}
173
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return G.wget_buf.
 * Overlong input is silently cut so that the result fits the buffer.
 * The result is overwritten by whatever uses G.wget_buf next.
 */
static char *base64enc(const char *str)
{
        /* paranoia: every 3 input bytes become 4 output bytes */
        const size_t max_len = sizeof(G.wget_buf)/4*3 - 10;
        unsigned len;

        len = strlen(str);
        if (len > max_len)
                len = max_len;

        bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
        return G.wget_buf;
}
#endif
185
/*
 * Cut s at its first control character (any byte below ' ', which
 * includes the terminating NUL itself) and return s.
 * Bytes are compared as unsigned, so bytes >= 0x80 are kept.
 */
static char* sanitize_string(char *s)
{
        unsigned char *scan;

        for (scan = (unsigned char *) s; *scan >= ' '; scan++)
                continue;
        *scan = '\0';

        return s;
}
194
195 static FILE *open_socket(len_and_sockaddr *lsa)
196 {
197         FILE *fp;
198
199         /* glibc 2.4 seems to try seeking on it - ??! */
200         /* hopefully it understands what ESPIPE means... */
201         fp = fdopen(xconnect_stream(lsa), "r+");
202         if (fp == NULL)
203                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
204
205         return fp;
206 }
207
208 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
209 static char fgets_and_trim(FILE *fp)
210 {
211         char c;
212         char *buf_ptr;
213
214         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
215                 bb_perror_msg_and_die("error getting response");
216
217         buf_ptr = strchrnul(G.wget_buf, '\n');
218         c = *buf_ptr;
219         *buf_ptr = '\0';
220         buf_ptr = strchrnul(G.wget_buf, '\r');
221         *buf_ptr = '\0';
222
223         log_io("< %s", G.wget_buf);
224
225         return c;
226 }
227
228 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
229 {
230         int result;
231         if (s1) {
232                 if (!s2)
233                         s2 = "";
234                 fprintf(fp, "%s%s\r\n", s1, s2);
235                 fflush(fp);
236                 log_io("> %s%s", s1, s2);
237         }
238
239         do {
240                 fgets_and_trim(fp);
241         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
242
243         G.wget_buf[3] = '\0';
244         result = xatoi_positive(G.wget_buf);
245         G.wget_buf[3] = ' ';
246         return result;
247 }
248
249 static void parse_url(const char *src_url, struct host_info *h)
250 {
251         char *url, *p, *sp;
252
253         free(h->allocated);
254         h->allocated = url = xstrdup(src_url);
255
256         if (strncmp(url, "http://", 7) == 0) {
257                 h->port = bb_lookup_port("http", "tcp", 80);
258                 h->host = url + 7;
259                 h->is_ftp = 0;
260         } else if (strncmp(url, "ftp://", 6) == 0) {
261                 h->port = bb_lookup_port("ftp", "tcp", 21);
262                 h->host = url + 6;
263                 h->is_ftp = 1;
264         } else
265                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
266
267         // FYI:
268         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
269         //   'GET /?var=a/b HTTP 1.0'
270         //   and saves 'index.html?var=a%2Fb' (we save 'b')
271         // wget 'http://busybox.net?login=john@doe':
272         //   request: 'GET /?login=john@doe HTTP/1.0'
273         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
274         // wget 'http://busybox.net#test/test':
275         //   request: 'GET / HTTP/1.0'
276         //   saves: 'index.html' (we save 'test')
277         //
278         // We also don't add unique .N suffix if file exists...
279         sp = strchr(h->host, '/');
280         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
281         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
282         if (!sp) {
283                 h->path = "";
284         } else if (*sp == '/') {
285                 *sp = '\0';
286                 h->path = sp + 1;
287         } else { // '#' or '?'
288                 // http://busybox.net?login=john@doe is a valid URL
289                 // memmove converts to:
290                 // http:/busybox.nett?login=john@doe...
291                 memmove(h->host - 1, h->host, sp - h->host);
292                 h->host--;
293                 sp[-1] = '\0';
294                 h->path = sp;
295         }
296
297         // We used to set h->user to NULL here, but this interferes
298         // with handling of code 302 ("object was moved")
299
300         sp = strrchr(h->host, '@');
301         if (sp != NULL) {
302                 h->user = h->host;
303                 *sp = '\0';
304                 h->host = sp + 1;
305         }
306
307         sp = h->host;
308 }
309
310 static char *gethdr(FILE *fp)
311 {
312         char *s, *hdrval;
313         int c;
314
315         /* *istrunc = 0; */
316
317         /* retrieve header line */
318         c = fgets_and_trim(fp);
319
320         /* end of the headers? */
321         if (G.wget_buf[0] == '\0')
322                 return NULL;
323
324         /* convert the header name to lower case */
325         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
326                 /* tolower for "A-Z", no-op for "0-9a-z-." */
327                 *s |= 0x20;
328         }
329
330         /* verify we are at the end of the header name */
331         if (*s != ':')
332                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
333
334         /* locate the start of the header value */
335         *s++ = '\0';
336         hdrval = skip_whitespace(s);
337
338         if (c != '\n') {
339                 /* Rats! The buffer isn't big enough to hold the entire header value */
340                 while (c = getc(fp), c != EOF && c != '\n')
341                         continue;
342         }
343
344         return hdrval;
345 }
346
347 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
348 {
349         FILE *sfp;
350         char *str;
351         int port;
352
353         if (!target->user)
354                 target->user = xstrdup("anonymous:busybox@");
355
356         sfp = open_socket(lsa);
357         if (ftpcmd(NULL, NULL, sfp) != 220)
358                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
359
360         /*
361          * Splitting username:password pair,
362          * trying to log in
363          */
364         str = strchr(target->user, ':');
365         if (str)
366                 *str++ = '\0';
367         switch (ftpcmd("USER ", target->user, sfp)) {
368         case 230:
369                 break;
370         case 331:
371                 if (ftpcmd("PASS ", str, sfp) == 230)
372                         break;
373                 /* fall through (failed login) */
374         default:
375                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
376         }
377
378         ftpcmd("TYPE I", NULL, sfp);
379
380         /*
381          * Querying file size
382          */
383         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
384                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
385                 if (G.content_len < 0 || errno) {
386                         bb_error_msg_and_die("SIZE value is garbage");
387                 }
388                 G.got_clen = 1;
389         }
390
391         /*
392          * Entering passive mode
393          */
394         if (ftpcmd("PASV", NULL, sfp) != 227) {
395  pasv_error:
396                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
397         }
398         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
399         // Server's IP is N1.N2.N3.N4 (we ignore it)
400         // Server's port for data connection is P1*256+P2
401         str = strrchr(G.wget_buf, ')');
402         if (str) str[0] = '\0';
403         str = strrchr(G.wget_buf, ',');
404         if (!str) goto pasv_error;
405         port = xatou_range(str+1, 0, 255);
406         *str = '\0';
407         str = strrchr(G.wget_buf, ',');
408         if (!str) goto pasv_error;
409         port += xatou_range(str+1, 0, 255) * 256;
410         set_nport(&lsa->u.sa, htons(port));
411
412         *dfpp = open_socket(lsa);
413
414         if (G.beg_range) {
415                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
416                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
417                         G.content_len -= G.beg_range;
418         }
419
420         if (ftpcmd("RETR ", target->path, sfp) > 150)
421                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
422
423         return sfp;
424 }
425
426 static void NOINLINE retrieve_file_data(FILE *dfp)
427 {
428 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
429 # if ENABLE_FEATURE_WGET_TIMEOUT
430         unsigned second_cnt;
431 # endif
432         struct pollfd polldata;
433
434         polldata.fd = fileno(dfp);
435         polldata.events = POLLIN | POLLPRI;
436 #endif
437         progress_meter(PROGRESS_START);
438
439         if (G.chunked)
440                 goto get_clen;
441
442         /* Loops only if chunked */
443         while (1) {
444
445 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
446                 /* Must use nonblocking I/O, otherwise fread will loop
447                  * and *block* until it reads full buffer,
448                  * which messes up progress bar and/or timeout logic.
449                  * Because of nonblocking I/O, we need to dance
450                  * very carefully around EAGAIN. See explanation at
451                  * clearerr() call.
452                  */
453                 ndelay_on(polldata.fd);
454 #endif
455                 while (1) {
456                         int n;
457                         unsigned rdsz;
458
459                         rdsz = sizeof(G.wget_buf);
460                         if (G.got_clen) {
461                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
462                                         if ((int)G.content_len <= 0)
463                                                 break;
464                                         rdsz = (unsigned)G.content_len;
465                                 }
466                         }
467
468 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
469 # if ENABLE_FEATURE_WGET_TIMEOUT
470                         second_cnt = G.timeout_seconds;
471 # endif
472                         while (1) {
473                                 if (safe_poll(&polldata, 1, 1000) != 0)
474                                         break; /* error, EOF, or data is available */
475 # if ENABLE_FEATURE_WGET_TIMEOUT
476                                 if (second_cnt != 0 && --second_cnt == 0) {
477                                         progress_meter(PROGRESS_END);
478                                         bb_error_msg_and_die("download timed out");
479                                 }
480 # endif
481                                 /* Needed for "stalled" indicator */
482                                 progress_meter(PROGRESS_BUMP);
483                         }
484
485                         /* fread internally uses read loop, which in our case
486                          * is usually exited when we get EAGAIN.
487                          * In this case, libc sets error marker on the stream.
488                          * Need to clear it before next fread to avoid possible
489                          * rare false positive ferror below. Rare because usually
490                          * fread gets more than zero bytes, and we don't fall
491                          * into if (n <= 0) ...
492                          */
493                         clearerr(dfp);
494                         errno = 0;
495 #endif
496                         n = fread(G.wget_buf, 1, rdsz, dfp);
497                         /* man fread:
498                          * If error occurs, or EOF is reached, the return value
499                          * is a short item count (or zero).
500                          * fread does not distinguish between EOF and error.
501                          */
502                         if (n <= 0) {
503 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
504                                 if (errno == EAGAIN) /* poll lied, there is no data? */
505                                         continue; /* yes */
506 #endif
507                                 if (ferror(dfp))
508                                         bb_perror_msg_and_die(bb_msg_read_error);
509                                 break; /* EOF, not error */
510                         }
511
512                         xwrite(G.output_fd, G.wget_buf, n);
513
514 #if ENABLE_FEATURE_WGET_STATUSBAR
515                         G.transferred += n;
516                         progress_meter(PROGRESS_BUMP);
517 #endif
518                         if (G.got_clen) {
519                                 G.content_len -= n;
520                                 if (G.content_len == 0)
521                                         break;
522                         }
523                 }
524 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
525                 clearerr(dfp);
526                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
527 #endif
528                 if (!G.chunked)
529                         break;
530
531                 fgets_and_trim(dfp); /* Eat empty line */
532  get_clen:
533                 fgets_and_trim(dfp);
534                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
535                 /* FIXME: error check? */
536                 if (G.content_len == 0)
537                         break; /* all done! */
538                 G.got_clen = 1;
539         }
540
541         /* Draw full bar and free its resources */
542         G.chunked = 0;  /* makes it show 100% even for chunked download */
543         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
544         progress_meter(PROGRESS_END);
545 }
546
547 static void download_one_url(const char *url)
548 {
549         bool use_proxy;                 /* Use proxies if env vars are set  */
550         int redir_limit;
551         len_and_sockaddr *lsa;
552         FILE *sfp;                      /* socket to web/ftp server         */
553         FILE *dfp;                      /* socket to ftp server (data)      */
554         char *proxy = NULL;
555         char *fname_out_alloc;
556         struct host_info server;
557         struct host_info target;
558
559         server.allocated = NULL;
560         target.allocated = NULL;
561         server.user = NULL;
562         target.user = NULL;
563
564         parse_url(url, &target);
565
566         /* Use the proxy if necessary */
567         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
568         if (use_proxy) {
569                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
570                 use_proxy = (proxy && proxy[0]);
571                 if (use_proxy)
572                         parse_url(proxy, &server);
573         }
574         if (!use_proxy) {
575                 server.port = target.port;
576                 if (ENABLE_FEATURE_IPV6) {
577                         //free(server.allocated); - can't be non-NULL
578                         server.host = server.allocated = xstrdup(target.host);
579                 } else {
580                         server.host = target.host;
581                 }
582         }
583
584         if (ENABLE_FEATURE_IPV6)
585                 strip_ipv6_scope_id(target.host);
586
587         /* If there was no -O FILE, guess output filename */
588         fname_out_alloc = NULL;
589         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
590                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
591                 /* handle "wget http://kernel.org//" */
592                 if (G.fname_out[0] == '/' || !G.fname_out[0])
593                         G.fname_out = (char*)"index.html";
594                 /* -P DIR is considered only if there was no -O FILE */
595                 else {
596                         if (G.dir_prefix)
597                                 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
598                         else {
599                                 /* redirects may free target.path later, need to make a copy */
600                                 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
601                         }
602                 }
603         }
604 #if ENABLE_FEATURE_WGET_STATUSBAR
605         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
606 #endif
607
608         /* Determine where to start transfer */
609         G.beg_range = 0;
610         if (option_mask32 & WGET_OPT_CONTINUE) {
611                 G.output_fd = open(G.fname_out, O_WRONLY);
612                 if (G.output_fd >= 0) {
613                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
614                 }
615                 /* File doesn't exist. We do not create file here yet.
616                  * We are not sure it exists on remote side */
617         }
618
619         redir_limit = 5;
620  resolve_lsa:
621         lsa = xhost2sockaddr(server.host, server.port);
622         if (!(option_mask32 & WGET_OPT_QUIET)) {
623                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
624                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
625                 free(s);
626         }
627  establish_session:
628         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
629         G.got_clen = 0;
630         G.chunked = 0;
631         if (use_proxy || !target.is_ftp) {
632                 /*
633                  *  HTTP session
634                  */
635                 char *str;
636                 int status;
637
638
639                 /* Open socket to http server */
640                 sfp = open_socket(lsa);
641
642                 /* Send HTTP request */
643                 if (use_proxy) {
644                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
645                                 target.is_ftp ? "f" : "ht", target.host,
646                                 target.path);
647                 } else {
648                         if (option_mask32 & WGET_OPT_POST_DATA)
649                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
650                         else
651                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
652                 }
653
654                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
655                         target.host, G.user_agent);
656
657                 /* Ask server to close the connection as soon as we are done
658                  * (IOW: we do not intend to send more requests)
659                  */
660                 fprintf(sfp, "Connection: close\r\n");
661
662 #if ENABLE_FEATURE_WGET_AUTHENTICATION
663                 if (target.user) {
664 //TODO: URL-decode "user:password" string before base64-encoding:
665 //wget http://test:my%20pass@example.com should send
666 // Authorization: Basic dGVzdDpteSBwYXNz
667 //which decodes to "test:my pass", instead of what we send now:
668 // Authorization: Basic dGVzdDpteSUyMHBhc3M=
669 //Can reuse decodeString() from httpd.c
670                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
671                                 base64enc(target.user));
672                 }
673                 if (use_proxy && server.user) {
674                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
675                                 base64enc(server.user));
676                 }
677 #endif
678
679                 if (G.beg_range)
680                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
681
682 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
683                 if (G.extra_headers)
684                         fputs(G.extra_headers, sfp);
685
686                 if (option_mask32 & WGET_OPT_POST_DATA) {
687                         fprintf(sfp,
688                                 "Content-Type: application/x-www-form-urlencoded\r\n"
689                                 "Content-Length: %u\r\n"
690                                 "\r\n"
691                                 "%s",
692                                 (int) strlen(G.post_data), G.post_data
693                         );
694                 } else
695 #endif
696                 {
697                         fprintf(sfp, "\r\n");
698                 }
699
700                 fflush(sfp);
701
702                 /*
703                  * Retrieve HTTP response line and check for "200" status code.
704                  */
705  read_response:
706                 fgets_and_trim(sfp);
707
708                 str = G.wget_buf;
709                 str = skip_non_whitespace(str);
710                 str = skip_whitespace(str);
711                 // FIXME: no error check
712                 // xatou wouldn't work: "200 OK"
713                 status = atoi(str);
714                 switch (status) {
715                 case 0:
716                 case 100:
717                         while (gethdr(sfp) != NULL)
718                                 /* eat all remaining headers */;
719                         goto read_response;
720                 case 200:
721 /*
722 Response 204 doesn't say "null file", it says "metadata
723 has changed but data didn't":
724
725 "10.2.5 204 No Content
726 The server has fulfilled the request but does not need to return
727 an entity-body, and might want to return updated metainformation.
728 The response MAY include new or updated metainformation in the form
729 of entity-headers, which if present SHOULD be associated with
730 the requested variant.
731
732 If the client is a user agent, it SHOULD NOT change its document
733 view from that which caused the request to be sent. This response
734 is primarily intended to allow input for actions to take place
735 without causing a change to the user agent's active document view,
736 although any new or updated metainformation SHOULD be applied
737 to the document currently in the user agent's active view.
738
739 The 204 response MUST NOT include a message-body, and thus
740 is always terminated by the first empty line after the header fields."
741
742 However, in real world it was observed that some web servers
743 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
744 */
745                 case 204:
746                         break;
747                 case 300:  /* redirection */
748                 case 301:
749                 case 302:
750                 case 303:
751                         break;
752                 case 206:
753                         if (G.beg_range)
754                                 break;
755                         /* fall through */
756                 default:
757                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
758                 }
759
760                 /*
761                  * Retrieve HTTP headers.
762                  */
763                 while ((str = gethdr(sfp)) != NULL) {
764                         static const char keywords[] ALIGN1 =
765                                 "content-length\0""transfer-encoding\0""location\0";
766                         enum {
767                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
768                         };
769                         smalluint key;
770
771                         /* gethdr converted "FOO:" string to lowercase */
772
773                         /* strip trailing whitespace */
774                         char *s = strchrnul(str, '\0') - 1;
775                         while (s >= str && (*s == ' ' || *s == '\t')) {
776                                 *s = '\0';
777                                 s--;
778                         }
779                         key = index_in_strings(keywords, G.wget_buf) + 1;
780                         if (key == KEY_content_length) {
781                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
782                                 if (G.content_len < 0 || errno) {
783                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
784                                 }
785                                 G.got_clen = 1;
786                                 continue;
787                         }
788                         if (key == KEY_transfer_encoding) {
789                                 if (strcmp(str_tolower(str), "chunked") != 0)
790                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
791                                 G.chunked = 1;
792                         }
793                         if (key == KEY_location && status >= 300) {
794                                 if (--redir_limit == 0)
795                                         bb_error_msg_and_die("too many redirections");
796                                 fclose(sfp);
797                                 if (str[0] == '/') {
798                                         free(target.allocated);
799                                         target.path = target.allocated = xstrdup(str+1);
800                                         /* lsa stays the same: it's on the same server */
801                                 } else {
802                                         parse_url(str, &target);
803                                         if (!use_proxy) {
804                                                 free(server.allocated);
805                                                 server.allocated = NULL;
806                                                 server.host = target.host;
807                                                 /* strip_ipv6_scope_id(target.host); - no! */
808                                                 /* we assume remote never gives us IPv6 addr with scope id */
809                                                 server.port = target.port;
810                                                 free(lsa);
811                                                 goto resolve_lsa;
812                                         } /* else: lsa stays the same: we use proxy */
813                                 }
814                                 goto establish_session;
815                         }
816                 }
817 //              if (status >= 300)
818 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
819
820                 /* For HTTP, data is pumped over the same connection */
821                 dfp = sfp;
822
823         } else {
824                 /*
825                  *  FTP session
826                  */
827                 sfp = prepare_ftp_session(&dfp, &target, lsa);
828         }
829
830         free(lsa);
831
832         if (!(option_mask32 & WGET_OPT_SPIDER)) {
833                 if (G.output_fd < 0)
834                         G.output_fd = xopen(G.fname_out, G.o_flags);
835                 retrieve_file_data(dfp);
836                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
837                         xclose(G.output_fd);
838                         G.output_fd = -1;
839                 }
840         }
841
842         if (dfp != sfp) {
843                 /* It's ftp. Close data connection properly */
844                 fclose(dfp);
845                 if (ftpcmd(NULL, NULL, sfp) != 226)
846                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
847                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
848         }
849         fclose(sfp);
850
851         free(server.allocated);
852         free(target.allocated);
853         free(fname_out_alloc);
854 }
855
856 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
857 int wget_main(int argc UNUSED_PARAM, char **argv)
858 {
859 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
860         static const char wget_longopts[] ALIGN1 =
861                 /* name, has_arg, val */
862                 "continue\0"         No_argument       "c"
863 //FIXME: -s isn't --spider, it's --save-headers!
864                 "spider\0"           No_argument       "s"
865                 "quiet\0"            No_argument       "q"
866                 "output-document\0"  Required_argument "O"
867                 "directory-prefix\0" Required_argument "P"
868                 "proxy\0"            Required_argument "Y"
869                 "user-agent\0"       Required_argument "U"
870 #if ENABLE_FEATURE_WGET_TIMEOUT
871                 "timeout\0"          Required_argument "T"
872 #endif
873                 /* Ignored: */
874                 // "tries\0"            Required_argument "t"
875                 /* Ignored (we always use PASV): */
876                 "passive-ftp\0"      No_argument       "\xff"
877                 "header\0"           Required_argument "\xfe"
878                 "post-data\0"        Required_argument "\xfd"
879                 /* Ignored (we don't do ssl) */
880                 "no-check-certificate\0" No_argument   "\xfc"
881                 ;
882 #endif
883
884 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
885         llist_t *headers_llist = NULL;
886 #endif
887
888         INIT_G();
889
890         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
891         G.proxy_flag = "on";   /* use proxies if env vars are set */
892         G.user_agent = "Wget"; /* "User-Agent" header field */
893
894 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
895         applet_long_options = wget_longopts;
896 #endif
897         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
898         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
899                 &G.fname_out, &G.dir_prefix,
900                 &G.proxy_flag, &G.user_agent,
901                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
902                 NULL /* -t RETRIES */
903                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
904                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
905         );
906         argv += optind;
907
908 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
909         if (headers_llist) {
910                 int size = 1;
911                 char *cp;
912                 llist_t *ll = headers_llist;
913                 while (ll) {
914                         size += strlen(ll->data) + 2;
915                         ll = ll->link;
916                 }
917                 G.extra_headers = cp = xmalloc(size);
918                 while (headers_llist) {
919                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
920                 }
921         }
922 #endif
923
924         G.output_fd = -1;
925         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
926         if (G.fname_out) { /* -O FILE ? */
927                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
928                         G.output_fd = 1;
929                         option_mask32 &= ~WGET_OPT_CONTINUE;
930                 }
931                 /* compat with wget: -O FILE can overwrite */
932                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
933         }
934
935         while (*argv)
936                 download_one_url(*argv++);
937
938         if (G.output_fd >= 0)
939                 xclose(G.output_fd);
940
941         return EXIT_SUCCESS;
942 }