wget: add dummy --no-cache
[oweals/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage:    "        [--no-check-certificate] [--no-cache]" */
18 //usage:       "        [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
19 //usage:        )
20 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23 //usage:        )
24 //usage:#define wget_full_usage "\n\n"
25 //usage:       "Retrieve files via HTTP or FTP\n"
26 //usage:     "\n        -s      Spider mode - only check file existence"
27 //usage:     "\n        -c      Continue retrieval of aborted transfer"
28 //usage:     "\n        -q      Quiet"
29 //usage:     "\n        -P DIR  Save to DIR (default .)"
30 //usage:        IF_FEATURE_WGET_TIMEOUT(
31 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
32 //usage:        )
33 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
34 //usage:     "\n        -U STR  Use STR for User-Agent header"
35 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
36
37 #include "libbb.h"
38
39 #if 0
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
41 #else
42 # define log_io(...) ((void)0)
43 #endif
44
45
/* Components of one parsed URL; filled in by parse_url(). */
struct host_info {
        char *allocated;        /* malloced copy of the URL; host and path point into it */
        const char *path;       /* part after "host/" (no leading '/'), or "" */
        const char *user;       /* "user[:password]" taken from the URL, NULL if none was seen */
        char       *host;       /* "host[:port]" part of the URL */
        int         port;       /* default port for the URL scheme (http or ftp) */
        smallint    is_ftp;     /* 1 for ftp:// URLs, 0 for http:// */
};
54
55
56 /* Globals */
/* All applet-wide state; allocated zero-filled by INIT_G() and accessed via G. */
struct globals {
        off_t content_len;        /* Content-length of the file */
        off_t beg_range;          /* Range at which continue begins */
#if ENABLE_FEATURE_WGET_STATUSBAR
        off_t transferred;        /* Number of bytes transferred so far */
        const char *curfile;      /* Name of current file being transferred */
        bb_progress_t pmt;
#endif
        char *dir_prefix;       /* directory prefix for guessed output names (-P) */
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
        char *post_data;        /* request body sent when WGET_OPT_POST_DATA is set */
        char *extra_headers;    /* extra header text written verbatim into the request */
#endif
        char *fname_out;        /* where to direct output (-O) */
        const char *proxy_flag; /* Use proxies if env vars are set */
        const char *user_agent; /* "User-Agent" header field */
#if ENABLE_FEATURE_WGET_TIMEOUT
        unsigned timeout_seconds; /* read timeout in seconds; INIT_G() sets 900 */
#endif
        int output_fd;          /* descriptor the downloaded data is written to */
        int o_flags;            /* NOTE(review): presumably open() flags for the output file - confirm */
        smallint chunked;         /* chunked transfer encoding */
        smallint got_clen;        /* got content-length: from server  */
        /* Local downloads do benefit from big buffer.
         * With 512 byte buffer, it was measured to be
         * an order of magnitude slower than with big one.
         */
        uint64_t just_to_align_next_member;
        char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
} FIX_ALIASING;
/* G is the globals structure itself, reached through ptr_to_globals */
#define G (*ptr_to_globals)
/* Allocate zero-filled globals and set the non-zero defaults */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
        IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
} while (0)
92
93
94 /* Must match option string! */
/* One bit per command line option, tested against option_mask32.
 * The two long-option-only bits are multiplied by
 * ENABLE_FEATURE_WGET_LONG_OPTIONS, so when that feature is compiled
 * out (value 0) they become 0 and tests against them are always false.
 */
enum {
        WGET_OPT_CONTINUE   = (1 << 0),
        WGET_OPT_SPIDER     = (1 << 1),
        WGET_OPT_QUIET      = (1 << 2),
        WGET_OPT_OUTNAME    = (1 << 3),
        WGET_OPT_PREFIX     = (1 << 4),
        WGET_OPT_PROXY      = (1 << 5),
        WGET_OPT_USER_AGENT = (1 << 6),
        WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
        WGET_OPT_RETRIES    = (1 << 8),
        WGET_OPT_PASSIVE    = (1 << 9),
        WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
        WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
};
109
/* Values for the "flag" argument of progress_meter() */
enum {
        PROGRESS_START = -1,    /* initialize the meter before the first update */
        PROGRESS_END   = 0,     /* final update; meter resources are released */
        PROGRESS_BUMP  = 1,     /* plain periodic update */
};
115 #if ENABLE_FEATURE_WGET_STATUSBAR
116 static void progress_meter(int flag)
117 {
118         if (option_mask32 & WGET_OPT_QUIET)
119                 return;
120
121         if (flag == PROGRESS_START)
122                 bb_progress_init(&G.pmt, G.curfile);
123
124         bb_progress_update(&G.pmt,
125                         G.beg_range,
126                         G.transferred,
127                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
128         );
129
130         if (flag == PROGRESS_END) {
131                 bb_progress_free(&G.pmt);
132                 bb_putchar_stderr('\n');
133                 G.transferred = 0;
134         }
135 }
136 #else
137 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
138 #endif
139
140
141 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
142  * local addresses can have a scope identifier to specify the
143  * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
144  * identifier is only valid on a single node.
145  *
146  * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
147  * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
148  * in the Host header as invalid requests, see
149  * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
150  */
/*
 * Remove the "%zone" part from a bracketed IPv6 host, in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Hosts which do not start with '[', have no '%', or are not of the
 * form "[xx]" / "[xx]:nn" are left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
        char *scope, *cp;

        /* bbox wget actually handles IPv6 addresses without [], like
         * wget "http://::1/xxx", but this is not standard.
         * To save code, _here_ we do not support it. */

        if (host[0] != '[')
                return; /* not IPv6 */

        scope = strchr(host, '%');
        if (!scope)
                return;

        /* Remove the IPv6 zone identifier from the host address */
        cp = strchr(host, ']');
        if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
                /* malformed address (not "[xx]:nn" or "[xx]") */
                return;
        }

        /* The '%' must be inside the brackets. With input like "[::1]:%5"
         * it sits after ']': there is no zone id to strip, and copying
         * "]..." forward onto itself would never reach the terminator.
         */
        if (scope > cp)
                return;

        /* cp points to "]...", scope points to "%eth0]...".
         * Regions overlap, hence memmove; +1 copies the terminating NUL.
         */
        memmove(scope, cp, strlen(cp) + 1);
}
176
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return G.wget_buf.
 * The result is overwritten by the next user of that buffer.
 */
static char *base64enc(const char *str)
{
        /* paranoia: cap the input length relative to the buffer size */
        const unsigned max_input = sizeof(G.wget_buf)/4*3 - 10;
        unsigned input_len;

        input_len = strlen(str);
        if (input_len > max_input)
                input_len = max_input;

        bb_uuencode(G.wget_buf, str, input_len, bb_uuenc_tbl_base64);
        return G.wget_buf;
}
#endif
188
/*
 * Cut s at its first byte below ' ' (any control character; the
 * terminating NUL itself also stops the scan). Bytes >= 0x80 are kept.
 * Returns s, so the call can be used directly as a printf argument.
 */
static char* sanitize_string(char *s)
{
        unsigned char *cur;

        for (cur = (unsigned char *) s; *cur >= ' '; cur++)
                continue;
        *cur = '\0';

        return s;
}
197
198 static FILE *open_socket(len_and_sockaddr *lsa)
199 {
200         FILE *fp;
201
202         /* glibc 2.4 seems to try seeking on it - ??! */
203         /* hopefully it understands what ESPIPE means... */
204         fp = fdopen(xconnect_stream(lsa), "r+");
205         if (fp == NULL)
206                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
207
208         return fp;
209 }
210
211 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
212 static char fgets_and_trim(FILE *fp)
213 {
214         char c;
215         char *buf_ptr;
216
217         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
218                 bb_perror_msg_and_die("error getting response");
219
220         buf_ptr = strchrnul(G.wget_buf, '\n');
221         c = *buf_ptr;
222         *buf_ptr = '\0';
223         buf_ptr = strchrnul(G.wget_buf, '\r');
224         *buf_ptr = '\0';
225
226         log_io("< %s", G.wget_buf);
227
228         return c;
229 }
230
231 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
232 {
233         int result;
234         if (s1) {
235                 if (!s2)
236                         s2 = "";
237                 fprintf(fp, "%s%s\r\n", s1, s2);
238                 fflush(fp);
239                 log_io("> %s%s", s1, s2);
240         }
241
242         do {
243                 fgets_and_trim(fp);
244         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
245
246         G.wget_buf[3] = '\0';
247         result = xatoi_positive(G.wget_buf);
248         G.wget_buf[3] = ' ';
249         return result;
250 }
251
252 static void parse_url(const char *src_url, struct host_info *h)
253 {
254         char *url, *p, *sp;
255
256         free(h->allocated);
257         h->allocated = url = xstrdup(src_url);
258
259         if (strncmp(url, "http://", 7) == 0) {
260                 h->port = bb_lookup_port("http", "tcp", 80);
261                 h->host = url + 7;
262                 h->is_ftp = 0;
263         } else if (strncmp(url, "ftp://", 6) == 0) {
264                 h->port = bb_lookup_port("ftp", "tcp", 21);
265                 h->host = url + 6;
266                 h->is_ftp = 1;
267         } else
268                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
269
270         // FYI:
271         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
272         //   'GET /?var=a/b HTTP 1.0'
273         //   and saves 'index.html?var=a%2Fb' (we save 'b')
274         // wget 'http://busybox.net?login=john@doe':
275         //   request: 'GET /?login=john@doe HTTP/1.0'
276         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
277         // wget 'http://busybox.net#test/test':
278         //   request: 'GET / HTTP/1.0'
279         //   saves: 'index.html' (we save 'test')
280         //
281         // We also don't add unique .N suffix if file exists...
282         sp = strchr(h->host, '/');
283         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
284         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
285         if (!sp) {
286                 h->path = "";
287         } else if (*sp == '/') {
288                 *sp = '\0';
289                 h->path = sp + 1;
290         } else { // '#' or '?'
291                 // http://busybox.net?login=john@doe is a valid URL
292                 // memmove converts to:
293                 // http:/busybox.nett?login=john@doe...
294                 memmove(h->host - 1, h->host, sp - h->host);
295                 h->host--;
296                 sp[-1] = '\0';
297                 h->path = sp;
298         }
299
300         // We used to set h->user to NULL here, but this interferes
301         // with handling of code 302 ("object was moved")
302
303         sp = strrchr(h->host, '@');
304         if (sp != NULL) {
305                 // URL-decode "user:password" string before base64-encoding:
306                 // wget http://test:my%20pass@example.com should send
307                 // Authorization: Basic dGVzdDpteSBwYXNz
308                 // which decodes to "test:my pass".
309                 // Standard wget and curl do this too.
310                 *sp = '\0';
311                 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
312                 h->host = sp + 1;
313         }
314
315         sp = h->host;
316 }
317
318 static char *gethdr(FILE *fp)
319 {
320         char *s, *hdrval;
321         int c;
322
323         /* retrieve header line */
324         c = fgets_and_trim(fp);
325
326         /* end of the headers? */
327         if (G.wget_buf[0] == '\0')
328                 return NULL;
329
330         /* convert the header name to lower case */
331         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
332                 /* tolower for "A-Z", no-op for "0-9a-z-." */
333                 *s |= 0x20;
334         }
335
336         /* verify we are at the end of the header name */
337         if (*s != ':')
338                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
339
340         /* locate the start of the header value */
341         *s++ = '\0';
342         hdrval = skip_whitespace(s);
343
344         if (c != '\n') {
345                 /* Rats! The buffer isn't big enough to hold the entire header value */
346                 while (c = getc(fp), c != EOF && c != '\n')
347                         continue;
348         }
349
350         return hdrval;
351 }
352
353 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
354 {
355         FILE *sfp;
356         char *str;
357         int port;
358
359         if (!target->user)
360                 target->user = xstrdup("anonymous:busybox@");
361
362         sfp = open_socket(lsa);
363         if (ftpcmd(NULL, NULL, sfp) != 220)
364                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
365
366         /*
367          * Splitting username:password pair,
368          * trying to log in
369          */
370         str = strchr(target->user, ':');
371         if (str)
372                 *str++ = '\0';
373         switch (ftpcmd("USER ", target->user, sfp)) {
374         case 230:
375                 break;
376         case 331:
377                 if (ftpcmd("PASS ", str, sfp) == 230)
378                         break;
379                 /* fall through (failed login) */
380         default:
381                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
382         }
383
384         ftpcmd("TYPE I", NULL, sfp);
385
386         /*
387          * Querying file size
388          */
389         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
390                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
391                 if (G.content_len < 0 || errno) {
392                         bb_error_msg_and_die("SIZE value is garbage");
393                 }
394                 G.got_clen = 1;
395         }
396
397         /*
398          * Entering passive mode
399          */
400         if (ftpcmd("PASV", NULL, sfp) != 227) {
401  pasv_error:
402                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
403         }
404         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
405         // Server's IP is N1.N2.N3.N4 (we ignore it)
406         // Server's port for data connection is P1*256+P2
407         str = strrchr(G.wget_buf, ')');
408         if (str) str[0] = '\0';
409         str = strrchr(G.wget_buf, ',');
410         if (!str) goto pasv_error;
411         port = xatou_range(str+1, 0, 255);
412         *str = '\0';
413         str = strrchr(G.wget_buf, ',');
414         if (!str) goto pasv_error;
415         port += xatou_range(str+1, 0, 255) * 256;
416         set_nport(&lsa->u.sa, htons(port));
417
418         *dfpp = open_socket(lsa);
419
420         if (G.beg_range != 0) {
421                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
422                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
423                         G.content_len -= G.beg_range;
424         }
425
426         if (ftpcmd("RETR ", target->path, sfp) > 150)
427                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
428
429         return sfp;
430 }
431
/*
 * Copy the response body from dfp to G.output_fd.
 *
 * Without chunking, data is read until G.content_len bytes have been
 * copied (when the length is known) or until EOF. With chunking, each
 * pass of the outer loop handles one chunk; the chunk size is parsed as
 * a hex number and a size of 0 ends the transfer.
 * Updates the progress meter on the way. Dies on read error and, with
 * the timeout feature, after G.timeout_seconds seconds without data.
 */
static void NOINLINE retrieve_file_data(FILE *dfp)
{
#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
# if ENABLE_FEATURE_WGET_TIMEOUT
        unsigned second_cnt;
# endif
        struct pollfd polldata;

        polldata.fd = fileno(dfp);
        polldata.events = POLLIN | POLLPRI;
#endif
        progress_meter(PROGRESS_START);

        /* A chunked body starts with a chunk size line: read that first */
        if (G.chunked)
                goto get_clen;

        /* Loops only if chunked */
        while (1) {

#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
                /* Must use nonblocking I/O, otherwise fread will loop
                 * and *block* until it reads full buffer,
                 * which messes up progress bar and/or timeout logic.
                 * Because of nonblocking I/O, we need to dance
                 * very carefully around EAGAIN. See explanation at
                 * clearerr() call.
                 */
                ndelay_on(polldata.fd);
#endif
                while (1) {
                        int n;
                        unsigned rdsz;

                        /* Read a full buffer, or only the remaining bytes
                         * when fewer than that are left to receive */
                        rdsz = sizeof(G.wget_buf);
                        if (G.got_clen) {
                                if (G.content_len < (off_t)sizeof(G.wget_buf)) {
                                        if ((int)G.content_len <= 0)
                                                break;
                                        rdsz = (unsigned)G.content_len;
                                }
                        }

#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
# if ENABLE_FEATURE_WGET_TIMEOUT
                        second_cnt = G.timeout_seconds;
# endif
                        /* Wait in one second steps, so that the timeout can be
                         * counted down and the progress meter refreshed */
                        while (1) {
                                if (safe_poll(&polldata, 1, 1000) != 0)
                                        break; /* error, EOF, or data is available */
# if ENABLE_FEATURE_WGET_TIMEOUT
                                /* a timeout of 0 never expires */
                                if (second_cnt != 0 && --second_cnt == 0) {
                                        progress_meter(PROGRESS_END);
                                        bb_error_msg_and_die("download timed out");
                                }
# endif
                                /* Needed for "stalled" indicator */
                                progress_meter(PROGRESS_BUMP);
                        }

                        /* fread internally uses read loop, which in our case
                         * is usually exited when we get EAGAIN.
                         * In this case, libc sets error marker on the stream.
                         * Need to clear it before next fread to avoid possible
                         * rare false positive ferror below. Rare because usually
                         * fread gets more than zero bytes, and we don't fall
                         * into if (n <= 0) ...
                         */
                        clearerr(dfp);
                        errno = 0;
#endif
                        n = fread(G.wget_buf, 1, rdsz, dfp);
                        /* man fread:
                         * If error occurs, or EOF is reached, the return value
                         * is a short item count (or zero).
                         * fread does not distinguish between EOF and error.
                         */
                        if (n <= 0) {
#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
                                if (errno == EAGAIN) /* poll lied, there is no data? */
                                        continue; /* yes */
#endif
                                if (ferror(dfp))
                                        bb_perror_msg_and_die(bb_msg_read_error);
                                break; /* EOF, not error */
                        }

                        xwrite(G.output_fd, G.wget_buf, n);

#if ENABLE_FEATURE_WGET_STATUSBAR
                        G.transferred += n;
                        progress_meter(PROGRESS_BUMP);
#endif
                        /* G.content_len counts down the bytes still expected */
                        if (G.got_clen) {
                                G.content_len -= n;
                                if (G.content_len == 0)
                                        break;
                        }
                }
#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
                clearerr(dfp);
                ndelay_off(polldata.fd); /* else fgets can get very unhappy */
#endif
                if (!G.chunked)
                        break;

                fgets_and_trim(dfp); /* Eat empty line */
 get_clen:
                /* Next chunk size: a hexadecimal number at the start of the line */
                fgets_and_trim(dfp);
                G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
                /* FIXME: error check? */
                if (G.content_len == 0)
                        break; /* all done! */
                G.got_clen = 1;
        }

        /* Draw full bar and free its resources */
        G.chunked = 0;  /* makes it show 100% even for chunked download */
        G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
        progress_meter(PROGRESS_END);
}
552
553 static void download_one_url(const char *url)
554 {
555         bool use_proxy;                 /* Use proxies if env vars are set  */
556         int redir_limit;
557         len_and_sockaddr *lsa;
558         FILE *sfp;                      /* socket to web/ftp server         */
559         FILE *dfp;                      /* socket to ftp server (data)      */
560         char *proxy = NULL;
561         char *fname_out_alloc;
562         char *redirected_path = NULL;
563         struct host_info server;
564         struct host_info target;
565
566         server.allocated = NULL;
567         target.allocated = NULL;
568         server.user = NULL;
569         target.user = NULL;
570
571         parse_url(url, &target);
572
573         /* Use the proxy if necessary */
574         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
575         if (use_proxy) {
576                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
577                 use_proxy = (proxy && proxy[0]);
578                 if (use_proxy)
579                         parse_url(proxy, &server);
580         }
581         if (!use_proxy) {
582                 server.port = target.port;
583                 if (ENABLE_FEATURE_IPV6) {
584                         //free(server.allocated); - can't be non-NULL
585                         server.host = server.allocated = xstrdup(target.host);
586                 } else {
587                         server.host = target.host;
588                 }
589         }
590
591         if (ENABLE_FEATURE_IPV6)
592                 strip_ipv6_scope_id(target.host);
593
594         /* If there was no -O FILE, guess output filename */
595         fname_out_alloc = NULL;
596         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
597                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
598                 /* handle "wget http://kernel.org//" */
599                 if (G.fname_out[0] == '/' || !G.fname_out[0])
600                         G.fname_out = (char*)"index.html";
601                 /* -P DIR is considered only if there was no -O FILE */
602                 else {
603                         if (G.dir_prefix)
604                                 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
605                         else {
606                                 /* redirects may free target.path later, need to make a copy */
607                                 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
608                         }
609                 }
610         }
611 #if ENABLE_FEATURE_WGET_STATUSBAR
612         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
613 #endif
614
615         /* Determine where to start transfer */
616         G.beg_range = 0;
617         if (option_mask32 & WGET_OPT_CONTINUE) {
618                 G.output_fd = open(G.fname_out, O_WRONLY);
619                 if (G.output_fd >= 0) {
620                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
621                 }
622                 /* File doesn't exist. We do not create file here yet.
623                  * We are not sure it exists on remote side */
624         }
625
626         redir_limit = 5;
627  resolve_lsa:
628         lsa = xhost2sockaddr(server.host, server.port);
629         if (!(option_mask32 & WGET_OPT_QUIET)) {
630                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
631                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
632                 free(s);
633         }
634  establish_session:
635         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
636         G.got_clen = 0;
637         G.chunked = 0;
638         if (use_proxy || !target.is_ftp) {
639                 /*
640                  *  HTTP session
641                  */
642                 char *str;
643                 int status;
644
645
646                 /* Open socket to http server */
647                 sfp = open_socket(lsa);
648
649                 /* Send HTTP request */
650                 if (use_proxy) {
651                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
652                                 target.is_ftp ? "f" : "ht", target.host,
653                                 target.path);
654                 } else {
655                         if (option_mask32 & WGET_OPT_POST_DATA)
656                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
657                         else
658                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
659                 }
660
661                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
662                         target.host, G.user_agent);
663
664                 /* Ask server to close the connection as soon as we are done
665                  * (IOW: we do not intend to send more requests)
666                  */
667                 fprintf(sfp, "Connection: close\r\n");
668
669 #if ENABLE_FEATURE_WGET_AUTHENTICATION
670                 if (target.user) {
671                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
672                                 base64enc(target.user));
673                 }
674                 if (use_proxy && server.user) {
675                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
676                                 base64enc(server.user));
677                 }
678 #endif
679
680                 if (G.beg_range != 0)
681                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
682
683 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
684                 if (G.extra_headers)
685                         fputs(G.extra_headers, sfp);
686
687                 if (option_mask32 & WGET_OPT_POST_DATA) {
688                         fprintf(sfp,
689                                 "Content-Type: application/x-www-form-urlencoded\r\n"
690                                 "Content-Length: %u\r\n"
691                                 "\r\n"
692                                 "%s",
693                                 (int) strlen(G.post_data), G.post_data
694                         );
695                 } else
696 #endif
697                 {
698                         fprintf(sfp, "\r\n");
699                 }
700
701                 fflush(sfp);
702
703                 /*
704                  * Retrieve HTTP response line and check for "200" status code.
705                  */
706  read_response:
707                 fgets_and_trim(sfp);
708
709                 str = G.wget_buf;
710                 str = skip_non_whitespace(str);
711                 str = skip_whitespace(str);
712                 // FIXME: no error check
713                 // xatou wouldn't work: "200 OK"
714                 status = atoi(str);
715                 switch (status) {
716                 case 0:
717                 case 100:
718                         while (gethdr(sfp) != NULL)
719                                 /* eat all remaining headers */;
720                         goto read_response;
721                 case 200:
722 /*
723 Response 204 doesn't say "null file", it says "metadata
724 has changed but data didn't":
725
726 "10.2.5 204 No Content
727 The server has fulfilled the request but does not need to return
728 an entity-body, and might want to return updated metainformation.
729 The response MAY include new or updated metainformation in the form
730 of entity-headers, which if present SHOULD be associated with
731 the requested variant.
732
733 If the client is a user agent, it SHOULD NOT change its document
734 view from that which caused the request to be sent. This response
735 is primarily intended to allow input for actions to take place
736 without causing a change to the user agent's active document view,
737 although any new or updated metainformation SHOULD be applied
738 to the document currently in the user agent's active view.
739
740 The 204 response MUST NOT include a message-body, and thus
741 is always terminated by the first empty line after the header fields."
742
743 However, in real world it was observed that some web servers
744 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
745 */
746                 case 204:
747                         break;
748                 case 300:  /* redirection */
749                 case 301:
750                 case 302:
751                 case 303:
752                         break;
753                 case 206: /* Partial Content */
754                         if (G.beg_range != 0)
755                                 /* "Range:..." worked. Good. */
756                                 break;
757                         /* Partial Content even though we did not ask for it??? */
758                         /* fall through */
759                 default:
760                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
761                 }
762
763                 /*
764                  * Retrieve HTTP headers.
765                  */
766                 while ((str = gethdr(sfp)) != NULL) {
767                         static const char keywords[] ALIGN1 =
768                                 "content-length\0""transfer-encoding\0""location\0";
769                         enum {
770                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
771                         };
772                         smalluint key;
773
774                         /* gethdr converted "FOO:" string to lowercase */
775
776                         /* strip trailing whitespace */
777                         char *s = strchrnul(str, '\0') - 1;
778                         while (s >= str && (*s == ' ' || *s == '\t')) {
779                                 *s = '\0';
780                                 s--;
781                         }
782                         key = index_in_strings(keywords, G.wget_buf) + 1;
783                         if (key == KEY_content_length) {
784                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
785                                 if (G.content_len < 0 || errno) {
786                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
787                                 }
788                                 G.got_clen = 1;
789                                 continue;
790                         }
791                         if (key == KEY_transfer_encoding) {
792                                 if (strcmp(str_tolower(str), "chunked") != 0)
793                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
794                                 G.chunked = 1;
795                         }
796                         if (key == KEY_location && status >= 300) {
797                                 if (--redir_limit == 0)
798                                         bb_error_msg_and_die("too many redirections");
799                                 fclose(sfp);
800                                 if (str[0] == '/') {
801                                         free(redirected_path);
802                                         target.path = redirected_path = xstrdup(str+1);
803                                         /* lsa stays the same: it's on the same server */
804                                 } else {
805                                         parse_url(str, &target);
806                                         if (!use_proxy) {
807                                                 free(server.allocated);
808                                                 server.allocated = NULL;
809                                                 server.host = target.host;
810                                                 /* strip_ipv6_scope_id(target.host); - no! */
811                                                 /* we assume remote never gives us IPv6 addr with scope id */
812                                                 server.port = target.port;
813                                                 free(lsa);
814                                                 goto resolve_lsa;
815                                         } /* else: lsa stays the same: we use proxy */
816                                 }
817                                 goto establish_session;
818                         }
819                 }
820 //              if (status >= 300)
821 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
822
823                 /* For HTTP, data is pumped over the same connection */
824                 dfp = sfp;
825
826         } else {
827                 /*
828                  *  FTP session
829                  */
830                 sfp = prepare_ftp_session(&dfp, &target, lsa);
831         }
832
833         free(lsa);
834
835         if (!(option_mask32 & WGET_OPT_SPIDER)) {
836                 if (G.output_fd < 0)
837                         G.output_fd = xopen(G.fname_out, G.o_flags);
838                 retrieve_file_data(dfp);
839                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
840                         xclose(G.output_fd);
841                         G.output_fd = -1;
842                 }
843         }
844
845         if (dfp != sfp) {
846                 /* It's ftp. Close data connection properly */
847                 fclose(dfp);
848                 if (ftpcmd(NULL, NULL, sfp) != 226)
849                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
850                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
851         }
852         fclose(sfp);
853
854         free(server.allocated);
855         free(target.allocated);
856         free(fname_out_alloc);
857         free(redirected_path);
858 }
859
860 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
861 int wget_main(int argc UNUSED_PARAM, char **argv)
862 {
863 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
864         static const char wget_longopts[] ALIGN1 =
865                 /* name, has_arg, val */
866                 "continue\0"         No_argument       "c"
867 //FIXME: -s isn't --spider, it's --save-headers!
868                 "spider\0"           No_argument       "s"
869                 "quiet\0"            No_argument       "q"
870                 "output-document\0"  Required_argument "O"
871                 "directory-prefix\0" Required_argument "P"
872                 "proxy\0"            Required_argument "Y"
873                 "user-agent\0"       Required_argument "U"
874 #if ENABLE_FEATURE_WGET_TIMEOUT
875                 "timeout\0"          Required_argument "T"
876 #endif
877                 /* Ignored: */
878                 // "tries\0"            Required_argument "t"
879                 /* Ignored (we always use PASV): */
880                 "passive-ftp\0"      No_argument       "\xff"
881                 "header\0"           Required_argument "\xfe"
882                 "post-data\0"        Required_argument "\xfd"
883                 /* Ignored (we don't do ssl) */
884                 "no-check-certificate\0" No_argument   "\xfc"
885                 /* Ignored (we don't support caching) */
886                 "no-cache\0"         No_argument       "\xfb"
887                 ;
888 #endif
889
890 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
891         llist_t *headers_llist = NULL;
892 #endif
893
894         INIT_G();
895
896         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
897         G.proxy_flag = "on";   /* use proxies if env vars are set */
898         G.user_agent = "Wget"; /* "User-Agent" header field */
899
900 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
901         applet_long_options = wget_longopts;
902 #endif
903         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
904         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
905                 &G.fname_out, &G.dir_prefix,
906                 &G.proxy_flag, &G.user_agent,
907                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
908                 NULL /* -t RETRIES */
909                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
910                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
911         );
912         argv += optind;
913
914 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
915         if (headers_llist) {
916                 int size = 1;
917                 char *cp;
918                 llist_t *ll = headers_llist;
919                 while (ll) {
920                         size += strlen(ll->data) + 2;
921                         ll = ll->link;
922                 }
923                 G.extra_headers = cp = xmalloc(size);
924                 while (headers_llist) {
925                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
926                 }
927         }
928 #endif
929
930         G.output_fd = -1;
931         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
932         if (G.fname_out) { /* -O FILE ? */
933                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
934                         G.output_fd = 1;
935                         option_mask32 &= ~WGET_OPT_CONTINUE;
936                 }
937                 /* compat with wget: -O FILE can overwrite */
938                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
939         }
940
941         while (*argv)
942                 download_one_url(*argv++);
943
944         if (G.output_fd >= 0)
945                 xclose(G.output_fd);
946
947         return EXIT_SUCCESS;
948 }