6443705fde70f312de3703395a0aad23b421ef32
[oweals/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 //usage:       "        [--no-check-certificate] [-U|--user-agent AGENT]"
17 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
18 //usage:        )
19 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
20 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
21 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
22 //usage:        )
23 //usage:#define wget_full_usage "\n\n"
24 //usage:       "Retrieve files via HTTP or FTP\n"
25 //usage:     "\n        -s      Spider mode - only check file existence"
26 //usage:     "\n        -c      Continue retrieval of aborted transfer"
27 //usage:     "\n        -q      Quiet"
28 //usage:     "\n        -P DIR  Save to DIR (default .)"
29 //usage:        IF_FEATURE_WGET_TIMEOUT(
30 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
31 //usage:        )
32 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
33 //usage:     "\n        -U STR  Use STR for User-Agent header"
34 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
35
36 #include "libbb.h"
37
38 //#define log_io(...) bb_error_msg(__VA_ARGS__)
39 #define log_io(...) ((void)0)
40
41
42 struct host_info {
43         char *allocated;
44         const char *path;
45         const char *user;
46         char       *host;
47         int         port;
48         smallint    is_ftp;
49 };
50
51
52 /* Globals */
53 struct globals {
54         off_t content_len;        /* Content-length of the file */
55         off_t beg_range;          /* Range at which continue begins */
56 #if ENABLE_FEATURE_WGET_STATUSBAR
57         off_t transferred;        /* Number of bytes transferred so far */
58         const char *curfile;      /* Name of current file being transferred */
59         bb_progress_t pmt;
60 #endif
61         char *dir_prefix;
62 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
63         char *post_data;
64         char *extra_headers;
65 #endif
66         char *fname_out;        /* where to direct output (-O) */
67         const char *proxy_flag; /* Use proxies if env vars are set */
68         const char *user_agent; /* "User-Agent" header field */
69 #if ENABLE_FEATURE_WGET_TIMEOUT
70         unsigned timeout_seconds;
71 #endif
72         int output_fd;
73         int o_flags;
74         smallint chunked;         /* chunked transfer encoding */
75         smallint got_clen;        /* got content-length: from server  */
76         /* Local downloads do benefit from big buffer.
77          * With 512 byte buffer, it was measured to be
78          * an order of magnitude slower than with big one.
79          */
80         uint64_t just_to_align_next_member;
81         char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
82 } FIX_ALIASING;
83 #define G (*ptr_to_globals)
84 #define INIT_G() do { \
85         SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
86         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
87 } while (0)
88
89
90 /* Must match option string! */
91 enum {
92         WGET_OPT_CONTINUE   = (1 << 0),
93         WGET_OPT_SPIDER     = (1 << 1),
94         WGET_OPT_QUIET      = (1 << 2),
95         WGET_OPT_OUTNAME    = (1 << 3),
96         WGET_OPT_PREFIX     = (1 << 4),
97         WGET_OPT_PROXY      = (1 << 5),
98         WGET_OPT_USER_AGENT = (1 << 6),
99         WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
100         WGET_OPT_RETRIES    = (1 << 8),
101         WGET_OPT_PASSIVE    = (1 << 9),
102         WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
103         WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
104 };
105
/* Values accepted by progress_meter() */
enum {
	PROGRESS_END   = 0,   /* final redraw, then release the bar */
	PROGRESS_BUMP  = 1,   /* plain redraw */
	PROGRESS_START = -1,  /* set the bar up before the first redraw */
};
111 #if ENABLE_FEATURE_WGET_STATUSBAR
112 static void progress_meter(int flag)
113 {
114         if (option_mask32 & WGET_OPT_QUIET)
115                 return;
116
117         if (flag == PROGRESS_START)
118                 bb_progress_init(&G.pmt, G.curfile);
119
120         bb_progress_update(&G.pmt,
121                         G.beg_range,
122                         G.transferred,
123                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
124         );
125
126         if (flag == PROGRESS_END) {
127                 bb_progress_free(&G.pmt);
128                 bb_putchar_stderr('\n');
129                 G.transferred = 0;
130         }
131 }
132 #else
133 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
134 #endif
135
136
/* IPv6 knows scoped address types, i.e. link-local and site-local
 * addresses. A link-local address can carry a scope identifier naming the
 * interface/link it is valid on (e.g. fe80::1%eth0). This identifier is
 * only meaningful on the local node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire
 * unless all nodes agree on its semantics. Apache, for example, rejects
 * requests whose Host header contains a zone identifier, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * This function removes "%zone" in place from a bracketed literal:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80". Any host that is not of
 * the form "[addr%zone]" or "[addr%zone]:port" is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (cp < scope) {
		/* '%' sits after the closing bracket (e.g. "[::1]:80%x"):
		 * there is no zone id inside the brackets. Copying "]..."
		 * forward over itself would write past the end of the
		 * string, so leave the host alone. */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]...":
	 * slide the tail (including its NUL) down over the zone id */
	memmove(scope, cp, strlen(cp) + 1);
}
172
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into the shared G.wget_buf and return that buffer.
 * The result is overwritten by the next user of G.wget_buf, so callers
 * must consume it immediately.
 */
static char *base64enc(const char *str)
{
	/* paranoia: cap the input so the encoded form fits the buffer */
	const unsigned limit = sizeof(G.wget_buf)/4*3 - 10;
	unsigned n = strlen(str);

	if (n > limit)
		n = limit;
	bb_uuencode(G.wget_buf, str, n, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
184
/*
 * Cut s, in place, at its first byte below ' ' (any control character;
 * the terminating NUL also counts, so the scan always stops).
 * Bytes are compared as unsigned, hence values >= 0x80 are kept.
 * Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cut;

	for (cut = (unsigned char *) s; *cut >= ' '; cut++)
		continue;
	*cut = '\0';
	return s;
}
193
194 static FILE *open_socket(len_and_sockaddr *lsa)
195 {
196         FILE *fp;
197
198         /* glibc 2.4 seems to try seeking on it - ??! */
199         /* hopefully it understands what ESPIPE means... */
200         fp = fdopen(xconnect_stream(lsa), "r+");
201         if (fp == NULL)
202                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
203
204         return fp;
205 }
206
207 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
208 static char fgets_and_trim(FILE *fp)
209 {
210         char c;
211         char *buf_ptr;
212
213         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
214                 bb_perror_msg_and_die("error getting response");
215
216         buf_ptr = strchrnul(G.wget_buf, '\n');
217         c = *buf_ptr;
218         *buf_ptr = '\0';
219         buf_ptr = strchrnul(G.wget_buf, '\r');
220         *buf_ptr = '\0';
221
222         log_io("< %s", G.wget_buf);
223
224         return c;
225 }
226
227 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
228 {
229         int result;
230         if (s1) {
231                 if (!s2)
232                         s2 = "";
233                 fprintf(fp, "%s%s\r\n", s1, s2);
234                 fflush(fp);
235                 log_io("> %s%s", s1, s2);
236         }
237
238         do {
239                 fgets_and_trim(fp);
240         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
241
242         G.wget_buf[3] = '\0';
243         result = xatoi_positive(G.wget_buf);
244         G.wget_buf[3] = ' ';
245         return result;
246 }
247
248 static void parse_url(const char *src_url, struct host_info *h)
249 {
250         char *url, *p, *sp;
251
252         free(h->allocated);
253         h->allocated = url = xstrdup(src_url);
254
255         if (strncmp(url, "http://", 7) == 0) {
256                 h->port = bb_lookup_port("http", "tcp", 80);
257                 h->host = url + 7;
258                 h->is_ftp = 0;
259         } else if (strncmp(url, "ftp://", 6) == 0) {
260                 h->port = bb_lookup_port("ftp", "tcp", 21);
261                 h->host = url + 6;
262                 h->is_ftp = 1;
263         } else
264                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
265
266         // FYI:
267         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
268         //   'GET /?var=a/b HTTP 1.0'
269         //   and saves 'index.html?var=a%2Fb' (we save 'b')
270         // wget 'http://busybox.net?login=john@doe':
271         //   request: 'GET /?login=john@doe HTTP/1.0'
272         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
273         // wget 'http://busybox.net#test/test':
274         //   request: 'GET / HTTP/1.0'
275         //   saves: 'index.html' (we save 'test')
276         //
277         // We also don't add unique .N suffix if file exists...
278         sp = strchr(h->host, '/');
279         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
280         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
281         if (!sp) {
282                 h->path = "";
283         } else if (*sp == '/') {
284                 *sp = '\0';
285                 h->path = sp + 1;
286         } else { // '#' or '?'
287                 // http://busybox.net?login=john@doe is a valid URL
288                 // memmove converts to:
289                 // http:/busybox.nett?login=john@doe...
290                 memmove(h->host - 1, h->host, sp - h->host);
291                 h->host--;
292                 sp[-1] = '\0';
293                 h->path = sp;
294         }
295
296         // We used to set h->user to NULL here, but this interferes
297         // with handling of code 302 ("object was moved")
298
299         sp = strrchr(h->host, '@');
300         if (sp != NULL) {
301                 h->user = h->host;
302                 *sp = '\0';
303                 h->host = sp + 1;
304         }
305
306         sp = h->host;
307 }
308
309 static char *gethdr(FILE *fp)
310 {
311         char *s, *hdrval;
312         int c;
313
314         /* *istrunc = 0; */
315
316         /* retrieve header line */
317         c = fgets_and_trim(fp);
318
319         /* end of the headers? */
320         if (G.wget_buf[0] == '\0')
321                 return NULL;
322
323         /* convert the header name to lower case */
324         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
325                 /* tolower for "A-Z", no-op for "0-9a-z-." */
326                 *s |= 0x20;
327         }
328
329         /* verify we are at the end of the header name */
330         if (*s != ':')
331                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
332
333         /* locate the start of the header value */
334         *s++ = '\0';
335         hdrval = skip_whitespace(s);
336
337         if (c != '\n') {
338                 /* Rats! The buffer isn't big enough to hold the entire header value */
339                 while (c = getc(fp), c != EOF && c != '\n')
340                         continue;
341         }
342
343         return hdrval;
344 }
345
346 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
347 {
348         FILE *sfp;
349         char *str;
350         int port;
351
352         if (!target->user)
353                 target->user = xstrdup("anonymous:busybox@");
354
355         sfp = open_socket(lsa);
356         if (ftpcmd(NULL, NULL, sfp) != 220)
357                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
358
359         /*
360          * Splitting username:password pair,
361          * trying to log in
362          */
363         str = strchr(target->user, ':');
364         if (str)
365                 *str++ = '\0';
366         switch (ftpcmd("USER ", target->user, sfp)) {
367         case 230:
368                 break;
369         case 331:
370                 if (ftpcmd("PASS ", str, sfp) == 230)
371                         break;
372                 /* fall through (failed login) */
373         default:
374                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
375         }
376
377         ftpcmd("TYPE I", NULL, sfp);
378
379         /*
380          * Querying file size
381          */
382         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
383                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
384                 if (G.content_len < 0 || errno) {
385                         bb_error_msg_and_die("SIZE value is garbage");
386                 }
387                 G.got_clen = 1;
388         }
389
390         /*
391          * Entering passive mode
392          */
393         if (ftpcmd("PASV", NULL, sfp) != 227) {
394  pasv_error:
395                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
396         }
397         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
398         // Server's IP is N1.N2.N3.N4 (we ignore it)
399         // Server's port for data connection is P1*256+P2
400         str = strrchr(G.wget_buf, ')');
401         if (str) str[0] = '\0';
402         str = strrchr(G.wget_buf, ',');
403         if (!str) goto pasv_error;
404         port = xatou_range(str+1, 0, 255);
405         *str = '\0';
406         str = strrchr(G.wget_buf, ',');
407         if (!str) goto pasv_error;
408         port += xatou_range(str+1, 0, 255) * 256;
409         set_nport(&lsa->u.sa, htons(port));
410
411         *dfpp = open_socket(lsa);
412
413         if (G.beg_range) {
414                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
415                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
416                         G.content_len -= G.beg_range;
417         }
418
419         if (ftpcmd("RETR ", target->path, sfp) > 150)
420                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
421
422         return sfp;
423 }
424
425 static void NOINLINE retrieve_file_data(FILE *dfp)
426 {
427 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
428 # if ENABLE_FEATURE_WGET_TIMEOUT
429         unsigned second_cnt;
430 # endif
431         struct pollfd polldata;
432
433         polldata.fd = fileno(dfp);
434         polldata.events = POLLIN | POLLPRI;
435 #endif
436         progress_meter(PROGRESS_START);
437
438         if (G.chunked)
439                 goto get_clen;
440
441         /* Loops only if chunked */
442         while (1) {
443
444 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
445                 /* Must use nonblocking I/O, otherwise fread will loop
446                  * and *block* until it reads full buffer,
447                  * which messes up progress bar and/or timeout logic.
448                  * Because of nonblocking I/O, we need to dance
449                  * very carefully around EAGAIN. See explanation at
450                  * clearerr() call.
451                  */
452                 ndelay_on(polldata.fd);
453 #endif
454                 while (1) {
455                         int n;
456                         unsigned rdsz;
457
458                         rdsz = sizeof(G.wget_buf);
459                         if (G.got_clen) {
460                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
461                                         if ((int)G.content_len <= 0)
462                                                 break;
463                                         rdsz = (unsigned)G.content_len;
464                                 }
465                         }
466
467 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
468 # if ENABLE_FEATURE_WGET_TIMEOUT
469                         second_cnt = G.timeout_seconds;
470 # endif
471                         while (1) {
472                                 if (safe_poll(&polldata, 1, 1000) != 0)
473                                         break; /* error, EOF, or data is available */
474 # if ENABLE_FEATURE_WGET_TIMEOUT
475                                 if (second_cnt != 0 && --second_cnt == 0) {
476                                         progress_meter(PROGRESS_END);
477                                         bb_error_msg_and_die("download timed out");
478                                 }
479 # endif
480                                 /* Needed for "stalled" indicator */
481                                 progress_meter(PROGRESS_BUMP);
482                         }
483
484                         /* fread internally uses read loop, which in our case
485                          * is usually exited when we get EAGAIN.
486                          * In this case, libc sets error marker on the stream.
487                          * Need to clear it before next fread to avoid possible
488                          * rare false positive ferror below. Rare because usually
489                          * fread gets more than zero bytes, and we don't fall
490                          * into if (n <= 0) ...
491                          */
492                         clearerr(dfp);
493                         errno = 0;
494 #endif
495                         n = fread(G.wget_buf, 1, rdsz, dfp);
496                         /* man fread:
497                          * If error occurs, or EOF is reached, the return value
498                          * is a short item count (or zero).
499                          * fread does not distinguish between EOF and error.
500                          */
501                         if (n <= 0) {
502 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
503                                 if (errno == EAGAIN) /* poll lied, there is no data? */
504                                         continue; /* yes */
505 #endif
506                                 if (ferror(dfp))
507                                         bb_perror_msg_and_die(bb_msg_read_error);
508                                 break; /* EOF, not error */
509                         }
510
511                         xwrite(G.output_fd, G.wget_buf, n);
512
513 #if ENABLE_FEATURE_WGET_STATUSBAR
514                         G.transferred += n;
515                         progress_meter(PROGRESS_BUMP);
516 #endif
517                         if (G.got_clen) {
518                                 G.content_len -= n;
519                                 if (G.content_len == 0)
520                                         break;
521                         }
522                 }
523 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
524                 clearerr(dfp);
525                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
526 #endif
527                 if (!G.chunked)
528                         break;
529
530                 fgets_and_trim(dfp); /* Eat empty line */
531  get_clen:
532                 fgets_and_trim(dfp);
533                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
534                 /* FIXME: error check? */
535                 if (G.content_len == 0)
536                         break; /* all done! */
537                 G.got_clen = 1;
538         }
539
540         /* Draw full bar and free its resources */
541         G.chunked = 0;  /* makes it show 100% even for chunked download */
542         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
543         progress_meter(PROGRESS_END);
544 }
545
546 static void download_one_url(const char *url)
547 {
548         bool use_proxy;                 /* Use proxies if env vars are set  */
549         int redir_limit;
550         len_and_sockaddr *lsa;
551         FILE *sfp;                      /* socket to web/ftp server         */
552         FILE *dfp;                      /* socket to ftp server (data)      */
553         char *proxy = NULL;
554         char *fname_out_alloc;
555         struct host_info server;
556         struct host_info target;
557
558         server.allocated = NULL;
559         target.allocated = NULL;
560         server.user = NULL;
561         target.user = NULL;
562
563         parse_url(url, &target);
564
565         /* Use the proxy if necessary */
566         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
567         if (use_proxy) {
568                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
569                 use_proxy = (proxy && proxy[0]);
570                 if (use_proxy)
571                         parse_url(proxy, &server);
572         }
573         if (!use_proxy) {
574                 server.port = target.port;
575                 if (ENABLE_FEATURE_IPV6) {
576                         //free(server.allocated); - can't be non-NULL
577                         server.host = server.allocated = xstrdup(target.host);
578                 } else {
579                         server.host = target.host;
580                 }
581         }
582
583         if (ENABLE_FEATURE_IPV6)
584                 strip_ipv6_scope_id(target.host);
585
586         /* If there was no -O FILE, guess output filename */
587         fname_out_alloc = NULL;
588         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
589                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
590                 /* handle "wget http://kernel.org//" */
591                 if (G.fname_out[0] == '/' || !G.fname_out[0])
592                         G.fname_out = (char*)"index.html";
593                 /* -P DIR is considered only if there was no -O FILE */
594                 else {
595                         if (G.dir_prefix)
596                                 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
597                         else {
598                                 /* redirects may free target.path later, need to make a copy */
599                                 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
600                         }
601                 }
602         }
603 #if ENABLE_FEATURE_WGET_STATUSBAR
604         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
605 #endif
606
607         /* Determine where to start transfer */
608         G.beg_range = 0;
609         if (option_mask32 & WGET_OPT_CONTINUE) {
610                 G.output_fd = open(G.fname_out, O_WRONLY);
611                 if (G.output_fd >= 0) {
612                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
613                 }
614                 /* File doesn't exist. We do not create file here yet.
615                  * We are not sure it exists on remote side */
616         }
617
618         redir_limit = 5;
619  resolve_lsa:
620         lsa = xhost2sockaddr(server.host, server.port);
621         if (!(option_mask32 & WGET_OPT_QUIET)) {
622                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
623                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
624                 free(s);
625         }
626  establish_session:
627         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
628         G.got_clen = 0;
629         G.chunked = 0;
630         if (use_proxy || !target.is_ftp) {
631                 /*
632                  *  HTTP session
633                  */
634                 char *str;
635                 int status;
636
637
638                 /* Open socket to http server */
639                 sfp = open_socket(lsa);
640
641                 /* Send HTTP request */
642                 if (use_proxy) {
643                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
644                                 target.is_ftp ? "f" : "ht", target.host,
645                                 target.path);
646                 } else {
647                         if (option_mask32 & WGET_OPT_POST_DATA)
648                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
649                         else
650                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
651                 }
652
653                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
654                         target.host, G.user_agent);
655
656                 /* Ask server to close the connection as soon as we are done
657                  * (IOW: we do not intend to send more requests)
658                  */
659                 fprintf(sfp, "Connection: close\r\n");
660
661 #if ENABLE_FEATURE_WGET_AUTHENTICATION
662                 if (target.user) {
663 //TODO: URL-decode "user:password" string before base64-encoding:
664 //wget http://test:my%20pass@example.com should send
665 // Authorization: Basic dGVzdDpteSBwYXNz
666 //which decodes to "test:my pass", instead of what we send now:
667 // Authorization: Basic dGVzdDpteSUyMHBhc3M=
668 //Can reuse decodeString() from httpd.c
669                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
670                                 base64enc(target.user));
671                 }
672                 if (use_proxy && server.user) {
673                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
674                                 base64enc(server.user));
675                 }
676 #endif
677
678                 if (G.beg_range)
679                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
680
681 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
682                 if (G.extra_headers)
683                         fputs(G.extra_headers, sfp);
684
685                 if (option_mask32 & WGET_OPT_POST_DATA) {
686                         fprintf(sfp,
687                                 "Content-Type: application/x-www-form-urlencoded\r\n"
688                                 "Content-Length: %u\r\n"
689                                 "\r\n"
690                                 "%s",
691                                 (int) strlen(G.post_data), G.post_data
692                         );
693                 } else
694 #endif
695                 {
696                         fprintf(sfp, "\r\n");
697                 }
698
699                 fflush(sfp);
700
701                 /*
702                  * Retrieve HTTP response line and check for "200" status code.
703                  */
704  read_response:
705                 fgets_and_trim(sfp);
706
707                 str = G.wget_buf;
708                 str = skip_non_whitespace(str);
709                 str = skip_whitespace(str);
710                 // FIXME: no error check
711                 // xatou wouldn't work: "200 OK"
712                 status = atoi(str);
713                 switch (status) {
714                 case 0:
715                 case 100:
716                         while (gethdr(sfp) != NULL)
717                                 /* eat all remaining headers */;
718                         goto read_response;
719                 case 200:
720 /*
721 Response 204 doesn't say "null file", it says "metadata
722 has changed but data didn't":
723
724 "10.2.5 204 No Content
725 The server has fulfilled the request but does not need to return
726 an entity-body, and might want to return updated metainformation.
727 The response MAY include new or updated metainformation in the form
728 of entity-headers, which if present SHOULD be associated with
729 the requested variant.
730
731 If the client is a user agent, it SHOULD NOT change its document
732 view from that which caused the request to be sent. This response
733 is primarily intended to allow input for actions to take place
734 without causing a change to the user agent's active document view,
735 although any new or updated metainformation SHOULD be applied
736 to the document currently in the user agent's active view.
737
738 The 204 response MUST NOT include a message-body, and thus
739 is always terminated by the first empty line after the header fields."
740
741 However, in real world it was observed that some web servers
742 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
743 */
744                 case 204:
745                         break;
746                 case 300:  /* redirection */
747                 case 301:
748                 case 302:
749                 case 303:
750                         break;
751                 case 206:
752                         if (G.beg_range)
753                                 break;
754                         /* fall through */
755                 default:
756                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
757                 }
758
759                 /*
760                  * Retrieve HTTP headers.
761                  */
762                 while ((str = gethdr(sfp)) != NULL) {
763                         static const char keywords[] ALIGN1 =
764                                 "content-length\0""transfer-encoding\0""location\0";
765                         enum {
766                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
767                         };
768                         smalluint key;
769
770                         /* gethdr converted "FOO:" string to lowercase */
771
772                         /* strip trailing whitespace */
773                         char *s = strchrnul(str, '\0') - 1;
774                         while (s >= str && (*s == ' ' || *s == '\t')) {
775                                 *s = '\0';
776                                 s--;
777                         }
778                         key = index_in_strings(keywords, G.wget_buf) + 1;
779                         if (key == KEY_content_length) {
780                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
781                                 if (G.content_len < 0 || errno) {
782                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
783                                 }
784                                 G.got_clen = 1;
785                                 continue;
786                         }
787                         if (key == KEY_transfer_encoding) {
788                                 if (strcmp(str_tolower(str), "chunked") != 0)
789                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
790                                 G.chunked = 1;
791                         }
792                         if (key == KEY_location && status >= 300) {
793                                 if (--redir_limit == 0)
794                                         bb_error_msg_and_die("too many redirections");
795                                 fclose(sfp);
796                                 if (str[0] == '/') {
797                                         free(target.allocated);
798                                         target.path = target.allocated = xstrdup(str+1);
799                                         /* lsa stays the same: it's on the same server */
800                                 } else {
801                                         parse_url(str, &target);
802                                         if (!use_proxy) {
803                                                 free(server.allocated);
804                                                 server.allocated = NULL;
805                                                 server.host = target.host;
806                                                 /* strip_ipv6_scope_id(target.host); - no! */
807                                                 /* we assume remote never gives us IPv6 addr with scope id */
808                                                 server.port = target.port;
809                                                 free(lsa);
810                                                 goto resolve_lsa;
811                                         } /* else: lsa stays the same: we use proxy */
812                                 }
813                                 goto establish_session;
814                         }
815                 }
816 //              if (status >= 300)
817 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
818
819                 /* For HTTP, data is pumped over the same connection */
820                 dfp = sfp;
821
822         } else {
823                 /*
824                  *  FTP session
825                  */
826                 sfp = prepare_ftp_session(&dfp, &target, lsa);
827         }
828
829         free(lsa);
830
831         if (!(option_mask32 & WGET_OPT_SPIDER)) {
832                 if (G.output_fd < 0)
833                         G.output_fd = xopen(G.fname_out, G.o_flags);
834                 retrieve_file_data(dfp);
835                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
836                         xclose(G.output_fd);
837                         G.output_fd = -1;
838                 }
839         }
840
841         if (dfp != sfp) {
842                 /* It's ftp. Close data connection properly */
843                 fclose(dfp);
844                 if (ftpcmd(NULL, NULL, sfp) != 226)
845                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
846                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
847         }
848         fclose(sfp);
849
850         free(server.allocated);
851         free(target.allocated);
852         free(fname_out_alloc);
853 }
854
855 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
856 int wget_main(int argc UNUSED_PARAM, char **argv)
857 {
858 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
859         static const char wget_longopts[] ALIGN1 =
860                 /* name, has_arg, val */
861                 "continue\0"         No_argument       "c"
862 //FIXME: -s isn't --spider, it's --save-headers!
863                 "spider\0"           No_argument       "s"
864                 "quiet\0"            No_argument       "q"
865                 "output-document\0"  Required_argument "O"
866                 "directory-prefix\0" Required_argument "P"
867                 "proxy\0"            Required_argument "Y"
868                 "user-agent\0"       Required_argument "U"
869 #if ENABLE_FEATURE_WGET_TIMEOUT
870                 "timeout\0"          Required_argument "T"
871 #endif
872                 /* Ignored: */
873                 // "tries\0"            Required_argument "t"
874                 /* Ignored (we always use PASV): */
875                 "passive-ftp\0"      No_argument       "\xff"
876                 "header\0"           Required_argument "\xfe"
877                 "post-data\0"        Required_argument "\xfd"
878                 /* Ignored (we don't do ssl) */
879                 "no-check-certificate\0" No_argument   "\xfc"
880                 ;
881 #endif
882
883 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
884         llist_t *headers_llist = NULL;
885 #endif
886
887         INIT_G();
888
889         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
890         G.proxy_flag = "on";   /* use proxies if env vars are set */
891         G.user_agent = "Wget"; /* "User-Agent" header field */
892
893 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
894         applet_long_options = wget_longopts;
895 #endif
896         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
897         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
898                 &G.fname_out, &G.dir_prefix,
899                 &G.proxy_flag, &G.user_agent,
900                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
901                 NULL /* -t RETRIES */
902                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
903                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
904         );
905         argv += optind;
906
907 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
908         if (headers_llist) {
909                 int size = 1;
910                 char *cp;
911                 llist_t *ll = headers_llist;
912                 while (ll) {
913                         size += strlen(ll->data) + 2;
914                         ll = ll->link;
915                 }
916                 G.extra_headers = cp = xmalloc(size);
917                 while (headers_llist) {
918                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
919                 }
920         }
921 #endif
922
923         G.output_fd = -1;
924         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
925         if (G.fname_out) { /* -O FILE ? */
926                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
927                         G.output_fd = 1;
928                         option_mask32 &= ~WGET_OPT_CONTINUE;
929                 }
930                 /* compat with wget: -O FILE can overwrite */
931                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
932         }
933
934         while (*argv)
935                 download_one_url(*argv++);
936
937         if (G.output_fd >= 0)
938                 xclose(G.output_fd);
939
940         return EXIT_SUCCESS;
941 }