7f27e4e7b8aa0e8bc56ebbd5e30cbc5caf07c64a
[oweals/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //config:config WGET
13 //config:       bool "wget"
14 //config:       default y
15 //config:       help
16 //config:         wget is a utility for non-interactive download of files from HTTP
17 //config:         and FTP servers.
18 //config:
19 //config:config FEATURE_WGET_STATUSBAR
20 //config:       bool "Enable a nifty process meter (+2k)"
21 //config:       default y
22 //config:       depends on WGET
23 //config:       help
24 //config:         Enable the transfer progress bar for wget transfers.
25 //config:
26 //config:config FEATURE_WGET_AUTHENTICATION
27 //config:       bool "Enable HTTP authentication"
28 //config:       default y
29 //config:       depends on WGET
30 //config:       help
31 //config:         Support authenticated HTTP transfers.
32 //config:
33 //config:config FEATURE_WGET_LONG_OPTIONS
34 //config:       bool "Enable long options"
35 //config:       default y
36 //config:       depends on WGET && LONG_OPTS
37 //config:       help
38 //config:         Support long options for the wget applet.
39 //config:
40 //config:config FEATURE_WGET_TIMEOUT
41 //config:       bool "Enable timeout option -T SEC"
42 //config:       default y
43 //config:       depends on WGET
44 //config:       help
45 //config:         Supports network read and connect timeouts for wget,
46 //config:         so that wget will give up and timeout, through the -T
47 //config:         command line option.
48 //config:
49 //config:         Currently only connect and network data read timeout are
50 //config:         supported (i.e., timeout is not applied to the DNS query). When
51 //config:         FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
52 //config:         will work in addition to -T.
53 //config:
54 //config:config FEATURE_WGET_OPENSSL
55 //config:       bool "Try to connect to HTTPS using openssl"
56 //config:       default y
57 //config:       depends on WGET
58 //config:       help
59 //config:         Choose how wget establishes SSL connection for https:// URLs.
60 //config:
61 //config:         Busybox itself contains no SSL code. wget will spawn
62 //config:         a helper program to talk over HTTPS.
63 //config:
64 //config:         OpenSSL has a simple SSL client for debug purposes.
65 //config:         If you select "openssl" helper, wget will effectively call
66 //config:         "openssl s_client -quiet -connect IP:443 2>/dev/null"
67 //config:         and pipe its data through it.
68 //config:         Note inconvenient API: host resolution is done twice,
69 //config:         and there is no guarantee openssl's idea of IPv6 address
70 //config:         format is the same as ours.
71 //config:         Another problem is that s_client prints debug information
72 //config:         to stderr, and it needs to be suppressed. This means
73 //config:         all error messages get suppressed too.
74 //config:         openssl is also a big binary, often dynamically linked
75 //config:         against ~15 libraries.
76 //config:
77 //config:config FEATURE_WGET_SSL_HELPER
78 //config:       bool "Try to connect to HTTPS using ssl_helper"
79 //config:       default y
80 //config:       depends on WGET
81 //config:       help
82 //config:         Choose how wget establishes SSL connection for https:// URLs.
83 //config:
84 //config:         Busybox itself contains no SSL code. wget will spawn
85 //config:         a helper program to talk over HTTPS.
86 //config:
87 //config:         ssl_helper is a tool which can be built statically
88 //config:         from busybox sources against a small embedded SSL library.
89 //config:         Please see networking/ssl_helper/README.
90 //config:         It does not require double host resolution and emits
91 //config:         error messages to stderr.
92 //config:
93 //config:         Precompiled static binary may be available at
94 //config:         http://busybox.net/downloads/binaries/
95
96 //applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
97
98 //kbuild:lib-$(CONFIG_WGET) += wget.o
99
100 //usage:#define wget_trivial_usage
101 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
102 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
103 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
104 /* Since we ignore these opts, we don't show them in --help */
105 /* //usage:    "        [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
106 /* //usage:    "        [-nv] [-nc] [-nH] [-np]" */
107 //usage:       "        [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
108 //usage:        )
109 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
110 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
111 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
112 //usage:        )
113 //usage:#define wget_full_usage "\n\n"
114 //usage:       "Retrieve files via HTTP or FTP\n"
115 //usage:     "\n        -s      Spider mode - only check file existence"
116 //usage:     "\n        -c      Continue retrieval of aborted transfer"
117 //usage:     "\n        -q      Quiet"
118 //usage:     "\n        -P DIR  Save to DIR (default .)"
119 //usage:        IF_FEATURE_WGET_TIMEOUT(
120 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
121 //usage:        )
122 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
123 //usage:     "\n        -U STR  Use STR for User-Agent header"
124 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
125
126 #include "libbb.h"
127
/*
 * I/O tracing helpers.
 * Change "#if 0" to "#if 1" to have every line exchanged with the peer
 * reported through bb_error_msg().
 *
 * log_io(fmt, ...)       - trace message (a no-op when tracing is off)
 * SENDFMT(fp, fmt, ...)  - fprintf() to fp, traced when tracing is on
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
/* No ';' after "while (0)": the caller supplies it, which keeps
 * "if (x) SENDFMT(...); else ..." syntactically valid.
 */
# define SENDFMT(fp, fmt, ...) \
        do { \
                log_io("> " fmt, ##__VA_ARGS__); \
                fprintf(fp, fmt, ##__VA_ARGS__); \
        } while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
139
140
/*
 * Pieces of a parsed URL (filled in by parse_url).
 * The string members other than "user" point into "allocated" or at
 * string constants; only "allocated" and "user" are heap blocks.
 */
struct host_info {
        char       *allocated;  /* private, writable copy of the URL */
        const char *path;       /* what follows the host part */
        char       *user;       /* "user[:password]", or NULL */
        const char *protocol;   /* one of the P_xxx strings below */
        char       *host;       /* host part of the URL */
        int         port;       /* default port of the protocol */
};

/* Protocol names; h->protocol is set to one of these arrays */
static const char P_FTP[] = "ftp";
static const char P_HTTP[] = "http";
#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
static const char P_HTTPS[] = "https";
#endif
154
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * A header supplied by the user suppresses our built-in header of the
 * same name. One bit per such header is kept in G.user_headers.
 * The authentication bits collapse to 0 when authentication support
 * is compiled out.
 */
enum {
        HDR_HOST          = 0x01,
        HDR_USER_AGENT    = 0x02,
        HDR_RANGE         = 0x04,
        HDR_AUTH          = 0x08 * ENABLE_FEATURE_WGET_AUTHENTICATION,
        HDR_PROXY_AUTH    = 0x10 * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* Header names, listed in the same order as the bits above */
static const char wget_user_headers[] ALIGN1 =
        "Host:\0"
        "User-Agent:\0"
        "Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
        "Authorization:\0"
        "Proxy-Authorization:\0"
# endif
        ;
/* Nonzero if the user supplied the corresponding header */
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else
/* Without long options there is no way to pass user headers */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
185
186 /* Globals */
187 struct globals {
188         off_t content_len;        /* Content-length of the file */
189         off_t beg_range;          /* Range at which continue begins */
190 #if ENABLE_FEATURE_WGET_STATUSBAR
191         off_t transferred;        /* Number of bytes transferred so far */
192         const char *curfile;      /* Name of current file being transferred */
193         bb_progress_t pmt;
194 #endif
195         char *dir_prefix;
196 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
197         char *post_data;
198         char *extra_headers;
199         unsigned char user_headers; /* Headers mentioned by the user */
200 #endif
201         char *fname_out;        /* where to direct output (-O) */
202         const char *proxy_flag; /* Use proxies if env vars are set */
203         const char *user_agent; /* "User-Agent" header field */
204 #if ENABLE_FEATURE_WGET_TIMEOUT
205         unsigned timeout_seconds;
206         bool connecting;
207 #endif
208         int output_fd;
209         int o_flags;
210         smallint chunked;         /* chunked transfer encoding */
211         smallint got_clen;        /* got content-length: from server  */
212         /* Local downloads do benefit from big buffer.
213          * With 512 byte buffer, it was measured to be
214          * an order of magnitude slower than with big one.
215          */
216         uint64_t just_to_align_next_member;
217         char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
218 } FIX_ALIASING;
219 #define G (*ptr_to_globals)
220 #define INIT_G() do { \
221         SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
222 } while (0)
223 #define FINI_G() do { \
224         FREE_PTR_TO_GLOBALS(); \
225 } while (0)
226
227
228 /* Must match option string! */
229 enum {
230         WGET_OPT_CONTINUE   = (1 << 0),
231         WGET_OPT_SPIDER     = (1 << 1),
232         WGET_OPT_QUIET      = (1 << 2),
233         WGET_OPT_OUTNAME    = (1 << 3),
234         WGET_OPT_PREFIX     = (1 << 4),
235         WGET_OPT_PROXY      = (1 << 5),
236         WGET_OPT_USER_AGENT = (1 << 6),
237         WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
238         WGET_OPT_RETRIES    = (1 << 8),
239         WGET_OPT_PASSIVE    = (1 << 9),
240         WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
241         WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
242 };
243
/* Values accepted by progress_meter() */
enum {
        PROGRESS_END   = 0,     /* final update, then tear the meter down */
        PROGRESS_BUMP  = 1,     /* periodic update */
        PROGRESS_START = -1,    /* set the meter up, then update */
};
249 #if ENABLE_FEATURE_WGET_STATUSBAR
250 static void progress_meter(int flag)
251 {
252         if (option_mask32 & WGET_OPT_QUIET)
253                 return;
254
255         if (flag == PROGRESS_START)
256                 bb_progress_init(&G.pmt, G.curfile);
257
258         bb_progress_update(&G.pmt,
259                         G.beg_range,
260                         G.transferred,
261                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
262         );
263
264         if (flag == PROGRESS_END) {
265                 bb_progress_free(&G.pmt);
266                 bb_putchar_stderr('\n');
267                 G.transferred = 0;
268         }
269 }
270 #else
271 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
272 #endif
273
274
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Removes "%zone" from a bracketed IPv6 host in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything which is not of the form "[xx%zone]" or "[xx%zone]:nn"
 * is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
        char *scope, *cp;

        /* bbox wget actually handles IPv6 addresses without [], like
         * wget "http://::1/xxx", but this is not standard.
         * To save code, _here_ we do not support it. */

        if (host[0] != '[')
                return; /* not IPv6 */

        scope = strchr(host, '%');
        if (!scope)
                return;

        cp = strchr(host, ']');
        if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
                /* malformed address (not "[xx]:nn" or "[xx]") */
                return;
        }

        /* The '%' must be inside the brackets. For input like "[::1]:%x"
         * the "tail" would start before the '%': moving it would make
         * the string longer, not shorter, and overrun the buffer.
         */
        if (cp < scope)
                return;

        /* cp points to "]...", scope points to "%eth0]...":
         * move the tail (with its NUL) over the zone identifier.
         */
        memmove(scope, cp, strlen(cp) + 1);
}
310
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return G.wget_buf.
 * Overlong input is silently cut so that the output fits the buffer.
 * The result is overwritten by the next user of G.wget_buf.
 */
static char *base64enc(const char *str)
{
        const unsigned max_len = sizeof(G.wget_buf)/4*3 - 10; /* paranoia */
        unsigned len;

        len = strlen(str);
        if (len > max_len)
                len = max_len;
        bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
        return G.wget_buf;
}
#endif
322
/*
 * Cut s at its first control character (any byte below ' ').
 * Bytes with the high bit set are kept. Works in place, returns s.
 */
static char* sanitize_string(char *s)
{
        unsigned char *cut;

        for (cut = (unsigned char *) s; *cut >= ' '; cut++)
                continue;
        *cut = '\0';
        return s;
}
331
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * SIGALRM handler: aborts the program if the alarm fires while
 * open_socket() is still connecting, otherwise does nothing.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
        if (!G.connecting)
                return;
        /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
        bb_error_msg_and_die("download timed out");
}
#endif
340
341 static FILE *open_socket(len_and_sockaddr *lsa)
342 {
343         int fd;
344         FILE *fp;
345
346         IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
347         fd = xconnect_stream(lsa);
348         IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
349
350         /* glibc 2.4 seems to try seeking on it - ??! */
351         /* hopefully it understands what ESPIPE means... */
352         fp = fdopen(fd, "r+");
353         if (!fp)
354                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
355
356         return fp;
357 }
358
359 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
360 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
361 static char fgets_and_trim(FILE *fp)
362 {
363         char c;
364         char *buf_ptr;
365
366         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
367                 bb_perror_msg_and_die("error getting response");
368
369         buf_ptr = strchrnul(G.wget_buf, '\n');
370         c = *buf_ptr;
371         *buf_ptr = '\0';
372         buf_ptr = strchrnul(G.wget_buf, '\r');
373         *buf_ptr = '\0';
374
375         log_io("< %s", G.wget_buf);
376
377         return c;
378 }
379
380 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
381 {
382         int result;
383         if (s1) {
384                 if (!s2)
385                         s2 = "";
386                 fprintf(fp, "%s%s\r\n", s1, s2);
387                 fflush(fp);
388                 log_io("> %s%s", s1, s2);
389         }
390
391         do {
392                 fgets_and_trim(fp);
393         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
394
395         G.wget_buf[3] = '\0';
396         result = xatoi_positive(G.wget_buf);
397         G.wget_buf[3] = ' ';
398         return result;
399 }
400
401 static void parse_url(const char *src_url, struct host_info *h)
402 {
403         char *url, *p, *sp;
404
405         free(h->allocated);
406         h->allocated = url = xstrdup(src_url);
407
408         h->protocol = P_FTP;
409         p = strstr(url, "://");
410         if (p) {
411                 *p = '\0';
412                 h->host = p + 3;
413                 if (strcmp(url, P_FTP) == 0) {
414                         h->port = bb_lookup_port(P_FTP, "tcp", 21);
415                 } else
416 #if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
417                 if (strcmp(url, P_HTTPS) == 0) {
418                         h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
419                         h->protocol = P_HTTPS;
420                 } else
421 #endif
422                 if (strcmp(url, P_HTTP) == 0) {
423  http:
424                         h->port = bb_lookup_port(P_HTTP, "tcp", 80);
425                         h->protocol = P_HTTP;
426                 } else {
427                         *p = ':';
428                         bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
429                 }
430         } else {
431                 // GNU wget is user-friendly and falls back to http://
432                 h->host = url;
433                 goto http;
434         }
435
436         // FYI:
437         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
438         //   'GET /?var=a/b HTTP 1.0'
439         //   and saves 'index.html?var=a%2Fb' (we save 'b')
440         // wget 'http://busybox.net?login=john@doe':
441         //   request: 'GET /?login=john@doe HTTP/1.0'
442         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
443         // wget 'http://busybox.net#test/test':
444         //   request: 'GET / HTTP/1.0'
445         //   saves: 'index.html' (we save 'test')
446         //
447         // We also don't add unique .N suffix if file exists...
448         sp = strchr(h->host, '/');
449         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
450         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
451         if (!sp) {
452                 h->path = "";
453         } else if (*sp == '/') {
454                 *sp = '\0';
455                 h->path = sp + 1;
456         } else { // '#' or '?'
457                 // http://busybox.net?login=john@doe is a valid URL
458                 // memmove converts to:
459                 // http:/busybox.nett?login=john@doe...
460                 memmove(h->host - 1, h->host, sp - h->host);
461                 h->host--;
462                 sp[-1] = '\0';
463                 h->path = sp;
464         }
465
466         sp = strrchr(h->host, '@');
467         if (sp != NULL) {
468                 // URL-decode "user:password" string before base64-encoding:
469                 // wget http://test:my%20pass@example.com should send
470                 // Authorization: Basic dGVzdDpteSBwYXNz
471                 // which decodes to "test:my pass".
472                 // Standard wget and curl do this too.
473                 *sp = '\0';
474                 free(h->user);
475                 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
476                 h->host = sp + 1;
477         }
478         /* else: h->user remains NULL, or as set by original request
479          * before redirect (if we are here after a redirect).
480          */
481 }
482
483 static char *gethdr(FILE *fp)
484 {
485         char *s, *hdrval;
486         int c;
487
488         /* retrieve header line */
489         c = fgets_and_trim(fp);
490
491         /* end of the headers? */
492         if (G.wget_buf[0] == '\0')
493                 return NULL;
494
495         /* convert the header name to lower case */
496         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
497                 /*
498                  * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
499                  * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
500                  * "A-Z" maps to "a-z".
501                  * "@[\]" can't occur in header names.
502                  * "^_" maps to "~,DEL" (which is wrong).
503                  * "^" was never seen yet, "_" was seen from web.archive.org
504                  * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
505                  */
506                 *s |= 0x20;
507         }
508
509         /* verify we are at the end of the header name */
510         if (*s != ':')
511                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
512
513         /* locate the start of the header value */
514         *s++ = '\0';
515         hdrval = skip_whitespace(s);
516
517         if (c != '\n') {
518                 /* Rats! The buffer isn't big enough to hold the entire header value */
519                 while (c = getc(fp), c != EOF && c != '\n')
520                         continue;
521         }
522
523         return hdrval;
524 }
525
526 static void reset_beg_range_to_zero(void)
527 {
528         bb_error_msg("restart failed");
529         G.beg_range = 0;
530         xlseek(G.output_fd, 0, SEEK_SET);
531         /* Done at the end instead: */
532         /* ftruncate(G.output_fd, 0); */
533 }
534
535 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
536 {
537         FILE *sfp;
538         char *str;
539         int port;
540
541         if (!target->user)
542                 target->user = xstrdup("anonymous:busybox@");
543
544         sfp = open_socket(lsa);
545         if (ftpcmd(NULL, NULL, sfp) != 220)
546                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
547
548         /*
549          * Splitting username:password pair,
550          * trying to log in
551          */
552         str = strchr(target->user, ':');
553         if (str)
554                 *str++ = '\0';
555         switch (ftpcmd("USER ", target->user, sfp)) {
556         case 230:
557                 break;
558         case 331:
559                 if (ftpcmd("PASS ", str, sfp) == 230)
560                         break;
561                 /* fall through (failed login) */
562         default:
563                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
564         }
565
566         ftpcmd("TYPE I", NULL, sfp);
567
568         /*
569          * Querying file size
570          */
571         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
572                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
573                 if (G.content_len < 0 || errno) {
574                         bb_error_msg_and_die("SIZE value is garbage");
575                 }
576                 G.got_clen = 1;
577         }
578
579         /*
580          * Entering passive mode
581          */
582         if (ftpcmd("PASV", NULL, sfp) != 227) {
583  pasv_error:
584                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
585         }
586         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
587         // Server's IP is N1.N2.N3.N4 (we ignore it)
588         // Server's port for data connection is P1*256+P2
589         str = strrchr(G.wget_buf, ')');
590         if (str) str[0] = '\0';
591         str = strrchr(G.wget_buf, ',');
592         if (!str) goto pasv_error;
593         port = xatou_range(str+1, 0, 255);
594         *str = '\0';
595         str = strrchr(G.wget_buf, ',');
596         if (!str) goto pasv_error;
597         port += xatou_range(str+1, 0, 255) * 256;
598         set_nport(&lsa->u.sa, htons(port));
599
600         *dfpp = open_socket(lsa);
601
602         if (G.beg_range != 0) {
603                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
604                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
605                         G.content_len -= G.beg_range;
606                 else
607                         reset_beg_range_to_zero();
608         }
609
610         if (ftpcmd("RETR ", target->path, sfp) > 150)
611                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
612
613         return sfp;
614 }
615
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Start "openssl s_client -quiet -connect HOST:PORT" with its stdin and
 * stdout attached to one end of a socket pair, and return the other end.
 * ":PORT" is appended only if host contains no ':'.
 * When ssl_helper support is compiled in too, -1 is returned if openssl
 * could not be executed; otherwise that failure is fatal.
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
        char *host_port = NULL;
        int fds[2];
        int pid;
        /* Written by the vforked child, read by the parent afterwards */
        IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)

        if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0)
                /* Kernel can have AF_UNIX support disabled */
                bb_perror_msg_and_die("socketpair");

        if (strchr(host, ':') == NULL) {
                host_port = xasprintf("%s:%u", host, port);
                host = host_port;
        }

        fflush_all();
        pid = xvfork();
        if (pid == 0) {
                /* Child */
                char *argv[6] = {
                        (char*)"openssl",
                        (char*)"s_client",
                        (char*)"-quiet",
                        (char*)"-connect",
                        (char*)host,
                        NULL
                };

                close(fds[0]);
                xmove_fd(fds[1], 0);
                xdup2(0, 1);
                /*
                 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
                 * It prints some debug stuff on stderr, don't know how to suppress it.
                 * Work around by dev-nulling stderr. We lose all error messages :(
                 */
                xmove_fd(2, 3);
                xopen("/dev/null", O_RDWR);
                BB_EXECVP(argv[0], argv);
                /* exec failed: get the real stderr back */
                xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_SSL_HELPER
                child_failed = 1;
                xfunc_die();
# else
                bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
                /* notreached */
        }

        /* Parent */
        free(host_port);
        close(fds[1]);
# if ENABLE_FEATURE_WGET_SSL_HELPER
        if (child_failed) {
                close(fds[0]);
                return -1;
        }
# endif
        return fds[0];
}
#endif
676
/* See networking/ssl_helper/README how to build one */
#if ENABLE_FEATURE_WGET_SSL_HELPER
/*
 * Put "ssl_helper -d3" between us and the already connected socket:
 * the helper gets the socket as its fd 3 and one end of a socket pair
 * as its stdin/stdout; the other end of the pair replaces network_fd
 * in this process.
 * Dies if the socket pair or the child process can't be created.
 */
static void spawn_https_helper_small(int network_fd)
{
        int sp[2];
        int pid;

        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
                /* Kernel can have AF_UNIX support disabled */
                bb_perror_msg_and_die("socketpair");

        /* With fork(), the child gets a copy of our stdio buffers and
         * would write their pending contents once more when it exits
         * after a failed exec. Flush first, as the openssl variant does.
         */
        fflush_all();
        pid = BB_MMU ? xfork() : xvfork();
        if (pid == 0) {
                /* Child */
                char *argv[3];

                close(sp[0]);
                xmove_fd(sp[1], 0);
                xdup2(0, 1);
                xmove_fd(network_fd, 3);
                /*
                 * A simple ssl/tls helper
                 */
                argv[0] = (char*)"ssl_helper";
                argv[1] = (char*)"-d3";
                argv[2] = NULL;
                BB_EXECVP(argv[0], argv);
                bb_perror_msg_and_die("can't execute '%s'", argv[0]);
                /* notreached */
        }

        /* Parent */
        close(sp[1]);
        xmove_fd(sp[0], network_fd);
}
#endif
713
714 static void NOINLINE retrieve_file_data(FILE *dfp)
715 {
716 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
717 # if ENABLE_FEATURE_WGET_TIMEOUT
718         unsigned second_cnt = G.timeout_seconds;
719 # endif
720         struct pollfd polldata;
721
722         polldata.fd = fileno(dfp);
723         polldata.events = POLLIN | POLLPRI;
724 #endif
725         progress_meter(PROGRESS_START);
726
727         if (G.chunked)
728                 goto get_clen;
729
730         /* Loops only if chunked */
731         while (1) {
732
733 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
734                 /* Must use nonblocking I/O, otherwise fread will loop
735                  * and *block* until it reads full buffer,
736                  * which messes up progress bar and/or timeout logic.
737                  * Because of nonblocking I/O, we need to dance
738                  * very carefully around EAGAIN. See explanation at
739                  * clearerr() calls.
740                  */
741                 ndelay_on(polldata.fd);
742 #endif
743                 while (1) {
744                         int n;
745                         unsigned rdsz;
746
747 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
748                         /* fread internally uses read loop, which in our case
749                          * is usually exited when we get EAGAIN.
750                          * In this case, libc sets error marker on the stream.
751                          * Need to clear it before next fread to avoid possible
752                          * rare false positive ferror below. Rare because usually
753                          * fread gets more than zero bytes, and we don't fall
754                          * into if (n <= 0) ...
755                          */
756                         clearerr(dfp);
757 #endif
758                         errno = 0;
759                         rdsz = sizeof(G.wget_buf);
760                         if (G.got_clen) {
761                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
762                                         if ((int)G.content_len <= 0)
763                                                 break;
764                                         rdsz = (unsigned)G.content_len;
765                                 }
766                         }
767                         n = fread(G.wget_buf, 1, rdsz, dfp);
768
769                         if (n > 0) {
770                                 xwrite(G.output_fd, G.wget_buf, n);
771 #if ENABLE_FEATURE_WGET_STATUSBAR
772                                 G.transferred += n;
773 #endif
774                                 if (G.got_clen) {
775                                         G.content_len -= n;
776                                         if (G.content_len == 0)
777                                                 break;
778                                 }
779 #if ENABLE_FEATURE_WGET_TIMEOUT
780                                 second_cnt = G.timeout_seconds;
781 #endif
782                                 goto bump;
783                         }
784
785                         /* n <= 0.
786                          * man fread:
787                          * If error occurs, or EOF is reached, the return value
788                          * is a short item count (or zero).
789                          * fread does not distinguish between EOF and error.
790                          */
791                         if (errno != EAGAIN) {
792                                 if (ferror(dfp)) {
793                                         progress_meter(PROGRESS_END);
794                                         bb_perror_msg_and_die(bb_msg_read_error);
795                                 }
796                                 break; /* EOF, not error */
797                         }
798
799 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
800                         /* It was EAGAIN. There is no data. Wait up to one second
801                          * then abort if timed out, or update the bar and try reading again.
802                          */
803                         if (safe_poll(&polldata, 1, 1000) == 0) {
804 # if ENABLE_FEATURE_WGET_TIMEOUT
805                                 if (second_cnt != 0 && --second_cnt == 0) {
806                                         progress_meter(PROGRESS_END);
807                                         bb_error_msg_and_die("download timed out");
808                                 }
809 # endif
810                                 /* We used to loop back to poll here,
811                                  * but there is no great harm in letting fread
812                                  * to try reading anyway.
813                                  */
814                         }
815 #endif
816  bump:
817                         /* Need to do it _every_ second for "stalled" indicator
818                          * to be shown properly.
819                          */
820                         progress_meter(PROGRESS_BUMP);
821                 } /* while (reading data) */
822
823 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
824                 clearerr(dfp);
825                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
826 #endif
827                 if (!G.chunked)
828                         break;
829
830                 fgets_and_trim(dfp); /* Eat empty line */
831  get_clen:
832                 fgets_and_trim(dfp);
833                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
834                 /* FIXME: error check? */
835                 if (G.content_len == 0)
836                         break; /* all done! */
837                 G.got_clen = 1;
838                 /*
839                  * Note that fgets may result in some data being buffered in dfp.
840                  * We loop back to fread, which will retrieve this data.
841                  * Also note that code has to be arranged so that fread
842                  * is done _before_ one-second poll wait - poll doesn't know
843                  * about stdio buffering and can result in spurious one second waits!
844                  */
845         }
846
847         /* If -c failed, we restart from the beginning,
848          * but we do not truncate file then, we do it only now, at the end.
849          * This lets user to ^C if his 99% complete 10 GB file download
850          * failed to restart *without* losing the almost complete file.
851          */
852         {
853                 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
854                 if (pos != (off_t)-1)
855                         ftruncate(G.output_fd, pos);
856         }
857
858         /* Draw full bar and free its resources */
859         G.chunked = 0;  /* makes it show 100% even for chunked download */
860         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
861         progress_meter(PROGRESS_END);
862 }
863
864 static void download_one_url(const char *url)
865 {
866         bool use_proxy;                 /* Use proxies if env vars are set  */
867         int redir_limit;
868         len_and_sockaddr *lsa;
869         FILE *sfp;                      /* socket to web/ftp server         */
870         FILE *dfp;                      /* socket to ftp server (data)      */
871         char *proxy = NULL;
872         char *fname_out_alloc;
873         char *redirected_path = NULL;
874         struct host_info server;
875         struct host_info target;
876
877         server.allocated = NULL;
878         target.allocated = NULL;
879         server.user = NULL;
880         target.user = NULL;
881
882         parse_url(url, &target);
883
884         /* Use the proxy if necessary */
885         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
886         if (use_proxy) {
887                 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
888 //FIXME: what if protocol is https? Ok to use http_proxy?
889                 use_proxy = (proxy && proxy[0]);
890                 if (use_proxy)
891                         parse_url(proxy, &server);
892         }
893         if (!use_proxy) {
894                 server.port = target.port;
895                 if (ENABLE_FEATURE_IPV6) {
896                         //free(server.allocated); - can't be non-NULL
897                         server.host = server.allocated = xstrdup(target.host);
898                 } else {
899                         server.host = target.host;
900                 }
901         }
902
903         if (ENABLE_FEATURE_IPV6)
904                 strip_ipv6_scope_id(target.host);
905
906         /* If there was no -O FILE, guess output filename */
907         fname_out_alloc = NULL;
908         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
909                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
910                 /* handle "wget http://kernel.org//" */
911                 if (G.fname_out[0] == '/' || !G.fname_out[0])
912                         G.fname_out = (char*)"index.html";
913                 /* -P DIR is considered only if there was no -O FILE */
914                 if (G.dir_prefix)
915                         G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
916                 else {
917                         /* redirects may free target.path later, need to make a copy */
918                         G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
919                 }
920         }
921 #if ENABLE_FEATURE_WGET_STATUSBAR
922         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
923 #endif
924
925         /* Determine where to start transfer */
926         G.beg_range = 0;
927         if (option_mask32 & WGET_OPT_CONTINUE) {
928                 G.output_fd = open(G.fname_out, O_WRONLY);
929                 if (G.output_fd >= 0) {
930                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
931                 }
932                 /* File doesn't exist. We do not create file here yet.
933                  * We are not sure it exists on remote side */
934         }
935
936         redir_limit = 5;
937  resolve_lsa:
938         lsa = xhost2sockaddr(server.host, server.port);
939         if (!(option_mask32 & WGET_OPT_QUIET)) {
940                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
941                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
942                 free(s);
943         }
944  establish_session:
945         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
946         G.got_clen = 0;
947         G.chunked = 0;
948         if (use_proxy || target.protocol != P_FTP) {
949                 /*
950                  *  HTTP session
951                  */
952                 char *str;
953                 int status;
954
955                 /* Open socket to http(s) server */
956 #if ENABLE_FEATURE_WGET_OPENSSL
957                 /* openssl (and maybe ssl_helper) support is configured */
958                 if (target.protocol == P_HTTPS) {
959                         /* openssl-based helper
960                          * Inconvenient API since we can't give it an open fd
961                          */
962                         int fd = spawn_https_helper_openssl(server.host, server.port);
963 # if ENABLE_FEATURE_WGET_SSL_HELPER
964                         if (fd < 0) { /* no openssl? try ssl_helper */
965                                 sfp = open_socket(lsa);
966                                 spawn_https_helper_small(fileno(sfp));
967                                 goto socket_opened;
968                         }
969 # else
970                         /* We don't check for exec("openssl") failure in this case */
971 # endif
972                         sfp = fdopen(fd, "r+");
973                         if (!sfp)
974                                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
975                         goto socket_opened;
976                 }
977                 sfp = open_socket(lsa);
978  socket_opened:
979 #elif ENABLE_FEATURE_WGET_SSL_HELPER
980                 /* Only ssl_helper support is configured */
981                 sfp = open_socket(lsa);
982                 if (target.protocol == P_HTTPS)
983                         spawn_https_helper_small(fileno(sfp));
984 #else
985                 /* ssl (https) support is not configured */
986                 sfp = open_socket(lsa);
987 #endif
988                 /* Send HTTP request */
989                 if (use_proxy) {
990                         SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
991                                 target.protocol, target.host,
992                                 target.path);
993                 } else {
994                         SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
995                                 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
996                                 target.path);
997                 }
998                 if (!USR_HEADER_HOST)
999                         SENDFMT(sfp, "Host: %s\r\n", target.host);
1000                 if (!USR_HEADER_USER_AGENT)
1001                         SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1002
1003                 /* Ask server to close the connection as soon as we are done
1004                  * (IOW: we do not intend to send more requests)
1005                  */
1006                 SENDFMT(sfp, "Connection: close\r\n");
1007
1008 #if ENABLE_FEATURE_WGET_AUTHENTICATION
1009                 if (target.user && !USR_HEADER_AUTH) {
1010                         SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1011                                 base64enc(target.user));
1012                 }
1013                 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1014                         SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1015                                 base64enc(server.user));
1016                 }
1017 #endif
1018
1019                 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1020                         SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1021
1022 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1023                 if (G.extra_headers) {
1024                         log_io(G.extra_headers);
1025                         fputs(G.extra_headers, sfp);
1026                 }
1027
1028                 if (option_mask32 & WGET_OPT_POST_DATA) {
1029                         SENDFMT(sfp,
1030                                 "Content-Type: application/x-www-form-urlencoded\r\n"
1031                                 "Content-Length: %u\r\n"
1032                                 "\r\n"
1033                                 "%s",
1034                                 (int) strlen(G.post_data), G.post_data
1035                         );
1036                 } else
1037 #endif
1038                 {
1039                         SENDFMT(sfp, "\r\n");
1040                 }
1041
1042                 fflush(sfp);
1043
1044                 /*
1045                  * Retrieve HTTP response line and check for "200" status code.
1046                  */
1047  read_response:
1048                 fgets_and_trim(sfp);
1049
1050                 str = G.wget_buf;
1051                 str = skip_non_whitespace(str);
1052                 str = skip_whitespace(str);
1053                 // FIXME: no error check
1054                 // xatou wouldn't work: "200 OK"
1055                 status = atoi(str);
1056                 switch (status) {
1057                 case 0:
1058                 case 100:
1059                         while (gethdr(sfp) != NULL)
1060                                 /* eat all remaining headers */;
1061                         goto read_response;
1062                 case 200:
1063 /*
1064 Response 204 doesn't say "null file", it says "metadata
1065 has changed but data didn't":
1066
1067 "10.2.5 204 No Content
1068 The server has fulfilled the request but does not need to return
1069 an entity-body, and might want to return updated metainformation.
1070 The response MAY include new or updated metainformation in the form
1071 of entity-headers, which if present SHOULD be associated with
1072 the requested variant.
1073
1074 If the client is a user agent, it SHOULD NOT change its document
1075 view from that which caused the request to be sent. This response
1076 is primarily intended to allow input for actions to take place
1077 without causing a change to the user agent's active document view,
1078 although any new or updated metainformation SHOULD be applied
1079 to the document currently in the user agent's active view.
1080
1081 The 204 response MUST NOT include a message-body, and thus
1082 is always terminated by the first empty line after the header fields."
1083
1084 However, in real world it was observed that some web servers
1085 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1086 */
1087                 case 204:
1088                         if (G.beg_range != 0) {
1089                                 /* "Range:..." was not honored by the server.
1090                                  * Restart download from the beginning.
1091                                  */
1092                                 reset_beg_range_to_zero();
1093                         }
1094                         break;
1095                 case 300:  /* redirection */
1096                 case 301:
1097                 case 302:
1098                 case 303:
1099                         break;
1100                 case 206: /* Partial Content */
1101                         if (G.beg_range != 0)
1102                                 /* "Range:..." worked. Good. */
1103                                 break;
1104                         /* Partial Content even though we did not ask for it??? */
1105                         /* fall through */
1106                 default:
1107                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1108                 }
1109
1110                 /*
1111                  * Retrieve HTTP headers.
1112                  */
1113                 while ((str = gethdr(sfp)) != NULL) {
1114                         static const char keywords[] ALIGN1 =
1115                                 "content-length\0""transfer-encoding\0""location\0";
1116                         enum {
1117                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1118                         };
1119                         smalluint key;
1120
1121                         /* gethdr converted "FOO:" string to lowercase */
1122
1123                         /* strip trailing whitespace */
1124                         char *s = strchrnul(str, '\0') - 1;
1125                         while (s >= str && (*s == ' ' || *s == '\t')) {
1126                                 *s = '\0';
1127                                 s--;
1128                         }
1129                         key = index_in_strings(keywords, G.wget_buf) + 1;
1130                         if (key == KEY_content_length) {
1131                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
1132                                 if (G.content_len < 0 || errno) {
1133                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1134                                 }
1135                                 G.got_clen = 1;
1136                                 continue;
1137                         }
1138                         if (key == KEY_transfer_encoding) {
1139                                 if (strcmp(str_tolower(str), "chunked") != 0)
1140                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1141                                 G.chunked = 1;
1142                         }
1143                         if (key == KEY_location && status >= 300) {
1144                                 if (--redir_limit == 0)
1145                                         bb_error_msg_and_die("too many redirections");
1146                                 fclose(sfp);
1147                                 if (str[0] == '/') {
1148                                         free(redirected_path);
1149                                         target.path = redirected_path = xstrdup(str+1);
1150                                         /* lsa stays the same: it's on the same server */
1151                                 } else {
1152                                         parse_url(str, &target);
1153                                         if (!use_proxy) {
1154                                                 /* server.user remains untouched */
1155                                                 free(server.allocated);
1156                                                 server.allocated = NULL;
1157                                                 server.host = target.host;
1158                                                 /* strip_ipv6_scope_id(target.host); - no! */
1159                                                 /* we assume remote never gives us IPv6 addr with scope id */
1160                                                 server.port = target.port;
1161                                                 free(lsa);
1162                                                 goto resolve_lsa;
1163                                         } /* else: lsa stays the same: we use proxy */
1164                                 }
1165                                 goto establish_session;
1166                         }
1167                 }
1168 //              if (status >= 300)
1169 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
1170
1171                 /* For HTTP, data is pumped over the same connection */
1172                 dfp = sfp;
1173         } else {
1174                 /*
1175                  *  FTP session
1176                  */
1177                 sfp = prepare_ftp_session(&dfp, &target, lsa);
1178         }
1179
1180         free(lsa);
1181
1182         if (!(option_mask32 & WGET_OPT_SPIDER)) {
1183                 if (G.output_fd < 0)
1184                         G.output_fd = xopen(G.fname_out, G.o_flags);
1185                 retrieve_file_data(dfp);
1186                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1187                         xclose(G.output_fd);
1188                         G.output_fd = -1;
1189                 }
1190         }
1191
1192         if (dfp != sfp) {
1193                 /* It's ftp. Close data connection properly */
1194                 fclose(dfp);
1195                 if (ftpcmd(NULL, NULL, sfp) != 226)
1196                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1197                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1198         }
1199         fclose(sfp);
1200
1201         free(server.allocated);
1202         free(target.allocated);
1203         free(server.user);
1204         free(target.user);
1205         free(fname_out_alloc);
1206         free(redirected_path);
1207 }
1208
1209 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1210 int wget_main(int argc UNUSED_PARAM, char **argv)
1211 {
1212 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1213         static const char wget_longopts[] ALIGN1 =
1214                 /* name, has_arg, val */
1215                 "continue\0"         No_argument       "c"
1216 //FIXME: -s isn't --spider, it's --save-headers!
1217                 "spider\0"           No_argument       "s"
1218                 "quiet\0"            No_argument       "q"
1219                 "output-document\0"  Required_argument "O"
1220                 "directory-prefix\0" Required_argument "P"
1221                 "proxy\0"            Required_argument "Y"
1222                 "user-agent\0"       Required_argument "U"
1223 IF_FEATURE_WGET_TIMEOUT(
1224                 "timeout\0"          Required_argument "T")
1225                 /* Ignored: */
1226 IF_DESKTOP(     "tries\0"            Required_argument "t")
1227                 "header\0"           Required_argument "\xff"
1228                 "post-data\0"        Required_argument "\xfe"
1229                 /* Ignored (we always use PASV): */
1230 IF_DESKTOP(     "passive-ftp\0"      No_argument       "\xf0")
1231                 /* Ignored (we don't do ssl) */
1232 IF_DESKTOP(     "no-check-certificate\0" No_argument   "\xf0")
1233                 /* Ignored (we don't support caching) */
1234 IF_DESKTOP(     "no-cache\0"         No_argument       "\xf0")
1235 IF_DESKTOP(     "no-verbose\0"       No_argument       "\xf0")
1236 IF_DESKTOP(     "no-clobber\0"       No_argument       "\xf0")
1237 IF_DESKTOP(     "no-host-directories\0" No_argument    "\xf0")
1238 IF_DESKTOP(     "no-parent\0"        No_argument       "\xf0")
1239                 ;
1240 #endif
1241
1242 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1243         llist_t *headers_llist = NULL;
1244 #endif
1245
1246         INIT_G();
1247
1248 #if ENABLE_FEATURE_WGET_TIMEOUT
1249         G.timeout_seconds = 900;
1250         signal(SIGALRM, alarm_handler);
1251 #endif
1252         G.proxy_flag = "on";   /* use proxies if env vars are set */
1253         G.user_agent = "Wget"; /* "User-Agent" header field */
1254
1255 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1256         applet_long_options = wget_longopts;
1257 #endif
1258         opt_complementary = "-1" /* at least one URL */
1259                 IF_FEATURE_WGET_TIMEOUT(":T+") /* -T NUM */
1260                 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
1261         getopt32(argv, "csqO:P:Y:U:T:"
1262                 /*ignored:*/ "t:"
1263                 /*ignored:*/ "n::"
1264                 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1265                  * -nv --no-verbose: be moderately quiet (-q is full quiet)
1266                  * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1267                  * -nH --no-host-directories: wget -r http://host/ won't create host/
1268                  * -np --no-parent
1269                  * "n::" above says that we accept -n[ARG].
1270                  * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1271                  */
1272                 , &G.fname_out, &G.dir_prefix,
1273                 &G.proxy_flag, &G.user_agent,
1274                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1275                 NULL, /* -t RETRIES */
1276                 NULL  /* -n[ARG] */
1277                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1278                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1279         );
1280         argv += optind;
1281
1282 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1283         if (headers_llist) {
1284                 int size = 0;
1285                 char *hdr;
1286                 llist_t *ll = headers_llist;
1287                 while (ll) {
1288                         size += strlen(ll->data) + 2;
1289                         ll = ll->link;
1290                 }
1291                 G.extra_headers = hdr = xmalloc(size + 1);
1292                 while (headers_llist) {
1293                         int bit;
1294                         const char *words;
1295
1296                         size = sprintf(hdr, "%s\r\n",
1297                                         (char*)llist_pop(&headers_llist));
1298                         /* a bit like index_in_substrings but don't match full key */
1299                         bit = 1;
1300                         words = wget_user_headers;
1301                         while (*words) {
1302                                 if (strstr(hdr, words) == hdr) {
1303                                         G.user_headers |= bit;
1304                                         break;
1305                                 }
1306                                 bit <<= 1;
1307                                 words += strlen(words) + 1;
1308                         }
1309                         hdr += size;
1310                 }
1311         }
1312 #endif
1313
1314         G.output_fd = -1;
1315         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1316         if (G.fname_out) { /* -O FILE ? */
1317                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1318                         G.output_fd = 1;
1319                         option_mask32 &= ~WGET_OPT_CONTINUE;
1320                 }
1321                 /* compat with wget: -O FILE can overwrite */
1322                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1323         }
1324
1325         while (*argv)
1326                 download_one_url(*argv++);
1327
1328         if (G.output_fd >= 0)
1329                 xclose(G.output_fd);
1330
1331 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1332         free(G.extra_headers);
1333 #endif
1334         FINI_G();
1335
1336         return EXIT_SUCCESS;
1337 }