X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=networking%2Fwget.c;h=e47c9a51b5bfe913d61f9f49a4b70c7a83fa518a;hb=5b3d2eb327ce7e1c55c6c94bc70782f733d59990;hp=ad2bfa4b5a3f5f8e71a0e17c458989b4b45a0da5;hpb=28556b95febc77ebdd2d2f2e0be4a0157dcaf735;p=oweals%2Fbusybox.git diff --git a/networking/wget.c b/networking/wget.c index ad2bfa4b5..e47c9a51b 100644 --- a/networking/wget.c +++ b/networking/wget.c @@ -8,21 +8,208 @@ * Copyright (C) 2010 Bradley M. Kuhn * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2. */ + +//config:config WGET +//config: bool "wget (35 kb)" +//config: default y +//config: help +//config: wget is a utility for non-interactive download of files from HTTP +//config: and FTP servers. +//config: +//config:config FEATURE_WGET_LONG_OPTIONS +//config: bool "Enable long options" +//config: default y +//config: depends on WGET && LONG_OPTS +//config: +//config:config FEATURE_WGET_STATUSBAR +//config: bool "Enable progress bar (+2k)" +//config: default y +//config: depends on WGET +//config: +//config:config FEATURE_WGET_AUTHENTICATION +//config: bool "Enable HTTP authentication" +//config: default y +//config: depends on WGET +//config: help +//config: Support authenticated HTTP transfers. +//config: +//config:config FEATURE_WGET_TIMEOUT +//config: bool "Enable timeout option -T SEC" +//config: default y +//config: depends on WGET +//config: help +//config: Supports network read and connect timeouts for wget, +//config: so that wget will give up and timeout, through the -T +//config: command line option. +//config: +//config: Currently only connect and network data read timeout are +//config: supported (i.e., timeout is not applied to the DNS query). When +//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option +//config: will work in addition to -T. +//config: +//config:config FEATURE_WGET_HTTPS +//config: bool "Support HTTPS using internal TLS code" +//config: default y +//config: depends on WGET +//config: select TLS +//config: help +//config: wget will use internal TLS code to connect to https:// URLs. +//config: Note: +//config: On NOMMU machines, ssl_helper applet should be available +//config: in the $PATH for this to work. Make sure to select that applet. +//config: +//config: Note: currently, TLS code only makes TLS I/O work, it +//config: does *not* check that the peer is who it claims to be, etc. +//config: IOW: it uses peer-supplied public keys to establish encryption +//config: and signing keys, then encrypts and signs outgoing data and +//config: decrypts incoming data. +//config: It does not check signature hashes on the incoming data: +//config: this means that attackers manipulating TCP packets can +//config: send altered data and we unknowingly receive garbage. +//config: (This check might be relatively easy to add). +//config: It does not check public key's certificate: +//config: this means that the peer may be an attacker impersonating +//config: the server we think we are talking to. +//config: +//config: If you think this is unacceptable, consider this. As more and more +//config: servers switch to HTTPS-only operation, without such "crippled" +//config: TLS code it is *impossible* to simply download a kernel source +//config: from kernel.org. Which can in real world translate into +//config: "my small automatic tooling to build cross-compilers from sources +//config: no longer works, I need to additionally keep a local copy +//config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte +//config: source of wget, need to compile and built both before I can +//config: download anything. All this despite the fact that the build +//config: is done in a QEMU sandbox on a machine with absolutely nothing +//config: worth stealing, so I don't care if someone would go to a lot +//config: of trouble to intercept my HTTPS download to send me an altered +//config: kernel tarball". +//config: +//config: If you still think this is unacceptable, send patches. +//config: +//config: If you still think this is unacceptable, do not want to send +//config: patches, but do want to waste bandwidth expaining how wrong +//config: it is, you will be ignored. +//config: +//config:config FEATURE_WGET_OPENSSL +//config: bool "Try to connect to HTTPS using openssl" +//config: default y +//config: depends on WGET +//config: help +//config: Try to use openssl to handle HTTPS. +//config: +//config: OpenSSL has a simple SSL client for debug purposes. +//config: If you select this option, wget will effectively run: +//config: "openssl s_client -quiet -connect hostname:443 +//config: -servername hostname 2>/dev/null" and pipe its data +//config: through it. -servername is not used if hostname is numeric. +//config: Note inconvenient API: host resolution is done twice, +//config: and there is no guarantee openssl's idea of IPv6 address +//config: format is the same as ours. +//config: Another problem is that s_client prints debug information +//config: to stderr, and it needs to be suppressed. This means +//config: all error messages get suppressed too. +//config: openssl is also a big binary, often dynamically linked +//config: against ~15 libraries. +//config: +//config: If openssl can't be executed, internal TLS code will be used +//config: (if you enabled it); if openssl can be executed but fails later, +//config: wget can't detect this, and download will fail. + +//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP)) + +//kbuild:lib-$(CONFIG_WGET) += wget.o + +//usage:#define wget_trivial_usage +//usage: IF_FEATURE_WGET_LONG_OPTIONS( +//usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n" +//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n" +/* Since we ignore these opts, we don't show them in --help */ +/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */ +/* //usage: " [-nv] [-nc] [-nH] [-np]" */ +//usage: " [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." +//usage: ) +//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS( +//usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]" +//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." +//usage: ) +//usage:#define wget_full_usage "\n\n" +//usage: "Retrieve files via HTTP or FTP\n" +//usage: IF_FEATURE_WGET_LONG_OPTIONS( +//usage: "\n --spider Only check URL existence: $? is 0 if exists" +//usage: ) +//usage: "\n -c Continue retrieval of aborted transfer" +//usage: "\n -q Quiet" +//usage: "\n -P DIR Save to DIR (default .)" +//usage: "\n -S Show server response" +//usage: IF_FEATURE_WGET_TIMEOUT( +//usage: "\n -T SEC Network read timeout is SEC seconds" +//usage: ) +//usage: "\n -O FILE Save to FILE ('-' for stdout)" +//usage: "\n -U STR Use STR for User-Agent header" +//usage: "\n -Y on/off Use proxy" + #include "libbb.h" -//#define log_io(...) bb_error_msg(__VA_ARGS__) -#define log_io(...) ((void)0) +#if 0 +# define log_io(...) bb_error_msg(__VA_ARGS__) +# define SENDFMT(fp, fmt, ...) \ + do { \ + log_io("> " fmt, ##__VA_ARGS__); \ + fprintf(fp, fmt, ##__VA_ARGS__); \ + } while (0); +#else +# define log_io(...) ((void)0) +# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__) +#endif + +#define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS) struct host_info { char *allocated; const char *path; - const char *user; + char *user; + const char *protocol; char *host; int port; - smallint is_ftp; }; +static const char P_FTP[] ALIGN1 = "ftp"; +static const char P_HTTP[] ALIGN1 = "http"; +#if SSL_SUPPORTED +static const char P_HTTPS[] ALIGN1 = "https"; +#endif +#if ENABLE_FEATURE_WGET_LONG_OPTIONS +/* User-specified headers prevent using our corresponding built-in headers. */ +enum { + HDR_HOST = (1<<0), + HDR_USER_AGENT = (1<<1), + HDR_RANGE = (1<<2), + HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION, + HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION, +}; +static const char wget_user_headers[] ALIGN1 = + "Host:\0" + "User-Agent:\0" + "Range:\0" +# if ENABLE_FEATURE_WGET_AUTHENTICATION + "Authorization:\0" + "Proxy-Authorization:\0" +# endif + ; +# define USR_HEADER_HOST (G.user_headers & HDR_HOST) +# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT) +# define USR_HEADER_RANGE (G.user_headers & HDR_RANGE) +# define USR_HEADER_AUTH (G.user_headers & HDR_AUTH) +# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH) +#else /* No long options, no user-headers :( */ +# define USR_HEADER_HOST 0 +# define USR_HEADER_USER_AGENT 0 +# define USR_HEADER_RANGE 0 +# define USR_HEADER_AUTH 0 +# define USR_HEADER_PROXY_AUTH 0 +#endif /* Globals */ struct globals { @@ -33,16 +220,18 @@ struct globals { const char *curfile; /* Name of current file being transferred */ bb_progress_t pmt; #endif - char *dir_prefix; + char *dir_prefix; #if ENABLE_FEATURE_WGET_LONG_OPTIONS - char *post_data; - char *extra_headers; + char *post_data; + char *extra_headers; + unsigned char user_headers; /* Headers mentioned by the user */ #endif - char *fname_out; /* where to direct output (-O) */ - const char *proxy_flag; /* Use proxies if env vars are set */ - const char *user_agent; /* "User-Agent" header field */ + char *fname_out; /* where to direct output (-O) */ + const char *proxy_flag; /* Use proxies if env vars are set */ + const char *user_agent; /* "User-Agent" header field */ #if ENABLE_FEATURE_WGET_TIMEOUT unsigned timeout_seconds; + bool die_if_timed_out; #endif int output_fd; int o_flags; @@ -57,25 +246,28 @@ struct globals { } FIX_ALIASING; #define G (*ptr_to_globals) #define INIT_G() do { \ - SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \ - IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \ + SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \ +} while (0) +#define FINI_G() do { \ + FREE_PTR_TO_GLOBALS(); \ } while (0) /* Must match option string! */ enum { WGET_OPT_CONTINUE = (1 << 0), - WGET_OPT_SPIDER = (1 << 1), - WGET_OPT_QUIET = (1 << 2), + WGET_OPT_QUIET = (1 << 1), + WGET_OPT_SERVER_RESPONSE = (1 << 2), WGET_OPT_OUTNAME = (1 << 3), WGET_OPT_PREFIX = (1 << 4), WGET_OPT_PROXY = (1 << 5), WGET_OPT_USER_AGENT = (1 << 6), WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7), WGET_OPT_RETRIES = (1 << 8), - WGET_OPT_PASSIVE = (1 << 9), + WGET_OPT_nsomething = (1 << 9), WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS, WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS, + WGET_OPT_SPIDER = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS, }; enum { @@ -166,27 +358,78 @@ static char* sanitize_string(char *s) return s; } +#if ENABLE_FEATURE_WGET_TIMEOUT +static void alarm_handler(int sig UNUSED_PARAM) +{ + /* This is theoretically unsafe (uses stdio and malloc in signal handler) */ + if (G.die_if_timed_out) + bb_error_msg_and_die("download timed out"); +} +static void set_alarm(void) +{ + if (G.timeout_seconds) { + alarm(G.timeout_seconds); + G.die_if_timed_out = 1; + } +} +# define clear_alarm() ((void)(G.die_if_timed_out = 0)) +#else +# define set_alarm() ((void)0) +# define clear_alarm() ((void)0) +#endif + +#if ENABLE_FEATURE_WGET_OPENSSL +/* + * is_ip_address() attempts to verify whether or not a string + * contains an IPv4 or IPv6 address (vs. an FQDN). The result + * of inet_pton() can be used to determine this. + * + * TODO add proper error checking when inet_pton() returns -1 + * (some form of system error has occurred, and errno is set) + */ +static int is_ip_address(const char *string) +{ + struct sockaddr_in sa; + + int result = inet_pton(AF_INET, string, &(sa.sin_addr)); +# if ENABLE_FEATURE_IPV6 + if (result == 0) { + struct sockaddr_in6 sa6; + result = inet_pton(AF_INET6, string, &(sa6.sin6_addr)); + } +# endif + return (result == 1); +} +#endif + static FILE *open_socket(len_and_sockaddr *lsa) { + int fd; FILE *fp; + set_alarm(); + fd = xconnect_stream(lsa); + clear_alarm(); + /* glibc 2.4 seems to try seeking on it - ??! */ /* hopefully it understands what ESPIPE means... */ - fp = fdopen(xconnect_stream(lsa), "r+"); - if (fp == NULL) + fp = fdopen(fd, "r+"); + if (!fp) bb_perror_msg_and_die(bb_msg_memory_exhausted); return fp; } /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */ -static char fgets_and_trim(FILE *fp) +static char fgets_and_trim(FILE *fp, const char *fmt) { char c; char *buf_ptr; + set_alarm(); if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL) bb_perror_msg_and_die("error getting response"); + clear_alarm(); buf_ptr = strchrnul(G.wget_buf, '\n'); c = *buf_ptr; @@ -196,6 +439,9 @@ static char fgets_and_trim(FILE *fp) log_io("< %s", G.wget_buf); + if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE)) + fprintf(stderr, fmt, G.wget_buf); + return c; } @@ -206,12 +452,15 @@ static int ftpcmd(const char *s1, const char *s2, FILE *fp) if (!s2) s2 = ""; fprintf(fp, "%s%s\r\n", s1, s2); + /* With --server-response, wget also shows its ftp commands */ + if (option_mask32 & WGET_OPT_SERVER_RESPONSE) + fprintf(stderr, "--> %s%s\n\n", s1, s2); fflush(fp); log_io("> %s%s", s1, s2); } do { - fgets_and_trim(fp); + fgets_and_trim(fp, "%s\n"); } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' '); G.wget_buf[3] = '\0'; @@ -227,20 +476,37 @@ static void parse_url(const char *src_url, struct host_info *h) free(h->allocated); h->allocated = url = xstrdup(src_url); - if (strncmp(url, "http://", 7) == 0) { - h->port = bb_lookup_port("http", "tcp", 80); - h->host = url + 7; - h->is_ftp = 0; - } else if (strncmp(url, "ftp://", 6) == 0) { - h->port = bb_lookup_port("ftp", "tcp", 21); - h->host = url + 6; - h->is_ftp = 1; - } else - bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url)); + h->protocol = P_FTP; + p = strstr(url, "://"); + if (p) { + *p = '\0'; + h->host = p + 3; + if (strcmp(url, P_FTP) == 0) { + h->port = bb_lookup_port(P_FTP, "tcp", 21); + } else +#if SSL_SUPPORTED + if (strcmp(url, P_HTTPS) == 0) { + h->port = bb_lookup_port(P_HTTPS, "tcp", 443); + h->protocol = P_HTTPS; + } else +#endif + if (strcmp(url, P_HTTP) == 0) { + http: + h->port = bb_lookup_port(P_HTTP, "tcp", 80); + h->protocol = P_HTTP; + } else { + *p = ':'; + bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url)); + } + } else { + // GNU wget is user-friendly and falls back to http:// + h->host = url; + goto http; + } // FYI: // "Real" wget 'http://busybox.net?var=a/b' sends this request: - // 'GET /?var=a/b HTTP 1.0' + // 'GET /?var=a/b HTTP/1.0' // and saves 'index.html?var=a%2Fb' (we save 'b') // wget 'http://busybox.net?login=john@doe': // request: 'GET /?login=john@doe HTTP/1.0' @@ -268,17 +534,21 @@ static void parse_url(const char *src_url, struct host_info *h) h->path = sp; } - // We used to set h->user to NULL here, but this interferes - // with handling of code 302 ("object was moved") - sp = strrchr(h->host, '@'); if (sp != NULL) { - h->user = h->host; + // URL-decode "user:password" string before base64-encoding: + // wget http://test:my%20pass@example.com should send + // Authorization: Basic dGVzdDpteSBwYXNz + // which decodes to "test:my pass". + // Standard wget and curl do this too. *sp = '\0'; + free(h->user); + h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0)); h->host = sp + 1; } - - sp = h->host; + /* else: h->user remains NULL, or as set by original request + * before redirect (if we are here after a redirect). + */ } static char *gethdr(FILE *fp) @@ -286,18 +556,24 @@ static char *gethdr(FILE *fp) char *s, *hdrval; int c; - /* *istrunc = 0; */ - /* retrieve header line */ - c = fgets_and_trim(fp); + c = fgets_and_trim(fp, " %s\n"); /* end of the headers? */ if (G.wget_buf[0] == '\0') return NULL; /* convert the header name to lower case */ - for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) { - /* tolower for "A-Z", no-op for "0-9a-z-." */ + for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) { + /* + * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges. + * 40-5f range ("@A-Z[\]^_") maps to 60-7f. + * "A-Z" maps to "a-z". + * "@[\]" can't occur in header names. + * "^_" maps to "~,DEL" (which is wrong). + * "^" was never seen yet, "_" was seen from web.archive.org + * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING). + */ *s |= 0x20; } @@ -318,43 +594,14 @@ static char *gethdr(FILE *fp) return hdrval; } -#if ENABLE_FEATURE_WGET_LONG_OPTIONS -static char *URL_escape(const char *str) +static void reset_beg_range_to_zero(void) { - /* URL encode, see RFC 2396 */ - char *dst; - char *res = dst = xmalloc(strlen(str) * 3 + 1); - unsigned char c; - - while (1) { - c = *str++; - if (c == '\0' - /* || strchr("!&'()*-.=_~", c) - more code */ - || c == '!' - || c == '&' - || c == '\'' - || c == '(' - || c == ')' - || c == '*' - || c == '-' - || c == '.' - || c == '=' - || c == '_' - || c == '~' - || (c >= '0' && c <= '9') - || ((c|0x20) >= 'a' && (c|0x20) <= 'z') - ) { - *dst++ = c; - if (c == '\0') - return res; - } else { - *dst++ = '%'; - *dst++ = bb_hexdigits_upcase[c >> 4]; - *dst++ = bb_hexdigits_upcase[c & 0xf]; - } - } + bb_error_msg("restart failed"); + G.beg_range = 0; + xlseek(G.output_fd, 0, SEEK_SET); + /* Done at the end instead: */ + /* ftruncate(G.output_fd, 0); */ } -#endif static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa) { @@ -419,14 +666,16 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ str = strrchr(G.wget_buf, ','); if (!str) goto pasv_error; port += xatou_range(str+1, 0, 255) * 256; - set_nport(lsa, htons(port)); + set_nport(&lsa->u.sa, htons(port)); *dfpp = open_socket(lsa); - if (G.beg_range) { + if (G.beg_range != 0) { sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range); if (ftpcmd(G.wget_buf, NULL, sfp) == 350) G.content_len -= G.beg_range; + else + reset_beg_range_to_zero(); } if (ftpcmd("RETR ", target->path, sfp) > 150) @@ -435,11 +684,136 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ return sfp; } +#if ENABLE_FEATURE_WGET_OPENSSL +static int spawn_https_helper_openssl(const char *host, unsigned port) +{ + char *allocated = NULL; + char *servername; + int sp[2]; + int pid; + IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;) + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0) + /* Kernel can have AF_UNIX support disabled */ + bb_perror_msg_and_die("socketpair"); + + if (!strchr(host, ':')) + host = allocated = xasprintf("%s:%u", host, port); + servername = xstrdup(host); + strrchr(servername, ':')[0] = '\0'; + + fflush_all(); + pid = xvfork(); + if (pid == 0) { + /* Child */ + char *argv[8]; + + close(sp[0]); + xmove_fd(sp[1], 0); + xdup2(0, 1); + /* + * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null + * It prints some debug stuff on stderr, don't know how to suppress it. + * Work around by dev-nulling stderr. We lose all error messages :( + */ + xmove_fd(2, 3); + xopen("/dev/null", O_RDWR); + memset(&argv, 0, sizeof(argv)); + argv[0] = (char*)"openssl"; + argv[1] = (char*)"s_client"; + argv[2] = (char*)"-quiet"; + argv[3] = (char*)"-connect"; + argv[4] = (char*)host; + /* + * Per RFC 6066 Section 3, the only permitted values in the + * TLS server_name (SNI) field are FQDNs (DNS hostnames). + * IPv4 and IPv6 addresses, port numbers are not allowed. + */ + if (!is_ip_address(servername)) { + argv[5] = (char*)"-servername"; + argv[6] = (char*)servername; + } + + BB_EXECVP(argv[0], argv); + xmove_fd(3, 2); +# if ENABLE_FEATURE_WGET_HTTPS + child_failed = 1; + xfunc_die(); +# else + bb_perror_msg_and_die("can't execute '%s'", argv[0]); +# endif + /* notreached */ + } + + /* Parent */ + free(servername); + free(allocated); + close(sp[1]); +# if ENABLE_FEATURE_WGET_HTTPS + if (child_failed) { + close(sp[0]); + return -1; + } +# endif + return sp[0]; +} +#endif + +#if ENABLE_FEATURE_WGET_HTTPS +static void spawn_ssl_client(const char *host, int network_fd) +{ + int sp[2]; + int pid; + char *servername, *p; + + servername = xstrdup(host); + p = strrchr(servername, ':'); + if (p) *p = '\0'; + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0) + /* Kernel can have AF_UNIX support disabled */ + bb_perror_msg_and_die("socketpair"); + + fflush_all(); + pid = BB_MMU ? xfork() : xvfork(); + if (pid == 0) { + /* Child */ + close(sp[0]); + xmove_fd(sp[1], 0); + xdup2(0, 1); + if (BB_MMU) { + tls_state_t *tls = new_tls_state(); + tls->ifd = tls->ofd = network_fd; + tls_handshake(tls, servername); + tls_run_copy_loop(tls); + exit(0); + } else { + char *argv[5]; + xmove_fd(network_fd, 3); + argv[0] = (char*)"ssl_client"; + argv[1] = (char*)"-s3"; + //TODO: if (!is_ip_address(servername))... + argv[2] = (char*)"-n"; + argv[3] = servername; + argv[4] = NULL; + BB_EXECVP(argv[0], argv); + bb_perror_msg_and_die("can't execute '%s'", argv[0]); + } + /* notreached */ + } + + /* Parent */ + free(servername); + close(sp[1]); + xmove_fd(sp[0], network_fd); +} +#endif + static void NOINLINE retrieve_file_data(FILE *dfp) { #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT # if ENABLE_FEATURE_WGET_TIMEOUT - unsigned second_cnt; + unsigned second_cnt = G.timeout_seconds; # endif struct pollfd polldata; @@ -460,7 +834,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp) * which messes up progress bar and/or timeout logic. * Because of nonblocking I/O, we need to dance * very carefully around EAGAIN. See explanation at - * clearerr() call. + * clearerr() calls. */ ndelay_on(polldata.fd); #endif @@ -468,32 +842,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp) int n; unsigned rdsz; - rdsz = sizeof(G.wget_buf); - if (G.got_clen) { - if (G.content_len < (off_t)sizeof(G.wget_buf)) { - if ((int)G.content_len <= 0) - break; - rdsz = (unsigned)G.content_len; - } - } - #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT -# if ENABLE_FEATURE_WGET_TIMEOUT - second_cnt = G.timeout_seconds; -# endif - while (1) { - if (safe_poll(&polldata, 1, 1000) != 0) - break; /* error, EOF, or data is available */ -# if ENABLE_FEATURE_WGET_TIMEOUT - if (second_cnt != 0 && --second_cnt == 0) { - progress_meter(PROGRESS_END); - bb_error_msg_and_die("download timed out"); - } -# endif - /* Needed for "stalled" indicator */ - progress_meter(PROGRESS_BUMP); - } - /* fread internally uses read loop, which in our case * is usually exited when we get EAGAIN. * In this case, libc sets error marker on the stream. @@ -503,36 +852,72 @@ static void NOINLINE retrieve_file_data(FILE *dfp) * into if (n <= 0) ... */ clearerr(dfp); - errno = 0; #endif + errno = 0; + rdsz = sizeof(G.wget_buf); + if (G.got_clen) { + if (G.content_len < (off_t)sizeof(G.wget_buf)) { + if ((int)G.content_len <= 0) + break; + rdsz = (unsigned)G.content_len; + } + } n = fread(G.wget_buf, 1, rdsz, dfp); - /* man fread: + + if (n > 0) { + xwrite(G.output_fd, G.wget_buf, n); +#if ENABLE_FEATURE_WGET_STATUSBAR + G.transferred += n; +#endif + if (G.got_clen) { + G.content_len -= n; + if (G.content_len == 0) + break; + } +#if ENABLE_FEATURE_WGET_TIMEOUT + second_cnt = G.timeout_seconds; +#endif + goto bump; + } + + /* n <= 0. + * man fread: * If error occurs, or EOF is reached, the return value * is a short item count (or zero). * fread does not distinguish between EOF and error. */ - if (n <= 0) { -#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT - if (errno == EAGAIN) /* poll lied, there is no data? */ - continue; /* yes */ -#endif - if (ferror(dfp)) + if (errno != EAGAIN) { + if (ferror(dfp)) { + progress_meter(PROGRESS_END); bb_perror_msg_and_die(bb_msg_read_error); + } break; /* EOF, not error */ } - xwrite(G.output_fd, G.wget_buf, n); - -#if ENABLE_FEATURE_WGET_STATUSBAR - G.transferred += n; - progress_meter(PROGRESS_BUMP); -#endif - if (G.got_clen) { - G.content_len -= n; - if (G.content_len == 0) - break; +#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT + /* It was EAGAIN. There is no data. Wait up to one second + * then abort if timed out, or update the bar and try reading again. + */ + if (safe_poll(&polldata, 1, 1000) == 0) { +# if ENABLE_FEATURE_WGET_TIMEOUT + if (second_cnt != 0 && --second_cnt == 0) { + progress_meter(PROGRESS_END); + bb_error_msg_and_die("download timed out"); + } +# endif + /* We used to loop back to poll here, + * but there is no great harm in letting fread + * to try reading anyway. + */ } - } +#endif + bump: + /* Need to do it _every_ second for "stalled" indicator + * to be shown properly. + */ + progress_meter(PROGRESS_BUMP); + } /* while (reading data) */ + #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT clearerr(dfp); ndelay_off(polldata.fd); /* else fgets can get very unhappy */ @@ -540,14 +925,32 @@ static void NOINLINE retrieve_file_data(FILE *dfp) if (!G.chunked) break; - fgets_and_trim(dfp); /* Eat empty line */ + fgets_and_trim(dfp, NULL); /* Eat empty line */ get_clen: - fgets_and_trim(dfp); + fgets_and_trim(dfp, NULL); G.content_len = STRTOOFF(G.wget_buf, NULL, 16); /* FIXME: error check? */ if (G.content_len == 0) break; /* all done! */ G.got_clen = 1; + /* + * Note that fgets may result in some data being buffered in dfp. + * We loop back to fread, which will retrieve this data. + * Also note that code has to be arranged so that fread + * is done _before_ one-second poll wait - poll doesn't know + * about stdio buffering and can result in spurious one second waits! + */ + } + + /* If -c failed, we restart from the beginning, + * but we do not truncate file then, we do it only now, at the end. + * This lets user to ^C if his 99% complete 10 GB file download + * failed to restart *without* losing the almost complete file. + */ + { + off_t pos = lseek(G.output_fd, 0, SEEK_CUR); + if (pos != (off_t)-1) + ftruncate(G.output_fd, pos); } /* Draw full bar and free its resources */ @@ -556,7 +959,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp) progress_meter(PROGRESS_END); } -static int download_one_url(const char *url) +static void download_one_url(const char *url) { bool use_proxy; /* Use proxies if env vars are set */ int redir_limit; @@ -565,6 +968,7 @@ static int download_one_url(const char *url) FILE *dfp; /* socket to ftp server (data) */ char *proxy = NULL; char *fname_out_alloc; + char *redirected_path = NULL; struct host_info server; struct host_info target; @@ -578,7 +982,8 @@ static int download_one_url(const char *url) /* Use the proxy if necessary */ use_proxy = (strcmp(G.proxy_flag, "off") != 0); if (use_proxy) { - proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy"); + proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy"); +//FIXME: what if protocol is https? Ok to use http_proxy? use_proxy = (proxy && proxy[0]); if (use_proxy) parse_url(proxy, &server); @@ -606,6 +1011,10 @@ static int download_one_url(const char *url) /* -P DIR is considered only if there was no -O FILE */ if (G.dir_prefix) G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out); + else { + /* redirects may free target.path later, need to make a copy */ + G.fname_out = fname_out_alloc = xstrdup(G.fname_out); + } } #if ENABLE_FEATURE_WGET_STATUSBAR G.curfile = bb_get_last_path_component_nostrip(G.fname_out); @@ -634,78 +1043,121 @@ static int download_one_url(const char *url) /*G.content_len = 0; - redundant, got_clen = 0 is enough */ G.got_clen = 0; G.chunked = 0; - if (use_proxy || !target.is_ftp) { + if (use_proxy || target.protocol != P_FTP) { /* * HTTP session */ char *str; int status; - - /* Open socket to http server */ + /* Open socket to http(s) server */ +#if ENABLE_FEATURE_WGET_OPENSSL + /* openssl (and maybe internal TLS) support is configured */ + if (target.protocol == P_HTTPS) { + /* openssl-based helper + * Inconvenient API since we can't give it an open fd + */ + int fd = spawn_https_helper_openssl(server.host, server.port); +# if ENABLE_FEATURE_WGET_HTTPS + if (fd < 0) { /* no openssl? try internal */ + sfp = open_socket(lsa); + spawn_ssl_client(server.host, fileno(sfp)); + goto socket_opened; + } +# else + /* We don't check for exec("openssl") failure in this case */ +# endif + sfp = fdopen(fd, "r+"); + if (!sfp) + bb_perror_msg_and_die(bb_msg_memory_exhausted); + goto socket_opened; + } sfp = open_socket(lsa); - + socket_opened: +#elif ENABLE_FEATURE_WGET_HTTPS + /* Only internal TLS support is configured */ + sfp = open_socket(lsa); + if (target.protocol == P_HTTPS) + spawn_ssl_client(server.host, fileno(sfp)); +#else + /* ssl (https) support is not configured */ + sfp = open_socket(lsa); +#endif /* Send HTTP request */ if (use_proxy) { - fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n", - target.is_ftp ? "f" : "ht", target.host, + SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n", + target.protocol, target.host, target.path); } else { - if (option_mask32 & WGET_OPT_POST_DATA) - fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path); - else - fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path); + SENDFMT(sfp, "%s /%s HTTP/1.1\r\n", + (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET", + target.path); } - - fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n", - target.host, G.user_agent); + if (!USR_HEADER_HOST) + SENDFMT(sfp, "Host: %s\r\n", target.host); + if (!USR_HEADER_USER_AGENT) + SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent); /* Ask server to close the connection as soon as we are done * (IOW: we do not intend to send more requests) */ - fprintf(sfp, "Connection: close\r\n"); + SENDFMT(sfp, "Connection: close\r\n"); #if ENABLE_FEATURE_WGET_AUTHENTICATION - if (target.user) { - fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, + if (target.user && !USR_HEADER_AUTH) { + SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6, base64enc(target.user)); } - if (use_proxy && server.user) { - fprintf(sfp, "Proxy-Authorization: Basic %s\r\n", + if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) { + SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n", base64enc(server.user)); } #endif - if (G.beg_range) - fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range); + if (G.beg_range != 0 && !USR_HEADER_RANGE) + SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range); #if ENABLE_FEATURE_WGET_LONG_OPTIONS - if (G.extra_headers) + if (G.extra_headers) { + log_io(G.extra_headers); fputs(G.extra_headers, sfp); + } if (option_mask32 & WGET_OPT_POST_DATA) { - char *estr = URL_escape(G.post_data); - fprintf(sfp, + SENDFMT(sfp, "Content-Type: application/x-www-form-urlencoded\r\n" "Content-Length: %u\r\n" "\r\n" "%s", - (int) strlen(estr), estr + (int) strlen(G.post_data), G.post_data ); - free(estr); } else #endif { - fprintf(sfp, "\r\n"); + SENDFMT(sfp, "\r\n"); } fflush(sfp); +/* Tried doing this unconditionally. + * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS. + */ +#if SSL_SUPPORTED + if (target.protocol == P_HTTPS) { + /* If we use SSL helper, keeping our end of the socket open for writing + * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF) + * even after child closes its copy of the fd. + * This helps: + */ + shutdown(fileno(sfp), SHUT_WR); + } +#endif + /* * Retrieve HTTP response line and check for "200" status code. */ read_response: - fgets_and_trim(sfp); + fgets_and_trim(sfp, " %s\n"); str = G.wget_buf; str = skip_non_whitespace(str); @@ -719,7 +1171,21 @@ static int download_one_url(const char *url) while (gethdr(sfp) != NULL) /* eat all remaining headers */; goto read_response; + + /* Success responses */ case 200: + /* fall through */ + case 201: /* 201 Created */ +/* "The request has been fulfilled and resulted in a new resource being created" */ + /* Standard wget is reported to treat this as success */ + /* fall through */ + case 202: /* 202 Accepted */ +/* "The request has been accepted for processing, but the processing has not been completed" */ + /* Treat as success: fall through */ + case 203: /* 203 Non-Authoritative Information */ +/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */ + /* fall through */ + case 204: /* 204 No Content */ /* Response 204 doesn't say "null file", it says "metadata has changed but data didn't": @@ -744,16 +1210,26 @@ is always terminated by the first empty line after the header fields." However, in real world it was observed that some web servers (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. */ - case 204: + if (G.beg_range != 0) { + /* "Range:..." was not honored by the server. + * Restart download from the beginning. + */ + reset_beg_range_to_zero(); + } break; + /* 205 Reset Content ?? what to do on this ?? */ + case 300: /* redirection */ case 301: case 302: case 303: break; - case 206: - if (G.beg_range) + + case 206: /* Partial Content */ + if (G.beg_range != 0) + /* "Range:..." worked. Good. */ break; + /* Partial Content even though we did not ask for it??? */ /* fall through */ default: bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf)); @@ -797,12 +1273,13 @@ However, in real world it was observed that some web servers bb_error_msg_and_die("too many redirections"); fclose(sfp); if (str[0] == '/') { - free(target.allocated); - target.path = target.allocated = xstrdup(str+1); + free(redirected_path); + target.path = redirected_path = xstrdup(str+1); /* lsa stays the same: it's on the same server */ } else { parse_url(str, &target); if (!use_proxy) { + /* server.user remains untouched */ free(server.allocated); server.allocated = NULL; server.host = target.host; @@ -821,7 +1298,6 @@ However, in real world it was observed that some web servers /* For HTTP, data is pumped over the same connection */ dfp = sfp; - } else { /* * FTP session @@ -852,9 +1328,10 @@ However, in real world it was observed that some web servers free(server.allocated); free(target.allocated); + free(server.user); + free(target.user); free(fname_out_alloc); - - return EXIT_SUCCESS; + free(redirected_path); } int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; @@ -864,64 +1341,107 @@ int wget_main(int argc UNUSED_PARAM, char **argv) static const char wget_longopts[] ALIGN1 = /* name, has_arg, val */ "continue\0" No_argument "c" -//FIXME: -s isn't --spider, it's --save-headers! - "spider\0" No_argument "s" "quiet\0" No_argument "q" + "server-response\0" No_argument "S" "output-document\0" Required_argument "O" "directory-prefix\0" Required_argument "P" "proxy\0" Required_argument "Y" "user-agent\0" Required_argument "U" -#if ENABLE_FEATURE_WGET_TIMEOUT - "timeout\0" Required_argument "T" -#endif +IF_FEATURE_WGET_TIMEOUT( + "timeout\0" Required_argument "T") /* Ignored: */ - // "tries\0" Required_argument "t" +IF_DESKTOP( "tries\0" Required_argument "t") + "header\0" Required_argument "\xff" + "post-data\0" Required_argument "\xfe" + "spider\0" No_argument "\xfd" /* Ignored (we always use PASV): */ - "passive-ftp\0" No_argument "\xff" - "header\0" Required_argument "\xfe" - "post-data\0" Required_argument "\xfd" +IF_DESKTOP( "passive-ftp\0" No_argument "\xf0") /* Ignored (we don't do ssl) */ - "no-check-certificate\0" No_argument "\xfc" +IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0") + /* Ignored (we don't support caching) */ +IF_DESKTOP( "no-cache\0" No_argument "\xf0") +IF_DESKTOP( "no-verbose\0" No_argument "\xf0") +IF_DESKTOP( "no-clobber\0" No_argument "\xf0") +IF_DESKTOP( "no-host-directories\0" No_argument "\xf0") +IF_DESKTOP( "no-parent\0" No_argument "\xf0") ; #endif - int exitcode; #if ENABLE_FEATURE_WGET_LONG_OPTIONS llist_t *headers_llist = NULL; #endif INIT_G(); - IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) +#if ENABLE_FEATURE_WGET_TIMEOUT + G.timeout_seconds = 900; + signal(SIGALRM, alarm_handler); +#endif G.proxy_flag = "on"; /* use proxies if env vars are set */ G.user_agent = "Wget"; /* "User-Agent" header field */ #if ENABLE_FEATURE_WGET_LONG_OPTIONS applet_long_options = wget_longopts; #endif - opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::"); - getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:", - &G.fname_out, &G.dir_prefix, + opt_complementary = "-1" /* at least one URL */ + IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */ + getopt32(argv, "cqSO:P:Y:U:T:+" + /*ignored:*/ "t:" + /*ignored:*/ "n::" + /* wget has exactly four -n opts, all of which we can ignore: + * -nv --no-verbose: be moderately quiet (-q is full quiet) + * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE + * -nH --no-host-directories: wget -r http://host/ won't create host/ + * -np --no-parent + * "n::" above says that we accept -n[ARG]. + * Specifying "n:" would be a bug: "-n ARG" would eat ARG! + */ + , &G.fname_out, &G.dir_prefix, &G.proxy_flag, &G.user_agent, IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL), - NULL /* -t RETRIES */ + NULL, /* -t RETRIES */ + NULL /* -n[ARG] */ IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist) IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data) ); +#if 0 /* option bits debug */ + if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM"); + if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething"); + if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header"); + if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data"); + if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider"); + exit(0); +#endif argv += optind; #if ENABLE_FEATURE_WGET_LONG_OPTIONS if (headers_llist) { - int size = 1; - char *cp; + int size = 0; + char *hdr; llist_t *ll = headers_llist; while (ll) { size += strlen(ll->data) + 2; ll = ll->link; } - G.extra_headers = cp = xmalloc(size); + G.extra_headers = hdr = xmalloc(size + 1); while (headers_llist) { - cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist)); + int bit; + const char *words; + + size = sprintf(hdr, "%s\r\n", + (char*)llist_pop(&headers_llist)); + /* a bit like index_in_substrings but don't match full key */ + bit = 1; + words = wget_user_headers; + while (*words) { + if (strstr(hdr, words) == hdr) { + G.user_headers |= bit; + break; + } + bit <<= 1; + words += strlen(words) + 1; + } + hdr += size; } } #endif @@ -937,12 +1457,16 @@ int wget_main(int argc UNUSED_PARAM, char **argv) G.o_flags = O_WRONLY | O_CREAT | O_TRUNC; } - exitcode = 0; while (*argv) - exitcode |= download_one_url(*argv++); + download_one_url(*argv++); if (G.output_fd >= 0) xclose(G.output_fd); - return exitcode; +#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS + free(G.extra_headers); +#endif + FINI_G(); + + return EXIT_SUCCESS; }