X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=networking%2Fwget.c;h=b082a0f591103dc8d16715bac53364dd732f05b3;hb=2b1559056cf32c42675ecd937796e1455bcb5c2c;hp=6c8bd90a8fe9262bce151d1a21eba937aa324de4;hpb=d7bfee1edb551d3503c7b616702a73192ba069a9;p=oweals%2Fbusybox.git diff --git a/networking/wget.c b/networking/wget.c index 6c8bd90a8..b082a0f59 100644 --- a/networking/wget.c +++ b/networking/wget.c @@ -9,30 +9,122 @@ * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2. */ +//config:config WGET +//config: bool "wget" +//config: default y +//config: help +//config: wget is a utility for non-interactive download of files from HTTP +//config: and FTP servers. +//config: +//config:config FEATURE_WGET_STATUSBAR +//config: bool "Enable a nifty process meter (+2k)" +//config: default y +//config: depends on WGET +//config: help +//config: Enable the transfer progress bar for wget transfers. +//config: +//config:config FEATURE_WGET_AUTHENTICATION +//config: bool "Enable HTTP authentication" +//config: default y +//config: depends on WGET +//config: help +//config: Support authenticated HTTP transfers. +//config: +//config:config FEATURE_WGET_LONG_OPTIONS +//config: bool "Enable long options" +//config: default y +//config: depends on WGET && LONG_OPTS +//config: help +//config: Support long options for the wget applet. +//config: +//config:config FEATURE_WGET_TIMEOUT +//config: bool "Enable timeout option -T SEC" +//config: default y +//config: depends on WGET +//config: help +//config: Supports network read and connect timeouts for wget, +//config: so that wget will give up and timeout, through the -T +//config: command line option. +//config: +//config: Currently only connect and network data read timeout are +//config: supported (i.e., timeout is not applied to the DNS query). When +//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option +//config: will work in addition to -T. +//config: +//config:config FEATURE_WGET_OPENSSL +//config: bool "Try to connect to HTTPS using openssl" +//config: default y +//config: depends on WGET +//config: help +//config: Choose how wget establishes SSL connection for https:// URLs. +//config: +//config: Busybox itself contains no SSL code. wget will spawn +//config: a helper program to talk over HTTPS. +//config: +//config: OpenSSL has a simple SSL client for debug purposes. +//config: If you select "openssl" helper, wget will effectively run: +//config: "openssl s_client -quiet -connect hostname:443 +//config: -servername hostname 2>/dev/null" and pipe its data +//config: through it. -servername is not used if hostname is numeric. +//config: Note inconvenient API: host resolution is done twice, +//config: and there is no guarantee openssl's idea of IPv6 address +//config: format is the same as ours. +//config: Another problem is that s_client prints debug information +//config: to stderr, and it needs to be suppressed. This means +//config: all error messages get suppressed too. +//config: openssl is also a big binary, often dynamically linked +//config: against ~15 libraries. +//config: +//config:config FEATURE_WGET_SSL_HELPER +//config: bool "Try to connect to HTTPS using ssl_helper" +//config: default y +//config: depends on WGET +//config: help +//config: Choose how wget establishes SSL connection for https:// URLs. +//config: +//config: Busybox itself contains no SSL code. wget will spawn +//config: a helper program to talk over HTTPS. 
+//config: +//config: ssl_helper is a tool which can be built statically +//config: from busybox sources against a small embedded SSL library. +//config: Please see networking/ssl_helper/README. +//config: It does not require double host resolution and emits +//config: error messages to stderr. +//config: +//config: Precompiled static binary may be available at +//config: http://busybox.net/downloads/binaries/ + +//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP)) + +//kbuild:lib-$(CONFIG_WGET) += wget.o + //usage:#define wget_trivial_usage //usage: IF_FEATURE_WGET_LONG_OPTIONS( -//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n" +//usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n" //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n" /* Since we ignore these opts, we don't show them in --help */ -/* //usage: " [--no-check-certificate] [--no-cache]" */ +/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */ +/* //usage: " [-nv] [-nc] [-nH] [-np]" */ //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." //usage: ) //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS( -//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]" +//usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]" //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." //usage: ) //usage:#define wget_full_usage "\n\n" //usage: "Retrieve files via HTTP or FTP\n" -//usage: "\n -s Spider mode - only check file existence" -//usage: "\n -c Continue retrieval of aborted transfer" -//usage: "\n -q Quiet" -//usage: "\n -P DIR Save to DIR (default .)" +//usage: IF_FEATURE_WGET_LONG_OPTIONS( +//usage: "\n --spider Spider mode - only check file existence" +//usage: ) +//usage: "\n -c Continue retrieval of aborted transfer" +//usage: "\n -q Quiet" +//usage: "\n -P DIR Save to DIR (default .)" //usage: IF_FEATURE_WGET_TIMEOUT( -//usage: "\n -T SEC Network read timeout is SEC seconds" +//usage: "\n -T SEC Network read timeout is SEC seconds" //usage: ) -//usage: "\n -O FILE Save to FILE ('-' for stdout)" -//usage: "\n -U STR Use STR for User-Agent header" -//usage: "\n -Y Use proxy ('on' or 'off')" +//usage: "\n -O FILE Save to FILE ('-' for stdout)" +//usage: "\n -U STR Use STR for User-Agent header" +//usage: "\n -Y on/off Use proxy" #include "libbb.h" @@ -57,9 +149,11 @@ struct host_info { char *host; int port; }; -static const char P_FTP[] = "ftp"; -static const char P_HTTP[] = "http"; -static const char P_HTTPS[] = "https"; +static const char P_FTP[] ALIGN1 = "ftp"; +static const char P_HTTP[] ALIGN1 = "http"; +#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER +static const char P_HTTPS[] ALIGN1 = "https"; +#endif #if ENABLE_FEATURE_WGET_LONG_OPTIONS /* User-specified headers prevent using our corresponding built-in headers. */ @@ -112,7 +206,7 @@ struct globals { const char *user_agent; /* "User-Agent" header field */ #if ENABLE_FEATURE_WGET_TIMEOUT unsigned timeout_seconds; - bool connecting; + bool die_if_timed_out; #endif int output_fd; int o_flags; @@ -137,17 +231,17 @@ struct globals { /* Must match option string! 
*/ enum { WGET_OPT_CONTINUE = (1 << 0), - WGET_OPT_SPIDER = (1 << 1), - WGET_OPT_QUIET = (1 << 2), - WGET_OPT_OUTNAME = (1 << 3), - WGET_OPT_PREFIX = (1 << 4), - WGET_OPT_PROXY = (1 << 5), - WGET_OPT_USER_AGENT = (1 << 6), - WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7), - WGET_OPT_RETRIES = (1 << 8), - WGET_OPT_PASSIVE = (1 << 9), - WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS, - WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS, + WGET_OPT_QUIET = (1 << 1), + WGET_OPT_OUTNAME = (1 << 2), + WGET_OPT_PREFIX = (1 << 3), + WGET_OPT_PROXY = (1 << 4), + WGET_OPT_USER_AGENT = (1 << 5), + WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 6), + WGET_OPT_RETRIES = (1 << 7), + WGET_OPT_nsomething = (1 << 8), + WGET_OPT_HEADER = (1 << 9) * ENABLE_FEATURE_WGET_LONG_OPTIONS, + WGET_OPT_POST_DATA = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS, + WGET_OPT_SPIDER = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS, }; enum { @@ -242,9 +336,44 @@ static char* sanitize_string(char *s) static void alarm_handler(int sig UNUSED_PARAM) { /* This is theoretically unsafe (uses stdio and malloc in signal handler) */ - if (G.connecting) + if (G.die_if_timed_out) bb_error_msg_and_die("download timed out"); } +static void set_alarm(void) +{ + if (G.timeout_seconds) { + alarm(G.timeout_seconds); + G.die_if_timed_out = 1; + } +} +# define clear_alarm() ((void)(G.die_if_timed_out = 0)) +#else +# define set_alarm() ((void)0) +# define clear_alarm() ((void)0) +#endif + +#if ENABLE_FEATURE_WGET_OPENSSL +/* + * is_ip_address() attempts to verify whether or not a string + * contains an IPv4 or IPv6 address (vs. an FQDN). The result + * of inet_pton() can be used to determine this. + * + * TODO add proper error checking when inet_pton() returns -1 + * (some form of system error has occurred, and errno is set) + */ +static int is_ip_address(const char *string) +{ + struct sockaddr_in sa; + + int result = inet_pton(AF_INET, string, &(sa.sin_addr)); +# if ENABLE_FEATURE_IPV6 + if (result == 0) { + struct sockaddr_in6 sa6; + result = inet_pton(AF_INET6, string, &(sa6.sin6_addr)); + } +# endif + return (result == 1); +} #endif static FILE *open_socket(len_and_sockaddr *lsa) @@ -252,9 +381,9 @@ static FILE *open_socket(len_and_sockaddr *lsa) int fd; FILE *fp; - IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;) + set_alarm(); fd = xconnect_stream(lsa); - IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;) + clear_alarm(); /* glibc 2.4 seems to try seeking on it - ??! */ /* hopefully it understands what ESPIPE means... */ @@ -266,14 +395,15 @@ static FILE *open_socket(len_and_sockaddr *lsa) } /* Returns '\n' if it was seen, else '\0'. 
Trims at first '\r' or '\n' */ -/* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */ static char fgets_and_trim(FILE *fp) { char c; char *buf_ptr; + set_alarm(); if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL) bb_perror_msg_and_die("error getting response"); + clear_alarm(); buf_ptr = strchrnul(G.wget_buf, '\n'); c = *buf_ptr; @@ -322,10 +452,12 @@ static void parse_url(const char *src_url, struct host_info *h) if (strcmp(url, P_FTP) == 0) { h->port = bb_lookup_port(P_FTP, "tcp", 21); } else +#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER if (strcmp(url, P_HTTPS) == 0) { h->port = bb_lookup_port(P_HTTPS, "tcp", 443); h->protocol = P_HTTPS; } else +#endif if (strcmp(url, P_HTTP) == 0) { http: h->port = bb_lookup_port(P_HTTP, "tcp", 80); @@ -520,11 +652,14 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_ return sfp; } -static int spawn_https_helper(const char *host, unsigned port) +#if ENABLE_FEATURE_WGET_OPENSSL +static int spawn_https_helper_openssl(const char *host, unsigned port) { char *allocated = NULL; + char *servername; int sp[2]; int pid; + IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;) if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0) /* Kernel can have AF_UNIX support disabled */ @@ -532,19 +667,18 @@ static int spawn_https_helper(const char *host, unsigned port) if (!strchr(host, ':')) host = allocated = xasprintf("%s:%u", host, port); + servername = xstrdup(host); + strrchr(servername, ':')[0] = '\0'; - pid = BB_MMU ? xfork() : xvfork(); + fflush_all(); + pid = xvfork(); if (pid == 0) { /* Child */ - char *argv[6]; + char *argv[8]; close(sp[0]); xmove_fd(sp[1], 0); xdup2(0, 1); - /* - * TODO: develop a tiny ssl/tls helper (using matrixssl?), - * try to exec it here before falling back to big fat openssl. - */ /* * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null * It prints some debug stuff on stderr, don't know how to suppress it. @@ -552,29 +686,50 @@ static int spawn_https_helper(const char *host, unsigned port) */ xmove_fd(2, 3); xopen("/dev/null", O_RDWR); + memset(&argv, 0, sizeof(argv)); argv[0] = (char*)"openssl"; argv[1] = (char*)"s_client"; argv[2] = (char*)"-quiet"; argv[3] = (char*)"-connect"; argv[4] = (char*)host; - argv[5] = NULL; + /* + * Per RFC 6066 Section 3, the only permitted values in the + * TLS server_name (SNI) field are FQDNs (DNS hostnames). + * IPv4 and IPv6 addresses, port numbers are not allowed. 
+ */ + if (!is_ip_address(servername)) { + argv[5] = (char*)"-servername"; + argv[6] = (char*)servername; + } + BB_EXECVP(argv[0], argv); xmove_fd(3, 2); +# if ENABLE_FEATURE_WGET_SSL_HELPER + child_failed = 1; + xfunc_die(); +# else bb_perror_msg_and_die("can't execute '%s'", argv[0]); +# endif /* notreached */ } /* Parent */ + free(servername); free(allocated); close(sp[1]); +# if ENABLE_FEATURE_WGET_SSL_HELPER + if (child_failed) { + close(sp[0]); + return -1; + } +# endif return sp[0]; } +#endif -/* See networking/ssl_helper/README */ -#define SSL_HELPER 0 - -#if SSL_HELPER -static void spawn_https_helper1(int network_fd) +/* See networking/ssl_helper/README how to build one */ +#if ENABLE_FEATURE_WGET_SSL_HELPER +static void spawn_https_helper_small(int network_fd) { int sp[2]; int pid; @@ -851,19 +1006,37 @@ static void download_one_url(const char *url) int status; /* Open socket to http(s) server */ +#if ENABLE_FEATURE_WGET_OPENSSL + /* openssl (and maybe ssl_helper) support is configured */ if (target.protocol == P_HTTPS) { -/* openssl-based helper - * Inconvenient API since we can't give it an open fd - */ - int fd = spawn_https_helper(server.host, server.port); + /* openssl-based helper + * Inconvenient API since we can't give it an open fd + */ + int fd = spawn_https_helper_openssl(server.host, server.port); +# if ENABLE_FEATURE_WGET_SSL_HELPER + if (fd < 0) { /* no openssl? try ssl_helper */ + sfp = open_socket(lsa); + spawn_https_helper_small(fileno(sfp)); + goto socket_opened; + } +# else + /* We don't check for exec("openssl") failure in this case */ +# endif sfp = fdopen(fd, "r+"); if (!sfp) bb_perror_msg_and_die(bb_msg_memory_exhausted); - } else - sfp = open_socket(lsa); -#if SSL_HELPER + goto socket_opened; + } + sfp = open_socket(lsa); + socket_opened: +#elif ENABLE_FEATURE_WGET_SSL_HELPER + /* Only ssl_helper support is configured */ + sfp = open_socket(lsa); if (target.protocol == P_HTTPS) - spawn_https_helper1(fileno(sfp)); + spawn_https_helper_small(fileno(sfp)); +#else + /* ssl (https) support is not configured */ + sfp = open_socket(lsa); #endif /* Send HTTP request */ if (use_proxy) { @@ -920,6 +1093,12 @@ static void download_one_url(const char *url) } fflush(sfp); + /* If we use SSL helper, keeping our end of the socket open for writing + * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF) + * even after child closes its copy of the fd. + * This helps: + */ + shutdown(fileno(sfp), SHUT_WR); /* * Retrieve HTTP response line and check for "200" status code. @@ -939,7 +1118,21 @@ static void download_one_url(const char *url) while (gethdr(sfp) != NULL) /* eat all remaining headers */; goto read_response; + + /* Success responses */ case 200: + /* fall through */ + case 201: /* 201 Created */ +/* "The request has been fulfilled and resulted in a new resource being created" */ + /* Standard wget is reported to treat this as success */ + /* fall through */ + case 202: /* 202 Accepted */ +/* "The request has been accepted for processing, but the processing has not been completed" */ + /* Treat as success: fall through */ + case 203: /* 203 Non-Authoritative Information */ +/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */ + /* fall through */ + case 204: /* 204 No Content */ /* Response 204 doesn't say "null file", it says "metadata has changed but data didn't": @@ -964,7 +1157,6 @@ is always terminated by the first empty line after the header fields." 
However, in real world it was observed that some web servers (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. */ - case 204: if (G.beg_range != 0) { /* "Range:..." was not honored by the server. * Restart download from the beginning. @@ -972,11 +1164,14 @@ However, in real world it was observed that some web servers reset_beg_range_to_zero(); } break; + /* 205 Reset Content ?? what to do on this ?? */ + case 300: /* redirection */ case 301: case 302: case 303: break; + case 206: /* Partial Content */ if (G.beg_range != 0) /* "Range:..." worked. Good. */ @@ -1050,7 +1245,6 @@ However, in real world it was observed that some web servers /* For HTTP, data is pumped over the same connection */ dfp = sfp; - } else { /* * FTP session @@ -1094,26 +1288,28 @@ int wget_main(int argc UNUSED_PARAM, char **argv) static const char wget_longopts[] ALIGN1 = /* name, has_arg, val */ "continue\0" No_argument "c" -//FIXME: -s isn't --spider, it's --save-headers! - "spider\0" No_argument "s" "quiet\0" No_argument "q" "output-document\0" Required_argument "O" "directory-prefix\0" Required_argument "P" "proxy\0" Required_argument "Y" "user-agent\0" Required_argument "U" -#if ENABLE_FEATURE_WGET_TIMEOUT - "timeout\0" Required_argument "T" -#endif +IF_FEATURE_WGET_TIMEOUT( + "timeout\0" Required_argument "T") /* Ignored: */ - // "tries\0" Required_argument "t" +IF_DESKTOP( "tries\0" Required_argument "t") + "header\0" Required_argument "\xff" + "post-data\0" Required_argument "\xfe" + "spider\0" No_argument "\xfd" /* Ignored (we always use PASV): */ - "passive-ftp\0" No_argument "\xff" - "header\0" Required_argument "\xfe" - "post-data\0" Required_argument "\xfd" +IF_DESKTOP( "passive-ftp\0" No_argument "\xf0") /* Ignored (we don't do ssl) */ - "no-check-certificate\0" No_argument "\xfc" +IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0") /* Ignored (we don't support caching) */ - "no-cache\0" No_argument "\xfb" +IF_DESKTOP( "no-cache\0" No_argument "\xf0") +IF_DESKTOP( "no-verbose\0" No_argument "\xf0") +IF_DESKTOP( "no-clobber\0" No_argument "\xf0") +IF_DESKTOP( "no-host-directories\0" No_argument "\xf0") +IF_DESKTOP( "no-parent\0" No_argument "\xf0") ; #endif @@ -1133,17 +1329,35 @@ int wget_main(int argc UNUSED_PARAM, char **argv) #if ENABLE_FEATURE_WGET_LONG_OPTIONS applet_long_options = wget_longopts; #endif - opt_complementary = "-1" - IF_FEATURE_WGET_TIMEOUT(":T+") - IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::"); - getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:", - &G.fname_out, &G.dir_prefix, + opt_complementary = "-1" /* at least one URL */ + IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */ + getopt32(argv, "cqO:P:Y:U:T:+" + /*ignored:*/ "t:" + /*ignored:*/ "n::" + /* wget has exactly four -n opts, all of which we can ignore: + * -nv --no-verbose: be moderately quiet (-q is full quiet) + * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE + * -nH --no-host-directories: wget -r http://host/ won't create host/ + * -np --no-parent + * "n::" above says that we accept -n[ARG]. + * Specifying "n:" would be a bug: "-n ARG" would eat ARG! 
+ */ + , &G.fname_out, &G.dir_prefix, &G.proxy_flag, &G.user_agent, IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL), - NULL /* -t RETRIES */ + NULL, /* -t RETRIES */ + NULL /* -n[ARG] */ IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist) IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data) ); +#if 0 /* option bits debug */ + if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM"); + if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething"); + if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header"); + if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data"); + if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider"); + exit(0); +#endif argv += optind; #if ENABLE_FEATURE_WGET_LONG_OPTIONS