X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=coreutils%2Fwc.c;h=4c53049b0d564f64de1cc6a69b3e6544d0d12b1e;hb=a107ef2a6ace98c51473dc3153564a44b260bc6f;hp=e5353b4cb5fce6569e79059f68411ca5bf60df9b;hpb=b71c668c577f7780362f7ea632895c11e3760d66;p=oweals%2Fbusybox.git diff --git a/coreutils/wc.c b/coreutils/wc.c index e5353b4cb..4c53049b0 100644 --- a/coreutils/wc.c +++ b/coreutils/wc.c @@ -4,12 +4,8 @@ * * Copyright (C) 2003 Manuel Novoa III * - * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + * Licensed under GPLv2 or later, see file LICENSE in this source tree. */ - -/* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */ -/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ - /* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) * * Rewritten to fix a number of problems and do some size optimizations. @@ -19,10 +15,6 @@ * 3) no checking of ferror on EOF returns * 4) isprint() wasn't considered when word counting. * - * TODO: - * - * When locale support is enabled, count multibyte chars in the '-m' case. - * * NOTES: * * The previous busybox wc attempted an optimization using stat for the @@ -40,62 +32,106 @@ * * for which 'wc -c' should output '0'. */ +//config:config WC +//config: bool "wc" +//config: default y +//config: help +//config: wc is used to print the number of bytes, words, and lines, +//config: in specified files. +//config: +//config:config FEATURE_WC_LARGE +//config: bool "Support very large counts" +//config: default y +//config: depends on WC +//config: help +//config: Use "unsigned long long" for counter variables. + +//applet:IF_WC(APPLET(wc, BB_DIR_USR_BIN, BB_SUID_DROP)) + +//kbuild:lib-$(CONFIG_WC) += wc.o + +/* BB_AUDIT SUSv3 compliant. */ +/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ #include "libbb.h" +#include "unicode.h" -#ifdef CONFIG_LOCALE_SUPPORT -#define isspace_given_isprint(c) isspace(c) -#else -#undef isspace -#undef isprint -#define isspace(c) ((((c) == ' ') || (((unsigned int)((c) - 9)) <= (13 - 9)))) -#define isprint(c) (((unsigned int)((c) - 0x20)) <= (0x7e - 0x20)) -#define isspace_given_isprint(c) ((c) == ' ') +#if !ENABLE_LOCALE_SUPPORT +# undef isprint +# undef isspace +# define isprint(c) ((unsigned)((c) - 0x20) <= (0x7e - 0x20)) +# define isspace(c) ((c) == ' ') #endif #if ENABLE_FEATURE_WC_LARGE -#define COUNT_T unsigned long long -#define COUNT_FMT "llu" +# define COUNT_T unsigned long long +# define COUNT_FMT "llu" #else -#define COUNT_T unsigned -#define COUNT_FMT "u" +# define COUNT_T unsigned +# define COUNT_FMT "u" #endif +/* We support -m even when UNICODE_SUPPORT is off, + * we just don't advertise it in help text, + * since it is the same as -c in this case. + */ + +//usage:#define wc_trivial_usage +//usage: "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..." +//usage: +//usage:#define wc_full_usage "\n\n" +//usage: "Count lines, words, and bytes for each FILE (or stdin)\n" +//usage: "\n -c Count bytes" +//usage: IF_UNICODE_SUPPORT( +//usage: "\n -m Count characters" +//usage: ) +//usage: "\n -l Count newlines" +//usage: "\n -w Count words" +//usage: "\n -L Print longest line length" +//usage: +//usage:#define wc_example_usage +//usage: "$ wc /etc/passwd\n" +//usage: " 31 46 1365 /etc/passwd\n" + +/* Order is important if we want to be compatible with + * column order in "wc -cmlwL" output: + */ enum { - WC_LINES = 0, - WC_WORDS = 1, - WC_CHARS = 2, - WC_LENGTH = 3 + WC_LINES = 0, /* -l */ + WC_WORDS = 1, /* -w */ + WC_UNICHARS = 2, /* -m */ + WC_BYTES = 3, /* -c */ + WC_LENGTH = 4, /* -L */ + NUM_WCS = 5, }; -int wc_main(int argc, char **argv); -int wc_main(int argc, char **argv) +int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int wc_main(int argc UNUSED_PARAM, char **argv) { - FILE *fp; - const char *s, *arg; - const char *start_fmt = "%9"COUNT_FMT; + const char *arg; + const char *start_fmt = " %9"COUNT_FMT + 1; const char *fname_fmt = " %s\n"; COUNT_T *pcounts; - COUNT_T counts[4]; - COUNT_T totals[4]; - unsigned linepos; - unsigned u; - int num_files = 0; - int c; + COUNT_T counts[NUM_WCS]; + COUNT_T totals[NUM_WCS]; + int num_files; smallint status = EXIT_SUCCESS; - smallint in_word; unsigned print_type; - print_type = getopt32(argc, argv, "lwcL"); + init_unicode(); + + print_type = getopt32(argv, "lwmcL"); if (print_type == 0) { - print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS); + print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_BYTES); } argv += optind; if (!argv[0]) { *--argv = (char *) bb_msg_standard_input; fname_fmt = "\n"; + } + if (!argv[1]) { /* zero or one filename? */ if (!((print_type-1) & print_type)) /* exactly one option? */ start_fmt = "%"COUNT_FMT; } @@ -104,7 +140,14 @@ int wc_main(int argc, char **argv) pcounts = counts; - while ((arg = *argv++) != 0) { + num_files = 0; + while ((arg = *argv++) != NULL) { + FILE *fp; + const char *s; + unsigned u; + unsigned linepos; + smallint in_word; + ++num_files; fp = fopen_or_warn_stdin(arg); if (!fp) { @@ -116,18 +159,34 @@ int wc_main(int argc, char **argv) linepos = 0; in_word = 0; - do { + while (1) { + int c; /* Our -w doesn't match GNU wc exactly... oh well */ - ++counts[WC_CHARS]; c = getc(fp); - if (isprint(c)) { + if (c == EOF) { + if (ferror(fp)) { + bb_simple_perror_msg(arg); + status = EXIT_FAILURE; + } + goto DO_EOF; /* Treat an EOF as '\r'. */ + } + + /* Cater for -c and -m */ + ++counts[WC_BYTES]; + if (unicode_status != UNICODE_ON /* every byte is a new char */ + || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */ + ) { + ++counts[WC_UNICHARS]; + } + + if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */ ++linepos; - if (!isspace_given_isprint(c)) { + if (!isspace(c)) { in_word = 1; continue; } - } else if (((unsigned int)(c - 9)) <= 4) { + } else if ((unsigned)(c - 9) <= 4) { /* \t 9 * \n 10 * \v 11 @@ -136,8 +195,8 @@ int wc_main(int argc, char **argv) */ if (c == '\t') { linepos = (linepos | 7) + 1; - } else { /* '\n', '\r', '\f', or '\v' */ - DO_EOF: + } else { /* '\n', '\r', '\f', or '\v' */ + DO_EOF: if (linepos > counts[WC_LENGTH]) { counts[WC_LENGTH] = linepos; } @@ -148,13 +207,6 @@ int wc_main(int argc, char **argv) linepos = 0; } } - } else if (c == EOF) { - if (ferror(fp)) { - bb_perror_msg("%s", arg); - status = EXIT_FAILURE; - } - --counts[WC_CHARS]; - goto DO_EOF; /* Treat an EOF as '\r'. */ } else { continue; } @@ -164,18 +216,18 @@ int wc_main(int argc, char **argv) if (c == EOF) { break; } - } while (1); + } + + fclose_if_not_stdin(fp); if (totals[WC_LENGTH] < counts[WC_LENGTH]) { totals[WC_LENGTH] = counts[WC_LENGTH]; } totals[WC_LENGTH] -= counts[WC_LENGTH]; - fclose_if_not_stdin(fp); - - OUTPUT: + OUTPUT: /* coreutils wc tries hard to print pretty columns - * (saves results for all files, find max col len etc...) + * (saves results for all files, finds max col len etc...) * we won't try that hard, it will bloat us too much */ s = start_fmt; u = 0; @@ -185,7 +237,7 @@ int wc_main(int argc, char **argv) s = " %9"COUNT_FMT; /* Ok... restore the leading space. */ } totals[u] += pcounts[u]; - } while (++u < 4); + } while (++u < NUM_WCS); printf(fname_fmt, arg); } @@ -194,7 +246,7 @@ int wc_main(int argc, char **argv) * effect of trashing the totals array after outputting it, but that's * irrelavent since we no longer need it. */ if (num_files > 1) { - num_files = 0; /* Make sure we don't get here again. */ + num_files = 0; /* Make sure we don't get here again. */ arg = "total"; pcounts = totals; --argv;