X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=coreutils%2Fwc.c;h=4c53049b0d564f64de1cc6a69b3e6544d0d12b1e;hb=8a134ec68075fc2fd415558bcf6a37cda3ff285f;hp=e69f0d899f3ce1e749c5b54a78a2813ab5a5ec83;hpb=5509af7073ffff75d86ff8c67a2075169a859efd;p=oweals%2Fbusybox.git diff --git a/coreutils/wc.c b/coreutils/wc.c index e69f0d899..4c53049b0 100644 --- a/coreutils/wc.c +++ b/coreutils/wc.c @@ -1,161 +1,257 @@ +/* vi: set sw=4 ts=4: */ /* - * Mini wc implementation for busybox + * wc implementation for busybox * - * by Edward Betts + * Copyright (C) 2003 Manuel Novoa III * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ +/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) + * + * Rewritten to fix a number of problems and do some size optimizations. + * Problems in the previous busybox implementation (besides bloat) included: + * 1) broken 'wc -c' optimization (read note below) + * 2) broken handling of '-' args + * 3) no checking of ferror on EOF returns + * 4) isprint() wasn't considered when word counting. + * + * NOTES: + * + * The previous busybox wc attempted an optimization using stat for the + * case of counting chars only. I omitted that because it was broken. + * It didn't take into account the possibility of input coming from a + * pipe, or input from a file with file pointer not at the beginning. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. + * To implement such a speed optimization correctly, not only do you + * need the size, but also the file position. 
Note also that the + * file position may be past the end of file. Consider the example + * (adapted from example in gnu wc.c) * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * echo hello > /tmp/testfile && + * (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile * + * for which 'wc -c' should output '0'. + */ +//config:config WC +//config: bool "wc" +//config: default y +//config: help +//config: wc is used to print the number of bytes, words, and lines, +//config: in specified files. +//config: +//config:config FEATURE_WC_LARGE +//config: bool "Support very large counts" +//config: default y +//config: depends on WC +//config: help +//config: Use "unsigned long long" for counter variables. + +//applet:IF_WC(APPLET(wc, BB_DIR_USR_BIN, BB_SUID_DROP)) + +//kbuild:lib-$(CONFIG_WC) += wc.o + +/* BB_AUDIT SUSv3 compliant. */ +/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ + +#include "libbb.h" +#include "unicode.h" + +#if !ENABLE_LOCALE_SUPPORT +# undef isprint +# undef isspace +# define isprint(c) ((unsigned)((c) - 0x20) <= (0x7e - 0x20)) +# define isspace(c) ((c) == ' ') +#endif + +#if ENABLE_FEATURE_WC_LARGE +# define COUNT_T unsigned long long +# define COUNT_FMT "llu" +#else +# define COUNT_T unsigned +# define COUNT_FMT "u" +#endif + +/* We support -m even when UNICODE_SUPPORT is off, + * we just don't advertise it in help text, + * since it is the same as -c in this case. + */ + +//usage:#define wc_trivial_usage +//usage: "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..." 
+//usage: +//usage:#define wc_full_usage "\n\n" +//usage: "Count lines, words, and bytes for each FILE (or stdin)\n" +//usage: "\n -c Count bytes" +//usage: IF_UNICODE_SUPPORT( +//usage: "\n -m Count characters" +//usage: ) +//usage: "\n -l Count newlines" +//usage: "\n -w Count words" +//usage: "\n -L Print longest line length" +//usage: +//usage:#define wc_example_usage +//usage: "$ wc /etc/passwd\n" +//usage: " 31 46 1365 /etc/passwd\n" + +/* Order is important if we want to be compatible with + * column order in "wc -cmlwL" output: */ +enum { + WC_LINES = 0, /* -l */ + WC_WORDS = 1, /* -w */ + WC_UNICHARS = 2, /* -m */ + WC_BYTES = 3, /* -c */ + WC_LENGTH = 4, /* -L */ + NUM_WCS = 5, +}; + +int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int wc_main(int argc UNUSED_PARAM, char **argv) +{ + const char *arg; + const char *start_fmt = " %9"COUNT_FMT + 1; + const char *fname_fmt = " %s\n"; + COUNT_T *pcounts; + COUNT_T counts[NUM_WCS]; + COUNT_T totals[NUM_WCS]; + int num_files; + smallint status = EXIT_SUCCESS; + unsigned print_type; -#include "internal.h" -#include - -static const char wc_usage[] = "wc [OPTION]... [FILE]...\n\n" -"Print line, word, and byte counts for each FILE, and a total line if\n" -"more than one FILE is specified. 
With no FILE, read standard input.\n" -"\t-c\tprint the byte counts\n" -"\t-l\tprint the newline counts\n" -"\t-L\tprint the length of the longest line\n" -"\t-w\tprint the word counts\n"; - -static int total_lines, total_words, total_chars, max_length; -static int print_lines, print_words, print_chars, print_length; - -void print_counts (int lines, int words, int chars, int length, - const char *name) { - char const *space = ""; - if (print_lines) { - printf ("%7d", lines); - space = " "; + init_unicode(); + + print_type = getopt32(argv, "lwmcL"); + + if (print_type == 0) { + print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_BYTES); } - if (print_words) { - printf ("%s%7d", space, words); - space = " "; + + argv += optind; + if (!argv[0]) { + *--argv = (char *) bb_msg_standard_input; + fname_fmt = "\n"; } - if (print_chars) { - printf ("%s%7d", space, chars); - space = " "; + if (!argv[1]) { /* zero or one filename? */ + if (!((print_type-1) & print_type)) /* exactly one option? 
*/ + start_fmt = "%"COUNT_FMT; } - if (print_length) - printf ("%s%7d", space, length); - if (*name) - printf (" %s", name); - putchar ('\n'); -} -static void wc_file(FILE *file, const char *name) -{ - int lines, words, chars, length; - int in_word = 0, linepos = 0; - int c; - lines = words = chars = length = 0; - while ((c = getc(file)) != EOF) { - chars++; - switch (c) { - case '\n': - lines++; - case '\r': - case '\f': - if (linepos > length) - length = linepos; - linepos = 0; - goto word_separator; - case '\t': - linepos += 8 - (linepos % 8); - goto word_separator; - case ' ': - linepos++; - case '\v': - word_separator: - if (in_word) { - in_word = 0; - words++; - } - break; - default: - linepos++; - in_word = 1; - break; + memset(totals, 0, sizeof(totals)); + + pcounts = counts; + + num_files = 0; + while ((arg = *argv++) != NULL) { + FILE *fp; + const char *s; + unsigned u; + unsigned linepos; + smallint in_word; + + ++num_files; + fp = fopen_or_warn_stdin(arg); + if (!fp) { + status = EXIT_FAILURE; + continue; } - } - if (linepos > length) - length = linepos; - if (in_word) - words++; - print_counts (lines, words, chars, length, name); - total_lines += lines; - total_words += words; - total_chars += chars; - if (length > max_length) - max_length = length; - fclose(file); - fflush(stdout); -} -int wc_main(int argc, char **argv) { - FILE *file; - total_lines = total_words = total_chars = max_length = 0; - print_lines = print_words = print_chars = print_length = 0; - - while (--argc && **(++argv) == '-') { - while (*++(*argv)) - switch (**argv) { - case 'c': - print_chars = 1; - break; - case 'l': - print_lines = 1; - break; - case 'L': - print_length = 1; - break; - case 'w': - print_words = 1; - break; - default: - usage (wc_usage); + memset(counts, 0, sizeof(counts)); + linepos = 0; + in_word = 0; + + while (1) { + int c; + /* Our -w doesn't match GNU wc exactly... 
oh well */ + + c = getc(fp); + if (c == EOF) { + if (ferror(fp)) { + bb_simple_perror_msg(arg); + status = EXIT_FAILURE; + } + goto DO_EOF; /* Treat an EOF as '\r'. */ } - } - if (!print_lines && !print_words && !print_chars && !print_length) - print_lines = print_words = print_chars = 1; + /* Cater for -c and -m */ + ++counts[WC_BYTES]; + if (unicode_status != UNICODE_ON /* every byte is a new char */ + || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */ + ) { + ++counts[WC_UNICHARS]; + } - if (argc == 0) { - wc_file(stdin, ""); - exit(TRUE); - } - else if (argc == 1) { - file = fopen(*argv, "r"); - if (file == NULL) { - perror(*argv); - exit(FALSE); - } - wc_file(file, *argv); - } - else { - while (argc-- > 0 && *argv != '\0' && strlen(*argv)) { - file = fopen(*argv, "r"); - if (file == NULL) { - perror(*argv); - exit(FALSE); + if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */ + ++linepos; + if (!isspace(c)) { + in_word = 1; + continue; + } + } else if ((unsigned)(c - 9) <= 4) { + /* \t 9 + * \n 10 + * \v 11 + * \f 12 + * \r 13 + */ + if (c == '\t') { + linepos = (linepos | 7) + 1; + } else { /* '\n', '\r', '\f', or '\v' */ + DO_EOF: + if (linepos > counts[WC_LENGTH]) { + counts[WC_LENGTH] = linepos; + } + if (c == '\n') { + ++counts[WC_LINES]; + } + if (c != '\v') { + linepos = 0; + } + } + } else { + continue; + } + + counts[WC_WORDS] += in_word; + in_word = 0; + if (c == EOF) { + break; } - wc_file(file, *argv); - argv++; } - print_counts (total_lines, total_words, total_chars, - max_length, "total"); + + fclose_if_not_stdin(fp); + + if (totals[WC_LENGTH] < counts[WC_LENGTH]) { + totals[WC_LENGTH] = counts[WC_LENGTH]; + } + totals[WC_LENGTH] -= counts[WC_LENGTH]; + + OUTPUT: + /* coreutils wc tries hard to print pretty columns + * (saves results for all files, finds max col len etc...) 
+ * we won't try that hard, it will bloat us too much */ + s = start_fmt; + u = 0; + do { + if (print_type & (1 << u)) { + printf(s, pcounts[u]); + s = " %9"COUNT_FMT; /* Ok... restore the leading space. */ + } + totals[u] += pcounts[u]; + } while (++u < NUM_WCS); + printf(fname_fmt, arg); } - exit(TRUE); + + /* If more than one file was processed, we want the totals. To save some + * space, we set the pcounts ptr to the totals array. This has the side + * effect of trashing the totals array after outputting it, but that's + * irrelevant since we no longer need it. */ + if (num_files > 1) { + num_files = 0; /* Make sure we don't get here again. */ + arg = "total"; + pcounts = totals; + --argv; + goto OUTPUT; + } + + fflush_stdout_and_exit(status); }