From 96b99b860cc15f13b85b1b2d5b5b20ab7183a652 Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Sat, 3 May 2008 07:21:27 +0000 Subject: [PATCH] uniq: support -w. closes bug 3094. function old new delta packed_usage 24136 24132 -4 uniq_main 399 384 -15 --- coreutils/uniq.c | 23 +++++++++++------------ include/usage.h | 10 +++++----- testsuite/uniq.tests | 17 +++++++++++++++++ 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/coreutils/uniq.c b/coreutils/uniq.c index 32327c6ce..41f1fed7b 100644 --- a/coreutils/uniq.c +++ b/coreutils/uniq.c @@ -12,8 +12,6 @@ #include "libbb.h" -static const char uniq_opts[] ALIGN1 = "cdu" "f:s:" "cdu\0\1\2\4"; - static FILE *xgetoptfile_uniq_s(char **argv, int read0write2) { const char *n; @@ -31,9 +29,11 @@ int uniq_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv) { FILE *in, *out; - unsigned long dups, skip_fields, skip_chars, i; const char *s0, *e0, *s1, *e1, *input_filename; + unsigned long dups; + unsigned skip_fields, skip_chars, max_chars; unsigned opt; + unsigned i; enum { OPT_c = 0x1, @@ -41,15 +41,14 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv) OPT_u = 0x4, OPT_f = 0x8, OPT_s = 0x10, + OPT_w = 0x20, }; skip_fields = skip_chars = 0; + max_chars = -1; - opt = getopt32(argv, "cduf:s:", &s0, &s1); - if (opt & OPT_f) - skip_fields = xatoul(s0); - if (opt & OPT_s) - skip_chars = xatoul(s1); + opt_complementary = "f+:s+:w+"; + opt = getopt32(argv, "cduf:s:w:", &skip_fields, &skip_chars, &max_chars); argv += optind; input_filename = *argv; @@ -63,7 +62,7 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv) bb_show_usage(); } - s1 = e1 = NULL; /* prime the pump */ + s1 = e1 = NULL; /* prime the pump */ do { s0 = s1; @@ -81,16 +80,16 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv) ++e1; } - if (!s0 || strcmp(e0, e1)) { + if (!s0 || strncmp(e0, e1, max_chars)) { break; } - ++dups; /* Note: Testing for overflow seems excessive. */ + ++dups; /* note: testing for overflow seems excessive. */ } if (s0) { if (!(opt & (OPT_d << !!dups))) { /* (if dups, opt & OPT_e) */ - fprintf(out, "\0%d " + (opt & 1), dups + 1); + fprintf(out, "\0%ld " + (opt & 1), dups + 1); /* 1 == OPT_c */ fprintf(out, "%s\n", s0); } free((void *)s0); diff --git a/include/usage.h b/include/usage.h index cbc5cb0a2..e791ba6bf 100644 --- a/include/usage.h +++ b/include/usage.h @@ -4303,16 +4303,16 @@ ) #define uniq_trivial_usage \ - "[-fscdu]... [INPUT [OUTPUT]]" + "[-fscduw]... [INPUT [OUTPUT]]" #define uniq_full_usage "\n\n" \ - "Discard all but one of successive identical lines from INPUT\n" \ - "(or standard input), writing to OUTPUT (or standard output)\n" \ + "Discard duplicate lines\n" \ "\nOptions:" \ "\n -c Prefix lines by the number of occurrences" \ "\n -d Only print duplicate lines" \ "\n -u Only print unique lines" \ - "\n -f N Skip the first N fields" \ - "\n -s N Skip the first N chars (after any skipped fields)" \ + "\n -f N Skip first N fields" \ + "\n -s N Skip first N chars (after any skipped fields)" \ + "\n -w N Compare N characters in line" \ #define uniq_example_usage \ "$ echo -e \"a\\na\\nb\\nc\\nc\\na\" | sort | uniq\n" \ diff --git a/testsuite/uniq.tests b/testsuite/uniq.tests index 49d4bed9c..8961d669c 100755 --- a/testsuite/uniq.tests +++ b/testsuite/uniq.tests @@ -41,6 +41,7 @@ testing "uniq input - (specify stdout)" "uniq input -" \ #-c occurrences #-d dups only #-u +#-w max chars # Test various command line options @@ -60,6 +61,22 @@ aa bb cc9 bb cc dd8 aa bb cc9 " +testing "uniq -w (compare max characters)" "uniq -w 2" \ +"cc1 +" "" \ +"cc1 +cc2 +cc3 +" + +testing "uniq -s -w (skip fields and compare max chars)" \ +"uniq -s 2 -w 2" \ +"aaccaa +" "" \ +"aaccaa +aaccbb +bbccaa +" # -d is "Suppress the writing fo lines that are not repeated in the input." # -u is "Suppress the writing of lines that are repeated in the input." -- 2.25.1