uniq: support -w. closes bug 3094.
author Denis Vlasenko <vda.linux@googlemail.com>
Sat, 3 May 2008 07:21:27 +0000 (07:21 -0000)
committer Denis Vlasenko <vda.linux@googlemail.com>
Sat, 3 May 2008 07:21:27 +0000 (07:21 -0000)
function                                             old     new   delta
packed_usage                                       24136   24132      -4
uniq_main                                            399     384     -15

coreutils/uniq.c
include/usage.h
testsuite/uniq.tests

index 32327c6ce335173dceec1b0d0ae992d1a6edbd0f..41f1fed7b7d94384fca0ff5919111ef1e719f317 100644 (file)
@@ -12,8 +12,6 @@
 
 #include "libbb.h"
 
-static const char uniq_opts[] ALIGN1 = "cdu" "f:s:" "cdu\0\1\2\4";
-
 static FILE *xgetoptfile_uniq_s(char **argv, int read0write2)
 {
        const char *n;
@@ -31,9 +29,11 @@ int uniq_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
 {
        FILE *in, *out;
-       unsigned long dups, skip_fields, skip_chars, i;
        const char *s0, *e0, *s1, *e1, *input_filename;
+       unsigned long dups;
+       unsigned skip_fields, skip_chars, max_chars;
        unsigned opt;
+       unsigned i;
 
        enum {
                OPT_c = 0x1,
@@ -41,15 +41,14 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
                OPT_u = 0x4,
                OPT_f = 0x8,
                OPT_s = 0x10,
+               OPT_w = 0x20,
        };
 
        skip_fields = skip_chars = 0;
+       max_chars = -1;
 
-       opt = getopt32(argv, "cduf:s:", &s0, &s1);
-       if (opt & OPT_f)
-               skip_fields = xatoul(s0);
-       if (opt & OPT_s)
-               skip_chars = xatoul(s1);
+       opt_complementary = "f+:s+:w+";
+       opt = getopt32(argv, "cduf:s:w:", &skip_fields, &skip_chars, &max_chars);
        argv += optind;
 
        input_filename = *argv;
@@ -63,7 +62,7 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
                bb_show_usage();
        }
 
-       s1 = e1 = NULL;                         /* prime the pump */
+       s1 = e1 = NULL; /* prime the pump */
 
        do {
                s0 = s1;
@@ -81,16 +80,16 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
                                ++e1;
                        }
 
-                       if (!s0 || strcmp(e0, e1)) {
+                       if (!s0 || strncmp(e0, e1, max_chars)) {
                                break;
                        }
 
-                       ++dups;          /* Note: Testing for overflow seems excessive. */
+                       ++dups;  /* note: testing for overflow seems excessive. */
                }
 
                if (s0) {
                        if (!(opt & (OPT_d << !!dups))) { /* (if dups, opt & OPT_e) */
-                               fprintf(out, "\0%d " + (opt & 1), dups + 1);
+                               fprintf(out, "\0%ld " + (opt & 1), dups + 1); /* 1 == OPT_c */
                                fprintf(out, "%s\n", s0);
                        }
                        free((void *)s0);
index cbc5cb0a238439edac6b56c4838ffe457d7f9670..e791ba6bf752e5e74d66240dbea15c7364c56bad 100644 (file)
        )
 
 #define uniq_trivial_usage \
-       "[-fscdu]... [INPUT [OUTPUT]]"
+       "[-fscduw]... [INPUT [OUTPUT]]"
 #define uniq_full_usage "\n\n" \
-       "Discard all but one of successive identical lines from INPUT\n" \
-       "(or standard input), writing to OUTPUT (or standard output)\n" \
+       "Discard duplicate lines\n" \
      "\nOptions:" \
      "\n       -c      Prefix lines by the number of occurrences" \
      "\n       -d      Only print duplicate lines" \
      "\n       -u      Only print unique lines" \
-     "\n       -f N    Skip the first N fields" \
-     "\n       -s N    Skip the first N chars (after any skipped fields)" \
+     "\n       -f N    Skip first N fields" \
+     "\n       -s N    Skip first N chars (after any skipped fields)" \
+     "\n       -w N    Compare N characters in line" \
 
 #define uniq_example_usage \
        "$ echo -e \"a\\na\\nb\\nc\\nc\\na\" | sort | uniq\n" \
index 49d4bed9c396adcd40976f33d693114c887fc571..8961d669ca7792123a760debea388745ae1a9619 100755 (executable)
@@ -41,6 +41,7 @@ testing "uniq input - (specify stdout)" "uniq input -" \
 #-c occurrences
 #-d dups only
 #-u
+#-w max chars
 
 # Test various command line options
 
@@ -60,6 +61,22 @@ aa   bb      cc9
 bb     cc      dd8
 aa     bb      cc9
 "
+testing "uniq -w (compare max characters)" "uniq -w 2" \
+"cc1
+" "" \
+"cc1
+cc2
+cc3
+"
+
+testing "uniq -s -w (skip fields and compare max chars)" \
+"uniq -s 2 -w 2" \
+"aaccaa
+" "" \
+"aaccaa
+aaccbb
+bbccaa
+"
 
 # -d is "Suppress the writing of lines that are not repeated in the input."
 # -u is "Suppress the writing of lines that are repeated in the input."