tr: support [:xdigit:], fix handling of ranges and [x]'s.
authorDenis Vlasenko <vda.linux@googlemail.com>
Sun, 1 Mar 2009 04:50:18 +0000 (04:50 -0000)
committerDenis Vlasenko <vda.linux@googlemail.com>
Sun, 1 Mar 2009 04:50:18 +0000 (04:50 -0000)
 add testsuite entry for each of 3 bugs fixed.

function                                             old     new   delta
static.classes                                        73      82      +9
expand                                              1738    1743      +5
complement                                            74      72      -2
tr_main                                              472     463      -9
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/2 up/down: 14/-11)              Total: 3 bytes

coreutils/tr.c
testsuite/tr.tests [new file with mode: 0644]

index c736c716b229681f4e7a4895bac69709585c0941..cd31b8550519012de75f3e75bde3af0bee52e3a0 100644 (file)
 #define ASCII 0377
 
 static void map(char *pvector,
-               unsigned char *string1, unsigned int string1_len,
-               unsigned char *string2, unsigned int string2_len)
+               unsigned char *string1, unsigned string1_len,
+               unsigned char *string2, unsigned string2_len)
 {
        char last = '0';
-       unsigned int i, j;
+       unsigned i, j;
 
        for (j = 0, i = 0; i < string1_len; i++) {
                if (string2_len <= j)
@@ -39,12 +39,11 @@ static void map(char *pvector,
 
 /* supported constructs:
  *   Ranges,  e.g.,  0-9   ==>  0123456789
- *   Ranges,  e.g.,  [0-9] ==>  0123456789
  *   Escapes, e.g.,  \a    ==>  Control-G
  *   Character classes, e.g. [:upper:] ==> A...Z
  *   Equiv classess, e.g. [=A=] ==> A   (hmmmmmmm?)
  */
-static unsigned int expand(const char *arg, char *buffer)
+static unsigned expand(const char *arg, char *buffer)
 {
        char *buffer_start = buffer;
        unsigned i; /* can't be unsigned char: must be able to hold 256 */
@@ -77,7 +76,8 @@ static unsigned int expand(const char *arg, char *buffer)
                                static const char classes[] ALIGN1 =
                                        "alpha"CLO "alnum"CLO "digit"CLO
                                        "lower"CLO "upper"CLO "space"CLO
-                                       "blank"CLO "punct"CLO "cntrl"CLO;
+                                       "blank"CLO "punct"CLO "cntrl"CLO
+                                       "xdigit"CLO;
 #define CLASS_invalid 0 /* we increment the retval */
 #define CLASS_alpha 1
 #define CLASS_alnum 2
@@ -88,16 +88,17 @@ static unsigned int expand(const char *arg, char *buffer)
 #define CLASS_blank 7
 #define CLASS_punct 8
 #define CLASS_cntrl 9
-//#define CLASS_xdigit 10
+#define CLASS_xdigit 10
 //#define CLASS_graph 11
 //#define CLASS_print 12
                                smalluint j;
-                               { /* not really pretty.. */
-                                       char *tmp = xstrndup(arg, 7); // warning: xdigit would need 8, not 7
+                               {
+                                       /* xdigit needs 8, not 7 */
+                                       char *tmp = xstrndup(arg, 7 + (arg[0]=='x'));
                                        j = index_in_strings(classes, tmp) + 1;
                                        free(tmp);
                                }
-                               if (j == CLASS_alnum || j == CLASS_digit) {
+                               if (j == CLASS_alnum || j == CLASS_digit || j == CLASS_xdigit) {
                                        for (i = '0'; i <= '9'; i++)
                                                *buffer++ = i;
                                }
@@ -125,6 +126,12 @@ static unsigned int expand(const char *arg, char *buffer)
                                                 || (j == CLASS_cntrl && iscntrl(i)))
                                                        *buffer++ = i;
                                }
+                               if (j == CLASS_xdigit) {
+                                       for (i = 'A'; i <= 'F'; i++) {
+                                               *buffer++ = i;
+                                               *buffer++ = i | 0x20;
+                                       }
+                               }
                                if (j == CLASS_invalid) {
                                        *buffer++ = '[';
                                        *buffer++ = ':';
@@ -140,19 +147,14 @@ static unsigned int expand(const char *arg, char *buffer)
                                arg += 3;       /* skip CHAR=] */
                                continue;
                        }
-                       if (i == '\0' || *arg != '-') { /* not [x-...] - copy verbatim */
-                               *buffer++ = '[';
-                               arg--; /* points to x */
-                               continue; /* copy all, including eventual ']' */
-                       }
-                       /* [x-z] */
-                       arg++; /* skip - */
-                       if (arg[0] == '\0' || arg[1] != ']')
-                               bb_show_usage();
-                       ac = *arg++;
-                       while (i <= ac)
-                               *buffer++ = i++;
-                       arg++;  /* skip ] */
+                       /* The rest of [xyz... cases is treated as normal
+                        * string, '[' has no special meaning here:
+                        * tr "[a-z]" "[A-Z]" can be written as tr "a-z" "A-Z",
+                        * also try tr "[a-z]" "_A-Z+" and you'll see that
+                        * [] is not special here.
+                        */
+                       *buffer++ = '[';
+                       arg--; /* points to x */
                        continue;
                }
                *buffer++ = *arg++;
@@ -162,29 +164,30 @@ static unsigned int expand(const char *arg, char *buffer)
 
 static int complement(char *buffer, int buffer_len)
 {
-       int i, j, ix;
+       int ch, j, len;
        char conv[ASCII + 2];
 
-       ix = 0;
-       for (i = '\0'; i <= ASCII; i++) {
+       len = 0;
+       for (ch = '\0'; ch <= ASCII; ch++) {
                for (j = 0; j < buffer_len; j++)
-                       if (buffer[j] == i)
-                               break;
-               if (j == buffer_len)
-                       conv[ix++] = i & ASCII;
+                       if (buffer[j] == ch)
+                               goto next_ch;
+               /* Didn't find it */
+               conv[len++] = (char) ch;
+ next_ch:
+               continue;
        }
-       memcpy(buffer, conv, ix);
-       return ix;
+       memcpy(buffer, conv, len);
+       return len;
 }
 
 int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 int tr_main(int argc UNUSED_PARAM, char **argv)
 {
-       int output_length = 0, input_length;
        int i;
        smalluint flags;
-       ssize_t read_chars = 0;
-       size_t in_index = 0, out_index = 0;
+       ssize_t read_chars;
+       size_t in_index, out_index;
        unsigned last = UCHAR_MAX + 1; /* not equal to any char */
        unsigned char coded, c;
        unsigned char *output = xmalloc(BUFSIZ);
@@ -206,6 +209,9 @@ int tr_main(int argc UNUSED_PARAM, char **argv)
 
 #define tr_buf bb_common_bufsiz1
        if (*argv != NULL) {
+               int output_length = 0;
+               int input_length;
+
                input_length = expand(*argv++, tr_buf);
                if (flags & TR_OPT_complement)
                        input_length = complement(tr_buf, input_length);
@@ -221,30 +227,33 @@ int tr_main(int argc UNUSED_PARAM, char **argv)
                        outvec[output[i]] = TRUE;
        }
 
+       goto start_from;
+
        for (;;) {
                /* If we're out of input, flush output and read more input. */
                if ((ssize_t)in_index == read_chars) {
                        if (out_index) {
                                xwrite(STDOUT_FILENO, (char *)output, out_index);
+ start_from:
                                out_index = 0;
                        }
                        read_chars = safe_read(STDIN_FILENO, tr_buf, BUFSIZ);
                        if (read_chars <= 0) {
                                if (read_chars < 0)
                                        bb_perror_msg_and_die(bb_msg_read_error);
-                               exit(EXIT_SUCCESS);
+                               break;
                        }
                        in_index = 0;
                }
                c = tr_buf[in_index++];
-               coded = vector[c];
                if ((flags & TR_OPT_delete) && invec[c])
                        continue;
+               coded = vector[c];
                if ((flags & TR_OPT_squeeze_reps) && last == coded
                 && (invec[c] || outvec[coded]))
                        continue;
                output[out_index++] = last = coded;
        }
-       /* NOTREACHED */
+
        return EXIT_SUCCESS;
 }
diff --git a/testsuite/tr.tests b/testsuite/tr.tests
new file mode 100644 (file)
index 0000000..7339ccf
--- /dev/null
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+# Copyright 2009 by Denys Vlasenko <vda.linux@googlemail.com>
+# Licensed under GPL v2, see file LICENSE for details.
+
+. testing.sh
+
+# testing "description" "arguments" "result" "infile" "stdin"
+
+testing "tr does not treat [] in [a-z] as special" \
+       "tr '[q-z]' '_Q-Z+'" \
+       "_QWe+" "" "[qwe]"
+
+testing "tr understands 0-9A-F" \
+       "tr -cd '[0-9A-F]'" \
+       "19AF" "" "19AFH\n"
+
+testing "tr understands [:xdigit:]" \
+       "tr -cd '[:xdigit:]'" \
+       "19AF" "" "19AFH\n"
+
+exit $FAILCOUNT