cksum: code shrink
[oweals/busybox.git] / coreutils / tr.c
index 3bfa4808056d4dc0c694ee30bc886f4ce5814d66..10284e1c9f93314fa58d75983f8dad538369385e 100644 (file)
 /* vi: set sw=4 ts=4: */
 /*
- * Copyright (c) 1988, 1993
- *     The Regents of the University of California.  All rights reserved.
+ * Mini tr implementation for busybox
  *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *     This product includes software developed by the University of
- *     California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
+ * Copyright (c) 1987,1997, Prentice Hall   All rights reserved.
  *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * The name of Prentice Hall may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * Copyright (c) Michiel Huisjes
+ *
+ * This version of tr is adapted from Minix tr and was modified
+ * by Erik Andersen <andersen@codepoet.org> to be used in busybox.
+ *
+ * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  */
+/* http://www.opengroup.org/onlinepubs/009695399/utilities/tr.html
+ * TODO: graph, print
+ */
+//config:config TR
+//config:      bool "tr (5.5 kb)"
+//config:      default y
+//config:      help
+//config:      tr is used to squeeze, and/or delete characters from standard
+//config:      input, writing to standard output.
+//config:
+//config:config FEATURE_TR_CLASSES
+//config:      bool "Enable character classes (such as [:upper:])"
+//config:      default y
+//config:      depends on TR
+//config:      help
+//config:      Enable character classes, enabling commands such as:
+//config:      tr [:upper:] [:lower:] to convert input into lowercase.
+//config:
+//config:config FEATURE_TR_EQUIV
+//config:      bool "Enable equivalence classes"
+//config:      default y
+//config:      depends on TR
+//config:      help
+//config:      Enable equivalence classes, which essentially add the enclosed
+//config:      character to the current set. For instance, tr [=a=] xyz would
+//config:      replace all instances of 'a' with 'xyz'. This option is mainly
+//config:      useful for cases when no other way of expressing a character
+//config:      is possible.
+
+//applet:IF_TR(APPLET(tr, BB_DIR_USR_BIN, BB_SUID_DROP))
+
+//kbuild:lib-$(CONFIG_TR) += tr.o
+
+//usage:#define tr_trivial_usage
+//usage:       "[-cds] STRING1 [STRING2]"
+//usage:#define tr_full_usage "\n\n"
+//usage:       "Translate, squeeze, or delete characters from stdin, writing to stdout\n"
+//usage:     "\n       -c      Take complement of STRING1"
+//usage:     "\n       -d      Delete input characters coded STRING1"
+//usage:     "\n       -s      Squeeze multiple output characters of STRING2 into one character"
+//usage:
+//usage:#define tr_example_usage
+//usage:       "$ echo \"gdkkn vnqkc\" | tr [a-y] [b-z]\n"
+//usage:       "hello world\n"
+
+#include "libbb.h"
+
+enum {
+       ASCII = 256,
+       /* string buffer needs to be at least as big as the whole "alphabet".
+        * BUFSIZ == ASCII is ok, but we will realloc in expand
+        * even for smallest patterns, let's avoid that by using *2:
+        */
+       TR_BUFSIZ = (BUFSIZ > ASCII*2) ? BUFSIZ : ASCII*2,
+};
 
-#if 0
-#ifndef lint
-static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\
-       The Regents of the University of California.  All rights reserved.\n";
-#endif                                                 /* not lint */
-
-#ifndef lint
-#if 0
-static char sccsid[] = "@(#)tr.c       8.2 (Berkeley) 5/4/95";
-#endif
-static const char rcsid[] =
-
-       "$Id: tr.c,v 1.2 2000/03/21 22:32:57 erik Exp $";
-#endif                                                 /* not lint */
-#endif                                                 /* #if 0 */
-
-#include "internal.h"
-#include <locale.h>
-#include <sys/types.h>
-#include <sys/cdefs.h>
-#include <sys/types.h>
-
-#include <err.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <ctype.h>
-#include <err.h>
-#include <stddef.h>
-
-typedef struct {
-       enum { STRING1, STRING2 } which;
-       enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;
-       int cnt;                                        /* character count */
-       int lastch;                                     /* last character */
-       int equiv[2];                           /* equivalence set */
-       int *set;                                       /* set of characters */
-       char *str;                                      /* user's string */
-} STR;
-
-#include <limits.h>
-#define        NCHARS  (UCHAR_MAX + 1) /* Number of possible characters. */
-#define        OOBCH   (UCHAR_MAX + 1) /* Out of band character value. */
-
-static int next __P((STR *));
-
-static int string1[NCHARS] = {
-       0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */
-       0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
-       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
-       0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
-       0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
-       0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
-       0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
-       0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
-       0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
-       0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
-       0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
-       0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
-       0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
-       0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
-       0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
-       0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
-       0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-       0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
-       0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
-       0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
-       0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-       0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
-       0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
-       0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
-       0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
-       0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
-       0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
-       0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
-       0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
-       0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
-       0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
-       0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
-}, string2[NCHARS];
-
-STR s1 = { STRING1, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
-STR s2 = { STRING2, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
-
-static void setup(string, arg, str, cflag)
-int *string;
-char *arg;
-STR *str;
-int cflag;
-{
-       register int cnt, *p;
-
-       str->str = arg;
-       bzero(string, NCHARS * sizeof(int));
-
-       while (next(str))
-               string[str->lastch] = 1;
-       if (cflag)
-               for (p = string, cnt = NCHARS; cnt--; ++p)
-                       *p = !*p;
-}
-
-static void tr_usage()
+static void map(char *pvector,
+               char *string1, unsigned string1_len,
+               char *string2, unsigned string2_len)
 {
-       (void) fprintf(stderr, "%s\n%s\n%s\n%s\n",
-                                  "usage: tr [-csu] string1 string2",
-                                  "       tr [-cu] -d string1",
-                                  "       tr [-cu] -s string1",
-                                  "       tr [-cu] -ds string1 string2");
-       exit(1);
+       char last = '0';
+       unsigned i, j;
+
+       for (j = 0, i = 0; i < string1_len; i++) {
+               if (string2_len <= j)
+                       pvector[(unsigned char)(string1[i])] = last;
+               else
+                       pvector[(unsigned char)(string1[i])] = last = string2[j++];
+       }
 }
 
-
-extern int tr_main(argc, argv)
-int argc;
-char **argv;
+/* supported constructs:
+ *   Ranges,  e.g.,  0-9   ==>  0123456789
+ *   Escapes, e.g.,  \a    ==>  Control-G
+ *   Character classes, e.g. [:upper:] ==> A...Z
+ *   Equiv classess, e.g. [=A=] ==> A   (hmmmmmmm?)
+ * not supported:
+ *   [x*N] - repeat char x N times
+ *   [x*] - repeat char x until it fills STRING2:
+ * # echo qwe123 | /usr/bin/tr 123456789 '[d]'
+ * qwe[d]
+ * # echo qwe123 | /usr/bin/tr 123456789 '[d*]'
+ * qweddd
+ */
+static unsigned expand(char *arg, char **buffer_p)
 {
-       register int ch, cnt, lastch, *p;
-       int cflag, dflag, sflag, isstring2;
-
-       (void) setlocale(LC_CTYPE, "");
-
-       cflag = dflag = sflag = 0;
-       while ((ch = getopt(argc, argv, "cdsu")) != -1)
-               switch ((char) ch) {
-               case 'c':
-                       cflag = 1;
-                       break;
-               case 'd':
-                       dflag = 1;
-                       break;
-               case 's':
-                       sflag = 1;
-                       break;
-               case 'u':
-                       setbuf(stdout, (char *) NULL);
-                       break;
-               case '?':
-               default:
-                       tr_usage();
+       char *buffer = *buffer_p;
+       unsigned pos = 0;
+       unsigned size = TR_BUFSIZ;
+       unsigned i; /* can't be unsigned char: must be able to hold 256 */
+       unsigned char ac;
+
+       while (*arg) {
+               if (pos + ASCII > size) {
+                       size += ASCII;
+                       *buffer_p = buffer = xrealloc(buffer, size);
                }
-       argc -= optind;
-       argv += optind;
-
-       switch (argc) {
-       case 0:
-       default:
-               tr_usage();
-               /* NOTREACHED */
-       case 1:
-               isstring2 = 0;
-               break;
-       case 2:
-               isstring2 = 1;
-               break;
-       }
-
-       /*
-        * tr -ds [-c] string1 string2
-        * Delete all characters (or complemented characters) in string1.
-        * Squeeze all characters in string2.
-        */
-       if (dflag && sflag) {
-               if (!isstring2)
-                       tr_usage();
-
-               setup(string1, argv[0], &s1, cflag);
-               setup(string2, argv[1], &s2, 0);
-
-               for (lastch = OOBCH; (ch = getchar()) != EOF;)
-                       if (!string1[ch] && (!string2[ch] || lastch != ch)) {
-                               lastch = ch;
-                               (void) putchar(ch);
+               if (*arg == '\\') {
+                       const char *z;
+                       arg++;
+                       z = arg;
+                       ac = bb_process_escape_sequence(&z);
+                       arg = (char *)z;
+                       arg--;
+                       *arg = ac;
+                       /*
+                        * fall through, there may be a range.
+                        * If not, current char will be treated anyway.
+                        */
+               }
+               if (arg[1] == '-') { /* "0-9..." */
+                       ac = arg[2];
+                       if (ac == '\0') { /* "0-": copy verbatim */
+                               buffer[pos++] = *arg++; /* copy '0' */
+                               continue; /* next iter will copy '-' and stop */
                        }
-               exit(0);
-       }
-
-       /*
-        * tr -d [-c] string1
-        * Delete all characters (or complemented characters) in string1.
-        */
-       if (dflag) {
-               if (isstring2)
-                       tr_usage();
-
-               setup(string1, argv[0], &s1, cflag);
-
-               while ((ch = getchar()) != EOF)
-                       if (!string1[ch])
-                               (void) putchar(ch);
-               exit(0);
-       }
-
-       /*
-        * tr -s [-c] string1
-        * Squeeze all characters (or complemented characters) in string1.
-        */
-       if (sflag && !isstring2) {
-               setup(string1, argv[0], &s1, cflag);
-
-               for (lastch = OOBCH; (ch = getchar()) != EOF;)
-                       if (!string1[ch] || lastch != ch) {
-                               lastch = ch;
-                               (void) putchar(ch);
+                       i = (unsigned char) *arg;
+                       arg += 3; /* skip 0-9 or 0-\ */
+                       if (ac == '\\') {
+                               const char *z;
+                               z = arg;
+                               ac = bb_process_escape_sequence(&z);
+                               arg = (char *)z;
                        }
-               exit(0);
-       }
-
-       /*
-        * tr [-cs] string1 string2
-        * Replace all characters (or complemented characters) in string1 with
-        * the character in the same position in string2.  If the -s option is
-        * specified, squeeze all the characters in string2.
-        */
-       if (!isstring2)
-               tr_usage();
-
-       s1.str = argv[0];
-       s2.str = argv[1];
-
-       if (cflag)
-               for (cnt = NCHARS, p = string1; cnt--;)
-                       *p++ = OOBCH;
-
-       if (!next(&s2))
-               errx(1, "empty string2");
-
-       /* If string2 runs out of characters, use the last one specified. */
-       if (sflag)
-               while (next(&s1)) {
-                       string1[s1.lastch] = ch = s2.lastch;
-                       string2[ch] = 1;
-                       (void) next(&s2);
-       } else
-               while (next(&s1)) {
-                       string1[s1.lastch] = ch = s2.lastch;
-                       (void) next(&s2);
+                       while (i <= ac) /* ok: i is unsigned _int_ */
+                               buffer[pos++] = i++;
+                       continue;
                }
-
-       if (cflag)
-               for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt)
-                       *p = *p == OOBCH ? ch : cnt;
-
-       if (sflag)
-               for (lastch = OOBCH; (ch = getchar()) != EOF;) {
-                       ch = string1[ch];
-                       if (!string2[ch] || lastch != ch) {
-                               lastch = ch;
-                               (void) putchar(ch);
+               if ((ENABLE_FEATURE_TR_CLASSES || ENABLE_FEATURE_TR_EQUIV)
+                && *arg == '['
+               ) {
+                       arg++;
+                       i = (unsigned char) *arg++;
+                       /* "[xyz...". i=x, arg points to y */
+                       if (ENABLE_FEATURE_TR_CLASSES && i == ':') { /* [:class:] */
+#define CLO ":]\0"
+                               static const char classes[] ALIGN1 =
+                                       "alpha"CLO "alnum"CLO "digit"CLO
+                                       "lower"CLO "upper"CLO "space"CLO
+                                       "blank"CLO "punct"CLO "cntrl"CLO
+                                       "xdigit"CLO;
+                               enum {
+                                       CLASS_invalid = 0, /* we increment the retval */
+                                       CLASS_alpha = 1,
+                                       CLASS_alnum = 2,
+                                       CLASS_digit = 3,
+                                       CLASS_lower = 4,
+                                       CLASS_upper = 5,
+                                       CLASS_space = 6,
+                                       CLASS_blank = 7,
+                                       CLASS_punct = 8,
+                                       CLASS_cntrl = 9,
+                                       CLASS_xdigit = 10,
+                                       //CLASS_graph = 11,
+                                       //CLASS_print = 12,
+                               };
+                               smalluint j;
+                               char *tmp;
+
+                               /* xdigit needs 8, not 7 */
+                               i = 7 + (arg[0] == 'x');
+                               tmp = xstrndup(arg, i);
+                               j = index_in_strings(classes, tmp) + 1;
+                               free(tmp);
+
+                               if (j == CLASS_invalid)
+                                       goto skip_bracket;
+
+                               arg += i;
+                               if (j == CLASS_alnum || j == CLASS_digit || j == CLASS_xdigit) {
+                                       for (i = '0'; i <= '9'; i++)
+                                               buffer[pos++] = i;
+                               }
+                               if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_upper) {
+                                       for (i = 'A'; i <= 'Z'; i++)
+                                               buffer[pos++] = i;
+                               }
+                               if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_lower) {
+                                       for (i = 'a'; i <= 'z'; i++)
+                                               buffer[pos++] = i;
+                               }
+                               if (j == CLASS_space || j == CLASS_blank) {
+                                       buffer[pos++] = '\t';
+                                       if (j == CLASS_space) {
+                                               buffer[pos++] = '\n';
+                                               buffer[pos++] = '\v';
+                                               buffer[pos++] = '\f';
+                                               buffer[pos++] = '\r';
+                                       }
+                                       buffer[pos++] = ' ';
+                               }
+                               if (j == CLASS_punct || j == CLASS_cntrl) {
+                                       for (i = '\0'; i < ASCII; i++) {
+                                               if ((j == CLASS_punct && isprint_asciionly(i) && !isalnum(i) && !isspace(i))
+                                                || (j == CLASS_cntrl && iscntrl(i))
+                                               ) {
+                                                       buffer[pos++] = i;
+                                               }
+                                       }
+                               }
+                               if (j == CLASS_xdigit) {
+                                       for (i = 'A'; i <= 'F'; i++) {
+                                               buffer[pos + 6] = i | 0x20;
+                                               buffer[pos++] = i;
+                                       }
+                                       pos += 6;
+                               }
+                               continue;
                        }
-       } else
-               while ((ch = getchar()) != EOF)
-                       (void) putchar(string1[ch]);
-       exit(0);
-}
-
-static int backslash __P((STR *));
-static int bracket __P((STR *));
-static int c_class __P((const void *, const void *));
-static void genclass __P((STR *));
-static void genequiv __P((STR *));
-static int genrange __P((STR *));
-static void genseq __P((STR *));
-
-static int next(s)
-register STR *s;
-{
-       register int ch;
-
-       switch (s->state) {
-       case EOS:
-               return (0);
-       case INFINITE:
-               return (1);
-       case NORMAL:
-               switch (ch = (u_char) * s->str) {
-               case '\0':
-                       s->state = EOS;
-                       return (0);
-               case '\\':
-                       s->lastch = backslash(s);
-                       break;
-               case '[':
-                       if (bracket(s))
-                               return (next(s));
-                       /* FALLTHROUGH */
-               default:
-                       ++s->str;
-                       s->lastch = ch;
-                       break;
-               }
-
-               /* We can start a range at any time. */
-               if (s->str[0] == '-' && genrange(s))
-                       return (next(s));
-               return (1);
-       case RANGE:
-               if (s->cnt-- == 0) {
-                       s->state = NORMAL;
-                       return (next(s));
-               }
-               ++s->lastch;
-               return (1);
-       case SEQUENCE:
-               if (s->cnt-- == 0) {
-                       s->state = NORMAL;
-                       return (next(s));
-               }
-               return (1);
-       case SET:
-               if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
-                       s->state = NORMAL;
-                       return (next(s));
+                       /* "[xyz...", i=x, arg points to y */
+                       if (ENABLE_FEATURE_TR_EQUIV && i == '=') { /* [=CHAR=] */
+                               buffer[pos++] = *arg; /* copy CHAR */
+                               if (!arg[0] || arg[1] != '=' || arg[2] != ']')
+                                       bb_show_usage();
+                               arg += 3;  /* skip CHAR=] */
+                               continue;
+                       }
+                       /* The rest of "[xyz..." cases is treated as normal
+                        * string, "[" has no special meaning here:
+                        * tr "[a-z]" "[A-Z]" can be written as tr "a-z" "A-Z",
+                        * also try tr "[a-z]" "_A-Z+" and you'll see that
+                        * [] is not special here.
+                        */
+ skip_bracket:
+                       arg -= 2; /* points to "[" in "[xyz..." */
                }
-               return (1);
+               buffer[pos++] = *arg++;
        }
-       /* NOTREACHED */
-       return (0);
-}
-
-static int bracket(s)
-register STR *s;
-{
-       register char *p;
-
-       switch (s->str[1]) {
-       case ':':                                       /* "[:class:]" */
-               if ((p = strstr(s->str + 2, ":]")) == NULL)
-                       return (0);
-               *p = '\0';
-               s->str += 2;
-               genclass(s);
-               s->str = p + 2;
-               return (1);
-       case '=':                                       /* "[=equiv=]" */
-               if ((p = strstr(s->str + 2, "=]")) == NULL)
-                       return (0);
-               s->str += 2;
-               genequiv(s);
-               return (1);
-       default:                                        /* "[\###*n]" or "[#*n]" */
-               if ((p = strpbrk(s->str + 2, "*]")) == NULL)
-                       return (0);
-               if (p[0] != '*' || index(p, ']') == NULL)
-                       return (0);
-               s->str += 1;
-               genseq(s);
-               return (1);
-       }
-       /* NOTREACHED */
-}
-
-typedef struct {
-       char *name;
-       int (*func) __P((int));
-       int *set;
-} CLASS;
-
-static CLASS classes[] = {
-#undef isalnum
-       {"alnum", isalnum,},
-#undef isalpha
-       {"alpha", isalpha,},
-/*#undef isblank
-       { "blank",  isblank,  },*/
-#undef iscntrl
-       {"cntrl", iscntrl,},
-#undef isdigit
-       {"digit", isdigit,},
-#undef isgraph
-       {"graph", isgraph,},
-#undef islower
-       {"lower", islower,},
-#undef isprint
-       {"print", isprint,},
-#undef ispunct
-       {"punct", ispunct,},
-#undef isspace
-       {"space", isspace,},
-#undef isupper
-       {"upper", isupper,},
-#undef isxdigit
-       {"xdigit", isxdigit,},
-};
-
-static void genclass(s)
-STR *s;
-{
-       register int cnt, (*func) __P((int));
-       CLASS *cp, tmp;
-       int *p;
-
-       tmp.name = s->str;
-       if ((cp = (CLASS *) bsearch(&tmp, classes, sizeof(classes) /
-                                                               sizeof(CLASS), sizeof(CLASS),
-                                                               c_class)) == NULL) errx(1,
-                                                                                                               "unknown class %s",
-                                                                                                               s->str);
-
-       cp->set = p = xmalloc((NCHARS + 1) * sizeof(int));
-       bzero(p, (NCHARS + 1) * sizeof(int));
-
-       for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
-               if ((func) (cnt))
-                       *p++ = cnt;
-       *p = OOBCH;
-
-       s->cnt = 0;
-       s->state = SET;
-       s->set = cp->set;
+       return pos;
 }
 
-static int c_class(a, b)
-const void *a, *b;
-{
-       return (strcmp(((CLASS *) a)->name, ((CLASS *) b)->name));
-}
-
-/*
- * English doesn't have any equivalence classes, so for now
- * we just syntax check and grab the character.
+/* NB: buffer is guaranteed to be at least TR_BUFSIZE
+ * (which is >= ASCII) big.
  */
-static void genequiv(s)
-STR *s;
+static int complement(char *buffer, int buffer_len)
 {
-       if (*s->str == '\\') {
-               s->equiv[0] = backslash(s);
-               if (*s->str != '=')
-                       errx(1, "misplaced equivalence equals sign");
-       } else {
-               s->equiv[0] = s->str[0];
-               if (s->str[1] != '=')
-                       errx(1, "misplaced equivalence equals sign");
+       int len;
+       char conv[ASCII];
+       unsigned char ch;
+
+       len = 0;
+       ch = '\0';
+       while (1) {
+               if (memchr(buffer, ch, buffer_len) == NULL)
+                       conv[len++] = ch;
+               if (++ch == '\0')
+                       break;
        }
-       s->str += 2;
-       s->cnt = 0;
-       s->state = SET;
-       s->set = s->equiv;
+       memcpy(buffer, conv, len);
+       return len;
 }
 
-static int genrange(s)
-STR *s;
+int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int tr_main(int argc UNUSED_PARAM, char **argv)
 {
-       int stopval;
-       char *savestart;
-
-       savestart = s->str;
-       stopval = *++s->str == '\\' ? backslash(s) : (u_char) * s->str++;
-       if (stopval < (u_char) s->lastch) {
-               s->str = savestart;
-               return (0);
+       int i;
+       smalluint opts;
+       ssize_t read_chars;
+       size_t in_index, out_index;
+       unsigned last = UCHAR_MAX + 1; /* not equal to any char */
+       unsigned char coded, c;
+       char *str1 = xmalloc(TR_BUFSIZ);
+       char *str2 = xmalloc(TR_BUFSIZ);
+       int str2_length;
+       int str1_length;
+       char *vector = xzalloc(ASCII * 3);
+       char *invec  = vector + ASCII;
+       char *outvec = vector + ASCII * 2;
+
+#define TR_OPT_complement   (3 << 0)
+#define TR_OPT_delete       (1 << 2)
+#define TR_OPT_squeeze_reps (1 << 3)
+
+       for (i = 0; i < ASCII; i++) {
+               vector[i] = i;
+               /*invec[i] = outvec[i] = FALSE; - done by xzalloc */
        }
-       s->cnt = stopval - s->lastch + 1;
-       s->state = RANGE;
-       --s->lastch;
-       return (1);
-}
-
-static void genseq(s)
-STR *s;
-{
-       char *ep;
 
-       if (s->which == STRING1)
-               errx(1, "sequences only valid in string2");
+       /* -C/-c difference is that -C complements "characters",
+        * and -c complements "values" (binary bytes I guess).
+        * In POSIX locale, these are the same.
+        */
 
-       if (*s->str == '\\')
-               s->lastch = backslash(s);
-       else
-               s->lastch = *s->str++;
-       if (*s->str != '*')
-               errx(1, "misplaced sequence asterisk");
+       /* '+': stop at first non-option */
+       opts = getopt32(argv, "^+" "Ccds" "\0" "-1");
+       argv += optind;
 
-       switch (*++s->str) {
-       case '\\':
-               s->cnt = backslash(s);
-               break;
-       case ']':
-               s->cnt = 0;
-               ++s->str;
-               break;
-       default:
-               if (isdigit((u_char) * s->str)) {
-                       s->cnt = strtol(s->str, &ep, 0);
-                       if (*ep == ']') {
-                               s->str = ep + 1;
+       str1_length = expand(*argv++, &str1);
+       str2_length = 0;
+       if (opts & TR_OPT_complement)
+               str1_length = complement(str1, str1_length);
+       if (*argv) {
+               if (argv[0][0] == '\0')
+                       bb_error_msg_and_die("STRING2 cannot be empty");
+               str2_length = expand(*argv, &str2);
+               map(vector, str1, str1_length,
+                               str2, str2_length);
+       }
+       for (i = 0; i < str1_length; i++)
+               invec[(unsigned char)(str1[i])] = TRUE;
+       for (i = 0; i < str2_length; i++)
+               outvec[(unsigned char)(str2[i])] = TRUE;
+
+       goto start_from;
+
+       /* In this loop, str1 space is reused as input buffer,
+        * str2 - as output one. */
+       for (;;) {
+               /* If we're out of input, flush output and read more input. */
+               if ((ssize_t)in_index == read_chars) {
+                       if (out_index) {
+                               xwrite(STDOUT_FILENO, str2, out_index);
+ start_from:
+                               out_index = 0;
+                       }
+                       read_chars = safe_read(STDIN_FILENO, str1, TR_BUFSIZ);
+                       if (read_chars <= 0) {
+                               if (read_chars < 0)
+                                       bb_perror_msg_and_die(bb_msg_read_error);
                                break;
                        }
+                       in_index = 0;
                }
-               errx(1, "illegal sequence count");
-               /* NOTREACHED */
-       }
-
-       s->state = s->cnt ? SEQUENCE : INFINITE;
-}
-
-/*
- * Translate \??? into a character.  Up to 3 octal digits, if no digits either
- * an escape code or a literal character.
- */
-static int backslash(s)
-register STR *s;
-{
-       register int ch, cnt, val;
-
-       for (cnt = val = 0;;) {
-               ch = (u_char) * ++s->str;
-               if (!isascii(ch) || !isdigit(ch))
-                       break;
-               val = val * 8 + ch - '0';
-               if (++cnt == 3) {
-                       ++s->str;
-                       break;
+               c = str1[in_index++];
+               if ((opts & TR_OPT_delete) && invec[c])
+                       continue;
+               coded = vector[c];
+               if ((opts & TR_OPT_squeeze_reps) && last == coded
+                && (invec[c] || outvec[coded])
+               ) {
+                       continue;
                }
+               str2[out_index++] = last = coded;
        }
-       if (cnt)
-               return (val);
-       if (ch != '\0')
-               ++s->str;
-       switch (ch) {
-       case 'a':                                       /* escape characters */
-               return ('\7');
-       case 'b':
-               return ('\b');
-       case 'f':
-               return ('\f');
-       case 'n':
-               return ('\n');
-       case 'r':
-               return ('\r');
-       case 't':
-               return ('\t');
-       case 'v':
-               return ('\13');
-       case '\0':                                      /*  \" -> \ */
-               s->state = EOS;
-               return ('\\');
-       default:                                        /* \x" -> x */
-               return (ch);
+
+       if (ENABLE_FEATURE_CLEAN_UP) {
+               free(vector);
+               free(str2);
+               free(str1);
        }
+
+       return EXIT_SUCCESS;
 }