1 /* vi: set sw=4 ts=4: */
3 * Copyright (c) 1988, 1993
4 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\
38 The Regents of the University of California. All rights reserved.\n";
43 static char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95";
45 static const char rcsid[] =
47 "$Id: tr.c,v 1.4 2000/04/17 16:44:46 erik Exp $";
53 #include <sys/types.h>
54 #include <sys/cdefs.h>
55 #include <sys/types.h>
68 enum { STRING1, STRING2 } which;
69 enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;
70 int cnt; /* character count */
71 int lastch; /* last character */
72 int equiv[2]; /* equivalence set */
73 int *set; /* set of characters */
74 char *str; /* user's string */
78 #define NCHARS (UCHAR_MAX + 1) /* Number of possible characters. */
79 #define OOBCH (UCHAR_MAX + 1) /* Out of band character value. */
81 static int next __P((STR *));
83 static int string1[NCHARS] = {
84 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */
85 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
86 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
87 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
88 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
89 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
90 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
91 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
92 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
93 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
94 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
95 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
96 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
97 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
98 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
99 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
100 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
101 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
102 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
103 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
104 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
105 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
106 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
107 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
108 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
109 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
110 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
111 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
112 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
113 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
114 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
115 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
118 STR s1 = { STRING1, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
119 STR s2 = { STRING2, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
121 static void setup(string, arg, str, cflag)
127 register int cnt, *p;
130 bzero(string, NCHARS * sizeof(int));
133 string[str->lastch] = 1;
135 for (p = string, cnt = NCHARS; cnt--; ++p)
139 static void tr_usage()
141 usage( "\ttr [-cdsu] string1 [string2]\n\n"
142 "Translate, squeeze, and/or delete characters from standard\n"
143 "input, writing to standard output.\n");
147 extern int tr_main(argc, argv)
151 register int ch, cnt, lastch, *p;
152 int cflag, dflag, sflag, isstring2;
154 (void) setlocale(LC_CTYPE, "");
156 cflag = dflag = sflag = 0;
157 while ((ch = getopt(argc, argv, "cdsu")) != -1)
169 setbuf(stdout, (char *) NULL);
192 * tr -ds [-c] string1 string2
193 * Delete all characters (or complemented characters) in string1.
194 * Squeeze all characters in string2.
196 if (dflag && sflag) {
200 setup(string1, argv[0], &s1, cflag);
201 setup(string2, argv[1], &s2, 0);
203 for (lastch = OOBCH; (ch = getchar()) != EOF;)
204 if (!string1[ch] && (!string2[ch] || lastch != ch)) {
213 * Delete all characters (or complemented characters) in string1.
219 setup(string1, argv[0], &s1, cflag);
221 while ((ch = getchar()) != EOF)
229 * Squeeze all characters (or complemented characters) in string1.
231 if (sflag && !isstring2) {
232 setup(string1, argv[0], &s1, cflag);
234 for (lastch = OOBCH; (ch = getchar()) != EOF;)
235 if (!string1[ch] || lastch != ch) {
243 * tr [-cs] string1 string2
244 * Replace all characters (or complemented characters) in string1 with
245 * the character in the same position in string2. If the -s option is
246 * specified, squeeze all the characters in string2.
255 for (cnt = NCHARS, p = string1; cnt--;)
259 errx(1, "empty string2");
261 /* If string2 runs out of characters, use the last one specified. */
264 string1[s1.lastch] = ch = s2.lastch;
269 string1[s1.lastch] = ch = s2.lastch;
274 for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt)
275 *p = *p == OOBCH ? ch : cnt;
278 for (lastch = OOBCH; (ch = getchar()) != EOF;) {
280 if (!string2[ch] || lastch != ch) {
285 while ((ch = getchar()) != EOF)
286 (void) putchar(string1[ch]);
290 static int backslash __P((STR *));
291 static int bracket __P((STR *));
292 static int c_class __P((const void *, const void *));
293 static void genclass __P((STR *));
294 static void genequiv __P((STR *));
295 static int genrange __P((STR *));
296 static void genseq __P((STR *));
309 switch (ch = (u_char) * s->str) {
314 s->lastch = backslash(s);
326 /* We can start a range at any time. */
327 if (s->str[0] == '-' && genrange(s))
344 if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
354 static int bracket(s)
360 case ':': /* "[:class:]" */
361 if ((p = strstr(s->str + 2, ":]")) == NULL)
368 case '=': /* "[=equiv=]" */
369 if ((p = strstr(s->str + 2, "=]")) == NULL)
374 default: /* "[\###*n]" or "[#*n]" */
375 if ((p = strpbrk(s->str + 2, "*]")) == NULL)
377 if (p[0] != '*' || index(p, ']') == NULL)
388 int (*func) __P((int));
392 static CLASS classes[] = {
398 { "blank", isblank, },*/
416 {"xdigit", isxdigit,},
419 static void genclass(s)
422 register int cnt, (*func) __P((int));
427 if ((cp = (CLASS *) bsearch(&tmp, classes, sizeof(classes) /
428 sizeof(CLASS), sizeof(CLASS),
429 c_class)) == NULL) errx(1,
433 cp->set = p = xmalloc((NCHARS + 1) * sizeof(int));
434 bzero(p, (NCHARS + 1) * sizeof(int));
436 for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
446 static int c_class(a, b)
449 return (strcmp(((CLASS *) a)->name, ((CLASS *) b)->name));
453 * English doesn't have any equivalence classes, so for now
454 * we just syntax check and grab the character.
456 static void genequiv(s)
459 if (*s->str == '\\') {
460 s->equiv[0] = backslash(s);
462 errx(1, "misplaced equivalence equals sign");
464 s->equiv[0] = s->str[0];
465 if (s->str[1] != '=')
466 errx(1, "misplaced equivalence equals sign");
474 static int genrange(s)
481 stopval = *++s->str == '\\' ? backslash(s) : (u_char) * s->str++;
482 if (stopval < (u_char) s->lastch) {
486 s->cnt = stopval - s->lastch + 1;
492 static void genseq(s)
497 if (s->which == STRING1)
498 errx(1, "sequences only valid in string2");
501 s->lastch = backslash(s);
503 s->lastch = *s->str++;
505 errx(1, "misplaced sequence asterisk");
509 s->cnt = backslash(s);
516 if (isdigit((u_char) * s->str)) {
517 s->cnt = strtol(s->str, &ep, 0);
523 errx(1, "illegal sequence count");
527 s->state = s->cnt ? SEQUENCE : INFINITE;
531 * Translate \??? into a character: up to 3 octal digits or, if no digits
532 * follow the backslash, either an escape code or a literal character.
534 static int backslash(s)
537 register int ch, cnt, val;
539 for (cnt = val = 0;;) {
540 ch = (u_char) * ++s->str;
541 if (!isascii(ch) || !isdigit(ch))
543 val = val * 8 + ch - '0';
554 case 'a': /* escape characters */
568 case '\0': /* \" -> \ */
571 default: /* \x" -> x */