1 /* vi: set sw=4 ts=4: */
3 * Copyright (c) 1988, 1993
4 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\
38 The Regents of the University of California. All rights reserved.\n";
43 static char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95";
45 static const char rcsid[] =
47 "$Id: tr.c,v 1.2 2000/03/21 22:32:57 erik Exp $";
53 #include <sys/types.h>
54 #include <sys/cdefs.h>
55 #include <sys/types.h>
68 enum { STRING1, STRING2 } which;
69 enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;
70 int cnt; /* character count */
71 int lastch; /* last character */
72 int equiv[2]; /* equivalence set */
73 int *set; /* set of characters */
74 char *str; /* user's string */
78 #define NCHARS (UCHAR_MAX + 1) /* Number of possible characters; also the size of per-character tables such as string1[]. */
79 #define OOBCH (UCHAR_MAX + 1) /* Out of band character value: one past UCHAR_MAX, so it never equals a value that fits in an unsigned char. */
81 static int next __P((STR *));
83 static int string1[NCHARS] = {
84 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */
85 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
86 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
87 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
88 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
89 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
90 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
91 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
92 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
93 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
94 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
95 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
96 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
97 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
98 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
99 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
100 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
101 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
102 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
103 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
104 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
105 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
106 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
107 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
108 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
109 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
110 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
111 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
112 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
113 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
114 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
115 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
118 STR s1 = { STRING1, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL }; /* state for string1, handed to setup(); starts in NORMAL with OOBCH placeholders (presumably lastch and equiv[1] -- confirm against the STR typedef) */
119 STR s2 = { STRING2, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL }; /* state for string2; identical initial values apart from the STRING2 tag */
121 static void setup(string, arg, str, cflag)
127 register int cnt, *p;
130 bzero(string, NCHARS * sizeof(int));
133 string[str->lastch] = 1;
135 for (p = string, cnt = NCHARS; cnt--; ++p)
139 static void tr_usage()
141 (void) fprintf(stderr, "%s\n%s\n%s\n%s\n",
142 "usage: tr [-csu] string1 string2",
143 " tr [-cu] -d string1",
144 " tr [-cu] -s string1",
145 " tr [-cu] -ds string1 string2");
150 extern int tr_main(argc, argv)
154 register int ch, cnt, lastch, *p;
155 int cflag, dflag, sflag, isstring2;
157 (void) setlocale(LC_CTYPE, "");
159 cflag = dflag = sflag = 0;
160 while ((ch = getopt(argc, argv, "cdsu")) != -1)
172 setbuf(stdout, (char *) NULL);
195 * tr -ds [-c] string1 string2
196 * Delete all characters (or complemented characters) in string1.
197 * Squeeze all characters in string2.
199 if (dflag && sflag) {
203 setup(string1, argv[0], &s1, cflag);
204 setup(string2, argv[1], &s2, 0);
206 for (lastch = OOBCH; (ch = getchar()) != EOF;)
207 if (!string1[ch] && (!string2[ch] || lastch != ch)) {
216 * Delete all characters (or complemented characters) in string1.
222 setup(string1, argv[0], &s1, cflag);
224 while ((ch = getchar()) != EOF)
232 * Squeeze all characters (or complemented characters) in string1.
234 if (sflag && !isstring2) {
235 setup(string1, argv[0], &s1, cflag);
237 for (lastch = OOBCH; (ch = getchar()) != EOF;)
238 if (!string1[ch] || lastch != ch) {
246 * tr [-cs] string1 string2
247 * Replace all characters (or complemented characters) in string1 with
248 * the character in the same position in string2. If the -s option is
249 * specified, squeeze all the characters in string2.
258 for (cnt = NCHARS, p = string1; cnt--;)
262 errx(1, "empty string2");
264 /* If string2 runs out of characters, use the last one specified. */
267 string1[s1.lastch] = ch = s2.lastch;
272 string1[s1.lastch] = ch = s2.lastch;
277 for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt)
278 *p = *p == OOBCH ? ch : cnt;
281 for (lastch = OOBCH; (ch = getchar()) != EOF;) {
283 if (!string2[ch] || lastch != ch) {
288 while ((ch = getchar()) != EOF)
289 (void) putchar(string1[ch]);
293 static int backslash __P((STR *)); /* translate a backslash escape: up to 3 octal digits, an escape code, or a literal character */
294 static int bracket __P((STR *)); /* recognise the "[:class:]", "[=equiv=]" and "[#*n]" bracket forms */
295 static int c_class __P((const void *, const void *)); /* bsearch() comparator: compares two CLASS entries by name using strcmp() */
296 static void genclass __P((STR *)); /* look up a named class in classes[] and allocate its character set */
297 static void genequiv __P((STR *)); /* syntax-check an equivalence class and record its character in equiv[0] */
298 static int genrange __P((STR *)); /* read the stop value of a range and derive the count from lastch */
299 static void genseq __P((STR *)); /* parse a repeated-character sequence; errors out when used in string1 */
312 switch (ch = (u_char) * s->str) {
317 s->lastch = backslash(s);
329 /* We can start a range at any time. */
330 if (s->str[0] == '-' && genrange(s))
347 if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
357 static int bracket(s)
363 case ':': /* "[:class:]" */
364 if ((p = strstr(s->str + 2, ":]")) == NULL)
371 case '=': /* "[=equiv=]" */
372 if ((p = strstr(s->str + 2, "=]")) == NULL)
377 default: /* "[\###*n]" or "[#*n]" */
378 if ((p = strpbrk(s->str + 2, "*]")) == NULL)
380 if (p[0] != '*' || index(p, ']') == NULL)
391 int (*func) __P((int));
395 static CLASS classes[] = {
401 { "blank", isblank, },*/
419 {"xdigit", isxdigit,},
422 static void genclass(s)
425 register int cnt, (*func) __P((int));
430 if ((cp = (CLASS *) bsearch(&tmp, classes, sizeof(classes) /
431 sizeof(CLASS), sizeof(CLASS),
432 c_class)) == NULL) errx(1,
436 cp->set = p = xmalloc((NCHARS + 1) * sizeof(int));
437 bzero(p, (NCHARS + 1) * sizeof(int));
439 for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
449 static int c_class(a, b)
452 return (strcmp(((CLASS *) a)->name, ((CLASS *) b)->name));
456 * English doesn't have any equivalence classes, so for now
457 * we just syntax check and grab the character.
459 static void genequiv(s)
462 if (*s->str == '\\') {
463 s->equiv[0] = backslash(s);
465 errx(1, "misplaced equivalence equals sign");
467 s->equiv[0] = s->str[0];
468 if (s->str[1] != '=')
469 errx(1, "misplaced equivalence equals sign");
477 static int genrange(s)
484 stopval = *++s->str == '\\' ? backslash(s) : (u_char) * s->str++;
485 if (stopval < (u_char) s->lastch) {
489 s->cnt = stopval - s->lastch + 1;
495 static void genseq(s)
500 if (s->which == STRING1)
501 errx(1, "sequences only valid in string2");
504 s->lastch = backslash(s);
506 s->lastch = *s->str++;
508 errx(1, "misplaced sequence asterisk");
512 s->cnt = backslash(s);
519 if (isdigit((u_char) * s->str)) {
520 s->cnt = strtol(s->str, &ep, 0);
526 errx(1, "illegal sequence count");
530 s->state = s->cnt ? SEQUENCE : INFINITE;
534 * Translate \??? into a character: up to 3 octal digits or, if there are no
535 * digits, either an escape code or a literal character.
537 static int backslash(s)
540 register int ch, cnt, val;
542 for (cnt = val = 0;;) {
543 ch = (u_char) * ++s->str;
544 if (!isascii(ch) || !isdigit(ch))
546 val = val * 8 + ch - '0';
557 case 'a': /* escape characters */
571 case '\0': /* \" -> \ */
574 default: /* \x" -> x */