1 /* vi: set sw=4 ts=4: */
3 * Copyright (c) 1988, 1993
4 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\
38 The Regents of the University of California. All rights reserved.\n";
43 static char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95";
45 static const char rcsid[] =
47 "$Id: tr.c,v 1.1 2000/03/05 08:07:00 erik Exp $";
53 #include <sys/types.h>
54 #include <sys/cdefs.h>
55 #include <sys/types.h>
68 enum { STRING1, STRING2 } which;
69 enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;
70 int cnt; /* character count */
71 int lastch; /* last character */
72 int equiv[2]; /* equivalence set */
73 int *set; /* set of characters */
74 char *str; /* user's string */
78 #define NCHARS (UCHAR_MAX + 1) /* Number of possible characters. */
79 #define OOBCH (UCHAR_MAX + 1) /* Out of band character value. */
81 static int next __P((STR *));
83 static int string1[NCHARS] = {
84 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */
85 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
86 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
87 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
88 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
89 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
90 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
91 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
92 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
93 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
94 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
95 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
96 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
97 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
98 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
99 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
100 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
101 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
102 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
103 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
104 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
105 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
106 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
107 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
108 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
109 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
110 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
111 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
112 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
113 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
114 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
115 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
118 STR s1 = { STRING1, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
119 STR s2 = { STRING2, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
121 static void setup(string, arg, str, cflag)
127 register int cnt, *p;
130 bzero(string, NCHARS * sizeof(int));
133 string[str->lastch] = 1;
135 for (p = string, cnt = NCHARS; cnt--; ++p)
139 static void tr_usage()
141 (void) fprintf(stderr, "%s\n%s\n%s\n%s\n",
142 "usage: tr [-csu] string1 string2",
143 " tr [-cu] -d string1",
144 " tr [-cu] -s string1",
145 " tr [-cu] -ds string1 string2");
150 extern int tr_main(argc, argv)
154 register int ch, cnt, lastch, *p;
155 int cflag, dflag, sflag, isstring2;
157 (void) setlocale(LC_CTYPE, "");
159 cflag = dflag = sflag = 0;
160 while ((ch = getopt(argc, argv, "cdsu")) != -1)
172 setbuf(stdout, (char *) NULL);
195 * tr -ds [-c] string1 string2
196 * Delete all characters (or complemented characters) in string1.
197 * Squeeze all characters in string2.
199 if (dflag && sflag) {
203 setup(string1, argv[0], &s1, cflag);
204 setup(string2, argv[1], &s2, 0);
206 for (lastch = OOBCH; (ch = getchar()) != EOF;)
207 if (!string1[ch] && (!string2[ch] || lastch != ch)) {
216 * Delete all characters (or complemented characters) in string1.
222 setup(string1, argv[0], &s1, cflag);
224 while ((ch = getchar()) != EOF)
232 * Squeeze all characters (or complemented characters) in string1.
234 if (sflag && !isstring2) {
235 setup(string1, argv[0], &s1, cflag);
237 for (lastch = OOBCH; (ch = getchar()) != EOF;)
238 if (!string1[ch] || lastch != ch) {
246 * tr [-cs] string1 string2
247 * Replace all characters (or complemented characters) in string1 with
248 * the character in the same position in string2. If the -s option is
249 * specified, squeeze all the characters in string2.
258 for (cnt = NCHARS, p = string1; cnt--;)
262 errx(1, "empty string2");
264 /* If string2 runs out of characters, use the last one specified. */
267 string1[s1.lastch] = ch = s2.lastch;
272 string1[s1.lastch] = ch = s2.lastch;
277 for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt)
278 *p = *p == OOBCH ? ch : cnt;
281 for (lastch = OOBCH; (ch = getchar()) != EOF;) {
283 if (!string2[ch] || lastch != ch) {
288 while ((ch = getchar()) != EOF)
289 (void) putchar(string1[ch]);
293 static int backslash __P((STR *));
294 static int bracket __P((STR *));
295 static int c_class __P((const void *, const void *));
296 static void genclass __P((STR *));
297 static void genequiv __P((STR *));
298 static int genrange __P((STR *));
299 static void genseq __P((STR *));
312 switch (ch = (u_char) * s->str) {
317 s->lastch = backslash(s);
329 /* We can start a range at any time. */
330 if (s->str[0] == '-' && genrange(s))
347 if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
357 static int bracket(s)
363 case ':': /* "[:class:]" */
364 if ((p = strstr(s->str + 2, ":]")) == NULL)
371 case '=': /* "[=equiv=]" */
372 if ((p = strstr(s->str + 2, "=]")) == NULL)
377 default: /* "[\###*n]" or "[#*n]" */
378 if ((p = strpbrk(s->str + 2, "*]")) == NULL)
380 if (p[0] != '*' || index(p, ']') == NULL)
391 int (*func) __P((int));
395 static CLASS classes[] = {
401 { "blank", isblank, },*/
419 {"xdigit", isxdigit,},
422 static void genclass(s)
425 register int cnt, (*func) __P((int));
430 if ((cp = (CLASS *) bsearch(&tmp, classes, sizeof(classes) /
431 sizeof(CLASS), sizeof(CLASS),
432 c_class)) == NULL) errx(1,
436 if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
438 bzero(p, (NCHARS + 1) * sizeof(int));
440 for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
450 static int c_class(a, b)
453 return (strcmp(((CLASS *) a)->name, ((CLASS *) b)->name));
457 * English doesn't have any equivalence classes, so for now
458 * we just syntax check and grab the character.
460 static void genequiv(s)
463 if (*s->str == '\\') {
464 s->equiv[0] = backslash(s);
466 errx(1, "misplaced equivalence equals sign");
468 s->equiv[0] = s->str[0];
469 if (s->str[1] != '=')
470 errx(1, "misplaced equivalence equals sign");
478 static int genrange(s)
485 stopval = *++s->str == '\\' ? backslash(s) : (u_char) * s->str++;
486 if (stopval < (u_char) s->lastch) {
490 s->cnt = stopval - s->lastch + 1;
496 static void genseq(s)
501 if (s->which == STRING1)
502 errx(1, "sequences only valid in string2");
505 s->lastch = backslash(s);
507 s->lastch = *s->str++;
509 errx(1, "misplaced sequence asterisk");
513 s->cnt = backslash(s);
520 if (isdigit((u_char) * s->str)) {
521 s->cnt = strtol(s->str, &ep, 0);
527 errx(1, "illegal sequence count");
531 s->state = s->cnt ? SEQUENCE : INFINITE;
535 * Translate \??? into a character: up to 3 octal digits or, if there are no
536 * digits, either an escape code or a literal character.
538 static int backslash(s)
541 register int ch, cnt, val;
543 for (cnt = val = 0;;) {
544 ch = (u_char) * ++s->str;
545 if (!isascii(ch) || !isdigit(ch))
547 val = val * 8 + ch - '0';
558 case 'a': /* escape characters */
572 case '\0': /* \" -> \ */
575 default: /* \x" -> x */