594962f926554ccff7fdb42478f9e9511eb3b8e6
[oweals/busybox.git] / coreutils / tr.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * Mini tr implementation for busybox
4  *
5  ** Copyright (c) 1987,1997, Prentice Hall   All rights reserved.
6  *
7  * The name of Prentice Hall may not be used to endorse or promote
8  * products derived from this software without specific prior
9  * written permission.
10  *
11  * Copyright (c) Michiel Huisjes
12  *
13  * This version of tr is adapted from Minix tr and was modified
14  * by Erik Andersen <andersen@codepoet.org> to be used in busybox.
15  *
16  * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
17  */
18
19 #include "busybox.h"
20
/* Largest byte value handled (octal 0377 == 255); tables indexed by a
 * byte value therefore hold ASCII + 1 entries. */
#define ASCII 0377

/* Option flags shared across this file; tr_main sets them from -c, -d, -s */
static char com_fl;
static char del_fl;
static char sq_fl;

/* Pointers to the buffers reserved in tr_main, shared with the helpers */
static unsigned char *poutput;	/* translated bytes waiting to be written */
static unsigned char *pvector;	/* input byte -> replacement byte */
static unsigned char *pinvec;	/* nonzero for bytes listed in STRING1 */
static unsigned char *poutvec;	/* nonzero for bytes listed in STRING2 */
29
30 static void convert(void)
31 {
32         int read_chars = 0, in_index = 0, out_index = 0, c, coded, last = -1;
33
34         for (;;) {
35                 // If we're out of input, flush output and read more input.
36
37                 if (in_index == read_chars) {
38                         if (out_index) {
39                                 if (write(1, (char *) poutput, out_index) != out_index)
40                                         bb_error_msg_and_die(bb_msg_write_error);
41                                 out_index = 0;
42                         }
43
44                         if ((read_chars = read(0, bb_common_bufsiz1, BUFSIZ)) <= 0) {
45                                 if (write(1, (char *) poutput, out_index) != out_index)
46                                         bb_error_msg(bb_msg_write_error);
47                                 exit(0);
48                         }
49                         in_index = 0;
50                 }
51                 c = bb_common_bufsiz1[in_index++];
52                 coded = pvector[c];
53                 if (del_fl && pinvec[c])
54                         continue;
55                 if (sq_fl && last == coded && (pinvec[c] || poutvec[coded]))
56                         continue;
57                 poutput[out_index++] = last = coded;
58         }
59
60         /* NOTREACHED */
61 }
62
63 static void map(register unsigned char *string1, unsigned int string1_len,
64                 register unsigned char *string2, unsigned int string2_len)
65 {
66         unsigned char last = '0';
67         unsigned int i, j;
68
69         for (j = 0, i = 0; i < string1_len; i++) {
70                 if (string2_len <= j)
71                         pvector[string1[i]] = last;
72                 else
73                         pvector[string1[i]] = last = string2[j++];
74         }
75 }
76
77 /* supported constructs:
78  *   Ranges,  e.g.,  [0-9]  ==>  0123456789
79  *   Escapes, e.g.,  \a     ==>  Control-G
80  *       Character classes, e.g. [:upper:] ==> A ... Z
81  */
82 static unsigned int expand(const char *arg, register unsigned char *buffer)
83 {
84         unsigned char *buffer_start = buffer;
85         int i, ac;
86
87         while (*arg) {
88                 if (*arg == '\\') {
89                         arg++;
90                         *buffer++ = bb_process_escape_sequence(&arg);
91                 } else if (*(arg+1) == '-') {
92                         ac = *(arg+2);
93                         if(ac == 0) {
94                                 *buffer++ = *arg++;
95                                 continue;
96                         }
97                         i = *arg;
98                         while (i <= ac)
99                                 *buffer++ = i++;
100                         arg += 3; /* Skip the assumed a-z */
101                 } else if (*arg == '[') {
102                         arg++;
103                         i = *arg++;
104                         if (ENABLE_FEATURE_TR_CLASSES && i == ':') {
105                                 if (strncmp(arg, "alpha", 5) == 0) {
106                                         for (i = 'A'; i <= 'Z'; i++)
107                                                 *buffer++ = i;
108                                         for (i = 'a'; i <= 'z'; i++)
109                                                 *buffer++ = i;
110                                 }
111                                 else if (strncmp(arg, "alnum", 5) == 0) {
112                                         for (i = '0'; i <= '9'; i++)
113                                                 *buffer++ = i;
114                                         for (i = 'A'; i <= 'Z'; i++)
115                                                 *buffer++ = i;
116                                         for (i = 'a'; i <= 'z'; i++)
117                                                 *buffer++ = i;
118                                 }
119                                 else if (strncmp(arg, "digit", 5) == 0)
120                                         for (i = '0'; i <= '9'; i++)
121                                                 *buffer++ = i;
122                                 else if (strncmp(arg, "lower", 5) == 0)
123                                         for (i = 'a'; i <= 'z'; i++)
124                                                 *buffer++ = i;
125                                 else if (strncmp(arg, "upper", 5) == 0)
126                                         for (i = 'A'; i <= 'Z'; i++)
127                                                 *buffer++ = i;
128                                 else if (strncmp(arg, "space", 5) == 0) {
129                                     const char s[] = "\t\n\v\f\r ";
130                                         strcat((char*)buffer, s);
131                                         buffer += sizeof(s) - 1;
132                                 }
133                                 else if (strncmp(arg, "blank", 5) == 0) {
134                                         *buffer++ = '\t';
135                                         *buffer++ = ' ';
136                                 }
137                                 /* gcc gives a warning if braces aren't used here */
138                                 else if (strncmp(arg, "punct", 5) == 0) {
139                                         for (i = 0; i <= ASCII; i++)
140                                                 if (isprint(i) && (!isalnum(i)) && (!isspace(i)))
141                                                         *buffer++ = i;
142                                 }
143                                 else if (strncmp(arg, "cntrl", 5) == 0) {
144                                         for (i = 0; i <= ASCII; i++)
145                                                 if (iscntrl(i))
146                                                         *buffer++ = i;
147                                 }
148                                 else {
149                                         *buffer++ = '[';
150                                         *buffer++ = ':';
151                                         continue;
152                                 }
153                                 break;
154                         }
155                         if (ENABLE_FEATURE_TR_EQUIV && i == '=') {
156                                 *buffer++ = *arg;
157                                 /* skip the closing =] */
158                                 arg += 3;
159                                 continue;
160                         }
161                         if (*arg++ != '-') {
162                                 *buffer++ = '[';
163                                 arg -= 2;
164                                 continue;
165                         }
166                         ac = *arg++;
167                         while (i <= ac)
168                                 *buffer++ = i++;
169                         arg++;                          /* Skip the assumed ']' */
170                 } else
171                         *buffer++ = *arg++;
172         }
173
174         return (buffer - buffer_start);
175 }
176
177 static int complement(unsigned char *buffer, int buffer_len)
178 {
179         register short i, j, ix;
180         char conv[ASCII + 2];
181
182         ix = 0;
183         for (i = 0; i <= ASCII; i++) {
184                 for (j = 0; j < buffer_len; j++)
185                         if (buffer[j] == i)
186                                 break;
187                 if (j == buffer_len)
188                         conv[ix++] = i & ASCII;
189         }
190         memcpy(buffer, conv, ix);
191         return ix;
192 }
193
194 int tr_main(int argc, char **argv)
195 {
196         register unsigned char *ptr;
197         int output_length=0, input_length;
198         int idx = 1;
199         int i;
200         RESERVE_CONFIG_BUFFER(output, BUFSIZ);
201         RESERVE_CONFIG_BUFFER(vector, ASCII+1);
202         RESERVE_CONFIG_BUFFER(invec,  ASCII+1);
203         RESERVE_CONFIG_BUFFER(outvec, ASCII+1);
204
205         /* ... but make them available globally */
206         poutput = (unsigned char*)output;
207         pvector = (unsigned char*)vector;
208         pinvec  = (unsigned char*)invec;
209         poutvec = (unsigned char*)outvec;
210
211         if (argc > 1 && argv[idx][0] == '-') {
212                 for (ptr = (unsigned char *) &argv[idx][1]; *ptr; ptr++) {
213                         switch (*ptr) {
214                         case 'c':
215                                 com_fl = TRUE;
216                                 break;
217                         case 'd':
218                                 del_fl = TRUE;
219                                 break;
220                         case 's':
221                                 sq_fl = TRUE;
222                                 break;
223                         default:
224                                 bb_show_usage();
225                         }
226                 }
227                 idx++;
228         }
229         for (i = 0; i <= ASCII; i++) {
230                 vector[i] = i;
231                 invec[i] = outvec[i] = FALSE;
232         }
233
234         if (argv[idx] != NULL) {
235                 input_length = expand(argv[idx++], bb_common_bufsiz1);
236                 if (com_fl)
237                         input_length = complement(bb_common_bufsiz1, input_length);
238                 if (argv[idx] != NULL) {
239                         if (*argv[idx] == '\0')
240                                 bb_error_msg_and_die("STRING2 cannot be empty");
241                         output_length = expand(argv[idx], (unsigned char*)output);
242                         map(bb_common_bufsiz1, input_length, (unsigned char*)output, output_length);
243                 }
244                 for (i = 0; i < input_length; i++)
245                         invec[bb_common_bufsiz1[i]] = TRUE;
246                 for (i = 0; i < output_length; i++)
247                         outvec[(unsigned char)output[i]] = TRUE;
248         }
249         convert();
250         return (0);
251 }