6eb86750d0417afb091d0fd89f96b20595908b9d
[oweals/busybox.git] / coreutils / tr.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * Mini tr implementation for busybox
4  *
5  ** Copyright (c) 1987,1997, Prentice Hall   All rights reserved.
6  *
7  * The name of Prentice Hall may not be used to endorse or promote
8  * products derived from this software without specific prior
9  * written permission.
10  *
11  * Copyright (c) Michiel Huisjes
12  *
13  * This version of tr is adapted from Minix tr and was modified
14  * by Erik Andersen <andersen@codepoet.org> to be used in busybox.
15  *
16  * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
17  */
18
19 #include "busybox.h"
20
21 // Even with -funsigned-char, gcc still complains about char as an array index.
22
23 #define GCC4_IS_STUPID int
24
25 #define ASCII 0377
26
27 /* some "globals" shared across this file */
28 static char com_fl, del_fl, sq_fl;
29 /* these last are pointers to static buffers declared in tr_main */
30 static char *poutput, *pvector, *pinvec, *poutvec;
31
32 static void convert(void)
33 {
34         int read_chars = 0, in_index = 0, out_index = 0, c, coded, last = -1;
35
36         for (;;) {
37                 // If we're out of input, flush output and read more input.
38
39                 if (in_index == read_chars) {
40                         if (out_index) {
41                                 if (write(1, (char *) poutput, out_index) != out_index)
42                                         bb_error_msg_and_die(bb_msg_write_error);
43                                 out_index = 0;
44                         }
45
46                         if ((read_chars = read(0, bb_common_bufsiz1, BUFSIZ)) <= 0) {
47                                 if (write(1, (char *) poutput, out_index) != out_index)
48                                         bb_error_msg(bb_msg_write_error);
49                                 exit(0);
50                         }
51                         in_index = 0;
52                 }
53                 c = bb_common_bufsiz1[in_index++];
54                 coded = pvector[c];
55                 if (del_fl && pinvec[c])
56                         continue;
57                 if (sq_fl && last == coded && (pinvec[c] || poutvec[coded]))
58                         continue;
59                 poutput[out_index++] = last = coded;
60         }
61
62         /* NOTREACHED */
63 }
64
65 static void map(char *string1, unsigned int string1_len,
66                 char *string2, unsigned int string2_len)
67 {
68         char last = '0';
69         unsigned int i, j;
70
71         for (j = 0, i = 0; i < string1_len; i++) {
72                 if (string2_len <= j)
73                         pvector[(GCC4_IS_STUPID)string1[i]] = last;
74                 else
75                         pvector[(GCC4_IS_STUPID)string1[i]] = last = string2[j++];
76         }
77 }
78
79 /* supported constructs:
80  *   Ranges,  e.g.,  [0-9]  ==>  0123456789
81  *   Escapes, e.g.,  \a     ==>  Control-G
82  *       Character classes, e.g. [:upper:] ==> A ... Z
83  */
84 static unsigned int expand(const char *arg, char *buffer)
85 {
86         char *buffer_start = buffer;
87         int i, ac;
88
89         while (*arg) {
90                 if (*arg == '\\') {
91                         arg++;
92                         *buffer++ = bb_process_escape_sequence(&arg);
93                 } else if (*(arg+1) == '-') {
94                         ac = *(arg+2);
95                         if(ac == 0) {
96                                 *buffer++ = *arg++;
97                                 continue;
98                         }
99                         i = *arg;
100                         while (i <= ac)
101                                 *buffer++ = i++;
102                         arg += 3; /* Skip the assumed a-z */
103                 } else if (*arg == '[') {
104                         arg++;
105                         i = *arg++;
106                         if (ENABLE_FEATURE_TR_CLASSES && i == ':') {
107                                 if (strncmp(arg, "alpha", 5) == 0) {
108                                         for (i = 'A'; i <= 'Z'; i++)
109                                                 *buffer++ = i;
110                                         for (i = 'a'; i <= 'z'; i++)
111                                                 *buffer++ = i;
112                                 }
113                                 else if (strncmp(arg, "alnum", 5) == 0) {
114                                         for (i = '0'; i <= '9'; i++)
115                                                 *buffer++ = i;
116                                         for (i = 'A'; i <= 'Z'; i++)
117                                                 *buffer++ = i;
118                                         for (i = 'a'; i <= 'z'; i++)
119                                                 *buffer++ = i;
120                                 }
121                                 else if (strncmp(arg, "digit", 5) == 0)
122                                         for (i = '0'; i <= '9'; i++)
123                                                 *buffer++ = i;
124                                 else if (strncmp(arg, "lower", 5) == 0)
125                                         for (i = 'a'; i <= 'z'; i++)
126                                                 *buffer++ = i;
127                                 else if (strncmp(arg, "upper", 5) == 0)
128                                         for (i = 'A'; i <= 'Z'; i++)
129                                                 *buffer++ = i;
130                                 else if (strncmp(arg, "space", 5) == 0) {
131                                     const char s[] = "\t\n\v\f\r ";
132                                         strcat((char*)buffer, s);
133                                         buffer += sizeof(s) - 1;
134                                 }
135                                 else if (strncmp(arg, "blank", 5) == 0) {
136                                         *buffer++ = '\t';
137                                         *buffer++ = ' ';
138                                 }
139                                 /* gcc gives a warning if braces aren't used here */
140                                 else if (strncmp(arg, "punct", 5) == 0) {
141                                         for (i = 0; i <= ASCII; i++)
142                                                 if (isprint(i) && (!isalnum(i)) && (!isspace(i)))
143                                                         *buffer++ = i;
144                                 }
145                                 else if (strncmp(arg, "cntrl", 5) == 0) {
146                                         for (i = 0; i <= ASCII; i++)
147                                                 if (iscntrl(i))
148                                                         *buffer++ = i;
149                                 }
150                                 else {
151                                         *buffer++ = '[';
152                                         *buffer++ = ':';
153                                         continue;
154                                 }
155                                 break;
156                         }
157                         if (ENABLE_FEATURE_TR_EQUIV && i == '=') {
158                                 *buffer++ = *arg;
159                                 /* skip the closing =] */
160                                 arg += 3;
161                                 continue;
162                         }
163                         if (*arg++ != '-') {
164                                 *buffer++ = '[';
165                                 arg -= 2;
166                                 continue;
167                         }
168                         ac = *arg++;
169                         while (i <= ac)
170                                 *buffer++ = i++;
171                         arg++;                          /* Skip the assumed ']' */
172                 } else
173                         *buffer++ = *arg++;
174         }
175
176         return (buffer - buffer_start);
177 }
178
/*
 * Replace the character set held in buffer with its complement.
 *
 * buffer:     on entry, buffer_len characters (duplicates allowed);
 *             on return, every byte value 0 .. 0377 that was NOT among
 *             them, in ascending order.  Must have room for 256 bytes.
 * buffer_len: number of characters in buffer on entry; a value <= 0 is
 *             treated as an empty set.
 *
 * Returns the number of characters now in buffer.
 */
static int complement(char *buffer, int buffer_len)
{
	enum { NCHARS = 256 };	/* one slot per byte value, 0 .. 0377 */
	char seen[NCHARS] = { 0 };
	int i, ix = 0;

	/* Record which byte values occur.  Going through unsigned char makes
	 * bytes >= 0x80 land in 128 .. 255 even where plain char is signed. */
	for (i = 0; i < buffer_len; i++)
		seen[(unsigned char) buffer[i]] = 1;

	/* The input is fully summarised in seen[], so the result can be
	 * written straight over it. */
	for (i = 0; i < NCHARS; i++)
		if (!seen[i])
			buffer[ix++] = (char) i;

	return ix;
}
195
196 int tr_main(int argc, char **argv)
197 {
198         unsigned char *ptr;
199         int output_length=0, input_length;
200         int idx = 1;
201         int i;
202         RESERVE_CONFIG_BUFFER(output, BUFSIZ);
203         RESERVE_CONFIG_BUFFER(vector, ASCII+1);
204         RESERVE_CONFIG_BUFFER(invec,  ASCII+1);
205         RESERVE_CONFIG_BUFFER(outvec, ASCII+1);
206
207         /* ... but make them available globally */
208         poutput = output;
209         pvector = vector;
210         pinvec  = invec;
211         poutvec = outvec;
212
213         if (argc > 1 && argv[idx][0] == '-') {
214                 for (ptr = (unsigned char *) &argv[idx][1]; *ptr; ptr++) {
215                         switch (*ptr) {
216                         case 'c':
217                                 com_fl = TRUE;
218                                 break;
219                         case 'd':
220                                 del_fl = TRUE;
221                                 break;
222                         case 's':
223                                 sq_fl = TRUE;
224                                 break;
225                         default:
226                                 bb_show_usage();
227                         }
228                 }
229                 idx++;
230         }
231         for (i = 0; i <= ASCII; i++) {
232                 vector[i] = i;
233                 invec[i] = outvec[i] = FALSE;
234         }
235
236         if (argv[idx] != NULL) {
237                 input_length = expand(argv[idx++], bb_common_bufsiz1);
238                 if (com_fl)
239                         input_length = complement(bb_common_bufsiz1, input_length);
240                 if (argv[idx] != NULL) {
241                         if (*argv[idx] == '\0')
242                                 bb_error_msg_and_die("STRING2 cannot be empty");
243                         output_length = expand(argv[idx], output);
244                         map(bb_common_bufsiz1, input_length, output, output_length);
245                 }
246                 for (i = 0; i < input_length; i++)
247                         invec[(GCC4_IS_STUPID)bb_common_bufsiz1[i]] = TRUE;
248                 for (i = 0; i < output_length; i++)
249                         outvec[(GCC4_IS_STUPID)output[i]] = TRUE;
250         }
251         convert();
252         return (0);
253 }