[un]expand: account for different character widths. +16 bytes.
[oweals/busybox.git] / coreutils / expand.c
1 /* expand - convert tabs to spaces
2  * unexpand - convert spaces to tabs
3  *
4  * Copyright (C) 89, 91, 1995-2006 Free Software Foundation, Inc.
5  *
6  * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
7  *
8  * David MacKenzie <djm@gnu.ai.mit.edu>
9  *
10  * Options for expand:
11  * -t num  --tabs=NUM      Convert tabs to num spaces (default 8 spaces).
12  * -i      --initial       Only convert initial tabs on each line to spaces.
13  *
14  * Options for unexpand:
15  * -a      --all           Convert all blanks, instead of just initial blanks.
16  * -f      --first-only    Convert only leading sequences of blanks (default).
17  * -t num  --tabs=NUM      Have tabs num characters apart instead of 8.
18  *
19  *  Busybox version (C) 2007 by Tito Ragusa <farmatito@tiscali.it>
20  *
21  *  Caveat: these versions of expand and unexpand don't accept tab lists.
22  */
23 #include "libbb.h"
24 #include "unicode.h"
25
/* Bit masks tested against the option word returned by getopt32() */
enum {
	OPT_INITIAL = 0x01,	/* only treat blanks at the start of a line */
	OPT_TABS    = 0x02,	/* -t NUM was given */
	OPT_ALL     = 0x04,	/* unexpand: convert all blanks */
};
31
32 #if ENABLE_EXPAND
33 static void expand(FILE *file, unsigned tab_size, unsigned opt)
34 {
35         char *line;
36
37         while ((line = xmalloc_fgets(file)) != NULL) {
38                 unsigned char c;
39                 char *ptr;
40                 char *ptr_strbeg;
41
42                 ptr = ptr_strbeg = line;
43                 while ((c = *ptr) != '\0') {
44                         if ((opt & OPT_INITIAL) && !isblank(c)) {
45                                 /* not space or tab */
46                                 break;
47                         }
48                         if (c == '\t') {
49                                 unsigned len;
50                                 *ptr = '\0';
51 # if ENABLE_FEATURE_ASSUME_UNICODE
52                                 {
53                                         uni_stat_t uni_stat;
54                                         printable_string(&uni_stat, ptr_strbeg);
55                                         len = uni_stat.unicode_width;
56                                 }
57 # else
58                                 len = ptr - ptr_strbeg;
59 # endif
60                                 len = tab_size - (len % tab_size);
61                                 /*while (ptr[1] == '\t') { ptr++; len += tab_size; } - can handle many tabs at once */
62                                 printf("%s%*s", ptr_strbeg, len, "");
63                                 ptr_strbeg = ptr + 1;
64                         }
65                         ptr++;
66                 }
67                 fputs(ptr_strbeg, stdout);
68                 free(line);
69         }
70 }
71 #endif
72
73 #if ENABLE_UNEXPAND
74 static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
75 {
76         char *line;
77
78         while ((line = xmalloc_fgets(file)) != NULL) {
79                 char *ptr = line;
80                 unsigned column = 0;
81
82                 while (*ptr) {
83                         unsigned n;
84                         unsigned len;
85
86                         while (*ptr == ' ') {
87                                 column++;
88                                 ptr++;
89                         }
90                         if (*ptr == '\t') {
91                                 column += tab_size - (column % tab_size);
92                                 ptr++;
93                                 continue;
94                         }
95
96                         n = column / tab_size;
97                         column = column % tab_size;
98                         while (n--)
99                                 putchar('\t');
100
101                         if ((opt & OPT_INITIAL) && ptr != line) {
102                                 printf("%*s%s", column, "", ptr);
103                                 break;
104                         }
105                         n = strcspn(ptr, "\t ");
106                         printf("%*s%.*s", column, "", n, ptr);
107 # if ENABLE_FEATURE_ASSUME_UNICODE
108                         {
109                                 char c;
110                                 uni_stat_t uni_stat;
111                                 c = ptr[n];
112                                 ptr[n] = '\0';
113                                 printable_string(&uni_stat, ptr);
114                                 len = uni_stat.unicode_width;
115                                 ptr[n] = c;
116                         }
117 # else
118                         len = n;
119 # endif
120                         ptr += n;
121                         column = (column + len) % tab_size;
122                 }
123                 free(line);
124         }
125 }
126 #endif
127
128 int expand_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
129 int expand_main(int argc UNUSED_PARAM, char **argv)
130 {
131         /* Default 8 spaces for 1 tab */
132         const char *opt_t = "8";
133         FILE *file;
134         unsigned tab_size;
135         unsigned opt;
136         int exit_status = EXIT_SUCCESS;
137
138 #if ENABLE_FEATURE_EXPAND_LONG_OPTIONS
139         static const char expand_longopts[] ALIGN1 =
140                 /* name, has_arg, val */
141                 "initial\0"          No_argument       "i"
142                 "tabs\0"             Required_argument "t"
143         ;
144 #endif
145 #if ENABLE_FEATURE_UNEXPAND_LONG_OPTIONS
146         static const char unexpand_longopts[] ALIGN1 =
147                 /* name, has_arg, val */
148                 "first-only\0"       No_argument       "i"
149                 "tabs\0"             Required_argument "t"
150                 "all\0"              No_argument       "a"
151         ;
152 #endif
153         init_unicode();
154
155         if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) {
156                 IF_FEATURE_EXPAND_LONG_OPTIONS(applet_long_options = expand_longopts);
157                 opt = getopt32(argv, "it:", &opt_t);
158         } else {
159                 IF_FEATURE_UNEXPAND_LONG_OPTIONS(applet_long_options = unexpand_longopts);
160                 /* -t NUM sets also -a */
161                 opt_complementary = "ta";
162                 opt = getopt32(argv, "ft:a", &opt_t);
163                 /* -f --first-only is the default */
164                 if (!(opt & OPT_ALL)) opt |= OPT_INITIAL;
165         }
166         tab_size = xatou_range(opt_t, 1, UINT_MAX);
167
168         argv += optind;
169
170         if (!*argv) {
171                 *--argv = (char*)bb_msg_standard_input;
172         }
173         do {
174                 file = fopen_or_warn_stdin(*argv);
175                 if (!file) {
176                         exit_status = EXIT_FAILURE;
177                         continue;
178                 }
179
180                 if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e'))
181                         IF_EXPAND(expand(file, tab_size, opt));
182                 else
183                         IF_UNEXPAND(unexpand(file, tab_size, opt));
184
185                 /* Check and close the file */
186                 if (fclose_if_not_stdin(file)) {
187                         bb_simple_perror_msg(*argv);
188                         exit_status = EXIT_FAILURE;
189                 }
190                 /* If stdin also clear EOF */
191                 if (file == stdin)
192                         clearerr(file);
193         } while (*++argv);
194
195         /* Now close stdin also */
196         /* (if we didn't read from it, it's a no-op) */
197         if (fclose(stdin))
198                 bb_perror_msg_and_die(bb_msg_standard_input);
199
200         fflush_stdout_and_exit(exit_status);
201 }