fix q command
[oweals/busybox.git] / editors / sed.c
1 /*
2  * sed.c - very minimalist version of sed
3  *
4  * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
5  * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
6  * Copyright (C) 2002  Matt Kraai
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21  *
22  */
23
24 /*
25         Supported features and commands in this version of sed:
26
27          - comments ('#')
28          - address matching: num|/matchstr/[,num|/matchstr/|$]command
         - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
30          - edit commands: (a)ppend, (i)nsert, (c)hange
31          - file commands: (r)ead
32          - backreferences in substitution expressions (\1, \2...\9)
33          - grouped commands: {cmd1;cmd2}
34
35          (Note: Specifying an address (range) to match is *optional*; commands
36          default to the whole pattern space if no specific address match was
37          requested.)
38
39         Unsupported features:
40
41          - transliteration (y/source-chars/dest-chars/) (use 'tr')
42          - no pattern space hold space storing / swapping (x, etc.)
43          - no labels / branching (: label, b, t, and friends)
44          - and lots, lots more.
45
46         Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
47 */
48
49 #include <stdio.h>
50 #include <unistd.h> /* for getopt() */
51 #include <regex.h>
52 #include <string.h> /* for strdup() */
53 #include <errno.h>
54 #include <ctype.h> /* for isspace() */
55 #include <stdlib.h>
56 #include "busybox.h"
57
/* the spec requires labels to be recognized as unique up to at least 8 chars; behaviour is unspecified for longer labels */
59 #define SED_LABEL_LENGTH        8
60
61 /* externs */
62 extern void xregcomp(regex_t *preg, const char *regex, int cflags);
63 extern int optind; /* in unistd.h */
64 extern char *optarg; /* ditto */
65
66 /* options */
67 static int be_quiet = 0;
68
69
70 typedef struct sed_cmd_s {
71         /* Order by alignment requirements */
72
73         /* address storage */
74         regex_t *beg_match; /* sed -e '/match/cmd' */
75         regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */
76
77         /* SUBSTITUTION COMMAND SPECIFIC FIELDS */
78
79         /* sed -e 's/sub_match/replace/' */
80         regex_t *sub_match;
81         char *replace;
82
83         /* EDIT COMMAND (a,i,c) SPECIFIC FIELDS */
84         char *editline;
85
86         /* FILE COMMAND (r) SPECIFIC FIELDS */
87         char *filename;
88
89         /* address storage */
90         int beg_line; /* 'sed 1p'   0 == no begining line, apply commands to all lines */
91         int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
92         /* SUBSTITUTION COMMAND SPECIFIC FIELDS */
93
94         unsigned int num_backrefs:4; /* how many back references (\1..\9) */
95                         /* Note:  GNU/POSIX sed does not save more than nine backrefs, so
96                          * we only use 4 bits to hold the number */
97         unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */
98         unsigned int sub_p:2; /* sed -e 's/foo/bar/p' (print substitution) */
99
100         /* GENERAL FIELDS */
101         char delimiter;     /* The delimiter used to separate regexps */
102
103         /* the command */
104         char cmd; /* p,d,s (add more at your leisure :-) */
105
106         /* inversion flag */
107         int invert;         /* the '!' after the address */
108
109         /* Branch commands */
110         char label[SED_LABEL_LENGTH + 1];
111
112         /* next command in list (sequential list of specified commands) */
113         struct sed_cmd_s *linear;
114
115 } sed_cmd_t;
116
117 /* globals */
118 /* linked list of sed commands */
119 static sed_cmd_t sed_cmd_head;
120 static sed_cmd_t *sed_cmd_tail = &sed_cmd_head;
121
122 const char * const semicolon_whitespace = "; \n\r\t\v\0";
123
124 #ifdef CONFIG_FEATURE_CLEAN_UP
125 static void destroy_cmd_strs(void)
126 {
127         sed_cmd_t *sed_cmd = sed_cmd_head.linear;
128
129         while (sed_cmd) {
130                 sed_cmd_t *sed_cmd_next = sed_cmd->linear;
131
132                 if (sed_cmd->beg_match) {
133                         regfree(sed_cmd->beg_match);
134                         free(sed_cmd->beg_match);
135                 }
136                 if (sed_cmd->end_match) {
137                         regfree(sed_cmd->end_match);
138                         free(sed_cmd->end_match);
139                 }
140                 if (sed_cmd->sub_match) {
141                         regfree(sed_cmd->sub_match);
142                         free(sed_cmd->sub_match);
143                 }
144                 free(sed_cmd->replace);
145                 free(sed_cmd);
146                 sed_cmd = sed_cmd_next;
147         }
148 }
149 #endif
150
151
/*
 * index_of_next_unescaped_regexp_delim - scan str from left to right,
 * starting at idx, and return the index of the first regular expression
 * delimiter (typically a forward slash) that is neither preceded by a
 * backslash nor located inside a [bracket expression].
 * Returns -1 when the end of the string is reached first.
 */
static int index_of_next_unescaped_regexp_delim(const char delimiter, const char *str, int idx)
{
	int bracket_start = -1;	/* index of the '[' we are inside of, -1 if none */
	int skip_next = 0;	/* set when the previous char was an unescaped backslash */

	while (str[idx] != '\0') {
		const char c = str[idx];

		if (bracket_start != -1) {
			/* a ']' right after '[' or '[^' is an ordinary member of
			 * the set and does not close the bracket expression */
			const int is_first_member = (bracket_start == idx - 1)
				|| (bracket_start == idx - 2 && str[idx - 1] == '^');

			if (c == ']' && !is_first_member)
				bracket_start = -1;
		} else if (skip_next) {
			skip_next = 0;
		} else if (c == '\\') {
			skip_next = 1;
		} else if (c == '[') {
			bracket_start = idx;
		} else if (c == delimiter) {
			return idx;
		}
		idx++;
	}

	/* ran off the end of the string without finding the delimiter */
	return -1;
}
182
/*
 * get_address - parse one address at the start of my_str.
 *
 * An address is a line number (stored in *linenum), a '$' (stored as -1 in
 * *linenum), or a regex written as /re/ or \cREc for an arbitrary delimiter
 * c (compiled into a newly allocated *regex; the delimiter used is stored
 * in *delimiter).  When parsing a regex, the closing delimiter in my_str is
 * overwritten with a NUL.
 *
 * Returns the index in the string just past where the address ends, or 0
 * if my_str does not start with an address.
 */
static int get_address(char *delimiter, char *my_str, int *linenum, regex_t **regex)
{
	int idx = 0;

	/* <ctype.h> functions need an unsigned char value */
	if (isdigit((unsigned char)my_str[idx])) {
		char *endstr;

		*linenum = strtol(my_str, &endstr, 10);
		/* endstr shouldnt ever equal NULL */
		idx = endstr - my_str;
	}
	else if (my_str[idx] == '$') {
		*linenum = -1;
		idx++;
	}
	else if (my_str[idx] == '/' || my_str[idx] == '\\') {
		int idx_start = 1;

		*delimiter = '/';
		if (my_str[idx] == '\\') {
			idx_start++;
			*delimiter = my_str[++idx];
			/* a backslash at the very end of the string names no
			 * delimiter; scanning on would start past the NUL */
			if (*delimiter == '\0')
				bb_error_msg_and_die("unterminated match expression");
		}
		idx = index_of_next_unescaped_regexp_delim(*delimiter, my_str, idx + 1);
		if (idx == -1) {
			bb_error_msg_and_die("unterminated match expression");
		}
		my_str[idx] = '\0';
		*regex = (regex_t *)xmalloc(sizeof(regex_t));
		xregcomp(*regex, my_str+idx_start, REG_NEWLINE);
		idx++; /* so it points to the next character after the last '/' */
	}
	return idx;
}
218
219 static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr)
220 {
221         int oldidx;
222         int cflags = 0;
223         char *match;
224         int idx = 0;
225         int j;
226
227         /*
228          * the string that gets passed to this function should look like this:
229          *    s/match/replace/gIp
230          *    ||     |        |||
231          *    mandatory       optional
232          *
233          *    (all three of the '/' slashes are mandatory)
234          */
235
236         /* verify that the 's' is followed by something.  That something
237          * (typically a 'slash') is now our regexp delimiter... */
238         if (substr[idx] == '\0')
239                 bb_error_msg_and_die("bad format in substitution expression");
240         else
241             sed_cmd->delimiter=substr[idx];
242
243         /* save the match string */
244         oldidx = idx+1;
245         idx = index_of_next_unescaped_regexp_delim(sed_cmd->delimiter, substr, ++idx);
246         if (idx == -1)
247                 bb_error_msg_and_die("bad format in substitution expression");
248         match = bb_xstrndup(substr + oldidx, idx - oldidx);
249
250         /* determine the number of back references in the match string */
251         /* Note: we compute this here rather than in the do_subst_command()
252          * function to save processor time, at the expense of a little more memory
253          * (4 bits) per sed_cmd */
254         
255         /* sed_cmd->num_backrefs = 0; */ /* XXX: not needed? --apparently not */ 
256         for (j = 0; match[j]; j++) {
257                 /* GNU/POSIX sed does not save more than nine backrefs */
258                 if (match[j] == '\\' && match[j+1] == '(' && sed_cmd->num_backrefs <= 9)
259                         sed_cmd->num_backrefs++;
260         }
261
262         /* save the replacement string */
263         oldidx = idx+1;
264         idx = index_of_next_unescaped_regexp_delim(sed_cmd->delimiter, substr, ++idx);
265         if (idx == -1)
266                 bb_error_msg_and_die("bad format in substitution expression");
267         sed_cmd->replace = bb_xstrndup(substr + oldidx, idx - oldidx);
268
269         /* process the flags */
270         while (substr[++idx]) {
271                 switch (substr[idx]) {
272                         case 'g':
273                                 sed_cmd->sub_g = 1;
274                                 break;
275                         /* Hmm, i dont see the I option mentioned in the standard */
276                         case 'I':
277                                 cflags |= REG_ICASE;
278                                 break;
279                         case 'p':
280                                 sed_cmd->sub_p = 1;
281                                 break;
282                         default:
283                                 /* any whitespace or semicolon trailing after a s/// is ok */
284                                 if (strchr(semicolon_whitespace, substr[idx]))
285                                         goto out;
286                                 /* else */
287                                 bb_error_msg_and_die("bad option in substitution expression");
288                 }
289         }
290
291 out:    
292         /* compile the match string into a regex */
293         sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t));
294         xregcomp(sed_cmd->sub_match, match, cflags);
295         free(match);
296
297         return idx;
298 }
299
300 static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr)
301 {
302         int i, j;
303
304         /*
305          * the string that gets passed to this function should look like this:
306          *
307          *    need one of these 
308          *    |
309          *    |    this backslash (immediately following the edit command) is mandatory
310          *    |    |
311          *    [aic]\
312          *    TEXT1\
313          *    TEXT2\
314          *    TEXTN
315          *
316          * as soon as we hit a TEXT line that has no trailing '\', we're done.
317          * this means a command like:
318          *
319          * i\
320          * INSERTME
321          *
322          * is a-ok.
323          *
324          */
325         if ((*editstr != '\\') || ((editstr[1] != '\n') && (editstr[1] != '\r'))) {
326                 bb_error_msg_and_die("bad format in edit expression");
327         }
328
329         /* store the edit line text */
330         sed_cmd->editline = xmalloc(strlen(&editstr[2]) + 2);
331         for (i = 2, j = 0; editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL;
332                         i++, j++) {
333                 if ((editstr[i] == '\\') && strchr("\n\r", editstr[i+1]) != NULL) {
334                         sed_cmd->editline[j] = '\n';
335                         i++;
336                 } else
337                         sed_cmd->editline[j] = editstr[i];
338         }
339
340         /* figure out if we need to add a newline */
341         if (sed_cmd->editline[j-1] != '\n')
342                 sed_cmd->editline[j++] = '\n';
343
344         /* terminate string */
345         sed_cmd->editline[j] = '\0';
346
347         return i;
348 }
349
350
351 static int parse_file_cmd(sed_cmd_t *sed_cmd, const char *filecmdstr)
352 {
353         int idx = 0;
354         int filenamelen = 0;
355
356         /*
357          * the string that gets passed to this function should look like this:
358          *    '[ ]filename'
359          *      |  |
360          *      |  a filename
361          *      |
362          *     optional whitespace
363
364          *   re: the file to be read, the GNU manual says the following: "Note that
365          *   if filename cannot be read, it is treated as if it were an empty file,
366          *   without any error indication." Thus, all of the following commands are
367          *   perfectly leagal:
368          *
369          *   sed -e '1r noexist'
370          *   sed -e '1r ;'
371          *   sed -e '1r'
372          */
373
374         /* the file command may be followed by whitespace; move past it. */
375         while (isspace(filecmdstr[++idx]))
376                 { ; }
377                 
378         /* the first non-whitespace we get is a filename. the filename ends when we
379          * hit a normal sed command terminator or end of string */
380         filenamelen = strcspn(&filecmdstr[idx], semicolon_whitespace);
381         sed_cmd->filename = xmalloc(filenamelen + 1);
382         safe_strncpy(sed_cmd->filename, &filecmdstr[idx], filenamelen + 1);
383
384         return idx + filenamelen;
385 }
386
387 /*
388  *  Process the commands arguments
389  */
390 static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
391 {
392         /* handle (s)ubstitution command */
393         if (sed_cmd->cmd == 's') {
394                 cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
395         }
396         /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
397         else if (strchr("aic", sed_cmd->cmd)) {
398                 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
399                         bb_error_msg_and_die("only a beginning address can be specified for edit commands");
400                 cmdstr += parse_edit_cmd(sed_cmd, cmdstr);
401         }
402         /* handle file cmds: (r)ead */
403         else if (sed_cmd->cmd == 'r') {
404                 if (sed_cmd->end_line || sed_cmd->end_match)
405                         bb_error_msg_and_die("Command only uses one address");
406                 cmdstr += parse_file_cmd(sed_cmd, cmdstr);
407         }
408         /* handle branch commands */
409         else if (strchr(":b", sed_cmd->cmd)) {
410                 int length;
411
412                 cmdstr += strspn(cmdstr, " ");
413                 length = strcspn(cmdstr, "; ");
414                 if (length > SED_LABEL_LENGTH) {
415                         length = SED_LABEL_LENGTH;
416                 }
417                 strncpy(sed_cmd->label, cmdstr, length);
418         cmdstr += length;
419         }
420         /* if it wasnt a single-letter command that takes no arguments
421          * then it must be an invalid command.
422          */
423         else if (strchr("nNpPqd=", sed_cmd->cmd) == 0) {
424                 bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd);
425         }
426
427         /* give back whatever's left over */
428         return(cmdstr);
429 }
430
431 static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr)
432 {
433         
434         /* Skip over leading whitespace and semicolons */
435         cmdstr += strspn(cmdstr, semicolon_whitespace);
436
437         /* if we ate the whole thing, that means there was just trailing
438          * whitespace or a final / no-op semicolon. either way, get out */
439         if (*cmdstr == '\0') {
440                 return(NULL);
441         }
442
443         /* if this is a comment, jump past it and keep going */
444         if (*cmdstr == '#') {
445                 return(strpbrk(cmdstr, "\n\r"));
446         }
447
448         /* parse the command
449          * format is: [addr][,addr]cmd
450          *            |----||-----||-|
451          *            part1 part2  part3
452          */
453
454         /* first part (if present) is an address: either a '$', a number or a /regex/ */
455         cmdstr += get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
456
457         /* second part (if present) will begin with a comma */
458         if (*cmdstr == ',') {
459                 int idx;
460                 cmdstr++;
461                 idx = get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
462                 if (idx == 0) {
463                         bb_error_msg_and_die("get_address: no address found in string\n"
464                                 "\t(you probably didn't check the string you passed me)");
465                 }
466                 cmdstr += idx;
467         }
468
469         /* skip whitespace before the command */
470         while (isspace(*cmdstr)) {
471                 cmdstr++;
472         }
473
474         /* there my be the inversion flag between part2 and part3 */
475         if (*cmdstr == '!') {
476                 sed_cmd->invert = 1;
477                 cmdstr++;
478
479 #ifdef SED_FEATURE_STRICT_CHECKING
480                 /* According to the spec
481                  * It is unspecified whether <blank>s can follow a '!' character,
482                  * and conforming applications shall not follow a '!' character
483                  * with <blank>s.
484                  */
485                 if (isblank(cmdstr[idx]) {
486                         bb_error_msg_and_die("blank follows '!'");
487                 }
488 #else 
489                 /* skip whitespace before the command */
490                 while (isspace(*cmdstr)) {
491                         cmdstr++;
492                 }
493 #endif
494
495         }
496
497         /* last part (mandatory) will be a command */
498         if (*cmdstr == '\0')
499                 bb_error_msg_and_die("missing command");
500
501         sed_cmd->cmd = *cmdstr;
502         cmdstr++;
503
504         if (sed_cmd->cmd == '{') {
505                 do {
506                         sed_cmd_t *sed_cmd_new;
507                         char *end_ptr = strpbrk(cmdstr, ";}");
508
509                         *end_ptr = '\0';
510                         sed_cmd_new = xcalloc(1, sizeof(sed_cmd_t));
511                         sed_cmd_new->beg_match = sed_cmd->beg_match;
512                         sed_cmd_new->end_match = sed_cmd->end_match;
513                         sed_cmd_new->beg_line = sed_cmd->beg_line;
514                         sed_cmd_new->end_line = sed_cmd->end_line;
515                         sed_cmd_new->invert = sed_cmd->invert;
516
517                         add_cmd(sed_cmd_new, cmdstr);
518                         cmdstr = end_ptr + 1;
519                 } while (*cmdstr != '\0');
520         } else {
521                 cmdstr = parse_cmd_str(sed_cmd, cmdstr);
522
523                 /* Add the command to the command array */
524                 sed_cmd_tail->linear = sed_cmd;
525                 sed_cmd_tail = sed_cmd_tail->linear;
526         }
527         return(cmdstr);
528 }
529
530 static void add_cmd_str(char *cmdstr)
531 {
532         do {
533                 sed_cmd_t *sed_cmd;
534                 sed_cmd = xcalloc(1, sizeof(sed_cmd_t));
535                 cmdstr = add_cmd(sed_cmd, cmdstr);
536         } while (cmdstr && strlen(cmdstr));
537 }
538
539
/*
 * load_cmd_file - read a sed script from filename and add every command in
 * it to the command list.  A line ending in a backslash is joined with the
 * line that follows it before being parsed.
 */
static void load_cmd_file(char *filename)
{
	FILE *cmdfile;
	char *line;
	char *nextline;
	char *e;

	cmdfile = bb_xfopen(filename, "r");

	while ((line = bb_get_line_from_file(cmdfile)) != NULL) {
		/* if a line ends with '\' it needs the next line appended to it */
		while (((e = last_char_is(line, '\n')) != NULL)
			   && (e > line) && (e[-1] == '\\')
			   && ((nextline = bb_get_line_from_file(cmdfile)) != NULL)) {
			line = xrealloc(line, (e - line) + 1 + strlen(nextline) + 1);
			strcat(line, nextline);
			free(nextline);
		}
		/* eat trailing newline (if any) --if I don't do this, edit commands
		 * (aic) will print an extra newline */
		chomp(line);
		add_cmd_str(line);
		free(line);
	}

	/* the script has been parsed completely; release the stream */
	fclose(cmdfile);
}
565
/*
 * Growable output buffer used while building a substituted line.
 *
 * The last byte of the allocation always holds PIPE_MAGIC as an end
 * marker; every other not-yet-written byte is zero.
 */
struct pipeline {
	char *buf;	/* the allocation */
	int idx;	/* index the next character is written to */
	int len;	/* size of the allocation in bytes */
};

#define PIPE_MAGIC 0x7f	/* end-of-buffer marker */
#define PIPE_GROW 64	/* number of bytes added whenever the buffer is full */

/*
 * pipe_putc - append c to the pipeline.  When the write position has
 * reached the end marker, the buffer is first enlarged by PIPE_GROW
 * zero-filled bytes and the marker is moved to the new last byte.
 */
void pipe_putc(struct pipeline *const pipeline, char c)
{
	if (pipeline->buf[pipeline->idx] == PIPE_MAGIC) {
		pipeline->buf =
			xrealloc(pipeline->buf, pipeline->len + PIPE_GROW);
		memset(pipeline->buf + pipeline->len, 0, PIPE_GROW);
		pipeline->len += PIPE_GROW;
		pipeline->buf[pipeline->len - 1] = PIPE_MAGIC;
	}
	pipeline->buf[pipeline->idx++] = (c);
}

/* shorthand for functions that have a local variable named 'pipeline' */
#define pipeputc(c)	pipe_putc(pipeline, c)
597
/*
 * print_subst_w_backrefs - append the expansion of a replacement string to
 * the pipeline.
 *
 * line      the text the regex was matched against
 * replace   the replacement string: \1..\9 expand to the corresponding
 *           subexpression, an unescaped '&' to the whole match, and
 *           backslash + any other character to that character
 * regmatch  match offsets into line, as filled in by regexec()
 * pipeline  the output buffer
 * matches   highest subexpression number available in regmatch
 */
static void print_subst_w_backrefs(const char *line, const char *replace,
	regmatch_t *regmatch, struct pipeline *const pipeline, int matches)
{
	int i;

	/* go through the replacement string */
	for (i = 0; replace[i]; i++) {
		/* if we find a backreference (\1, \2, etc.) print the backref'ed text */
		if (replace[i] == '\\' && isdigit((unsigned char)replace[i+1])) {
			int j;
			int backref;

			++i; /* i now indexes the backref number, instead of the leading slash */
			backref = replace[i] - '0';
			/* print out the text held in regmatch[backref] */
			if (backref <= matches && regmatch[backref].rm_so != -1)
				for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; j++)
					pipeputc(line[j]);
		}

		/* if we find a backslash escaped character, print the character */
		else if (replace[i] == '\\') {
			/* a backslash at the very end escapes nothing: emit it as
			 * is and stop, rather than stepping over the terminator */
			if (replace[i+1] == '\0') {
				pipeputc(replace[i]);
				break;
			}
			++i;
			pipeputc(replace[i]);
		}

		/* an '&' that gets here is unescaped (escaped characters were
		 * consumed by the branch above): print the whole matched text,
		 * whose offsets are held in regmatch[0] */
		else if (replace[i] == '&') {
			int j;

			for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++)
				pipeputc(line[j]);
		}
		/* nothing special, just copy this char of the replacement string */
		else
			pipeputc(replace[i]);
	}
}
640
641 static int do_subst_command(const sed_cmd_t *sed_cmd, char **line)
642 {
643         char *hackline = *line;
644         struct pipeline thepipe = { NULL, 0 , 0};
645         struct pipeline *const pipeline = &thepipe;
646         int altered = 0;
647         regmatch_t *regmatch = NULL;
648
649         /* we only proceed if the substitution 'search' expression matches */
650         if (regexec(sed_cmd->sub_match, hackline, 0, NULL, 0) == REG_NOMATCH)
651                 return 0;
652
653         /* whaddaya know, it matched. get the number of back references */
654         regmatch = xmalloc(sizeof(regmatch_t) * (sed_cmd->num_backrefs+1));
655
656         /* allocate more PIPE_GROW bytes
657            if replaced string is larger than original */
658         thepipe.len = strlen(hackline)+PIPE_GROW;
659         thepipe.buf = xcalloc(1, thepipe.len);
660         /* buffer magic */
661         thepipe.buf[thepipe.len-1] = PIPE_MAGIC;
662
663         /* and now, as long as we've got a line to try matching and if we can match
664          * the search string, we make substitutions */
665         while ((*hackline || !altered) && (regexec(sed_cmd->sub_match, hackline,
666                                         sed_cmd->num_backrefs+1, regmatch, 0) != REG_NOMATCH) ) {
667                 int i;
668
669                 /* print everything before the match */
670                 for (i = 0; i < regmatch[0].rm_so; i++)
671                         pipeputc(hackline[i]);
672
673                 /* then print the substitution string */
674                 print_subst_w_backrefs(hackline, sed_cmd->replace, regmatch, 
675                                 pipeline, sed_cmd->num_backrefs);
676
677                 /* advance past the match */
678                 hackline += regmatch[0].rm_eo;
679                 /* flag that something has changed */
680                 altered++;
681
682                 /* if we're not doing this globally, get out now */
683                 if (!sed_cmd->sub_g)
684                         break;
685         }
686
687         for (; *hackline; hackline++) pipeputc(*hackline);
688         if (thepipe.buf[thepipe.idx] == PIPE_MAGIC) thepipe.buf[thepipe.idx] = 0;
689
690         /* cleanup */
691         free(regmatch);
692
693         free(*line);
694         *line = thepipe.buf;
695         return altered;
696 }
697
698 static sed_cmd_t *branch_to(const char *label)
699 {
700         sed_cmd_t *sed_cmd;
701         for(sed_cmd = sed_cmd_head.linear; sed_cmd; sed_cmd = sed_cmd->linear) {
702                 if (strcmp(sed_cmd->label, label) == 0) {
703                         break;
704                 }
705         }
706
707         /* If no match returns last command */
708         return(sed_cmd);
709 }
710
711 static void process_file(FILE *file)
712 {
713         char *line;
714         static int linenum = 0; /* GNU sed does not restart counting lines at EOF */
715         unsigned int still_in_range = 0;
716         int altered;
717
718         line = bb_get_chomped_line_from_file(file);
719         if (line == NULL) {
720                 return;
721         }
722
723         /* go through every line in the file */
724         do {
725                 char *next_line;
726                 sed_cmd_t *sed_cmd;
727
728                 /* Read one line in advance so we can act on the last line, the '$' address */
729                 next_line = bb_get_chomped_line_from_file(file);
730
731                 linenum++;
732                 altered = 0;
733
734                 /* for every line, go through all the commands */
735                 for (sed_cmd = sed_cmd_head.linear; sed_cmd; sed_cmd = sed_cmd->linear) {
736                         int deleted = 0;
737
738                         /*
739                          * entry point into sedding...
740                          */
741                         int matched = (
742                                         /* no range necessary */
743                                         (sed_cmd->beg_line == 0 && sed_cmd->end_line == 0 &&
744                                          sed_cmd->beg_match == NULL &&
745                                          sed_cmd->end_match == NULL) ||
746                                         /* this line number is the first address we're looking for */
747                                         (sed_cmd->beg_line && (sed_cmd->beg_line == linenum)) ||
748                                         /* this line matches our first address regex */
749                                         (sed_cmd->beg_match && (regexec(sed_cmd->beg_match, line, 0, NULL, 0) == 0)) ||
750                                         /* we are currently within the beginning & ending address range */
751                                         still_in_range || ((sed_cmd->beg_line == -1) && (next_line == NULL))
752                            );
753
754                         if (sed_cmd->invert ^ matched) {
755
756                                 /*
757                                  * actual sedding
758                                  */
759                                 switch (sed_cmd->cmd) {
760                                         case '=':
761                                                 printf("%d\n", linenum);
762                                                 break;
763                                         case 'P': {     /* Write the current pattern space upto the first newline */
764                                                         char *tmp = strchr(line, '\n');
765                                                         if (tmp) {
766                                                                 *tmp = '\0';
767                                                         }
768                                                 }
769                                         case 'p':       /* Write the current pattern space to output */
770                                                 puts(line);
771                                                 break;
772                                         case 'd':
773                                                 altered++;
774                                                 deleted = 1;
775                                                 break;
776
777                                         case 's':
778
779                                                 /*
780                                                  * Some special cases for 's' printing to make it compliant with
781                                                  * GNU sed printing behavior (aka "The -n | s///p Matrix"):
782                                                  *
783                                                  *    -n ONLY = never print anything regardless of any successful
784                                                  *    substitution
785                                                  *
786                                                  *    s///p ONLY = always print successful substitutions, even if
787                                                  *    the line is going to be printed anyway (line will be printed
788                                                  *    twice).
789                                                  *
790                                                  *    -n AND s///p = print ONLY a successful substitution ONE TIME;
791                                                  *    no other lines are printed - this is the reason why the 'p'
792                                                  *    flag exists in the first place.
793                                                  */
794
795                                                 /* if the user specified that they didn't want anything printed (i.e., a -n
796                                                  * flag and no 'p' flag after the s///), then there's really no point doing
797                                                  * anything here. */
798                                                 if (be_quiet && !sed_cmd->sub_p)
799                                                         break;
800
801                                                 /* we print the line once, unless we were told to be quiet */
802                                                 if (!be_quiet)
803                                                         altered |= do_subst_command(sed_cmd, &line);
804
805                                                 /* we also print the line if we were given the 'p' flag
806                                                  * (this is quite possibly the second printing) */
807                                                 if (sed_cmd->sub_p)
808                                                         altered |= do_subst_command(sed_cmd, &line);
809                                                 if (altered && ((sed_cmd->linear == NULL) || (sed_cmd->linear->cmd != 's')))
810                                                         puts(line);
811
812                                                 break;
813
814                                         case 'a':
815                                                 puts(line);
816                                                 fputs(sed_cmd->editline, stdout);
817                                                 altered++;
818                                                 break;
819
820                                         case 'i':
821                                                 fputs(sed_cmd->editline, stdout);
822                                                 break;
823
824                                         case 'c':
825                                                 /* single-address case */
826                                                 if ((sed_cmd->end_match == NULL && sed_cmd->end_line == 0)
827                                                 /* multi-address case */
828                                                 /* - matching text */
829                                                 || (sed_cmd->end_match && (regexec(sed_cmd->end_match, line, 0, NULL, 0) == 0))
830                                                 /* - matching line numbers */
831                                                 || (sed_cmd->end_line > 0 && sed_cmd->end_line == linenum))
832                                                 {
833                                                         fputs(sed_cmd->editline, stdout);
834                                                 }
835                                                 altered++;
836
837                                                 break;
838
839                                         case 'r': {
840                                                         FILE *outfile;
841                                                         puts(line);
842                                                         outfile = fopen(sed_cmd->filename, "r");
843                                                         if (outfile)
844                                                                 bb_xprint_and_close_file(outfile);
845                                                                 /* else if we couldn't open the output file,
846                                                                  * no biggie, just don't print anything */
847                                                                 altered++;
848                                                 }
849                                                 break;
850                                         case 'q':       /* Branch to end of script and quit */
851                                                 deleted = 1;
852                                                 /* Exit the outer while loop */
853                                                 free(next_line);
854                                                 next_line = NULL;
855                                                 break;
856                                         case 'n':       /* Read next line from input */
857                                                 free(line);
858                                                 line = next_line;
859                                                 next_line = bb_get_chomped_line_from_file(file);
860                                                 linenum++;
861                                                 break;
862                                         case 'N':       /* Append the next line to the current line */
863                                                 line = realloc(line, strlen(line) + strlen(next_line) + 2);
864                                                 strcat(line, "\n");
865                                                 strcat(line, next_line);
866                                                 next_line = bb_get_chomped_line_from_file(file);
867                                                 linenum++;
868                                                 break;
869                                         case 'b':
870                                                 sed_cmd = branch_to(sed_cmd->label);
871                                                 break;
872 //                                      case ':':
873 //                                              break;
874                                 }
875                         }
876
877                         /*
878                          * exit point from sedding...
879                          */
880                         if (matched) {
881                                 if (
882                                         /* this is a single-address command or... */
883                                         (sed_cmd->end_line == 0 && sed_cmd->end_match == NULL) || (
884                                                 /* we were in the middle of our address range (this
885                                                  * isn't the first time through) and.. */
886                                                 (still_in_range == 1) && (
887                                                         /* this line number is the last address we're looking for or... */
888                                                         (sed_cmd->end_line && (sed_cmd->end_line == linenum)) ||
889                                                         /* this line matches our last address regex */
890                                                         (sed_cmd->end_match && (regexec(sed_cmd->end_match, line, 0, NULL, 0) == 0))
891                                                 )
892                                         )
893                                 ) {
894                                         /* we're out of our address range */
895                                         still_in_range = 0;
896                                 }
897
898                                 /* didn't hit the exit? then we're still in the middle of an address range */
899                                 else {
900                                         still_in_range = 1;
901                                 }
902                         }
903
904                         if (deleted)
905                                 break;
906                 }
907
908                 /* we will print the line unless we were told to be quiet or if the
909                  * line was altered (via a 'd'elete or 's'ubstitution), in which case
910                  * the altered line was already printed */
911                 if (!be_quiet && !altered)
912                         puts(line);
913
914                 free(line);
915                 line = next_line;
916         } while (line);
917 }
918
919 extern int sed_main(int argc, char **argv)
920 {
921         int opt, status = EXIT_SUCCESS;
922
923 #ifdef CONFIG_FEATURE_CLEAN_UP
924         /* destroy command strings on exit */
925         if (atexit(destroy_cmd_strs) == -1)
926                 bb_perror_msg_and_die("atexit");
927 #endif
928
929         /* do normal option parsing */
930         while ((opt = getopt(argc, argv, "ne:f:")) > 0) {
931                 switch (opt) {
932                         case 'n':
933                                 be_quiet++;
934                                 break;
935                         case 'e':
936                                 add_cmd_str(optarg);
937                                 break;
938                         case 'f': 
939                                 load_cmd_file(optarg);
940                                 break;
941                         default:
942                                 bb_show_usage();
943                 }
944         }
945
946         /* if we didn't get a pattern from a -e and no command file was specified,
947          * argv[optind] should be the pattern. no pattern, no worky */
948         if (sed_cmd_head.linear == NULL) {
949                 if (argv[optind] == NULL)
950                         bb_show_usage();
951                 else {
952                         add_cmd_str(argv[optind]);
953                         optind++;
954                 }
955         }
956
957         /* argv[(optind)..(argc-1)] should be names of file to process. If no
958          * files were specified or '-' was specified, take input from stdin.
959          * Otherwise, we process all the files specified. */
960         if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) {
961                 process_file(stdin);
962         }
963         else {
964                 int i;
965                 FILE *file;
966                 for (i = optind; i < argc; i++) {
967                         file = bb_wfopen(argv[i], "r");
968                         if (file) {
969                                 process_file(file);
970                                 fclose(file);
971                         } else
972                                 status = EXIT_FAILURE;
973                 }
974         }
975         
976         return status;
977 }