unzip: use printable_string() for printing filenames
[oweals/busybox.git] / editors / sed.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * sed.c - very minimalist version of sed
4  *
5  * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
6  * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
7  * Copyright (C) 2002  Matt Kraai
8  * Copyright (C) 2003 by Glenn McGrath
9  * Copyright (C) 2003,2004 by Rob Landley <rob@landley.net>
10  *
11  * MAINTAINER: Rob Landley <rob@landley.net>
12  *
13  * Licensed under GPLv2, see file LICENSE in this source tree.
14  */
15 /* Code overview.
16  *
17  * Files are laid out to avoid unnecessary function declarations.  So for
18  * example, every function add_cmd calls occurs before add_cmd in this file.
19  *
20  * add_cmd() is called on each line of sed command text (from a file or from
21  * the command line).  It calls get_address() and parse_cmd_args().  The
22  * resulting sed_cmd_t structures are appended to a linked list
23  * (G.sed_cmd_head/G.sed_cmd_tail).
24  *
25  * process_files() does actual sedding, reading data lines from each input FILE*
26  * (which could be stdin) and applying the sed command list (sed_cmd_head) to
27  * each of the resulting lines.
28  *
29  * sed_main() is where external code calls into this, with a command line.
30  */
31 /* Supported features and commands in this version of sed:
32  *
33  * - comments ('#')
34  * - address matching: num|/matchstr/[,num|/matchstr/|$]command
35  * - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
36  * - edit commands: (a)ppend, (i)nsert, (c)hange
37  * - file commands: (r)ead
38  * - backreferences in substitution expressions (\0, \1, \2...\9)
39  * - grouped commands: {cmd1;cmd2}
40  * - transliteration (y/source-chars/dest-chars/)
41  * - pattern space hold space storing / swapping (g, h, x)
42  * - labels / branching (: label, b, t, T)
43  *
44  * (Note: Specifying an address (range) to match is *optional*; commands
45  * default to the whole pattern space if no specific address match was
46  * requested.)
47  *
48  * Todo:
49  * - Create a wrapper around regex to make libc's regex conform with sed
50  *
51  * Reference
52  * http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
53  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html
54  * http://sed.sourceforge.net/sedfaq3.html
55  */
56 //config:config SED
57 //config:       bool "sed (12 kb)"
58 //config:       default y
59 //config:       help
60 //config:       sed is used to perform text transformations on a file
61 //config:       or input from a pipeline.
62
63 //applet:IF_SED(APPLET(sed, BB_DIR_BIN, BB_SUID_DROP))
64
65 //kbuild:lib-$(CONFIG_SED) += sed.o
66
67 //usage:#define sed_trivial_usage
68 //usage:       "[-i[SFX]] [-nrE] [-f FILE]... [-e CMD]... [FILE]...\n"
69 //usage:       "or: sed [-i[SFX]] [-nrE] CMD [FILE]..."
70 //usage:#define sed_full_usage "\n\n"
71 //usage:       "        -e CMD  Add CMD to sed commands to be executed"
72 //usage:     "\n        -f FILE Add FILE contents to sed commands to be executed"
73 //usage:     "\n        -i[SFX] Edit files in-place (otherwise sends to stdout)"
74 //usage:     "\n                Optionally back files up, appending SFX"
75 //usage:     "\n        -n      Suppress automatic printing of pattern space"
76 //usage:     "\n        -r,-E   Use extended regex syntax"
77 //usage:     "\n"
78 //usage:     "\nIf no -e or -f, the first non-option argument is the sed command string."
79 //usage:     "\nRemaining arguments are input files (stdin if none)."
80 //usage:
81 //usage:#define sed_example_usage
82 //usage:       "$ echo \"foo\" | sed -e 's/f[a-zA-Z]o/bar/g'\n"
83 //usage:       "bar\n"
84
85 #include "libbb.h"
86 #include "common_bufsiz.h"
87 #include "xregex.h"
88
/* Debug tracing is compiled out by default.
 * Swap the two branches to route dbg() to bb_error_msg().
 */
#if 1
# define dbg(...) ((void)0)
#else
# define dbg(...) bb_error_msg(__VA_ARGS__)
#endif


/* Option flag bits */
enum {
	OPT_in_place = (1 << 0),
};
99
100 /* Each sed command turns into one of these structures. */
/* One parsed sed command. Commands are chained through 'next'. */
typedef struct sed_cmd_s {
	/* Members are ordered by alignment requirement: currently 36 bytes on x86 */
	struct sed_cmd_s *next; /* following command in the list, NULL at the end */

	/* Addresses */
	regex_t *beg_match;     /* first address as regex: sed -e '/match/cmd' */
	regex_t *end_match;     /* second address as regex: sed -e '/match/,/end_match/cmd' */
	regex_t *sub_match;     /* pattern of 's/sub_match/string/' */
	int beg_line;           /* 'sed 1p'; 0 means the command applies to all lines */
	int beg_line_orig;      /* saved copy of beg_line, needed for -i */
	int end_line;           /* 'sed 1,3p'; 0: one line only, -1: last line ($), -2-N: +N */
	int end_line_orig;      /* saved copy of end_line */

	FILE *sw_file;          /* output file of the (s...w / w) command, NULL if none */
	char *string;           /* argument text for the (saicytb) commands */

	unsigned which_match;   /* (s) which occurrence to replace, 0 means all */

	/* Bitfields (gcc won't group them if we don't) */
	unsigned invert:1;      /* '!' followed the address */
	unsigned in_match:1;    /* is the next line also part of the match? */
	unsigned sub_p:1;       /* (s) 'p' flag given */

	char sw_last_char;      /* last line written by (sw) had no '\n' */

	/* General fields */
	char cmd;               /* command letter: abcdDgGhHilnNpPqrstwxy:={} */
} sed_cmd_t;
129
130 static const char semicolon_whitespace[] ALIGN1 = "; \n\r\t\v";
131
132 struct globals {
133         /* options */
134         int be_quiet, regex_type;
135
136         FILE *nonstdout;
137         char *outname, *hold_space;
138         smallint exitcode;
139
140         /* list of input files */
141         int current_input_file, last_input_file;
142         char **input_file_list;
143         FILE *current_fp;
144
145         regmatch_t regmatch[10];
146         regex_t *previous_regex_ptr;
147
148         /* linked list of sed commands */
149         sed_cmd_t *sed_cmd_head, **sed_cmd_tail;
150
151         /* linked list of append lines */
152         llist_t *append_head;
153
154         char *add_cmd_line;
155
156         struct pipeline {
157                 char *buf;  /* Space to hold string */
158                 int idx;    /* Space used */
159                 int len;    /* Space allocated */
160         } pipeline;
161 } FIX_ALIASING;
162 #define G (*(struct globals*)bb_common_bufsiz1)
163 #define INIT_G() do { \
164         setup_common_bufsiz(); \
165         BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
166         G.sed_cmd_tail = &G.sed_cmd_head; \
167 } while (0)
168
169
#if ENABLE_FEATURE_CLEAN_UP
/* Free the append list, the command list and the hold space,
 * and close every stream that is still open.
 */
static void sed_free_and_close_stuff(void)
{
	sed_cmd_t *cur;
	sed_cmd_t *following;

	llist_free(G.append_head, free);

	for (cur = G.sed_cmd_head; cur != NULL; cur = following) {
		following = cur->next;

		if (cur->sw_file != NULL)
			fclose(cur->sw_file);

		/* The compiled regexps (beg_match, end_match, sub_match) are
		 * deliberately NOT freed here: get_address() can reuse one
		 * regexp for constructs such as /regexp/cmd1;//cmd2, so
		 * freeing them per command would lead to double-frees.
		 */
		free(cur->string);
		free(cur);
	}

	free(G.hold_space);

	if (G.current_fp != NULL)
		fclose(G.current_fp);
}
#else
void sed_free_and_close_stuff(void);
#endif
213
214 /* If something bad happens during -i operation, delete temp file */
215
216 static void cleanup_outname(void)
217 {
218         if (G.outname) unlink(G.outname);
219 }
220
/*
 * Copy 'string' to 'dest' while translating backslash escapes.
 *
 * dest:   output buffer; may be the same buffer as 'string' (the result is
 *         never longer than the input). Needs room for len + 1 bytes.
 * string: input text.
 * len:    number of input characters to process, or -1 for strlen(string).
 * from/to: when 'to' is non-NUL, every "\from" becomes the single character
 *         'to' and all other escapes are copied unchanged. When 'to' is NUL,
 *         every "\any" becomes 'any'.
 *
 * Returns the length of the result (excluding the terminating NUL).
 *
 * Characters at or beyond string[len] are never examined: a backslash that
 * is the last character inside the window is treated as if it were followed
 * by NUL. This keeps the output within len + 1 bytes even when 'string' is
 * a slice of a longer buffer.
 */
static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to)
{
	char *d = dest;
	int i = 0;

	if (len == -1)
		len = strlen(string);

	while (i < len) {
		if (string[i] == '\\') {
			/* Look ahead, but never past the caller-supplied length */
			char next = (i + 1 < len) ? string[i + 1] : '\0';

			if (!to || next == from) {
				*d = to ? to : next;
				if (*d == '\0')
					return d - dest;
				i += 2;
				d++;
				continue;
			}
			i++; /* skip backslash in string[] */
			*d++ = '\\';
			if (i >= len)
				break; /* the backslash was the last character */
			/* fall through: copy next char verbatim */
		}
		*d = string[i++];
		if (*d == '\0')
			return d - dest;
		d++;
	}
	*d = '\0';
	return d - dest;
}
251
/*
 * Return a newly allocated copy of the first 'len' characters of 'string'
 * with \n, \t and \r turned into the characters they name.
 * (sed recognizes \n; GNU sed also recognizes \t and \r.)
 */
static char *copy_parsing_escapes(const char *string, int len)
{
	/* each entry: { letter after the backslash, character it becomes } */
	static const char escape_pairs[][2] = {
		{ 'n', '\n' },
		{ 't', '\t' },
		{ 'r', '\r' },
	};
	char *dest = xmalloc(len + 1);
	unsigned k;

	for (k = 0; k < sizeof(escape_pairs) / sizeof(escape_pairs[0]); k++) {
		/* the first pass copies into dest, later passes rewrite dest in place */
		len = parse_escapes(dest, string, len, escape_pairs[k][0], escape_pairs[k][1]);
		string = dest;
	}
	return dest;
}
265
266
/*
 * index_of_next_unescaped_regexp_delim - scan 'str' from its start and return
 * the index of the first delimiter character (typically '/') that is neither
 * preceded by a backslash nor located inside a [bracket expression].
 * Passing the delimiter negated disables the square bracket handling.
 * Dies with "unmatched 'X'" if the end of the string is reached first.
 */
static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
{
	/* >= 0: index of the '[' currently open, -1: none open,
	 * -2: bracket tracking switched off */
	int open_at = -1;
	int after_backslash = 0;
	int pos;

	if (delimiter < 0) {
		open_at = -2;
		delimiter = -delimiter;
	}

	for (pos = 0; str[pos] != '\0'; pos++) {
		char c = str[pos];

		if (open_at >= 0) {
			/* a ']' right after '[' or after '[^' is a literal member */
			int first_member = (open_at == pos - 1)
				|| (open_at == pos - 2 && str[pos - 1] == '^');

			if (c == ']' && !first_member)
				open_at = -1;
			continue;
		}
		if (after_backslash) {
			after_backslash = 0;
			continue;
		}
		if (c == '\\') {
			after_backslash = 1;
			continue;
		}
		if (open_at == -1 && c == '[') {
			open_at = pos;
			continue;
		}
		if (c == delimiter)
			return pos;
	}

	/* if we make it to here, we've hit the end of the string */
	bb_error_msg_and_die("unmatched '%c'", delimiter);
}
305
/*
 * Split "XmatchXreplaceX..." (X being whatever character comes first) into
 * newly allocated *match and *replace strings.
 * Returns the index, relative to cmdstr, of the third delimiter.
 */
static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
{
	const char *p;
	unsigned char delim;
	int span;

	/* the 's' or 'y' must be followed by something; that something
	 * (typically a slash) becomes the delimiter */
	if (cmdstr[0] == '\0')
		bb_error_msg_and_die("bad format in substitution expression");
	delim = cmdstr[0];
	p = cmdstr + 1;

	/* match part: runs up to the second delimiter */
	span = index_of_next_unescaped_regexp_delim(delim, p);
	*match = copy_parsing_escapes(p, span);

	/* replacement part: bracket handling is off here (negated delimiter) */
	p += span + 1;
	span = index_of_next_unescaped_regexp_delim(-(int)delim, p);
	*replace = copy_parsing_escapes(p, span);

	return (p - cmdstr) + span;
}
332
333 /*
334  * returns the index in the string just past where the address ends.
335  */
336 static int get_address(const char *my_str, int *linenum, regex_t ** regex)
337 {
338         const char *pos = my_str;
339
340         if (isdigit(*my_str)) {
341                 *linenum = strtol(my_str, (char**)&pos, 10);
342                 /* endstr shouldn't ever equal NULL */
343         } else if (*my_str == '$') {
344                 *linenum = -1;
345                 pos++;
346         } else if (*my_str == '/' || *my_str == '\\') {
347                 int next;
348                 char delimiter;
349                 char *temp;
350
351                 delimiter = '/';
352                 if (*my_str == '\\')
353                         delimiter = *++pos;
354                 next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
355                 if (next != 0) {
356                         temp = copy_parsing_escapes(pos, next);
357                         G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t));
358                         xregcomp(*regex, temp, G.regex_type);
359                         free(temp);
360                 } else {
361                         *regex = G.previous_regex_ptr;
362                         if (!G.previous_regex_ptr)
363                                 bb_error_msg_and_die("no previous regexp");
364                 }
365                 /* Move position to next character after last delimiter */
366                 pos += (next+1);
367         }
368         return pos - my_str;
369 }
370
/*
 * Grab a filename. Whitespace at start is skipped, then the name runs to
 * the end of the line (newline or end of string).
 *
 * filecmdstr: text following the command letter.
 * retval:     receives a newly allocated copy of the filename. If the name
 *             was ended by a newline (lines glued together), a backslash is
 *             appended to the name in place of that newline.
 *
 * Returns the index in filecmdstr of the character that ended the filename.
 * Dies with "empty filename" if no name is present.
 */
static int parse_file_cmd(/*sed_cmd_t *sed_cmd,*/ const char *filecmdstr, char **retval)
{
	int start = 0, idx, len;

	/* Skip whitespace, then grab filename to end of line */
	while (isspace(filecmdstr[start]))
		start++;
	idx = start;
	while (filecmdstr[idx] && filecmdstr[idx] != '\n')
		idx++;

	if (idx == start)
		bb_error_msg_and_die("empty filename");

	len = idx - start;
	if (filecmdstr[idx] == '\n') {
		/* If lines glued together, put backslash back: copy the name plus
		 * the newline, then overwrite the newline. The index is relative
		 * to the copy, not to filecmdstr.
		 */
		*retval = xstrndup(filecmdstr + start, len + 1);
		(*retval)[len] = '\\';
	} else {
		*retval = xstrndup(filecmdstr + start, len);
	}

	return idx;
}
394
395 static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
396 {
397         int cflags = G.regex_type;
398         char *match;
399         int idx;
400
401         /*
402          * A substitution command should look something like this:
403          *    s/match/replace/ #giIpw
404          *    ||     |        |||
405          *    mandatory       optional
406          */
407         idx = parse_regex_delim(substr, &match, &sed_cmd->string);
408
409         /* determine the number of back references in the match string */
410         /* Note: we compute this here rather than in the do_subst_command()
411          * function to save processor time, at the expense of a little more memory
412          * (4 bits) per sed_cmd */
413
414         /* process the flags */
415
416         sed_cmd->which_match = 1;
417         dbg("s flags:'%s'", substr + idx + 1);
418         while (substr[++idx]) {
419                 dbg("s flag:'%c'", substr[idx]);
420                 /* Parse match number */
421                 if (isdigit(substr[idx])) {
422                         if (match[0] != '^') {
423                                 /* Match 0 treated as all, multiple matches we take the last one. */
424                                 const char *pos = substr + idx;
425 /* FIXME: error check? */
426                                 sed_cmd->which_match = (unsigned)strtol(substr+idx, (char**) &pos, 10);
427                                 idx = pos - substr - 1;
428                         }
429                         continue;
430                 }
431                 /* Skip spaces */
432                 if (isspace(substr[idx]))
433                         continue;
434
435                 switch (substr[idx]) {
436                 /* Replace all occurrences */
437                 case 'g':
438                         if (match[0] != '^')
439                                 sed_cmd->which_match = 0;
440                         break;
441                 /* Print pattern space */
442                 case 'p':
443                         sed_cmd->sub_p = 1;
444                         break;
445                 /* Write to file */
446                 case 'w':
447                 {
448                         char *fname;
449                         idx += parse_file_cmd(/*sed_cmd,*/ substr+idx+1, &fname);
450                         sed_cmd->sw_file = xfopen_for_write(fname);
451                         sed_cmd->sw_last_char = '\n';
452                         free(fname);
453                         break;
454                 }
455                 /* Ignore case (gnu extension) */
456                 case 'i':
457                 case 'I':
458                         cflags |= REG_ICASE;
459                         break;
460                 /* Comment */
461                 case '#':
462                         // while (substr[++idx]) continue;
463                         idx += strlen(substr + idx); // same
464                         /* Fall through */
465                 /* End of command */
466                 case ';':
467                 case '}':
468                         goto out;
469                 default:
470                         dbg("s bad flags:'%s'", substr + idx);
471                         bb_error_msg_and_die("bad option in substitution expression");
472                 }
473         }
474  out:
475         /* compile the match string into a regex */
476         if (*match != '\0') {
477                 /* If match is empty, we use last regex used at runtime */
478                 sed_cmd->sub_match = xzalloc(sizeof(regex_t));
479                 dbg("xregcomp('%s',%x)", match, cflags);
480                 xregcomp(sed_cmd->sub_match, match, cflags);
481                 dbg("regcomp ok");
482         }
483         free(match);
484
485         return idx;
486 }
487
488 /*
489  *  Process the commands arguments
490  */
491 static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
492 {
493         static const char cmd_letters[] ALIGN1 = "saicrw:btTydDgGhHlnNpPqx={}";
494         enum {
495                 IDX_s = 0,
496                 IDX_a,
497                 IDX_i,
498                 IDX_c,
499                 IDX_r,
500                 IDX_w,
501                 IDX_colon,
502                 IDX_b,
503                 IDX_t,
504                 IDX_T,
505                 IDX_y,
506                 IDX_d,
507                 IDX_D,
508                 IDX_g,
509                 IDX_G,
510                 IDX_h,
511                 IDX_H,
512                 IDX_l,
513                 IDX_n,
514                 IDX_N,
515                 IDX_p,
516                 IDX_P,
517                 IDX_q,
518                 IDX_x,
519                 IDX_equal,
520                 IDX_lbrace,
521                 IDX_rbrace,
522                 IDX_nul
523         };
524         unsigned idx;
525
526         BUILD_BUG_ON(sizeof(cmd_letters)-1 != IDX_nul);
527
528         idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters;
529
530         /* handle (s)ubstitution command */
531         if (idx == IDX_s) {
532                 cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
533         }
534         /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
535         else if (idx <= IDX_c) { /* a,i,c */
536                 unsigned len;
537
538                 if (idx < IDX_c) { /* a,i */
539                         if (sed_cmd->end_line || sed_cmd->end_match)
540                                 bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd);
541                 }
542                 for (;;) {
543                         if (*cmdstr == '\n' || *cmdstr == '\\') {
544                                 cmdstr++;
545                                 break;
546                         }
547                         if (!isspace(*cmdstr))
548                                 break;
549                         cmdstr++;
550                 }
551                 len = strlen(cmdstr);
552                 sed_cmd->string = copy_parsing_escapes(cmdstr, len);
553                 cmdstr += len;
554                 /* "\anychar" -> "anychar" */
555                 parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0');
556         }
557         /* handle file cmds: (r)ead */
558         else if (idx <= IDX_w) { /* r,w */
559                 if (idx < IDX_w) { /* r */
560                         if (sed_cmd->end_line || sed_cmd->end_match)
561                                 bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd);
562                 }
563                 cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string);
564                 if (sed_cmd->cmd == 'w') {
565                         sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
566                         sed_cmd->sw_last_char = '\n';
567                 }
568         }
569         /* handle branch commands */
570         else if (idx <= IDX_T) { /* :,b,t,T */
571                 int length;
572
573                 cmdstr = skip_whitespace(cmdstr);
574                 length = strcspn(cmdstr, semicolon_whitespace);
575                 if (length) {
576                         sed_cmd->string = xstrndup(cmdstr, length);
577                         cmdstr += length;
578                 }
579         }
580         /* translation command */
581         else if (idx == IDX_y) {
582                 char *match, *replace;
583                 int i = cmdstr[0];
584
585                 cmdstr += parse_regex_delim(cmdstr, &match, &replace)+1;
586                 /* \n already parsed, but \delimiter needs unescaping. */
587                 parse_escapes(match,   match,   -1, i, i);
588                 parse_escapes(replace, replace, -1, i, i);
589
590                 sed_cmd->string = xzalloc((strlen(match) + 1) * 2);
591                 for (i = 0; match[i] && replace[i]; i++) {
592                         sed_cmd->string[i*2] = match[i];
593                         sed_cmd->string[i*2+1] = replace[i];
594                 }
595                 free(match);
596                 free(replace);
597         }
598         /* if it wasn't a single-letter command that takes no arguments
599          * then it must be an invalid command.
600          */
601         else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */
602                 bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd);
603         }
604
605         /* give back whatever's left over */
606         return cmdstr;
607 }
608
609
610 /* Parse address+command sets, skipping comment lines. */
611
612 static void add_cmd(const char *cmdstr)
613 {
614         sed_cmd_t *sed_cmd;
615         unsigned len, n;
616
617         /* Append this line to any unfinished line from last time. */
618         if (G.add_cmd_line) {
619                 char *tp = xasprintf("%s\n%s", G.add_cmd_line, cmdstr);
620                 free(G.add_cmd_line);
621                 cmdstr = G.add_cmd_line = tp;
622         }
623
624         /* If this line ends with unescaped backslash, request next line. */
625         n = len = strlen(cmdstr);
626         while (n && cmdstr[n-1] == '\\')
627                 n--;
628         if ((len - n) & 1) { /* if odd number of trailing backslashes */
629                 if (!G.add_cmd_line)
630                         G.add_cmd_line = xstrdup(cmdstr);
631                 G.add_cmd_line[len-1] = '\0';
632                 return;
633         }
634
635         /* Loop parsing all commands in this line. */
636         while (*cmdstr) {
637                 /* Skip leading whitespace and semicolons */
638                 cmdstr += strspn(cmdstr, semicolon_whitespace);
639
640                 /* If no more commands, exit. */
641                 if (!*cmdstr) break;
642
643                 /* if this is a comment, jump past it and keep going */
644                 if (*cmdstr == '#') {
645                         /* "#n" is the same as using -n on the command line */
646                         if (cmdstr[1] == 'n')
647                                 G.be_quiet++;
648                         cmdstr = strpbrk(cmdstr, "\n\r");
649                         if (!cmdstr) break;
650                         continue;
651                 }
652
653                 /* parse the command
654                  * format is: [addr][,addr][!]cmd
655                  *            |----||-----||-|
656                  *            part1 part2  part3
657                  */
658
659                 sed_cmd = xzalloc(sizeof(sed_cmd_t));
660
661                 /* first part (if present) is an address: either a '$', a number or a /regex/ */
662                 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
663                 sed_cmd->beg_line_orig = sed_cmd->beg_line;
664
665                 /* second part (if present) will begin with a comma */
666                 if (*cmdstr == ',') {
667                         int idx;
668
669                         cmdstr++;
670                         if (*cmdstr == '+' && isdigit(cmdstr[1])) {
671                                 /* http://sed.sourceforge.net/sedfaq3.html#s3.3
672                                  * Under GNU sed 3.02+, ssed, and sed15+, <address2>
673                                  * may also be a notation of the form +num,
674                                  * indicating the next num lines after <address1> is
675                                  * matched.
676                                  * GNU sed 4.2.1 accepts even "+" (meaning "+0").
677                                  * We don't (we check for isdigit, see above), think
678                                  * about the "+-3" case.
679                                  */
680                                 char *end;
681                                 /* code is smaller compared to using &cmdstr here: */
682                                 idx = strtol(cmdstr+1, &end, 10);
683                                 sed_cmd->end_line = -2 - idx;
684                                 cmdstr = end;
685                         } else {
686                                 idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
687                                 cmdstr += idx;
688                                 idx--; /* if 0, trigger error check below */
689                         }
690                         if (idx < 0)
691                                 bb_error_msg_and_die("no address after comma");
692                         sed_cmd->end_line_orig = sed_cmd->end_line;
693                 }
694
695                 /* skip whitespace before the command */
696                 cmdstr = skip_whitespace(cmdstr);
697
698                 /* Check for inversion flag */
699                 if (*cmdstr == '!') {
700                         sed_cmd->invert = 1;
701                         cmdstr++;
702
703                         /* skip whitespace before the command */
704                         cmdstr = skip_whitespace(cmdstr);
705                 }
706
707                 /* last part (mandatory) will be a command */
708                 if (!*cmdstr)
709                         bb_error_msg_and_die("missing command");
710                 sed_cmd->cmd = *cmdstr++;
711                 cmdstr = parse_cmd_args(sed_cmd, cmdstr);
712
713                 /* cmdstr now points past args.
714                  * GNU sed requires a separator, if there are more commands,
715                  * else it complains "char N: extra characters after command".
716                  * Example: "sed 'p;d'". We also allow "sed 'pd'".
717                  */
718
719                 /* Add the command to the command array */
720                 *G.sed_cmd_tail = sed_cmd;
721                 G.sed_cmd_tail = &sed_cmd->next;
722         }
723
724         /* If we glued multiple lines together, free the memory. */
725         free(G.add_cmd_line);
726         G.add_cmd_line = NULL;
727 }
728
729 /* Append to a string, reallocating memory as necessary. */
730
731 #define PIPE_GROW 64
732
733 static void pipe_putc(char c)
734 {
735         if (G.pipeline.idx == G.pipeline.len) {
736                 G.pipeline.buf = xrealloc(G.pipeline.buf,
737                                 G.pipeline.len + PIPE_GROW);
738                 G.pipeline.len += PIPE_GROW;
739         }
740         G.pipeline.buf[G.pipeline.idx++] = c;
741 }
742
743 static void do_subst_w_backrefs(char *line, char *replace)
744 {
745         int i, j;
746
747         /* go through the replacement string */
748         for (i = 0; replace[i]; i++) {
749                 /* if we find a backreference (\1, \2, etc.) print the backref'ed text */
750                 if (replace[i] == '\\') {
751                         unsigned backref = replace[++i] - '0';
752                         if (backref <= 9) {
753                                 /* print out the text held in G.regmatch[backref] */
754                                 if (G.regmatch[backref].rm_so != -1) {
755                                         j = G.regmatch[backref].rm_so;
756                                         while (j < G.regmatch[backref].rm_eo)
757                                                 pipe_putc(line[j++]);
758                                 }
759                                 continue;
760                         }
761                         /* I _think_ it is impossible to get '\' to be
762                          * the last char in replace string. Thus we don't check
763                          * for replace[i] == NUL. (counterexample anyone?) */
764                         /* if we find a backslash escaped character, print the character */
765                         pipe_putc(replace[i]);
766                         continue;
767                 }
768                 /* if we find an unescaped '&' print out the whole matched text. */
769                 if (replace[i] == '&') {
770                         j = G.regmatch[0].rm_so;
771                         while (j < G.regmatch[0].rm_eo)
772                                 pipe_putc(line[j++]);
773                         continue;
774                 }
775                 /* Otherwise just output the character. */
776                 pipe_putc(replace[i]);
777         }
778 }
779
780 static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
781 {
782         char *line = *line_p;
783         unsigned match_count = 0;
784         bool altered = 0;
785         bool prev_match_empty = 1;
786         bool tried_at_eol = 0;
787         regex_t *current_regex;
788
789         current_regex = sed_cmd->sub_match;
790         /* Handle empty regex. */
791         if (!current_regex) {
792                 current_regex = G.previous_regex_ptr;
793                 if (!current_regex)
794                         bb_error_msg_and_die("no previous regexp");
795         }
796         G.previous_regex_ptr = current_regex;
797
798         /* Find the first match */
799         dbg("matching '%s'", line);
800         if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) {
801                 dbg("no match");
802                 return 0;
803         }
804         dbg("match");
805
806         /* Initialize temporary output buffer. */
807         G.pipeline.buf = xmalloc(PIPE_GROW);
808         G.pipeline.len = PIPE_GROW;
809         G.pipeline.idx = 0;
810
811         /* Now loop through, substituting for matches */
812         do {
813                 int start = G.regmatch[0].rm_so;
814                 int end = G.regmatch[0].rm_eo;
815                 int i;
816
817                 match_count++;
818
819                 /* If we aren't interested in this match, output old line to
820                  * end of match and continue */
821                 if (sed_cmd->which_match
822                  && (sed_cmd->which_match != match_count)
823                 ) {
824                         for (i = 0; i < end; i++)
825                                 pipe_putc(*line++);
826                         /* Null match? Print one more char */
827                         if (start == end && *line)
828                                 pipe_putc(*line++);
829                         goto next;
830                 }
831
832                 /* Print everything before the match */
833                 for (i = 0; i < start; i++)
834                         pipe_putc(line[i]);
835
836                 /* Then print the substitution string,
837                  * unless we just matched empty string after non-empty one.
838                  * Example: string "cccd", pattern "c*", repl "R":
839                  * result is "RdR", not "RRdR": first match "ccc",
840                  * second is "" before "d", third is "" after "d".
841                  * Second match is NOT replaced!
842                  */
843                 if (prev_match_empty || start != 0 || start != end) {
844                         //dbg("%d %d %d", prev_match_empty, start, end);
845                         dbg("inserting replacement at %d in '%s'", start, line);
846                         do_subst_w_backrefs(line, sed_cmd->string);
847                         /* Flag that something has changed */
848                         altered = 1;
849                 } else {
850                         dbg("NOT inserting replacement at %d in '%s'", start, line);
851                 }
852
853                 /* If matched string is empty (f.e. "c*" pattern),
854                  * copy verbatim one char after it before attempting more matches
855                  */
856                 prev_match_empty = (start == end);
857                 if (prev_match_empty) {
858                         if (!line[end]) {
859                                 tried_at_eol = 1;
860                         } else {
861                                 pipe_putc(line[end]);
862                                 end++;
863                         }
864                 }
865
866                 /* Advance past the match */
867                 dbg("line += %d", end);
868                 line += end;
869
870                 /* if we're not doing this globally, get out now */
871                 if (sed_cmd->which_match != 0)
872                         break;
873  next:
874                 /* Exit if we are at EOL and already tried matching at it */
875                 if (*line == '\0') {
876                         if (tried_at_eol)
877                                 break;
878                         tried_at_eol = 1;
879                 }
880
881 //maybe (end ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL?
882         } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
883
884         /* Copy rest of string into output pipeline */
885         while (1) {
886                 char c = *line++;
887                 pipe_putc(c);
888                 if (c == '\0')
889                         break;
890         }
891
892         free(*line_p);
893         *line_p = G.pipeline.buf;
894         return altered;
895 }
896
897 /* Set command pointer to point to this label.  (Does not handle null label.) */
898 static sed_cmd_t *branch_to(char *label)
899 {
900         sed_cmd_t *sed_cmd;
901
902         for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
903                 if (sed_cmd->cmd == ':'
904                  && sed_cmd->string
905                  && strcmp(sed_cmd->string, label) == 0
906                 ) {
907                         return sed_cmd;
908                 }
909         }
910         bb_error_msg_and_die("can't find label for jump to '%s'", label);
911 }
912
913 static void append(char *s)
914 {
915         llist_add_to_end(&G.append_head, s);
916 }
917
918 /* Output line of text. */
919 /* Note:
920  * The tricks with NO_EOL_CHAR and last_puts_char are there to emulate gnu sed.
921  * Without them, we had this:
922  * echo -n thingy >z1
923  * echo -n again >z2
924  * >znull
925  * sed "s/i/z/" z1 z2 znull | hexdump -vC
926  * output:
927  * gnu sed 4.1.5:
928  * 00000000  74 68 7a 6e 67 79 0a 61  67 61 7a 6e              |thzngy.agazn|
929  * bbox:
930  * 00000000  74 68 7a 6e 67 79 61 67  61 7a 6e                 |thzngyagazn|
931  */
932 enum {
933         NO_EOL_CHAR = 1,
934         LAST_IS_NUL = 2,
935 };
936 static void puts_maybe_newline(char *s, FILE *file, char *last_puts_char, char last_gets_char)
937 {
938         char lpc = *last_puts_char;
939
940         /* Need to insert a '\n' between two files because first file's
941          * last line wasn't terminated? */
942         if (lpc != '\n' && lpc != '\0') {
943                 fputc('\n', file);
944                 lpc = '\n';
945         }
946         fputs(s, file);
947
948         /* 'x' - just something which is not '\n', '\0' or NO_EOL_CHAR */
949         if (s[0])
950                 lpc = 'x';
951
952         /* had trailing '\0' and it was last char of file? */
953         if (last_gets_char == LAST_IS_NUL) {
954                 fputc('\0', file);
955                 lpc = 'x'; /* */
956         } else
957         /* had trailing '\n' or '\0'? */
958         if (last_gets_char != NO_EOL_CHAR) {
959                 fputc(last_gets_char, file);
960                 lpc = last_gets_char;
961         }
962
963         if (ferror(file)) {
964                 xfunc_error_retval = 4;  /* It's what gnu sed exits with... */
965                 bb_error_msg_and_die(bb_msg_write_error);
966         }
967         *last_puts_char = lpc;
968 }
969
970 static void flush_append(char *last_puts_char)
971 {
972         char *data;
973
974         /* Output appended lines. */
975         while ((data = (char *)llist_pop(&G.append_head)) != NULL) {
976                 /* Append command does not respect "nonterminated-ness"
977                  * of last line. Try this:
978                  * $ echo -n "woot" | sed -e '/woot/a woo' -
979                  * woot
980                  * woo
981                  * (both lines are terminated with \n)
982                  * Therefore we do not propagate "last_gets_char" here,
983                  * pass '\n' instead:
984                  */
985                 puts_maybe_newline(data, G.nonstdout, last_puts_char, '\n');
986                 free(data);
987         }
988 }
989
990 /* Get next line of input from G.input_file_list, flushing append buffer and
991  * noting if we ran out of files without a newline on the last line we read.
992  */
993 static char *get_next_line(char *gets_char, char *last_puts_char)
994 {
995         char *temp = NULL;
996         size_t len;
997         char gc;
998
999         flush_append(last_puts_char);
1000
1001         /* will be returned if last line in the file
1002          * doesn't end with either '\n' or '\0' */
1003         gc = NO_EOL_CHAR;
1004         for (; G.current_input_file <= G.last_input_file; G.current_input_file++) {
1005                 FILE *fp = G.current_fp;
1006                 if (!fp) {
1007                         const char *path = G.input_file_list[G.current_input_file];
1008                         fp = stdin;
1009                         if (path != bb_msg_standard_input) {
1010                                 fp = fopen_or_warn(path, "r");
1011                                 if (!fp) {
1012                                         G.exitcode = EXIT_FAILURE;
1013                                         continue;
1014                                 }
1015                         }
1016                         G.current_fp = fp;
1017                 }
1018                 /* Read line up to a newline or NUL byte, inclusive,
1019                  * return malloc'ed char[]. length of the chunk read
1020                  * is stored in len. NULL if EOF/error */
1021                 temp = bb_get_chunk_from_file(fp, &len);
1022                 if (temp) {
1023                         /* len > 0 here, it's ok to do temp[len-1] */
1024                         char c = temp[len-1];
1025                         if (c == '\n' || c == '\0') {
1026                                 temp[len-1] = '\0';
1027                                 gc = c;
1028                                 if (c == '\0') {
1029                                         int ch = fgetc(fp);
1030                                         if (ch != EOF)
1031                                                 ungetc(ch, fp);
1032                                         else
1033                                                 gc = LAST_IS_NUL;
1034                                 }
1035                         }
1036                         /* else we put NO_EOL_CHAR into *gets_char */
1037                         break;
1038
1039                 /* NB: I had the idea of peeking next file(s) and returning
1040                  * NO_EOL_CHAR only if it is the *last* non-empty
1041                  * input file. But there is a case where this won't work:
1042                  * file1: "a woo\nb woo"
1043                  * file2: "c no\nd no"
1044                  * sed -ne 's/woo/bang/p' input1 input2 => "a bang\nb bang"
1045                  * (note: *no* newline after "b bang"!) */
1046                 }
1047                 /* Close this file and advance to next one */
1048                 fclose_if_not_stdin(fp);
1049                 G.current_fp = NULL;
1050         }
1051         *gets_char = gc;
1052         return temp;
1053 }
1054
1055 #define sed_puts(s, n) (puts_maybe_newline(s, G.nonstdout, &last_puts_char, n))
1056
1057 static int beg_match(sed_cmd_t *sed_cmd, const char *pattern_space)
1058 {
1059         int retval = sed_cmd->beg_match && !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0);
1060         if (retval)
1061                 G.previous_regex_ptr = sed_cmd->beg_match;
1062         return retval;
1063 }
1064
1065 /* Process all the lines in all the files */
1066
1067 static void process_files(void)
1068 {
1069         char *pattern_space, *next_line;
1070         int linenum = 0;
1071         char last_puts_char = '\n';
1072         char last_gets_char, next_gets_char;
1073         sed_cmd_t *sed_cmd;
1074         int substituted;
1075
1076         /* Prime the pump */
1077         next_line = get_next_line(&next_gets_char, &last_puts_char);
1078
1079         /* Go through every line in each file */
1080  again:
1081         substituted = 0;
1082
1083         /* Advance to next line.  Stop if out of lines. */
1084         pattern_space = next_line;
1085         if (!pattern_space)
1086                 return;
1087         last_gets_char = next_gets_char;
1088
1089         /* Read one line in advance so we can act on the last line,
1090          * the '$' address */
1091         next_line = get_next_line(&next_gets_char, &last_puts_char);
1092         linenum++;
1093
1094         /* For every line, go through all the commands */
1095  restart:
1096         for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
1097                 int old_matched, matched;
1098
1099                 old_matched = sed_cmd->in_match;
1100
1101                 /* Determine if this command matches this line: */
1102
1103                 dbg("match1:%d", sed_cmd->in_match);
1104                 dbg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
1105                                 && !sed_cmd->beg_match && !sed_cmd->end_match));
1106                 dbg("match3:%d", (sed_cmd->beg_line > 0
1107                         && (sed_cmd->end_line || sed_cmd->end_match
1108                             ? (sed_cmd->beg_line <= linenum)
1109                             : (sed_cmd->beg_line == linenum)
1110                             )
1111                         ));
1112                 dbg("match4:%d", (beg_match(sed_cmd, pattern_space)));
1113                 dbg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));
1114
1115                 /* Are we continuing a previous multi-line match? */
1116                 sed_cmd->in_match = sed_cmd->in_match
1117                         /* Or is no range necessary? */
1118                         || (!sed_cmd->beg_line && !sed_cmd->end_line
1119                                 && !sed_cmd->beg_match && !sed_cmd->end_match)
1120                         /* Or did we match the start of a numerical range? */
1121                         || (sed_cmd->beg_line > 0
1122                             && (sed_cmd->end_line || sed_cmd->end_match
1123                                   /* note: even if end is numeric and is < linenum too,
1124                                    * GNU sed matches! We match too, therefore we don't
1125                                    * check here that linenum <= end.
1126                                    * Example:
1127                                    * printf '1\n2\n3\n4\n' | sed -n '1{N;N;d};1p;2,3p;3p;4p'
1128                                    * first three input lines are deleted;
1129                                    * 4th line is matched and printed
1130                                    * by "2,3" (!) and by "4" ranges
1131                                    */
1132                                 ? (sed_cmd->beg_line <= linenum)    /* N,end */
1133                                 : (sed_cmd->beg_line == linenum)    /* N */
1134                                 )
1135                             )
1136                         /* Or does this line match our begin address regex? */
1137                         || (beg_match(sed_cmd, pattern_space))
1138                         /* Or did we match last line of input? */
1139                         || (sed_cmd->beg_line == -1 && next_line == NULL);
1140
1141                 /* Snapshot the value */
1142                 matched = sed_cmd->in_match;
1143
1144                 dbg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
1145                         sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
1146
1147                 /* Is this line the end of the current match? */
1148
1149                 if (matched) {
1150                         if (sed_cmd->end_line <= -2) {
1151                                 /* address2 is +N, i.e. N lines from beg_line */
1152                                 sed_cmd->end_line = linenum + (-sed_cmd->end_line - 2);
1153                         }
1154                         /* once matched, "n,xxx" range is dead, disabling it */
1155                         if (sed_cmd->beg_line > 0) {
1156                                 sed_cmd->beg_line = -2;
1157                         }
1158                         dbg("end1:%d", sed_cmd->end_line ? sed_cmd->end_line == -1
1159                                                 ? !next_line : (sed_cmd->end_line <= linenum)
1160                                         : !sed_cmd->end_match);
1161                         dbg("end2:%d", sed_cmd->end_match && old_matched
1162                                         && !regexec(sed_cmd->end_match,pattern_space, 0, NULL, 0));
1163                         sed_cmd->in_match = !(
1164                                 /* has the ending line come, or is this a single address command? */
1165                                 (sed_cmd->end_line
1166                                         ? sed_cmd->end_line == -1
1167                                                 ? !next_line
1168                                                 : (sed_cmd->end_line <= linenum)
1169                                         : !sed_cmd->end_match
1170                                 )
1171                                 /* or does this line matches our last address regex */
1172                                 || (sed_cmd->end_match && old_matched
1173                                      && (regexec(sed_cmd->end_match,
1174                                                 pattern_space, 0, NULL, 0) == 0)
1175                                 )
1176                         );
1177                 }
1178
1179                 /* Skip blocks of commands we didn't match */
1180                 if (sed_cmd->cmd == '{') {
1181                         if (sed_cmd->invert ? matched : !matched) {
1182                                 unsigned nest_cnt = 0;
1183                                 while (1) {
1184                                         if (sed_cmd->cmd == '{')
1185                                                 nest_cnt++;
1186                                         if (sed_cmd->cmd == '}') {
1187                                                 nest_cnt--;
1188                                                 if (nest_cnt == 0)
1189                                                         break;
1190                                         }
1191                                         sed_cmd = sed_cmd->next;
1192                                         if (!sed_cmd)
1193                                                 bb_error_msg_and_die("unterminated {");
1194                                 }
1195                         }
1196                         continue;
1197                 }
1198
1199                 /* Okay, so did this line match? */
1200                 if (sed_cmd->invert ? matched : !matched)
1201                         continue; /* no */
1202
1203                 /* Update last used regex in case a blank substitute BRE is found */
1204                 if (sed_cmd->beg_match) {
1205                         G.previous_regex_ptr = sed_cmd->beg_match;
1206                 }
1207
1208                 /* actual sedding */
1209                 dbg("pattern_space:'%s' next_line:'%s' cmd:%c",
1210                                 pattern_space, next_line, sed_cmd->cmd);
1211                 switch (sed_cmd->cmd) {
1212
1213                 /* Print line number */
1214                 case '=':
1215                         fprintf(G.nonstdout, "%d\n", linenum);
1216                         break;
1217
1218                 /* Write the current pattern space up to the first newline */
1219                 case 'P':
1220                 {
1221                         char *tmp = strchr(pattern_space, '\n');
1222                         if (tmp) {
1223                                 *tmp = '\0';
1224                                 /* TODO: explain why '\n' below */
1225                                 sed_puts(pattern_space, '\n');
1226                                 *tmp = '\n';
1227                                 break;
1228                         }
1229                         /* Fall Through */
1230                 }
1231
1232                 /* Write the current pattern space to output */
1233                 case 'p':
1234                         /* NB: we print this _before_ the last line
1235                          * (of current file) is printed. Even if
1236                          * that line is nonterminated, we print
1237                          * '\n' here (gnu sed does the same) */
1238                         sed_puts(pattern_space, '\n');
1239                         break;
1240                 /* Delete up through first newline */
1241                 case 'D':
1242                 {
1243                         char *tmp = strchr(pattern_space, '\n');
1244                         if (tmp) {
1245                                 overlapping_strcpy(pattern_space, tmp + 1);
1246                                 goto restart;
1247                         }
1248                 }
1249                 /* discard this line. */
1250                 case 'd':
1251                         goto discard_line;
1252
1253                 /* Substitute with regex */
1254                 case 's':
1255                         if (!do_subst_command(sed_cmd, &pattern_space))
1256                                 break;
1257                         dbg("do_subst_command succeeded:'%s'", pattern_space);
1258                         substituted |= 1;
1259
1260                         /* handle p option */
1261                         if (sed_cmd->sub_p)
1262                                 sed_puts(pattern_space, last_gets_char);
1263                         /* handle w option */
1264                         if (sed_cmd->sw_file)
1265                                 puts_maybe_newline(
1266                                         pattern_space, sed_cmd->sw_file,
1267                                         &sed_cmd->sw_last_char, last_gets_char);
1268                         break;
1269
1270                 /* Append line to linked list to be printed later */
1271                 case 'a':
1272                         append(xstrdup(sed_cmd->string));
1273                         break;
1274
1275                 /* Insert text before this line */
1276                 case 'i':
1277                         sed_puts(sed_cmd->string, '\n');
1278                         break;
1279
1280                 /* Cut and paste text (replace) */
1281                 case 'c':
1282                         /* Only triggers on last line of a matching range. */
1283                         if (!sed_cmd->in_match)
1284                                 sed_puts(sed_cmd->string, '\n');
1285                         goto discard_line;
1286
1287                 /* Read file, append contents to output */
1288                 case 'r':
1289                 {
1290                         FILE *rfile;
1291                         rfile = fopen_for_read(sed_cmd->string);
1292                         if (rfile) {
1293                                 char *line;
1294                                 while ((line = xmalloc_fgetline(rfile))
1295                                                 != NULL)
1296                                         append(line);
1297                                 fclose(rfile);
1298                         }
1299
1300                         break;
1301                 }
1302
1303                 /* Write pattern space to file. */
1304                 case 'w':
1305                         puts_maybe_newline(
1306                                 pattern_space, sed_cmd->sw_file,
1307                                 &sed_cmd->sw_last_char, last_gets_char);
1308                         break;
1309
1310                 /* Read next line from input */
1311                 case 'n':
1312                         if (!G.be_quiet)
1313                                 sed_puts(pattern_space, last_gets_char);
1314                         if (next_line == NULL) {
1315                                 /* If no next line, jump to end of script and exit. */
1316                                 goto discard_line;
1317                         }
1318                         free(pattern_space);
1319                         pattern_space = next_line;
1320                         last_gets_char = next_gets_char;
1321                         next_line = get_next_line(&next_gets_char, &last_puts_char);
1322                         substituted = 0;
1323                         linenum++;
1324                         break;
1325
1326                 /* Quit.  End of script, end of input. */
1327                 case 'q':
1328                         /* Exit the outer while loop */
1329                         free(next_line);
1330                         next_line = NULL;
1331                         goto discard_commands;
1332
1333                 /* Append the next line to the current line */
1334                 case 'N':
1335                 {
1336                         int len;
1337                         /* If no next line, jump to end of script and exit. */
1338                         /* http://www.gnu.org/software/sed/manual/sed.html:
1339                          * "Most versions of sed exit without printing anything
1340                          * when the N command is issued on the last line of
1341                          * a file. GNU sed prints pattern space before exiting
1342                          * unless of course the -n command switch has been
1343                          * specified. This choice is by design."
1344                          */
1345                         if (next_line == NULL) {
1346                                 //goto discard_line;
1347                                 goto discard_commands; /* GNU behavior */
1348                         }
1349                         /* Append next_line, read new next_line. */
1350                         len = strlen(pattern_space);
1351                         pattern_space = xrealloc(pattern_space, len + strlen(next_line) + 2);
1352                         pattern_space[len] = '\n';
1353                         strcpy(pattern_space + len+1, next_line);
1354                         last_gets_char = next_gets_char;
1355                         next_line = get_next_line(&next_gets_char, &last_puts_char);
1356                         linenum++;
1357                         break;
1358                 }
1359
1360                 /* Test/branch if substitution occurred */
1361                 case 't':
1362                         if (!substituted) break;
1363                         substituted = 0;
1364                         /* Fall through */
1365                 /* Test/branch if substitution didn't occur */
1366                 case 'T':
1367                         if (substituted) break;
1368                         /* Fall through */
1369                 /* Branch to label */
1370                 case 'b':
1371                         if (!sed_cmd->string) goto discard_commands;
1372                         else sed_cmd = branch_to(sed_cmd->string);
1373                         break;
1374                 /* Transliterate characters */
1375                 case 'y':
1376                 {
1377                         int i, j;
1378                         for (i = 0; pattern_space[i]; i++) {
1379                                 for (j = 0; sed_cmd->string[j]; j += 2) {
1380                                         if (pattern_space[i] == sed_cmd->string[j]) {
1381                                                 pattern_space[i] = sed_cmd->string[j + 1];
1382                                                 break;
1383                                         }
1384                                 }
1385                         }
1386
1387                         break;
1388                 }
1389                 case 'g':       /* Replace pattern space with hold space */
1390                         free(pattern_space);
1391                         pattern_space = xstrdup(G.hold_space ? G.hold_space : "");
1392                         break;
1393                 case 'G':       /* Append newline and hold space to pattern space */
1394                 {
1395                         int pattern_space_size = 2;
1396                         int hold_space_size = 0;
1397
1398                         if (pattern_space)
1399                                 pattern_space_size += strlen(pattern_space);
1400                         if (G.hold_space)
1401                                 hold_space_size = strlen(G.hold_space);
1402                         pattern_space = xrealloc(pattern_space,
1403                                         pattern_space_size + hold_space_size);
1404                         if (pattern_space_size == 2)
1405                                 pattern_space[0] = 0;
1406                         strcat(pattern_space, "\n");
1407                         if (G.hold_space)
1408                                 strcat(pattern_space, G.hold_space);
1409                         last_gets_char = '\n';
1410
1411                         break;
1412                 }
1413                 case 'h':       /* Replace hold space with pattern space */
1414                         free(G.hold_space);
1415                         G.hold_space = xstrdup(pattern_space);
1416                         break;
1417                 case 'H':       /* Append newline and pattern space to hold space */
1418                 {
1419                         int hold_space_size = 2;
1420                         int pattern_space_size = 0;
1421
1422                         if (G.hold_space)
1423                                 hold_space_size += strlen(G.hold_space);
1424                         if (pattern_space)
1425                                 pattern_space_size = strlen(pattern_space);
1426                         G.hold_space = xrealloc(G.hold_space,
1427                                         hold_space_size + pattern_space_size);
1428
1429                         if (hold_space_size == 2)
1430                                 *G.hold_space = 0;
1431                         strcat(G.hold_space, "\n");
1432                         if (pattern_space)
1433                                 strcat(G.hold_space, pattern_space);
1434
1435                         break;
1436                 }
1437                 case 'x': /* Exchange hold and pattern space */
1438                 {
1439                         char *tmp = pattern_space;
1440                         pattern_space = G.hold_space ? G.hold_space : xzalloc(1);
1441                         last_gets_char = '\n';
1442                         G.hold_space = tmp;
1443                         break;
1444                 }
1445                 } /* switch */
1446         } /* for each cmd */
1447
1448         /*
1449          * Exit point from sedding...
1450          */
1451  discard_commands:
1452         /* we will print the line unless we were told to be quiet ('-n')
1453            or if the line was suppressed (ala 'd'elete) */
1454         if (!G.be_quiet)
1455                 sed_puts(pattern_space, last_gets_char);
1456
1457         /* Delete and such jump here. */
1458  discard_line:
1459         flush_append(&last_puts_char /*,last_gets_char*/);
1460         free(pattern_space);
1461
1462         goto again;
1463 }
1464
/* It is possible to have a command line argument with embedded
 * newlines.  This counts as multiple command lines.
 * However, newline can be escaped: 's/e/z\<newline>z/'
 * add_cmd() handles this.
 *
 * Splits a private copy of cmdstr at every '\n' and feeds each piece to
 * add_cmd() in order. A piece after a trailing newline (the empty string)
 * is passed too. The caller's string is not modified.
 */

static void add_cmd_block(char *cmdstr)
{
	char *sv, *eol;

	cmdstr = sv = xstrdup(cmdstr);
	for (;;) {
		eol = strchr(cmdstr, '\n');
		if (eol)
			*eol = '\0';
		add_cmd(cmdstr);
		if (!eol)
			break;
		/* Only step past the newline when there was one:
		 * computing NULL + 1 is undefined behavior */
		cmdstr = eol + 1;
	}
	free(sv);
}
1485
/*
 * Entry point of the sed applet.
 *
 * Parses the options, builds the command list from -e / -f (or, when neither
 * was given, from the first non-option argument), then runs process_files()
 * over the remaining arguments.  With -i each FILE is processed separately
 * into a temporary file which is then renamed over the original.
 *
 * Returns G.exitcode, or 0 right away when the first argument is "--version".
 */
int sed_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int sed_main(int argc UNUSED_PARAM, char **argv)
{
        unsigned opt;
        /* Collected -e scripts, -f script file names, and the -i argument */
        llist_t *opt_e, *opt_f;
        char *opt_i;

#if ENABLE_LONG_OPTS
        static const char sed_longopts[] ALIGN1 =
                /* name             has_arg             short */
                "in-place\0"        Optional_argument   "i"
                "regexp-extended\0" No_argument         "r"
                "quiet\0"           No_argument         "n"
                "silent\0"          No_argument         "n"
                "expression\0"      Required_argument   "e"
                "file\0"            Required_argument   "f";
#endif

        INIT_G();

        /* destroy command strings on exit */
        if (ENABLE_FEATURE_CLEAN_UP) atexit(sed_free_and_close_stuff);

        /* Lie to autoconf when it starts asking stupid questions. */
        if (argv[1] && strcmp(argv[1], "--version") == 0) {
                puts("This is not GNU sed version 4.0");
                return 0;
        }

        /* do normal option parsing */
        opt_e = opt_f = NULL;
        opt_i = NULL;
        /* -i must be first, to match OPT_in_place definition */
        /* -E is a synonym of -r:
         * GNU sed 4.2.1 mentions it in neither --help
         * nor manpage, but does recognize it.
         */
        opt = getopt32long(argv, "^"
                        "i::rEne:*f:*"
                        "\0" "nn"/*count -n*/,
                        sed_longopts,
                        &opt_i, &opt_e, &opt_f,
                        &G.be_quiet); /* counter for -n */
        //argc -= optind;
        /* From here on argv[0] is the first non-option argument */
        argv += optind;
        if (opt & OPT_in_place) { // -i
                die_func = cleanup_outname;
        }
        if (opt & (2|4))
                G.regex_type |= REG_EXTENDED; // -r or -E
        //if (opt & 8)
        //      G.be_quiet++; // -n (implemented with a counter instead)
        /* Every -e argument may itself contain several newline-separated lines */
        while (opt_e) { // -e
                add_cmd_block(llist_pop(&opt_e));
        }
        /* Every -f file is fed to add_cmd() line by line */
        while (opt_f) { // -f
                char *line;
                FILE *cmdfile;
                cmdfile = xfopen_stdin(llist_pop(&opt_f));
                while ((line = xmalloc_fgetline(cmdfile)) != NULL) {
                        add_cmd(line);
                        free(line);
                }
                fclose_if_not_stdin(cmdfile);
        }
        /* if we didn't get a pattern from -e or -f, use argv[0] */
        /* NOTE(review): 0x30 is presumably the -e (0x10) and -f (0x20) bits,
         * following the order of the option string above - confirm. */
        if (!(opt & 0x30)) {
                if (!*argv)
                        bb_show_usage();
                /* The script is consumed; argv now starts at the file names */
                add_cmd_block(*argv++);
        }
        /* Flush any unfinished commands. */
        add_cmd("");

        /* By default, we write to stdout */
        G.nonstdout = stdout;

        /* argv[0..(argc-1)] should be names of file to process. If no
         * files were specified or '-' was specified, take input from stdin.
         * Otherwise, we process all the files specified. */
        G.input_file_list = argv;
        if (!argv[0]) {
                /* No FILE arguments: -i has nothing to edit in place */
                if (opt & OPT_in_place)
                        bb_error_msg_and_die(bb_msg_requires_arg, "-i");
                argv[0] = (char*)bb_msg_standard_input;
                /* G.last_input_file = 0; - already is */
        } else {
                /* Enter the loop body directly: the first FILE must not
                 * bump G.last_input_file, only the following ones do. */
                goto start;

                for (; *argv; argv++) {
                        struct stat statbuf;
                        int nonstdoutfd;
                        sed_cmd_t *sed_cmd;

                        G.last_input_file++;
 start:
                        if (!(opt & OPT_in_place)) {
                                /* Without -i the only per-argument work is for "-":
                                 * it is replaced by bb_msg_standard_input and
                                 * process_files() is called right away.
                                 * NOTE(review): presumably this consumes the inputs
                                 * counted so far - confirm in process_files(). */
                                if (LONE_DASH(*argv)) {
                                        *argv = (char*)bb_msg_standard_input;
                                        process_files();
                                }
                                continue;
                        }

                        /* -i: process each FILE separately: */

                        if (stat(*argv, &statbuf) != 0) {
                                /* Report the error, remember the failure and move
                                 * on to the next FILE. */
                                bb_simple_perror_msg(*argv);
                                G.exitcode = EXIT_FAILURE;
                                G.current_input_file++;
                                continue;
                        }
                        /* Output goes to a temporary file named FILE + "XXXXXX"
                         * template, handed to xmkstemp() */
                        G.outname = xasprintf("%sXXXXXX", *argv);
                        nonstdoutfd = xmkstemp(G.outname);
                        G.nonstdout = xfdopen_for_write(nonstdoutfd);
                        /* Set permissions/owner of output file */
                        /* chmod'ing AFTER chown would preserve suid/sgid bits,
                         * but GNU sed 4.2.1 does not preserve them either */
                        /* (return values of fchmod/fchown are not checked) */
                        fchmod(nonstdoutfd, statbuf.st_mode);
                        fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid);

                        process_files();
                        /* Done with this FILE: close the temporary output and
                         * restore the default output stream */
                        fclose(G.nonstdout);
                        G.nonstdout = stdout;

                        /* -iSUFFIX: keep the original as FILE + SUFFIX */
                        if (opt_i) {
                                char *backupname = xasprintf("%s%s", *argv, opt_i);
                                xrename(*argv, backupname);
                                free(backupname);
                        }
                        /* else unlink(*argv); - rename below does this */
                        xrename(G.outname, *argv); //TODO: rollback backup on error?
                        free(G.outname);
                        G.outname = NULL;

                        /* Fix disabled range matches and mangled ",+N" ranges */
                        /* i.e. restore each command's line range from its saved
                         * *_orig copy before the next FILE is processed */
                        for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
                                sed_cmd->beg_line = sed_cmd->beg_line_orig;
                                sed_cmd->end_line = sed_cmd->end_line_orig;
                        }
                }
                /* Here, to handle "sed 'cmds' nonexistent_file" case we did:
                 * if (G.current_input_file[G.current_input_file] == NULL)
                 *      return G.exitcode;
                 * but it's not needed since process_files() works correctly
                 * in this case too. */
        }

        /* Reached on every path: stdin-only, plain FILE list, and after
         * the -i loop (see the comment just above). */
        process_files();

        return G.exitcode;
}