Apply post-1.20.0 patches, bump version to 1.20.1
[oweals/busybox.git] / editors / sed.c
index 302a156051a23938e5c1425ee0bdc2c22cd964bc..3ee8edc433add4e7af452753d93ec0db41f071ed 100644 (file)
  *
  * MAINTAINER: Rob Landley <rob@landley.net>
  *
- * Licensed under GPL version 2, see file LICENSE in this tarball for details.
+ * Licensed under GPLv2, see file LICENSE in this source tree.
  */
 
 /* Code overview.
+ *
+ * Files are laid out to avoid unnecessary function declarations.  So for
+ * example, every function add_cmd calls occurs before add_cmd in this file.
+ *
+ * add_cmd() is called on each line of sed command text (from a file or from
+ * the command line).  It calls get_address() and parse_cmd_args().  The
+ * resulting sed_cmd_t structures are appended to a linked list
+ * (G.sed_cmd_head/G.sed_cmd_tail).
+ *
+ * add_input_file() adds a FILE* to the list of input files.  We need to
+ * know all input sources ahead of time to find the last line for the $ match.
+ *
+ * process_files() does actual sedding, reading data lines from each input FILE*
+ * (which could be stdin) and applying the sed command list (sed_cmd_head) to
+ * each of the resulting lines.
+ *
+ * sed_main() is where external code calls into this, with a command line.
+ */
 
-  Files are laid out to avoid unnecessary function declarations.  So for
-  example, every function add_cmd calls occurs before add_cmd in this file.
-
-  add_cmd() is called on each line of sed command text (from a file or from
-  the command line).  It calls get_address() and parse_cmd_args().  The
-  resulting sed_cmd_t structures are appended to a linked list
-  (G.sed_cmd_head/G.sed_cmd_tail).
-
-  add_input_file() adds a FILE* to the list of input files.  We need to
-  know all input sources ahead of time to find the last line for the $ match.
-
-  process_files() does actual sedding, reading data lines from each input FILE *
-  (which could be stdin) and applying the sed command list (sed_cmd_head) to
-  each of the resulting lines.
-
-  sed_main() is where external code calls into this, with a command line.
-*/
-
-
-/*
-       Supported features and commands in this version of sed:
+/* Supported features and commands in this version of sed:
+ *
+ * - comments ('#')
+ * - address matching: num|/matchstr/[,num|/matchstr/|$]command
+ * - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags)
+ * - edit commands: (a)ppend, (i)nsert, (c)hange
+ * - file commands: (r)ead
+ * - backreferences in substitution expressions (\0, \1, \2...\9)
+ * - grouped commands: {cmd1;cmd2}
+ * - transliteration (y/source-chars/dest-chars/)
+ * - pattern space hold space storing / swapping (g, h, x)
+ * - labels / branching (: label, b, t, T)
+ *
+ * (Note: Specifying an address (range) to match is *optional*; commands
+ * default to the whole pattern space if no specific address match was
+ * requested.)
+ *
+ * Todo:
+ * - Create a wrapper around regex to make libc's regex conform with sed
+ *
+ * Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
+ */
 
-        - comments ('#')
-        - address matching: num|/matchstr/[,num|/matchstr/|$]command
-        - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags)
-        - edit commands: (a)ppend, (i)nsert, (c)hange
-        - file commands: (r)ead
-        - backreferences in substitution expressions (\0, \1, \2...\9)
-        - grouped commands: {cmd1;cmd2}
-        - transliteration (y/source-chars/dest-chars/)
-        - pattern space hold space storing / swapping (g, h, x)
-        - labels / branching (: label, b, t, T)
+//usage:#define sed_trivial_usage
+//usage:       "[-inr] [-f FILE]... [-e CMD]... [FILE]...\n"
+//usage:       "or: sed [-inr] CMD [FILE]..."
+//usage:#define sed_full_usage "\n\n"
+//usage:       "       -e CMD  Add CMD to sed commands to be executed"
+//usage:     "\n       -f FILE Add FILE contents to sed commands to be executed"
+//usage:     "\n       -i      Edit files in-place (else sends result to stdout)"
+//usage:     "\n       -n      Suppress automatic printing of pattern space"
+//usage:     "\n       -r      Use extended regex syntax"
+//usage:     "\n"
+//usage:     "\nIf no -e or -f, the first non-option argument is the sed command string."
+//usage:     "\nRemaining arguments are input files (stdin if none)."
+//usage:
+//usage:#define sed_example_usage
+//usage:       "$ echo \"foo\" | sed -e 's/f[a-zA-Z]o/bar/g'\n"
+//usage:       "bar\n"
 
-        (Note: Specifying an address (range) to match is *optional*; commands
-        default to the whole pattern space if no specific address match was
-        requested.)
+#include "libbb.h"
+#include "xregex.h"
 
-       Todo:
-        - Create a wrapper around regex to make libc's regex conform with sed
+#if 0
+# define dbg(...) bb_error_msg(__VA_ARGS__)
+#else
+# define dbg(...) ((void)0)
+#endif
 
-       Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
-*/
 
-#include "libbb.h"
-#include "xregex.h"
+enum {
+       OPT_in_place = 1 << 0,
+};
 
 /* Each sed command turns into one of these structures. */
 typedef struct sed_cmd_s {
@@ -71,6 +97,7 @@ typedef struct sed_cmd_s {
        regex_t *end_match;     /* sed -e '/match/,/end_match/cmd' */
        regex_t *sub_match;     /* For 's/sub_match/string/' */
        int beg_line;           /* 'sed 1p'   0 == apply commands to all lines */
+       int beg_line_orig;      /* copy of the above, needed for -i */
        int end_line;           /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */
 
        FILE *sw_file;          /* File (sw) command writes to, -1 for none. */
@@ -105,7 +132,7 @@ struct globals {
        regex_t *previous_regex_ptr;
 
        /* linked list of sed commands */
-       sed_cmd_t sed_cmd_head, *sed_cmd_tail;
+       sed_cmd_t *sed_cmd_head, **sed_cmd_tail;
 
        /* Linked list of append lines */
        llist_t *append_head;
@@ -113,16 +140,16 @@ struct globals {
        char *add_cmd_line;
 
        struct pipeline {
-               char *buf;      /* Space to hold string */
-               int idx;        /* Space used */
-               int len;        /* Space allocated */
+               char *buf;  /* Space to hold string */
+               int idx;    /* Space used */
+               int len;    /* Space allocated */
        } pipeline;
 } FIX_ALIASING;
 #define G (*(struct globals*)&bb_common_bufsiz1)
-void BUG_sed_globals_too_big(void);
+struct BUG_G_too_big {
+       char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
+};
 #define INIT_G() do { \
-       if (sizeof(struct globals) > COMMON_BUFSIZE) \
-               BUG_sed_globals_too_big(); \
        G.sed_cmd_tail = &G.sed_cmd_head; \
 } while (0)
 
@@ -130,7 +157,7 @@ void BUG_sed_globals_too_big(void);
 #if ENABLE_FEATURE_CLEAN_UP
 static void sed_free_and_close_stuff(void)
 {
-       sed_cmd_t *sed_cmd = G.sed_cmd_head.next;
+       sed_cmd_t *sed_cmd = G.sed_cmd_head;
 
        llist_free(G.append_head, free);
 
@@ -196,11 +223,16 @@ static void parse_escapes(char *dest, const char *string, int len, char from, ch
 
 static char *copy_parsing_escapes(const char *string, int len)
 {
+       const char *s;
        char *dest = xmalloc(len + 1);
 
-       parse_escapes(dest, string, len, 'n', '\n');
-       /* GNU sed also recognizes \t */
-       parse_escapes(dest, dest, strlen(dest), 't', '\t');
+       /* sed recognizes \n */
+       /* GNU sed also recognizes \t and \r */
+       for (s = "\nn\tt\rr"; *s; s += 2) {
+               parse_escapes(dest, string, len, s[1], s[0]);
+               string = dest;
+               len = strlen(dest);
+       }
        return dest;
 }
 
@@ -223,11 +255,13 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
                delimiter = -delimiter;
        }
 
-       for (; (ch = str[idx]); idx++) {
+       for (; (ch = str[idx]) != '\0'; idx++) {
                if (bracket >= 0) {
-                       if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2
-                                       && str[idx - 1] == '^')))
+                       if (ch == ']'
+                        && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^'))
+                       ) {
                                bracket = -1;
+                       }
                } else if (escaped)
                        escaped = 0;
                else if (ch == '\\')
@@ -248,7 +282,7 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
 static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
 {
        const char *cmdstr_ptr = cmdstr;
-       char delimiter;
+       unsigned char delimiter;
        int idx = 0;
 
        /* verify that the 's' or 'y' is followed by something.  That something
@@ -263,7 +297,7 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
 
        /* save the replacement string */
        cmdstr_ptr += idx + 1;
-       idx = index_of_next_unescaped_regexp_delim(-delimiter, cmdstr_ptr);
+       idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr);
        *replace = copy_parsing_escapes(cmdstr_ptr, idx);
 
        return ((cmdstr_ptr - cmdstr) + idx);
@@ -288,10 +322,11 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex)
                char *temp;
 
                delimiter = '/';
-               if (*my_str == '\\') delimiter = *++pos;
+               if (*my_str == '\\')
+                       delimiter = *++pos;
                next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
                temp = copy_parsing_escapes(pos, next);
-               *regex = xmalloc(sizeof(regex_t));
+               *regex = xzalloc(sizeof(regex_t));
                xregcomp(*regex, temp, G.regex_type|REG_NEWLINE);
                free(temp);
                /* Move position to next character after last delimiter */
@@ -385,7 +420,8 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
                        break;
                /* Comment */
                case '#':
-                       while (substr[++idx]) /*skip all*/;
+                       // while (substr[++idx]) continue;
+                       idx += strlen(substr + idx); // same
                        /* Fall through */
                /* End of command */
                case ';':
@@ -395,12 +431,14 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
                        bb_error_msg_and_die("bad option in substitution expression");
                }
        }
-out:
+ out:
        /* compile the match string into a regex */
        if (*match != '\0') {
                /* If match is empty, we use last regex used at runtime */
-               sed_cmd->sub_match = xmalloc(sizeof(regex_t));
+               sed_cmd->sub_match = xzalloc(sizeof(regex_t));
+               dbg("xregcomp('%s',%x)", match, cflags);
                xregcomp(sed_cmd->sub_match, match, cflags);
+               dbg("regcomp ok");
        }
        free(match);
 
@@ -412,11 +450,47 @@ out:
  */
 static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
 {
+       static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}";
+       enum {
+               IDX_s = 0,
+               IDX_a,
+               IDX_i,
+               IDX_c,
+               IDX_r,
+               IDX_w,
+               IDX_colon,
+               IDX_b,
+               IDX_t,
+               IDX_T,
+               IDX_y,
+               IDX_d,
+               IDX_D,
+               IDX_g,
+               IDX_G,
+               IDX_h,
+               IDX_H,
+               IDX_l,
+               IDX_n,
+               IDX_N,
+               IDX_p,
+               IDX_P,
+               IDX_q,
+               IDX_x,
+               IDX_equal,
+               IDX_lbrace,
+               IDX_rbrace,
+               IDX_nul
+       };
+       struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 1 : -1]; };
+
+       unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters;
+
        /* handle (s)ubstitution command */
-       if (sed_cmd->cmd == 's')
+       if (idx == IDX_s) {
                cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
+       }
        /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
-       else if (strchr("aic", sed_cmd->cmd)) {
+       else if (idx <= IDX_c) { /* a,i,c */
                if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
                        bb_error_msg_and_die("only a beginning address can be specified for edit commands");
                for (;;) {
@@ -432,8 +506,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
                /* "\anychar" -> "anychar" */
                parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0');
                cmdstr += strlen(cmdstr);
+       }
        /* handle file cmds: (r)ead */
-       } else if (strchr("rw", sed_cmd->cmd)) {
+       else if (idx <= IDX_w) { /* r,w */
                if (sed_cmd->end_line || sed_cmd->end_match)
                        bb_error_msg_and_die("command only uses one address");
                cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string);
@@ -441,8 +516,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
                        sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
                        sed_cmd->sw_last_char = '\n';
                }
+       }
        /* handle branch commands */
-       } else if (strchr(":btT", sed_cmd->cmd)) {
+       else if (idx <= IDX_T) { /* :,b,t,T */
                int length;
 
                cmdstr = skip_whitespace(cmdstr);
@@ -453,7 +529,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
                }
        }
        /* translation command */
-       else if (sed_cmd->cmd == 'y') {
+       else if (idx == IDX_y) {
                char *match, *replace;
                int i = cmdstr[0];
 
@@ -473,7 +549,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
        /* if it wasnt a single-letter command that takes no arguments
         * then it must be an invalid command.
         */
-       else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) {
+       else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */
                bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd);
        }
 
@@ -487,7 +563,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
 static void add_cmd(const char *cmdstr)
 {
        sed_cmd_t *sed_cmd;
-       int temp;
+       unsigned len, n;
 
        /* Append this line to any unfinished line from last time. */
        if (G.add_cmd_line) {
@@ -496,12 +572,14 @@ static void add_cmd(const char *cmdstr)
                cmdstr = G.add_cmd_line = tp;
        }
 
-       /* If this line ends with backslash, request next line. */
-       temp = strlen(cmdstr);
-       if (temp && cmdstr[--temp] == '\\') {
+       /* If this line ends with unescaped backslash, request next line. */
+       n = len = strlen(cmdstr);
+       while (n && cmdstr[n-1] == '\\')
+               n--;
+       if ((len - n) & 1) { /* if odd number of trailing backslashes */
                if (!G.add_cmd_line)
                        G.add_cmd_line = xstrdup(cmdstr);
-               G.add_cmd_line[temp] = '\0';
+               G.add_cmd_line[len-1] = '\0';
                return;
        }
 
@@ -533,6 +611,7 @@ static void add_cmd(const char *cmdstr)
 
                /* first part (if present) is an address: either a '$', a number or a /regex/ */
                cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
+               sed_cmd->beg_line_orig = sed_cmd->beg_line;
 
                /* second part (if present) will begin with a comma */
                if (*cmdstr == ',') {
@@ -560,12 +639,12 @@ static void add_cmd(const char *cmdstr)
                /* last part (mandatory) will be a command */
                if (!*cmdstr)
                        bb_error_msg_and_die("missing command");
-               sed_cmd->cmd = *(cmdstr++);
+               sed_cmd->cmd = *cmdstr++;
                cmdstr = parse_cmd_args(sed_cmd, cmdstr);
 
                /* Add the command to the command array */
-               G.sed_cmd_tail->next = sed_cmd;
-               G.sed_cmd_tail = G.sed_cmd_tail->next;
+               *G.sed_cmd_tail = sed_cmd;
+               G.sed_cmd_tail = &sed_cmd->next;
        }
 
        /* If we glued multiple lines together, free the memory. */
@@ -641,8 +720,12 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
        G.previous_regex_ptr = current_regex;
 
        /* Find the first match */
-       if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0))
+       dbg("matching '%s'", line);
+       if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) {
+               dbg("no match");
                return 0;
+       }
+       dbg("match");
 
        /* Initialize temporary output buffer. */
        G.pipeline.buf = xmalloc(PIPE_GROW);
@@ -654,12 +737,13 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
                int i;
 
                /* Work around bug in glibc regexec, demonstrated by:
-                  echo " a.b" | busybox sed 's [^ .]* x g'
-                  The match_count check is so not to break
-                  echo "hi" | busybox sed 's/^/!/g' */
+                * echo " a.b" | busybox sed 's [^ .]* x g'
+                * The match_count check is so not to break
+                * echo "hi" | busybox sed 's/^/!/g'
+                */
                if (!G.regmatch[0].rm_so && !G.regmatch[0].rm_eo && match_count) {
                        pipe_putc(*line++);
-                       continue;
+                       goto next;
                }
 
                match_count++;
@@ -671,7 +755,7 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
                ) {
                        for (i = 0; i < G.regmatch[0].rm_eo; i++)
                                pipe_putc(*line++);
-                       continue;
+                       goto next;
                }
 
                /* print everything before the match */
@@ -687,11 +771,14 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
                altered++;
 
                /* if we're not doing this globally, get out now */
-               if (sed_cmd->which_match)
+               if (sed_cmd->which_match != 0)
+                       break;
+ next:
+               if (*line == '\0')
                        break;
 
 //maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL?
-       } while (*line && regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
+       } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
 
        /* Copy rest of string into output pipeline */
        while (1) {
@@ -711,7 +798,7 @@ static sed_cmd_t *branch_to(char *label)
 {
        sed_cmd_t *sed_cmd;
 
-       for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
+       for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
                if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) {
                        return sed_cmd;
                }
@@ -887,20 +974,46 @@ static void process_files(void)
 
        /* For every line, go through all the commands */
  restart:
-       for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
+       for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
                int old_matched, matched;
 
                old_matched = sed_cmd->in_match;
 
                /* Determine if this command matches this line: */
 
+               dbg("match1:%d", sed_cmd->in_match);
+               dbg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
+                               && !sed_cmd->beg_match && !sed_cmd->end_match));
+               dbg("match3:%d", (sed_cmd->beg_line > 0
+                       && (sed_cmd->end_line || sed_cmd->end_match
+                           ? (sed_cmd->beg_line <= linenum)
+                           : (sed_cmd->beg_line == linenum)
+                           )
+                       ));
+               dbg("match4:%d", (beg_match(sed_cmd, pattern_space)));
+               dbg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));
+
                /* Are we continuing a previous multi-line match? */
                sed_cmd->in_match = sed_cmd->in_match
                        /* Or is no range necessary? */
                        || (!sed_cmd->beg_line && !sed_cmd->end_line
                                && !sed_cmd->beg_match && !sed_cmd->end_match)
                        /* Or did we match the start of a numerical range? */
-                       || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line <= linenum))
+                       || (sed_cmd->beg_line > 0
+                           && (sed_cmd->end_line || sed_cmd->end_match
+                                 /* note: even if end is numeric and is < linenum too,
+                                  * GNU sed matches! We match too, therefore we don't
+                                  * check here that linenum <= end.
+                                  * Example:
+                                  * printf '1\n2\n3\n4\n' | sed -n '1{N;N;d};1p;2,3p;3p;4p'
+                                  * first three input lines are deleted;
+                                  * 4th line is matched and printed
+                                  * by "2,3" (!) and by "4" ranges
+                                  */
+                               ? (sed_cmd->beg_line <= linenum)    /* N,end */
+                               : (sed_cmd->beg_line == linenum)    /* N */
+                               )
+                           )
                        /* Or does this line match our begin address regex? */
                        || (beg_match(sed_cmd, pattern_space))
                        /* Or did we match last line of input? */
@@ -909,34 +1022,44 @@ static void process_files(void)
                /* Snapshot the value */
                matched = sed_cmd->in_match;
 
-               //bb_error_msg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
-               //sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
+               dbg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
+                       sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
 
                /* Is this line the end of the current match? */
 
                if (matched) {
                        /* once matched, "n,xxx" range is dead, disabling it */
-                       if (sed_cmd->beg_line > 0)
+                       if (sed_cmd->beg_line > 0) {
                                sed_cmd->beg_line = -2;
+                       }
                        sed_cmd->in_match = !(
                                /* has the ending line come, or is this a single address command? */
-                               (sed_cmd->end_line ?
-                                       sed_cmd->end_line == -1 ?
-                                               !next_line
+                               (sed_cmd->end_line
+                                       ? sed_cmd->end_line == -1
+                                               !next_line
                                                : (sed_cmd->end_line <= linenum)
                                        : !sed_cmd->end_match
                                )
                                /* or does this line matches our last address regex */
                                || (sed_cmd->end_match && old_matched
                                     && (regexec(sed_cmd->end_match,
-                                                pattern_space, 0, NULL, 0) == 0))
+                                                pattern_space, 0, NULL, 0) == 0)
+                               )
                        );
                }
 
                /* Skip blocks of commands we didn't match */
                if (sed_cmd->cmd == '{') {
                        if (sed_cmd->invert ? matched : !matched) {
-                               while (sed_cmd->cmd != '}') {
+                               unsigned nest_cnt = 0;
+                               while (1) {
+                                       if (sed_cmd->cmd == '{')
+                                               nest_cnt++;
+                                       if (sed_cmd->cmd == '}') {
+                                               nest_cnt--;
+                                               if (nest_cnt == 0)
+                                                       break;
+                                       }
                                        sed_cmd = sed_cmd->next;
                                        if (!sed_cmd)
                                                bb_error_msg_and_die("unterminated {");
@@ -955,6 +1078,8 @@ static void process_files(void)
                }
 
                /* actual sedding */
+               dbg("pattern_space:'%s' next_line:'%s' cmd:%c",
+                               pattern_space, next_line, sed_cmd->cmd);
                switch (sed_cmd->cmd) {
 
                /* Print line number */
@@ -966,7 +1091,6 @@ static void process_files(void)
                case 'P':
                {
                        char *tmp = strchr(pattern_space, '\n');
-
                        if (tmp) {
                                *tmp = '\0';
                                /* TODO: explain why '\n' below */
@@ -989,11 +1113,8 @@ static void process_files(void)
                case 'D':
                {
                        char *tmp = strchr(pattern_space, '\n');
-
                        if (tmp) {
-                               tmp = xstrdup(tmp+1);
-                               free(pattern_space);
-                               pattern_space = tmp;
+                               overlapping_strcpy(pattern_space, tmp + 1);
                                goto restart;
                        }
                }
@@ -1005,6 +1126,7 @@ static void process_files(void)
                case 's':
                        if (!do_subst_command(sed_cmd, &pattern_space))
                                break;
+                       dbg("do_subst_command succeeeded:'%s'", pattern_space);
                        substituted |= 1;
 
                        /* handle p option */
@@ -1038,7 +1160,6 @@ static void process_files(void)
                case 'r':
                {
                        FILE *rfile;
-
                        rfile = fopen_for_read(sed_cmd->string);
                        if (rfile) {
                                char *line;
@@ -1086,13 +1207,18 @@ static void process_files(void)
                {
                        int len;
                        /* If no next line, jump to end of script and exit. */
+                       /* http://www.gnu.org/software/sed/manual/sed.html:
+                        * "Most versions of sed exit without printing anything
+                        * when the N command is issued on the last line of
+                        * a file. GNU sed prints pattern space before exiting
+                        * unless of course the -n command switch has been
+                        * specified. This choice is by design."
+                        */
                        if (next_line == NULL) {
-                               /* Jump to end of script and exit */
-                               free(next_line);
-                               next_line = NULL;
-                               goto discard_line;
-                       /* append next_line, read new next_line. */
+                               //goto discard_line;
+                               goto discard_commands; /* GNU behavior */
                        }
+                       /* Append next_line, read new next_line. */
                        len = strlen(pattern_space);
                        pattern_space = xrealloc(pattern_space, len + strlen(next_line) + 2);
                        pattern_space[len] = '\n';
@@ -1121,7 +1247,6 @@ static void process_files(void)
                case 'y':
                {
                        int i, j;
-
                        for (i = 0; pattern_space[i]; i++) {
                                for (j = 0; sed_cmd->string[j]; j += 2) {
                                        if (pattern_space[i] == sed_cmd->string[j]) {
@@ -1247,9 +1372,6 @@ static void add_cmd_block(char *cmdstr)
 int sed_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 int sed_main(int argc UNUSED_PARAM, char **argv)
 {
-       enum {
-               OPT_in_place = 1 << 0,
-       };
        unsigned opt;
        llist_t *opt_e, *opt_f;
        int status = EXIT_SUCCESS;
@@ -1269,6 +1391,7 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
        opt_e = opt_f = NULL;
        opt_complementary = "e::f::" /* can occur multiple times */
                            "nn"; /* count -n */
+       /* -i must be first, to match OPT_in_place definition */
        opt = getopt32(argv, "irne:f:", &opt_e, &opt_f,
                            &G.be_quiet); /* counter for -n */
        //argc -= optind;
@@ -1310,14 +1433,14 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
                if (opt & OPT_in_place)
                        bb_error_msg_and_die(bb_msg_requires_arg, "-i");
                add_input_file(stdin);
-               process_files();
        } else {
                int i;
-               FILE *file;
 
                for (i = 0; argv[i]; i++) {
                        struct stat statbuf;
                        int nonstdoutfd;
+                       FILE *file;
+                       sed_cmd_t *sed_cmd;
 
                        if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) {
                                add_input_file(stdin);
@@ -1329,34 +1452,45 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
                                status = EXIT_FAILURE;
                                continue;
                        }
+                       add_input_file(file);
                        if (!(opt & OPT_in_place)) {
-                               add_input_file(file);
                                continue;
                        }
 
+                       /* -i: process each FILE separately: */
+
                        G.outname = xasprintf("%sXXXXXX", argv[i]);
-                       nonstdoutfd = mkstemp(G.outname);
-                       if (-1 == nonstdoutfd)
-                               bb_perror_msg_and_die("can't create temp file %s", G.outname);
+                       nonstdoutfd = xmkstemp(G.outname);
                        G.nonstdout = xfdopen_for_write(nonstdoutfd);
 
                        /* Set permissions/owner of output file */
                        fstat(fileno(file), &statbuf);
+                       /* chmod'ing AFTER chown would preserve suid/sgid bits,
+                        * but GNU sed 4.2.1 does not preserve them either */
                        fchmod(nonstdoutfd, statbuf.st_mode);
                        fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid);
-                       add_input_file(file);
+
                        process_files();
                        fclose(G.nonstdout);
-
                        G.nonstdout = stdout;
+
                        /* unlink(argv[i]); */
                        xrename(G.outname, argv[i]);
                        free(G.outname);
                        G.outname = NULL;
+
+                       /* Re-enable disabled range matches */
+                       for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
+                               sed_cmd->beg_line = sed_cmd->beg_line_orig;
+                       }
                }
-               if (G.input_file_count > G.current_input_file)
-                       process_files();
+               /* Here, to handle "sed 'cmds' nonexistent_file" case we did:
+                * if (G.current_input_file >= G.input_file_count)
+                *      return status;
+                * but it's not needed since process_files() works correctly
+                * in this case too. */
        }
+       process_files();
 
        return status;
 }