Apply post-1.20.0 patches, bump version to 1.20.1
[oweals/busybox.git] / editors / sed.c
index 11c4763213a2192a8f06c565bddf5a30929dc71a..3ee8edc433add4e7af452753d93ec0db41f071ed 100644 (file)
@@ -26,7 +26,7 @@
  * add_input_file() adds a FILE* to the list of input files.  We need to
  * know all input sources ahead of time to find the last line for the $ match.
  *
- * process_files() does actual sedding, reading data lines from each input FILE *
+ * process_files() does actual sedding, reading data lines from each input FILE*
  * (which could be stdin) and applying the sed command list (sed_cmd_head) to
  * each of the resulting lines.
  *
  */
 
 //usage:#define sed_trivial_usage
-//usage:       "[-efinr] SED_CMD [FILE]..."
+//usage:       "[-inr] [-f FILE]... [-e CMD]... [FILE]...\n"
+//usage:       "or: sed [-inr] CMD [FILE]..."
 //usage:#define sed_full_usage "\n\n"
-//usage:       "Options:"
-//usage:     "\n       -e CMD  Add CMD to sed commands to be executed"
+//usage:       "       -e CMD  Add CMD to sed commands to be executed"
 //usage:     "\n       -f FILE Add FILE contents to sed commands to be executed"
 //usage:     "\n       -i      Edit files in-place (else sends result to stdout)"
 //usage:     "\n       -n      Suppress automatic printing of pattern space"
 #include "libbb.h"
 #include "xregex.h"
 
+#if 0
+# define dbg(...) bb_error_msg(__VA_ARGS__)
+#else
+# define dbg(...) ((void)0)
+#endif
+
+
 enum {
        OPT_in_place = 1 << 0,
 };
@@ -90,6 +97,7 @@ typedef struct sed_cmd_s {
        regex_t *end_match;     /* sed -e '/match/,/end_match/cmd' */
        regex_t *sub_match;     /* For 's/sub_match/string/' */
        int beg_line;           /* 'sed 1p'   0 == apply commands to all lines */
+       int beg_line_orig;      /* copy of the above, needed for -i */
        int end_line;           /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */
 
        FILE *sw_file;          /* File (sw) command writes to, -1 for none. */
@@ -124,7 +132,7 @@ struct globals {
        regex_t *previous_regex_ptr;
 
        /* linked list of sed commands */
-       sed_cmd_t sed_cmd_head, *sed_cmd_tail;
+       sed_cmd_t *sed_cmd_head, **sed_cmd_tail;
 
        /* Linked list of append lines */
        llist_t *append_head;
@@ -149,7 +157,7 @@ struct BUG_G_too_big {
 #if ENABLE_FEATURE_CLEAN_UP
 static void sed_free_and_close_stuff(void)
 {
-       sed_cmd_t *sed_cmd = G.sed_cmd_head.next;
+       sed_cmd_t *sed_cmd = G.sed_cmd_head;
 
        llist_free(G.append_head, free);
 
@@ -215,12 +223,16 @@ static void parse_escapes(char *dest, const char *string, int len, char from, ch
 
 static char *copy_parsing_escapes(const char *string, int len)
 {
+       const char *s;
        char *dest = xmalloc(len + 1);
 
-       parse_escapes(dest, string, len, 'n', '\n');
+       /* sed recognizes \n */
        /* GNU sed also recognizes \t and \r */
-       parse_escapes(dest, dest, strlen(dest), 't', '\t');
-       parse_escapes(dest, dest, strlen(dest), 'r', '\r');
+       for (s = "\nn\tt\rr"; *s; s += 2) {
+               parse_escapes(dest, string, len, s[1], s[0]);
+               string = dest;
+               len = strlen(dest);
+       }
        return dest;
 }
 
@@ -270,7 +282,7 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
 static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
 {
        const char *cmdstr_ptr = cmdstr;
-       char delimiter;
+       unsigned char delimiter;
        int idx = 0;
 
        /* verify that the 's' or 'y' is followed by something.  That something
@@ -285,7 +297,7 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
 
        /* save the replacement string */
        cmdstr_ptr += idx + 1;
-       idx = index_of_next_unescaped_regexp_delim(-delimiter, cmdstr_ptr);
+       idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr);
        *replace = copy_parsing_escapes(cmdstr_ptr, idx);
 
        return ((cmdstr_ptr - cmdstr) + idx);
@@ -310,10 +322,11 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex)
                char *temp;
 
                delimiter = '/';
-               if (*my_str == '\\') delimiter = *++pos;
+               if (*my_str == '\\')
+                       delimiter = *++pos;
                next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
                temp = copy_parsing_escapes(pos, next);
-               *regex = xmalloc(sizeof(regex_t));
+               *regex = xzalloc(sizeof(regex_t));
                xregcomp(*regex, temp, G.regex_type|REG_NEWLINE);
                free(temp);
                /* Move position to next character after last delimiter */
@@ -422,8 +435,10 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
        /* compile the match string into a regex */
        if (*match != '\0') {
                /* If match is empty, we use last regex used at runtime */
-               sed_cmd->sub_match = xmalloc(sizeof(regex_t));
+               sed_cmd->sub_match = xzalloc(sizeof(regex_t));
+               dbg("xregcomp('%s',%x)", match, cflags);
                xregcomp(sed_cmd->sub_match, match, cflags);
+               dbg("regcomp ok");
        }
        free(match);
 
@@ -596,6 +611,7 @@ static void add_cmd(const char *cmdstr)
 
                /* first part (if present) is an address: either a '$', a number or a /regex/ */
                cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
+               sed_cmd->beg_line_orig = sed_cmd->beg_line;
 
                /* second part (if present) will begin with a comma */
                if (*cmdstr == ',') {
@@ -627,8 +643,8 @@ static void add_cmd(const char *cmdstr)
                cmdstr = parse_cmd_args(sed_cmd, cmdstr);
 
                /* Add the command to the command array */
-               G.sed_cmd_tail->next = sed_cmd;
-               G.sed_cmd_tail = G.sed_cmd_tail->next;
+               *G.sed_cmd_tail = sed_cmd;
+               G.sed_cmd_tail = &sed_cmd->next;
        }
 
        /* If we glued multiple lines together, free the memory. */
@@ -704,8 +720,12 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
        G.previous_regex_ptr = current_regex;
 
        /* Find the first match */
-       if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0))
+       dbg("matching '%s'", line);
+       if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) {
+               dbg("no match");
                return 0;
+       }
+       dbg("match");
 
        /* Initialize temporary output buffer. */
        G.pipeline.buf = xmalloc(PIPE_GROW);
@@ -717,12 +737,13 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
                int i;
 
                /* Work around bug in glibc regexec, demonstrated by:
-                  echo " a.b" | busybox sed 's [^ .]* x g'
-                  The match_count check is so not to break
-                  echo "hi" | busybox sed 's/^/!/g' */
+                * echo " a.b" | busybox sed 's [^ .]* x g'
+                * The match_count check is there so as not to break
+                * echo "hi" | busybox sed 's/^/!/g'
+                */
                if (!G.regmatch[0].rm_so && !G.regmatch[0].rm_eo && match_count) {
                        pipe_putc(*line++);
-                       continue;
+                       goto next;
                }
 
                match_count++;
@@ -734,7 +755,7 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
                ) {
                        for (i = 0; i < G.regmatch[0].rm_eo; i++)
                                pipe_putc(*line++);
-                       continue;
+                       goto next;
                }
 
                /* print everything before the match */
@@ -750,11 +771,14 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
                altered++;
 
                /* if we're not doing this globally, get out now */
-               if (sed_cmd->which_match)
+               if (sed_cmd->which_match != 0)
+                       break;
+ next:
+               if (*line == '\0')
                        break;
 
 //maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL?
-       } while (*line && regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
+       } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
 
        /* Copy rest of string into output pipeline */
        while (1) {
@@ -774,7 +798,7 @@ static sed_cmd_t *branch_to(char *label)
 {
        sed_cmd_t *sed_cmd;
 
-       for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
+       for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
                if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) {
                        return sed_cmd;
                }
@@ -950,24 +974,24 @@ static void process_files(void)
 
        /* For every line, go through all the commands */
  restart:
-       for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
+       for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
                int old_matched, matched;
 
                old_matched = sed_cmd->in_match;
 
                /* Determine if this command matches this line: */
 
-               //bb_error_msg("match1:%d", sed_cmd->in_match);
-               //bb_error_msg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
-               //              && !sed_cmd->beg_match && !sed_cmd->end_match));
-               //bb_error_msg("match3:%d", (sed_cmd->beg_line > 0
-               //      && (sed_cmd->end_line || sed_cmd->end_match
-               //          ? (sed_cmd->beg_line <= linenum)
-               //          : (sed_cmd->beg_line == linenum)
-               //          )
-               //      )
-               //bb_error_msg("match4:%d", (beg_match(sed_cmd, pattern_space)));
-               //bb_error_msg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));
+               dbg("match1:%d", sed_cmd->in_match);
+               dbg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
+                               && !sed_cmd->beg_match && !sed_cmd->end_match));
+               dbg("match3:%d", (sed_cmd->beg_line > 0
+                       && (sed_cmd->end_line || sed_cmd->end_match
+                           ? (sed_cmd->beg_line <= linenum)
+                           : (sed_cmd->beg_line == linenum)
+                           )
+                       ));
+               dbg("match4:%d", (beg_match(sed_cmd, pattern_space)));
+               dbg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));
 
                /* Are we continuing a previous multi-line match? */
                sed_cmd->in_match = sed_cmd->in_match
@@ -978,7 +1002,14 @@ static void process_files(void)
                        || (sed_cmd->beg_line > 0
                            && (sed_cmd->end_line || sed_cmd->end_match
                                  /* note: even if end is numeric and is < linenum too,
-                                  * GNU sed matches! We match too */
+                                  * GNU sed matches! We match too, therefore we don't
+                                  * check here that linenum <= end.
+                                  * Example:
+                                  * printf '1\n2\n3\n4\n' | sed -n '1{N;N;d};1p;2,3p;3p;4p'
+                                  * first three input lines are deleted;
+                                  * 4th line is matched and printed
+                                  * by "2,3" (!) and by "4" ranges
+                                  */
                                ? (sed_cmd->beg_line <= linenum)    /* N,end */
                                : (sed_cmd->beg_line == linenum)    /* N */
                                )
@@ -991,16 +1022,14 @@ static void process_files(void)
                /* Snapshot the value */
                matched = sed_cmd->in_match;
 
-               //bb_error_msg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
-               //sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
+               dbg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
+                       sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
 
                /* Is this line the end of the current match? */
 
                if (matched) {
                        /* once matched, "n,xxx" range is dead, disabling it */
-                       if (sed_cmd->beg_line > 0
-                        && !(option_mask32 & OPT_in_place) /* but not for -i */
-                       ) {
+                       if (sed_cmd->beg_line > 0) {
                                sed_cmd->beg_line = -2;
                        }
                        sed_cmd->in_match = !(
@@ -1014,7 +1043,8 @@ static void process_files(void)
                                /* or does this line match our last address regex */
                                || (sed_cmd->end_match && old_matched
                                     && (regexec(sed_cmd->end_match,
-                                                pattern_space, 0, NULL, 0) == 0))
+                                                pattern_space, 0, NULL, 0) == 0)
+                               )
                        );
                }
 
@@ -1048,8 +1078,8 @@ static void process_files(void)
                }
 
                /* actual sedding */
-               //bb_error_msg("pattern_space:'%s' next_line:'%s' cmd:%c",
-               //pattern_space, next_line, sed_cmd->cmd);
+               dbg("pattern_space:'%s' next_line:'%s' cmd:%c",
+                               pattern_space, next_line, sed_cmd->cmd);
                switch (sed_cmd->cmd) {
 
                /* Print line number */
@@ -1096,6 +1126,7 @@ static void process_files(void)
                case 's':
                        if (!do_subst_command(sed_cmd, &pattern_space))
                                break;
+                       dbg("do_subst_command succeeeded:'%s'", pattern_space);
                        substituted |= 1;
 
                        /* handle p option */
@@ -1404,11 +1435,12 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
                add_input_file(stdin);
        } else {
                int i;
-               FILE *file;
 
                for (i = 0; argv[i]; i++) {
                        struct stat statbuf;
                        int nonstdoutfd;
+                       FILE *file;
+                       sed_cmd_t *sed_cmd;
 
                        if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) {
                                add_input_file(stdin);
@@ -1420,11 +1452,13 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
                                status = EXIT_FAILURE;
                                continue;
                        }
+                       add_input_file(file);
                        if (!(opt & OPT_in_place)) {
-                               add_input_file(file);
                                continue;
                        }
 
+                       /* -i: process each FILE separately: */
+
                        G.outname = xasprintf("%sXXXXXX", argv[i]);
                        nonstdoutfd = xmkstemp(G.outname);
                        G.nonstdout = xfdopen_for_write(nonstdoutfd);
@@ -1435,15 +1469,20 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
                         * but GNU sed 4.2.1 does not preserve them either */
                        fchmod(nonstdoutfd, statbuf.st_mode);
                        fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid);
-                       add_input_file(file);
+
                        process_files();
                        fclose(G.nonstdout);
-
                        G.nonstdout = stdout;
+
                        /* unlink(argv[i]); */
                        xrename(G.outname, argv[i]);
                        free(G.outname);
                        G.outname = NULL;
+
+                       /* Re-enable disabled range matches */
+                       for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
+                               sed_cmd->beg_line = sed_cmd->beg_line_orig;
+                       }
                }
                /* Here, to handle "sed 'cmds' nonexistent_file" case we did:
                 * if (G.current_input_file >= G.input_file_count)