Patch from Kent Robotti to bring fdisk in sync with v2.12 final.
[oweals/busybox.git] / editors / sed.c
index a616c992c26b987604acc72002dd84b4e35090ca..6beba0661893edae23b60811193dda4cbee92164 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
  * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
  * Copyright (C) 2002  Matt Kraai
+ * Copyright (C) 2003 by Glenn McGrath <bug1@optushome.com.au>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -31,6 +32,9 @@
         - file commands: (r)ead
         - backreferences in substitution expressions (\1, \2...\9)
         - grouped commands: {cmd1;cmd2}
+        - transliteration (y/source-chars/dest-chars/)
+        - pattern space hold space storing / swapping (g, h, x)
+        - labels / branching (: label, b, t)
 
         (Note: Specifying an address (range) to match is *optional*; commands
         default to the whole pattern space if no specific address match was
 
        Unsupported features:
 
-        - transliteration (y/source-chars/dest-chars/) (use 'tr')
-        - no pattern space hold space storing / swapping (x, etc.)
-        - no labels / branching (: label, b, t, and friends)
+        - GNU extensions
         - and lots, lots more.
 
+       Bugs:
+       
+        - Can't substitute globally using ^ or $ in regex, e.g. "aah" | sed 's/^a/b/g'
+
        Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
 */
 
 #include <stdlib.h>
 #include "busybox.h"
 
-/* the spec says label must be at least 8 chars, behavious is unspecified if more than 8 chars */
-#define SED_LABEL_LENGTH       8
-
-/* externs */
-extern void xregcomp(regex_t * preg, const char *regex, int cflags);
-extern int optind;             /* in unistd.h */
-extern char *optarg;   /* ditto */
-
-/* options */
-static int be_quiet = 0;
-static const char bad_format_in_subst[] =
-       "bad format in substitution expression";
-
 typedef struct sed_cmd_s {
        /* Order by alignment requirements */
 
@@ -75,6 +68,13 @@ typedef struct sed_cmd_s {
        regex_t *beg_match;     /* sed -e '/match/cmd' */
        regex_t *end_match;     /* sed -e '/match/,/end_match/cmd' */
 
+       int beg_line;           /* 'sed 1p'   0 == no beginning line, apply commands to all lines */
+       int end_line;           /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
+
+       /* inversion flag */
+       int invert;                     /* the '!' after the address */
+//     int block_cmd;  /* This command is part of a group that has a command address */
+
        /* SUBSTITUTION COMMAND SPECIFIC FIELDS */
 
        /* sed -e 's/sub_match/replace/' */
@@ -87,9 +87,6 @@ typedef struct sed_cmd_s {
        /* FILE COMMAND (r) SPECIFIC FIELDS */
        char *filename;
 
-       /* address storage */
-       int beg_line;           /* 'sed 1p'   0 == no begining line, apply commands to all lines */
-       int end_line;           /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
        /* SUBSTITUTION COMMAND SPECIFIC FIELDS */
 
        unsigned int num_backrefs:4;    /* how many back references (\1..\9) */
@@ -105,34 +102,41 @@ typedef struct sed_cmd_s {
        /* the command */
        char cmd;                       /* p,d,s (add more at your leisure :-) */
 
-       /* inversion flag */
-       int invert;                     /* the '!' after the address */
-
        /* Branch commands */
-       char label[SED_LABEL_LENGTH + 1];
+       char *label;
 
        /* next command in list (sequential list of specified commands) */
-       struct sed_cmd_s *linear;
+       struct sed_cmd_s *next;
 
 } sed_cmd_t;
 
+
+/* externs */
+extern void xregcomp(regex_t * preg, const char *regex, int cflags);
+extern int optind;             /* in unistd.h */
+extern char *optarg;   /* ditto */
+
 /* globals */
+/* options */
+static int be_quiet = 0;
+static const char bad_format_in_subst[] =
+       "bad format in substitution expression";
+
 /* linked list of sed commands */
 static sed_cmd_t sed_cmd_head;
 static sed_cmd_t *sed_cmd_tail = &sed_cmd_head;
-static sed_cmd_t *block_cmd;
 
-static int in_block = 0;
 const char *const semicolon_whitespace = "; \n\r\t\v\0";
 static regex_t *previous_regex_ptr = NULL;
 
+
 #ifdef CONFIG_FEATURE_CLEAN_UP
 static void destroy_cmd_strs(void)
 {
-       sed_cmd_t *sed_cmd = sed_cmd_head.linear;
+       sed_cmd_t *sed_cmd = sed_cmd_head.next;
 
        while (sed_cmd) {
-               sed_cmd_t *sed_cmd_next = sed_cmd->linear;
+               sed_cmd_t *sed_cmd_next = sed_cmd->next;
 
                if (sed_cmd->beg_match) {
                        regfree(sed_cmd->beg_match);
@@ -160,7 +164,7 @@ static void destroy_cmd_strs(void)
  * a backslash ('\').
  */
 static int index_of_next_unescaped_regexp_delim(const char delimiter,
-                                                                                               const char *str)
+       const char *str)
 {
        int bracket = -1;
        int escaped = 0;
@@ -169,8 +173,8 @@ static int index_of_next_unescaped_regexp_delim(const char delimiter,
 
        for (; (ch = str[idx]); idx++) {
                if (bracket != -1) {
-                       if (ch == ']' && !(bracket == idx - 1 ||
-                                                          (bracket == idx - 2 && str[idx - 1] == '^')))
+                       if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2
+                                               && str[idx - 1] == '^')))
                                bracket = -1;
                } else if (escaped)
                        escaped = 0;
@@ -387,7 +391,7 @@ static int parse_edit_cmd(sed_cmd_t * sed_cmd, const char *editstr)
        /* store the edit line text */
        sed_cmd->editline = xmalloc(strlen(&editstr[2]) + 2);
        for (i = 2, j = 0;
-                editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; i++, j++) {
+               editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; i++, j++) {
                if ((editstr[i] == '\\') && strchr("\n\r", editstr[i + 1]) != NULL) {
                        sed_cmd->editline[j] = '\n';
                        i++;
@@ -438,14 +442,13 @@ static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr)
        filenamelen = strcspn(&filecmdstr[idx], semicolon_whitespace);
        sed_cmd->filename = xmalloc(filenamelen + 1);
        safe_strncpy(sed_cmd->filename, &filecmdstr[idx], filenamelen + 1);
-
        return idx + filenamelen;
 }
 
 /*
  *  Process the commands arguments
  */
-static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
+static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr)
 {
        /* handle (s)ubstitution command */
        if (sed_cmd->cmd == 's') {
@@ -469,11 +472,8 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
                int length;
 
                cmdstr += strspn(cmdstr, " ");
-               length = strcspn(cmdstr, "; ");
-               if (length > SED_LABEL_LENGTH) {
-                       length = SED_LABEL_LENGTH;
-               }
-               strncpy(sed_cmd->label, cmdstr, length);
+               length = strcspn(cmdstr, "; \n");
+               sed_cmd->label = strndup(cmdstr, length);
                cmdstr += length;
        }
        /* translation command */
@@ -483,7 +483,7 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
        /* if it wasnt a single-letter command that takes no arguments
         * then it must be an invalid command.
         */
-       else if (strchr("dghnNpPqx=", sed_cmd->cmd) == 0) {
+       else if (strchr("dgGhHnNpPqx={}", sed_cmd->cmd) == 0) {
                bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd);
        }
 
@@ -511,13 +511,6 @@ static char *add_cmd(sed_cmd_t * sed_cmd, char *cmdstr)
                return (strpbrk(cmdstr, "\n\r"));
        }
 
-       /* Test for end of block */
-       if (*cmdstr == '}') {
-               in_block = 0;
-               cmdstr++;
-               return (cmdstr);
-       }
-
        /* parse the command
         * format is: [addr][,addr]cmd
         *            |----||-----||-|
@@ -535,7 +528,7 @@ static char *add_cmd(sed_cmd_t * sed_cmd, char *cmdstr)
                idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
                if (idx == 0) {
                        bb_error_msg_and_die("get_address: no address found in string\n"
-                                                                "\t(you probably didn't check the string you passed me)");
+                               "\t(you probably didn't check the string you passed me)");
                }
                cmdstr += idx;
        }
@@ -570,33 +563,14 @@ static char *add_cmd(sed_cmd_t * sed_cmd, char *cmdstr)
        if (*cmdstr == '\0')
                bb_error_msg_and_die("missing command");
 
-       /* This is the start of a block of commands */
-       if (*cmdstr == '{') {
-               if (in_block != 0) {
-                       bb_error_msg_and_die("cant handle sub-blocks");
-               }
-               in_block = 1;
-               block_cmd = sed_cmd;
-
-               return (cmdstr + 1);
-       }
-
        sed_cmd->cmd = *cmdstr;
        cmdstr++;
 
-       if (in_block == 1) {
-               sed_cmd->beg_match = block_cmd->beg_match;
-               sed_cmd->end_match = block_cmd->end_match;
-               sed_cmd->beg_line = block_cmd->beg_line;
-               sed_cmd->end_line = block_cmd->end_line;
-               sed_cmd->invert = block_cmd->invert;
-       }
-
        cmdstr = parse_cmd_str(sed_cmd, cmdstr);
 
        /* Add the command to the command array */
-       sed_cmd_tail->linear = sed_cmd;
-       sed_cmd_tail = sed_cmd_tail->linear;
+       sed_cmd_tail->next = sed_cmd;
+       sed_cmd_tail = sed_cmd_tail->next;
 
        return (cmdstr);
 }
@@ -636,8 +610,8 @@ static void load_cmd_file(char *filename)
        while ((line = bb_get_line_from_file(cmdfile)) != NULL) {
                /* if a line ends with '\' it needs the next line appended to it */
                while (((e = last_char_is(line, '\n')) != NULL)
-                          && (e > line) && (e[-1] == '\\')
-                          && ((nextline = bb_get_line_from_file(cmdfile)) != NULL)) {
+                       && (e > line) && (e[-1] == '\\')
+                       && ((nextline = bb_get_line_from_file(cmdfile)) != NULL)) {
                        line = xrealloc(line, (e - line) + 1 + strlen(nextline) + 1);
                        strcat(line, nextline);
                        free(nextline);
@@ -685,9 +659,7 @@ void pipe_putc(struct pipeline *const pipeline, char c)
 #endif
 
 static void print_subst_w_backrefs(const char *line, const char *replace,
-                                                                  regmatch_t * regmatch,
-                                                                  struct pipeline *const pipeline,
-                                                                  int matches)
+       regmatch_t * regmatch, struct pipeline *const pipeline, int matches)
 {
        int i;
 
@@ -706,7 +678,7 @@ static void print_subst_w_backrefs(const char *line, const char *replace,
                        /* print out the text held in regmatch[backref] */
                        if (backref <= matches && regmatch[backref].rm_so != -1)
                                for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo;
-                                        j++)
+                                       j++)
                                        pipeputc(line[j]);
                }
 
@@ -720,7 +692,7 @@ static void print_subst_w_backrefs(const char *line, const char *replace,
                 * fortunately, regmatch[0] contains the indicies to the whole matched
                 * expression (kinda seems like it was designed for just such a
                 * purpose...) */
-               else if (replace[i] == '&' && replace[i - 1] != '\\') {
+               else if (replace[i] == '&') {
                        int j;
 
                        for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++)
@@ -766,9 +738,9 @@ static int do_subst_command(sed_cmd_t * sed_cmd, char **line)
 
        /* and now, as long as we've got a line to try matching and if we can match
         * the search string, we make substitutions */
-       while ((*hackline || !altered) && (regexec(current_regex, hackline,
-                                                                                          sed_cmd->num_backrefs + 1,
-                                                                                          regmatch, 0) != REG_NOMATCH)) {
+       while ((*hackline || !altered)
+               && (regexec(current_regex, hackline, sed_cmd->num_backrefs + 1,
+                               regmatch, 0) != REG_NOMATCH)) {
                int i;
 
                /* print everything before the match */
@@ -776,8 +748,8 @@ static int do_subst_command(sed_cmd_t * sed_cmd, char **line)
                        pipeputc(hackline[i]);
 
                /* then print the substitution string */
-               print_subst_w_backrefs(hackline, sed_cmd->replace, regmatch,
-                                                          pipeline, sed_cmd->num_backrefs);
+               print_subst_w_backrefs(hackline, sed_cmd->replace, regmatch, pipeline,
+                       sed_cmd->num_backrefs);
 
                /* advance past the match */
                hackline += regmatch[0].rm_eo;
@@ -806,8 +778,8 @@ static sed_cmd_t *branch_to(const char *label)
 {
        sed_cmd_t *sed_cmd;
 
-       for (sed_cmd = sed_cmd_head.linear; sed_cmd; sed_cmd = sed_cmd->linear) {
-               if (strcmp(sed_cmd->label, label) == 0) {
+       for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
+               if ((sed_cmd->label) && (strcmp(sed_cmd->label, label) == 0)) {
                        break;
                }
        }
@@ -829,12 +801,14 @@ static void process_file(FILE * file)
        if (pattern_space == NULL) {
                return;
        }
-
        /* go through every line in the file */
        do {
                char *next_line;
                sed_cmd_t *sed_cmd;
                int substituted = 0;
+               /* This enables whole blocks of commands to be masked out if the lead address doesn't match */
+               int block_mask = 1;
 
                /* Read one line in advance so we can act on the last line, the '$' address */
                next_line = bb_get_chomped_line_from_file(file);
@@ -844,34 +818,34 @@ static void process_file(FILE * file)
                force_print = 0;
 
                /* for every line, go through all the commands */
-               for (sed_cmd = sed_cmd_head.linear; sed_cmd;
-                        sed_cmd = sed_cmd->linear) {
+               for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
                        int deleted = 0;
 
                        /*
                         * entry point into sedding...
                         */
                        int matched = (
-                                                         /* no range necessary */
-                                                         (sed_cmd->beg_line == 0
-                                                          && sed_cmd->end_line == 0
-                                                          && sed_cmd->beg_match == NULL
-                                                          && sed_cmd->end_match == NULL) ||
-                                                         /* this line number is the first address we're looking for */
-                                                         (sed_cmd->beg_line
-                                                          && (sed_cmd->beg_line == linenum)) ||
-                                                         /* this line matches our first address regex */
-                                                         (sed_cmd->beg_match
-                                                          &&
-                                                          (regexec
-                                                               (sed_cmd->beg_match, pattern_space, 0, NULL,
-                                                                0) == 0)) ||
-                                                         /* we are currently within the beginning & ending address range */
-                                                         still_in_range || ((sed_cmd->beg_line == -1)
-                                                                                                && (next_line == NULL))
+                               /* no range necessary */
+                               (sed_cmd->beg_line == 0 && sed_cmd->end_line == 0
+                                       && sed_cmd->beg_match == NULL
+                                       && sed_cmd->end_match == NULL) ||
+                               /* this line number is the first address we're looking for */
+                               (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum)) ||
+                               /* this line matches our first address regex */
+                               (sed_cmd->beg_match
+                                       && (regexec(sed_cmd->beg_match, pattern_space, 0, NULL,
+                                                       0) == 0)) ||
+                               /* we are currently within the beginning & ending address range */
+                               still_in_range || ((sed_cmd->beg_line == -1)
+                                       && (next_line == NULL))
                                );
 
-                       if (sed_cmd->invert ^ matched) {
+                       if (sed_cmd->cmd == '{') {
+                               block_mask = block_mask & matched;
+                       }
+//                     matched &= block_mask;
+
+                       if (sed_cmd->invert ^ (matched & block_mask)) {
                                /* Update last used regex incase a blank substitute BRE is found */
                                if (sed_cmd->beg_match) {
                                        previous_regex_ptr = sed_cmd->beg_match;
@@ -928,7 +902,7 @@ static void process_file(FILE * file)
 
                                                pattern_space =
                                                        xrealloc(pattern_space,
-                                                                        strlen(pattern_space) + 2);
+                                                       strlen(pattern_space) + 2);
                                                tmp = strchr(pattern_space + offset, '\n');
                                                memmove(tmp + 1, tmp, strlen(tmp) + 1);
                                                tmp[0] = '\\';
@@ -952,9 +926,8 @@ static void process_file(FILE * file)
                                        }
 #endif
                                        altered |= substituted;
-                                       if (!be_quiet && altered && ((sed_cmd->linear == NULL)
-                                                                                                || (sed_cmd->linear->cmd !=
-                                                                                                        's'))) {
+                                       if (!be_quiet && altered && ((sed_cmd->next == NULL)
+                                                       || (sed_cmd->next->cmd != 's'))) {
                                                force_print = 1;
                                        }
 
@@ -980,10 +953,8 @@ static void process_file(FILE * file)
                                                /* multi-address case */
                                                /* - matching text */
                                                || (sed_cmd->end_match
-                                                       &&
-                                                       (regexec
-                                                        (sed_cmd->end_match, pattern_space, 0, NULL,
-                                                         0) == 0))
+                                                       && (regexec(sed_cmd->end_match, pattern_space, 0,
+                                                                       NULL, 0) == 0))
                                                /* - matching line numbers */
                                                || (sed_cmd->end_line > 0
                                                        && sed_cmd->end_line == linenum)) {
@@ -996,13 +967,22 @@ static void process_file(FILE * file)
                                case 'r':{
                                        FILE *outfile;
 
-                                       puts(pattern_space);
                                        outfile = fopen(sed_cmd->filename, "r");
-                                       if (outfile)
+                                       if (outfile) {
+                                               char *line;
+
+                                               while ((line =
+                                                               bb_get_chomped_line_from_file(outfile)) !=
+                                                       NULL) {
+                                                       pattern_space =
+                                                               xrealloc(pattern_space,
+                                                               strlen(line) + strlen(pattern_space) + 2);
+                                                       strcat(pattern_space, "\n");
+                                                       strcat(pattern_space, line);
+                                               }
                                                bb_xprint_and_close_file(outfile);
-                                       /* else if we couldn't open the output file,
-                                        * no biggie, just don't print anything */
-                                       altered++;
+                                       }
+
                                }
                                        break;
                                case 'q':       /* Branch to end of script and quit */
@@ -1021,8 +1001,7 @@ static void process_file(FILE * file)
                                        if (next_line) {
                                                pattern_space =
                                                        realloc(pattern_space,
-                                                                       strlen(pattern_space) +
-                                                                       strlen(next_line) + 2);
+                                                       strlen(pattern_space) + strlen(next_line) + 2);
                                                strcat(pattern_space, "\n");
                                                strcat(pattern_space, next_line);
                                                next_line = bb_get_chomped_line_from_file(file);
@@ -1055,17 +1034,36 @@ static void process_file(FILE * file)
                                        free(pattern_space);
                                        pattern_space = strdup(hold_space);
                                        break;
+                               case 'G': {     /* Append newline and hold space to pattern space */
+                                       int pattern_space_size = 0;
+                                       if (pattern_space) {
+                                               pattern_space_size = strlen(pattern_space);
+                                       }
+                                       pattern_space = xrealloc(pattern_space, pattern_space_size + strlen(hold_space) + 2);
+                                       strcat(pattern_space, "\n");
+                                       strcat(pattern_space, hold_space); 
+                                       break;
+                               }
                                case 'h':       /* Replace hold space with pattern space */
                                        free(hold_space);
                                        hold_space = strdup(pattern_space);
                                        break;
+                               case 'H': {     /* Append newline and pattern space to hold space */
+                                       int hold_space_size = 0;
+                                       if (hold_space) {
+                                               hold_space_size = strlen(hold_space);
+                                       }
+                                       hold_space = xrealloc(hold_space, hold_space_size + strlen(pattern_space) + 2);
+                                       strcat(hold_space, "\n");
+                                       strcat(hold_space, pattern_space); 
+                                       break;
+                               }
                                case 'x':{
                                        /* Swap hold and pattern space */
-                                       char *tmp;
-
-                                       tmp = pattern_space;
+                                       char *tmp = pattern_space;
                                        pattern_space = hold_space;
                                        hold_space = tmp;
+                                       break;
                                }
                                }
                        }
@@ -1075,43 +1073,31 @@ static void process_file(FILE * file)
                         */
                        if (matched) {
                                if (
-                                          /* this is a single-address command or... */
-                                          (sed_cmd->end_line == 0 && sed_cmd->end_match == NULL)
-                                          || (
-                                                         /* If only one address */
-                                                         /* we were in the middle of our address range (this
-                                                          * isn't the first time through) and.. */
-                                                         (still_in_range == 1) && (
-                                                                                                                  /* this line number is the last address we're looking for or... */
-                                                                                                                  (sed_cmd->
-                                                                                                                       end_line
-                                                                                                                       && (sed_cmd->
-                                                                                                                               end_line ==
-                                                                                                                               linenum))
-                                                                                                                  ||
-                                                                                                                  /* this line matches our last address regex */
-                                                                                                                  (sed_cmd->
-                                                                                                                       end_match
-                                                                                                                       &&
-                                                                                                                       (regexec
-                                                                                                                        (sed_cmd->
-                                                                                                                         end_match,
-                                                                                                                         pattern_space,
-                                                                                                                         0, NULL,
-                                                                                                                         0) == 0))
-                                                         )
-                                          )
-                                       ) {
+                                       /* this is a single-address command or... */
+                                       (sed_cmd->end_line == 0 && sed_cmd->end_match == NULL)
+                                       /* If only one address */
+                                       /* we were in the middle of our address range (this
+                                        * isn't the first time through) and.. */
+                                       || ((still_in_range == 1)
+                                               /* this line number is the last address we're looking for or... */
+                                               && ((sed_cmd->end_line > 0
+                                                               && (sed_cmd->end_line == linenum))
+                                                       /* this line matches our last address regex */
+                                                       || (sed_cmd->end_match
+                                                               && (regexec(sed_cmd->end_match, pattern_space,
+                                                                               0, NULL, 0) == 0))))) {
                                        /* we're out of our address range */
                                        still_in_range = 0;
-                               }
-
-                               /* didn't hit the exit? then we're still in the middle of an address range */
-                               else {
+                               } else {
+                                       /* didn't hit the exit? then we're still in the middle of an address range */
                                        still_in_range = 1;
                                }
                        }
 
+                       if (sed_cmd->cmd == '}') {
+                               block_mask = 1;
+                       }
+
                        if (deleted)
                                break;
                }
@@ -1160,7 +1146,7 @@ extern int sed_main(int argc, char **argv)
 
        /* if we didn't get a pattern from a -e and no command file was specified,
         * argv[optind] should be the pattern. no pattern, no worky */
-       if (sed_cmd_head.linear == NULL) {
+       if (sed_cmd_head.next == NULL) {
                if (argv[optind] == NULL)
                        bb_show_usage();
                else {