Bugfix for: echo '123456789' | sed 's/./|&/5'
[oweals/busybox.git] / editors / sed.c
index 39b28d0068891591201dfaa1bef4c6e56567eff9..9f3af33ed303e57e587ab4bc4b42c9b12e6371c8 100644 (file)
        Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
 */
 
-#include <stdio.h>
-#include <unistd.h>            /* for getopt() */
-#include <errno.h>
-#include <ctype.h>             /* for isspace() */
-#include <stdlib.h>
-#include <string.h>
 #include "busybox.h"
 #include "xregex.h"
 
@@ -94,8 +88,6 @@ typedef struct sed_cmd_s {
     struct sed_cmd_s *next;    /* Next command (linked list, NULL terminated) */
 } sed_cmd_t;
 
-static const char bad_format_in_subst[] =
-       "bad format in substitution expression";
 static const char *const semicolon_whitespace = "; \n\r\t\v";
 
 struct sed_globals
@@ -134,13 +126,13 @@ void sed_free_and_close_stuff(void)
 {
        sed_cmd_t *sed_cmd = bbg.sed_cmd_head.next;
 
-       llist_free_contents(bbg.append_head);
+       llist_free(bbg.append_head, free);
 
        while (sed_cmd) {
                sed_cmd_t *sed_cmd_next = sed_cmd->next;
 
                if(sed_cmd->file)
-                       bb_xprint_and_close_file(sed_cmd->file);
+                       xprint_and_close_file(sed_cmd->file);
 
                if (sed_cmd->beg_match) {
                        regfree(sed_cmd->beg_match);
@@ -175,7 +167,7 @@ static void cleanup_outname(void)
 
 /* strdup, replacing "\n" with '\n', and "\delimiter" with 'delimiter' */
 
-static void parse_escapes(char *dest, const char *string, int len, char from, char to)
+static void parse_escapes(char *dest, char *string, int len, char from, char to)
 {
        int i=0;
 
@@ -192,7 +184,7 @@ static void parse_escapes(char *dest, const char *string, int len, char from, ch
        *dest=0;
 }
 
-static char *copy_parsing_escapes(const char *string, int len)
+static char *copy_parsing_escapes(char *string, int len)
 {
        char *dest=xmalloc(len+1);
 
@@ -205,18 +197,22 @@ static char *copy_parsing_escapes(const char *string, int len)
  * index_of_next_unescaped_regexp_delim - walks left to right through a string
  * beginning at a specified index and returns the index of the next regular
  * expression delimiter (typically a forward * slash ('/')) not preceded by
- * a backslash ('\').
+ * a backslash ('\').  A negative delimiter disables square bracket checking.
  */
-static int index_of_next_unescaped_regexp_delim(const char delimiter,
-       const char *str)
+static int index_of_next_unescaped_regexp_delim(int delimiter, char *str)
 {
        int bracket = -1;
        int escaped = 0;
        int idx = 0;
        char ch;
 
+       if (delimiter < 0) {
+               bracket--;
+               delimiter *= -1;
+       }
+
        for (; (ch = str[idx]); idx++) {
-               if (bracket != -1) {
+               if (bracket >= 0) {
                        if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2
                                        && str[idx - 1] == '^')))
                                bracket = -1;
@@ -224,43 +220,38 @@ static int index_of_next_unescaped_regexp_delim(const char delimiter,
                        escaped = 0;
                else if (ch == '\\')
                        escaped = 1;
-               else if (ch == '[')
+               else if (bracket == -1 && ch == '[')
                        bracket = idx;
                else if (ch == delimiter)
                        return idx;
        }
 
        /* if we make it to here, we've hit the end of the string */
-       return -1;
+       bb_error_msg_and_die("unmatched '%c'",delimiter);
 }
 
 /*
  *  Returns the index of the third delimiter
  */
-static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
+static int parse_regex_delim(char *cmdstr, char **match, char **replace)
 {
-       const char *cmdstr_ptr = cmdstr;
+       char *cmdstr_ptr = cmdstr;
        char delimiter;
        int idx = 0;
 
        /* verify that the 's' or 'y' is followed by something.  That something
         * (typically a 'slash') is now our regexp delimiter... */
-       if (*cmdstr == '\0') bb_error_msg_and_die(bad_format_in_subst);
+       if (*cmdstr == '\0')
+               bb_error_msg_and_die("bad format in substitution expression");
        delimiter = *(cmdstr_ptr++);
 
        /* save the match string */
        idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
-       if (idx == -1) {
-               bb_error_msg_and_die(bad_format_in_subst);
-       }
        *match = copy_parsing_escapes(cmdstr_ptr, idx);
 
        /* save the replacement string */
        cmdstr_ptr += idx + 1;
-       idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
-       if (idx == -1) {
-               bb_error_msg_and_die(bad_format_in_subst);
-       }
+       idx = index_of_next_unescaped_regexp_delim(-delimiter, cmdstr_ptr);
        *replace = copy_parsing_escapes(cmdstr_ptr, idx);
 
        return ((cmdstr_ptr - cmdstr) + idx);
@@ -287,21 +278,18 @@ static int get_address(char *my_str, int *linenum, regex_t ** regex)
                if (*my_str == '\\') delimiter = *(++pos);
                else delimiter = '/';
                next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
-               if (next == -1)
-                       bb_error_msg_and_die("unterminated match expression");
-
-               temp=copy_parsing_escapes(pos,next);
+               temp = copy_parsing_escapes(pos,next);
                *regex = (regex_t *) xmalloc(sizeof(regex_t));
                xregcomp(*regex, temp, bbg.regex_type|REG_NEWLINE);
                free(temp);
                /* Move position to next character after last delimiter */
-               pos+=(next+1);
+               pos += (next+1);
        }
        return pos - my_str;
 }
 
 /* Grab a filename.  Whitespace at start is skipped, then goes to EOL. */
-static int parse_file_cmd(sed_cmd_t *sed_cmd, const char *filecmdstr, char **retval)
+static int parse_file_cmd(sed_cmd_t *sed_cmd, char *filecmdstr, char **retval)
 {
        int start = 0, idx, hack=0;
 
@@ -312,13 +300,13 @@ static int parse_file_cmd(sed_cmd_t *sed_cmd, const char *filecmdstr, char **ret
        /* If lines glued together, put backslash back. */
        if(filecmdstr[idx]=='\n') hack=1;
        if(idx==start) bb_error_msg_and_die("Empty filename");
-       *retval = bb_xstrndup(filecmdstr+start, idx-start+hack+1);
+       *retval = xstrndup(filecmdstr+start, idx-start+hack+1);
        if(hack) *(idx+*retval)='\\';
 
        return idx;
 }
 
-static int parse_subst_cmd(sed_cmd_t *const sed_cmd, char *substr)
+static int parse_subst_cmd(sed_cmd_t *sed_cmd, char *substr)
 {
        int cflags = bbg.regex_type;
        char *match;
@@ -418,7 +406,7 @@ static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr)
                        } else if(isspace(*cmdstr)) cmdstr++;
                        else break;
                }
-               sed_cmd->string = bb_xstrdup(cmdstr);
+               sed_cmd->string = xstrdup(cmdstr);
                parse_escapes(sed_cmd->string,sed_cmd->string,strlen(cmdstr),0,0);
                cmdstr += strlen(cmdstr);
        /* handle file cmds: (r)ead */
@@ -427,7 +415,7 @@ static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr)
                        bb_error_msg_and_die("Command only uses one address");
                cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string);
                if(sed_cmd->cmd=='w')
-                       sed_cmd->file=bb_xfopen(sed_cmd->string,"w");
+                       sed_cmd->file=xfopen(sed_cmd->string,"w");
        /* handle branch commands */
        } else if (strchr(":btT", sed_cmd->cmd)) {
                int length;
@@ -435,7 +423,7 @@ static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr)
                while(isspace(*cmdstr)) cmdstr++;
                length = strcspn(cmdstr, semicolon_whitespace);
                if (length) {
-                       sed_cmd->string = bb_xstrndup(cmdstr, length);
+                       sed_cmd->string = xstrndup(cmdstr, length);
                        cmdstr += length;
                }
        }
@@ -478,15 +466,15 @@ static void add_cmd(char *cmdstr)
 
        /* Append this line to any unfinished line from last time. */
        if (bbg.add_cmd_line) {
-               cmdstr = bb_xasprintf("%s\n%s", bbg.add_cmd_line, cmdstr);
+               cmdstr = xasprintf("%s\n%s", bbg.add_cmd_line, cmdstr);
                free(bbg.add_cmd_line);
                bbg.add_cmd_line = cmdstr;
-       } else bbg.add_cmd_line=NULL;
+       }
 
        /* If this line ends with backslash, request next line. */
        temp=strlen(cmdstr);
        if(temp && cmdstr[temp-1]=='\\') {
-               if (!bbg.add_cmd_line) bbg.add_cmd_line = bb_xstrdup(cmdstr);
+               if (!bbg.add_cmd_line) bbg.add_cmd_line = xstrdup(cmdstr);
                bbg.add_cmd_line[temp-1] = 0;
                return;
        }
@@ -524,7 +512,7 @@ static void add_cmd(char *cmdstr)
 
                        cmdstr++;
                        idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
-                       if (!idx) bb_error_msg_and_die("no address after comma\n");
+                       if (!idx) bb_error_msg_and_die("no address after comma");
                        cmdstr += idx;
                }
 
@@ -569,7 +557,7 @@ static void pipe_putc(char c)
        bbg.pipeline.buf[bbg.pipeline.idx++] = c;
 }
 
-static void do_subst_w_backrefs(const char *line, const char *replace)
+static void do_subst_w_backrefs(char *line, char *replace)
 {
        int i,j;
 
@@ -639,7 +627,7 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line)
                   end of match and continue */
                if(sed_cmd->which_match && sed_cmd->which_match!=match_count) {
                        for(i=0;i<bbg.regmatch[0].rm_eo;i++)
-                               pipe_putc(oldline[i]);
+                               pipe_putc(*(oldline++));
                        continue;
                }
 
@@ -669,7 +657,7 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line)
 }
 
 /* Set command pointer to point to this label.  (Does not handle null label.) */
-static sed_cmd_t *branch_to(const char *label)
+static sed_cmd_t *branch_to(char *label)
 {
        sed_cmd_t *sed_cmd;
 
@@ -683,7 +671,7 @@ static sed_cmd_t *branch_to(const char *label)
 
 static void append(char *s)
 {
-       bbg.append_head = llist_add_to_end(bbg.append_head, bb_xstrdup(s));
+       llist_add_to_end(&bbg.append_head, xstrdup(s));
 }
 
 static void flush_append(void)
@@ -737,8 +725,8 @@ static int puts_maybe_newline(char *s, FILE *file, int missing_newline, int no_n
        if(!no_newline) fputc('\n',file);
 
     if(ferror(file)) {
-               fprintf(stderr,"Write failed.\n");
-               exit(4);  /* It's what gnu sed exits with... */
+               bb_default_error_retval = 4;  /* It's what gnu sed exits with... */
+               bb_error_msg_and_die(bb_msg_write_error);
        }
 
        return no_newline;
@@ -864,7 +852,7 @@ restart:
                                                char *tmp = strchr(pattern_space,'\n');
 
                                                if(tmp) {
-                                                       tmp=bb_xstrdup(tmp+1);
+                                                       tmp=xstrdup(tmp+1);
                                                        free(pattern_space);
                                                        pattern_space=tmp;
                                                        goto restart;
@@ -919,7 +907,7 @@ restart:
                                                        while ((line = bb_get_chomped_line_from_file(rfile))
                                                                        != NULL)
                                                                append(line);
-                                                       bb_xprint_and_close_file(rfile);
+                                                       xprint_and_close_file(rfile);
                                                }
 
                                                break;
@@ -1008,7 +996,7 @@ restart:
                                        }
                                        case 'g':       /* Replace pattern space with hold space */
                                                free(pattern_space);
-                                               pattern_space = bb_xstrdup(bbg.hold_space ? bbg.hold_space : "");
+                                               pattern_space = xstrdup(bbg.hold_space ? bbg.hold_space : "");
                                                break;
                                        case 'G':       /* Append newline and hold space to pattern space */
                                        {
@@ -1031,7 +1019,7 @@ restart:
                                        }
                                        case 'h':       /* Replace hold space with pattern space */
                                                free(bbg.hold_space);
-                                               bbg.hold_space = bb_xstrdup(pattern_space);
+                                               bbg.hold_space = xstrdup(pattern_space);
                                                break;
                                        case 'H':       /* Append newline and pattern space to hold space */
                                        {
@@ -1054,7 +1042,7 @@ restart:
                                        case 'x': /* Exchange hold and pattern space */
                                        {
                                                char *tmp = pattern_space;
-                                               pattern_space = bbg.hold_space;
+                                               pattern_space = bbg.hold_space ? : xzalloc(1);
                                                no_newline=0;
                                                bbg.hold_space = tmp;
                                                break;
@@ -1084,7 +1072,7 @@ discard_line:
 static void add_cmd_block(char *cmdstr)
 {
        int go=1;
-       char *temp=bb_xstrdup(cmdstr),*temp2=temp;
+       char *temp=xstrdup(cmdstr),*temp2=temp;
 
        while(go) {
                int len=strcspn(temp2,"\n");
@@ -1133,14 +1121,14 @@ int sed_main(int argc, char **argv)
                        FILE *cmdfile;
                        char *line;
 
-                       cmdfile = bb_xfopen(optarg, "r");
+                       cmdfile = xfopen(optarg, "r");
 
                        while ((line = bb_get_chomped_line_from_file(cmdfile)) != NULL) {
                                add_cmd(line);
                                getpat=0;
                                free(line);
                        }
-                       bb_xprint_and_close_file(cmdfile);
+                       xprint_and_close_file(cmdfile);
 
                        break;
                }
@@ -1166,7 +1154,7 @@ int sed_main(int argc, char **argv)
         * files were specified or '-' was specified, take input from stdin.
         * Otherwise, we process all the files specified. */
        if (argv[optind] == NULL) {
-               if(bbg.in_place) bb_error_msg_and_die("Filename required for -i");
+               if(bbg.in_place) bb_error_msg_and_die(bb_msg_requires_arg, "-i");
                add_input_file(stdin);
                process_files();
        } else {
@@ -1184,7 +1172,7 @@ int sed_main(int argc, char **argv)
                                                struct stat statbuf;
                                                int nonstdoutfd;
 
-                                               bbg.outname=bb_xstrndup(argv[i],strlen(argv[i])+6);
+                                               bbg.outname=xstrndup(argv[i],strlen(argv[i])+6);
                                                strcat(bbg.outname,"XXXXXX");
                                                if(-1==(nonstdoutfd=mkstemp(bbg.outname)))
                                                        bb_error_msg_and_die("no temp file");