X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=sed.c;h=24b62e5fab6ba669c26c9f21faa018b5efe22c04;hb=044228d5ecb9b79397f9fc915d046cf4538281e2;hp=0f0f4a28d19829907293ab7335e1fe544aee2ad8;hpb=b50da53e0c6e07bbf69ac7ec1730087edf6815a1;p=oweals%2Fbusybox.git diff --git a/sed.c b/sed.c index 0f0f4a28d..24b62e5fa 100644 --- a/sed.c +++ b/sed.c @@ -27,6 +27,7 @@ - address matching: num|/matchstr/[,num|/matchstr/|$]command - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags) - edit commands: (a)ppend, (i)nsert, (c)hange + - file commands: (r)ead - backreferences in substitution expressions (\1, \2...\9) (Note: Specifying an address (range) to match is *optional*; commands @@ -90,6 +91,11 @@ struct sed_cmd { /* EDIT COMMAND (a,i,c) SPEICIFIC FIELDS */ char *editline; + + + /* FILE COMMAND (r) SPEICIFIC FIELDS */ + + char *filename; }; /* globals */ @@ -150,20 +156,23 @@ static int index_of_next_unescaped_regexp_delim(struct sed_cmd *sed_cmd, const c /* * returns the index in the string just past where the address ends. */ -static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, regex_t **regex) +static int get_address(struct sed_cmd *sed_cmd, const char *str, int *linenum, regex_t **regex) { char *my_str = strdup(str); int idx = 0; + char olddelimiter; + olddelimiter = sed_cmd->delimiter; + sed_cmd->delimiter = '/'; if (isdigit(my_str[idx])) { do { idx++; } while (isdigit(my_str[idx])); my_str[idx] = 0; - *line = atoi(my_str); + *linenum = atoi(my_str); } else if (my_str[idx] == '$') { - *line = -1; + *linenum = -1; idx++; } else if (my_str[idx] == '/') { @@ -172,7 +181,7 @@ static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, rege error_msg_and_die("unterminated match expression"); my_str[idx] = '\0'; *regex = (regex_t *)xmalloc(sizeof(regex_t)); - xregcomp(*regex, my_str+1, 0); + xregcomp(*regex, my_str+1, REG_NEWLINE); idx++; /* so it points to the next character after the last '/' */ } else { @@ -182,18 +191,10 @@ static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, rege } free(my_str); + sed_cmd->delimiter = olddelimiter; return idx; } -static char *strdup_substr(const char *str, int start, int end) -{ - int size = end - start + 1; - char *newstr = xmalloc(size); - memcpy(newstr, str+start, size-1); - newstr[size-1] = '\0'; - return newstr; -} - static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr) { int oldidx, cflags = REG_NEWLINE; @@ -222,7 +223,7 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr) idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx); if (idx == -1) error_msg_and_die("bad format in substitution expression"); - match = strdup_substr(substr, oldidx, idx); + match = xstrndup(substr + oldidx, idx - oldidx); /* determine the number of back references in the match string */ /* Note: we compute this here rather than in the do_subst_command() @@ -241,7 +242,7 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr) idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx); if (idx == -1) error_msg_and_die("bad format in substitution expression"); - sed_cmd->replace = strdup_substr(substr, oldidx, idx); + sed_cmd->replace = xstrndup(substr + oldidx, idx - oldidx); /* process the flags */ while (substr[++idx]) { @@ -313,7 +314,7 @@ static int parse_edit_cmd(struct sed_cmd *sed_cmd, const char *editstr) /* now we need to go through * and: s/\\[\r\n]$/\n/g on the edit line */ while (ptr[idx]) { - while (ptr[idx] != '\\' && (ptr[idx+1] != '\n' || ptr[idx+1] != '\r')) { + while (ptr[idx] != '\\' || (ptr[idx+1] != '\n' && ptr[idx+1] != '\r')) { idx++; if (!ptr[idx]) { goto out; @@ -329,22 +330,62 @@ static int parse_edit_cmd(struct sed_cmd *sed_cmd, const char *editstr) } out: - ptr[idx] = '\n'; - ptr[idx+1] = 0; - /* this accounts for discrepancies between the modified string and the * original string passed in to this function */ idx += slashes_eaten; - /* this accounts for the fact that A) we started at index 3, not at index - * 0 and B) that we added an extra '\n' at the end (if you think the next - * line should read 'idx += 4' remember, arrays are zero-based) */ + /* figure out if we need to add a newline */ + if (ptr[idx-1] != '\n') { + ptr[idx] = '\n'; + idx++; + } - idx += 3; + /* terminate string */ + ptr[idx]= 0; + /* adjust for opening 2 chars [aic]\ */ + idx += 2; return idx; } + +static int parse_file_cmd(struct sed_cmd *sed_cmd, const char *filecmdstr) +{ + int idx = 0; + int filenamelen = 0; + + /* + * the string that gets passed to this function should look like this: + * '[ ]filename' + * | | + * | a filename + * | + * optional whitespace + + * re: the file to be read, the GNU manual says the following: "Note that + * if filename cannot be read, it is treated as if it were an empty file, + * without any error indication." Thus, all of the following commands are + * perfectly leagal: + * + * sed -e '1r noexist' + * sed -e '1r ;' + * sed -e '1r' + */ + + /* the file command may be followed by whitespace; move past it. */ + while (isspace(filecmdstr[++idx])) + { ; } + + /* the first non-whitespace we get is a filename. the filename ends when we + * hit a normal sed command terminator or end of string */ + filenamelen = strcspn(&filecmdstr[idx], "; \n\r\t\v\0"); + sed_cmd->filename = xmalloc(filenamelen + 1); + safe_strncpy(sed_cmd->filename, &filecmdstr[idx], filenamelen + 1); + + return idx + filenamelen; +} + + static char *parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr) { int idx = 0; @@ -355,7 +396,6 @@ static char *parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr) * part1 part2 part3 */ - /* first part (if present) is an address: either a number or a /regex/ */ if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/') idx = get_address(sed_cmd, cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); @@ -367,24 +407,32 @@ static char *parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr) /* last part (mandatory) will be a command */ if (cmdstr[idx] == '\0') error_msg_and_die("missing command"); - if (!strchr("pdsaic", cmdstr[idx])) /* <-- XXX add new commands here */ - error_msg_and_die("invalid command"); sed_cmd->cmd = cmdstr[idx]; - /* special-case handling for (s)ubstitution */ - if (sed_cmd->cmd == 's') { + /* if it was a single-letter command that takes no arguments (such as 'p' + * or 'd') all we need to do is increment the index past that command */ + if (strchr("pd", cmdstr[idx])) { + idx++; + } + /* handle (s)ubstitution command */ + else if (sed_cmd->cmd == 's') { idx += parse_subst_cmd(sed_cmd, &cmdstr[idx]); } - /* special-case handling for (a)ppend, (i)nsert, and (c)hange */ - else if (strchr("aic", cmdstr[idx])) { - if (sed_cmd->end_line || sed_cmd->end_match) + /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ + else if (strchr("aic", sed_cmd->cmd)) { + if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') error_msg_and_die("only a beginning address can be specified for edit commands"); idx += parse_edit_cmd(sed_cmd, &cmdstr[idx]); } - /* if it was a single-letter command (such as 'p' or 'd') we need to - * increment the index past that command */ - else - idx++; + /* handle file cmds: (r)ead */ + else if (sed_cmd->cmd == 'r') { + if (sed_cmd->end_line || sed_cmd->end_match) + error_msg_and_die("Command only uses one address"); + idx += parse_file_cmd(sed_cmd, &cmdstr[idx]); + } + else { + error_msg_and_die("invalid command"); + } /* give back whatever's left over */ return (char *)&cmdstr[idx]; @@ -519,9 +567,7 @@ static int do_subst_command(const struct sed_cmd *sed_cmd, const char *line) break; } - /* if there's anything left of the line, print it */ - if (*hackline) - fputs(hackline, stdout); + puts(hackline); /* cleanup */ free(regmatch); @@ -529,131 +575,163 @@ static int do_subst_command(const struct sed_cmd *sed_cmd, const char *line) return altered; } -static int do_sed_command(const struct sed_cmd *sed_cmd, const char *line) -{ - int altered = 0; - - switch (sed_cmd->cmd) { - - case 'p': - fputs(line, stdout); - break; - - case 'd': - altered++; - break; - - case 's': - - /* - * Some special cases for 's' printing to make it compliant with - * GNU sed printing behavior (aka "The -n | s///p Matrix"): - * - * -n ONLY = never print anything regardless of any successful - * substitution - * - * s///p ONLY = always print successful substitutions, even if - * the line is going to be printed anyway (line will be printed - * twice). - * - * -n AND s///p = print ONLY a successful substitution ONE TIME; - * no other lines are printed - this is the reason why the 'p' - * flag exists in the first place. - */ - - /* if the user specified that they didn't want anything printed (i.e. a -n - * flag and no 'p' flag after the s///), then there's really no point doing - * anything here. */ - if (be_quiet && !sed_cmd->sub_p) - break; - - /* we print the line once, unless we were told to be quiet */ - if (!be_quiet) - altered = do_subst_command(sed_cmd, line); - - /* we also print the line if we were given the 'p' flag - * (this is quite possibly the second printing) */ - if (sed_cmd->sub_p) - altered = do_subst_command(sed_cmd, line); - - break; - - case 'a': - fputs(line, stdout); - fputs(sed_cmd->editline, stdout); - altered++; - break; - - case 'i': - fputs(sed_cmd->editline, stdout); - break; - - case 'c': - fputs(sed_cmd->editline, stdout); - altered++; - break; - } - - return altered; -} static void process_file(FILE *file) { char *line = NULL; static int linenum = 0; /* GNU sed does not restart counting lines at EOF */ unsigned int still_in_range = 0; - int line_altered; + int altered; int i; /* go through every line in the file */ while ((line = get_line_from_file(file)) != NULL) { + chomp(line); linenum++; - line_altered = 0; + altered = 0; /* for every line, go through all the commands */ for (i = 0; i < ncmds; i++) { - /* are we acting on a range of matched lines? */ - if (sed_cmds[i].beg_match && sed_cmds[i].end_match) { - if (still_in_range || regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) { - line_altered += do_sed_command(&sed_cmds[i], line); - still_in_range = 1; - if (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0) - still_in_range = 0; - } - } - /* are we trying to match a single line? */ - else if (sed_cmds[i].beg_match) { - if (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) - line_altered += do_sed_command(&sed_cmds[i], line); - } + /* + * entry point into sedding... + */ + if ( + /* no range necessary */ + (sed_cmds[i].beg_line == 0 && sed_cmds[i].end_line == 0 && + sed_cmds[i].beg_match == NULL && + sed_cmds[i].end_match == NULL) || + /* this line number is the first address we're looking for */ + (sed_cmds[i].beg_line && (sed_cmds[i].beg_line == linenum)) || + /* this line matches our first address regex */ + (sed_cmds[i].beg_match && (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0)) || + /* we are currently within the beginning & ending address range */ + still_in_range + ) { + + /* + * actual sedding + */ + switch (sed_cmds[i].cmd) { + + case 'p': + puts(line); + break; + + case 'd': + altered++; + break; + + case 's': + + /* + * Some special cases for 's' printing to make it compliant with + * GNU sed printing behavior (aka "The -n | s///p Matrix"): + * + * -n ONLY = never print anything regardless of any successful + * substitution + * + * s///p ONLY = always print successful substitutions, even if + * the line is going to be printed anyway (line will be printed + * twice). + * + * -n AND s///p = print ONLY a successful substitution ONE TIME; + * no other lines are printed - this is the reason why the 'p' + * flag exists in the first place. + */ + + /* if the user specified that they didn't want anything printed (i.e., a -n + * flag and no 'p' flag after the s///), then there's really no point doing + * anything here. */ + if (be_quiet && !sed_cmds[i].sub_p) + break; + + /* we print the line once, unless we were told to be quiet */ + if (!be_quiet) + altered = do_subst_command(&sed_cmds[i], line); + + /* we also print the line if we were given the 'p' flag + * (this is quite possibly the second printing) */ + if (sed_cmds[i].sub_p) + altered = do_subst_command(&sed_cmds[i], line); + + break; + + case 'a': + puts(line); + fputs(sed_cmds[i].editline, stdout); + altered++; + break; + + case 'i': + fputs(sed_cmds[i].editline, stdout); + break; + + case 'c': + /* single-address case */ + if (sed_cmds[i].end_match == NULL && sed_cmds[i].end_line == 0) { + fputs(sed_cmds[i].editline, stdout); + } + /* multi-address case */ + else { + /* matching text */ + if (sed_cmds[i].end_match && (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0)) + fputs(sed_cmds[i].editline, stdout); + /* matching line numbers */ + if (sed_cmds[i].end_line > 0 && sed_cmds[i].end_line == linenum) + fputs(sed_cmds[i].editline, stdout); + } + altered++; + + break; + + case 'r': { + FILE *outfile; + puts(line); + outfile = fopen(sed_cmds[i].filename, "r"); + if (outfile) + print_file(outfile); + /* else if we couldn't open the output file, + * no biggie, just don't print anything */ + altered++; + } + break; + } - /* are we acting on a range of line numbers? */ - else if (sed_cmds[i].beg_line > 0 && sed_cmds[i].end_line != 0) { - if (linenum >= sed_cmds[i].beg_line && - (sed_cmds[i].end_line == -1 || linenum <= sed_cmds[i].end_line)) - line_altered += do_sed_command(&sed_cmds[i], line); - } + /* + * exit point from sedding... + */ + if ( + /* this is a single-address command or... */ + (sed_cmds[i].end_line == 0 && sed_cmds[i].end_match == NULL) || ( + /* we were in the middle of our address range (this + * isn't the first time through) and.. */ + (still_in_range == 1) && ( + /* this line number is the last address we're looking for or... */ + (sed_cmds[i].end_line && (sed_cmds[i].end_line == linenum)) || + /* this line matches our last address regex */ + (sed_cmds[i].end_match && (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0)) + ) + ) + ) { + /* we're out of our address range */ + still_in_range = 0; + } - /* are we acting on a specified line number */ - else if (sed_cmds[i].beg_line > 0) { - if (linenum == sed_cmds[i].beg_line) - line_altered += do_sed_command(&sed_cmds[i], line); + /* didn't hit the exit? then we're still in the middle of an address range */ + else { + still_in_range = 1; + } } - - /* not acting on matches or line numbers. act on every line */ - else - line_altered += do_sed_command(&sed_cmds[i], line); - } /* we will print the line unless we were told to be quiet or if the * line was altered (via a 'd'elete or 's'ubstitution), in which case * the altered line was already printed */ - if (!be_quiet && !line_altered) - fputs(line, stdout); + if (!be_quiet && !altered) + puts(line); free(line); }