X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=editors%2Fsed.c;h=3ee8edc433add4e7af452753d93ec0db41f071ed;hb=aa4e5092f58f5a11018e569aee9cf037daf8c5d6;hp=b74941944e7e904d36c6c527d6213e3dbe91be72;hpb=90a9904e9ef5f2fb70e1952cb1a68bd9213bcde3;p=oweals%2Fbusybox.git diff --git a/editors/sed.c b/editors/sed.c index b74941944..3ee8edc43 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -10,56 +10,82 @@ * * MAINTAINER: Rob Landley * - * Licensed under GPL version 2, see file LICENSE in this tarball for details. + * Licensed under GPLv2, see file LICENSE in this source tree. */ /* Code overview. + * + * Files are laid out to avoid unnecessary function declarations. So for + * example, every function add_cmd calls occurs before add_cmd in this file. + * + * add_cmd() is called on each line of sed command text (from a file or from + * the command line). It calls get_address() and parse_cmd_args(). The + * resulting sed_cmd_t structures are appended to a linked list + * (G.sed_cmd_head/G.sed_cmd_tail). + * + * add_input_file() adds a FILE* to the list of input files. We need to + * know all input sources ahead of time to find the last line for the $ match. + * + * process_files() does actual sedding, reading data lines from each input FILE* + * (which could be stdin) and applying the sed command list (sed_cmd_head) to + * each of the resulting lines. + * + * sed_main() is where external code calls into this, with a command line. + */ - Files are laid out to avoid unnecessary function declarations. So for - example, every function add_cmd calls occurs before add_cmd in this file. - - add_cmd() is called on each line of sed command text (from a file or from - the command line). It calls get_address() and parse_cmd_args(). The - resulting sed_cmd_t structures are appended to a linked list - (G.sed_cmd_head/G.sed_cmd_tail). - - add_input_file() adds a FILE* to the list of input files. We need to - know all input sources ahead of time to find the last line for the $ match. - - process_files() does actual sedding, reading data lines from each input FILE * - (which could be stdin) and applying the sed command list (sed_cmd_head) to - each of the resulting lines. - - sed_main() is where external code calls into this, with a command line. -*/ - - -/* - Supported features and commands in this version of sed: +/* Supported features and commands in this version of sed: + * + * - comments ('#') + * - address matching: num|/matchstr/[,num|/matchstr/|$]command + * - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags) + * - edit commands: (a)ppend, (i)nsert, (c)hange + * - file commands: (r)ead + * - backreferences in substitution expressions (\0, \1, \2...\9) + * - grouped commands: {cmd1;cmd2} + * - transliteration (y/source-chars/dest-chars/) + * - pattern space hold space storing / swapping (g, h, x) + * - labels / branching (: label, b, t, T) + * + * (Note: Specifying an address (range) to match is *optional*; commands + * default to the whole pattern space if no specific address match was + * requested.) + * + * Todo: + * - Create a wrapper around regex to make libc's regex conform with sed + * + * Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html + */ - - comments ('#') - - address matching: num|/matchstr/[,num|/matchstr/|$]command - - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags) - - edit commands: (a)ppend, (i)nsert, (c)hange - - file commands: (r)ead - - backreferences in substitution expressions (\0, \1, \2...\9) - - grouped commands: {cmd1;cmd2} - - transliteration (y/source-chars/dest-chars/) - - pattern space hold space storing / swapping (g, h, x) - - labels / branching (: label, b, t, T) +//usage:#define sed_trivial_usage +//usage: "[-inr] [-f FILE]... [-e CMD]... [FILE]...\n" +//usage: "or: sed [-inr] CMD [FILE]..." +//usage:#define sed_full_usage "\n\n" +//usage: " -e CMD Add CMD to sed commands to be executed" +//usage: "\n -f FILE Add FILE contents to sed commands to be executed" +//usage: "\n -i Edit files in-place (else sends result to stdout)" +//usage: "\n -n Suppress automatic printing of pattern space" +//usage: "\n -r Use extended regex syntax" +//usage: "\n" +//usage: "\nIf no -e or -f, the first non-option argument is the sed command string." +//usage: "\nRemaining arguments are input files (stdin if none)." +//usage: +//usage:#define sed_example_usage +//usage: "$ echo \"foo\" | sed -e 's/f[a-zA-Z]o/bar/g'\n" +//usage: "bar\n" - (Note: Specifying an address (range) to match is *optional*; commands - default to the whole pattern space if no specific address match was - requested.) +#include "libbb.h" +#include "xregex.h" - Todo: - - Create a wrapper around regex to make libc's regex conform with sed +#if 0 +# define dbg(...) bb_error_msg(__VA_ARGS__) +#else +# define dbg(...) ((void)0) +#endif - Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html -*/ -#include "libbb.h" -#include "xregex.h" +enum { + OPT_in_place = 1 << 0, +}; /* Each sed command turns into one of these structures. */ typedef struct sed_cmd_s { @@ -71,6 +97,7 @@ typedef struct sed_cmd_s { regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */ regex_t *sub_match; /* For 's/sub_match/string/' */ int beg_line; /* 'sed 1p' 0 == apply commands to all lines */ + int beg_line_orig; /* copy of the above, needed for -i */ int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */ FILE *sw_file; /* File (sw) command writes to, -1 for none. */ @@ -105,7 +132,7 @@ struct globals { regex_t *previous_regex_ptr; /* linked list of sed commands */ - sed_cmd_t sed_cmd_head, *sed_cmd_tail; + sed_cmd_t *sed_cmd_head, **sed_cmd_tail; /* Linked list of append lines */ llist_t *append_head; @@ -113,16 +140,16 @@ struct globals { char *add_cmd_line; struct pipeline { - char *buf; /* Space to hold string */ - int idx; /* Space used */ - int len; /* Space allocated */ + char *buf; /* Space to hold string */ + int idx; /* Space used */ + int len; /* Space allocated */ } pipeline; -}; +} FIX_ALIASING; #define G (*(struct globals*)&bb_common_bufsiz1) -void BUG_sed_globals_too_big(void); +struct BUG_G_too_big { + char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1]; +}; #define INIT_G() do { \ - if (sizeof(struct globals) > COMMON_BUFSIZE) \ - BUG_sed_globals_too_big(); \ G.sed_cmd_tail = &G.sed_cmd_head; \ } while (0) @@ -130,7 +157,7 @@ void BUG_sed_globals_too_big(void); #if ENABLE_FEATURE_CLEAN_UP static void sed_free_and_close_stuff(void) { - sed_cmd_t *sed_cmd = G.sed_cmd_head.next; + sed_cmd_t *sed_cmd = G.sed_cmd_head; llist_free(G.append_head, free); @@ -196,11 +223,16 @@ static void parse_escapes(char *dest, const char *string, int len, char from, ch static char *copy_parsing_escapes(const char *string, int len) { + const char *s; char *dest = xmalloc(len + 1); - parse_escapes(dest, string, len, 'n', '\n'); - /* GNU sed also recognizes \t */ - parse_escapes(dest, dest, strlen(dest), 't', '\t'); + /* sed recognizes \n */ + /* GNU sed also recognizes \t and \r */ + for (s = "\nn\tt\rr"; *s; s += 2) { + parse_escapes(dest, string, len, s[1], s[0]); + string = dest; + len = strlen(dest); + } return dest; } @@ -223,11 +255,13 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str) delimiter = -delimiter; } - for (; (ch = str[idx]); idx++) { + for (; (ch = str[idx]) != '\0'; idx++) { if (bracket >= 0) { - if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 - && str[idx - 1] == '^'))) + if (ch == ']' + && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^')) + ) { bracket = -1; + } } else if (escaped) escaped = 0; else if (ch == '\\') @@ -248,7 +282,7 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str) static int parse_regex_delim(const char *cmdstr, char **match, char **replace) { const char *cmdstr_ptr = cmdstr; - char delimiter; + unsigned char delimiter; int idx = 0; /* verify that the 's' or 'y' is followed by something. That something @@ -263,7 +297,7 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace) /* save the replacement string */ cmdstr_ptr += idx + 1; - idx = index_of_next_unescaped_regexp_delim(-delimiter, cmdstr_ptr); + idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr); *replace = copy_parsing_escapes(cmdstr_ptr, idx); return ((cmdstr_ptr - cmdstr) + idx); @@ -288,10 +322,11 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex) char *temp; delimiter = '/'; - if (*my_str == '\\') delimiter = *++pos; + if (*my_str == '\\') + delimiter = *++pos; next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); temp = copy_parsing_escapes(pos, next); - *regex = xmalloc(sizeof(regex_t)); + *regex = xzalloc(sizeof(regex_t)); xregcomp(*regex, temp, G.regex_type|REG_NEWLINE); free(temp); /* Move position to next character after last delimiter */ @@ -359,7 +394,8 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) continue; } /* Skip spaces */ - if (isspace(substr[idx])) continue; + if (isspace(substr[idx])) + continue; switch (substr[idx]) { /* Replace all occurrences */ @@ -384,7 +420,8 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) break; /* Comment */ case '#': - while (substr[++idx]) /*skip all*/; + // while (substr[++idx]) continue; + idx += strlen(substr + idx); // same /* Fall through */ /* End of command */ case ';': @@ -394,12 +431,14 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) bb_error_msg_and_die("bad option in substitution expression"); } } -out: + out: /* compile the match string into a regex */ if (*match != '\0') { /* If match is empty, we use last regex used at runtime */ - sed_cmd->sub_match = xmalloc(sizeof(regex_t)); + sed_cmd->sub_match = xzalloc(sizeof(regex_t)); + dbg("xregcomp('%s',%x)", match, cflags); xregcomp(sed_cmd->sub_match, match, cflags); + dbg("regcomp ok"); } free(match); @@ -411,29 +450,65 @@ out: */ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) { + static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}"; + enum { + IDX_s = 0, + IDX_a, + IDX_i, + IDX_c, + IDX_r, + IDX_w, + IDX_colon, + IDX_b, + IDX_t, + IDX_T, + IDX_y, + IDX_d, + IDX_D, + IDX_g, + IDX_G, + IDX_h, + IDX_H, + IDX_l, + IDX_n, + IDX_N, + IDX_p, + IDX_P, + IDX_q, + IDX_x, + IDX_equal, + IDX_lbrace, + IDX_rbrace, + IDX_nul + }; + struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 1 : -1]; }; + + unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters; + /* handle (s)ubstitution command */ - if (sed_cmd->cmd == 's') + if (idx == IDX_s) { cmdstr += parse_subst_cmd(sed_cmd, cmdstr); + } /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ - else if (strchr("aic", sed_cmd->cmd)) { + else if (idx <= IDX_c) { /* a,i,c */ if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') - bb_error_msg_and_die - ("only a beginning address can be specified for edit commands"); + bb_error_msg_and_die("only a beginning address can be specified for edit commands"); for (;;) { if (*cmdstr == '\n' || *cmdstr == '\\') { cmdstr++; break; - } else if (isspace(*cmdstr)) - cmdstr++; - else + } + if (!isspace(*cmdstr)) break; + cmdstr++; } sed_cmd->string = xstrdup(cmdstr); /* "\anychar" -> "anychar" */ parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0'); cmdstr += strlen(cmdstr); + } /* handle file cmds: (r)ead */ - } else if (strchr("rw", sed_cmd->cmd)) { + else if (idx <= IDX_w) { /* r,w */ if (sed_cmd->end_line || sed_cmd->end_match) bb_error_msg_and_die("command only uses one address"); cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string); @@ -441,8 +516,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) sed_cmd->sw_file = xfopen_for_write(sed_cmd->string); sed_cmd->sw_last_char = '\n'; } + } /* handle branch commands */ - } else if (strchr(":btT", sed_cmd->cmd)) { + else if (idx <= IDX_T) { /* :,b,t,T */ int length; cmdstr = skip_whitespace(cmdstr); @@ -453,7 +529,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) } } /* translation command */ - else if (sed_cmd->cmd == 'y') { + else if (idx == IDX_y) { char *match, *replace; int i = cmdstr[0]; @@ -473,7 +549,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) /* if it wasnt a single-letter command that takes no arguments * then it must be an invalid command. */ - else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) { + else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */ bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd); } @@ -487,7 +563,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) static void add_cmd(const char *cmdstr) { sed_cmd_t *sed_cmd; - int temp; + unsigned len, n; /* Append this line to any unfinished line from last time. */ if (G.add_cmd_line) { @@ -496,12 +572,14 @@ static void add_cmd(const char *cmdstr) cmdstr = G.add_cmd_line = tp; } - /* If this line ends with backslash, request next line. */ - temp = strlen(cmdstr); - if (temp && cmdstr[--temp] == '\\') { + /* If this line ends with unescaped backslash, request next line. */ + n = len = strlen(cmdstr); + while (n && cmdstr[n-1] == '\\') + n--; + if ((len - n) & 1) { /* if odd number of trailing backslashes */ if (!G.add_cmd_line) G.add_cmd_line = xstrdup(cmdstr); - G.add_cmd_line[temp] = '\0'; + G.add_cmd_line[len-1] = '\0'; return; } @@ -533,6 +611,7 @@ static void add_cmd(const char *cmdstr) /* first part (if present) is an address: either a '$', a number or a /regex/ */ cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); + sed_cmd->beg_line_orig = sed_cmd->beg_line; /* second part (if present) will begin with a comma */ if (*cmdstr == ',') { @@ -560,12 +639,12 @@ static void add_cmd(const char *cmdstr) /* last part (mandatory) will be a command */ if (!*cmdstr) bb_error_msg_and_die("missing command"); - sed_cmd->cmd = *(cmdstr++); + sed_cmd->cmd = *cmdstr++; cmdstr = parse_cmd_args(sed_cmd, cmdstr); /* Add the command to the command array */ - G.sed_cmd_tail->next = sed_cmd; - G.sed_cmd_tail = G.sed_cmd_tail->next; + *G.sed_cmd_tail = sed_cmd; + G.sed_cmd_tail = &sed_cmd->next; } /* If we glued multiple lines together, free the memory. */ @@ -641,8 +720,12 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) G.previous_regex_ptr = current_regex; /* Find the first match */ - if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) + dbg("matching '%s'", line); + if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) { + dbg("no match"); return 0; + } + dbg("match"); /* Initialize temporary output buffer. */ G.pipeline.buf = xmalloc(PIPE_GROW); @@ -654,12 +737,13 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) int i; /* Work around bug in glibc regexec, demonstrated by: - echo " a.b" | busybox sed 's [^ .]* x g' - The match_count check is so not to break - echo "hi" | busybox sed 's/^/!/g' */ + * echo " a.b" | busybox sed 's [^ .]* x g' + * The match_count check is so not to break + * echo "hi" | busybox sed 's/^/!/g' + */ if (!G.regmatch[0].rm_so && !G.regmatch[0].rm_eo && match_count) { pipe_putc(*line++); - continue; + goto next; } match_count++; @@ -671,7 +755,7 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) ) { for (i = 0; i < G.regmatch[0].rm_eo; i++) pipe_putc(*line++); - continue; + goto next; } /* print everything before the match */ @@ -687,11 +771,12 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) altered++; /* if we're not doing this globally, get out now */ - if (sed_cmd->which_match) + if (sed_cmd->which_match != 0) break; - + next: if (*line == '\0') break; + //maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL? } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH); @@ -713,7 +798,7 @@ static sed_cmd_t *branch_to(char *label) { sed_cmd_t *sed_cmd; - for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { + for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) { if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) { return sed_cmd; } @@ -889,20 +974,46 @@ static void process_files(void) /* For every line, go through all the commands */ restart: - for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { + for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) { int old_matched, matched; old_matched = sed_cmd->in_match; /* Determine if this command matches this line: */ + dbg("match1:%d", sed_cmd->in_match); + dbg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line + && !sed_cmd->beg_match && !sed_cmd->end_match)); + dbg("match3:%d", (sed_cmd->beg_line > 0 + && (sed_cmd->end_line || sed_cmd->end_match + ? (sed_cmd->beg_line <= linenum) + : (sed_cmd->beg_line == linenum) + ) + )); + dbg("match4:%d", (beg_match(sed_cmd, pattern_space))); + dbg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL)); + /* Are we continuing a previous multi-line match? */ sed_cmd->in_match = sed_cmd->in_match /* Or is no range necessary? */ || (!sed_cmd->beg_line && !sed_cmd->end_line && !sed_cmd->beg_match && !sed_cmd->end_match) /* Or did we match the start of a numerical range? */ - || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line <= linenum)) + || (sed_cmd->beg_line > 0 + && (sed_cmd->end_line || sed_cmd->end_match + /* note: even if end is numeric and is < linenum too, + * GNU sed matches! We match too, therefore we don't + * check here that linenum <= end. + * Example: + * printf '1\n2\n3\n4\n' | sed -n '1{N;N;d};1p;2,3p;3p;4p' + * first three input lines are deleted; + * 4th line is matched and printed + * by "2,3" (!) and by "4" ranges + */ + ? (sed_cmd->beg_line <= linenum) /* N,end */ + : (sed_cmd->beg_line == linenum) /* N */ + ) + ) /* Or does this line match our begin address regex? */ || (beg_match(sed_cmd, pattern_space)) /* Or did we match last line of input? */ @@ -911,34 +1022,44 @@ static void process_files(void) /* Snapshot the value */ matched = sed_cmd->in_match; - //bb_error_msg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d", - //sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum); + dbg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d", + sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum); /* Is this line the end of the current match? */ if (matched) { /* once matched, "n,xxx" range is dead, disabling it */ - if (sed_cmd->beg_line > 0) + if (sed_cmd->beg_line > 0) { sed_cmd->beg_line = -2; + } sed_cmd->in_match = !( /* has the ending line come, or is this a single address command? */ - (sed_cmd->end_line ? - sed_cmd->end_line == -1 ? - !next_line + (sed_cmd->end_line + ? sed_cmd->end_line == -1 + ? !next_line : (sed_cmd->end_line <= linenum) : !sed_cmd->end_match ) /* or does this line matches our last address regex */ || (sed_cmd->end_match && old_matched && (regexec(sed_cmd->end_match, - pattern_space, 0, NULL, 0) == 0)) + pattern_space, 0, NULL, 0) == 0) + ) ); } /* Skip blocks of commands we didn't match */ if (sed_cmd->cmd == '{') { if (sed_cmd->invert ? matched : !matched) { - while (sed_cmd->cmd != '}') { + unsigned nest_cnt = 0; + while (1) { + if (sed_cmd->cmd == '{') + nest_cnt++; + if (sed_cmd->cmd == '}') { + nest_cnt--; + if (nest_cnt == 0) + break; + } sed_cmd = sed_cmd->next; if (!sed_cmd) bb_error_msg_and_die("unterminated {"); @@ -957,6 +1078,8 @@ static void process_files(void) } /* actual sedding */ + dbg("pattern_space:'%s' next_line:'%s' cmd:%c", + pattern_space, next_line, sed_cmd->cmd); switch (sed_cmd->cmd) { /* Print line number */ @@ -968,7 +1091,6 @@ static void process_files(void) case 'P': { char *tmp = strchr(pattern_space, '\n'); - if (tmp) { *tmp = '\0'; /* TODO: explain why '\n' below */ @@ -991,11 +1113,8 @@ static void process_files(void) case 'D': { char *tmp = strchr(pattern_space, '\n'); - if (tmp) { - tmp = xstrdup(tmp+1); - free(pattern_space); - pattern_space = tmp; + overlapping_strcpy(pattern_space, tmp + 1); goto restart; } } @@ -1007,6 +1126,7 @@ static void process_files(void) case 's': if (!do_subst_command(sed_cmd, &pattern_space)) break; + dbg("do_subst_command succeeeded:'%s'", pattern_space); substituted |= 1; /* handle p option */ @@ -1033,14 +1153,13 @@ static void process_files(void) case 'c': /* Only triggers on last line of a matching range. */ if (!sed_cmd->in_match) - sed_puts(sed_cmd->string, NO_EOL_CHAR); + sed_puts(sed_cmd->string, '\n'); goto discard_line; /* Read file, append contents to output */ case 'r': { FILE *rfile; - rfile = fopen_for_read(sed_cmd->string); if (rfile) { char *line; @@ -1088,15 +1207,20 @@ static void process_files(void) { int len; /* If no next line, jump to end of script and exit. */ + /* http://www.gnu.org/software/sed/manual/sed.html: + * "Most versions of sed exit without printing anything + * when the N command is issued on the last line of + * a file. GNU sed prints pattern space before exiting + * unless of course the -n command switch has been + * specified. This choice is by design." + */ if (next_line == NULL) { - /* Jump to end of script and exit */ - free(next_line); - next_line = NULL; - goto discard_line; - /* append next_line, read new next_line. */ + //goto discard_line; + goto discard_commands; /* GNU behavior */ } + /* Append next_line, read new next_line. */ len = strlen(pattern_space); - pattern_space = realloc(pattern_space, len + strlen(next_line) + 2); + pattern_space = xrealloc(pattern_space, len + strlen(next_line) + 2); pattern_space[len] = '\n'; strcpy(pattern_space + len+1, next_line); last_gets_char = next_gets_char; @@ -1123,7 +1247,6 @@ static void process_files(void) case 'y': { int i, j; - for (i = 0; pattern_space[i]; i++) { for (j = 0; sed_cmd->string[j]; j += 2) { if (pattern_space[i] == sed_cmd->string[j]) { @@ -1249,9 +1372,6 @@ static void add_cmd_block(char *cmdstr) int sed_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int sed_main(int argc UNUSED_PARAM, char **argv) { - enum { - OPT_in_place = 1 << 0, - }; unsigned opt; llist_t *opt_e, *opt_f; int status = EXIT_SUCCESS; @@ -1271,6 +1391,7 @@ int sed_main(int argc UNUSED_PARAM, char **argv) opt_e = opt_f = NULL; opt_complementary = "e::f::" /* can occur multiple times */ "nn"; /* count -n */ + /* -i must be first, to match OPT_in_place definition */ opt = getopt32(argv, "irne:f:", &opt_e, &opt_f, &G.be_quiet); /* counter for -n */ //argc -= optind; @@ -1312,14 +1433,14 @@ int sed_main(int argc UNUSED_PARAM, char **argv) if (opt & OPT_in_place) bb_error_msg_and_die(bb_msg_requires_arg, "-i"); add_input_file(stdin); - process_files(); } else { int i; - FILE *file; for (i = 0; argv[i]; i++) { struct stat statbuf; int nonstdoutfd; + FILE *file; + sed_cmd_t *sed_cmd; if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) { add_input_file(stdin); @@ -1331,34 +1452,45 @@ int sed_main(int argc UNUSED_PARAM, char **argv) status = EXIT_FAILURE; continue; } + add_input_file(file); if (!(opt & OPT_in_place)) { - add_input_file(file); continue; } - G.outname = xasprintf("%sXXXXXX", argv[i]); - nonstdoutfd = mkstemp(G.outname); - if (-1 == nonstdoutfd) - bb_perror_msg_and_die("cannot create temp file %s", G.outname); - G.nonstdout = fdopen(nonstdoutfd, "w"); + /* -i: process each FILE separately: */ - /* Set permissions of output file */ + G.outname = xasprintf("%sXXXXXX", argv[i]); + nonstdoutfd = xmkstemp(G.outname); + G.nonstdout = xfdopen_for_write(nonstdoutfd); + /* Set permissions/owner of output file */ fstat(fileno(file), &statbuf); + /* chmod'ing AFTER chown would preserve suid/sgid bits, + * but GNU sed 4.2.1 does not preserve them either */ fchmod(nonstdoutfd, statbuf.st_mode); - add_input_file(file); + fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid); + process_files(); fclose(G.nonstdout); - G.nonstdout = stdout; + /* unlink(argv[i]); */ xrename(G.outname, argv[i]); free(G.outname); G.outname = NULL; + + /* Re-enable disabled range matches */ + for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) { + sed_cmd->beg_line = sed_cmd->beg_line_orig; + } } - if (G.input_file_count > G.current_input_file) - process_files(); + /* Here, to handle "sed 'cmds' nonexistent_file" case we did: + * if (G.current_input_file >= G.input_file_count) + * return status; + * but it's not needed since process_files() works correctly + * in this case too. */ } + process_files(); return status; }