X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=editors%2Fsed.c;h=bb39de14906e94e66baafefc7c9cf70d13aabd4e;hb=53799506acf69e7f7137d91fa5a4451211621469;hp=87fc755ebaf566b344c39ef869d99e3f95bdbe60;hpb=21f6fbf545e7fa58f0eaa444001a9d25bc37c4eb;p=oweals%2Fbusybox.git diff --git a/editors/sed.c b/editors/sed.c index 87fc755eb..bb39de149 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -12,7 +12,6 @@ * * Licensed under GPLv2, see file LICENSE in this source tree. */ - /* Code overview. * * Files are laid out to avoid unnecessary function declarations. So for @@ -23,16 +22,12 @@ * resulting sed_cmd_t structures are appended to a linked list * (G.sed_cmd_head/G.sed_cmd_tail). * - * add_input_file() adds a FILE* to the list of input files. We need to - * know all input sources ahead of time to find the last line for the $ match. - * * process_files() does actual sedding, reading data lines from each input FILE* * (which could be stdin) and applying the sed command list (sed_cmd_head) to * each of the resulting lines. * * sed_main() is where external code calls into this, with a command line. */ - /* Supported features and commands in this version of sed: * * - comments ('#') @@ -53,19 +48,32 @@ * Todo: * - Create a wrapper around regex to make libc's regex conform with sed * - * Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html + * Reference + * http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html + * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html + * http://sed.sourceforge.net/sedfaq3.html */ +//config:config SED +//config: bool "sed (12 kb)" +//config: default y +//config: help +//config: sed is used to perform text transformations on a file +//config: or input from a pipeline. + +//applet:IF_SED(APPLET(sed, BB_DIR_BIN, BB_SUID_DROP)) + +//kbuild:lib-$(CONFIG_SED) += sed.o //usage:#define sed_trivial_usage -//usage: "[-inr] [-f FILE]... [-e CMD]... [FILE]...\n" -//usage: "or: sed [-inr] CMD [FILE]..." +//usage: "[-i[SFX]] [-nrE] [-f FILE]... [-e CMD]... [FILE]...\n" +//usage: "or: sed [-i[SFX]] [-nrE] CMD [FILE]..." //usage:#define sed_full_usage "\n\n" //usage: " -e CMD Add CMD to sed commands to be executed" //usage: "\n -f FILE Add FILE contents to sed commands to be executed" //usage: "\n -i[SFX] Edit files in-place (otherwise sends to stdout)" -//usage: "\n Optionally backs files up, appending SFX" +//usage: "\n Optionally back files up, appending SFX" //usage: "\n -n Suppress automatic printing of pattern space" -//usage: "\n -r Use extended regex syntax" +//usage: "\n -r,-E Use extended regex syntax" //usage: "\n" //usage: "\nIf no -e or -f, the first non-option argument is the sed command string." //usage: "\nRemaining arguments are input files (stdin if none)." @@ -75,6 +83,7 @@ //usage: "bar\n" #include "libbb.h" +#include "common_bufsiz.h" #include "xregex.h" #if 0 @@ -99,9 +108,10 @@ typedef struct sed_cmd_s { regex_t *sub_match; /* For 's/sub_match/string/' */ int beg_line; /* 'sed 1p' 0 == apply commands to all lines */ int beg_line_orig; /* copy of the above, needed for -i */ - int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */ + int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($). -2-N = +N */ + int end_line_orig; - FILE *sw_file; /* File (sw) command writes to, -1 for none. */ + FILE *sw_file; /* File (sw) command writes to, NULL for none. */ char *string; /* Data string for (saicytb) commands. */ unsigned which_match; /* (s) Which match to replace (0 for all) */ @@ -122,12 +132,15 @@ static const char semicolon_whitespace[] ALIGN1 = "; \n\r\t\v"; struct globals { /* options */ int be_quiet, regex_type; + FILE *nonstdout; char *outname, *hold_space; + smallint exitcode; - /* List of input files */ - int input_file_count, current_input_file; - FILE **input_file_list; + /* list of input files */ + int current_input_file, last_input_file; + char **input_file_list; + FILE *current_fp; regmatch_t regmatch[10]; regex_t *previous_regex_ptr; @@ -135,7 +148,7 @@ struct globals { /* linked list of sed commands */ sed_cmd_t *sed_cmd_head, **sed_cmd_tail; - /* Linked list of append lines */ + /* linked list of append lines */ llist_t *append_head; char *add_cmd_line; @@ -146,11 +159,10 @@ struct globals { int len; /* Space allocated */ } pipeline; } FIX_ALIASING; -#define G (*(struct globals*)&bb_common_bufsiz1) -struct BUG_G_too_big { - char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1]; -}; +#define G (*(struct globals*)bb_common_bufsiz1) #define INIT_G() do { \ + setup_common_bufsiz(); \ + BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \ G.sed_cmd_tail = &G.sed_cmd_head; \ } while (0) @@ -166,20 +178,25 @@ static void sed_free_and_close_stuff(void) sed_cmd_t *sed_cmd_next = sed_cmd->next; if (sed_cmd->sw_file) - xprint_and_close_file(sed_cmd->sw_file); + fclose(sed_cmd->sw_file); - if (sed_cmd->beg_match) { - regfree(sed_cmd->beg_match); - free(sed_cmd->beg_match); - } - if (sed_cmd->end_match) { - regfree(sed_cmd->end_match); - free(sed_cmd->end_match); - } - if (sed_cmd->sub_match) { - regfree(sed_cmd->sub_match); - free(sed_cmd->sub_match); - } + /* Used to free regexps, but now there is code + * in get_address() which can reuse a regexp + * for constructs as /regexp/cmd1;//cmd2 + * leading to double-frees here: + */ + //if (sed_cmd->beg_match) { + // regfree(sed_cmd->beg_match); + // free(sed_cmd->beg_match); + //} + //if (sed_cmd->end_match) { + // regfree(sed_cmd->end_match); + // free(sed_cmd->end_match); + //} + //if (sed_cmd->sub_match) { + // regfree(sed_cmd->sub_match); + // free(sed_cmd->sub_match); + //} free(sed_cmd->string); free(sed_cmd); sed_cmd = sed_cmd_next; @@ -187,8 +204,8 @@ static void sed_free_and_close_stuff(void) free(G.hold_space); - while (G.current_input_file < G.input_file_count) - fclose(G.input_file_list[G.current_input_file++]); + if (G.current_fp) + fclose(G.current_fp); } #else void sed_free_and_close_stuff(void); @@ -203,23 +220,33 @@ static void cleanup_outname(void) /* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */ -static void parse_escapes(char *dest, const char *string, int len, char from, char to) +static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to) { + char *d = dest; int i = 0; + if (len == -1) + len = strlen(string); + while (i < len) { if (string[i] == '\\') { if (!to || string[i+1] == from) { - *dest++ = to ? to : string[i+1]; + if ((*d = to ? to : string[i+1]) == '\0') + return d - dest; i += 2; + d++; continue; } - *dest++ = string[i++]; + i++; /* skip backslash in string[] */ + *d++ = '\\'; + /* fall through: copy next char verbatim */ } - /* TODO: is it safe wrt a string with trailing '\\' ? */ - *dest++ = string[i++]; + if ((*d = string[i++]) == '\0') + return d - dest; + d++; } - *dest = '\0'; + *d = '\0'; + return d - dest; } static char *copy_parsing_escapes(const char *string, int len) @@ -230,9 +257,8 @@ static char *copy_parsing_escapes(const char *string, int len) /* sed recognizes \n */ /* GNU sed also recognizes \t and \r */ for (s = "\nn\tt\rr"; *s; s += 2) { - parse_escapes(dest, string, len, s[1], s[0]); + len = parse_escapes(dest, string, len, s[1], s[0]); string = dest; - len = strlen(dest); } return dest; } @@ -313,7 +339,7 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex) if (isdigit(*my_str)) { *linenum = strtol(my_str, (char**)&pos, 10); - /* endstr shouldnt ever equal NULL */ + /* endstr shouldn't ever equal NULL */ } else if (*my_str == '$') { *linenum = -1; pos++; @@ -326,10 +352,16 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex) if (*my_str == '\\') delimiter = *++pos; next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); - temp = copy_parsing_escapes(pos, next); - *regex = xzalloc(sizeof(regex_t)); - xregcomp(*regex, temp, G.regex_type|REG_NEWLINE); - free(temp); + if (next != 0) { + temp = copy_parsing_escapes(pos, next); + G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t)); + xregcomp(*regex, temp, G.regex_type); + free(temp); + } else { + *regex = G.previous_regex_ptr; + if (!G.previous_regex_ptr) + bb_error_msg_and_die("no previous regexp"); + } /* Move position to next character after last delimiter */ pos += (next+1); } @@ -339,25 +371,25 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex) /* Grab a filename. Whitespace at start is skipped, then goes to EOL. */ static int parse_file_cmd(/*sed_cmd_t *sed_cmd,*/ const char *filecmdstr, char **retval) { - int start = 0, idx, hack = 0; + const char *start; + const char *eol; /* Skip whitespace, then grab filename to end of line */ - while (isspace(filecmdstr[start])) - start++; - idx = start; - while (filecmdstr[idx] && filecmdstr[idx] != '\n') - idx++; - - /* If lines glued together, put backslash back. */ - if (filecmdstr[idx] == '\n') - hack = 1; - if (idx == start) + start = skip_whitespace(filecmdstr); + eol = strchrnul(start, '\n'); + if (eol == start) bb_error_msg_and_die("empty filename"); - *retval = xstrndup(filecmdstr+start, idx-start+hack+1); - if (hack) - (*retval)[idx] = '\\'; - return idx; + if (*eol) { + /* If lines glued together, put backslash back. */ + *retval = xstrndup(start, eol-start + 1); + (*retval)[eol-start] = '\\'; + } else { + /* eol is NUL */ + *retval = xstrdup(start); + } + + return eol - filecmdstr; } static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) @@ -368,7 +400,7 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) /* * A substitution command should look something like this: - * s/match/replace/ #gIpw + * s/match/replace/ #giIpw * || | ||| * mandatory optional */ @@ -382,7 +414,9 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) /* process the flags */ sed_cmd->which_match = 1; + dbg("s flags:'%s'", substr + idx + 1); while (substr[++idx]) { + dbg("s flag:'%c'", substr[idx]); /* Parse match number */ if (isdigit(substr[idx])) { if (match[0] != '^') { @@ -390,7 +424,7 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) const char *pos = substr + idx; /* FIXME: error check? */ sed_cmd->which_match = (unsigned)strtol(substr+idx, (char**) &pos, 10); - idx = pos - substr; + idx = pos - substr - 1; } continue; } @@ -411,11 +445,15 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) /* Write to file */ case 'w': { - char *temp; - idx += parse_file_cmd(/*sed_cmd,*/ substr+idx, &temp); + char *fname; + idx += parse_file_cmd(/*sed_cmd,*/ substr+idx+1, &fname); + sed_cmd->sw_file = xfopen_for_write(fname); + sed_cmd->sw_last_char = '\n'; + free(fname); break; } - /* Ignore case (gnu exension) */ + /* Ignore case (gnu extension) */ + case 'i': case 'I': cflags |= REG_ICASE; break; @@ -429,6 +467,7 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) case '}': goto out; default: + dbg("s bad flags:'%s'", substr + idx); bb_error_msg_and_die("bad option in substitution expression"); } } @@ -451,7 +490,7 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) */ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) { - static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}"; + static const char cmd_letters[] ALIGN1 = "saicrw:btTydDgGhHlnNpPqx={}"; enum { IDX_s = 0, IDX_a, @@ -482,9 +521,11 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) IDX_rbrace, IDX_nul }; - struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 1 : -1]; }; + unsigned idx; - unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters; + BUILD_BUG_ON(sizeof(cmd_letters)-1 != IDX_nul); + + idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters; /* handle (s)ubstitution command */ if (idx == IDX_s) { @@ -492,8 +533,12 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) } /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ else if (idx <= IDX_c) { /* a,i,c */ - if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') - bb_error_msg_and_die("only a beginning address can be specified for edit commands"); + unsigned len; + + if (idx < IDX_c) { /* a,i */ + if (sed_cmd->end_line || sed_cmd->end_match) + bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd); + } for (;;) { if (*cmdstr == '\n' || *cmdstr == '\\') { cmdstr++; @@ -503,15 +548,18 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) break; cmdstr++; } - sed_cmd->string = xstrdup(cmdstr); + len = strlen(cmdstr); + sed_cmd->string = copy_parsing_escapes(cmdstr, len); + cmdstr += len; /* "\anychar" -> "anychar" */ - parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0'); - cmdstr += strlen(cmdstr); + parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0'); } /* handle file cmds: (r)ead */ else if (idx <= IDX_w) { /* r,w */ - if (sed_cmd->end_line || sed_cmd->end_match) - bb_error_msg_and_die("command only uses one address"); + if (idx < IDX_w) { /* r */ + if (sed_cmd->end_line || sed_cmd->end_match) + bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd); + } cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string); if (sed_cmd->cmd == 'w') { sed_cmd->sw_file = xfopen_for_write(sed_cmd->string); @@ -536,8 +584,8 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) cmdstr += parse_regex_delim(cmdstr, &match, &replace)+1; /* \n already parsed, but \delimiter needs unescaping. */ - parse_escapes(match, match, strlen(match), i, i); - parse_escapes(replace, replace, strlen(replace), i, i); + parse_escapes(match, match, -1, i, i); + parse_escapes(replace, replace, -1, i, i); sed_cmd->string = xzalloc((strlen(match) + 1) * 2); for (i = 0; match[i] && replace[i]; i++) { @@ -547,7 +595,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) free(match); free(replace); } - /* if it wasnt a single-letter command that takes no arguments + /* if it wasn't a single-letter command that takes no arguments * then it must be an invalid command. */ else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */ @@ -619,10 +667,29 @@ static void add_cmd(const char *cmdstr) int idx; cmdstr++; - idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); - if (!idx) + if (*cmdstr == '+' && isdigit(cmdstr[1])) { + /* http://sed.sourceforge.net/sedfaq3.html#s3.3 + * Under GNU sed 3.02+, ssed, and sed15+, + * may also be a notation of the form +num, + * indicating the next num lines after is + * matched. + * GNU sed 4.2.1 accepts even "+" (meaning "+0"). + * We don't (we check for isdigit, see above), think + * about the "+-3" case. + */ + char *end; + /* code is smaller compared to using &cmdstr here: */ + idx = strtol(cmdstr+1, &end, 10); + sed_cmd->end_line = -2 - idx; + cmdstr = end; + } else { + idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); + cmdstr += idx; + idx--; /* if 0, trigger error check below */ + } + if (idx < 0) bb_error_msg_and_die("no address after comma"); - cmdstr += idx; + sed_cmd->end_line_orig = sed_cmd->end_line; } /* skip whitespace before the command */ @@ -643,6 +710,12 @@ static void add_cmd(const char *cmdstr) sed_cmd->cmd = *cmdstr++; cmdstr = parse_cmd_args(sed_cmd, cmdstr); + /* cmdstr now points past args. + * GNU sed requires a separator, if there are more commands, + * else it complains "char N: extra characters after command". + * Example: "sed 'p;d'". We also allow "sed 'pd'". + */ + /* Add the command to the command array */ *G.sed_cmd_tail = sed_cmd; G.sed_cmd_tail = &sed_cmd->next; @@ -686,7 +759,7 @@ static void do_subst_w_backrefs(char *line, char *replace) continue; } /* I _think_ it is impossible to get '\' to be - * the last char in replace string. Thus we dont check + * the last char in replace string. Thus we don't check * for replace[i] == NUL. (counterexample anyone?) */ /* if we find a backslash escaped character, print the character */ pipe_putc(replace[i]); @@ -737,6 +810,8 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) /* Now loop through, substituting for matches */ do { + int start = G.regmatch[0].rm_so; + int end = G.regmatch[0].rm_eo; int i; match_count++; @@ -746,16 +821,16 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) if (sed_cmd->which_match && (sed_cmd->which_match != match_count) ) { - for (i = 0; i < G.regmatch[0].rm_eo; i++) + for (i = 0; i < end; i++) pipe_putc(*line++); /* Null match? Print one more char */ - if (G.regmatch[0].rm_so == i && *line) + if (start == end && *line) pipe_putc(*line++); goto next; } /* Print everything before the match */ - for (i = 0; i < G.regmatch[0].rm_so; i++) + for (i = 0; i < start; i++) pipe_putc(line[i]); /* Then print the substitution string, @@ -765,27 +840,32 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) * second is "" before "d", third is "" after "d". * Second match is NOT replaced! */ - if (prev_match_empty || i != 0) { - dbg("inserting replacement at %d in '%s'", i, line); + if (prev_match_empty || start != 0 || start != end) { + //dbg("%d %d %d", prev_match_empty, start, end); + dbg("inserting replacement at %d in '%s'", start, line); do_subst_w_backrefs(line, sed_cmd->string); + /* Flag that something has changed */ + altered = 1; } else { - dbg("NOT inserting replacement at %d in '%s'", i, line); + dbg("NOT inserting replacement at %d in '%s'", start, line); } /* If matched string is empty (f.e. "c*" pattern), * copy verbatim one char after it before attempting more matches */ - prev_match_empty = (G.regmatch[0].rm_eo == i); - if (prev_match_empty && line[i]) { - pipe_putc(line[i]); - G.regmatch[0].rm_eo++; + prev_match_empty = (start == end); + if (prev_match_empty) { + if (!line[end]) { + tried_at_eol = 1; + } else { + pipe_putc(line[end]); + end++; + } } /* Advance past the match */ - dbg("line += %d", G.regmatch[0].rm_eo); - line += G.regmatch[0].rm_eo; - /* Flag that something has changed */ - altered = 1; + dbg("line += %d", end); + line += end; /* if we're not doing this globally, get out now */ if (sed_cmd->which_match != 0) @@ -798,7 +878,7 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) tried_at_eol = 1; } -//maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL? +//maybe (end ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL? } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH); /* Copy rest of string into output pipeline */ @@ -820,7 +900,10 @@ static sed_cmd_t *branch_to(char *label) sed_cmd_t *sed_cmd; for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) { - if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) { + if (sed_cmd->cmd == ':' + && sed_cmd->string + && strcmp(sed_cmd->string, label) == 0 + ) { return sed_cmd; } } @@ -829,46 +912,109 @@ static sed_cmd_t *branch_to(char *label) static void append(char *s) { - llist_add_to_end(&G.append_head, xstrdup(s)); + llist_add_to_end(&G.append_head, s); } -static void flush_append(void) +/* Output line of text. */ +/* Note: + * The tricks with NO_EOL_CHAR and last_puts_char are there to emulate gnu sed. + * Without them, we had this: + * echo -n thingy >z1 + * echo -n again >z2 + * >znull + * sed "s/i/z/" z1 z2 znull | hexdump -vC + * output: + * gnu sed 4.1.5: + * 00000000 74 68 7a 6e 67 79 0a 61 67 61 7a 6e |thzngy.agazn| + * bbox: + * 00000000 74 68 7a 6e 67 79 61 67 61 7a 6e |thzngyagazn| + */ +enum { + NO_EOL_CHAR = 1, + LAST_IS_NUL = 2, +}; +static void puts_maybe_newline(char *s, FILE *file, char *last_puts_char, char last_gets_char) { - char *data; + char lpc = *last_puts_char; - /* Output appended lines. */ - while ((data = (char *)llist_pop(&G.append_head))) { - fprintf(G.nonstdout, "%s\n", data); - free(data); + /* Need to insert a '\n' between two files because first file's + * last line wasn't terminated? */ + if (lpc != '\n' && lpc != '\0') { + fputc('\n', file); + lpc = '\n'; + } + fputs(s, file); + + /* 'x' - just something which is not '\n', '\0' or NO_EOL_CHAR */ + if (s[0]) + lpc = 'x'; + + /* had trailing '\0' and it was last char of file? */ + if (last_gets_char == LAST_IS_NUL) { + fputc('\0', file); + lpc = 'x'; /* */ + } else + /* had trailing '\n' or '\0'? */ + if (last_gets_char != NO_EOL_CHAR) { + fputc(last_gets_char, file); + lpc = last_gets_char; + } + + if (ferror(file)) { + xfunc_error_retval = 4; /* It's what gnu sed exits with... */ + bb_error_msg_and_die(bb_msg_write_error); } + *last_puts_char = lpc; } -static void add_input_file(FILE *file) +static void flush_append(char *last_puts_char) { - G.input_file_list = xrealloc_vector(G.input_file_list, 2, G.input_file_count); - G.input_file_list[G.input_file_count++] = file; + char *data; + + /* Output appended lines. */ + while ((data = (char *)llist_pop(&G.append_head)) != NULL) { + /* Append command does not respect "nonterminated-ness" + * of last line. Try this: + * $ echo -n "woot" | sed -e '/woot/a woo' - + * woot + * woo + * (both lines are terminated with \n) + * Therefore we do not propagate "last_gets_char" here, + * pass '\n' instead: + */ + puts_maybe_newline(data, G.nonstdout, last_puts_char, '\n'); + free(data); + } } /* Get next line of input from G.input_file_list, flushing append buffer and * noting if we ran out of files without a newline on the last line we read. */ -enum { - NO_EOL_CHAR = 1, - LAST_IS_NUL = 2, -}; -static char *get_next_line(char *gets_char) +static char *get_next_line(char *gets_char, char *last_puts_char) { char *temp = NULL; - int len; + size_t len; char gc; - flush_append(); + flush_append(last_puts_char); /* will be returned if last line in the file * doesn't end with either '\n' or '\0' */ gc = NO_EOL_CHAR; - while (G.current_input_file < G.input_file_count) { - FILE *fp = G.input_file_list[G.current_input_file]; + for (; G.current_input_file <= G.last_input_file; G.current_input_file++) { + FILE *fp = G.current_fp; + if (!fp) { + const char *path = G.input_file_list[G.current_input_file]; + fp = stdin; + if (path != bb_msg_standard_input) { + fp = fopen_or_warn(path, "r"); + if (!fp) { + G.exitcode = EXIT_FAILURE; + continue; + } + } + G.current_fp = fp; + } /* Read line up to a newline or NUL byte, inclusive, * return malloc'ed char[]. length of the chunk read * is stored in len. NULL if EOF/error */ @@ -899,61 +1045,13 @@ static char *get_next_line(char *gets_char) * (note: *no* newline after "b bang"!) */ } /* Close this file and advance to next one */ - fclose(fp); - G.current_input_file++; + fclose_if_not_stdin(fp); + G.current_fp = NULL; } *gets_char = gc; return temp; } -/* Output line of text. */ -/* Note: - * The tricks with NO_EOL_CHAR and last_puts_char are there to emulate gnu sed. - * Without them, we had this: - * echo -n thingy >z1 - * echo -n again >z2 - * >znull - * sed "s/i/z/" z1 z2 znull | hexdump -vC - * output: - * gnu sed 4.1.5: - * 00000000 74 68 7a 6e 67 79 0a 61 67 61 7a 6e |thzngy.agazn| - * bbox: - * 00000000 74 68 7a 6e 67 79 61 67 61 7a 6e |thzngyagazn| - */ -static void puts_maybe_newline(char *s, FILE *file, char *last_puts_char, char last_gets_char) -{ - char lpc = *last_puts_char; - - /* Need to insert a '\n' between two files because first file's - * last line wasn't terminated? */ - if (lpc != '\n' && lpc != '\0') { - fputc('\n', file); - lpc = '\n'; - } - fputs(s, file); - - /* 'x' - just something which is not '\n', '\0' or NO_EOL_CHAR */ - if (s[0]) - lpc = 'x'; - - /* had trailing '\0' and it was last char of file? */ - if (last_gets_char == LAST_IS_NUL) { - fputc('\0', file); - lpc = 'x'; /* */ - } else - /* had trailing '\n' or '\0'? */ - if (last_gets_char != NO_EOL_CHAR) { - fputc(last_gets_char, file); - lpc = last_gets_char; - } - - if (ferror(file)) { - xfunc_error_retval = 4; /* It's what gnu sed exits with... */ - bb_error_msg_and_die(bb_msg_write_error); - } - *last_puts_char = lpc; -} - #define sed_puts(s, n) (puts_maybe_newline(s, G.nonstdout, &last_puts_char, n)) static int beg_match(sed_cmd_t *sed_cmd, const char *pattern_space) @@ -976,7 +1074,7 @@ static void process_files(void) int substituted; /* Prime the pump */ - next_line = get_next_line(&next_gets_char); + next_line = get_next_line(&next_gets_char, &last_puts_char); /* Go through every line in each file */ again: @@ -990,7 +1088,7 @@ static void process_files(void) /* Read one line in advance so we can act on the last line, * the '$' address */ - next_line = get_next_line(&next_gets_char); + next_line = get_next_line(&next_gets_char, &last_puts_char); linenum++; /* For every line, go through all the commands */ @@ -1049,10 +1147,19 @@ static void process_files(void) /* Is this line the end of the current match? */ if (matched) { + if (sed_cmd->end_line <= -2) { + /* address2 is +N, i.e. N lines from beg_line */ + sed_cmd->end_line = linenum + (-sed_cmd->end_line - 2); + } /* once matched, "n,xxx" range is dead, disabling it */ if (sed_cmd->beg_line > 0) { sed_cmd->beg_line = -2; } + dbg("end1:%d", sed_cmd->end_line ? sed_cmd->end_line == -1 + ? !next_line : (sed_cmd->end_line <= linenum) + : !sed_cmd->end_match); + dbg("end2:%d", sed_cmd->end_match && old_matched + && !regexec(sed_cmd->end_match,pattern_space, 0, NULL, 0)); sed_cmd->in_match = !( /* has the ending line come, or is this a single address command? */ (sed_cmd->end_line @@ -1064,7 +1171,7 @@ static void process_files(void) /* or does this line matches our last address regex */ || (sed_cmd->end_match && old_matched && (regexec(sed_cmd->end_match, - pattern_space, 0, NULL, 0) == 0) + pattern_space, 0, NULL, 0) == 0) ) ); } @@ -1162,7 +1269,7 @@ static void process_files(void) /* Append line to linked list to be printed later */ case 'a': - append(sed_cmd->string); + append(xstrdup(sed_cmd->string)); break; /* Insert text before this line */ @@ -1184,11 +1291,10 @@ static void process_files(void) rfile = fopen_for_read(sed_cmd->string); if (rfile) { char *line; - while ((line = xmalloc_fgetline(rfile)) != NULL) append(line); - xprint_and_close_file(rfile); + fclose(rfile); } break; @@ -1205,16 +1311,17 @@ static void process_files(void) case 'n': if (!G.be_quiet) sed_puts(pattern_space, last_gets_char); - if (next_line) { - free(pattern_space); - pattern_space = next_line; - last_gets_char = next_gets_char; - next_line = get_next_line(&next_gets_char); - substituted = 0; - linenum++; - break; + if (next_line == NULL) { + /* If no next line, jump to end of script and exit. */ + goto discard_line; } - /* fall through */ + free(pattern_space); + pattern_space = next_line; + last_gets_char = next_gets_char; + next_line = get_next_line(&next_gets_char, &last_puts_char); + substituted = 0; + linenum++; + break; /* Quit. End of script, end of input. */ case 'q': @@ -1245,7 +1352,7 @@ static void process_files(void) pattern_space[len] = '\n'; strcpy(pattern_space + len+1, next_line); last_gets_char = next_gets_char; - next_line = get_next_line(&next_gets_char); + next_line = get_next_line(&next_gets_char, &last_puts_char); linenum++; break; } @@ -1349,7 +1456,7 @@ static void process_files(void) /* Delete and such jump here. */ discard_line: - flush_append(); + flush_append(&last_puts_char /*,last_gets_char*/); free(pattern_space); goto again; @@ -1358,7 +1465,7 @@ static void process_files(void) /* It is possible to have a command line argument with embedded * newlines. This counts as multiple command lines. * However, newline can be escaped: 's/e/z\z/' - * We check for this. + * add_cmd() handles this. */ static void add_cmd_block(char *cmdstr) @@ -1368,22 +1475,8 @@ static void add_cmd_block(char *cmdstr) cmdstr = sv = xstrdup(cmdstr); do { eol = strchr(cmdstr, '\n'); - next: - if (eol) { - /* Count preceding slashes */ - int slashes = 0; - char *sl = eol; - - while (sl != cmdstr && *--sl == '\\') - slashes++; - /* Odd number of preceding slashes - newline is escaped */ - if (slashes & 1) { - overlapping_strcpy(eol - 1, eol); - eol = strchr(eol, '\n'); - goto next; - } + if (eol) *eol = '\0'; - } add_cmd(cmdstr); cmdstr = eol + 1; } while (eol); @@ -1408,8 +1501,6 @@ int sed_main(int argc UNUSED_PARAM, char **argv) "file\0" Required_argument "f"; #endif - int status = EXIT_SUCCESS; - INIT_G(); /* destroy command strings on exit */ @@ -1424,36 +1515,41 @@ int sed_main(int argc UNUSED_PARAM, char **argv) /* do normal option parsing */ opt_e = opt_f = NULL; opt_i = NULL; - opt_complementary = "e::f::" /* can occur multiple times */ - "nn"; /* count -n */ - - IF_LONG_OPTS(applet_long_options = sed_longopts); - /* -i must be first, to match OPT_in_place definition */ - opt = getopt32(argv, "i::rne:f:", &opt_i, &opt_e, &opt_f, - &G.be_quiet); /* counter for -n */ + /* -E is a synonym of -r: + * GNU sed 4.2.1 mentions it in neither --help + * nor manpage, but does recognize it. + */ + opt = getopt32long(argv, "^" + "i::rEne:*f:*" + "\0" "nn"/*count -n*/, + sed_longopts, + &opt_i, &opt_e, &opt_f, + &G.be_quiet); /* counter for -n */ //argc -= optind; argv += optind; if (opt & OPT_in_place) { // -i - atexit(cleanup_outname); + die_func = cleanup_outname; } - if (opt & 0x2) G.regex_type |= REG_EXTENDED; // -r - //if (opt & 0x4) G.be_quiet++; // -n + if (opt & (2|4)) + G.regex_type |= REG_EXTENDED; // -r or -E + //if (opt & 8) + // G.be_quiet++; // -n (implemented with a counter instead) while (opt_e) { // -e add_cmd_block(llist_pop(&opt_e)); } while (opt_f) { // -f char *line; FILE *cmdfile; - cmdfile = xfopen_for_read(llist_pop(&opt_f)); + cmdfile = xfopen_stdin(llist_pop(&opt_f)); while ((line = xmalloc_fgetline(cmdfile)) != NULL) { add_cmd(line); free(line); } - fclose(cmdfile); + fclose_if_not_stdin(cmdfile); } /* if we didn't get a pattern from -e or -f, use argv[0] */ - if (!(opt & 0x18)) { + if (!(opt & 0x30)) { if (!*argv) bb_show_usage(); add_cmd_block(*argv++); @@ -1467,42 +1563,42 @@ int sed_main(int argc UNUSED_PARAM, char **argv) /* argv[0..(argc-1)] should be names of file to process. If no * files were specified or '-' was specified, take input from stdin. * Otherwise, we process all the files specified. */ - if (argv[0] == NULL) { + G.input_file_list = argv; + if (!argv[0]) { if (opt & OPT_in_place) bb_error_msg_and_die(bb_msg_requires_arg, "-i"); - add_input_file(stdin); + argv[0] = (char*)bb_msg_standard_input; + /* G.last_input_file = 0; - already is */ } else { - int i; + goto start; - for (i = 0; argv[i]; i++) { + for (; *argv; argv++) { struct stat statbuf; int nonstdoutfd; - FILE *file; sed_cmd_t *sed_cmd; - if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) { - add_input_file(stdin); - process_files(); - continue; - } - file = fopen_or_warn(argv[i], "r"); - if (!file) { - status = EXIT_FAILURE; - continue; - } - add_input_file(file); + G.last_input_file++; + start: if (!(opt & OPT_in_place)) { + if (LONE_DASH(*argv)) { + *argv = (char*)bb_msg_standard_input; + process_files(); + } continue; } /* -i: process each FILE separately: */ - G.outname = xasprintf("%sXXXXXX", argv[i]); + if (stat(*argv, &statbuf) != 0) { + bb_simple_perror_msg(*argv); + G.exitcode = EXIT_FAILURE; + G.current_input_file++; + continue; + } + G.outname = xasprintf("%sXXXXXX", *argv); nonstdoutfd = xmkstemp(G.outname); G.nonstdout = xfdopen_for_write(nonstdoutfd); - /* Set permissions/owner of output file */ - fstat(fileno(file), &statbuf); /* chmod'ing AFTER chown would preserve suid/sgid bits, * but GNU sed 4.2.1 does not preserve them either */ fchmod(nonstdoutfd, statbuf.st_mode); @@ -1513,27 +1609,29 @@ int sed_main(int argc UNUSED_PARAM, char **argv) G.nonstdout = stdout; if (opt_i) { - char *backupname = xasprintf("%s%s", argv[i], opt_i); - xrename(argv[i], backupname); + char *backupname = xasprintf("%s%s", *argv, opt_i); + xrename(*argv, backupname); free(backupname); } - /* else unlink(argv[i]); - rename below does this */ - xrename(G.outname, argv[i]); //TODO: rollback backup on error? + /* else unlink(*argv); - rename below does this */ + xrename(G.outname, *argv); //TODO: rollback backup on error? free(G.outname); G.outname = NULL; - /* Re-enable disabled range matches */ + /* Fix disabled range matches and mangled ",+N" ranges */ for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) { sed_cmd->beg_line = sed_cmd->beg_line_orig; + sed_cmd->end_line = sed_cmd->end_line_orig; } } /* Here, to handle "sed 'cmds' nonexistent_file" case we did: - * if (G.current_input_file >= G.input_file_count) - * return status; + * if (G.current_input_file[G.current_input_file] == NULL) + * return G.exitcode; * but it's not needed since process_files() works correctly * in this case too. */ } + process_files(); - return status; + return G.exitcode; }