X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=editors%2Fsed.c;h=3ee8edc433add4e7af452753d93ec0db41f071ed;hb=aa4e5092f58f5a11018e569aee9cf037daf8c5d6;hp=9e27e3e1857b8fc88630dec212751a7e4c1ca777;hpb=52d83708364f85463fbc3756420b4068df13aab7;p=oweals%2Fbusybox.git diff --git a/editors/sed.c b/editors/sed.c index 9e27e3e18..3ee8edc43 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -26,7 +26,7 @@ * add_input_file() adds a FILE* to the list of input files. We need to * know all input sources ahead of time to find the last line for the $ match. * - * process_files() does actual sedding, reading data lines from each input FILE * + * process_files() does actual sedding, reading data lines from each input FILE* * (which could be stdin) and applying the sed command list (sed_cmd_head) to * each of the resulting lines. * @@ -57,10 +57,10 @@ */ //usage:#define sed_trivial_usage -//usage: "[-efinr] SED_CMD [FILE]..." +//usage: "[-inr] [-f FILE]... [-e CMD]... [FILE]...\n" +//usage: "or: sed [-inr] CMD [FILE]..." //usage:#define sed_full_usage "\n\n" -//usage: "Options:" -//usage: "\n -e CMD Add CMD to sed commands to be executed" +//usage: " -e CMD Add CMD to sed commands to be executed" //usage: "\n -f FILE Add FILE contents to sed commands to be executed" //usage: "\n -i Edit files in-place (else sends result to stdout)" //usage: "\n -n Suppress automatic printing of pattern space" @@ -76,6 +76,13 @@ #include "libbb.h" #include "xregex.h" +#if 0 +# define dbg(...) bb_error_msg(__VA_ARGS__) +#else +# define dbg(...) ((void)0) +#endif + + enum { OPT_in_place = 1 << 0, }; @@ -90,6 +97,7 @@ typedef struct sed_cmd_s { regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */ regex_t *sub_match; /* For 's/sub_match/string/' */ int beg_line; /* 'sed 1p' 0 == apply commands to all lines */ + int beg_line_orig; /* copy of the above, needed for -i */ int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */ FILE *sw_file; /* File (sw) command writes to, -1 for none. */ @@ -124,7 +132,7 @@ struct globals { regex_t *previous_regex_ptr; /* linked list of sed commands */ - sed_cmd_t sed_cmd_head, *sed_cmd_tail; + sed_cmd_t *sed_cmd_head, **sed_cmd_tail; /* Linked list of append lines */ llist_t *append_head; @@ -149,7 +157,7 @@ struct BUG_G_too_big { #if ENABLE_FEATURE_CLEAN_UP static void sed_free_and_close_stuff(void) { - sed_cmd_t *sed_cmd = G.sed_cmd_head.next; + sed_cmd_t *sed_cmd = G.sed_cmd_head; llist_free(G.append_head, free); @@ -215,11 +223,16 @@ static void parse_escapes(char *dest, const char *string, int len, char from, ch static char *copy_parsing_escapes(const char *string, int len) { + const char *s; char *dest = xmalloc(len + 1); - parse_escapes(dest, string, len, 'n', '\n'); - /* GNU sed also recognizes \t */ - parse_escapes(dest, dest, strlen(dest), 't', '\t'); + /* sed recognizes \n */ + /* GNU sed also recognizes \t and \r */ + for (s = "\nn\tt\rr"; *s; s += 2) { + parse_escapes(dest, string, len, s[1], s[0]); + string = dest; + len = strlen(dest); + } return dest; } @@ -269,7 +282,7 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str) static int parse_regex_delim(const char *cmdstr, char **match, char **replace) { const char *cmdstr_ptr = cmdstr; - char delimiter; + unsigned char delimiter; int idx = 0; /* verify that the 's' or 'y' is followed by something. That something @@ -284,7 +297,7 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace) /* save the replacement string */ cmdstr_ptr += idx + 1; - idx = index_of_next_unescaped_regexp_delim(-delimiter, cmdstr_ptr); + idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr); *replace = copy_parsing_escapes(cmdstr_ptr, idx); return ((cmdstr_ptr - cmdstr) + idx); @@ -309,10 +322,11 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex) char *temp; delimiter = '/'; - if (*my_str == '\\') delimiter = *++pos; + if (*my_str == '\\') + delimiter = *++pos; next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); temp = copy_parsing_escapes(pos, next); - *regex = xmalloc(sizeof(regex_t)); + *regex = xzalloc(sizeof(regex_t)); xregcomp(*regex, temp, G.regex_type|REG_NEWLINE); free(temp); /* Move position to next character after last delimiter */ @@ -421,8 +435,10 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) /* compile the match string into a regex */ if (*match != '\0') { /* If match is empty, we use last regex used at runtime */ - sed_cmd->sub_match = xmalloc(sizeof(regex_t)); + sed_cmd->sub_match = xzalloc(sizeof(regex_t)); + dbg("xregcomp('%s',%x)", match, cflags); xregcomp(sed_cmd->sub_match, match, cflags); + dbg("regcomp ok"); } free(match); @@ -595,6 +611,7 @@ static void add_cmd(const char *cmdstr) /* first part (if present) is an address: either a '$', a number or a /regex/ */ cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); + sed_cmd->beg_line_orig = sed_cmd->beg_line; /* second part (if present) will begin with a comma */ if (*cmdstr == ',') { @@ -626,8 +643,8 @@ static void add_cmd(const char *cmdstr) cmdstr = parse_cmd_args(sed_cmd, cmdstr); /* Add the command to the command array */ - G.sed_cmd_tail->next = sed_cmd; - G.sed_cmd_tail = G.sed_cmd_tail->next; + *G.sed_cmd_tail = sed_cmd; + G.sed_cmd_tail = &sed_cmd->next; } /* If we glued multiple lines together, free the memory. */ @@ -703,8 +720,12 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) G.previous_regex_ptr = current_regex; /* Find the first match */ - if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) + dbg("matching '%s'", line); + if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) { + dbg("no match"); return 0; + } + dbg("match"); /* Initialize temporary output buffer. */ G.pipeline.buf = xmalloc(PIPE_GROW); @@ -716,12 +737,13 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) int i; /* Work around bug in glibc regexec, demonstrated by: - echo " a.b" | busybox sed 's [^ .]* x g' - The match_count check is so not to break - echo "hi" | busybox sed 's/^/!/g' */ + * echo " a.b" | busybox sed 's [^ .]* x g' + * The match_count check is so not to break + * echo "hi" | busybox sed 's/^/!/g' + */ if (!G.regmatch[0].rm_so && !G.regmatch[0].rm_eo && match_count) { pipe_putc(*line++); - continue; + goto next; } match_count++; @@ -733,7 +755,7 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) ) { for (i = 0; i < G.regmatch[0].rm_eo; i++) pipe_putc(*line++); - continue; + goto next; } /* print everything before the match */ @@ -749,11 +771,14 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) altered++; /* if we're not doing this globally, get out now */ - if (sed_cmd->which_match) + if (sed_cmd->which_match != 0) + break; + next: + if (*line == '\0') break; //maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL? - } while (*line && regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH); + } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH); /* Copy rest of string into output pipeline */ while (1) { @@ -773,7 +798,7 @@ static sed_cmd_t *branch_to(char *label) { sed_cmd_t *sed_cmd; - for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { + for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) { if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) { return sed_cmd; } @@ -949,24 +974,24 @@ static void process_files(void) /* For every line, go through all the commands */ restart: - for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { + for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) { int old_matched, matched; old_matched = sed_cmd->in_match; /* Determine if this command matches this line: */ - //bb_error_msg("match1:%d", sed_cmd->in_match); - //bb_error_msg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line - // && !sed_cmd->beg_match && !sed_cmd->end_match)); - //bb_error_msg("match3:%d", (sed_cmd->beg_line > 0 - // && (sed_cmd->end_line || sed_cmd->end_match - // ? (sed_cmd->beg_line <= linenum) - // : (sed_cmd->beg_line == linenum) - // ) - // ) - //bb_error_msg("match4:%d", (beg_match(sed_cmd, pattern_space))); - //bb_error_msg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL)); + dbg("match1:%d", sed_cmd->in_match); + dbg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line + && !sed_cmd->beg_match && !sed_cmd->end_match)); + dbg("match3:%d", (sed_cmd->beg_line > 0 + && (sed_cmd->end_line || sed_cmd->end_match + ? (sed_cmd->beg_line <= linenum) + : (sed_cmd->beg_line == linenum) + ) + )); + dbg("match4:%d", (beg_match(sed_cmd, pattern_space))); + dbg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL)); /* Are we continuing a previous multi-line match? */ sed_cmd->in_match = sed_cmd->in_match @@ -977,7 +1002,14 @@ static void process_files(void) || (sed_cmd->beg_line > 0 && (sed_cmd->end_line || sed_cmd->end_match /* note: even if end is numeric and is < linenum too, - * GNU sed matches! We match too */ + * GNU sed matches! We match too, therefore we don't + * check here that linenum <= end. + * Example: + * printf '1\n2\n3\n4\n' | sed -n '1{N;N;d};1p;2,3p;3p;4p' + * first three input lines are deleted; + * 4th line is matched and printed + * by "2,3" (!) and by "4" ranges + */ ? (sed_cmd->beg_line <= linenum) /* N,end */ : (sed_cmd->beg_line == linenum) /* N */ ) @@ -990,16 +1022,14 @@ static void process_files(void) /* Snapshot the value */ matched = sed_cmd->in_match; - //bb_error_msg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d", - //sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum); + dbg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d", + sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum); /* Is this line the end of the current match? */ if (matched) { /* once matched, "n,xxx" range is dead, disabling it */ - if (sed_cmd->beg_line > 0 - && !(option_mask32 & OPT_in_place) /* but not for -i */ - ) { + if (sed_cmd->beg_line > 0) { sed_cmd->beg_line = -2; } sed_cmd->in_match = !( @@ -1013,7 +1043,8 @@ static void process_files(void) /* or does this line matches our last address regex */ || (sed_cmd->end_match && old_matched && (regexec(sed_cmd->end_match, - pattern_space, 0, NULL, 0) == 0)) + pattern_space, 0, NULL, 0) == 0) + ) ); } @@ -1047,8 +1078,8 @@ static void process_files(void) } /* actual sedding */ - //bb_error_msg("pattern_space:'%s' next_line:'%s' cmd:%c", - //pattern_space, next_line, sed_cmd->cmd); + dbg("pattern_space:'%s' next_line:'%s' cmd:%c", + pattern_space, next_line, sed_cmd->cmd); switch (sed_cmd->cmd) { /* Print line number */ @@ -1095,6 +1126,7 @@ static void process_files(void) case 's': if (!do_subst_command(sed_cmd, &pattern_space)) break; + dbg("do_subst_command succeeeded:'%s'", pattern_space); substituted |= 1; /* handle p option */ @@ -1403,11 +1435,12 @@ int sed_main(int argc UNUSED_PARAM, char **argv) add_input_file(stdin); } else { int i; - FILE *file; for (i = 0; argv[i]; i++) { struct stat statbuf; int nonstdoutfd; + FILE *file; + sed_cmd_t *sed_cmd; if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) { add_input_file(stdin); @@ -1419,11 +1452,13 @@ int sed_main(int argc UNUSED_PARAM, char **argv) status = EXIT_FAILURE; continue; } + add_input_file(file); if (!(opt & OPT_in_place)) { - add_input_file(file); continue; } + /* -i: process each FILE separately: */ + G.outname = xasprintf("%sXXXXXX", argv[i]); nonstdoutfd = xmkstemp(G.outname); G.nonstdout = xfdopen_for_write(nonstdoutfd); @@ -1434,15 +1469,20 @@ int sed_main(int argc UNUSED_PARAM, char **argv) * but GNU sed 4.2.1 does not preserve them either */ fchmod(nonstdoutfd, statbuf.st_mode); fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid); - add_input_file(file); + process_files(); fclose(G.nonstdout); - G.nonstdout = stdout; + /* unlink(argv[i]); */ xrename(G.outname, argv[i]); free(G.outname); G.outname = NULL; + + /* Re-enable disabled range matches */ + for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) { + sed_cmd->beg_line = sed_cmd->beg_line_orig; + } } /* Here, to handle "sed 'cmds' nonexistent_file" case we did: * if (G.current_input_file >= G.input_file_count)