* add_input_file() adds a FILE* to the list of input files. We need to
* know all input sources ahead of time to find the last line for the $ match.
*
- * process_files() does actual sedding, reading data lines from each input FILE *
+ * process_files() does actual sedding, reading data lines from each input FILE*
* (which could be stdin) and applying the sed command list (sed_cmd_head) to
* each of the resulting lines.
*
*/
//usage:#define sed_trivial_usage
-//usage: "[-efinr] SED_CMD [FILE]..."
+//usage: "[-inr] [-f FILE]... [-e CMD]... [FILE]...\n"
+//usage: "or: sed [-inr] CMD [FILE]..."
//usage:#define sed_full_usage "\n\n"
-//usage: "Options:"
-//usage: "\n -e CMD Add CMD to sed commands to be executed"
+//usage: " -e CMD Add CMD to sed commands to be executed"
//usage: "\n -f FILE Add FILE contents to sed commands to be executed"
//usage: "\n -i Edit files in-place (else sends result to stdout)"
//usage: "\n -n Suppress automatic printing of pattern space"
#include "libbb.h"
#include "xregex.h"
+#if 0 /* change to 1 to route dbg() tracing through bb_error_msg() */
+# define dbg(...) bb_error_msg(__VA_ARGS__)
+#else
+# define dbg(...) ((void)0) /* tracing disabled: dbg() calls expand to a no-op */
+#endif
+
+
enum {
OPT_in_place = 1 << 0,
};
regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */
regex_t *sub_match; /* For 's/sub_match/string/' */
int beg_line; /* 'sed 1p' 0 == apply commands to all lines */
+ int beg_line_orig; /* copy of the above, needed for -i */
int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */
FILE *sw_file; /* File (sw) command writes to, -1 for none. */
regex_t *previous_regex_ptr;
/* linked list of sed commands */
- sed_cmd_t sed_cmd_head, *sed_cmd_tail;
+ sed_cmd_t *sed_cmd_head, **sed_cmd_tail;
/* Linked list of append lines */
llist_t *append_head;
#if ENABLE_FEATURE_CLEAN_UP
static void sed_free_and_close_stuff(void)
{
- sed_cmd_t *sed_cmd = G.sed_cmd_head.next;
+ sed_cmd_t *sed_cmd = G.sed_cmd_head;
llist_free(G.append_head, free);
static char *copy_parsing_escapes(const char *string, int len)
{
+ const char *s;
char *dest = xmalloc(len + 1);
- parse_escapes(dest, string, len, 'n', '\n');
- /* GNU sed also recognizes \t */
- parse_escapes(dest, dest, strlen(dest), 't', '\t');
+ /* sed recognizes \n */
+ /* GNU sed also recognizes \t and \r; s walks (replacement char, escape letter) pairs, one parse_escapes() pass per pair */
+ for (s = "\nn\tt\rr"; *s; s += 2) {
+ parse_escapes(dest, string, len, s[1], s[0]);
+ string = dest; /* every pass after the first re-reads the previous pass's output */
+ len = strlen(dest); /* re-measure: dest now holds the rewritten text */
+ }
return dest;
}
static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
{
const char *cmdstr_ptr = cmdstr;
- char delimiter;
+ unsigned char delimiter;
int idx = 0;
/* verify that the 's' or 'y' is followed by something. That something
/* save the replacement string */
cmdstr_ptr += idx + 1;
- idx = index_of_next_unescaped_regexp_delim(-delimiter, cmdstr_ptr);
+ idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr);
*replace = copy_parsing_escapes(cmdstr_ptr, idx);
return ((cmdstr_ptr - cmdstr) + idx);
char *temp;
delimiter = '/';
- if (*my_str == '\\') delimiter = *++pos;
+ if (*my_str == '\\')
+ delimiter = *++pos;
next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
temp = copy_parsing_escapes(pos, next);
- *regex = xmalloc(sizeof(regex_t));
+ *regex = xzalloc(sizeof(regex_t));
xregcomp(*regex, temp, G.regex_type|REG_NEWLINE);
free(temp);
/* Move position to next character after last delimiter */
/* compile the match string into a regex */
if (*match != '\0') {
/* If match is empty, we use last regex used at runtime */
- sed_cmd->sub_match = xmalloc(sizeof(regex_t));
+ sed_cmd->sub_match = xzalloc(sizeof(regex_t));
+ dbg("xregcomp('%s',%x)", match, cflags);
xregcomp(sed_cmd->sub_match, match, cflags);
+ dbg("regcomp ok");
}
free(match);
/* first part (if present) is an address: either a '$', a number or a /regex/ */
cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
+ sed_cmd->beg_line_orig = sed_cmd->beg_line;
/* second part (if present) will begin with a comma */
if (*cmdstr == ',') {
cmdstr = parse_cmd_args(sed_cmd, cmdstr);
/* Add the command to the command array */
- G.sed_cmd_tail->next = sed_cmd;
- G.sed_cmd_tail = G.sed_cmd_tail->next;
+ *G.sed_cmd_tail = sed_cmd;
+ G.sed_cmd_tail = &sed_cmd->next;
}
/* If we glued multiple lines together, free the memory. */
G.previous_regex_ptr = current_regex;
/* Find the first match */
- if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0))
+ dbg("matching '%s'", line);
+ if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) {
+ dbg("no match");
return 0;
+ }
+ dbg("match");
/* Initialize temporary output buffer. */
G.pipeline.buf = xmalloc(PIPE_GROW);
int i;
/* Work around bug in glibc regexec, demonstrated by:
- echo " a.b" | busybox sed 's [^ .]* x g'
- The match_count check is so not to break
- echo "hi" | busybox sed 's/^/!/g' */
+ * echo " a.b" | busybox sed 's [^ .]* x g'
+ * The match_count check is there so as not to break
+ * echo "hi" | busybox sed 's/^/!/g'
+ */
if (!G.regmatch[0].rm_so && !G.regmatch[0].rm_eo && match_count) {
pipe_putc(*line++);
- continue;
+ goto next;
}
match_count++;
) {
for (i = 0; i < G.regmatch[0].rm_eo; i++)
pipe_putc(*line++);
- continue;
+ goto next;
}
/* print everything before the match */
altered++;
/* if we're not doing this globally, get out now */
- if (sed_cmd->which_match)
+ if (sed_cmd->which_match != 0)
+ break;
+ next:
+ if (*line == '\0')
break;
//maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL?
- } while (*line && regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
+ } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
/* Copy rest of string into output pipeline */
while (1) {
{
sed_cmd_t *sed_cmd;
- for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
+ for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) {
return sed_cmd;
}
/* For every line, go through all the commands */
restart:
- for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
+ for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
int old_matched, matched;
old_matched = sed_cmd->in_match;
/* Determine if this command matches this line: */
- //bb_error_msg("match1:%d", sed_cmd->in_match);
- //bb_error_msg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
- // && !sed_cmd->beg_match && !sed_cmd->end_match));
- //bb_error_msg("match3:%d", (sed_cmd->beg_line > 0
- // && (sed_cmd->end_line || sed_cmd->end_match
- // ? (sed_cmd->beg_line <= linenum)
- // : (sed_cmd->beg_line == linenum)
- // )
- // )
- //bb_error_msg("match4:%d", (beg_match(sed_cmd, pattern_space)));
- //bb_error_msg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));
+ dbg("match1:%d", sed_cmd->in_match);
+ dbg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
+ && !sed_cmd->beg_match && !sed_cmd->end_match));
+ dbg("match3:%d", (sed_cmd->beg_line > 0
+ && (sed_cmd->end_line || sed_cmd->end_match
+ ? (sed_cmd->beg_line <= linenum)
+ : (sed_cmd->beg_line == linenum)
+ )
+ ));
+ dbg("match4:%d", (beg_match(sed_cmd, pattern_space)));
+ dbg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));
/* Are we continuing a previous multi-line match? */
sed_cmd->in_match = sed_cmd->in_match
|| (sed_cmd->beg_line > 0
&& (sed_cmd->end_line || sed_cmd->end_match
/* note: even if end is numeric and is < linenum too,
- * GNU sed matches! We match too */
+ * GNU sed matches! We match too, therefore we don't
+ * check here that linenum <= end.
+ * Example:
+ * printf '1\n2\n3\n4\n' | sed -n '1{N;N;d};1p;2,3p;3p;4p'
+ * first three input lines are deleted;
+ * 4th line is matched and printed
+ * by "2,3" (!) and by "4" ranges
+ */
? (sed_cmd->beg_line <= linenum) /* N,end */
: (sed_cmd->beg_line == linenum) /* N */
)
/* Snapshot the value */
matched = sed_cmd->in_match;
- //bb_error_msg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
- //sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
+ dbg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
+ sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
/* Is this line the end of the current match? */
if (matched) {
/* once matched, "n,xxx" range is dead, disabling it */
- if (sed_cmd->beg_line > 0
- && !(option_mask32 & OPT_in_place) /* but not for -i */
- ) {
+ if (sed_cmd->beg_line > 0) {
sed_cmd->beg_line = -2;
}
sed_cmd->in_match = !(
/* or does this line matches our last address regex */
|| (sed_cmd->end_match && old_matched
&& (regexec(sed_cmd->end_match,
- pattern_space, 0, NULL, 0) == 0))
+ pattern_space, 0, NULL, 0) == 0)
+ )
);
}
}
/* actual sedding */
- //bb_error_msg("pattern_space:'%s' next_line:'%s' cmd:%c",
- //pattern_space, next_line, sed_cmd->cmd);
+ dbg("pattern_space:'%s' next_line:'%s' cmd:%c",
+ pattern_space, next_line, sed_cmd->cmd);
switch (sed_cmd->cmd) {
/* Print line number */
case 's':
if (!do_subst_command(sed_cmd, &pattern_space))
break;
+ dbg("do_subst_command succeeded:'%s'", pattern_space);
substituted |= 1;
/* handle p option */
add_input_file(stdin);
} else {
int i;
- FILE *file;
for (i = 0; argv[i]; i++) {
struct stat statbuf;
int nonstdoutfd;
+ FILE *file;
+ sed_cmd_t *sed_cmd;
if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) {
add_input_file(stdin);
status = EXIT_FAILURE;
continue;
}
+ add_input_file(file);
if (!(opt & OPT_in_place)) {
- add_input_file(file);
continue;
}
+ /* -i: process each FILE separately: */
+
G.outname = xasprintf("%sXXXXXX", argv[i]);
nonstdoutfd = xmkstemp(G.outname);
G.nonstdout = xfdopen_for_write(nonstdoutfd);
* but GNU sed 4.2.1 does not preserve them either */
fchmod(nonstdoutfd, statbuf.st_mode);
fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid);
- add_input_file(file);
+
process_files();
fclose(G.nonstdout);
-
G.nonstdout = stdout;
+
/* unlink(argv[i]); */
xrename(G.outname, argv[i]);
free(G.outname);
G.outname = NULL;
+
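+ /* A matched numeric start address is disabled by setting beg_line to -2; restore the saved value so ranges work again for the next -i file */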
+ for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
+ sed_cmd->beg_line = sed_cmd->beg_line_orig;
+ }
}
/* Here, to handle "sed 'cmds' nonexistent_file" case we did:
* if (G.current_input_file >= G.input_file_count)