/*
* sed.c - very minimalist version of sed
*
- * Copyright (C) 1999,2000,2001 by Lineo, inc.
- * Written by Mark Whitley <markw@lineo.com>, <markw@codepoet.org>
+ * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
+ * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
+ * Copyright (C) 2002 Matt Kraai
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
struct sed_cmd {
-
-
- /* GENERAL FIELDS */
- char delimiter; /* The delimiter used to separate regexps */
+ /* Order by alignment requirements (pointers, then ints, then bit-fields and chars) */
/* address storage */
- int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */
- int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
regex_t *beg_match; /* sed -e '/match/cmd' */
regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */
- /* the command */
- char cmd; /* p,d,s (add more at your leisure :-) */
-
-
/* SUBSTITUTION COMMAND SPECIFIC FIELDS */
/* sed -e 's/sub_match/replace/' */
regex_t *sub_match;
char *replace;
+
+ /* EDIT COMMAND (a,i,c) SPECIFIC FIELDS */
+ char *editline;
+
+ /* FILE COMMAND (r) SPECIFIC FIELDS */
+ char *filename;
+
+ /* address storage (line numbers) */
+ int beg_line; /* 'sed 1p' 0 == no beginning line, apply commands to all lines */
+ int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
+ /* SUBSTITUTION COMMAND SPECIFIC FIELDS */
+
unsigned int num_backrefs:4; /* how many back references (\1..\9) */
/* Note: GNU/POSIX sed does not save more than nine backrefs, so
* we only use 4 bits to hold the number */
unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */
unsigned int sub_p:2; /* sed -e 's/foo/bar/p' (print substitution) */
+ /* GENERAL FIELDS */
+ char delimiter; /* The delimiter used to separate regexps */
- /* EDIT COMMAND (a,i,c) SPEICIFIC FIELDS */
-
- char *editline;
-
-
- /* FILE COMMAND (r) SPEICIFIC FIELDS */
+ /* the command */
+ char cmd; /* p,d,s (add more at your leisure :-) */
- char *filename;
+ /* inversion flag */
+ int invert; /* set to 1 when a '!' follows the address, 0 otherwise; the command then runs on lines the address does NOT select */
};
/* globals */
/*static char *cur_file = NULL;*/ /* file currently being processed XXX: do I need this? */
-#ifdef BB_FEATURE_CLEAN_UP
-static void destroy_cmd_strs()
+/* Characters that may terminate a sed command: a semicolon or whitespace.
+ * Used as the character set for strchr(), strcspn() and strspn() while
+ * parsing command strings. Only this file needs it, so it is kept static
+ * rather than exported as a global symbol. */
+static const char * const semicolon_whitespace = "; \n\r\t\v\0";
+
+#ifdef CONFIG_FEATURE_CLEAN_UP
+static void destroy_cmd_strs(void)
{
if (sed_cmds == NULL)
return;
* expression delimiter (typically a forward * slash ('/')) not preceeded by
* a backslash ('\').
*/
-static int index_of_next_unescaped_regexp_delim(struct sed_cmd *sed_cmd, const char *str, int idx)
+static int index_of_next_unescaped_regexp_delim(const struct sed_cmd * const sed_cmd, const char *str, int idx)
{
- for ( ; str[idx]; idx++) {
- if (str[idx] == sed_cmd->delimiter && str[idx-1] != '\\')
+ int bracket = -1;
+ int escaped = 0;
+ char ch;
+
+ for ( ; (ch = str[idx]); idx++) {
+ if (bracket != -1) {
+ if (ch == ']' && !(bracket == idx - 1 ||
+ (bracket == idx - 2 && str[idx-1] == '^')))
+ bracket = -1;
+ } else if (escaped)
+ escaped = 0;
+ else if (ch == '\\')
+ escaped = 1;
+ else if (ch == '[')
+ bracket = idx;
+ else if (ch == sed_cmd->delimiter)
return idx;
}
*/
static int get_address(struct sed_cmd *sed_cmd, const char *str, int *linenum, regex_t **regex)
{
- char *my_str = strdup(str);
+ char *my_str = xstrdup(str);
int idx = 0;
char olddelimiter;
olddelimiter = sed_cmd->delimiter;
*linenum = -1;
idx++;
}
- else if (my_str[idx] == '/') {
+ else if (my_str[idx] == '/' || my_str[idx] == '\\') {
+ if (my_str[idx] == '\\') {
+ my_str[idx] = 0;
+ sed_cmd-> delimiter = my_str[++idx];
+ }
idx = index_of_next_unescaped_regexp_delim(sed_cmd, my_str, ++idx);
if (idx == -1)
error_msg_and_die("unterminated match expression");
return idx;
}
-static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
+static int parse_subst_cmd(struct sed_cmd * const sed_cmd, const char *substr)
{
int oldidx, cflags = REG_NEWLINE;
char *match;
break;
default:
/* any whitespace or semicolon trailing after a s/// is ok */
- if (strchr("; \t\v\n\r", substr[idx]))
+ if (strchr(semicolon_whitespace, substr[idx]))
goto out;
/* else */
error_msg_and_die("bad option in substitution expression");
return idx;
}
+/* Shift a string left in place: after the call, str holds what used to
+ * start at str[offset], including the terminating NUL. memmove() is used
+ * because source and destination overlap. */
+static void move_back(char *str, int offset)
+{
+	const char *tail = str + offset;
+
+	memmove(str, tail, strlen(tail) + 1);
+}
+
static int parse_edit_cmd(struct sed_cmd *sed_cmd, const char *editstr)
{
- int idx = 0;
- int slashes_eaten = 0;
- char *ptr; /* shorthand */
+ int i, j;
/*
* the string that gets passed to this function should look like this:
*
*/
- if (editstr[1] != '\\' && (editstr[2] != '\n' || editstr[2] != '\r'))
+ if (editstr[1] != '\\' || (editstr[2] != '\n' && editstr[2] != '\r'))
error_msg_and_die("bad format in edit expression");
/* store the edit line text */
- /* make editline big enough to accomodate the extra '\n' we will tack on
- * to the end */
sed_cmd->editline = xmalloc(strlen(&editstr[3]) + 2);
- strcpy(sed_cmd->editline, &editstr[3]);
- ptr = sed_cmd->editline;
-
- /* now we need to go through * and: s/\\[\r\n]$/\n/g on the edit line */
- while (ptr[idx]) {
- while (ptr[idx] != '\\' || (ptr[idx+1] != '\n' && ptr[idx+1] != '\r')) {
- idx++;
- if (!ptr[idx]) {
- goto out;
- }
- }
- /* move the newline over the '\' before it (effectively eats the '\') */
- memmove(&ptr[idx], &ptr[idx+1], strlen(&ptr[idx+1]));
- ptr[strlen(ptr)-1] = 0;
- slashes_eaten++;
- /* substitue \r for \n if needed */
- if (ptr[idx] == '\r')
- ptr[idx] = '\n';
+ for (i = 3, j = 0; editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL;
+ i++, j++) {
+ if (editstr[i] == '\\' && strchr("\n\r", editstr[i+1]) != NULL) {
+ sed_cmd->editline[j] = '\n';
+ i++;
+ } else
+ sed_cmd->editline[j] = editstr[i];
}
-out:
- /* this accounts for discrepancies between the modified string and the
- * original string passed in to this function */
- idx += slashes_eaten;
-
/* figure out if we need to add a newline */
- if (ptr[idx-1] != '\n') {
- ptr[idx] = '\n';
- idx++;
- }
+ if (sed_cmd->editline[j-1] != '\n')
+ sed_cmd->editline[j++] = '\n';
/* terminate string */
- ptr[idx]= 0;
- /* adjust for opening 2 chars [aic]\ */
- idx += 2;
+ sed_cmd->editline[j] = '\0';
- return idx;
+ return i;
}
/* the first non-whitespace we get is a filename. the filename ends when we
* hit a normal sed command terminator or end of string */
- filenamelen = strcspn(&filecmdstr[idx], "; \n\r\t\v\0");
+ filenamelen = strcspn(&filecmdstr[idx], semicolon_whitespace);
sed_cmd->filename = xmalloc(filenamelen + 1);
safe_strncpy(sed_cmd->filename, &filecmdstr[idx], filenamelen + 1);
}
-static char *parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
+static char *parse_cmd_str(struct sed_cmd * const sed_cmd, const char *const cmdstr)
{
int idx = 0;
idx = get_address(sed_cmd, cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
/* second part (if present) will begin with a comma */
- if (cmdstr[idx] == ',')
- idx += get_address(sed_cmd, &cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match);
+ if (cmdstr[idx] == ',') {
+ idx++;
+ idx += get_address(sed_cmd, &cmdstr[idx], &sed_cmd->end_line, &sed_cmd->end_match);
+ }
+
+ /* skip whitespace before the command */
+ while (isspace(cmdstr[idx]))
+ idx++;
+
+ /* there may be an inversion flag ('!') between part2 and part3 */
+ sed_cmd->invert = 0;
+ if (cmdstr[idx] == '!') {
+ sed_cmd->invert = 1;
+ idx++;
+
+ /* skip whitespace before the command */
+ while (isspace(cmdstr[idx]))
+ idx++;
+ }
/* last part (mandatory) will be a command */
if (cmdstr[idx] == '\0')
/* if it was a single-letter command that takes no arguments (such as 'p'
* or 'd') all we need to do is increment the index past that command */
- if (strchr("pd", cmdstr[idx])) {
+ if (strchr("pd", sed_cmd->cmd)) {
idx++;
}
/* handle (s)ubstitution command */
return (char *)&cmdstr[idx];
}
-static void add_cmd_str(const char *cmdstr)
+static void add_cmd_str(const char * const cmdstr)
{
char *mystr = (char *)cmdstr;
do {
/* trim leading whitespace and semicolons */
- memmove(mystr, &mystr[strspn(mystr, "; \n\r\t\v")], strlen(mystr));
+ move_back(mystr, strspn(mystr, semicolon_whitespace));
/* if we ate the whole thing, that means there was just trailing
* whitespace or a final / no-op semicolon. either way, get out */
if (strlen(mystr) == 0)
return;
/* if this is a comment, jump past it and keep going */
if (mystr[0] == '#') {
- mystr = strpbrk(mystr, ";\n\r");
+ mystr = strpbrk(mystr, "\n\r");
continue;
}
/* grow the array */
}
}
+struct pipeline { /* growable output buffer used while building a substituted line */
+ char *buf; /* the buffer; its last byte holds the PIPE_MAGIC end marker */
+ int idx; /* index in buf where the next character will be stored */
+ int len; /* number of bytes currently allocated for buf */
+};
+
#define PIPE_MAGIC 0x7f
#define PIPE_GROW 64
-#define pipeputc(c) \
+
+/*
+ * Append the character c to the pipeline buffer, enlarging it when full.
+ *
+ * The last allocated byte of the buffer holds PIPE_MAGIC, so finding that
+ * value at the write position means the buffer is full. The buffer is then
+ * grown by PIPE_GROW bytes, the new space is zeroed and the marker is
+ * placed at the new end before c is stored. xrealloc() may move the
+ * buffer, so callers must re-read pipeline->buf after every call.
+ *
+ * Only this file uses the helper (through the pipeputc() macro), so it is
+ * static rather than an exported symbol.
+ */
+static void pipe_putc(struct pipeline *const pipeline, char c)
+{
+	if (pipeline->buf[pipeline->idx] == PIPE_MAGIC) {
+		pipeline->buf =
+			xrealloc(pipeline->buf, pipeline->len + PIPE_GROW);
+		/* zero the new tail so no stale byte can look like the marker */
+		memset(pipeline->buf + pipeline->len, 0, PIPE_GROW);
+		pipeline->len += PIPE_GROW;
+		pipeline->buf[pipeline->len - 1] = PIPE_MAGIC;
+	}
+	/* this overwrites the old marker when the buffer has just been grown */
+	pipeline->buf[pipeline->idx++] = c;
+}
+
+#define pipeputc(c) pipe_putc(pipeline, c)
+
+#if 0
{ if (pipeline[pipeline_idx] == PIPE_MAGIC) { \
pipeline = xrealloc(pipeline, pipeline_len+PIPE_GROW); \
memset(pipeline+pipeline_len, 0, PIPE_GROW); \
pipeline_len += PIPE_GROW; \
pipeline[pipeline_len-1] = PIPE_MAGIC; } \
pipeline[pipeline_idx++] = (c); }
+#endif
static void print_subst_w_backrefs(const char *line, const char *replace,
- regmatch_t *regmatch, char **pipeline_p, int *pipeline_idx_p,
- int *pipeline_len_p, int matches)
+ regmatch_t *regmatch, struct pipeline *const pipeline, int matches)
{
- char *pipeline = *pipeline_p;
- int pipeline_idx = *pipeline_idx_p;
- int pipeline_len = *pipeline_len_p;
int i;
/* go through the replacement string */
else
pipeputc(replace[i]);
}
- *pipeline_p = pipeline;
- *pipeline_idx_p = pipeline_idx;
- *pipeline_len_p = pipeline_len;
}
static int do_subst_command(const struct sed_cmd *sed_cmd, char **line)
{
char *hackline = *line;
- char *pipeline = 0;
- int pipeline_idx = 0;
- int pipeline_len = 0;
+ struct pipeline thepipe = { NULL, 0 , 0};
+ struct pipeline *const pipeline = &thepipe;
int altered = 0;
regmatch_t *regmatch = NULL;
/* allocate more PIPE_GROW bytes
if replaced string is larger than original */
- pipeline_len = strlen(hackline)+PIPE_GROW;
- pipeline = xmalloc(pipeline_len);
- memset(pipeline, 0, pipeline_len);
+ thepipe.len = strlen(hackline)+PIPE_GROW;
+ thepipe.buf = xcalloc(1, thepipe.len);
/* buffer magic */
- pipeline[pipeline_len-1] = PIPE_MAGIC;
+ thepipe.buf[thepipe.len-1] = PIPE_MAGIC;
/* and now, as long as we've got a line to try matching and if we can match
* the search string, we make substitutions */
- while (*hackline && (regexec(sed_cmd->sub_match, hackline,
- sed_cmd->num_backrefs+1, regmatch, 0) == 0) ) {
+ while ((*hackline || !altered) && (regexec(sed_cmd->sub_match, hackline,
+ sed_cmd->num_backrefs+1, regmatch, 0) != REG_NOMATCH) ) {
int i;
/* print everything before the match */
/* then print the substitution string */
print_subst_w_backrefs(hackline, sed_cmd->replace, regmatch,
- &pipeline, &pipeline_idx, &pipeline_len,
- sed_cmd->num_backrefs);
+ pipeline, sed_cmd->num_backrefs);
/* advance past the match */
hackline += regmatch[0].rm_eo;
}
for (; *hackline; hackline++) pipeputc(*hackline);
- if (pipeline[pipeline_idx] == PIPE_MAGIC) pipeline[pipeline_idx] = 0;
+ if (thepipe.buf[thepipe.idx] == PIPE_MAGIC) thepipe.buf[thepipe.idx] = 0;
/* cleanup */
free(regmatch);
free(*line);
- *line = pipeline;
+ *line = thepipe.buf;
return altered;
}
/* for every line, go through all the commands */
for (i = 0; i < ncmds; i++) {
-
+ struct sed_cmd *sed_cmd = &sed_cmds[i];
+ int deleted = 0;
/*
* entry point into sedding...
*/
- if (
+ int matched = (
/* no range necessary */
- (sed_cmds[i].beg_line == 0 && sed_cmds[i].end_line == 0 &&
- sed_cmds[i].beg_match == NULL &&
- sed_cmds[i].end_match == NULL) ||
+ (sed_cmd->beg_line == 0 && sed_cmd->end_line == 0 &&
+ sed_cmd->beg_match == NULL &&
+ sed_cmd->end_match == NULL) ||
/* this line number is the first address we're looking for */
- (sed_cmds[i].beg_line && (sed_cmds[i].beg_line == linenum)) ||
+ (sed_cmd->beg_line && (sed_cmd->beg_line == linenum)) ||
/* this line matches our first address regex */
- (sed_cmds[i].beg_match && (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0)) ||
+ (sed_cmd->beg_match && (regexec(sed_cmd->beg_match, line, 0, NULL, 0) == 0)) ||
/* we are currently within the beginning & ending address range */
still_in_range
- ) {
+ );
+
+ if (sed_cmd->invert ^ matched) {
/*
* actual sedding
*/
- switch (sed_cmds[i].cmd) {
+ switch (sed_cmd->cmd) {
case 'p':
puts(line);
case 'd':
altered++;
+ deleted = 1;
break;
case 's':
/* if the user specified that they didn't want anything printed (i.e., a -n
* flag and no 'p' flag after the s///), then there's really no point doing
* anything here. */
- if (be_quiet && !sed_cmds[i].sub_p)
+ if (be_quiet && !sed_cmd->sub_p)
break;
/* we print the line once, unless we were told to be quiet */
if (!be_quiet)
- altered |= do_subst_command(&sed_cmds[i], &line);
+ altered |= do_subst_command(sed_cmd, &line);
/* we also print the line if we were given the 'p' flag
* (this is quite possibly the second printing) */
- if (sed_cmds[i].sub_p)
- altered |= do_subst_command(&sed_cmds[i], &line);
+ if (sed_cmd->sub_p)
+ altered |= do_subst_command(sed_cmd, &line);
if (altered && (i+1 >= ncmds || sed_cmds[i+1].cmd != 's'))
puts(line);
case 'a':
puts(line);
- fputs(sed_cmds[i].editline, stdout);
+ fputs(sed_cmd->editline, stdout);
altered++;
break;
case 'i':
- fputs(sed_cmds[i].editline, stdout);
+ fputs(sed_cmd->editline, stdout);
break;
case 'c':
/* single-address case */
- if (sed_cmds[i].end_match == NULL && sed_cmds[i].end_line == 0) {
- fputs(sed_cmds[i].editline, stdout);
- }
+ if ((sed_cmd->end_match == NULL && sed_cmd->end_line == 0)
/* multi-address case */
- else {
- /* matching text */
- if (sed_cmds[i].end_match && (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0))
- fputs(sed_cmds[i].editline, stdout);
- /* matching line numbers */
- if (sed_cmds[i].end_line > 0 && sed_cmds[i].end_line == linenum)
- fputs(sed_cmds[i].editline, stdout);
+ /* - matching text */
+ || (sed_cmd->end_match && (regexec(sed_cmd->end_match, line, 0, NULL, 0) == 0))
+ /* - matching line numbers */
+ || (sed_cmd->end_line > 0 && sed_cmd->end_line == linenum))
+ {
+ fputs(sed_cmd->editline, stdout);
}
altered++;
case 'r': {
FILE *outfile;
puts(line);
- outfile = fopen(sed_cmds[i].filename, "r");
+ outfile = fopen(sed_cmd->filename, "r");
if (outfile)
print_file(outfile);
/* else if we couldn't open the output file,
* no biggie, just don't print anything */
altered++;
- }
+ }
break;
}
+ }
- /*
- * exit point from sedding...
- */
+ /*
+ * exit point from sedding...
+ */
+ if (matched) {
if (
/* this is a single-address command or... */
- (sed_cmds[i].end_line == 0 && sed_cmds[i].end_match == NULL) || (
+ (sed_cmd->end_line == 0 && sed_cmd->end_match == NULL) || (
/* we were in the middle of our address range (this
* isn't the first time through) and.. */
(still_in_range == 1) && (
/* this line number is the last address we're looking for or... */
- (sed_cmds[i].end_line && (sed_cmds[i].end_line == linenum)) ||
+ (sed_cmd->end_line && (sed_cmd->end_line == linenum)) ||
/* this line matches our last address regex */
- (sed_cmds[i].end_match && (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0))
+ (sed_cmd->end_match && (regexec(sed_cmd->end_match, line, 0, NULL, 0) == 0))
)
)
) {
still_in_range = 1;
}
}
+
+ if (deleted)
+ break;
}
/* we will print the line unless we were told to be quiet or if the
extern int sed_main(int argc, char **argv)
{
- int opt;
+ int opt, status = EXIT_SUCCESS;
-#ifdef BB_FEATURE_CLEAN_UP
+#ifdef CONFIG_FEATURE_CLEAN_UP
/* destroy command strings on exit */
if (atexit(destroy_cmd_strs) == -1)
perror_msg_and_die("atexit");
int i;
FILE *file;
for (i = optind; i < argc; i++) {
- file = fopen(argv[i], "r");
- if (file == NULL) {
- perror_msg("%s", argv[i]);
- } else {
+ file = wfopen(argv[i], "r");
+ if (file) {
process_file(file);
fclose(file);
- }
+ } else
+ status = EXIT_FAILURE;
}
}
- return 0;
+ return status;
}