* Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
* Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
* Copyright (C) 2002 Matt Kraai
+ * Copyright (C) 2003 by Glenn McGrath <bug1@optushome.com.au>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
- file commands: (r)ead
- backreferences in substitution expressions (\1, \2...\9)
- grouped commands: {cmd1;cmd2}
+ - transliteration (y/source-chars/dest-chars/)
+ - pattern space hold space storing / swapping (g, h, x)
+ - labels / branching (: label, b, t)
(Note: Specifying an address (range) to match is *optional*; commands
default to the whole pattern space if no specific address match was
Unsupported features:
- - transliteration (y/source-chars/dest-chars/) (use 'tr')
- - no pattern space hold space storing / swapping (x, etc.)
- - no labels / branching (: label, b, t, and friends)
+ - GNU extensions
- and lots, lots more.
+ Bugs:
+
+ - Can't substitute globally using ^ or $ in regex, e.g. echo "aah" | sed 's/^a/b/g'
+
Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
*/
#include <stdlib.h>
#include "busybox.h"
-/* the spec says label must be at least 8 chars, behavious is unspecified if more than 8 chars */
-#define SED_LABEL_LENGTH 8
-
-/* externs */
-extern void xregcomp(regex_t * preg, const char *regex, int cflags);
-extern int optind; /* in unistd.h */
-extern char *optarg; /* ditto */
-
-/* options */
-static int be_quiet = 0;
-static const char bad_format_in_subst[] =
- "bad format in substitution expression";
-
typedef struct sed_cmd_s {
/* Order by alignment requirements */
regex_t *beg_match; /* sed -e '/match/cmd' */
regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */
+ int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */
+ int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
+
+ /* inversion flag */
+ int invert; /* the '!' after the address */
+// int block_cmd; /* This command is part of a group that has a command address */
+
/* SUBSTITUTION COMMAND SPECIFIC FIELDS */
/* sed -e 's/sub_match/replace/' */
/* FILE COMMAND (r) SPECIFIC FIELDS */
char *filename;
- /* address storage */
- int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */
- int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
/* SUBSTITUTION COMMAND SPECIFIC FIELDS */
unsigned int num_backrefs:4; /* how many back references (\1..\9) */
/* the command */
char cmd; /* p,d,s (add more at your leisure :-) */
- /* inversion flag */
- int invert; /* the '!' after the address */
-
/* Branch commands */
- char label[SED_LABEL_LENGTH + 1];
+ char *label;
/* next command in list (sequential list of specified commands) */
- struct sed_cmd_s *linear;
+ struct sed_cmd_s *next;
} sed_cmd_t;
+
+/* externs */
+extern void xregcomp(regex_t * preg, const char *regex, int cflags);
+extern int optind; /* in unistd.h */
+extern char *optarg; /* ditto */
+
/* globals */
+/* options */
+static int be_quiet = 0;
+static const char bad_format_in_subst[] =
+ "bad format in substitution expression";
+
/* linked list of sed commands */
static sed_cmd_t sed_cmd_head;
static sed_cmd_t *sed_cmd_tail = &sed_cmd_head;
-static sed_cmd_t *block_cmd;
-static int in_block = 0;
const char *const semicolon_whitespace = "; \n\r\t\v\0";
static regex_t *previous_regex_ptr = NULL;
+
#ifdef CONFIG_FEATURE_CLEAN_UP
static void destroy_cmd_strs(void)
{
- sed_cmd_t *sed_cmd = sed_cmd_head.linear;
+ sed_cmd_t *sed_cmd = sed_cmd_head.next;
while (sed_cmd) {
- sed_cmd_t *sed_cmd_next = sed_cmd->linear;
+ sed_cmd_t *sed_cmd_next = sed_cmd->next;
if (sed_cmd->beg_match) {
regfree(sed_cmd->beg_match);
* a backslash ('\').
*/
static int index_of_next_unescaped_regexp_delim(const char delimiter,
- const char *str)
+ const char *str)
{
int bracket = -1;
int escaped = 0;
for (; (ch = str[idx]); idx++) {
if (bracket != -1) {
- if (ch == ']' && !(bracket == idx - 1 ||
- (bracket == idx - 2 && str[idx - 1] == '^')))
+ if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2
+ && str[idx - 1] == '^')))
bracket = -1;
} else if (escaped)
escaped = 0;
/* store the edit line text */
sed_cmd->editline = xmalloc(strlen(&editstr[2]) + 2);
for (i = 2, j = 0;
- editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; i++, j++) {
+ editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; i++, j++) {
if ((editstr[i] == '\\') && strchr("\n\r", editstr[i + 1]) != NULL) {
sed_cmd->editline[j] = '\n';
i++;
filenamelen = strcspn(&filecmdstr[idx], semicolon_whitespace);
sed_cmd->filename = xmalloc(filenamelen + 1);
safe_strncpy(sed_cmd->filename, &filecmdstr[idx], filenamelen + 1);
-
return idx + filenamelen;
}
/*
* Process the commands arguments
*/
-static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
+static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr)
{
/* handle (s)ubstitution command */
if (sed_cmd->cmd == 's') {
int length;
cmdstr += strspn(cmdstr, " ");
- length = strcspn(cmdstr, "; ");
- if (length > SED_LABEL_LENGTH) {
- length = SED_LABEL_LENGTH;
- }
- strncpy(sed_cmd->label, cmdstr, length);
+ length = strcspn(cmdstr, "; \n");
+ sed_cmd->label = strndup(cmdstr, length);
cmdstr += length;
}
/* translation command */
/* if it wasnt a single-letter command that takes no arguments
* then it must be an invalid command.
*/
- else if (strchr("dghnNpPqx=", sed_cmd->cmd) == 0) {
+ else if (strchr("dgGhHnNpPqx={}", sed_cmd->cmd) == 0) {
bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd);
}
return (strpbrk(cmdstr, "\n\r"));
}
- /* Test for end of block */
- if (*cmdstr == '}') {
- in_block = 0;
- cmdstr++;
- return (cmdstr);
- }
-
/* parse the command
* format is: [addr][,addr]cmd
* |----||-----||-|
idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
if (idx == 0) {
bb_error_msg_and_die("get_address: no address found in string\n"
- "\t(you probably didn't check the string you passed me)");
+ "\t(you probably didn't check the string you passed me)");
}
cmdstr += idx;
}
if (*cmdstr == '\0')
bb_error_msg_and_die("missing command");
- /* This is the start of a block of commands */
- if (*cmdstr == '{') {
- if (in_block != 0) {
- bb_error_msg_and_die("cant handle sub-blocks");
- }
- in_block = 1;
- block_cmd = sed_cmd;
-
- return (cmdstr + 1);
- }
-
sed_cmd->cmd = *cmdstr;
cmdstr++;
- if (in_block == 1) {
- sed_cmd->beg_match = block_cmd->beg_match;
- sed_cmd->end_match = block_cmd->end_match;
- sed_cmd->beg_line = block_cmd->beg_line;
- sed_cmd->end_line = block_cmd->end_line;
- sed_cmd->invert = block_cmd->invert;
- }
-
cmdstr = parse_cmd_str(sed_cmd, cmdstr);
/* Add the command to the command array */
- sed_cmd_tail->linear = sed_cmd;
- sed_cmd_tail = sed_cmd_tail->linear;
+ sed_cmd_tail->next = sed_cmd;
+ sed_cmd_tail = sed_cmd_tail->next;
return (cmdstr);
}
while ((line = bb_get_line_from_file(cmdfile)) != NULL) {
/* if a line ends with '\' it needs the next line appended to it */
while (((e = last_char_is(line, '\n')) != NULL)
- && (e > line) && (e[-1] == '\\')
- && ((nextline = bb_get_line_from_file(cmdfile)) != NULL)) {
+ && (e > line) && (e[-1] == '\\')
+ && ((nextline = bb_get_line_from_file(cmdfile)) != NULL)) {
line = xrealloc(line, (e - line) + 1 + strlen(nextline) + 1);
strcat(line, nextline);
free(nextline);
#endif
static void print_subst_w_backrefs(const char *line, const char *replace,
- regmatch_t * regmatch,
- struct pipeline *const pipeline,
- int matches)
+ regmatch_t * regmatch, struct pipeline *const pipeline, int matches)
{
int i;
/* print out the text held in regmatch[backref] */
if (backref <= matches && regmatch[backref].rm_so != -1)
for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo;
- j++)
+ j++)
pipeputc(line[j]);
}
* fortunately, regmatch[0] contains the indicies to the whole matched
* expression (kinda seems like it was designed for just such a
* purpose...) */
- else if (replace[i] == '&' && replace[i - 1] != '\\') {
+ else if (replace[i] == '&') {
int j;
for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++)
/* and now, as long as we've got a line to try matching and if we can match
* the search string, we make substitutions */
- while ((*hackline || !altered) && (regexec(current_regex, hackline,
- sed_cmd->num_backrefs + 1,
- regmatch, 0) != REG_NOMATCH)) {
+ while ((*hackline || !altered)
+ && (regexec(current_regex, hackline, sed_cmd->num_backrefs + 1,
+ regmatch, 0) != REG_NOMATCH)) {
int i;
/* print everything before the match */
pipeputc(hackline[i]);
/* then print the substitution string */
- print_subst_w_backrefs(hackline, sed_cmd->replace, regmatch,
- pipeline, sed_cmd->num_backrefs);
+ print_subst_w_backrefs(hackline, sed_cmd->replace, regmatch, pipeline,
+ sed_cmd->num_backrefs);
/* advance past the match */
hackline += regmatch[0].rm_eo;
{
sed_cmd_t *sed_cmd;
- for (sed_cmd = sed_cmd_head.linear; sed_cmd; sed_cmd = sed_cmd->linear) {
- if (strcmp(sed_cmd->label, label) == 0) {
+ for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
+ if ((sed_cmd->label) && (strcmp(sed_cmd->label, label) == 0)) {
break;
}
}
if (pattern_space == NULL) {
return;
}
-
+
/* go through every line in the file */
do {
char *next_line;
sed_cmd_t *sed_cmd;
int substituted = 0;
+ /* This enables whole blocks of commands to be masked out if the lead address doesn't match */
+ int block_mask = 1;
/* Read one line in advance so we can act on the last line, the '$' address */
next_line = bb_get_chomped_line_from_file(file);
force_print = 0;
/* for every line, go through all the commands */
- for (sed_cmd = sed_cmd_head.linear; sed_cmd;
- sed_cmd = sed_cmd->linear) {
+ for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
int deleted = 0;
/*
* entry point into sedding...
*/
int matched = (
- /* no range necessary */
- (sed_cmd->beg_line == 0
- && sed_cmd->end_line == 0
- && sed_cmd->beg_match == NULL
- && sed_cmd->end_match == NULL) ||
- /* this line number is the first address we're looking for */
- (sed_cmd->beg_line
- && (sed_cmd->beg_line == linenum)) ||
- /* this line matches our first address regex */
- (sed_cmd->beg_match
- &&
- (regexec
- (sed_cmd->beg_match, pattern_space, 0, NULL,
- 0) == 0)) ||
- /* we are currently within the beginning & ending address range */
- still_in_range || ((sed_cmd->beg_line == -1)
- && (next_line == NULL))
+ /* no range necessary */
+ (sed_cmd->beg_line == 0 && sed_cmd->end_line == 0
+ && sed_cmd->beg_match == NULL
+ && sed_cmd->end_match == NULL) ||
+ /* this line number is the first address we're looking for */
+ (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum)) ||
+ /* this line matches our first address regex */
+ (sed_cmd->beg_match
+ && (regexec(sed_cmd->beg_match, pattern_space, 0, NULL,
+ 0) == 0)) ||
+ /* we are currently within the beginning & ending address range */
+ still_in_range || ((sed_cmd->beg_line == -1)
+ && (next_line == NULL))
);
- if (sed_cmd->invert ^ matched) {
+ if (sed_cmd->cmd == '{') {
+ block_mask = block_mask & matched;
+ }
+// matched &= block_mask;
+
+ if (sed_cmd->invert ^ (matched & block_mask)) {
/* Update last used regex incase a blank substitute BRE is found */
if (sed_cmd->beg_match) {
previous_regex_ptr = sed_cmd->beg_match;
pattern_space =
xrealloc(pattern_space,
- strlen(pattern_space) + 2);
+ strlen(pattern_space) + 2);
tmp = strchr(pattern_space + offset, '\n');
memmove(tmp + 1, tmp, strlen(tmp) + 1);
tmp[0] = '\\';
}
#endif
altered |= substituted;
- if (!be_quiet && altered && ((sed_cmd->linear == NULL)
- || (sed_cmd->linear->cmd !=
- 's'))) {
+ if (!be_quiet && altered && ((sed_cmd->next == NULL)
+ || (sed_cmd->next->cmd != 's'))) {
force_print = 1;
}
/* multi-address case */
/* - matching text */
|| (sed_cmd->end_match
- &&
- (regexec
- (sed_cmd->end_match, pattern_space, 0, NULL,
- 0) == 0))
+ && (regexec(sed_cmd->end_match, pattern_space, 0,
+ NULL, 0) == 0))
/* - matching line numbers */
|| (sed_cmd->end_line > 0
&& sed_cmd->end_line == linenum)) {
case 'r':{
FILE *outfile;
- puts(pattern_space);
outfile = fopen(sed_cmd->filename, "r");
- if (outfile)
+ if (outfile) {
+ char *line;
+
+ while ((line =
+ bb_get_chomped_line_from_file(outfile)) !=
+ NULL) {
+ pattern_space =
+ xrealloc(pattern_space,
+ strlen(line) + strlen(pattern_space) + 2);
+ strcat(pattern_space, "\n");
+ strcat(pattern_space, line);
+ }
bb_xprint_and_close_file(outfile);
- /* else if we couldn't open the output file,
- * no biggie, just don't print anything */
- altered++;
+ }
+
}
break;
case 'q': /* Branch to end of script and quit */
if (next_line) {
pattern_space =
realloc(pattern_space,
- strlen(pattern_space) +
- strlen(next_line) + 2);
+ strlen(pattern_space) + strlen(next_line) + 2);
strcat(pattern_space, "\n");
strcat(pattern_space, next_line);
next_line = bb_get_chomped_line_from_file(file);
free(pattern_space);
pattern_space = strdup(hold_space);
break;
+ case 'G': { /* Append newline and hold space to pattern space */
+ int pattern_space_size = 0;
+ if (pattern_space) {
+ pattern_space_size = strlen(pattern_space);
+ }
+ pattern_space = xrealloc(pattern_space, pattern_space_size + strlen(hold_space) + 2);
+ strcat(pattern_space, "\n");
+ strcat(pattern_space, hold_space);
+ break;
+ }
case 'h': /* Replace hold space with pattern space */
free(hold_space);
hold_space = strdup(pattern_space);
break;
+ case 'H': { /* Append newline and pattern space to hold space */
+ int hold_space_size = 0;
+ if (hold_space) {
+ hold_space_size = strlen(hold_space);
+ }
+ hold_space = xrealloc(hold_space, hold_space_size + strlen(pattern_space) + 2);
+ strcat(hold_space, "\n");
+ strcat(hold_space, pattern_space);
+ break;
+ }
case 'x':{
/* Swap hold and pattern space */
- char *tmp;
-
- tmp = pattern_space;
+ char *tmp = pattern_space;
pattern_space = hold_space;
hold_space = tmp;
+ break;
}
}
}
*/
if (matched) {
if (
- /* this is a single-address command or... */
- (sed_cmd->end_line == 0 && sed_cmd->end_match == NULL)
- || (
- /* If only one address */
- /* we were in the middle of our address range (this
- * isn't the first time through) and.. */
- (still_in_range == 1) && (
- /* this line number is the last address we're looking for or... */
- (sed_cmd->
- end_line
- && (sed_cmd->
- end_line ==
- linenum))
- ||
- /* this line matches our last address regex */
- (sed_cmd->
- end_match
- &&
- (regexec
- (sed_cmd->
- end_match,
- pattern_space,
- 0, NULL,
- 0) == 0))
- )
- )
- ) {
+ /* this is a single-address command or... */
+ (sed_cmd->end_line == 0 && sed_cmd->end_match == NULL)
+ /* If only one address */
+ /* we were in the middle of our address range (this
+ * isn't the first time through) and.. */
+ || ((still_in_range == 1)
+ /* this line number is the last address we're looking for or... */
+ && ((sed_cmd->end_line > 0
+ && (sed_cmd->end_line == linenum))
+ /* this line matches our last address regex */
+ || (sed_cmd->end_match
+ && (regexec(sed_cmd->end_match, pattern_space,
+ 0, NULL, 0) == 0))))) {
/* we're out of our address range */
still_in_range = 0;
- }
-
- /* didn't hit the exit? then we're still in the middle of an address range */
- else {
+ } else {
+ /* didn't hit the exit? then we're still in the middle of an address range */
still_in_range = 1;
}
}
+ if (sed_cmd->cmd == '}') {
+ block_mask = 1;
+ }
+
if (deleted)
break;
}
/* if we didn't get a pattern from a -e and no command file was specified,
* argv[optind] should be the pattern. no pattern, no worky */
- if (sed_cmd_head.linear == NULL) {
+ if (sed_cmd_head.next == NULL) {
if (argv[optind] == NULL)
bb_show_usage();
else {