* Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
* Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
* Copyright (C) 2002 Matt Kraai
- * Copyright (C) 2003 by Glenn McGrath <bug1@optushome.com.au>
+ * Copyright (C) 2003 by Glenn McGrath <bug1@iinet.net.au>
+ * Copyright (C) 2003,2004 by Rob Landley <rob@landley.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
resulting sed_cmd_t structures are appended to a linked list
(sed_cmd_head/sed_cmd_tail).
- process_file() does actual sedding, reading data lines from an input FILE *
+ add_input_file() adds a FILE * to the list of input files. We need to
+ know them all ahead of time to find the last line for the $ match.
+
+ process_files() does actual sedding, reading data lines from each input FILE *
(which could be stdin) and applying the sed command list (sed_cmd_head) to
each of the resulting lines.
- grouped commands: {cmd1;cmd2}
- transliteration (y/source-chars/dest-chars/)
- pattern space hold space storing / swapping (g, h, x)
- - labels / branching (: label, b, t)
+ - labels / branching (: label, b, t, T)
(Note: Specifying an address (range) to match is *optional*; commands
default to the whole pattern space if no specific address match was
Unsupported features:
- - GNU extensions
+ - most GNU extensions
- and more.
Todo:
#include <stdio.h>
#include <unistd.h> /* for getopt() */
-#include <regex.h>
#include <string.h> /* for strdup() */
#include <errno.h>
#include <ctype.h> /* for isspace() */
#include <stdlib.h>
#include "busybox.h"
+#include "xregex.h"
typedef struct sed_cmd_s {
/* Ordered by alignment requirements: currently 36 bytes on x86 */
/* globals */
/* options */
-static int be_quiet = 0;
+static int be_quiet, in_place, regex_type;
+static FILE *nonstdout;
+static char *outname,*hold_space;
+
+/* List of input files */
+static int input_file_count,current_input_file;
+static FILE **input_file_list;
static const char bad_format_in_subst[] =
"bad format in substitution expression";
-const char *const semicolon_whitespace = "; \n\r\t\v";
+static const char *const semicolon_whitespace = "; \n\r\t\v";
-regmatch_t regmatch[10];
-static regex_t *previous_regex_ptr = NULL;
+static regmatch_t regmatch[10];
+static regex_t *previous_regex_ptr;
/* linked list of sed commands */
static sed_cmd_t sed_cmd_head;
char *string;
struct append_list *next;
};
-struct append_list *append_head=NULL, *append_tail=NULL;
+static struct append_list *append_head=NULL, *append_tail=NULL;
#ifdef CONFIG_FEATURE_CLEAN_UP
static void free_and_close_stuff(void)
free(sed_cmd);
sed_cmd = sed_cmd_next;
}
+
+ if(hold_space) free(hold_space);
+
+ while(current_input_file<input_file_count)
+ fclose(input_file_list[current_input_file++]);
}
#endif
+/* If something bad happens during -i operation, delete temp file.
+ * Registered with atexit() when the -i option is parsed.  outname is set
+ * just before the temp file is created and reset to 0 after the rename
+ * over the original file, so once an in-place edit has completed there is
+ * nothing left here to unlink. */
+
+static void cleanup_outname(void)
+{
+ if(outname) unlink(outname);
+}
+
/* strdup, replacing "\n" with '\n', and "\delimiter" with 'delimiter' */
static void parse_escapes(char *dest, const char *string, int len, char from, char to)
/*
* index_of_next_unescaped_regexp_delim - walks left to right through a string
* beginning at a specified index and returns the index of the next regular
- * expression delimiter (typically a forward * slash ('/')) not preceeded by
+ * expression delimiter (typically a forward slash ('/')) not preceded by
* a backslash ('\').
*/
static int index_of_next_unescaped_regexp_delim(const char delimiter,
next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
if (next == -1)
bb_error_msg_and_die("unterminated match expression");
-
+
temp=copy_parsing_slashn(pos,next);
*regex = (regex_t *) xmalloc(sizeof(regex_t));
- xregcomp(*regex, temp, REG_NEWLINE);
+ xregcomp(*regex, temp, regex_type|REG_NEWLINE);
free(temp);
/* Move position to next character after last delimiter */
pos+=(next+1);
static int parse_subst_cmd(sed_cmd_t * const sed_cmd, char *substr)
{
- int cflags = 0;
+ int cflags = regex_type;
char *match;
int idx = 0;
{
char *temp;
idx+=parse_file_cmd(sed_cmd,substr+idx,&temp);
-
+
break;
}
/* Ignore case (gnu exension) */
if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
bb_error_msg_and_die
("only a beginning address can be specified for edit commands");
- while(isspace(*cmdstr)) cmdstr++;
+ for(;;) {
+ if(*cmdstr=='\n' || *cmdstr=='\\') {
+ cmdstr++;
+ break;
+ } else if(isspace(*cmdstr)) cmdstr++;
+ else break;
+ }
sed_cmd->string = bb_xstrdup(cmdstr);
parse_escapes(sed_cmd->string,sed_cmd->string,strlen(cmdstr),0,0);
cmdstr += strlen(cmdstr);
if(sed_cmd->cmd=='w')
sed_cmd->file=bb_xfopen(sed_cmd->string,"w");
/* handle branch commands */
- } else if (strchr(":bt", sed_cmd->cmd)) {
+ } else if (strchr(":btT", sed_cmd->cmd)) {
int length;
while(isspace(*cmdstr)) cmdstr++;
/* Parse address+command sets, skipping comment lines. */
-void add_cmd(char *cmdstr)
+static void add_cmd(char *cmdstr)
{
static char *add_cmd_line=NULL;
sed_cmd_t *sed_cmd;
}
}
-struct pipeline {
+/* Append to a string, reallocating memory as necessary. */
+
+static struct pipeline {
char *buf; /* Space to hold string */
int idx; /* Space used */
int len; /* Space allocated */
#define PIPE_GROW 64
-void pipe_putc(char c)
+static void pipe_putc(char c)
{
if(pipeline.idx==pipeline.len) {
pipeline.buf = xrealloc(pipeline.buf, pipeline.len + PIPE_GROW);
do {
int i;
+ /* Work around bug in glibc regexec, demonstrated by:
+ echo " a.b" | busybox sed 's [^ .]* x g'
+ The match_count check is there so that we do not break
+ echo "hi" | busybox sed 's/^/!/g' */
+ if(!regmatch[0].rm_so && !regmatch[0].rm_eo && match_count) {
+ pipe_putc(*(oldline++));
+ continue;
+ }
+
match_count++;
/* If we aren't interested in this match, output old line to
{
/* Output appended lines. */
while(append_head) {
- puts(append_head->string);
+ fprintf(nonstdout,"%s\n",append_head->string);
append_tail=append_head->next;
free(append_head->string);
free(append_head);
append_head=append_tail=NULL;
}
-/* Get next line of input, flushing append buffer and noting if we hit EOF
- * without a newline on the last line.
+static void add_input_file(FILE *file)
+{ /* Append file to the end of input_file_list. */
+ input_file_list=xrealloc(input_file_list,(input_file_count+1)*sizeof(FILE *)); /* grow the array by one slot */
+ input_file_list[input_file_count++]=file; /* store in the new slot, then bump the count */
+}
+
+/* Get next line of input from input_file_list, flushing append buffer and
+ * noting if we ran out of files without a newline on the last line we read.
*/
-static char *get_next_line(FILE * file, int *no_newline)
+static char *get_next_line(int *no_newline)
{
- char *temp;
+ char *temp=NULL;
int len;
flush_append();
- temp=bb_get_line_from_file(file);
- if(temp) {
- len=strlen(temp);
- if(len && temp[len-1]=='\n') temp[len-1]=0;
- else *no_newline=1;
+ while(current_input_file<input_file_count) {
+ temp=bb_get_line_from_file(input_file_list[current_input_file]);
+ if(temp) {
+ len=strlen(temp);
+ *no_newline=!(len && temp[len-1]=='\n');
+ if(!*no_newline) temp[len-1]=0;
+ break;
+ } else fclose(input_file_list[current_input_file++]);
}
return temp;
fputs(s,file);
if(!no_newline) fputc('\n',file);
+ if(ferror(file)) {
+ fprintf(stderr,"Write failed.\n");
+ exit(4); /* It's what gnu sed exits with... */
+ }
+
return no_newline;
}
-#define sed_puts(s,n) missing_newline=puts_maybe_newline(s,stdout,missing_newline,n)
+#define sed_puts(s,n) missing_newline=puts_maybe_newline(s,nonstdout,missing_newline,n)
-static void process_file(FILE * file)
+static void process_files(void)
{
- char *pattern_space, *next_line, *hold_space=NULL;
- static int linenum = 0, missing_newline=0;
+ char *pattern_space, *next_line;
+ int linenum = 0, missing_newline=0;
int no_newline,next_no_newline=0;
- next_line = get_next_line(file,&next_no_newline);
+ next_line = get_next_line(&next_no_newline);
- /* go through every line in the file */
+ /* go through every line in each file */
for(;;) {
sed_cmd_t *sed_cmd;
int substituted=0;
no_newline=next_no_newline;
/* Read one line in advance so we can act on the last line, the '$' address */
- next_line = get_next_line(file,&next_no_newline);
+ next_line = get_next_line(&next_no_newline);
linenum++;
restart:
/* for every line, go through all the commands */
/* Print line number */
case '=':
- printf("%d\n", linenum);
+ fprintf(nonstdout,"%d\n", linenum);
break;
/* Write the current pattern space up to the first newline */
/* Cut and paste text (replace) */
case 'c':
/* Only triggers on last line of a matching range. */
- if (!sed_cmd->in_match) sed_puts(sed_cmd->string,1);
+ if (!sed_cmd->in_match) sed_puts(sed_cmd->string,0);
goto discard_line;
/* Read file, append contents to output */
free(pattern_space);
pattern_space = next_line;
no_newline=next_no_newline;
- next_line = get_next_line(file,&next_no_newline);
+ next_line = get_next_line(&next_no_newline);
linenum++;
break;
}
pattern_space[len]='\n';
strcpy(pattern_space+len+1, next_line);
no_newline=next_no_newline;
- next_line = get_next_line(file,&next_no_newline);
+ next_line = get_next_line(&next_no_newline);
linenum++;
}
break;
}
- /* Test if substition worked, branch if so. */
+ /* Test/branch if substitution occurred */
case 't':
- if (!substituted) break;
+ if(!substituted) break;
substituted=0;
- /* Fall through */
+ /* Fall through */
+ /* Test/branch if substitution didn't occur */
+ case 'T':
+ if (substituted) break;
+ /* Fall through */
/* Branch to label */
case 'b':
if (!sed_cmd->string) goto discard_commands;
for (j = 0; sed_cmd->string[j]; j += 2) {
if (pattern_space[i] == sed_cmd->string[j]) {
pattern_space[i] = sed_cmd->string[j + 1];
+ break;
}
}
}
}
case 'g': /* Replace pattern space with hold space */
free(pattern_space);
- if (hold_space) {
- pattern_space = strdup(hold_space);
- no_newline=0;
- }
+ pattern_space = strdup(hold_space ? hold_space : "");
break;
case 'G': /* Append newline and hold space to pattern space */
{
extern int sed_main(int argc, char **argv)
{
- int opt, status = EXIT_SUCCESS;
+ int status = EXIT_SUCCESS, opt, getpat = 1;
#ifdef CONFIG_FEATURE_CLEAN_UP
/* destroy command strings on exit */
bb_perror_msg_and_die("atexit");
#endif
+#define LIE_TO_AUTOCONF
+#ifdef LIE_TO_AUTOCONF
+ if(argc==2 && !strcmp(argv[1],"--version")) {
+ printf("This is not GNU sed version 4.0\n");
+ exit(0);
+ }
+#endif
+
/* do normal option parsing */
- while ((opt = getopt(argc, argv, "ne:f:")) > 0) {
+ while ((opt = getopt(argc, argv, "irne:f:")) > 0) {
switch (opt) {
+ case 'i':
+ in_place++;
+ atexit(cleanup_outname);
+ break;
+ case 'r':
+ regex_type|=REG_EXTENDED;
+ break;
case 'n':
be_quiet++;
break;
case 'e':
add_cmd_block(optarg);
+ getpat=0;
break;
case 'f':
{
while ((line = bb_get_chomped_line_from_file(cmdfile))
!= NULL) {
add_cmd(line);
+ getpat=0;
free(line);
}
bb_xprint_and_close_file(cmdfile);
}
}
- /* if we didn't get a pattern from a -e and no command file was specified,
- * argv[optind] should be the pattern. no pattern, no worky */
- if (sed_cmd_head.next == NULL) {
+ /* if we didn't get a pattern from -e or -f, use argv[optind] */
+ if(getpat) {
if (argv[optind] == NULL)
bb_show_usage();
else
/* Flush any unfinished commands. */
add_cmd("");
+ /* By default, we write to stdout */
+ nonstdout=stdout;
+
/* argv[(optind)..(argc-1)] should be names of file to process. If no
* files were specified or '-' was specified, take input from stdin.
* Otherwise, we process all the files specified. */
if (argv[optind] == NULL) {
- process_file(stdin);
+ if(in_place) bb_error_msg_and_die("Filename required for -i");
+ add_input_file(stdin);
+ process_files();
} else {
int i;
FILE *file;
for (i = optind; i < argc; i++) {
- if(!strcmp(argv[i], "-")) {
- process_file(stdin);
+ if(!strcmp(argv[i], "-") && !in_place) {
+ add_input_file(stdin);
+ process_files();
} else {
file = bb_wfopen(argv[i], "r");
if (file) {
- process_file(file);
- fclose(file);
+ if(in_place) {
+ struct stat statbuf;
+ int nonstdoutfd;
+
+ outname=bb_xstrndup(argv[i],strlen(argv[i])+6);
+ strcat(outname,"XXXXXX");
+ if(-1==(nonstdoutfd=mkstemp(outname)))
+ bb_error_msg_and_die("no temp file");
+ nonstdout=fdopen(nonstdoutfd,"w");
+ /* Set permissions of output file */
+ fstat(fileno(file),&statbuf);
+ fchmod(nonstdoutfd,statbuf.st_mode);
+ add_input_file(file);
+ process_files();
+ fclose(nonstdout);
+ nonstdout=stdout;
+ unlink(argv[i]);
+ rename(outname,argv[i]);
+ free(outname);
+ outname=0;
+ } else add_input_file(file);
} else {
status = EXIT_FAILURE;
}
}
}
+ if(input_file_count>current_input_file) process_files();
}
return status;