Applied patch from Matt Kraai which does the following:
author Mark Whitley <markw@lineo.com>
Tue, 11 Jul 2000 16:53:56 +0000 (16:53 -0000)
committer Mark Whitley <markw@lineo.com>
Tue, 11 Jul 2000 16:53:56 +0000 (16:53 -0000)
 - adds case-insensitive matching in sed s/// expressions
 - consolidates common regcomp code in grep & sed into bb_regcomp and puts it in
   utility.c
 - cleans up a bunch of cruft

editors/sed.c
findutils/grep.c
grep.c
internal.h
sed.c
utility.c

index 329f5ae8d401517ee3ef276eeb0f2e71ef298d40..2fb243fb94ae4e09861b02aef6d868ed010a292e 100644 (file)
@@ -97,50 +97,6 @@ static const char sed_usage[] =
 #endif
        ;
 
-#if 0
-/* Nuke from here { */
-
-
-/* get_line_from_file() - This function reads an entire line from a text file
- *  * up to a newline. It returns a malloc'ed char * which must be stored and
- *   * free'ed  by the caller. */
-extern char *get_line_from_file(FILE *file)
-{
-       static const int GROWBY = 80; /* how large we will grow strings by */
-
-       int ch; 
-       int idx = 0;
-       char *linebuf = NULL;
-       int linebufsz = 0;
-
-       while (1) {
-               ch = fgetc(file);
-               if (ch == EOF)
-                       break;
-               /* grow the line buffer as necessary */
-               if (idx > linebufsz-2)
-                       linebuf = realloc(linebuf, linebufsz += GROWBY);
-               linebuf[idx++] = (char)ch;
-               if ((char)ch == '\n')
-                       break;
-       }
-
-       if (idx == 0)
-               return NULL;
-
-       linebuf[idx] = 0;
-       return linebuf;
-}
-
-static void usage(const char *string)
-{
-       printf("usage: %s\n", string);
-       exit(0);
-}
-
-/* } to here when we integrate this into busybox */
-#endif
-
 static void destroy_cmd_strs()
 {
        if (sed_cmds == NULL)
@@ -246,29 +202,15 @@ static int get_address(const char *str, int *line, regex_t **regex)
                idx++;
        }
        else if (my_str[idx] == '/') {
-               int ret;
                idx = index_of_next_unescaped_slash(idx, my_str);
-               if (idx == -1) {
-                       free(my_str);
+               if (idx == -1)
                        exit_sed(1, "sed: unterminated match expression\n");
-               }
-               my_str[idx] = 0; /* shave off the trailing '/' */
-               my_str++; /* shave off the leading '/' */
-               *regex = (regex_t *)malloc(sizeof(regex_t));
-               if ((ret = regcomp(*regex, my_str, 0)) != 0) {
-                       /* error handling if regular expression couldn't be compiled */
-                       int errmsgsz = regerror(ret, *regex, NULL, 0);
-                       char *errmsg = malloc(errmsgsz);
-                       if (errmsg == NULL) {
-                               exit_sed(1, "sed: memory error\n");
-                       }
-                       regerror(ret, *regex, errmsg, errmsgsz);
-                       fprintf(stderr, "sed: %s\n", errmsg);
-                       free(errmsg);
+               my_str[idx] = '\0';
+               *regex = (regex_t *)xmalloc(sizeof(regex_t));
+               if (bb_regcomp(*regex, my_str+1, REG_NEWLINE) != 0) {
+                       free(my_str);
                        exit_sed(1, NULL);
                }
-               my_str--; /* move my_str back so free() (below) won't barf */
-               idx++; /* advance idx one past the end of the /match/ */
        }
        else {
                fprintf(stderr, "sed.c:get_address: no address found in string\n");
@@ -280,6 +222,15 @@ static int get_address(const char *str, int *line, regex_t **regex)
        return idx;
 }
 
+static char *strdup_substr(const char *str, int start, int end)
+{
+       int size = end - start + 1;
+       char *newstr = xmalloc(size);
+       memcpy(newstr, str+start, size-1);
+       newstr[size-1] = '\0';
+       return newstr;
+}
+
 static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
 {
        int idx = 0;
@@ -306,10 +257,11 @@ static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
        sed_cmd->cmd = cmdstr[idx];
        /* special-case handling for 's' */
        if (sed_cmd->cmd == 's') {
-               int oldidx;
+               int oldidx, cflags = REG_NEWLINE;
+               char *match;
                /* format for substitution is:
-                *    s/match/replace/g
-                *    |               |
+                *    s/match/replace/gI
+                *    |               ||
                 *    mandatory       optional
                 */
 
@@ -317,19 +269,41 @@ static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
                if (cmdstr[++idx] != '/')
                        exit_sed(1, "sed: bad format in substitution expression\n");
 
-               /* get the substitution part */
-               idx += get_address(&cmdstr[idx], NULL, &sed_cmd->sub_match);
+               /* save the match string */
+               oldidx = idx+1;
+               idx = index_of_next_unescaped_slash(idx, cmdstr);
+               if (idx == -1)
+                       exit_sed(1, "sed: bad format in substitution expression\n");
+               match = strdup_substr(cmdstr, oldidx, idx);
 
-               /* get the replacement part */
-               oldidx = idx;
+               /* save the replacement string */
+               oldidx = idx+1;
                idx = index_of_next_unescaped_slash(idx, cmdstr);
-               sed_cmd->replace = (char *)malloc(idx - oldidx + 1);
-               strncpy(sed_cmd->replace, &cmdstr[oldidx], idx - oldidx);
-               sed_cmd->replace[idx - oldidx] = 0;
+               if (idx == -1)
+                       exit_sed(1, "sed: bad format in substitution expression\n");
+               sed_cmd->replace = strdup_substr(cmdstr, oldidx, idx);
 
-               /* store the 'g' if present */
-               if (cmdstr[++idx] == 'g')
-                       sed_cmd->sub_g = 1;
+               /* process the flags */
+               while (cmdstr[++idx]) {
+                       switch (cmdstr[idx]) {
+                       case 'g':
+                               sed_cmd->sub_g = 1;
+                               break;
+                       case 'I':
+                               cflags |= REG_ICASE;
+                               break;
+                       default:
+                               exit_sed(1, "sed: bad option in substitution expression\n");
+                       }
+               }
+                       
+               /* compile the regex */
+               sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t));
+               if (bb_regcomp(sed_cmd->sub_match, match, cflags) != 0) {
+                       free(match);
+                       exit_sed(1, NULL);
+               }
+               free(match);
        }
 }
 
@@ -553,10 +527,3 @@ extern int sed_main(int argc, char **argv)
        /* not reached */
        return 0;
 }
-
-#ifdef TEST_SED
-int main(int argc, char **argv)
-{
-       return sed_main(argc, argv);
-}
-#endif
index 8d2c915be55176e3403c6540d8d0017f6284cbda..dec365f05be675baf00d34a01e36db85dc91dba0 100644 (file)
@@ -104,7 +104,6 @@ extern int grep_main(int argc, char **argv)
 {
        int opt;
        int reflags;
-       int ret;
 
        /* do special-case option parsing */
        if (argv[1] && (strcmp(argv[1], "--help") == 0))
@@ -147,20 +146,8 @@ extern int grep_main(int argc, char **argv)
        reflags = REG_NOSUB | REG_NEWLINE; 
        if (ignore_case)
                reflags |= REG_ICASE;
-       if ((ret = regcomp(&regex, argv[optind], reflags)) != 0) {
-               int errmsgsz = regerror(ret, &regex, NULL, 0);
-               char *errmsg = malloc(errmsgsz);
-               if (errmsg == NULL) {
-                       fprintf(stderr, "grep: memory error\n");
-                       regfree(&regex);
-                       exit(1);
-               }
-               regerror(ret, &regex, errmsg, errmsgsz);
-               fprintf(stderr, "grep: %s\n", errmsg);
-               free(errmsg);
-               regfree(&regex);
+       if (bb_regcomp(&regex, argv[optind], reflags) != 0)
                exit(1);
-       }
 
        /* argv[(optind+1)..(argc-1)] should be names of file to grep through. If
         * there is more than one file to grep, we will print the filenames */
diff --git a/grep.c b/grep.c
index 8d2c915be55176e3403c6540d8d0017f6284cbda..dec365f05be675baf00d34a01e36db85dc91dba0 100644 (file)
--- a/grep.c
+++ b/grep.c
@@ -104,7 +104,6 @@ extern int grep_main(int argc, char **argv)
 {
        int opt;
        int reflags;
-       int ret;
 
        /* do special-case option parsing */
        if (argv[1] && (strcmp(argv[1], "--help") == 0))
@@ -147,20 +146,8 @@ extern int grep_main(int argc, char **argv)
        reflags = REG_NOSUB | REG_NEWLINE; 
        if (ignore_case)
                reflags |= REG_ICASE;
-       if ((ret = regcomp(&regex, argv[optind], reflags)) != 0) {
-               int errmsgsz = regerror(ret, &regex, NULL, 0);
-               char *errmsg = malloc(errmsgsz);
-               if (errmsg == NULL) {
-                       fprintf(stderr, "grep: memory error\n");
-                       regfree(&regex);
-                       exit(1);
-               }
-               regerror(ret, &regex, errmsg, errmsgsz);
-               fprintf(stderr, "grep: %s\n", errmsg);
-               free(errmsg);
-               regfree(&regex);
+       if (bb_regcomp(&regex, argv[optind], reflags) != 0)
                exit(1);
-       }
 
        /* argv[(optind+1)..(argc-1)] should be names of file to grep through. If
         * there is more than one file to grep, we will print the filenames */
index 5864c47acf5a33398f4bf6d24a9927d600296ba4..4ef15325e37c6fadca1692db1ae87a907f7832be 100644 (file)
@@ -34,7 +34,7 @@
 #include <sys/stat.h>
 #include <sys/param.h>
 #include <mntent.h>
-
+#include <regex.h>
 
 /* Some useful definitions */
 #define FALSE   ((int) 1)
@@ -259,6 +259,7 @@ extern int find_real_root_device_name(char* name);
 extern char *get_line_from_file(FILE *file);
 extern char process_escape_sequence(char **ptr);
 extern char *get_last_path_component(char *path);
+extern int bb_regcomp(regex_t *preg, const char *regex, int cflags);
 
 extern void *xmalloc (size_t size);
 extern char *xstrdup (const char *s);
diff --git a/sed.c b/sed.c
index 329f5ae8d401517ee3ef276eeb0f2e71ef298d40..2fb243fb94ae4e09861b02aef6d868ed010a292e 100644 (file)
--- a/sed.c
+++ b/sed.c
@@ -97,50 +97,6 @@ static const char sed_usage[] =
 #endif
        ;
 
-#if 0
-/* Nuke from here { */
-
-
-/* get_line_from_file() - This function reads an entire line from a text file
- *  * up to a newline. It returns a malloc'ed char * which must be stored and
- *   * free'ed  by the caller. */
-extern char *get_line_from_file(FILE *file)
-{
-       static const int GROWBY = 80; /* how large we will grow strings by */
-
-       int ch; 
-       int idx = 0;
-       char *linebuf = NULL;
-       int linebufsz = 0;
-
-       while (1) {
-               ch = fgetc(file);
-               if (ch == EOF)
-                       break;
-               /* grow the line buffer as necessary */
-               if (idx > linebufsz-2)
-                       linebuf = realloc(linebuf, linebufsz += GROWBY);
-               linebuf[idx++] = (char)ch;
-               if ((char)ch == '\n')
-                       break;
-       }
-
-       if (idx == 0)
-               return NULL;
-
-       linebuf[idx] = 0;
-       return linebuf;
-}
-
-static void usage(const char *string)
-{
-       printf("usage: %s\n", string);
-       exit(0);
-}
-
-/* } to here when we integrate this into busybox */
-#endif
-
 static void destroy_cmd_strs()
 {
        if (sed_cmds == NULL)
@@ -246,29 +202,15 @@ static int get_address(const char *str, int *line, regex_t **regex)
                idx++;
        }
        else if (my_str[idx] == '/') {
-               int ret;
                idx = index_of_next_unescaped_slash(idx, my_str);
-               if (idx == -1) {
-                       free(my_str);
+               if (idx == -1)
                        exit_sed(1, "sed: unterminated match expression\n");
-               }
-               my_str[idx] = 0; /* shave off the trailing '/' */
-               my_str++; /* shave off the leading '/' */
-               *regex = (regex_t *)malloc(sizeof(regex_t));
-               if ((ret = regcomp(*regex, my_str, 0)) != 0) {
-                       /* error handling if regular expression couldn't be compiled */
-                       int errmsgsz = regerror(ret, *regex, NULL, 0);
-                       char *errmsg = malloc(errmsgsz);
-                       if (errmsg == NULL) {
-                               exit_sed(1, "sed: memory error\n");
-                       }
-                       regerror(ret, *regex, errmsg, errmsgsz);
-                       fprintf(stderr, "sed: %s\n", errmsg);
-                       free(errmsg);
+               my_str[idx] = '\0';
+               *regex = (regex_t *)xmalloc(sizeof(regex_t));
+               if (bb_regcomp(*regex, my_str+1, REG_NEWLINE) != 0) {
+                       free(my_str);
                        exit_sed(1, NULL);
                }
-               my_str--; /* move my_str back so free() (below) won't barf */
-               idx++; /* advance idx one past the end of the /match/ */
        }
        else {
                fprintf(stderr, "sed.c:get_address: no address found in string\n");
@@ -280,6 +222,15 @@ static int get_address(const char *str, int *line, regex_t **regex)
        return idx;
 }
 
+static char *strdup_substr(const char *str, int start, int end)
+{
+       int size = end - start + 1;
+       char *newstr = xmalloc(size);
+       memcpy(newstr, str+start, size-1);
+       newstr[size-1] = '\0';
+       return newstr;
+}
+
 static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
 {
        int idx = 0;
@@ -306,10 +257,11 @@ static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
        sed_cmd->cmd = cmdstr[idx];
        /* special-case handling for 's' */
        if (sed_cmd->cmd == 's') {
-               int oldidx;
+               int oldidx, cflags = REG_NEWLINE;
+               char *match;
                /* format for substitution is:
-                *    s/match/replace/g
-                *    |               |
+                *    s/match/replace/gI
+                *    |               ||
                 *    mandatory       optional
                 */
 
@@ -317,19 +269,41 @@ static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
                if (cmdstr[++idx] != '/')
                        exit_sed(1, "sed: bad format in substitution expression\n");
 
-               /* get the substitution part */
-               idx += get_address(&cmdstr[idx], NULL, &sed_cmd->sub_match);
+               /* save the match string */
+               oldidx = idx+1;
+               idx = index_of_next_unescaped_slash(idx, cmdstr);
+               if (idx == -1)
+                       exit_sed(1, "sed: bad format in substitution expression\n");
+               match = strdup_substr(cmdstr, oldidx, idx);
 
-               /* get the replacement part */
-               oldidx = idx;
+               /* save the replacement string */
+               oldidx = idx+1;
                idx = index_of_next_unescaped_slash(idx, cmdstr);
-               sed_cmd->replace = (char *)malloc(idx - oldidx + 1);
-               strncpy(sed_cmd->replace, &cmdstr[oldidx], idx - oldidx);
-               sed_cmd->replace[idx - oldidx] = 0;
+               if (idx == -1)
+                       exit_sed(1, "sed: bad format in substitution expression\n");
+               sed_cmd->replace = strdup_substr(cmdstr, oldidx, idx);
 
-               /* store the 'g' if present */
-               if (cmdstr[++idx] == 'g')
-                       sed_cmd->sub_g = 1;
+               /* process the flags */
+               while (cmdstr[++idx]) {
+                       switch (cmdstr[idx]) {
+                       case 'g':
+                               sed_cmd->sub_g = 1;
+                               break;
+                       case 'I':
+                               cflags |= REG_ICASE;
+                               break;
+                       default:
+                               exit_sed(1, "sed: bad option in substitution expression\n");
+                       }
+               }
+                       
+               /* compile the regex */
+               sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t));
+               if (bb_regcomp(sed_cmd->sub_match, match, cflags) != 0) {
+                       free(match);
+                       exit_sed(1, NULL);
+               }
+               free(match);
        }
 }
 
@@ -553,10 +527,3 @@ extern int sed_main(int argc, char **argv)
        /* not reached */
        return 0;
 }
-
-#ifdef TEST_SED
-int main(int argc, char **argv)
-{
-       return sed_main(argc, argv);
-}
-#endif
index 46907e46a1400877782f46919d2742321d50734c..cbbc02f989fb7ae7c9ce4be0a0d8b7cbda037b2a 100644 (file)
--- a/utility.c
+++ b/utility.c
@@ -1721,6 +1721,22 @@ char *get_last_path_component(char *path)
 }
 #endif
 
+#if defined BB_GREP || defined BB_SED
+int bb_regcomp(regex_t *preg, const char *regex, int cflags)
+{
+       int ret;
+       if ((ret = regcomp(preg, regex, cflags)) != 0) {
+               int errmsgsz = regerror(ret, preg, NULL, 0);
+               char *errmsg = xmalloc(errmsgsz);
+               regerror(ret, preg, errmsg, errmsgsz);
+               errorMsg("bb_regcomp: %s\n", errmsg);
+               free(errmsg);
+               regfree(preg);
+       }
+       return ret;
+}
+#endif
+
 /* END CODE */
 /*
 Local Variables: