Undo all of the ugliness and some of the bloat from 15412.
[oweals/busybox.git] / coreutils / cut.c
index 4907ed9354fdba9581f828112febab266b7d64bd..11e9d5e873bc4aa155699799a8ebda29986dce25 100644 (file)
@@ -1,11 +1,9 @@
-/* vi: set sw=4 ts=4: */
+/* vi: set sw=8 ts=8: */
 /*
- * cut implementation for busybox
+ * cut.c - minimalist version of cut
  *
- * Copyright (c) Michael J. Holme
- *
- * This version of cut is adapted from Minix cut and was modified 
- * by Erik Andersen <andersee@debian.org> to be used in busybox.
+ * Copyright (C) 1999,2000,2001 by Lineo, inc.
+ * Written by Mark Whitley <markw@codepoet.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- * 
- * Original copyright notice is retained at the end of this file.
+ *
  */
 
-#include "internal.h"
-#include <sys/types.h>
-#include <ctype.h>
-#include <string.h>
-#include <errno.h>
-#include <stdlib.h>
 #include <stdio.h>
-#define BB_DECLARE_EXTERN
-#define bb_need_help
-#include "messages.c"
-
-#define MAX_FIELD      80                      /* Pointers to the beginning of each field
-                                                                  * are stored in columns[], if a line holds
-                                                                  * more than MAX_FIELD columns the array
-                                                                  * boundary is exceed. But unlikely at 80 */
-
-#define MAX_ARGS       32                      /* Maximum number of fields following -f or
-                                                                  * -c switches                                                      */
-int args[MAX_ARGS * 2];
-int num_args;
-
-/* Lots of new defines, should easen maintainance...                   */
-#define DUMP_STDIN     0                       /* define for mode: no options   */
-#define OPTIONF                1                       /* define for mode: option -f    */
-#define OPTIONC                2                       /* define for mode: option -c    */
-#define OPTIONB                3                       /* define for mode: option -b    */
-#define NOTSET         0                       /* option not selected       */
-#define SET                    1                       /* option selected       */
-#define OPTIONS                1                       /*define option -s */
-/* Defines for the warnings                                            */
-#define DELIMITER_NOT_APPLICABLE       0
-#define OVERRIDING_PREVIOUS_MODE       1
-#define OPTION_NOT_APPLICABLE          2
-#define UNKNOWN_OPTION                 3
-#define FILE_NOT_READABLE              4
-/* Defines for the fatal errors                                                */
-#define SYNTAX_ERROR                           101
-#define POSITION_ERROR                         102
-#define LINE_TO_LONG_ERROR                     103
-#define RANGE_ERROR                                    104
-#define MAX_FIELDS_EXEEDED_ERROR       105
-#define MAX_ARGS_EXEEDED_ERROR         106
-
-
-int mode;                                              /* 0 = dump stdin to stdout, 1=-f, 2=-c   */
-char delim = '\t';                             /* default delimiting character   */
-FILE *fd;
-char line[BUFSIZ];
-int exit_status;
-int option = 0;                                     /* for -s option */
-
-int cut_main(int argc, char **argv);
-void warn(int warn_number, char *option);
-void cuterror(int err);
-void get_args(void);
-void cut(void);
-
-void warn(int warn_number, char *option)
-{
-       static char *warn_msg[] = {
-               "Option -%s allowed only with -f\n",
-               "-%s overrides earlier option\n",
-               "-%s not allowed in current mode\n",
-               "Cannot open %s\n"
-       };
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <limits.h>
+#include "busybox.h"
 
-       errorMsg(warn_msg[warn_number], option);
-       exit_status = warn_number + 1;
 
-}
+/* option vars */
+/* option letters handed to bb_getopt_ulflags(); b, c, f and d take an argument */
+static const char optstring[] = "b:c:f:d:sn";
+/* bit masks tested against the value returned by bb_getopt_ulflags().
+ * NOTE(review): the values look like they follow the order of the letters in
+ * optstring (b=1, c=2, f=4, d=8, s=16) -- confirm against bb_getopt_ulflags() */
+#define OPT_BYTE_FLGS    1
+#define OPT_CHAR_FLGS    2
+#define OPT_FIELDS_FLGS  4
+#define OPT_DELIM_FLGS   8
+#define OPT_SUPRESS_FLGS 16
+/* which of the OPT_BYTE/OPT_CHAR/OPT_FIELDS bits was selected; set in cut_main() */
+static char part; /* (b)yte, (c)har, (f)ields */
+/* non-zero when -s was given: lines without a delimiter are not printed */
+static unsigned int supress_non_delimited_lines;
+static char delim = '\t'; /* delimiter, default is tab */
+
+/* one parsed list element; positions are zero-based */
+struct cut_list {
+       int startpos; /* first position to print (BOL when the list began with '-') */
+       int endpos;   /* last position to print, EOL, or NON_RANGE for a single position */
+};
+
+/* special position values stored in struct cut_list by parse_lists() */
+enum {
+       BOL = 0,         /* start of line: used for lists of the form "-M" */
+       EOL = INT_MAX,   /* end of line: used for lists of the form "N-" */
+       NON_RANGE = -1   /* endpos marker for a single position "N" */
+};
+
+static struct cut_list *cut_lists = NULL; /* growable array holding a series of lists */
+static unsigned int nlists = 0; /* number of elements in above list */
+
 
-void cuterror(int err)
+/*
+ * cmpfunc() - comparison callback handed to qsort() by parse_lists().
+ * Orders two struct cut_list entries by ascending startpos; endpos is ignored.
+ * Returns -1, 0 or 1.
+ */
+static int cmpfunc(const void *a, const void *b)
 {
-       static char *err_mes[] = {
-               "syntax error\n",
-               "position must be >0\n",
-               "line longer than BUFSIZ\n",
-               "range must not decrease from left to right\n",
-               "MAX_FIELD exceeded\n",
-               "MAX_ARGS exceeded\n"
-       };
-
-       errorMsg(err_mes[err - 101]);
-       exit(err);
+       const struct cut_list *lhs = a;
+       const struct cut_list *rhs = b;
+
+       if (lhs->startpos != rhs->startpos)
+               return (lhs->startpos > rhs->startpos) ? 1 : -1;
+       return 0;
 }
 
 
-void get_args()
+/*
+ * parse_lists() - parses a list and puts values into startpos and endpos.
+ * valid list formats: N, N-, N-M, -M
+ * more than one list can be separated by commas
+ *
+ * 'lists' is modified in place by strsep(). Each parsed element is appended
+ * to the global cut_lists array (positions converted to zero-based) and the
+ * array is sorted by start position before returning. Dies with an error
+ * message on a malformed number, a value that does not fit in an int, a
+ * decreasing range (N-M with M < N), or when no position was given at all.
+ */
+static void parse_lists(char *lists)
 {
-       int i = 0;
-       int arg_ptr = 0;
-       int flag;
-
-       num_args = 0;
-       do {
-               if (num_args == MAX_ARGS)
-                       cuterror(MAX_ARGS_EXEEDED_ERROR);
-               if (!isdigit(line[i]) && line[i] != '-')
-                       cuterror(SYNTAX_ERROR);
-
-               args[arg_ptr] = 1;
-               args[arg_ptr + 1] = BUFSIZ;
-               flag = 1;
-
-               while (line[i] != ',' && line[i] != 0) {
-                       if (isdigit(line[i])) {
-                               args[arg_ptr] = 0;
-                               while (isdigit(line[i]))
-                                       args[arg_ptr] = 10 * args[arg_ptr] + line[i++] - '0';
-                               if (!args[arg_ptr])
-                                       cuterror(POSITION_ERROR);
-                               arg_ptr++;
-                       }
-                       if (line[i] == '-') {
-                               arg_ptr |= 1;
-                               i++;
-                               flag = 0;
-                       }
+       char *ltok = NULL;
+       char *ntok = NULL;
+       char *junk;
+       unsigned long val;
+       int s = 0, e = 0;
+
+       /* take apart the lists, one by one (they are separated with commas) */
+       while ((ltok = strsep(&lists, ",")) != NULL) {
+
+               /* it's actually legal to pass an empty list */
+               if (*ltok == '\0')
+                       continue;
+
+               /* get the start pos; strsep() cannot return NULL here because
+                * ltok itself is non-NULL at this point */
+               ntok = strsep(&ltok, "-");
+               if (*ntok == '\0') {
+                       s = BOL;
+               } else {
+                       /* parse into an unsigned long first so that values too big
+                        * for an int are rejected instead of silently truncated */
+                       val = strtoul(ntok, &junk, 10);
+                       if (*junk != '\0' || val > INT_MAX)
+                               bb_error_msg_and_die("invalid byte or field list");
+                       s = (int)val;
+
+                       /* account for the fact that arrays are zero based, while the user
+                        * expects the first char on the line to be char # 1 */
+                       if (s != 0)
+                               s--;
+               }
+
+               /* get the end pos */
+               ntok = strsep(&ltok, "-");
+               if (ntok == NULL) {
+                       e = NON_RANGE;
+               } else if (*ntok == '\0') {
+                       e = EOL;
+               } else {
+                       val = strtoul(ntok, &junk, 10);
+                       if (*junk != '\0' || val > INT_MAX)
+                               bb_error_msg_and_die("invalid byte or field list");
+                       e = (int)val;
+                       /* if the user specified an end position of 0, that means "til the
+                        * end of the line" */
+                       if (e == 0)
+                               e = INT_MAX;
+                       e--; /* again, arrays are zero based, lines are 1 based */
+                       /* a range must not decrease from left to right */
+                       if (e < s)
+                               bb_error_msg_and_die("invalid byte or field list");
+                       if (e == s)
+                               e = NON_RANGE;
                 }
-               if (flag && arg_ptr & 1)
-                       args[arg_ptr] = args[arg_ptr - 1];
-               if (args[num_args * 2] > args[num_args * 2 + 1])
-                       cuterror(RANGE_ERROR);
-               num_args++;
-               arg_ptr = num_args * 2;
+
+               /* if there's something left to tokenize, the user passed an invalid list */
+               if (ltok)
+                       bb_error_msg_and_die("invalid byte or field list");
+
+               /* add the new list */
+               cut_lists = xrealloc(cut_lists, sizeof(struct cut_list) * (++nlists));
+               cut_lists[nlists-1].startpos = s;
+               cut_lists[nlists-1].endpos = e;
         }
-       while (line[i++]);
+
+       /* make sure we got some cut positions out of all that */
+       if (nlists == 0)
+               bb_error_msg_and_die("missing list of positions");
+
+       /* now that the lists are parsed, we need to sort them to make life easier
+        * on us when it comes time to print the chars / fields / lines */
+       qsort(cut_lists, nlists, sizeof(struct cut_list), cmpfunc);
+
 }
 
 
-void cut()
+/*
+ * cut_line_by_chars() - print the selected characters of one chomped line,
+ * followed by a newline.
+ *
+ * Walks every entry of the global cut_lists array; a position that is
+ * covered by more than one entry is printed only once. Positions past the
+ * end of the line are ignored.
+ */
+static void cut_line_by_chars(const char *line)
 {
-       int i, j, length, maxcol=0;
-       char *columns[MAX_FIELD];
-
-       while (fgets(line, BUFSIZ, fd)) {
-               maxcol=0;
-               length = strlen(line) - 1;
-               *(line + length) = 0;
-               switch (mode) {
-               case DUMP_STDIN:
-                       printf("%s", line);
-                       break;
-               case OPTIONF:
-                       columns[maxcol++] = line;
-                       for (i = 0; i < length; i++) {
-                               if (*(line + i) == delim) {
-                                       *(line + i) = 0;
-                                       if (maxcol == MAX_FIELD)
-                                               cuterror(MAX_FIELDS_EXEEDED_ERROR);
-                                       columns[maxcol] = line + i + 1;
-                                       maxcol++;
-                               }
-                       }
-                       if (maxcol != 1) { 
-                               for (i = 0; i < num_args; i++) {
-                                       for (j = args[i * 2]; j <= args[i * 2 + 1]; j++)
-                                               if (j <= maxcol) {
-                                                        
-                                                       printf("%s", columns[j - 1]);
-                                                      
-                                                       if (i != num_args - 1 || j != args[i * 2 + 1])
-                                                               putchar(delim);
-                                               }
-                               }
-                       } else if (option != OPTIONS) {
-                         printf("%s",line);
-                       }
-                       break;
-               case OPTIONC:
-                       for (i = 0; i < num_args; i++) {
-                               for (j = args[i * 2];
-                                        j <= (args[i * 2 + 1] >
-                                                  length ? length : args[i * 2 + 1]); j++)
-                                       putchar(*(line + j - 1));
+       /* the line is not modified here, so measure it once instead of on
+        * every pass through the inner loop */
+       size_t len = strlen(line);
+       unsigned int c; /* index into cut_lists; unsigned to match nlists */
+       int l;          /* zero-based position currently being examined */
+       /* set up a list so we can keep track of what's been printed */
+       char *printed = xcalloc(len, sizeof(char));
+
+       /* print the chars specified in each cut list */
+       for (c = 0; c < nlists; c++) {
+               l = cut_lists[c].startpos;
+               while ((size_t)l < len) {
+                       if (!printed[l]) {
+                               putchar(line[l]);
+                               printed[l] = 'X';
                         }
+                       l++;
+                       /* a single position stops after one char, a range stops
+                        * once we have stepped past its end */
+                       if (cut_lists[c].endpos == NON_RANGE || l > cut_lists[c].endpos)
+                               break;
                 }
-               if (maxcol != 1)
-                       putchar('\n');
         }
+       putchar('\n'); /* cuz we were handed a chomped line */
+       free(printed);
 }
 
-int cut_main(int argc, char **argv)
+
+/*
+ * cut_line_by_fields() - print the selected delimiter-separated fields of
+ * one chomped line, followed by a newline.
+ *
+ * A line that contains no delimiter at all is printed unchanged, unless
+ * supress_non_delimited_lines is set, in which case nothing is printed.
+ * Otherwise the fields named by the global cut_lists array are written in
+ * line order, joined by the delimiter; a field covered by more than one
+ * list entry is printed only once.
+ *
+ * 'line' is split in place by strsep(), so its contents are destroyed.
+ */
+static void cut_line_by_fields(char *line)
 {
-       int i = 1;
-       int numberFilenames = 0;
-
-       while (i < argc) {
-               if (argv[i][0] == '-') {
-                       switch (argv[i++][1]) {
-                       case 'd':
-                               if (mode == OPTIONC || mode == OPTIONB)
-                                       warn(DELIMITER_NOT_APPLICABLE, "d");
-                               if (argc > i)
-                                       delim = argv[i++][0];
-                               else
-                                       cuterror(SYNTAX_ERROR);
-                               break;
-                       case 'f':
-                               sprintf(line, "%s", argv[i++]);
-                               if (mode == OPTIONC || mode == OPTIONB)
-                                       warn(OVERRIDING_PREVIOUS_MODE, "f");
-                               mode = OPTIONF;
-                               break;
-                       case 'b':
-                               sprintf(line, "%s", argv[i++]);
-                               if (mode == OPTIONF || mode == OPTIONC)
-                                       warn(OVERRIDING_PREVIOUS_MODE, "b");
-                               mode = OPTIONB;
-                               break;
-                       case 'c':
-                               sprintf(line, "%s", argv[i++]);
-                               if (mode == OPTIONF || mode == OPTIONB)
-                                       warn(OVERRIDING_PREVIOUS_MODE, "c");
-                               mode = OPTIONC;
-                               break;
-                       case 's':
-                               option = OPTIONS;
-               
-                               break;
-                       case '\0':                      /* - means: read from stdin      */
-                               numberFilenames++;
-                               break;
-                       case 'n':                       /* needed for Posix, but no effect here  */
-                               if (mode != OPTIONB)
-                                       warn(OPTION_NOT_APPLICABLE, "n");
-                               break;
-                       default:
-                               warn(UNKNOWN_OPTION, &(argv[i - 1][1]));
-                       }
-               } else {
-                       i++;
-                       numberFilenames++;
-               }
+       unsigned int c;  /* index into cut_lists; unsigned to match nlists */
+       int f;           /* zero-based number of the field we want next */
+       int ndelim = -1; /* zero-based / one-based problem */
+       int nfields_printed = 0;
+       char *field = NULL;
+       char d[2] = { delim, 0 };
+       char *printed;
+
+       /* test the easy case first: does this line contain any delimiters? */
+       if (strchr(line, delim) == NULL) {
+               if (!supress_non_delimited_lines)
+                       puts(line);
+               return;
         }
 
-/* Here follow the checks, if the selected options are reasonable.     */
-       if (mode == OPTIONB)            /* since in Minix char := byte       */
-               mode = OPTIONC;
-       
-       if (mode != OPTIONF && option == OPTIONS)
-               warn(DELIMITER_NOT_APPLICABLE,"s");
-       get_args();
-       if (numberFilenames != 0) {
-               i = 1;
-               while (i < argc) {
-                       if (argv[i][0] == '-') {
-                               switch (argv[i][1]) {
-                               case 'f':
-                               case 'c':
-                               case 'b':
-                               case 'd':
-                                       i += 2;
-                                       break;
-                               case 'n':
-                               case 'i':
-                               case 's':
-                                       i++;
-                                       break;
-                               case '\0':
-                                       fd = stdin;
-                                       i++;
-                                       cut();
-                                       break;
-                               default:
-                                       i++;
-                               }
-                       } else {
-                               if ((fd = fopen(argv[i++], "r")) == NULL) {
-                                       warn(FILE_NOT_READABLE, argv[i - 1]);
-                               } else {
-                                       cut();
-                                       fclose(fd);
-                               }
+       /* set up a list so we can keep track of what's been printed. A line
+        * made up of nothing but delimiters has strlen(line) + 1 fields, so we
+        * need one slot more than the line length or printed[ndelim] below
+        * runs past the end of the allocation */
+       printed = xcalloc(strlen(line) + 1, sizeof(char));
+
+       /* process each list on this line, for as long as we've got a line to process */
+       for (c = 0; c < nlists && line; c++) {
+               f = cut_lists[c].startpos;
+               do {
+
+                       /* find the field we're looking for; strsep() sets line to
+                        * NULL once the last field has been handed out */
+                       while (line && ndelim < f) {
+                               field = strsep(&line, d);
+                               ndelim++;
                         }
+
+                       /* we found it, and it hasn't been printed yet */
+                       if (field && ndelim == f && !printed[ndelim]) {
+                               /* if this isn't our first time through, we need to print the
+                                * delimiter after the last field that was printed */
+                               if (nfields_printed > 0)
+                                       putchar(delim);
+                               fputs(field, stdout);
+                               printed[ndelim] = 'X';
+                               nfields_printed++;
+                       }
+
+                       f++;
+
+                       /* keep going as long as we have a line to work with, this is a
+                        * list, and we're not at the end of that list */
+               } while (line && cut_lists[c].endpos != NON_RANGE && f <= cut_lists[c].endpos);
+       }
+
+       /* always finish with a newline (even when no field was selected) cuz
+        * we were handed a chomped line */
+       putchar('\n');
+
+       free(printed);
+}
+
+
+/*
+ * cut_file_by_lines() - print 'line' if its zero-based number 'linenum' is
+ * selected by one of the entries in the global cut_lists array.
+ *
+ * Called by cut_file() once per input line when the delimiter is '\n'.
+ * The position within cut_lists is kept in static variables between calls.
+ * NOTE(review): nothing visible here resets that static state, so it looks
+ * like it carries over from one input file to the next -- confirm.
+ */
+static void cut_file_by_lines(const char *line, unsigned int linenum)
+{
+       static int c = 0;  /* index of the cut_lists entry currently in use */
+       static int l = -1; /* line number we are waiting for; -1 = not set up yet */
+
+       /* I can't initialize this above cuz the "initializer isn't
+        * constant" *sigh* */
+       if (l == -1)
+               l = cut_lists[c].startpos;
+
+       /* get out if we have no more lists to process or if the lines are lower
+        * than what we're interested in */
+       if (c >= nlists || linenum < l)
+               return;
+
+       /* if the line we're looking for is lower than the one we were passed, it
+        * means we displayed it already, so move on */
+       while (l < linenum) {
+               l++;
+               /* move on to the next list if we're at the end of this one */
+               if (cut_lists[c].endpos == NON_RANGE || l > cut_lists[c].endpos) {
+                       c++;
+                       /* get out if there's no more lists to process */
+                       if (c >= nlists)
+                               return;
+                       l = cut_lists[c].startpos;
+                       /* get out if the current line is lower than the one we just became
+                        * interested in */
+                       if (linenum < l)
+                               return;
                 }
-       } else {
-               fd = stdin;
-               cut();
         }
 
-       return(exit_status);
+       /* If we made it here, it means we've found the line we're looking for, so print it */
+       puts(line);
 }
 
-/* cut - extract columns from a file or stdin.         Author: Michael J. Holme
- *
- *     Copyright 1989, Michael John Holme, All rights reserved.
- *     This code may be freely distributed, provided that this notice
- *     remains intact.
- *
- *     V1.1: 6th September 1989
- *
- *     Bugs, criticisms, etc,
- *      c/o Mark Powell
- *          JANET sq79@uk.ac.liv
- *          ARPA  sq79%liv.ac.uk@nsfnet-relay.ac.uk
- *          UUCP  ...!mcvax!ukc!liv.ac.uk!sq79
- *-------------------------------------------------------------------------
- *     Changed for POSIX1003.2/Draft10 conformance
- *     Thomas Brupbacher (tobr@mw.lpc.ethz.ch), September 1990.
- *     Changes:
- *         - separation of error messages ( stderr) and output (stdout).
- *         - support for -b and -n (no effect, -b acts as -c)
- *         - support for -s
- *-------------------------------------------------------------------------
- */
 
 /*
- * Copyright (c) 1987,1997, Prentice Hall
- * All rights reserved.
- * 
- * Redistribution and use of the MINIX operating system in source and
- * binary forms, with or without modification, are permitted provided
- * that the following conditions are met:
- * 
- * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 
- * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- * 
- * Neither the name of Prentice Hall nor the names of the software
- * authors or contributors may be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND
- * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * snippy-snip
  */
+static void cut_file(FILE *file)
+{
+       unsigned int lineno = 0; /* zero-based, like the parsed list positions */
+       char *buf;
 
+       /* read chomped lines until the reader hands back NULL */
+       for (; (buf = bb_get_chomped_line_from_file(file)) != NULL; lineno++) {
 
+               if (!(part & (OPT_CHAR_FLGS | OPT_BYTE_FLGS))) {
+                       /* field mode; a newline delimiter turns whole lines
+                        * into the "fields" being selected */
+                       if (delim != '\n')
+                               cut_line_by_fields(buf);
+                       else
+                               cut_file_by_lines(buf, lineno);
+               } else {
+                       /* chars/bytes XXX: only works when sizeof(char) == byte */
+                       cut_line_by_chars(buf);
+               }
+
+               /* each line buffer is released here once it has been handled */
+               free(buf);
+       }
+}
+
+
+/*
+ * cut_main() - entry point of the cut applet.
+ *
+ * Parses the options, builds the position lists, then cuts every file
+ * operand in turn. With no operands the input is stdin; an operand of "-"
+ * also means stdin, wherever it appears among the operands.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if at least one file could not be
+ * opened. Invalid option combinations terminate via bb_error_msg_and_die().
+ */
+int cut_main(int argc, char **argv)
+{
+       unsigned long opt;
+       char *sopt, *sdopt;
+       int retval = EXIT_SUCCESS;
+
+       bb_opt_complementally = "b--bcf:c--bcf:f--bcf";
+       /* -b, -c and -f all store their list argument in sopt; only one of
+        * them may be given, so there is no clash */
+       opt = bb_getopt_ulflags(argc, argv, optstring, &sopt, &sopt, &sopt, &sdopt);
+       part = opt & (OPT_BYTE_FLGS|OPT_CHAR_FLGS|OPT_FIELDS_FLGS);
+       if (part == 0)
+               bb_error_msg_and_die("you must specify a list of bytes, characters, or fields");
+       if (opt & BB_GETOPT_ERROR)
+               bb_error_msg_and_die("only one type of list may be specified");
+       parse_lists(sopt);
+       if (opt & OPT_DELIM_FLGS) {
+               if (strlen(sdopt) > 1) {
+                       bb_error_msg_and_die("the delimiter must be a single character");
+               }
+               delim = sdopt[0];
+       }
+       supress_non_delimited_lines = opt & OPT_SUPRESS_FLGS;
+
+       /*  non-field (char or byte) cutting has some special handling */
+       if (part != OPT_FIELDS_FLGS) {
+               if (supress_non_delimited_lines) {
+                       bb_error_msg_and_die("suppressing non-delimited lines makes sense"
+                                       " only when operating on fields");
+               }
+               /* test whether -d was given rather than comparing delim with the
+                * default, so that an explicit tab delimiter is caught as well */
+               if (opt & OPT_DELIM_FLGS) {
+                       bb_error_msg_and_die("a delimiter may be specified only when operating on fields");
+               }
+       }
+
+       /* argv[(optind)..(argc-1)] should be names of file to process. If no
+        * files were specified, take input from stdin. Otherwise, we process
+        * all the files specified, reading stdin for each '-'. */
+       if (argv[optind] == NULL) {
+               cut_file(stdin);
+       }
+       else {
+               int i;
+               FILE *file;
+               for (i = optind; i < argc; i++) {
+                       if (strcmp(argv[i], "-") == 0) {
+                               cut_file(stdin);
+                               continue;
+                       }
+                       file = bb_wfopen(argv[i], "r");
+                       if (file == NULL) {
+                               /* remember the failure but keep going with the
+                                * remaining operands */
+                               retval = EXIT_FAILURE;
+                               continue;
+                       }
+                       cut_file(file);
+                       fclose(file);
+               }
+       }
+
+       return retval;
+}