/* vi: set sw=4 ts=4: */
/*
- * Mini uniq implementation for busybox
+ * uniq implementation for busybox
*
+ * Copyright (C) 2005 Manuel Novoa III <mjn3@codepoet.org>
*
- * Copyright (C) 1999,2000 by Lineo, inc.
- * Written by John Beppu <beppu@lineo.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
+ * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
*/
-#include "internal.h"
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-
-static const char uniq_usage[] =
- "uniq [OPTION]... [INPUT [OUTPUT]]\n"
-#ifndef BB_FEATURE_TRIVIAL_HELP
- "\nDiscard all but one of successive identical lines from INPUT\n"
- "(or standard input), writing to OUTPUT (or standard output).\n"
-#endif
- ;
-
-/* max chars in line */
-#define UNIQ_MAX 4096
-
-typedef void (Print) (FILE *, const char *);
-
-typedef int (Decide) (const char *, const char *);
-
-/* container for two lines to be compared */
-typedef struct {
- char *a;
- char *b;
- int recurrence;
- FILE *in;
- FILE *out;
- void *func;
-} Subject;
-
-/* set up all the variables of a uniq operation */
-static Subject *subject_init(Subject * self, FILE * in, FILE * out,
- void *func)
-{
- self->a = NULL;
- self->b = NULL;
- self->in = in;
- self->out = out;
- self->func = func;
- self->recurrence = 0;
- return self;
-}
-
-/* point a and b to the appropriate lines;
- * count the recurrences (if any) of a string;
- */
-static Subject *subject_next(Subject * self)
-{
- /* tmp line holders */
- static char line[2][UNIQ_MAX];
- static int alternator = 0;
-
- if (fgets(line[alternator], UNIQ_MAX, self->in)) {
- self->a = self->b;
- self->b = line[alternator];
- alternator ^= 1;
- return self;
- }
+/* BB_AUDIT SUSv3 compliant */
+/* http://www.opengroup.org/onlinepubs/007904975/utilities/uniq.html */
- return NULL;
-}
+#include "libbb.h"
-static Subject *subject_last(Subject * self)
+/*
+ * Pick the stream for one file operand of uniq.
+ *
+ * argv points at the operand slot to examine (*argv may be NULL when the
+ * operand was omitted).  read0write2 is 0 for the input operand and 2 for
+ * the output operand; it doubles as an offset into the "r\0w" literal to
+ * select the fopen mode.
+ *
+ * Returns the stream produced by xfopen() for a named file, or stdin/stdout
+ * when the operand is missing or is exactly "-".
+ * NOTE(review): xfopen() is presumably the libbb open-or-die wrapper, so a
+ * NULL return is not expected here - confirm against libbb.
+ */
+static FILE *xgetoptfile_uniq_s(char **argv, int read0write2)
{
- self->a = self->b;
- self->b = NULL;
- return self;
-}
+ const char *n;
-static Subject *subject_study(Subject * self)
-{
- if (self->a == NULL) {
- return self;
- }
- if (self->b == NULL) {
- fprintf(self->out, "%s", self->a);
- return self;
- }
- if (strcmp(self->a, self->b) == 0) {
- self->recurrence++;
- } else {
- fprintf(self->out, "%s", self->a);
- self->recurrence = 0;
+ n = *argv; /* operand under consideration, NULL if absent */
+ if (n != NULL) {
+ if ((*n != '-') || n[1]) { /* anything other than a lone "-" names a file */
+ return xfopen(n, "r\0w" + read0write2); /* offset 0 -> "r", offset 2 -> "w" */
+ }
}
- return self;
+ return (read0write2) ? stdout : stdin; /* no operand or "-": use the standard stream */
}
-static int
-set_file_pointers(int schema, FILE ** in, FILE ** out, char **argv)
+/*
+ * uniq applet entry point.
+ *
+ * Usage: uniq [-c] [-d] [-u] [-f N] [-s N] [-w N] [INPUT [OUTPUT]]
+ *
+ * Reads INPUT (or stdin) line by line and collapses each run of adjacent
+ * lines whose comparison keys match into its first line, written to OUTPUT
+ * (or stdout).  The comparison key of a line starts after skipping
+ * skip_fields whitespace-separated fields (-f) and then skip_chars
+ * characters (-s); at most max_chars characters of it are compared (-w).
+ *
+ *   -c  prefix each emitted line with the length of its run
+ *   -d  emit only runs that contain duplicates
+ *   -u  emit only runs that contain no duplicates
+ *
+ * More than two file operands is a usage error.  Does not return: exits
+ * through fflush_stdout_and_exit() (or through one of the libbb die/usage
+ * helpers on error).
+ */
+int uniq_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int uniq_main(int argc UNUSED_PARAM, char **argv)
{
- switch (schema) {
- case 0:
- *in = stdin;
- *out = stdout;
- break;
- case 1:
- *in = fopen(argv[0], "r");
- *out = stdout;
- break;
- case 2:
- *in = fopen(argv[0], "r");
- *out = fopen(argv[1], "w");
- break;
- }
- if (*in == NULL) {
- fprintf(stderr, "uniq: %s: %s\n", argv[0], strerror(errno));
- return errno;
+	FILE *in, *out;
+	/* s0/s1: previous and current line; e0/e1: start of their comparison keys */
+	const char *s0, *e0, *s1, *e1, *input_filename;
+	unsigned long dups;
+	unsigned skip_fields, skip_chars, max_chars;
+	unsigned opt;
+	unsigned i;
+
+	/* Bit positions follow the order of the letters in the getopt32 string */
+	enum {
+		OPT_c = 0x1,
+		OPT_d = 0x2,
+		OPT_u = 0x4,
+		OPT_f = 0x8,
+		OPT_s = 0x10,
+		OPT_w = 0x20,
+	};
+
+	skip_fields = skip_chars = 0;
+	max_chars = INT_MAX;
+
+	/* NOTE(review): "X+" presumably tells getopt32 the argument is numeric - confirm */
+	opt_complementary = "f+:s+:w+";
+	opt = getopt32(argv, "cduf:s:w:", &skip_fields, &skip_chars, &max_chars);
+	argv += optind;
+
+	input_filename = *argv;
+
+	in = xgetoptfile_uniq_s(argv, 0);
+	if (*argv) {
+		++argv;
}
- if (*out == NULL) {
- fprintf(stderr, "uniq: %s: %s\n", argv[1], strerror(errno));
- return errno;
+	/*
+	 * Reject surplus operands before the output file is opened for
+	 * writing, so that a usage error cannot create or clobber it.
+	 */
+	if (*argv && argv[1]) {
+		bb_show_usage();
}
+	out = xgetoptfile_uniq_s(argv, 2);
- return 0;
-}
-
-/* one variable is the decision algo */
-/* another variable is the printing algo */
+	s1 = e1 = NULL; /* prime the pump */
-/* I don't think I have to have more than a 1 line memory
- this is the one constant */
+	do {
+		/* The line that ended the previous run starts the next one */
+		s0 = s1;
+		e0 = e1;
+		dups = 0;
-/* it seems like GNU/uniq only takes one or two files as an option */
+		/* gnu uniq ignores newlines */
+		while ((s1 = xmalloc_fgetline(in)) != NULL) {
+			/* Locate the comparison key: skip fields first, then characters */
+			e1 = s1;
+			for (i = skip_fields; i; i--) {
+				e1 = skip_whitespace(e1);
+				e1 = skip_non_whitespace(e1);
+			}
+			for (i = skip_chars; *e1 && i; i--) {
+				++e1;
+			}
-/* ________________________________________________________________________ */
-int uniq_main(int argc, char **argv)
-{
- int i;
- char opt;
- FILE *in, *out;
- Subject s;
-
- /* parse argv[] */
- for (i = 1; i < argc; i++) {
- if (argv[i][0] == '-') {
- opt = argv[i][1];
- switch (opt) {
- case '-':
- case 'h':
- usage(uniq_usage);
- default:
- usage(uniq_usage);
+			/* First line of the input, or a key that differs: the run ends */
+			if (!s0 || strncmp(e0, e1, max_chars)) {
+				break;
}
- } else {
- break;
+
+			/*
+			 * Duplicate of s0: only s0 is ever printed, so release this
+			 * line now instead of leaking it when s1 is overwritten by
+			 * the next read.
+			 */
+			free((void *)s1);
+			++dups; /* note: testing for overflow seems excessive. */
}
- }
- /* 0 src: stdin; dst: stdout */
- /* 1 src: file; dst: stdout */
- /* 2 src: file; dst: file */
- if (set_file_pointers((argc - 1), &in, &out, &argv[i])) {
- exit(1);
- }
+		if (s0) {
+			/* Shift selects the suppressing flag: OPT_u if dups, else OPT_d */
+			if (!(opt & (OPT_d << !!dups))) {
+				if (opt & OPT_c) {
+					/* dups is unsigned long, hence %lu */
+					fprintf(out, "%lu ", dups + 1);
+				}
+				fprintf(out, "%s\n", s0);
+			}
+			free((void *)s0);
+		}
+	} while (s1);
- subject_init(&s, in, out, NULL);
- while (subject_next(&s)) {
- subject_study(&s);
- }
- subject_last(&s);
- subject_study(&s);
+	die_if_ferror(in, input_filename);
- exit(0);
+	fflush_stdout_and_exit(EXIT_SUCCESS);
}
-
-/* $Id: uniq.c,v 1.10 2000/05/12 19:41:47 erik Exp $ */