X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=coreutils%2Funiq.c;h=6caab5dae3ccf8f9cdbe785f41fc5fbbedcbd215;hb=4905434b8aead249d6bdd134d2fbd8c06dfcc059;hp=965d290c2ff9003e4d72707601ffb15ae36e0a4e;hpb=fac10d7c59f7db0facd5fb94de273310b9ec86e6;p=oweals%2Fbusybox.git diff --git a/coreutils/uniq.c b/coreutils/uniq.c index 965d290c2..6caab5dae 100644 --- a/coreutils/uniq.c +++ b/coreutils/uniq.c @@ -1,9 +1,8 @@ +/* vi: set sw=4 ts=4: */ /* - * Mini uniq implementation for busybox + * uniq implementation for busybox * - * - * Copyright (C) 1999 by Lineo, inc. - * Written by John Beppu + * Copyright (C) 2003 Manuel Novoa III * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,176 +20,94 @@ * */ -#include "internal.h" +/* BB_AUDIT SUSv3 compliant */ +/* http://www.opengroup.org/onlinepubs/007904975/utilities/uniq.html */ + #include <stdio.h> +#include <stdlib.h> #include <string.h> -#include <errno.h> - -static const char uniq_usage[] = -"uniq [OPTION]... 
[INPUT [OUTPUT]]\n" -"Discard all but one of successive identical lines from INPUT (or\n" -"standard input), writing to OUTPUT (or standard output).\n" -"\n" -"\t-h\tdisplay this help and exit\n" -"\n" -"A field is a run of whitespace, then non-whitespace characters.\n" -"Fields are skipped before chars.\n" -; - -/* max chars in line */ -#define UNIQ_MAX 4096 - -typedef void (Print)(FILE *, const char *); - -typedef int (Decide)(const char *, const char *); - -/* container for two lines to be compared */ -typedef struct { - char *a; - char *b; - int recurrence; - FILE *in; - FILE *out; - void *func; -} Subject; - -/* set up all the variables of a uniq operation */ -static Subject * -subject_init(Subject *self, FILE *in, FILE *out, void *func) -{ - self->a = NULL; - self->b = NULL; - self->in = in; - self->out = out; - self->func = func; - self->recurrence = 0; - return self; -} - -/* point a and b to the appropriate lines; - * count the recurrences (if any) of a string; - */ -static Subject * -subject_next(Subject *self) -{ - /* tmp line holders */ - static char line[2][UNIQ_MAX]; - static int alternator = 0; - - if (fgets(line[alternator], UNIQ_MAX, self->in)) { - self->a = self->b; - self->b = line[alternator]; - alternator ^= 1; - return self; - } +#include <ctype.h> +#include <unistd.h> +#include "busybox.h" +#include "libcoreutils/coreutils.h" - return NULL; -} +static const char uniq_opts[] = "f:s:cdu\0\7\3\5\1\2\4"; /* getopt string, then per-option masks for c,d,u: AND-mask at letter+4, OR-mask at letter+7 */ -static Subject * -subject_last(Subject *self) +int uniq_main(int argc, char **argv) /* uniq applet entry: collapses runs of adjacent equal lines (compared after skipping -f N fields and -s N chars); -c prefixes a count, -d keeps only repeated lines, -u keeps only unique lines; INPUT/OUTPUT operands are opened via xgetoptfile_sort_uniq() */ { - self->a = self->b; - self->b = NULL; - return self; -} - -static Subject * -subject_study(Subject *self) -{ - if (self->a == NULL) { - return self; - } - if (self->b == NULL) { - fprintf(self->out, "%s", self->a); - return self; - } - if (strcmp(self->a, self->b) == 0) { - self->recurrence++; - } else { - fprintf(self->out, "%s", self->a); - self->recurrence = 0; - } - return self; -} - -static int -set_file_pointers(int schema, FILE **in, FILE **out, char **argv) -{ - switch 
(schema) { - case 0: - *in = stdin; - *out = stdout; - break; - case 1: - *in = fopen(argv[0], "r"); - *out = stdout; - break; - case 2: - *in = fopen(argv[0], "r"); - *out = fopen(argv[1], "w"); - break; - } - if (*in == NULL) { - fprintf(stderr, "uniq: %s: %s\n", argv[0], strerror(errno)); - return errno; - } - if (*out == NULL) { - fprintf(stderr, "uniq: %s: %s\n", argv[1], strerror(errno)); - return errno; - } - return 0; -} - - -/* one variable is the decision algo */ -/* another variable is the printing algo */ - -/* I don't think I have to have more than a 1 line memory - this is the one constant */ + FILE *in, *out; + /* Note: Ignore the warning about dups and e0 being used uninitialized. + * They will be initialized on the first pass of the loop (since s0 is NULL). */ + unsigned long dups, skip_fields, skip_chars, i; + const char *s0, *e0, *s1, *e1, *input_filename; + int opt; + int uniq_flags = 6; /* default: bit 2 = print repeated lines, bit 4 = print unique lines; bit 1 (set by -c) enables the count prefix */ + + skip_fields = skip_chars = 0; + + while ((opt = getopt(argc, argv, uniq_opts)) > 0) { + if (opt == 'f') { + skip_fields = bb_xgetularg10(optarg); + } else if (opt == 's') { + skip_chars = bb_xgetularg10(optarg); + } else if ((s0 = strchr(uniq_opts, opt)) != NULL) { + uniq_flags &= s0[4]; + uniq_flags |= s0[7]; + } else { + bb_show_usage(); + } + } -/* it seems like GNU/uniq only takes one or two files as an option */ + input_filename = *(argv += optind); -/* ________________________________________________________________________ */ -int -uniq_main(int argc, char **argv) -{ - int i; - char opt; - FILE *in, *out; - Subject s; + in = xgetoptfile_sort_uniq(argv, "r"); + if (*argv) { + ++argv; + } + out = xgetoptfile_sort_uniq(argv, "w"); + if (*argv && argv[1]) { + bb_show_usage(); + } - /* parse argv[] */ - for (i = 1; i < argc; i++) { - if (argv[i][0] == '-') { - opt = argv[i][1]; - switch (opt) { - case '-': - case 'h': - usage(uniq_usage); - default: - usage(uniq_usage); - } - } else { - break; + s0 = NULL; + + /* gnu uniq ignores newlines */ + 
while ((s1 = bb_get_chomped_line_from_file(in)) != NULL) { + e1 = s1; + for (i=skip_fields ; i ; i--) { + e1 = bb_skip_whitespace(e1); + while (*e1 && !isspace((unsigned char)*e1)) { + ++e1; + } + } + for (i = skip_chars ; *e1 && i ; i--) { + ++e1; + } + if (s0) { + if (strcmp(e0, e1) == 0) { + free((void *)s1); /* duplicate line is not kept (s0 stays the representative); release it to avoid a leak */ + ++dups; /* Note: Testing for overflow seems excessive. */ + continue; + } + DO_LAST: + if ((dups && (uniq_flags & 2)) || (!dups && (uniq_flags & 4))) { + bb_fprintf(out, "\0%7lu\t" + (uniq_flags & 1), dups + 1); /* offset 0 selects the empty format (no -c); offset 1 selects the count prefix; dups is unsigned long, hence %lu */ + bb_fprintf(out, "%s\n", s0); + } + free((void *)s0); + } + + s0 = s1; + e0 = e1; + dups = 0; } - } - /* 0 src: stdin; dst: stdout */ - /* 1 src: file; dst: stdout */ - /* 2 src: file; dst: file */ - if (set_file_pointers((argc - 1), &in, &out, &argv[i])) { - exit(1); - } + if (s0) { + e1 = NULL; + goto DO_LAST; + } - subject_init(&s, in, out, NULL); - while (subject_next(&s)) { - subject_study(&s); - } - subject_last(&s); - subject_study(&s); + bb_xferror(in, input_filename); - exit(0); + bb_fflush_stdout_and_exit(EXIT_SUCCESS); } - -/* $Id: uniq.c,v 1.6 2000/02/07 05:29:42 erik Exp $ */