X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;ds=sidebyside;f=coreutils%2Fsort.c;h=a1625fc9cdb30a6d332ba023ef901ff6abac172d;hb=1b49c25e0a719ec3051eafa2329e68012c815abb;hp=127d683198039470a1623aeff360a8924d8b5476;hpb=ee512a3f8620ab535716b8aa016b33610010920c;p=oweals%2Fbusybox.git diff --git a/coreutils/sort.c b/coreutils/sort.c index 127d68319..a1625fc9c 100644 --- a/coreutils/sort.c +++ b/coreutils/sort.c @@ -1,309 +1,469 @@ +/* vi: set sw=4 ts=4: */ /* - * Mini sort implementation for busybox + * SuS3 compliant sort implementation for busybox * + * Copyright (C) 2004 by Rob Landley * - * Copyright (C) 1999 by Lineo, inc. - * Written by John Beppu + * MAINTAINER: Rob Landley * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Licensed under GPLv2 or later, see file LICENSE in this source tree. * + * See SuS3 sort standard at: + * http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html */ -#include "internal.h" -#include -#include -#include -#include -#include - -static const char sort_usage[] = -"Usage: sort [OPTION]... [FILE]...\n\n" -; - -/* typedefs _______________________________________________________________ */ - -/* line node */ -typedef struct Line { - char *data; /* line data */ - struct Line *next; /* pointer to next line node */ -} Line; - -/* singly-linked list of lines */ -typedef struct { - int len; /* number of Lines */ - Line **sorted; /* array fed to qsort */ - - Line *head; /* head of List */ - Line *current; /* current Line */ -} List; - -/* comparison function */ -typedef int (Compare)(const void *, const void *); - - -/* methods ________________________________________________________________ */ - -static const int max = 1024; - -/* mallocate Line */ -static Line * -line_alloc() -{ - Line *self; - self = malloc(1 * sizeof(Line)); - return self; -} - -/* Initialize Line with string */ -static Line * -line_init(Line *self, const char *string) -{ - self->data = malloc((strlen(string) + 1) * sizeof(char)); - if (self->data == NULL) { return NULL; } - strcpy(self->data, string); - self->next = NULL; - return self; -} - -/* Construct Line from FILE* */ -static Line * -line_newFromFile(FILE *src) -{ - char buffer[max]; - Line *self; +//usage:#define sort_trivial_usage +//usage: "[-nru" +//usage: IF_FEATURE_SORT_BIG("gMcszbdfimSTokt] [-o FILE] [-k start[.offset][opts][,end[.offset][opts]] [-t CHAR") +//usage: "] [FILE]..." +//usage:#define sort_full_usage "\n\n" +//usage: "Sort lines of text\n" +//usage: IF_FEATURE_SORT_BIG( +//usage: "\n -b Ignore leading blanks" +//usage: "\n -c Check whether input is sorted" +//usage: "\n -d Dictionary order (blank or alphanumeric only)" +//usage: "\n -f Ignore case" +//usage: "\n -g General numerical sort" +//usage: "\n -i Ignore unprintable characters" +//usage: "\n -k Sort key" +//usage: "\n -M Sort month" +//usage: ) +//usage: "\n -n Sort numbers" +//usage: IF_FEATURE_SORT_BIG( +//usage: "\n -o Output to file" +//usage: "\n -k Sort by key" +//usage: "\n -t CHAR Key separator" +//usage: ) +//usage: "\n -r Reverse sort order" +//usage: IF_FEATURE_SORT_BIG( +//usage: "\n -s Stable (don't sort ties alphabetically)" +//usage: ) +//usage: "\n -u Suppress duplicate lines" +//usage: IF_FEATURE_SORT_BIG( +//usage: "\n -z Lines are terminated by NUL, not newline" +//usage: "\n -mST Ignored for GNU compatibility") +//usage: +//usage:#define sort_example_usage +//usage: "$ echo -e \"e\\nf\\nb\\nd\\nc\\na\" | sort\n" +//usage: "a\n" +//usage: "b\n" +//usage: "c\n" +//usage: "d\n" +//usage: "e\n" +//usage: "f\n" +//usage: IF_FEATURE_SORT_BIG( +//usage: "$ echo -e \"c 3\\nb 2\\nd 2\" | $SORT -k 2,2n -k 1,1r\n" +//usage: "d 2\n" +//usage: "b 2\n" +//usage: "c 3\n" +//usage: ) +//usage: "" + +#include "libbb.h" + +/* This is a NOEXEC applet. Be very careful! */ - if (fgets(buffer, max, src)) { - self = line_alloc(); - if (self == NULL) { return NULL; } - line_init(self, buffer); - return self; - } - return NULL; -} -/* Line destructor */ -static Line * -line_release(Line *self) -{ - if (self->data) { - free(self->data); - free(self); - } - return self; -} - - -/* Comparison */ - -/* ascii order */ -static int -compare_ascii(const void *a, const void *b) -{ - Line **doh; - Line *x, *y; - - doh = (Line **) a; - x = *doh; - doh = (Line **) b; - y = *doh; - - // fprintf(stdout, "> %p: %s< %p: %s", x, x->data, y, y->data); - return strcmp(x->data, y->data); -} - -/* numeric order */ -static int -compare_numeric(const void *a, const void *b) +/* + sort [-m][-o output][-bdfinru][-t char][-k keydef]... [file...] + sort -c [-bdfinru][-t char][-k keydef][file] +*/ + +/* These are sort types */ +static const char OPT_STR[] ALIGN1 = "ngMucszbrdfimS:T:o:k:t:"; +enum { + FLAG_n = 1, /* Numeric sort */ + FLAG_g = 2, /* Sort using strtod() */ + FLAG_M = 4, /* Sort date */ +/* ucsz apply to root level only, not keys. b at root level implies bb */ + FLAG_u = 8, /* Unique */ + FLAG_c = 0x10, /* Check: no output, exit(!ordered) */ + FLAG_s = 0x20, /* Stable sort, no ascii fallback at end */ + FLAG_z = 0x40, /* Input and output is NUL terminated, not \n */ +/* These can be applied to search keys, the previous four can't */ + FLAG_b = 0x80, /* Ignore leading blanks */ + FLAG_r = 0x100, /* Reverse */ + FLAG_d = 0x200, /* Ignore !(isalnum()|isspace()) */ + FLAG_f = 0x400, /* Force uppercase */ + FLAG_i = 0x800, /* Ignore !isprint() */ + FLAG_m = 0x1000, /* ignored: merge already sorted files; do not sort */ + FLAG_S = 0x2000, /* ignored: -S, --buffer-size=SIZE */ + FLAG_T = 0x4000, /* ignored: -T, --temporary-directory=DIR */ + FLAG_o = 0x8000, + FLAG_k = 0x10000, + FLAG_t = 0x20000, + FLAG_bb = 0x80000000, /* Ignore trailing blanks */ +}; + +#if ENABLE_FEATURE_SORT_BIG +static char key_separator; + +static struct sort_key { + struct sort_key *next_key; /* linked list */ + unsigned range[4]; /* start word, start char, end word, end char */ + unsigned flags; +} *key_list; + +static char *get_key(char *str, struct sort_key *key, int flags) { - Line **doh; - Line *x, *y; - int xint, yint; - - doh = (Line **) a; - x = *doh; - doh = (Line **) b; - y = *doh; - - xint = strtoul(x->data, NULL, 10); - yint = strtoul(y->data, NULL, 10); - - return (xint - yint); -} - + int start = 0, end = 0, len, j; + unsigned i; + + /* Special case whole string, so we don't have to make a copy */ + if (key->range[0] == 1 && !key->range[1] && !key->range[2] && !key->range[3] + && !(flags & (FLAG_b | FLAG_d | FLAG_f | FLAG_i | FLAG_bb)) + ) { + return str; + } -/* List */ + /* Find start of key on first pass, end on second pass */ + len = strlen(str); + for (j = 0; j < 2; j++) { + if (!key->range[2*j]) + end = len; + /* Loop through fields */ + else { + end = 0; + for (i = 1; i < key->range[2*j] + j; i++) { + if (key_separator) { + /* Skip body of key and separator */ + while (str[end]) { + if (str[end++] == key_separator) + break; + } + } else { + /* Skip leading blanks */ + while (isspace(str[end])) + end++; + /* Skip body of key */ + while (str[end]) { + if (isspace(str[end])) + break; + end++; + } + } + } + } + if (!j) start = end; + } + /* Strip leading whitespace if necessary */ +//XXX: skip_whitespace() + if (flags & FLAG_b) + while (isspace(str[start])) start++; + /* Strip trailing whitespace if necessary */ + if (flags & FLAG_bb) + while (end > start && isspace(str[end-1])) end--; + /* Handle offsets on start and end */ + if (key->range[3]) { + end += key->range[3] - 1; + if (end > len) end = len; + } + if (key->range[1]) { + start += key->range[1] - 1; + if (start > len) start = len; + } + /* Make the copy */ + if (end < start) end = start; + str = xstrndup(str+start, end-start); + /* Handle -d */ + if (flags & FLAG_d) { + for (start = end = 0; str[end]; end++) + if (isspace(str[end]) || isalnum(str[end])) + str[start++] = str[end]; + str[start] = '\0'; + } + /* Handle -i */ + if (flags & FLAG_i) { + for (start = end = 0; str[end]; end++) + if (isprint_asciionly(str[end])) + str[start++] = str[end]; + str[start] = '\0'; + } + /* Handle -f */ + if (flags & FLAG_f) + for (i = 0; str[i]; i++) + str[i] = toupper(str[i]); -/* */ -static List * -list_init(List *self) -{ - self->len = 0; - self->sorted = NULL; - self->head = NULL; - self->current = NULL; - return self; + return str; } -/* for simplicity, the List gains ownership of the line */ -static List * -list_insert(List *self, Line *line) +static struct sort_key *add_key(void) { - if (line == NULL) { return NULL; } - - /* first insertion */ - if (self->head == NULL) { - self->head = line; - self->current = line; - - /* all subsequent insertions */ - } else { - self->current->next = line; - self->current = line; - } - self->len++; - return self; + struct sort_key **pkey = &key_list; + while (*pkey) + pkey = &((*pkey)->next_key); + return *pkey = xzalloc(sizeof(struct sort_key)); } -/* order the list according to compare() */ -static List * -list_sort(List *self, Compare *compare) -{ - int i; - Line *line; - - /* mallocate array of Line*s */ - self->sorted = (Line **) malloc(self->len * sizeof(Line*)); - if (self->sorted == NULL) { return NULL; } +#define GET_LINE(fp) \ + ((option_mask32 & FLAG_z) \ + ? bb_get_chunk_from_file(fp, NULL) \ + : xmalloc_fgetline(fp)) +#else +#define GET_LINE(fp) xmalloc_fgetline(fp) +#endif - /* fill array w/ List's contents */ - i = 0; - line = self->head; - while (line) { - self->sorted[i++] = line; - line = line->next; - } - - /* apply qsort */ - qsort(self->sorted, self->len, sizeof(Line*), compare); - return self; -} - -/* precondition: list must be sorted */ -static List * -list_writeToFile(List *self, FILE* dst) +/* Iterate through keys list and perform comparisons */ +static int compare_keys(const void *xarg, const void *yarg) { - int i; - Line **line = self->sorted; - - if (self->sorted == NULL) { return NULL; } - for (i = 0; i < self->len; i++) { - fprintf(dst, "%s", line[i]->data); - } - return self; + int flags = option_mask32, retval = 0; + char *x, *y; + +#if ENABLE_FEATURE_SORT_BIG + struct sort_key *key; + + for (key = key_list; !retval && key; key = key->next_key) { + flags = key->flags ? key->flags : option_mask32; + /* Chop out and modify key chunks, handling -dfib */ + x = get_key(*(char **)xarg, key, flags); + y = get_key(*(char **)yarg, key, flags); +#else + /* This curly bracket serves no purpose but to match the nesting + * level of the for () loop we're not using */ + { + x = *(char **)xarg; + y = *(char **)yarg; +#endif + /* Perform actual comparison */ + switch (flags & 7) { + default: + bb_error_msg_and_die("unknown sort type"); + break; + /* Ascii sort */ + case 0: +#if ENABLE_LOCALE_SUPPORT + retval = strcoll(x, y); +#else + retval = strcmp(x, y); +#endif + break; +#if ENABLE_FEATURE_SORT_BIG + case FLAG_g: { + char *xx, *yy; + double dx = strtod(x, &xx); + double dy = strtod(y, &yy); + /* not numbers < NaN < -infinity < numbers < +infinity) */ + if (x == xx) + retval = (y == yy ? 0 : -1); + else if (y == yy) + retval = 1; + /* Check for isnan */ + else if (dx != dx) + retval = (dy != dy) ? 0 : -1; + else if (dy != dy) + retval = 1; + /* Check for infinity. Could underflow, but it avoids libm. */ + else if (1.0 / dx == 0.0) { + if (dx < 0) + retval = (1.0 / dy == 0.0 && dy < 0) ? 0 : -1; + else + retval = (1.0 / dy == 0.0 && dy > 0) ? 0 : 1; + } else if (1.0 / dy == 0.0) + retval = (dy < 0) ? 1 : -1; + else + retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0); + break; + } + case FLAG_M: { + struct tm thyme; + int dx; + char *xx, *yy; + + xx = strptime(x, "%b", &thyme); + dx = thyme.tm_mon; + yy = strptime(y, "%b", &thyme); + if (!xx) + retval = (!yy) ? 0 : -1; + else if (!yy) + retval = 1; + else + retval = (dx == thyme.tm_mon) ? 0 : dx - thyme.tm_mon; + break; + } + /* Full floating point version of -n */ + case FLAG_n: { + double dx = atof(x); + double dy = atof(y); + retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0); + break; + } + } /* switch */ + /* Free key copies. */ + if (x != *(char **)xarg) free(x); + if (y != *(char **)yarg) free(y); + /* if (retval) break; - done by for () anyway */ +#else + /* Integer version of -n for tiny systems */ + case FLAG_n: + retval = atoi(x) - atoi(y); + break; + } /* switch */ +#endif + } /* for */ + + /* Perform fallback sort if necessary */ + if (!retval && !(option_mask32 & FLAG_s)) + retval = strcmp(*(char **)xarg, *(char **)yarg); + + if (flags & FLAG_r) return -retval; + return retval; } -/* deallocate */ -static void -list_release(List *self) +#if ENABLE_FEATURE_SORT_BIG +static unsigned str2u(char **str) { - Line *i; - Line *die; - - i = self->head; - while (i) { - die = i; - i = die->next; - line_release(die); - } + unsigned long lu; + if (!isdigit((*str)[0])) + bb_error_msg_and_die("bad field specification"); + lu = strtoul(*str, str, 10); + if ((sizeof(long) > sizeof(int) && lu > INT_MAX) || !lu) + bb_error_msg_and_die("bad field specification"); + return lu; } +#endif - -/* - * I need a list - * to insert lines into - * then I need to sort this list - * and finally print it - */ - -int -sort_main(int argc, char **argv) +int sort_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int sort_main(int argc UNUSED_PARAM, char **argv) { - int i; - char opt; - List list; - Line *l; - Compare *compare; - - /* init */ - compare = compare_ascii; - list_init(&list); - - /* parse argv[] */ - for (i = 1; i < argc; i++) { - if (argv[i][0] == '-') { - opt = argv[i][1]; - switch (opt) { - case 'g': - compare = compare_numeric; - break; - case 'h': - usage(sort_usage); - break; - default: - fprintf(stderr, "sort: invalid option -- %c\n", opt); - usage(sort_usage); - } - } else { - break; + char *line, **lines; + char *str_ignored, *str_o, *str_t; + llist_t *lst_k = NULL; + int i, flag; + int linecount; + unsigned opts; + + xfunc_error_retval = 2; + + /* Parse command line options */ + /* -o and -t can be given at most once */ + opt_complementary = "o--o:t--t:" /* -t, -o: at most one of each */ + "k::"; /* -k takes list */ + opts = getopt32(argv, OPT_STR, &str_ignored, &str_ignored, &str_o, &lst_k, &str_t); + /* global b strips leading and trailing spaces */ + if (opts & FLAG_b) + option_mask32 |= FLAG_bb; +#if ENABLE_FEATURE_SORT_BIG + if (opts & FLAG_t) { + if (!str_t[0] || str_t[1]) + bb_error_msg_and_die("bad -t parameter"); + key_separator = str_t[0]; } - } - - /* go through remaining args (if any) */ - if (i >= argc) { - while ( (l = line_newFromFile(stdin))) { - list_insert(&list, l); + /* note: below this point we use option_mask32, not opts, + * since that reduces register pressure and makes code smaller */ + + /* parse sort key */ + while (lst_k) { + enum { + FLAG_allowed_for_k = + FLAG_n | /* Numeric sort */ + FLAG_g | /* Sort using strtod() */ + FLAG_M | /* Sort date */ + FLAG_b | /* Ignore leading blanks */ + FLAG_r | /* Reverse */ + FLAG_d | /* Ignore !(isalnum()|isspace()) */ + FLAG_f | /* Force uppercase */ + FLAG_i | /* Ignore !isprint() */ + 0 + }; + struct sort_key *key = add_key(); + char *str_k = llist_pop(&lst_k); + + i = 0; /* i==0 before comma, 1 after (-k3,6) */ + while (*str_k) { + /* Start of range */ + /* Cannot use bb_strtou - suffix can be a letter */ + key->range[2*i] = str2u(&str_k); + if (*str_k == '.') { + str_k++; + key->range[2*i+1] = str2u(&str_k); + } + while (*str_k) { + const char *temp2; + + if (*str_k == ',' && !i++) { + str_k++; + break; + } /* no else needed: fall through to syntax error + because comma isn't in OPT_STR */ + temp2 = strchr(OPT_STR, *str_k); + if (!temp2) + bb_error_msg_and_die("unknown key option"); + flag = 1 << (temp2 - OPT_STR); + if (flag & ~FLAG_allowed_for_k) + bb_error_msg_and_die("unknown sort type"); + /* b after ',' means strip _trailing_ space */ + if (i && flag == FLAG_b) + flag = FLAG_bb; + key->flags |= flag; + str_k++; + } + } } - list_sort(&list, compare); - list_writeToFile(&list, stdout); - list_release(&list); - } else { - for ( ; i < argc; i++) { +#endif + + /* Open input files and read data */ + argv += optind; + if (!*argv) + *--argv = (char*)"-"; + linecount = 0; + lines = NULL; + do { + /* coreutils 6.9 compat: abort on first open error, + * do not continue to next file: */ + FILE *fp = xfopen_stdin(*argv); + for (;;) { + line = GET_LINE(fp); + if (!line) + break; + lines = xrealloc_vector(lines, 6, linecount); + lines[linecount++] = line; + } + fclose_if_not_stdin(fp); + } while (*++argv); + +#if ENABLE_FEATURE_SORT_BIG + /* if no key, perform alphabetic sort */ + if (!key_list) + add_key()->range[0] = 1; + /* handle -c */ + if (option_mask32 & FLAG_c) { + int j = (option_mask32 & FLAG_u) ? -1 : 0; + for (i = 1; i < linecount; i++) { + if (compare_keys(&lines[i-1], &lines[i]) > j) { + fprintf(stderr, "Check line %u\n", i); + return EXIT_FAILURE; + } + } + return EXIT_SUCCESS; + } +#endif + /* Perform the actual sort */ + qsort(lines, linecount, sizeof(lines[0]), compare_keys); + /* handle -u */ + if (option_mask32 & FLAG_u) { + flag = 0; + /* coreutils 6.3 drop lines for which only key is the same */ + /* -- disabling last-resort compare... */ + option_mask32 |= FLAG_s; + for (i = 1; i < linecount; i++) { + if (compare_keys(&lines[flag], &lines[i]) == 0) + free(lines[i]); + else + lines[++flag] = lines[i]; + } + if (linecount) + linecount = flag+1; } - } - exit(0); + /* Print it */ +#if ENABLE_FEATURE_SORT_BIG + /* Open output file _after_ we read all input ones */ + if (option_mask32 & FLAG_o) + xmove_fd(xopen(str_o, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO); +#endif + flag = (option_mask32 & FLAG_z) ? '\0' : '\n'; + for (i = 0; i < linecount; i++) + printf("%s%c", lines[i], flag); + + fflush_stdout_and_exit(EXIT_SUCCESS); } - -/* $Id: sort.c,v 1.7 1999/12/23 00:02:49 beppu Exp $ */ -/* - * $Log: sort.c,v $ - * Revision 1.7 1999/12/23 00:02:49 beppu - * implemented numeric sort (sort -g) - * - * Revision 1.6 1999/12/22 23:02:12 beppu - * oops.. qsort(2) misunderstanding on my part. - * it's ok, now. - * - * Revision 1.5 1999/12/22 22:27:01 beppu - * playing w/ $Log: sort.c,v $ - * playing w/ Revision 1.7 1999/12/23 00:02:49 beppu - * playing w/ implemented numeric sort (sort -g) - * playing w/ - * playing w/ Revision 1.6 1999/12/22 23:02:12 beppu - * playing w/ oops.. qsort(2) misunderstanding on my part. - * playing w/ it's ok, now. - * playing w/ - * - */