X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=coreutils%2Fsort.c;h=a1625fc9cdb30a6d332ba023ef901ff6abac172d;hb=1b49c25e0a719ec3051eafa2329e68012c815abb;hp=a6c56ad88363569956aa9e10e031ca7956790f2b;hpb=e766715032fa14ae60a053c41a1eeeb6046e19e2;p=oweals%2Fbusybox.git diff --git a/coreutils/sort.c b/coreutils/sort.c index a6c56ad88..a1625fc9c 100644 --- a/coreutils/sort.c +++ b/coreutils/sort.c @@ -6,15 +6,63 @@ * * MAINTAINER: Rob Landley * - * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + * Licensed under GPLv2 or later, see file LICENSE in this source tree. * * See SuS3 sort standard at: * http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html */ -#include "busybox.h" +//usage:#define sort_trivial_usage +//usage: "[-nru" +//usage: IF_FEATURE_SORT_BIG("gMcszbdfimSTokt] [-o FILE] [-k start[.offset][opts][,end[.offset][opts]] [-t CHAR") +//usage: "] [FILE]..." +//usage:#define sort_full_usage "\n\n" +//usage: "Sort lines of text\n" +//usage: IF_FEATURE_SORT_BIG( +//usage: "\n -b Ignore leading blanks" +//usage: "\n -c Check whether input is sorted" +//usage: "\n -d Dictionary order (blank or alphanumeric only)" +//usage: "\n -f Ignore case" +//usage: "\n -g General numerical sort" +//usage: "\n -i Ignore unprintable characters" +//usage: "\n -k Sort key" +//usage: "\n -M Sort month" +//usage: ) +//usage: "\n -n Sort numbers" +//usage: IF_FEATURE_SORT_BIG( +//usage: "\n -o Output to file" +//usage: "\n -k Sort by key" +//usage: "\n -t CHAR Key separator" +//usage: ) +//usage: "\n -r Reverse sort order" +//usage: IF_FEATURE_SORT_BIG( +//usage: "\n -s Stable (don't sort ties alphabetically)" +//usage: ) +//usage: "\n -u Suppress duplicate lines" +//usage: IF_FEATURE_SORT_BIG( +//usage: "\n -z Lines are terminated by NUL, not newline" +//usage: "\n -mST Ignored for GNU compatibility") +//usage: +//usage:#define sort_example_usage +//usage: "$ echo -e \"e\\nf\\nb\\nd\\nc\\na\" | sort\n" +//usage: "a\n" +//usage: "b\n" +//usage: "c\n" +//usage: "d\n" +//usage: "e\n" +//usage: "f\n" +//usage: IF_FEATURE_SORT_BIG( +//usage: "$ echo -e \"c 3\\nb 2\\nd 2\" | $SORT -k 2,2n -k 1,1r\n" +//usage: "d 2\n" +//usage: "b 2\n" +//usage: "c 3\n" +//usage: ) +//usage: "" + +#include "libbb.h" + +/* This is a NOEXEC applet. Be very careful! */ -static int global_flags; /* sort [-m][-o output][-bdfinru][-t char][-k keydef]... [file...] @@ -22,302 +70,400 @@ static int global_flags; */ /* These are sort types */ -#define FLAG_n 1 /* Numeric sort */ -#define FLAG_g 2 /* Sort using strtod() */ -#define FLAG_M 4 /* Sort date */ +static const char OPT_STR[] ALIGN1 = "ngMucszbrdfimS:T:o:k:t:"; +enum { + FLAG_n = 1, /* Numeric sort */ + FLAG_g = 2, /* Sort using strtod() */ + FLAG_M = 4, /* Sort date */ /* ucsz apply to root level only, not keys. b at root level implies bb */ -#define FLAG_u 8 /* Unique */ -#define FLAG_c 16 /* Check: no output, exit(!ordered) */ -#define FLAG_s 32 /* Stable sort, no ascii fallback at end */ -#define FLAG_z 64 /* Input is null terminated, not \n */ + FLAG_u = 8, /* Unique */ + FLAG_c = 0x10, /* Check: no output, exit(!ordered) */ + FLAG_s = 0x20, /* Stable sort, no ascii fallback at end */ + FLAG_z = 0x40, /* Input and output is NUL terminated, not \n */ /* These can be applied to search keys, the previous four can't */ -#define FLAG_b 128 /* Ignore leading blanks */ -#define FLAG_r 256 /* Reverse */ -#define FLAG_d 512 /* Ignore !(isalnum()|isspace()) */ -#define FLAG_f 1024 /* Force uppercase */ -#define FLAG_i 2048 /* Ignore !isprint() */ -#define FLAG_bb 32768 /* Ignore trailing blanks */ + FLAG_b = 0x80, /* Ignore leading blanks */ + FLAG_r = 0x100, /* Reverse */ + FLAG_d = 0x200, /* Ignore !(isalnum()|isspace()) */ + FLAG_f = 0x400, /* Force uppercase */ + FLAG_i = 0x800, /* Ignore !isprint() */ + FLAG_m = 0x1000, /* ignored: merge already sorted files; do not sort */ + FLAG_S = 0x2000, /* ignored: -S, --buffer-size=SIZE */ + FLAG_T = 0x4000, /* ignored: -T, --temporary-directory=DIR */ + FLAG_o = 0x8000, + FLAG_k = 0x10000, + FLAG_t = 0x20000, + FLAG_bb = 0x80000000, /* Ignore trailing blanks */ +}; - -#ifdef CONFIG_FEATURE_SORT_BIG +#if ENABLE_FEATURE_SORT_BIG static char key_separator; -static struct sort_key -{ - struct sort_key *next_key; /* linked list */ - unsigned short range[4]; /* start word, start char, end word, end char */ - int flags; +static struct sort_key { + struct sort_key *next_key; /* linked list */ + unsigned range[4]; /* start word, start char, end word, end char */ + unsigned flags; } *key_list; static char *get_key(char *str, struct sort_key *key, int flags) { - int start = 0, end = 0, len, i, j; + int start = 0, end = 0, len, j; + unsigned i; /* Special case whole string, so we don't have to make a copy */ - if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3] - && !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str; - /* Find start of key on first pass, end on second pass*/ - len=strlen(str); + if (key->range[0] == 1 && !key->range[1] && !key->range[2] && !key->range[3] + && !(flags & (FLAG_b | FLAG_d | FLAG_f | FLAG_i | FLAG_bb)) + ) { + return str; + } - for(j=0;j<2;j++) { - if(!key->range[2*j]) end=len; + /* Find start of key on first pass, end on second pass */ + len = strlen(str); + for (j = 0; j < 2; j++) { + if (!key->range[2*j]) + end = len; /* Loop through fields */ else { - end=0; - for(i=1;irange[2*j]+j;i++) { - /* Skip leading blanks or first separator */ - if(str[end]) { - if(!key_separator && isspace(str[end])) - while(isspace(str[end])) end++; - } - /* Skip body of key */ - for(;str[end];end++) { - if(key_separator) { - if(str[end]==key_separator) break; - } else if(isspace(str[end])) break; + end = 0; + for (i = 1; i < key->range[2*j] + j; i++) { + if (key_separator) { + /* Skip body of key and separator */ + while (str[end]) { + if (str[end++] == key_separator) + break; + } + } else { + /* Skip leading blanks */ + while (isspace(str[end])) + end++; + /* Skip body of key */ + while (str[end]) { + if (isspace(str[end])) + break; + end++; + } } } } - if(!j) start=end; + if (!j) start = end; } - /* Key with explicit separator starts after separator */ - if(key_separator && str[start]==key_separator) start++; /* Strip leading whitespace if necessary */ //XXX: skip_whitespace() - if(flags&FLAG_b) while(isspace(str[start])) start++; + if (flags & FLAG_b) + while (isspace(str[start])) start++; /* Strip trailing whitespace if necessary */ - if(flags&FLAG_bb) while(end>start && isspace(str[end-1])) end--; + if (flags & FLAG_bb) + while (end > start && isspace(str[end-1])) end--; /* Handle offsets on start and end */ - if(key->range[3]) { - end+=key->range[3]-1; - if(end>len) end=len; + if (key->range[3]) { + end += key->range[3] - 1; + if (end > len) end = len; } - if(key->range[1]) { - start+=key->range[1]-1; - if(start>len) start=len; + if (key->range[1]) { + start += key->range[1] - 1; + if (start > len) start = len; } /* Make the copy */ - if(endnext_key); + struct sort_key **pkey = &key_list; + while (*pkey) + pkey = &((*pkey)->next_key); return *pkey = xzalloc(sizeof(struct sort_key)); } -#define GET_LINE(fp) (global_flags&FLAG_z) ? bb_get_chunk_from_file(fp,NULL) \ - : xmalloc_getline(fp) +#define GET_LINE(fp) \ + ((option_mask32 & FLAG_z) \ + ? bb_get_chunk_from_file(fp, NULL) \ + : xmalloc_fgetline(fp)) #else -#define GET_LINE(fp) xmalloc_getline(fp) +#define GET_LINE(fp) xmalloc_fgetline(fp) #endif /* Iterate through keys list and perform comparisons */ static int compare_keys(const void *xarg, const void *yarg) { - int flags=global_flags,retval=0; - char *x,*y; + int flags = option_mask32, retval = 0; + char *x, *y; -#ifdef CONFIG_FEATURE_SORT_BIG +#if ENABLE_FEATURE_SORT_BIG struct sort_key *key; - for(key=key_list;!retval && key;key=key->next_key) { - flags=(key->flags) ? key->flags : global_flags; + for (key = key_list; !retval && key; key = key->next_key) { + flags = key->flags ? key->flags : option_mask32; /* Chop out and modify key chunks, handling -dfib */ - x=get_key(*(char **)xarg,key,flags); - y=get_key(*(char **)yarg,key,flags); + x = get_key(*(char **)xarg, key, flags); + y = get_key(*(char **)yarg, key, flags); #else /* This curly bracket serves no purpose but to match the nesting - level of the for() loop we're not using */ + * level of the for () loop we're not using */ { - x=*(char **)xarg; - y=*(char **)yarg; + x = *(char **)xarg; + y = *(char **)yarg; #endif /* Perform actual comparison */ - switch(flags&7) { - default: - bb_error_msg_and_die("unknown sort type"); - break; - /* Ascii sort */ - case 0: - retval=strcmp(x,y); - break; -#ifdef CONFIG_FEATURE_SORT_BIG - case FLAG_g: - { - char *xx,*yy; - double dx=strtod(x,&xx), dy=strtod(y,&yy); - /* not numbers < NaN < -infinity < numbers < +infinity) */ - if(x==xx) retval=(y==yy ? 0 : -1); - else if(y==yy) retval=1; - /* Check for isnan */ - else if(dx != dx) retval = (dy != dy) ? 0 : -1; - else if(dy != dy) retval = 1; - /* Check for infinity. Could underflow, but it avoids libm. */ - else if(1.0/dx == 0.0) { - if(dx<0) retval=((1.0/dy == 0.0 && dy<0) ? 0 : -1); - else retval=((1.0/dy == 0.0 && dy>0) ? 0 : 1); - } else if(1.0/dy == 0.0) retval=dy<0 ? 1 : -1; - else retval=dx>dy ? 1 : (dx 0) ? 0 : 1; + } else if (1.0 / dy == 0.0) + retval = (dy < 0) ? 1 : -1; + else + retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0); + break; + } + case FLAG_M: { + struct tm thyme; + int dx; + char *xx, *yy; - xx=strptime(x,"%b",&thyme); - dx=thyme.tm_mon; - yy=strptime(y,"%b",&thyme); - if(!xx) retval=(!yy ? 0 : -1); - else if(!yy) retval=1; - else retval=(dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon); - break; - } - /* Full floating point version of -n */ - case FLAG_n: - { - double dx=atof(x),dy=atof(y); - retval=dx>dy ? 1 : (dx dy) ? 1 : ((dx < dy) ? -1 : 0); + break; + } + } /* switch */ /* Free key copies. */ - if(x!=*(char **)xarg) free(x); - if(y!=*(char **)yarg) free(y); - if(retval) break; + if (x != *(char **)xarg) free(x); + if (y != *(char **)yarg) free(y); + /* if (retval) break; - done by for () anyway */ #else - /* Integer version of -n for tiny systems */ - case FLAG_n: - retval=atoi(x)-atoi(y); - break; - } + /* Integer version of -n for tiny systems */ + case FLAG_n: + retval = atoi(x) - atoi(y); + break; + } /* switch */ #endif - } + } /* for */ + /* Perform fallback sort if necessary */ - if(!retval && !(global_flags&FLAG_s)) - retval=strcmp(*(char **)xarg, *(char **)yarg); - return ((flags&FLAG_r)?-1:1)*retval; + if (!retval && !(option_mask32 & FLAG_s)) + retval = strcmp(*(char **)xarg, *(char **)yarg); + + if (flags & FLAG_r) return -retval; + return retval; } -int sort_main(int argc, char **argv) +#if ENABLE_FEATURE_SORT_BIG +static unsigned str2u(char **str) { - FILE *fp,*outfile=NULL; - int linecount=0,i,flag; - char *line,**lines=NULL,*optlist="ngMucszbrdfimS:T:o:k:t:"; - int c; + unsigned long lu; + if (!isdigit((*str)[0])) + bb_error_msg_and_die("bad field specification"); + lu = strtoul(*str, str, 10); + if ((sizeof(long) > sizeof(int) && lu > INT_MAX) || !lu) + bb_error_msg_and_die("bad field specification"); + return lu; +} +#endif + +int sort_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int sort_main(int argc UNUSED_PARAM, char **argv) +{ + char *line, **lines; + char *str_ignored, *str_o, *str_t; + llist_t *lst_k = NULL; + int i, flag; + int linecount; + unsigned opts; xfunc_error_retval = 2; + /* Parse command line options */ - while((c=getopt(argc,argv,optlist))>0) { - line=strchr(optlist,c); - if(!line) bb_show_usage(); - switch(*line) { -#ifdef CONFIG_FEATURE_SORT_BIG - case 'o': - if(outfile) bb_error_msg_and_die("too many -o"); - outfile=xfopen(optarg,"w"); - break; - case 't': - if(key_separator || optarg[1]) - bb_error_msg_and_die("too many -t"); - key_separator=*optarg; - break; - /* parse sort key */ - case 'k': - { - struct sort_key *key=add_key(); - char *temp, *temp2; + /* -o and -t can be given at most once */ + opt_complementary = "o--o:t--t:" /* -t, -o: at most one of each */ + "k::"; /* -k takes list */ + opts = getopt32(argv, OPT_STR, &str_ignored, &str_ignored, &str_o, &lst_k, &str_t); + /* global b strips leading and trailing spaces */ + if (opts & FLAG_b) + option_mask32 |= FLAG_bb; +#if ENABLE_FEATURE_SORT_BIG + if (opts & FLAG_t) { + if (!str_t[0] || str_t[1]) + bb_error_msg_and_die("bad -t parameter"); + key_separator = str_t[0]; + } + /* note: below this point we use option_mask32, not opts, + * since that reduces register pressure and makes code smaller */ - temp=optarg; - for(i=0;*temp;) { - /* Start of range */ - key->range[2*i]=(unsigned short)strtol(temp,&temp,10); - if(*temp=='.') - key->range[(2*i)+1]=(unsigned short)strtol(temp+1,&temp,10); - for(;*temp;temp++) { - if(*temp==',' && !i++) { - temp++; - break; - } /* no else needed: fall through to syntax error - because comma isn't in optlist */ - temp2=strchr(optlist,*temp); - flag=(1<<(temp2-optlist)); - if(!temp2 || (flag>FLAG_M && flagflags|=flag; - } - } - break; + /* parse sort key */ + while (lst_k) { + enum { + FLAG_allowed_for_k = + FLAG_n | /* Numeric sort */ + FLAG_g | /* Sort using strtod() */ + FLAG_M | /* Sort date */ + FLAG_b | /* Ignore leading blanks */ + FLAG_r | /* Reverse */ + FLAG_d | /* Ignore !(isalnum()|isspace()) */ + FLAG_f | /* Force uppercase */ + FLAG_i | /* Ignore !isprint() */ + 0 + }; + struct sort_key *key = add_key(); + char *str_k = llist_pop(&lst_k); + + i = 0; /* i==0 before comma, 1 after (-k3,6) */ + while (*str_k) { + /* Start of range */ + /* Cannot use bb_strtou - suffix can be a letter */ + key->range[2*i] = str2u(&str_k); + if (*str_k == '.') { + str_k++; + key->range[2*i+1] = str2u(&str_k); + } + while (*str_k) { + const char *temp2; + + if (*str_k == ',' && !i++) { + str_k++; + break; + } /* no else needed: fall through to syntax error + because comma isn't in OPT_STR */ + temp2 = strchr(OPT_STR, *str_k); + if (!temp2) + bb_error_msg_and_die("unknown key option"); + flag = 1 << (temp2 - OPT_STR); + if (flag & ~FLAG_allowed_for_k) + bb_error_msg_and_die("unknown sort type"); + /* b after ',' means strip _trailing_ space */ + if (i && flag == FLAG_b) + flag = FLAG_bb; + key->flags |= flag; + str_k++; } -#endif - default: - global_flags|=(1<<(line-optlist)); - /* global b strips leading and trailing spaces */ - if(global_flags&FLAG_b) global_flags|=FLAG_bb; - break; } } +#endif + /* Open input files and read data */ - for(i=argv[optind] ? optind : optind-1;argv[i];i++) { - if(irange[0]=1; + if (!key_list) + add_key()->range[0] = 1; /* handle -c */ - if(global_flags&FLAG_c) { - int j=(global_flags&FLAG_u) ? -1 : 0; - for(i=1;ij) { - fprintf(stderr,"Check line %d\n",i); - return 1; + if (option_mask32 & FLAG_c) { + int j = (option_mask32 & FLAG_u) ? -1 : 0; + for (i = 1; i < linecount; i++) { + if (compare_keys(&lines[i-1], &lines[i]) > j) { + fprintf(stderr, "Check line %u\n", i); + return EXIT_FAILURE; } - return 0; + } + return EXIT_SUCCESS; } #endif /* Perform the actual sort */ - qsort(lines,linecount,sizeof(char *),compare_keys); + qsort(lines, linecount, sizeof(lines[0]), compare_keys); /* handle -u */ - if(global_flags&FLAG_u) { - for(flag=0,i=1;i