From d8528b8e56bab7643722e4453121882d23c23c07 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 31 Jan 2010 05:15:38 +0100 Subject: [PATCH] ls: unicode fixes Signed-off-by: Denys Vlasenko --- TODO_unicode | 2 +- coreutils/ls.c | 412 ++++++++++++++++++++------------------ include/libbb.h | 19 +- include/unicode.h | 5 - libbb/Kbuild | 1 + libbb/printable_string.c | 65 ++++++ testsuite/ls.mk_uni_tests | 111 ++++++++++ testsuite/ls.tests | 136 +++++++++++++ 8 files changed, 545 insertions(+), 206 deletions(-) create mode 100644 libbb/printable_string.c create mode 100644 testsuite/ls.mk_uni_tests create mode 100755 testsuite/ls.tests diff --git a/TODO_unicode b/TODO_unicode index c29fd933b..b310e8d4d 100644 --- a/TODO_unicode +++ b/TODO_unicode @@ -7,7 +7,7 @@ dumpleases Applets which may need unicode handling (more extensive than sanitizing of filenames in error messages): -ls - uses unicode_strlen, not scrlen +ls - work in progress expand, unexpand - uses unicode_strlen, not scrlen ash, hush through lineedit - uses unicode_strlen, not scrlen top - need to sanitize process args diff --git a/coreutils/ls.c b/coreutils/ls.c index 6c898b793..d004ce8b1 100644 --- a/coreutils/ls.c +++ b/coreutils/ls.c @@ -241,9 +241,6 @@ struct dnode { IF_SELINUX(security_context_t sid;) }; -static struct dnode **list_dir(const char *, unsigned *); -static unsigned list_single(const struct dnode *); - struct globals { #if ENABLE_FEATURE_LS_COLOR smallint show_color; @@ -528,31 +525,236 @@ static void dnsort(struct dnode **dn, int size) #endif -static void showfiles(struct dnode **dn, unsigned nfiles) +static unsigned calc_name_len(const char *name) +{ + unsigned len; + uni_stat_t uni_stat; + /* Mirrors print_name(): same column count for name, but nothing is printed */ + // TODO: quote tab as \t, etc, if -Q + name = printable_string(&uni_stat, name); + + if (!(option_mask32 & OPT_Q)) { + return uni_stat.unicode_width; /* no -Q: width as reported by printable_string() */ + } + /* -Q: 2 extra columns for the enclosing quotes, plus 1 per escaped char below */ + len = 2 + uni_stat.unicode_width; + while (*name) { + if (*name == '"' || *name == '\\') { + len++; /* print_name() puts a backslash in front of this char */ + } + name++; + } + return len; +} +
+ +/* Return the number of used columns. + * Note that only STYLE_COLUMNS uses return value. + * STYLE_SINGLE and STYLE_LONG don't care. + * coreutils 7.2 also supports: + * ls -b (--escape) = octal escapes (although it doesn't look like working) + * ls -N (--literal) = not escape at all + */ +static unsigned print_name(const char *name) +{ + unsigned len; + uni_stat_t uni_stat; + + // TODO: quote tab as \t, etc, if -Q + name = printable_string(&uni_stat, name); + + if (!(option_mask32 & OPT_Q)) { + fputs(name, stdout); + return uni_stat.unicode_width; + } + + len = 2 + uni_stat.unicode_width; + putchar('"'); + while (*name) { + if (*name == '"' || *name == '\\') { + putchar('\\'); + len++; + } + putchar(*name++); + } + putchar('"'); + return len; +} + +/* Return the number of used columns. + * Note that only STYLE_COLUMNS uses return value, + * STYLE_SINGLE and STYLE_LONG don't care. + */ +static NOINLINE unsigned list_single(const struct dnode *dn) { - unsigned i, ncols, nrows, row, nc; unsigned column = 0; - unsigned nexttab = 0; - unsigned column_width = 0; /* for STYLE_LONG and STYLE_SINGLE not used */ + char *lpath = lpath; /* for compiler */ +#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR + struct stat info; + char append; +#endif /* Never happens: - if (dn == NULL || nfiles < 1) - return; + if (dn->fullname == NULL) + return 0; */ - if (all_fmt & STYLE_LONG) { +#if ENABLE_FEATURE_LS_FILETYPES + append = append_char(dn->dstat.st_mode); +#endif + + /* Do readlink early, so that if it fails, error message + * does not appear *inside* the "ls -l" line */ + if (all_fmt & LIST_SYMLINK) + if (S_ISLNK(dn->dstat.st_mode)) + lpath = xmalloc_readlink_or_warn(dn->fullname); + + if (all_fmt & LIST_INO) + column += printf("%7llu ", (long long) dn->dstat.st_ino); + if (all_fmt & LIST_BLOCKS) + column += printf("%4"OFF_FMT"u ", (off_t) (dn->dstat.st_blocks >> 1)); + if (all_fmt & LIST_MODEBITS) + column += printf("%-10s ", (char *) 
bb_mode_string(dn->dstat.st_mode)); + if (all_fmt & LIST_NLINKS) + column += printf("%4lu ", (long) dn->dstat.st_nlink); +#if ENABLE_FEATURE_LS_USERNAME + if (all_fmt & LIST_ID_NAME) { + if (option_mask32 & OPT_g) { + column += printf("%-8.8s ", + get_cached_username(dn->dstat.st_uid)); + } else { + column += printf("%-8.8s %-8.8s ", + get_cached_username(dn->dstat.st_uid), + get_cached_groupname(dn->dstat.st_gid)); + } + } +#endif + if (all_fmt & LIST_ID_NUMERIC) { + if (option_mask32 & OPT_g) + column += printf("%-8u ", (int) dn->dstat.st_uid); + else + column += printf("%-8u %-8u ", + (int) dn->dstat.st_uid, + (int) dn->dstat.st_gid); + } + if (all_fmt & (LIST_SIZE /*|LIST_DEV*/ )) { + if (S_ISBLK(dn->dstat.st_mode) || S_ISCHR(dn->dstat.st_mode)) { + column += printf("%4u, %3u ", + (int) major(dn->dstat.st_rdev), + (int) minor(dn->dstat.st_rdev)); + } else { + if (all_fmt & LS_DISP_HR) { + column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ", + /* print st_size, show one fractional, use suffixes */ + make_human_readable_str(dn->dstat.st_size, 1, 0) + ); + } else { + column += printf("%9"OFF_FMT"u ", (off_t) dn->dstat.st_size); + } + } + } +#if ENABLE_FEATURE_LS_TIMESTAMPS + if (all_fmt & (LIST_FULLTIME|LIST_DATE_TIME)) { + char *filetime; + time_t ttime = dn->dstat.st_mtime; + if (all_fmt & TIME_ACCESS) + ttime = dn->dstat.st_atime; + if (all_fmt & TIME_CHANGE) + ttime = dn->dstat.st_ctime; + filetime = ctime(&ttime); + /* filetime's format: "Wed Jun 30 21:49:08 1993\n" */ + if (all_fmt & LIST_FULLTIME) + column += printf("%.24s ", filetime); + else { /* LIST_DATE_TIME */ + /* current_time_t ~== time(NULL) */ + time_t age = current_time_t - ttime; + printf("%.6s ", filetime + 4); /* "Jun 30" */ + if (age < 3600L * 24 * 365 / 2 && age > -15 * 60) { + /* hh:mm if less than 6 months old */ + printf("%.5s ", filetime + 11); + } else { /* year. 
buggy if year > 9999 ;) */ + printf(" %.4s ", filetime + 20); + } + column += 13; + } + } +#endif +#if ENABLE_SELINUX + if (all_fmt & LIST_CONTEXT) { + column += printf("%-32s ", dn->sid ? dn->sid : "unknown"); + freecon(dn->sid); + } +#endif + if (all_fmt & LIST_FILENAME) { +#if ENABLE_FEATURE_LS_COLOR + if (show_color) { + info.st_mode = 0; /* for fgcolor() */ + lstat(dn->fullname, &info); + printf("\033[%u;%um", bold(info.st_mode), + fgcolor(info.st_mode)); + } +#endif + column += print_name(dn->name); + if (show_color) { + printf("\033[0m"); + } + } + if (all_fmt & LIST_SYMLINK) { + if (S_ISLNK(dn->dstat.st_mode) && lpath) { + printf(" -> "); +#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR +#if ENABLE_FEATURE_LS_COLOR + info.st_mode = 0; /* for fgcolor() */ +#endif + if (stat(dn->fullname, &info) == 0) { + append = append_char(info.st_mode); + } +#endif +#if ENABLE_FEATURE_LS_COLOR + if (show_color) { + printf("\033[%u;%um", bold(info.st_mode), + fgcolor(info.st_mode)); + } +#endif + column += print_name(lpath) + 4; + if (show_color) { + printf("\033[0m"); + } + free(lpath); + } + } +#if ENABLE_FEATURE_LS_FILETYPES + if (all_fmt & LIST_FILETYPE) { + if (append) { + putchar(append); + column++; + } + } +#endif + + return column; +} + +static void showfiles(struct dnode **dn, unsigned nfiles) +{ + unsigned i, ncols, nrows, row, nc; + unsigned column = 0; + unsigned nexttab = 0; + unsigned column_width = 0; /* used only by STYLE_COLUMNS */ + + if (all_fmt & STYLE_LONG) { /* STYLE_LONG or STYLE_SINGLE */ ncols = 1; } else { /* find the longest file name, use that as the column width */ for (i = 0; dn[i]; i++) { - int len = unicode_strlen(dn[i]->name); + int len = calc_name_len(dn[i]->name); if (column_width < len) column_width = len; } column_width += tabstops + IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + ) - ((all_fmt & LIST_INO) ? 8 : 0) + - ((all_fmt & LIST_BLOCKS) ? 5 : 0); + ((all_fmt & LIST_INO) ? 8 : 0) + + ((all_fmt & LIST_BLOCKS) ? 
5 : 0); ncols = (int) (terminal_width / column_width); } @@ -618,6 +820,8 @@ static off_t calculate_blocks(struct dnode **dn) #endif +static struct dnode **list_dir(const char *, unsigned *); + static void showdirs(struct dnode **dn, int first) { unsigned nfiles; @@ -733,188 +937,6 @@ static struct dnode **list_dir(const char *path, unsigned *nfiles_p) } -static int print_name(const char *name) -{ - if (option_mask32 & OPT_Q) { -#if ENABLE_FEATURE_ASSUME_UNICODE - unsigned len = 2 + unicode_strlen(name); -#else - unsigned len = 2; -#endif - putchar('"'); - while (*name) { - if (*name == '"') { - putchar('\\'); - len++; - } - putchar(*name++); - if (!ENABLE_FEATURE_ASSUME_UNICODE) - len++; - } - putchar('"'); - return len; - } - /* No -Q: */ -#if ENABLE_FEATURE_ASSUME_UNICODE - fputs(name, stdout); - return unicode_strlen(name); -#else - return printf("%s", name); -#endif -} - - -static NOINLINE unsigned list_single(const struct dnode *dn) -{ - unsigned column = 0; - char *lpath = lpath; /* for compiler */ -#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR - struct stat info; - char append; -#endif - - /* Never happens: - if (dn->fullname == NULL) - return 0; - */ - -#if ENABLE_FEATURE_LS_FILETYPES - append = append_char(dn->dstat.st_mode); -#endif - - /* Do readlink early, so that if it fails, error message - * does not appear *inside* the "ls -l" line */ - if (all_fmt & LIST_SYMLINK) - if (S_ISLNK(dn->dstat.st_mode)) - lpath = xmalloc_readlink_or_warn(dn->fullname); - - if (all_fmt & LIST_INO) - column += printf("%7llu ", (long long) dn->dstat.st_ino); - if (all_fmt & LIST_BLOCKS) - column += printf("%4"OFF_FMT"u ", (off_t) (dn->dstat.st_blocks >> 1)); - if (all_fmt & LIST_MODEBITS) - column += printf("%-10s ", (char *) bb_mode_string(dn->dstat.st_mode)); - if (all_fmt & LIST_NLINKS) - column += printf("%4lu ", (long) dn->dstat.st_nlink); -#if ENABLE_FEATURE_LS_USERNAME - if (all_fmt & LIST_ID_NAME) { - if (option_mask32 & OPT_g) { - column += 
printf("%-8.8s ", - get_cached_username(dn->dstat.st_uid)); - } else { - column += printf("%-8.8s %-8.8s ", - get_cached_username(dn->dstat.st_uid), - get_cached_groupname(dn->dstat.st_gid)); - } - } -#endif - if (all_fmt & LIST_ID_NUMERIC) { - if (option_mask32 & OPT_g) - column += printf("%-8u ", (int) dn->dstat.st_uid); - else - column += printf("%-8u %-8u ", - (int) dn->dstat.st_uid, - (int) dn->dstat.st_gid); - } - if (all_fmt & (LIST_SIZE /*|LIST_DEV*/ )) { - if (S_ISBLK(dn->dstat.st_mode) || S_ISCHR(dn->dstat.st_mode)) { - column += printf("%4u, %3u ", - (int) major(dn->dstat.st_rdev), - (int) minor(dn->dstat.st_rdev)); - } else { - if (all_fmt & LS_DISP_HR) { - column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ", - /* print st_size, show one fractional, use suffixes */ - make_human_readable_str(dn->dstat.st_size, 1, 0) - ); - } else { - column += printf("%9"OFF_FMT"u ", (off_t) dn->dstat.st_size); - } - } - } -#if ENABLE_FEATURE_LS_TIMESTAMPS - if (all_fmt & (LIST_FULLTIME|LIST_DATE_TIME)) { - char *filetime; - time_t ttime = dn->dstat.st_mtime; - if (all_fmt & TIME_ACCESS) - ttime = dn->dstat.st_atime; - if (all_fmt & TIME_CHANGE) - ttime = dn->dstat.st_ctime; - filetime = ctime(&ttime); - /* filetime's format: "Wed Jun 30 21:49:08 1993\n" */ - if (all_fmt & LIST_FULLTIME) - column += printf("%.24s ", filetime); - else { /* LIST_DATE_TIME */ - /* current_time_t ~== time(NULL) */ - time_t age = current_time_t - ttime; - printf("%.6s ", filetime + 4); /* "Jun 30" */ - if (age < 3600L * 24 * 365 / 2 && age > -15 * 60) { - /* hh:mm if less than 6 months old */ - printf("%.5s ", filetime + 11); - } else { /* year. buggy if year > 9999 ;) */ - printf(" %.4s ", filetime + 20); - } - column += 13; - } - } -#endif -#if ENABLE_SELINUX - if (all_fmt & LIST_CONTEXT) { - column += printf("%-32s ", dn->sid ? 
dn->sid : "unknown"); - freecon(dn->sid); - } -#endif - if (all_fmt & LIST_FILENAME) { -#if ENABLE_FEATURE_LS_COLOR - if (show_color) { - info.st_mode = 0; /* for fgcolor() */ - lstat(dn->fullname, &info); - printf("\033[%u;%um", bold(info.st_mode), - fgcolor(info.st_mode)); - } -#endif - column += print_name(dn->name); - if (show_color) { - printf("\033[0m"); - } - } - if (all_fmt & LIST_SYMLINK) { - if (S_ISLNK(dn->dstat.st_mode) && lpath) { - printf(" -> "); -#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR -#if ENABLE_FEATURE_LS_COLOR - info.st_mode = 0; /* for fgcolor() */ -#endif - if (stat(dn->fullname, &info) == 0) { - append = append_char(info.st_mode); - } -#endif -#if ENABLE_FEATURE_LS_COLOR - if (show_color) { - printf("\033[%u;%um", bold(info.st_mode), - fgcolor(info.st_mode)); - } -#endif - column += print_name(lpath) + 4; - if (show_color) { - printf("\033[0m"); - } - free(lpath); - } - } -#if ENABLE_FEATURE_LS_FILETYPES - if (all_fmt & LIST_FILETYPE) { - if (append) { - putchar(append); - column++; - } - } -#endif - - return column; -} - - int ls_main(int argc UNUSED_PARAM, char **argv) { struct dnode **dnd; diff --git a/include/libbb.h b/include/libbb.h index 73aea409e..a86d64400 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -577,11 +577,6 @@ char *strncpy_IFNAMSIZ(char *dst, const char *src) FAST_FUNC; * But potentially slow, don't use in one-billion-times loops */ int bb_putchar(int ch) FAST_FUNC; char *xasprintf(const char *format, ...) 
__attribute__ ((format(printf, 1, 2))) FAST_FUNC RETURNS_MALLOC; -/* Prints unprintable chars ch as ^C or M-c to file - * (M-c is used only if ch is ORed with PRINTABLE_META), - * else it is printed as-is (except for ch = 0x9b) */ -enum { PRINTABLE_META = 0x100 }; -void fputc_printable(int ch, FILE *file) FAST_FUNC; // gcc-4.1.1 still isn't good enough at optimizing it // (+200 bytes compared to macro) //static ALWAYS_INLINE @@ -594,6 +589,20 @@ void fputc_printable(int ch, FILE *file) FAST_FUNC; #define NOT_LONE_CHAR(s,c) ((s)[0] != (c) || (s)[1]) #define DOT_OR_DOTDOT(s) ((s)[0] == '.' && (!(s)[1] || ((s)[1] == '.' && !(s)[2]))) +typedef struct uni_stat_t { + unsigned byte_count; + unsigned unicode_count; + unsigned unicode_width; +} uni_stat_t; +/* Returns a string with unprintable chars replaced by '?' or + * SUBST_WCHAR. This function is unicode-aware. */ +const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str); +/* Prints unprintable char ch as ^C or M-c to file + * (M-c is used only if ch is ORed with PRINTABLE_META), + * else it is printed as-is (except for ch = 0x9b) */ +enum { PRINTABLE_META = 0x100 }; +void fputc_printable(int ch, FILE *file) FAST_FUNC; + /* dmalloc will redefine these to it's own implementation. It is safe * to have the prototypes here unconditionally. 
*/ void *malloc_or_warn(size_t size) FAST_FUNC RETURNS_MALLOC; diff --git a/include/unicode.h b/include/unicode.h index f32e56599..25ef7407e 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -23,11 +23,6 @@ size_t FAST_FUNC unicode_strlen(const char *string); enum { UNI_FLAG_PAD = (1 << 0), }; -typedef struct uni_stat_t { - unsigned byte_count; - unsigned unicode_count; - unsigned unicode_width; -} uni_stat_t; //UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src); //UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags); char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src); diff --git a/libbb/Kbuild b/libbb/Kbuild index 243626d67..7e793109e 100644 --- a/libbb/Kbuild +++ b/libbb/Kbuild @@ -73,6 +73,7 @@ lib-y += perror_nomsg_and_die.o lib-y += pidfile.o lib-y += platform.o lib-y += printable.o +lib-y += printable_string.o lib-y += print_flags.o lib-y += process_escape_sequence.o lib-y += procps.o diff --git a/libbb/printable_string.c b/libbb/printable_string.c new file mode 100644 index 000000000..47565de0d --- /dev/null +++ b/libbb/printable_string.c @@ -0,0 +1,65 @@ +/* vi: set sw=4 ts=4: */ +/* + * Unicode support routines. + * + * Copyright (C) 2010 Denys Vlasenko + * + * Licensed under GPL version 2, see file LICENSE in this tarball for details. 
+ */ +#include "libbb.h" +#include "unicode.h" +/* Return str itself if all its bytes are 0x20..0x7e, else a converted copy owned by this function (do not free) */ +const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str) +{ + static char *saved[4]; /* ring of the most recent converted copies; slot count must stay a power of 2 for the index mask below */ + static unsigned cur_saved; /* = 0; ring slot the next converted copy goes into */ + + char *dst; + const char *s; + /* Fast path: scan for the first byte outside 0x20..0x7e, if there is one */ + s = str; + while (1) { + unsigned char c = *s; + if (c == '\0') { + /* 99+% of inputs do not need conversion */ + if (stats) { + stats->byte_count = (s - str); /* nothing was replaced: all three counters get the byte length */ + stats->unicode_count = (s - str); + stats->unicode_width = (s - str); + } + return str; /* original pointer is returned, nothing is stored in the ring */ + } + if (c < ' ') + break; + if (c >= 0x7f) + break; + s++; + } + /* Slow path: at least one byte needs replacing, so build a converted copy in dst */ +#if ENABLE_FEATURE_ASSUME_UNICODE + dst = unicode_conv_to_printable(stats, str); +#else + { + char *d = dst = xstrdup(str); /* copy, then overwrite offending bytes in place */ + while (1) { + unsigned char c = *d; + if (c == '\0') + break; + if (c < ' ' || c >= 0x7f) + *d = '?'; + d++; + } + if (stats) { + stats->byte_count = (d - dst); /* one-for-one replacement: length is unchanged */ + stats->unicode_count = (d - dst); + stats->unicode_width = (d - dst); + } + } +#endif + /* Recycle the oldest ring slot: a returned copy is freed by the 4th later call that has to convert */ + free(saved[cur_saved]); + saved[cur_saved] = dst; + cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1); /* advance with wrap-around */ + + return dst; +} diff --git a/testsuite/ls.mk_uni_tests b/testsuite/ls.mk_uni_tests new file mode 100644 index 000000000..da0c29f29 --- /dev/null +++ b/testsuite/ls.mk_uni_tests @@ -0,0 +1,111 @@ +# DO NOT EDIT THIS FILE! MOST TEXT EDITORS WILL DAMAGE IT!
+>'0001_1__Some_correct_UTF-8_text___________________________________________|' +>'0002_2__Boundary_condition_test_cases_____________________________________|' +>'0003_2.1__First_possible_sequence_of_a_certain_length_____________________|' +>'0004_2.1.2__2_bytes__U-00000080_:________"€"______________________________|' +>'0005_2.1.3__3_bytes__U-00000800_:________"ࠀ"______________________________|' +>'0006_2.1.4__4_bytes__U-00010000_:________"𐀀"______________________________|' +>'0007_2.1.5__5_bytes__U-00200000_:________"øˆ€€€"______________________________|' +>'0008_2.1.6__6_bytes__U-04000000_:________"ü„€€€€"______________________________|' +>'0009_2.2__Last_possible_sequence_of_a_certain_length______________________|' +>'0010_2.2.1__1_byte___U-0000007F_:________""______________________________|' +>'0011_2.2.2__2_bytes__U-000007FF_:________"ß¿"______________________________|' +>'0012_2.2.3__3_bytes__U-0000FFFF_:________"ï¿¿"______________________________|' +>'0013_2.2.4__4_bytes__U-001FFFFF_:________"÷¿¿¿"______________________________|' +>'0014_2.2.5__5_bytes__U-03FFFFFF_:________"û¿¿¿¿"______________________________|' +>'0015_2.2.6__6_bytes__U-7FFFFFFF_:________"ý¿¿¿¿¿"______________________________|' +>'0016_2.3__Other_boundary_conditions_______________________________________|' +>'0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"퟿"___________________________________|' +>'0018_2.3.2__U-0000E000_=_ee_80_80_=_""___________________________________|' +>'0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"�"___________________________________|' +>'0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"ô¿¿"________________________________|' +>'0021_2.3.5__U-00110000_=_f4_90_80_80_=_"ô€€"________________________________|' +>'0022_3__Malformed_sequences_______________________________________________|' +>'0023_3.1__Unexpected_continuation_bytes___________________________________|' +>'0024_3.1.1__First_continuation_byte_0x80:_"€"_____________________________|' 
+>'0025_3.1.2__Last__continuation_byte_0xbf:_"¿"_____________________________|' +>'0026_3.1.3__2_continuation_bytes:_"€¿"____________________________________|' +>'0027_3.1.4__3_continuation_bytes:_"€¿€"___________________________________|' +>'0028_3.1.5__4_continuation_bytes:_"€¿€¿"__________________________________|' +>'0029_3.1.6__5_continuation_bytes:_"€¿€¿€"_________________________________|' +>'0030_3.1.7__6_continuation_bytes:_"€¿€¿€¿"________________________________|' +>'0031_3.1.8__7_continuation_bytes:_"€¿€¿€¿€"_______________________________|' +>'0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|' +>'0033____"€‚ƒ„…†‡ˆ‰Š‹ŒŽ_________________________________________________|' +>'0034_____‘’“”•–—˜™š›œžŸ_________________________________________________|' +>'0035_____ ¡¢£¤¥¦§¨©ª«¬­®¯_________________________________________________|' +>'0036_____°±²³´µ¶·¸¹º»¼½¾¿"________________________________________________|' +>'0037_3.2__Lonely_start_characters_________________________________________|' +>'0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|' +>'0039________each_followed_by_a_space_character:___________________________|' +>'0040____"À_Á_Â_Ã_Ä_Å_Æ_Ç_È_É_Ê_Ë_Ì_Í_Î_Ï__________________________________|' +>'0041_____Ð_Ñ_Ò_Ó_Ô_Õ_Ö_×_Ø_Ù_Ú_Û_Ü_Ý_Þ_ß_"________________________________|' +>'0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|' +>'0043________each_followed_by_a_space_character:___________________________|' +>'0044____"à_á_â_ã_ä_å_æ_ç_è_é_ê_ë_ì_í_î_ï_"________________________________|' +>'0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|' +>'0046________each_followed_by_a_space_character:___________________________|' +>'0047____"ð_ñ_ò_ó_ô_õ_ö_÷_"________________________________________________|' +>'0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|' +>'0049________each_followed_by_a_space_character:___________________________|' 
+>'0050____"ø_ù_ú_û_"________________________________________________________|' +>'0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|' +>'0052________each_followed_by_a_space_character:___________________________|' +>'0053____"ü_ý_"____________________________________________________________|' +>'0054_3.3__Sequences_with_last_continuation_byte_missing___________________|' +>'0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"À"______|' +>'0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"à€"______|' +>'0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"ð€€"______|' +>'0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"ø€€€"______|' +>'0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"ü€€€€"______|' +>'0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"ß"______|' +>'0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"ï¿"______|' +>'0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"÷¿¿"______|' +>'0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"û¿¿¿"______|' +>'0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"ý¿¿¿¿"______|' +>'0065_3.4__Concatenation_of_incomplete_sequences___________________________|' +>'0066____"Àà€ð€€ø€€€ü€€€€ßï¿÷¿¿û¿¿¿ý¿¿¿¿"______________________________________________________|' +>'0067_3.5__Impossible_bytes________________________________________________|' +>'0068_3.5.1__fe_=_"þ"______________________________________________________|' +>'0069_3.5.2__ff_=_"ÿ"______________________________________________________|' +>'0070_3.5.3__fe_fe_ff_ff_=_"þþÿÿ"__________________________________________|' +>'0071_4__Overlong_sequences________________________________________________|' +>'0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|' +>'0073_4.1.1_U+002F_=_c0_af_____________=_"À¯"_______________________________|' 
+>'0074_4.1.2_U+002F_=_e0_80_af__________=_"à€¯"_______________________________|' +>'0075_4.1.3_U+002F_=_f0_80_80_af_______=_"ð€€¯"_______________________________|' +>'0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"ø€€€¯"_______________________________|' +>'0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"ü€€€€¯"_______________________________|' +>'0078_4.2__Maximum_overlong_sequences______________________________________|' +>'0079_4.2.1__U-0000007F_=_c1_bf_____________=_"Á¿"__________________________|' +>'0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"àŸ¿"__________________________|' +>'0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"ð¿¿"__________________________|' +>'0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"ø‡¿¿¿"__________________________|' +>'0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"üƒ¿¿¿¿"__________________________|' +>'0084_4.3__Overlong_representation_of_the_NUL_character____________________|' +>'0085_4.3.1__U+0000_=_c0_80_____________=_"À€"______________________________|' +>'0086_4.3.2__U+0000_=_e0_80_80__________=_"à€€"______________________________|' +>'0087_4.3.3__U+0000_=_f0_80_80_80_______=_"ð€€€"______________________________|' +>'0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"ø€€€€"______________________________|' +>'0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"ü€€€€€"______________________________|' +>'0090_5__Illegal_code_positions____________________________________________|' +>'0091_5.1_Single_UTF-16_surrogates_________________________________________|' +>'0092_5.1.1__U+D800_=_ed_a0_80_=_"í €"_______________________________________|' +>'0093_5.1.2__U+DB7F_=_ed_ad_bf_=_"í­¿"_______________________________________|' +>'0094_5.1.3__U+DB80_=_ed_ae_80_=_"í®€"_______________________________________|' +>'0095_5.1.4__U+DBFF_=_ed_af_bf_=_"í¯¿"_______________________________________|' +>'0096_5.1.5__U+DC00_=_ed_b0_80_=_"í°€"_______________________________________|' +>'0097_5.1.6__U+DF80_=_ed_be_80_=_"í¾€"_______________________________________|' 
+>'0098_5.1.7__U+DFFF_=_ed_bf_bf_=_"í¿¿"_______________________________________|' +>'0099_5.2_Paired_UTF-16_surrogates_________________________________________|' +>'0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_"𐀀"______________________|' +>'0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_"𐏿"______________________|' +>'0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_"󯰀"______________________|' +>'0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_"í­¿í¿¿"______________________|' +>'0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_"󰀀"______________________|' +>'0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_"󰏿"______________________|' +>'0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_"􏰀"______________________|' +>'0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_"􏿿"______________________|' +>'0108_5.3_Other_illegal_code_positions_____________________________________|' +>'0109_5.3.1__U+FFFE_=_ef_bf_be_=_"￾"_______________________________________|' +>'0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"ï¿¿"_______________________________________|' diff --git a/testsuite/ls.tests b/testsuite/ls.tests new file mode 100755 index 000000000..b0c5da7f9 --- /dev/null +++ b/testsuite/ls.tests @@ -0,0 +1,136 @@ +#!/bin/sh +# Copyright 2010 by Denys Vlasenko +# Licensed under GPL v2, see file LICENSE for details. + +. ./testing.sh + +test -f "$bindir/.config" && . "$bindir/.config" + +rm -rf ls.testdir >/dev/null +mkdir ls.testdir || exit 1 + +# testing "test name" "command" "expected result" "file input" "stdin" + +# The test isn't passing correctly now - all | chars should line up +# perfectly in the correctly passed test. 
+test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \ +&& test x"$CONFIG_SUBST_WCHAR" = x"63" \ +&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \ +&& testing "ls unicode test" \ +"(cd ls.testdir && sh ../ls.mk_uni_tests) && ls -1 ls.testdir" \ +'0001_1__Some_correct_UTF-8_text___________________________________________| +0002_2__Boundary_condition_test_cases_____________________________________| +0003_2.1__First_possible_sequence_of_a_certain_length_____________________| +0004_2.1.2__2_bytes__U-00000080_:________"?"______________________________| +0005_2.1.3__3_bytes__U-00000800_:________"?"______________________________| +0006_2.1.4__4_bytes__U-00010000_:________"?"______________________________| +0007_2.1.5__5_bytes__U-00200000_:________"?"______________________________| +0008_2.1.6__6_bytes__U-04000000_:________"?"______________________________| +0009_2.2__Last_possible_sequence_of_a_certain_length______________________| +0010_2.2.1__1_byte___U-0000007F_:________"?"______________________________| +0011_2.2.2__2_bytes__U-000007FF_:________"?"______________________________| +0012_2.2.3__3_bytes__U-0000FFFF_:________"?"______________________________| +0013_2.2.4__4_bytes__U-001FFFFF_:________"?"______________________________| +0014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________| +0015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________| +0016_2.3__Other_boundary_conditions_______________________________________| +0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"?"___________________________________| +0018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________| +0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"?"___________________________________| +0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________| +0021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________| +0022_3__Malformed_sequences_______________________________________________| 
+0023_3.1__Unexpected_continuation_bytes___________________________________| +0024_3.1.1__First_continuation_byte_0x80:_"?"_____________________________| +0025_3.1.2__Last__continuation_byte_0xbf:_"?"_____________________________| +0026_3.1.3__2_continuation_bytes:_"??"____________________________________| +0027_3.1.4__3_continuation_bytes:_"???"___________________________________| +0028_3.1.5__4_continuation_bytes:_"????"__________________________________| +0029_3.1.6__5_continuation_bytes:_"?????"_________________________________| +0030_3.1.7__6_continuation_bytes:_"??????"________________________________| +0031_3.1.8__7_continuation_bytes:_"???????"_______________________________| +0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___| +0033____"????????????????_________________________________________________| +0034_____????????????????_________________________________________________| +0035_____????????????????_________________________________________________| +0036_____????????????????"________________________________________________| +0037_3.2__Lonely_start_characters_________________________________________| +0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________| +0039________each_followed_by_a_space_character:___________________________| +0040____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?__________________________________| +0041_____?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________| +0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________| +0043________each_followed_by_a_space_character:___________________________| +0044____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________| +0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________| +0046________each_followed_by_a_space_character:___________________________| +0047____"?_?_?_?_?_?_?_?_"________________________________________________| 
+0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________| +0049________each_followed_by_a_space_character:___________________________| +0050____"?_?_?_?_"________________________________________________________| +0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________| +0052________each_followed_by_a_space_character:___________________________| +0053____"?_?_"____________________________________________________________| +0054_3.3__Sequences_with_last_continuation_byte_missing___________________| +0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______| +0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"??"______| +0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"???"______| +0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"????"______| +0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"?????"______| +0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"?"______| +0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"??"______| +0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"???"______| +0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"????"______| +0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"?????"______| +0065_3.4__Concatenation_of_incomplete_sequences___________________________| +0066____"??????????????????????????????"______________________________________________________| +0067_3.5__Impossible_bytes________________________________________________| +0068_3.5.1__fe_=_"?"______________________________________________________| +0069_3.5.2__ff_=_"?"______________________________________________________| +0070_3.5.3__fe_fe_ff_ff_=_"????"__________________________________________| +0071_4__Overlong_sequences________________________________________________| +0072_4.1__Examples_of_an_overlong_ASCII_character_________________________| 
+0073_4.1.1_U+002F_=_c0_af_____________=_"??"_______________________________| +0074_4.1.2_U+002F_=_e0_80_af__________=_"???"_______________________________| +0075_4.1.3_U+002F_=_f0_80_80_af_______=_"????"_______________________________| +0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"?????"_______________________________| +0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"??????"_______________________________| +0078_4.2__Maximum_overlong_sequences______________________________________| +0079_4.2.1__U-0000007F_=_c1_bf_____________=_"??"__________________________| +0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"?"__________________________| +0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"?"__________________________| +0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"?"__________________________| +0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"?"__________________________| +0084_4.3__Overlong_representation_of_the_NUL_character____________________| +0085_4.3.1__U+0000_=_c0_80_____________=_"??"______________________________| +0086_4.3.2__U+0000_=_e0_80_80__________=_"???"______________________________| +0087_4.3.3__U+0000_=_f0_80_80_80_______=_"????"______________________________| +0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"?????"______________________________| +0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"??????"______________________________| +0090_5__Illegal_code_positions____________________________________________| +0091_5.1_Single_UTF-16_surrogates_________________________________________| +0092_5.1.1__U+D800_=_ed_a0_80_=_"?"_______________________________________| +0093_5.1.2__U+DB7F_=_ed_ad_bf_=_"?"_______________________________________| +0094_5.1.3__U+DB80_=_ed_ae_80_=_"?"_______________________________________| +0095_5.1.4__U+DBFF_=_ed_af_bf_=_"?"_______________________________________| +0096_5.1.5__U+DC00_=_ed_b0_80_=_"?"_______________________________________| +0097_5.1.6__U+DF80_=_ed_be_80_=_"?"_______________________________________| 
+0098_5.1.7__U+DFFF_=_ed_bf_bf_=_"?"_______________________________________| +0099_5.2_Paired_UTF-16_surrogates_________________________________________| +0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_"??"______________________| +0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_"??"______________________| +0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_"??"______________________| +0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_"??"______________________| +0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_"??"______________________| +0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_"??"______________________| +0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_"??"______________________| +0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_"??"______________________| +0108_5.3_Other_illegal_code_positions_____________________________________| +0109_5.3.1__U+FFFE_=_ef_bf_be_=_"?"_______________________________________| +0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"?"_______________________________________| +' "" "" + +# Clean up +rm -rf ls.testdir 2>/dev/null + +exit $FAILCOUNT -- 2.25.1