From 58f108eb339957f58d5a6034d82b09c4d50b53e3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 11 Mar 2010 21:17:55 +0100 Subject: [PATCH] lineedit: fix another corner case with bad unicode input function old new delta read_key 607 646 +39 readit 50 55 +5 getch_nowait 290 295 +5 hash_find 233 234 +1 xstrtoul_range_sfx 231 230 -1 passwd_main 1058 1056 -2 builtin_exit 45 43 -2 cmp_main 649 645 -4 lineedit_read_key 257 245 -12 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 4/5 up/down: 50/-21) Total: 29 bytes Signed-off-by: Denys Vlasenko --- editors/vi.c | 2 +- include/libbb.h | 6 +++++- libbb/lineedit.c | 45 ++++++++++++++++++++++++--------------------- libbb/read_key.c | 21 ++++++++++++++++----- miscutils/less.c | 2 +- testsuite/ash.tests | 26 ++++++++++++++++++++------ 6 files changed, 67 insertions(+), 35 deletions(-) diff --git a/editors/vi.c b/editors/vi.c index c4bca2a9c..a22a6a104 100644 --- a/editors/vi.c +++ b/editors/vi.c @@ -2205,7 +2205,7 @@ static int readit(void) // read (maybe cursor) key from stdin int c; fflush_all(); - c = read_key(STDIN_FILENO, readbuffer); + c = read_key(STDIN_FILENO, readbuffer, /*timeout off:*/ -2); if (c == -1) { // EOF/error go_bottom_and_clear_to_eol(); cookmode(); // terminal to "cooked" diff --git a/include/libbb.h b/include/libbb.h index fccc816cb..044d09047 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1275,8 +1275,12 @@ enum { * Return of -1 means EOF or error (errno == 0 on EOF). * buffer[0] is used as a counter of buffered chars and must be 0 * on first call. + * timeout: + * -2: do not poll for input; + * -1: poll(-1) (i.e. block); + * >=0: poll for TIMEOUT milliseconds, return -1/EAGAIN on timeout */ -int64_t read_key(int fd, char *buffer) FAST_FUNC; +int64_t read_key(int fd, char *buffer, int timeout) FAST_FUNC; void read_key_ungets(char *buffer, const char *str, unsigned len) FAST_FUNC; diff --git a/libbb/lineedit.c b/libbb/lineedit.c index 8e339da53..7c0eef90d 100644 --- a/libbb/lineedit.c +++ b/libbb/lineedit.c @@ -1658,27 +1658,28 @@ static void win_changed(int nsig) static int lineedit_read_key(char *read_key_buffer) { int64_t ic; - struct pollfd pfd; - int delay = -1; + int timeout = -1; #if ENABLE_FEATURE_ASSUME_UNICODE char unicode_buf[MB_CUR_MAX + 1]; int unicode_idx = 0; #endif - pfd.fd = STDIN_FILENO; - pfd.events = POLLIN; - do { -#if ENABLE_FEATURE_EDITING_ASK_TERMINAL || ENABLE_FEATURE_ASSUME_UNICODE - poll_again: + while (1) { + /* Wait for input. TIMEOUT = -1 makes read_key wait even + * on nonblocking stdin, TIMEOUT = 50 makes sure we won't + * insist on full MB_CUR_MAX buffer to declare input like + * "\xff\n",pause,"ls\n" invalid and thus won't lose "ls". + * + * Note: read_key sets errno to 0 on success. + */ + ic = read_key(STDIN_FILENO, read_key_buffer, timeout); + if (errno) { +#if ENABLE_FEATURE_ASSUME_UNICODE + if (errno == EAGAIN && unicode_idx != 0) + goto pushback; #endif - if (read_key_buffer[0] == 0) { - /* Wait for input. Can't just call read_key, - * it returns at once if stdin - * is in non-blocking mode. */ - safe_poll(&pfd, 1, delay); + break; } - /* Note: read_key sets errno to 0 on success: */ - ic = read_key(STDIN_FILENO, read_key_buffer); #if ENABLE_FEATURE_EDITING_ASK_TERMINAL if ((int32_t)ic == KEYCODE_CURSOR_POS @@ -1695,7 +1696,7 @@ static int lineedit_read_key(char *read_key_buffer) } } } - goto poll_again; + continue; } #endif @@ -1704,19 +1705,20 @@ static int lineedit_read_key(char *read_key_buffer) wchar_t wc; if ((int32_t)ic < 0) /* KEYCODE_xxx */ - return ic; - // TODO: imagine sequence like: 0xff, : we are currently losing 0xff... + break; + // TODO: imagine sequence like: 0xff,: we are currently losing 0xff... unicode_buf[unicode_idx++] = ic; unicode_buf[unicode_idx] = '\0'; if (mbstowcs(&wc, unicode_buf, 1) != 1) { /* Not (yet?) a valid unicode char */ if (unicode_idx < MB_CUR_MAX) { - delay = 50; - goto poll_again; + timeout = 50; + continue; } + pushback: /* Invalid sequence. Save all "bad bytes" except first */ - read_key_ungets(read_key_buffer, unicode_buf + 1, MB_CUR_MAX - 1); + read_key_ungets(read_key_buffer, unicode_buf + 1, unicode_idx - 1); /* * ic = unicode_buf[0] sounds even better, but currently * this does not work: wchar_t[] -> char[] conversion @@ -1730,7 +1732,8 @@ static int lineedit_read_key(char *read_key_buffer) } } #endif - } while (errno == EAGAIN); + break; + } return ic; } diff --git a/libbb/read_key.c b/libbb/read_key.c index 98b3131de..0faa12c97 100644 --- a/libbb/read_key.c +++ b/libbb/read_key.c @@ -9,7 +9,7 @@ */ #include "libbb.h" -int64_t FAST_FUNC read_key(int fd, char *buffer) +int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) { struct pollfd pfd; const char *seq; @@ -90,14 +90,27 @@ int64_t FAST_FUNC read_key(int fd, char *buffer) /* ESC [ Z - Shift-Tab */ }; + pfd.fd = fd; + pfd.events = POLLIN; + buffer++; /* saved chars counter is in buffer[-1] now */ start_over: errno = 0; n = (unsigned char)buffer[-1]; if (n == 0) { - /* If no data, block waiting for input. - * It is tempting to read more than one byte here, + /* If no data, wait for input. + * If requested, wait TIMEOUT ms. TIMEOUT = -1 is useful + * if fd can be in non-blocking mode. + */ + if (timeout >= -1) { + if (safe_poll(&pfd, 1, timeout) == 0) { + /* Timed out */ + errno = EAGAIN; + return -1; + } + } + /* It is tempting to read more than one byte here, * but it breaks pasting. Example: at shell prompt, * user presses "c","a","t" and then pastes "\nline\n". * When we were reading 3 bytes here, we were eating @@ -121,8 +134,6 @@ int64_t FAST_FUNC read_key(int fd, char *buffer) } /* Loop through known ESC sequences */ - pfd.fd = fd; - pfd.events = POLLIN; seq = esccmds; while (*seq != '\0') { /* n - position in sequence we did not read yet */ diff --git a/miscutils/less.c b/miscutils/less.c index 92d0f3271..e4f8ab979 100644 --- a/miscutils/less.c +++ b/miscutils/less.c @@ -855,7 +855,7 @@ static int getch_nowait(void) /* We have kbd_fd in O_NONBLOCK mode, read inside read_key() * would not block even if there is no input available */ - rd = read_key(kbd_fd, kbd_input); + rd = read_key(kbd_fd, kbd_input, /*timeout off:*/ -2); if (rd == -1) { if (errno == EAGAIN) { /* No keyboard input available. Since poll() did return, diff --git a/testsuite/ash.tests b/testsuite/ash.tests index 4b6efe42c..6b2caf316 100755 --- a/testsuite/ash.tests +++ b/testsuite/ash.tests @@ -10,33 +10,47 @@ # testing "test name" "options" "expected result" "file input" "stdin" testing "One byte which is not valid unicode char followed by valid input" \ - "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \ + "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \ "\ 00000000 3f 2d 0a |?-.| 00000003 " \ "" \ - "echo \xff- | hexdump -C >output; exit; exit; exit; exit\n" \ + "echo \xff- | hexdump -C >ash.output; exit; exit; exit; exit\n" testing "30 bytes which are not valid unicode chars followed by valid input" \ - "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \ + "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \ "\ 00000000 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f |????????????????| 00000010 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 2d 0a |??????????????-.| 00000020 " \ "" \ - "echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >output; exit; exit; exit; exit\n" \ + "echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >ash.output; exit; exit; exit; exit\n" # Not sure this behavior is perfect: we lose all invalid input which precedes # arrow keys and such. In this example, \xff\xff are lost testing "2 bytes which are not valid unicode chars followed by left arrow key" \ - "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \ + "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \ "\ 00000000 3d 2d 0a |=-.| 00000003 " \ "" \ - "echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >output; exit; exit; exit; exit\n" \ + "echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >ash.output; exit; exit; exit; exit\n" + +# ash should see "echo \xff\n",pause -> execute it as "echo ?" (which is +# not checked by the test), then read and execute the rest: "echo A | ..." +# The bug was that ash was eating the beginning of "echo A" despite the pause. +testing "Invalid unicode chars followed by a pause do not eat next chars" \ + "{ echo -ne 'echo \xff\n'; sleep 1; echo -ne 'echo A | hexdump -C >ash.output; exit; exit; exit; exit\n'; } \ + | script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \ + "\ +00000000 41 0a |A.| +00000002 +" \ + "" "" + +rm ash.output exit $FAILCOUNT -- 2.25.1