lineedit: fix another corner case with bad unicode input
authorDenys Vlasenko <vda.linux@googlemail.com>
Thu, 11 Mar 2010 20:17:55 +0000 (21:17 +0100)
committerDenys Vlasenko <vda.linux@googlemail.com>
Thu, 11 Mar 2010 20:17:55 +0000 (21:17 +0100)
function                                             old     new   delta
read_key                                             607     646     +39
readit                                                50      55      +5
getch_nowait                                         290     295      +5
hash_find                                            233     234      +1
xstrtoul_range_sfx                                   231     230      -1
passwd_main                                         1058    1056      -2
builtin_exit                                          45      43      -2
cmp_main                                             649     645      -4
lineedit_read_key                                    257     245     -12
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/5 up/down: 50/-21)             Total: 29 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
editors/vi.c
include/libbb.h
libbb/lineedit.c
libbb/read_key.c
miscutils/less.c
testsuite/ash.tests

index c4bca2a9c1918ee8210e43654e54840cdb6e1fe9..a22a6a104adaf772b4df175cc0f7a5b2dbee7c6e 100644 (file)
@@ -2205,7 +2205,7 @@ static int readit(void) // read (maybe cursor) key from stdin
        int c;
 
        fflush_all();
-       c = read_key(STDIN_FILENO, readbuffer);
+       c = read_key(STDIN_FILENO, readbuffer, /*timeout off:*/ -2);
        if (c == -1) { // EOF/error
                go_bottom_and_clear_to_eol();
                cookmode(); // terminal to "cooked"
index fccc816cb4dd6fb570e498f7cb700b335ff71661..044d09047a607a725d00ae688b59e7acda4dd4e9 100644 (file)
@@ -1275,8 +1275,12 @@ enum {
  * Return of -1 means EOF or error (errno == 0 on EOF).
  * buffer[0] is used as a counter of buffered chars and must be 0
  * on first call.
+ * timeout:
+ * -2: do not poll for input;
+ * -1: poll(-1) (i.e. block);
+ * >=0: poll for TIMEOUT milliseconds, return -1/EAGAIN on timeout
  */
-int64_t read_key(int fd, char *buffer) FAST_FUNC;
+int64_t read_key(int fd, char *buffer, int timeout) FAST_FUNC;
 void read_key_ungets(char *buffer, const char *str, unsigned len) FAST_FUNC;
 
 
index 8e339da5304fc00f87214c0edf4cc2307e371ff8..7c0eef90d2d1aec5b1a9a63498c41b659eef1af5 100644 (file)
@@ -1658,27 +1658,28 @@ static void win_changed(int nsig)
 static int lineedit_read_key(char *read_key_buffer)
 {
        int64_t ic;
-       struct pollfd pfd;
-       int delay = -1;
+       int timeout = -1;
 #if ENABLE_FEATURE_ASSUME_UNICODE
        char unicode_buf[MB_CUR_MAX + 1];
        int unicode_idx = 0;
 #endif
 
-       pfd.fd = STDIN_FILENO;
-       pfd.events = POLLIN;
-       do {
-#if ENABLE_FEATURE_EDITING_ASK_TERMINAL || ENABLE_FEATURE_ASSUME_UNICODE
- poll_again:
+       while (1) {
+               /* Wait for input. TIMEOUT = -1 makes read_key wait even
+                * on nonblocking stdin, TIMEOUT = 50 makes sure we won't
+                * insist on full MB_CUR_MAX buffer to declare input like
+                * "\xff\n",pause,"ls\n" invalid and thus won't lose "ls".
+                *
+                * Note: read_key sets errno to 0 on success.
+                */
+               ic = read_key(STDIN_FILENO, read_key_buffer, timeout);
+               if (errno) {
+#if ENABLE_FEATURE_ASSUME_UNICODE
+                       if (errno == EAGAIN && unicode_idx != 0)
+                               goto pushback;
 #endif
-               if (read_key_buffer[0] == 0) {
-                       /* Wait for input. Can't just call read_key,
-                        * it returns at once if stdin
-                        * is in non-blocking mode. */
-                       safe_poll(&pfd, 1, delay);
+                       break;
                }
-               /* Note: read_key sets errno to 0 on success: */
-               ic = read_key(STDIN_FILENO, read_key_buffer);
 
 #if ENABLE_FEATURE_EDITING_ASK_TERMINAL
                if ((int32_t)ic == KEYCODE_CURSOR_POS
@@ -1695,7 +1696,7 @@ static int lineedit_read_key(char *read_key_buffer)
                                        }
                                }
                        }
-                       goto poll_again;
+                       continue;
                }
 #endif
 
@@ -1704,19 +1705,20 @@ static int lineedit_read_key(char *read_key_buffer)
                        wchar_t wc;
 
                        if ((int32_t)ic < 0) /* KEYCODE_xxx */
-                               return ic;
-                       // TODO: imagine sequence like: 0xff, <left-arrow>: we are currently losing 0xff...
+                               break;
+                       // TODO: imagine sequence like: 0xff,<left-arrow>: we are currently losing 0xff...
 
                        unicode_buf[unicode_idx++] = ic;
                        unicode_buf[unicode_idx] = '\0';
                        if (mbstowcs(&wc, unicode_buf, 1) != 1) {
                                /* Not (yet?) a valid unicode char */
                                if (unicode_idx < MB_CUR_MAX) {
-                                       delay = 50;
-                                       goto poll_again;
+                                       timeout = 50;
+                                       continue;
                                }
+ pushback:
                                /* Invalid sequence. Save all "bad bytes" except first */
-                               read_key_ungets(read_key_buffer, unicode_buf + 1, MB_CUR_MAX - 1);
+                               read_key_ungets(read_key_buffer, unicode_buf + 1, unicode_idx - 1);
                                /*
                                 * ic = unicode_buf[0] sounds even better, but currently
                                 * this does not work: wchar_t[] -> char[] conversion
@@ -1730,7 +1732,8 @@ static int lineedit_read_key(char *read_key_buffer)
                        }
                }
 #endif
-       } while (errno == EAGAIN);
+               break;
+       }
 
        return ic;
 }
index 98b3131de7b9a779e0ebb92cc6e2ca763651980f..0faa12c973668c57f2f9d770ecbe7c6c1b3b63f9 100644 (file)
@@ -9,7 +9,7 @@
  */
 #include "libbb.h"
 
-int64_t FAST_FUNC read_key(int fd, char *buffer)
+int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout)
 {
        struct pollfd pfd;
        const char *seq;
@@ -90,14 +90,27 @@ int64_t FAST_FUNC read_key(int fd, char *buffer)
                /* ESC [ Z - Shift-Tab */
        };
 
+       pfd.fd = fd;
+       pfd.events = POLLIN;
+
        buffer++; /* saved chars counter is in buffer[-1] now */
 
  start_over:
        errno = 0;
        n = (unsigned char)buffer[-1];
        if (n == 0) {
-               /* If no data, block waiting for input.
-                * It is tempting to read more than one byte here,
+               /* If no data, wait for input.
+                * If requested, wait TIMEOUT ms. TIMEOUT = -1 is useful
+                * if fd can be in non-blocking mode.
+                */
+               if (timeout >= -1) {
+                       if (safe_poll(&pfd, 1, timeout) == 0) {
+                               /* Timed out */
+                               errno = EAGAIN;
+                               return -1;
+                       }
+               }
+               /* It is tempting to read more than one byte here,
                 * but it breaks pasting. Example: at shell prompt,
                 * user presses "c","a","t" and then pastes "\nline\n".
                 * When we were reading 3 bytes here, we were eating
@@ -121,8 +134,6 @@ int64_t FAST_FUNC read_key(int fd, char *buffer)
        }
 
        /* Loop through known ESC sequences */
-       pfd.fd = fd;
-       pfd.events = POLLIN;
        seq = esccmds;
        while (*seq != '\0') {
                /* n - position in sequence we did not read yet */
index 92d0f32713829b5bd2ce8202eeddf0f6857f5cb4..e4f8ab979099ce83c15f1af693692c1c79926875 100644 (file)
@@ -855,7 +855,7 @@ static int getch_nowait(void)
 
        /* We have kbd_fd in O_NONBLOCK mode, read inside read_key()
         * would not block even if there is no input available */
-       rd = read_key(kbd_fd, kbd_input);
+       rd = read_key(kbd_fd, kbd_input, /*timeout off:*/ -2);
        if (rd == -1) {
                if (errno == EAGAIN) {
                        /* No keyboard input available. Since poll() did return,
index 4b6efe42c86fb502e71d4f8f4e39885c002d5393..6b2caf316cbef241e235099b394f2ea0a2297537 100755 (executable)
 # testing "test name" "options" "expected result" "file input" "stdin"
 
 testing "One byte which is not valid unicode char followed by valid input" \
-       "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
+       "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
        "\
 00000000  3f 2d 0a                                          |?-.|
 00000003
 " \
        "" \
-       "echo \xff- | hexdump -C >output; exit; exit; exit; exit\n" \
+       "echo \xff- | hexdump -C >ash.output; exit; exit; exit; exit\n"
 
 testing "30 bytes which are not valid unicode chars followed by valid input" \
-       "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
+       "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
        "\
 00000000  3f 3f 3f 3f 3f 3f 3f 3f  3f 3f 3f 3f 3f 3f 3f 3f  |????????????????|
 00000010  3f 3f 3f 3f 3f 3f 3f 3f  3f 3f 3f 3f 3f 3f 2d 0a  |??????????????-.|
 00000020
 " \
        "" \
-       "echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >output; exit; exit; exit; exit\n" \
+       "echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >ash.output; exit; exit; exit; exit\n"
 
 # Not sure this behavior is perfect: we lose all invalid input which precedes
 # arrow keys and such. In this example, \xff\xff are lost
 testing "2 bytes which are not valid unicode chars followed by left arrow key" \
-       "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
+       "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
        "\
 00000000  3d 2d 0a                                          |=-.|
 00000003
 " \
        "" \
-       "echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >output; exit; exit; exit; exit\n" \
+       "echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >ash.output; exit; exit; exit; exit\n"
+
+# ash should see "echo \xff\n",pause -> execute it as "echo ?" (which is
+# not checked by the test), then read and execute the rest: "echo A | ..."
+# The bug was that ash was eating the beginning of "echo A" despite the pause.
+testing "Invalid unicode chars followed by a pause do not eat next chars" \
+       "{ echo -ne 'echo \xff\n'; sleep 1; echo -ne 'echo A | hexdump -C >ash.output; exit; exit; exit; exit\n'; } \
+         | script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
+       "\
+00000000  41 0a                                             |A.|
+00000002
+" \
+       "" ""
+
+rm ash.output
 
 exit $FAILCOUNT