aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2010-03-11 21:17:55 +0100
committerDenys Vlasenko <vda.linux@googlemail.com>2010-03-11 21:17:55 +0100
commit58f108eb339957f58d5a6034d82b09c4d50b53e3 (patch)
tree4fdfae71136811f3b9c8ffd56656b78da19739da
parentb0a57abb79001b994115d2c96a7d9e1f2f511430 (diff)
downloadbusybox-w32-58f108eb339957f58d5a6034d82b09c4d50b53e3.tar.gz
busybox-w32-58f108eb339957f58d5a6034d82b09c4d50b53e3.tar.bz2
busybox-w32-58f108eb339957f58d5a6034d82b09c4d50b53e3.zip
lineedit: fix another corner case with bad unicode input
function old new delta read_key 607 646 +39 readit 50 55 +5 getch_nowait 290 295 +5 hash_find 233 234 +1 xstrtoul_range_sfx 231 230 -1 passwd_main 1058 1056 -2 builtin_exit 45 43 -2 cmp_main 649 645 -4 lineedit_read_key 257 245 -12 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 4/5 up/down: 50/-21) Total: 29 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--editors/vi.c2
-rw-r--r--include/libbb.h6
-rw-r--r--libbb/lineedit.c45
-rw-r--r--libbb/read_key.c21
-rw-r--r--miscutils/less.c2
-rwxr-xr-xtestsuite/ash.tests26
6 files changed, 67 insertions, 35 deletions
diff --git a/editors/vi.c b/editors/vi.c
index c4bca2a9c..a22a6a104 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -2205,7 +2205,7 @@ static int readit(void) // read (maybe cursor) key from stdin
2205 int c; 2205 int c;
2206 2206
2207 fflush_all(); 2207 fflush_all();
2208 c = read_key(STDIN_FILENO, readbuffer); 2208 c = read_key(STDIN_FILENO, readbuffer, /*timeout off:*/ -2);
2209 if (c == -1) { // EOF/error 2209 if (c == -1) { // EOF/error
2210 go_bottom_and_clear_to_eol(); 2210 go_bottom_and_clear_to_eol();
2211 cookmode(); // terminal to "cooked" 2211 cookmode(); // terminal to "cooked"
diff --git a/include/libbb.h b/include/libbb.h
index fccc816cb..044d09047 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1275,8 +1275,12 @@ enum {
1275 * Return of -1 means EOF or error (errno == 0 on EOF). 1275 * Return of -1 means EOF or error (errno == 0 on EOF).
1276 * buffer[0] is used as a counter of buffered chars and must be 0 1276 * buffer[0] is used as a counter of buffered chars and must be 0
1277 * on first call. 1277 * on first call.
1278 * timeout:
1279 * -2: do not poll for input;
1280 * -1: poll(-1) (i.e. block);
1281 * >=0: poll for TIMEOUT milliseconds, return -1/EAGAIN on timeout
1278 */ 1282 */
1279int64_t read_key(int fd, char *buffer) FAST_FUNC; 1283int64_t read_key(int fd, char *buffer, int timeout) FAST_FUNC;
1280void read_key_ungets(char *buffer, const char *str, unsigned len) FAST_FUNC; 1284void read_key_ungets(char *buffer, const char *str, unsigned len) FAST_FUNC;
1281 1285
1282 1286
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index 8e339da53..7c0eef90d 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -1658,27 +1658,28 @@ static void win_changed(int nsig)
1658static int lineedit_read_key(char *read_key_buffer) 1658static int lineedit_read_key(char *read_key_buffer)
1659{ 1659{
1660 int64_t ic; 1660 int64_t ic;
1661 struct pollfd pfd; 1661 int timeout = -1;
1662 int delay = -1;
1663#if ENABLE_FEATURE_ASSUME_UNICODE 1662#if ENABLE_FEATURE_ASSUME_UNICODE
1664 char unicode_buf[MB_CUR_MAX + 1]; 1663 char unicode_buf[MB_CUR_MAX + 1];
1665 int unicode_idx = 0; 1664 int unicode_idx = 0;
1666#endif 1665#endif
1667 1666
1668 pfd.fd = STDIN_FILENO; 1667 while (1) {
1669 pfd.events = POLLIN; 1668 /* Wait for input. TIMEOUT = -1 makes read_key wait even
1670 do { 1669 * on nonblocking stdin, TIMEOUT = 50 makes sure we won't
1671#if ENABLE_FEATURE_EDITING_ASK_TERMINAL || ENABLE_FEATURE_ASSUME_UNICODE 1670 * insist on full MB_CUR_MAX buffer to declare input like
1672 poll_again: 1671 * "\xff\n",pause,"ls\n" invalid and thus won't lose "ls".
1672 *
1673 * Note: read_key sets errno to 0 on success.
1674 */
1675 ic = read_key(STDIN_FILENO, read_key_buffer, timeout);
1676 if (errno) {
1677#if ENABLE_FEATURE_ASSUME_UNICODE
1678 if (errno == EAGAIN && unicode_idx != 0)
1679 goto pushback;
1673#endif 1680#endif
1674 if (read_key_buffer[0] == 0) { 1681 break;
1675 /* Wait for input. Can't just call read_key,
1676 * it returns at once if stdin
1677 * is in non-blocking mode. */
1678 safe_poll(&pfd, 1, delay);
1679 } 1682 }
1680 /* Note: read_key sets errno to 0 on success: */
1681 ic = read_key(STDIN_FILENO, read_key_buffer);
1682 1683
1683#if ENABLE_FEATURE_EDITING_ASK_TERMINAL 1684#if ENABLE_FEATURE_EDITING_ASK_TERMINAL
1684 if ((int32_t)ic == KEYCODE_CURSOR_POS 1685 if ((int32_t)ic == KEYCODE_CURSOR_POS
@@ -1695,7 +1696,7 @@ static int lineedit_read_key(char *read_key_buffer)
1695 } 1696 }
1696 } 1697 }
1697 } 1698 }
1698 goto poll_again; 1699 continue;
1699 } 1700 }
1700#endif 1701#endif
1701 1702
@@ -1704,19 +1705,20 @@ static int lineedit_read_key(char *read_key_buffer)
1704 wchar_t wc; 1705 wchar_t wc;
1705 1706
1706 if ((int32_t)ic < 0) /* KEYCODE_xxx */ 1707 if ((int32_t)ic < 0) /* KEYCODE_xxx */
1707 return ic; 1708 break;
1708 // TODO: imagine sequence like: 0xff, <left-arrow>: we are currently losing 0xff... 1709 // TODO: imagine sequence like: 0xff,<left-arrow>: we are currently losing 0xff...
1709 1710
1710 unicode_buf[unicode_idx++] = ic; 1711 unicode_buf[unicode_idx++] = ic;
1711 unicode_buf[unicode_idx] = '\0'; 1712 unicode_buf[unicode_idx] = '\0';
1712 if (mbstowcs(&wc, unicode_buf, 1) != 1) { 1713 if (mbstowcs(&wc, unicode_buf, 1) != 1) {
1713 /* Not (yet?) a valid unicode char */ 1714 /* Not (yet?) a valid unicode char */
1714 if (unicode_idx < MB_CUR_MAX) { 1715 if (unicode_idx < MB_CUR_MAX) {
1715 delay = 50; 1716 timeout = 50;
1716 goto poll_again; 1717 continue;
1717 } 1718 }
1719 pushback:
1718 /* Invalid sequence. Save all "bad bytes" except first */ 1720 /* Invalid sequence. Save all "bad bytes" except first */
1719 read_key_ungets(read_key_buffer, unicode_buf + 1, MB_CUR_MAX - 1); 1721 read_key_ungets(read_key_buffer, unicode_buf + 1, unicode_idx - 1);
1720 /* 1722 /*
1721 * ic = unicode_buf[0] sounds even better, but currently 1723 * ic = unicode_buf[0] sounds even better, but currently
1722 * this does not work: wchar_t[] -> char[] conversion 1724 * this does not work: wchar_t[] -> char[] conversion
@@ -1730,7 +1732,8 @@ static int lineedit_read_key(char *read_key_buffer)
1730 } 1732 }
1731 } 1733 }
1732#endif 1734#endif
1733 } while (errno == EAGAIN); 1735 break;
1736 }
1734 1737
1735 return ic; 1738 return ic;
1736} 1739}
diff --git a/libbb/read_key.c b/libbb/read_key.c
index 98b3131de..0faa12c97 100644
--- a/libbb/read_key.c
+++ b/libbb/read_key.c
@@ -9,7 +9,7 @@
9 */ 9 */
10#include "libbb.h" 10#include "libbb.h"
11 11
12int64_t FAST_FUNC read_key(int fd, char *buffer) 12int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout)
13{ 13{
14 struct pollfd pfd; 14 struct pollfd pfd;
15 const char *seq; 15 const char *seq;
@@ -90,14 +90,27 @@ int64_t FAST_FUNC read_key(int fd, char *buffer)
90 /* ESC [ Z - Shift-Tab */ 90 /* ESC [ Z - Shift-Tab */
91 }; 91 };
92 92
93 pfd.fd = fd;
94 pfd.events = POLLIN;
95
93 buffer++; /* saved chars counter is in buffer[-1] now */ 96 buffer++; /* saved chars counter is in buffer[-1] now */
94 97
95 start_over: 98 start_over:
96 errno = 0; 99 errno = 0;
97 n = (unsigned char)buffer[-1]; 100 n = (unsigned char)buffer[-1];
98 if (n == 0) { 101 if (n == 0) {
99 /* If no data, block waiting for input. 102 /* If no data, wait for input.
100 * It is tempting to read more than one byte here, 103 * If requested, wait TIMEOUT ms. TIMEOUT = -1 is useful
104 * if fd can be in non-blocking mode.
105 */
106 if (timeout >= -1) {
107 if (safe_poll(&pfd, 1, timeout) == 0) {
108 /* Timed out */
109 errno = EAGAIN;
110 return -1;
111 }
112 }
113 /* It is tempting to read more than one byte here,
101 * but it breaks pasting. Example: at shell prompt, 114 * but it breaks pasting. Example: at shell prompt,
102 * user presses "c","a","t" and then pastes "\nline\n". 115 * user presses "c","a","t" and then pastes "\nline\n".
103 * When we were reading 3 bytes here, we were eating 116 * When we were reading 3 bytes here, we were eating
@@ -121,8 +134,6 @@ int64_t FAST_FUNC read_key(int fd, char *buffer)
121 } 134 }
122 135
123 /* Loop through known ESC sequences */ 136 /* Loop through known ESC sequences */
124 pfd.fd = fd;
125 pfd.events = POLLIN;
126 seq = esccmds; 137 seq = esccmds;
127 while (*seq != '\0') { 138 while (*seq != '\0') {
128 /* n - position in sequence we did not read yet */ 139 /* n - position in sequence we did not read yet */
diff --git a/miscutils/less.c b/miscutils/less.c
index 92d0f3271..e4f8ab979 100644
--- a/miscutils/less.c
+++ b/miscutils/less.c
@@ -855,7 +855,7 @@ static int getch_nowait(void)
855 855
856 /* We have kbd_fd in O_NONBLOCK mode, read inside read_key() 856 /* We have kbd_fd in O_NONBLOCK mode, read inside read_key()
857 * would not block even if there is no input available */ 857 * would not block even if there is no input available */
858 rd = read_key(kbd_fd, kbd_input); 858 rd = read_key(kbd_fd, kbd_input, /*timeout off:*/ -2);
859 if (rd == -1) { 859 if (rd == -1) {
860 if (errno == EAGAIN) { 860 if (errno == EAGAIN) {
861 /* No keyboard input available. Since poll() did return, 861 /* No keyboard input available. Since poll() did return,
diff --git a/testsuite/ash.tests b/testsuite/ash.tests
index 4b6efe42c..6b2caf316 100755
--- a/testsuite/ash.tests
+++ b/testsuite/ash.tests
@@ -10,33 +10,47 @@
10# testing "test name" "options" "expected result" "file input" "stdin" 10# testing "test name" "options" "expected result" "file input" "stdin"
11 11
12testing "One byte which is not valid unicode char followed by valid input" \ 12testing "One byte which is not valid unicode char followed by valid input" \
13 "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \ 13 "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
14 "\ 14 "\
1500000000 3f 2d 0a |?-.| 1500000000 3f 2d 0a |?-.|
1600000003 1600000003
17" \ 17" \
18 "" \ 18 "" \
19 "echo \xff- | hexdump -C >output; exit; exit; exit; exit\n" \ 19 "echo \xff- | hexdump -C >ash.output; exit; exit; exit; exit\n"
20 20
21testing "30 bytes which are not valid unicode chars followed by valid input" \ 21testing "30 bytes which are not valid unicode chars followed by valid input" \
22 "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \ 22 "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
23 "\ 23 "\
2400000000 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f |????????????????| 2400000000 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f |????????????????|
2500000010 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 2d 0a |??????????????-.| 2500000010 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 2d 0a |??????????????-.|
2600000020 2600000020
27" \ 27" \
28 "" \ 28 "" \
29 "echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >output; exit; exit; exit; exit\n" \ 29 "echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >ash.output; exit; exit; exit; exit\n"
30 30
31# Not sure this behavior is perfect: we lose all invalid input which precedes 31# Not sure this behavior is perfect: we lose all invalid input which precedes
32# arrow keys and such. In this example, \xff\xff are lost 32# arrow keys and such. In this example, \xff\xff are lost
33testing "2 bytes which are not valid unicode chars followed by left arrow key" \ 33testing "2 bytes which are not valid unicode chars followed by left arrow key" \
34 "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \ 34 "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
35 "\ 35 "\
3600000000 3d 2d 0a |=-.| 3600000000 3d 2d 0a |=-.|
3700000003 3700000003
38" \ 38" \
39 "" \ 39 "" \
40 "echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >output; exit; exit; exit; exit\n" \ 40 "echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >ash.output; exit; exit; exit; exit\n"
41
42# ash should see "echo \xff\n",pause -> execute it as "echo ?" (which is
43# not checked by the test), then read and execute the rest: "echo A | ..."
44# The bug was that ash was eating the beginning of "echo A" despite the pause.
45testing "Invalid unicode chars followed by a pause do not eat next chars" \
46 "{ echo -ne 'echo \xff\n'; sleep 1; echo -ne 'echo A | hexdump -C >ash.output; exit; exit; exit; exit\n'; } \
47 | script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
48 "\
4900000000 41 0a |A.|
5000000002
51" \
52 "" ""
53
54rm ash.output
41 55
42exit $FAILCOUNT 56exit $FAILCOUNT