aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tomas Heinrich <heinrich.tomas@gmail.com> 2010-03-09 14:09:24 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2010-03-09 14:09:24 +0100
commit d2b04050c0a9a15e29e15cbf9c487db93d07c46e (patch)
tree 19929bed97b4e6ddc028465f9fd0a7b3a5d28b5f
parent f15620c3774c164ee6c1e2fbf9dd481b606a95a1 (diff)
download busybox-w32-d2b04050c0a9a15e29e15cbf9c487db93d07c46e.tar.gz
busybox-w32-d2b04050c0a9a15e29e15cbf9c487db93d07c46e.tar.bz2
busybox-w32-d2b04050c0a9a15e29e15cbf9c487db93d07c46e.zip
lineedit: invalid unicode characters are replaced with CONFIG_SUBST_WCHAR
function                                             old     new   delta
read_key_ungets                                        -      50     +50
lineedit_read_key                                    223     252     +29

Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- include/libbb.h 1
-rw-r--r-- libbb/lineedit.c 26
-rw-r--r-- libbb/read_key.c 9
-rwxr-xr-x testsuite/ash.tests 42
4 files changed, 73 insertions, 5 deletions
diff --git a/include/libbb.h b/include/libbb.h
index ead1020dd..fccc816cb 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1277,6 +1277,7 @@ enum {
1277 * on first call. 1277 * on first call.
1278 */ 1278 */
1279int64_t read_key(int fd, char *buffer) FAST_FUNC; 1279int64_t read_key(int fd, char *buffer) FAST_FUNC;
1280void read_key_ungets(char *buffer, const char *str, unsigned len) FAST_FUNC;
1280 1281
1281 1282
1282#if ENABLE_FEATURE_EDITING 1283#if ENABLE_FEATURE_EDITING
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index c50b31d67..8e339da53 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -1700,18 +1700,34 @@ static int lineedit_read_key(char *read_key_buffer)
1700#endif 1700#endif
1701 1701
1702#if ENABLE_FEATURE_ASSUME_UNICODE 1702#if ENABLE_FEATURE_ASSUME_UNICODE
1703 { 1703 if (unicode_status == UNICODE_ON) {
1704 wchar_t wc; 1704 wchar_t wc;
1705 1705
1706 if ((int32_t)ic < 0) /* KEYCODE_xxx */ 1706 if ((int32_t)ic < 0) /* KEYCODE_xxx */
1707 return ic; 1707 return ic;
1708 // TODO: imagine sequence like: 0xff, <left-arrow>: we are currently losing 0xff...
1709
1708 unicode_buf[unicode_idx++] = ic; 1710 unicode_buf[unicode_idx++] = ic;
1709 unicode_buf[unicode_idx] = '\0'; 1711 unicode_buf[unicode_idx] = '\0';
1710 if (mbstowcs(&wc, unicode_buf, 1) != 1 && unicode_idx < MB_CUR_MAX) { 1712 if (mbstowcs(&wc, unicode_buf, 1) != 1) {
1711 delay = 50; 1713 /* Not (yet?) a valid unicode char */
1712 goto poll_again; 1714 if (unicode_idx < MB_CUR_MAX) {
1715 delay = 50;
1716 goto poll_again;
1717 }
1718 /* Invalid sequence. Save all "bad bytes" except first */
1719 read_key_ungets(read_key_buffer, unicode_buf + 1, MB_CUR_MAX - 1);
1720 /*
1721 * ic = unicode_buf[0] sounds even better, but currently
1722 * this does not work: wchar_t[] -> char[] conversion
1723 * when lineedit finishes mangles such "raw bytes"
1724 * (by misinterpreting them as unicode chars):
1725 */
1726 ic = CONFIG_SUBST_WCHAR;
1727 } else {
1728 /* Valid unicode char, return its code */
1729 ic = wc;
1713 } 1730 }
1714 ic = wc;
1715 } 1731 }
1716#endif 1732#endif
1717 } while (errno == EAGAIN); 1733 } while (errno == EAGAIN);
diff --git a/libbb/read_key.c b/libbb/read_key.c
index a2253ce3e..98b3131de 100644
--- a/libbb/read_key.c
+++ b/libbb/read_key.c
@@ -246,3 +246,12 @@ int64_t FAST_FUNC read_key(int fd, char *buffer)
246 buffer[-1] = 0; 246 buffer[-1] = 0;
247 goto start_over; 247 goto start_over;
248} 248}
249
250void FAST_FUNC read_key_ungets(char *buffer, const char *str, unsigned len)
251{
252 unsigned cur_len = (unsigned char)buffer[0];
253 if (len > KEYCODE_BUFFER_SIZE-1 - cur_len)
254 len = KEYCODE_BUFFER_SIZE-1 - cur_len;
255 memcpy(buffer + 1 + cur_len, str, len);
256 buffer[0] += cur_len + len;
257}
diff --git a/testsuite/ash.tests b/testsuite/ash.tests
new file mode 100755
index 000000000..4b6efe42c
--- /dev/null
+++ b/testsuite/ash.tests
@@ -0,0 +1,42 @@
1#!/bin/sh
2#
3# These are not ash tests, we use ash as a way to test lineedit!
4#
5# Copyright 2010 by Denys Vlasenko
6# Licensed under GPL v2, see file LICENSE for details.
7
8. ./testing.sh
9
10# testing "test name" "options" "expected result" "file input" "stdin"
11
12testing "One byte which is not valid unicode char followed by valid input" \
13 "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
14 "\
1500000000 3f 2d 0a |?-.|
1600000003
17" \
18 "" \
19 "echo \xff- | hexdump -C >output; exit; exit; exit; exit\n" \
20
21testing "30 bytes which are not valid unicode chars followed by valid input" \
22 "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
23 "\
2400000000 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f |????????????????|
2500000010 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 2d 0a |??????????????-.|
2600000020
27" \
28 "" \
29 "echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >output; exit; exit; exit; exit\n" \
30
31# Not sure this behavior is perfect: we lose all invalid input which precedes
32# arrow keys and such. In this example, \xff\xff are lost
33testing "2 bytes which are not valid unicode chars followed by left arrow key" \
34 "script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
35 "\
3600000000 3d 2d 0a |=-.|
3700000003
38" \
39 "" \
40 "echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >output; exit; exit; exit; exit\n" \
41
42exit $FAILCOUNT