about | summary | refs | log | tree | commit | diff
path: root/libbb
diff options
context:
space:
mode:
author Tomas Heinrich <heinrich.tomas@gmail.com> 2010-04-29 13:43:39 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2010-04-29 13:43:39 +0200
commit a659b81dfa435aa19130a8c7dd1bfe8fa9a22131 (patch)
tree 6e42922ad325142290898143818bcd819f799c27 /libbb
parent 25b10d97e66a74d4e5a5571afb1b8369c31eefca (diff)
download busybox-w32-a659b81dfa435aa19130a8c7dd1bfe8fa9a22131.tar.gz
busybox-w32-a659b81dfa435aa19130a8c7dd1bfe8fa9a22131.tar.bz2
busybox-w32-a659b81dfa435aa19130a8c7dd1bfe8fa9a22131.zip
libbb/lineedit: add support for preserving "broken" (non-unicode) chars
Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
-rw-r--r-- libbb/lineedit.c 62
-rw-r--r-- libbb/unicode.c 12
2 files changed, 54 insertions, 20 deletions
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index dc90846f9..622f9ddfc 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -68,7 +68,7 @@
68 68
69#undef CHAR_T 69#undef CHAR_T
70#if ENABLE_UNICODE_SUPPORT 70#if ENABLE_UNICODE_SUPPORT
71# define BB_NUL L'\0' 71# define BB_NUL ((wchar_t)0)
72# define CHAR_T wchar_t 72# define CHAR_T wchar_t
73static bool BB_isspace(CHAR_T c) { return ((unsigned)c < 256 && isspace(c)); } 73static bool BB_isspace(CHAR_T c) { return ((unsigned)c < 256 && isspace(c)); }
74# if ENABLE_FEATURE_EDITING_VI 74# if ENABLE_FEATURE_EDITING_VI
@@ -92,6 +92,14 @@ static bool BB_ispunct(CHAR_T c) { return ((unsigned)c < 256 && ispunct(c)); }
92#endif 92#endif
93 93
94 94
95# if ENABLE_UNICODE_PRESERVE_BROKEN
96# define unicode_mark_inv_wchar(wc) ((wc) | 0x20000000)
97# define unicode_is_inv_wchar(wc) ((wc) & 0x20000000)
98# else
99# define unicode_is_inv_wchar(wc) 0
100# endif
101
102
95enum { 103enum {
96 /* We use int16_t for positions, need to limit line len */ 104 /* We use int16_t for positions, need to limit line len */
97 MAX_LINELEN = CONFIG_FEATURE_EDITING_MAX_LEN < 0x7ff0 105 MAX_LINELEN = CONFIG_FEATURE_EDITING_MAX_LEN < 0x7ff0
@@ -208,24 +216,58 @@ static size_t load_string(const char *src, int maxsize)
208 ssize_t len = mbstowcs(command_ps, src, maxsize - 1); 216 ssize_t len = mbstowcs(command_ps, src, maxsize - 1);
209 if (len < 0) 217 if (len < 0)
210 len = 0; 218 len = 0;
211 command_ps[len] = L'\0'; 219 command_ps[len] = 0;
212 return len; 220 return len;
213} 221}
214static size_t save_string(char *dst, int maxsize) 222static unsigned save_string(char *dst, unsigned maxsize)
215{ 223{
224#if !ENABLE_UNICODE_PRESERVE_BROKEN
216 ssize_t len = wcstombs(dst, command_ps, maxsize - 1); 225 ssize_t len = wcstombs(dst, command_ps, maxsize - 1);
217 if (len < 0) 226 if (len < 0)
218 len = 0; 227 len = 0;
219 dst[len] = '\0'; 228 dst[len] = '\0';
220 return len; 229 return len;
230#else
231 unsigned dstpos = 0;
232 unsigned srcpos = 0;
233
234 maxsize--;
235 while (dstpos < maxsize) {
236 wchar_t wc;
237 int n = srcpos;
238 while ((wc = command_ps[srcpos]) != 0
239 && !unicode_is_inv_wchar(wc)
240 ) {
241 srcpos++;
242 }
243 command_ps[srcpos] = 0;
244 n = wcstombs(dst + dstpos, command_ps + n, maxsize - dstpos);
245 if (n < 0) /* should not happen */
246 break;
247 dstpos += n;
248 if (wc == 0) /* usually is */
249 break;
250 /* We do have invalid byte here! */
251 command_ps[srcpos] = wc; /* restore it */
252 srcpos++;
253 if (dstpos == maxsize)
254 break;
255 dst[dstpos++] = (char) wc;
256 }
257 dst[dstpos] = '\0';
258 return dstpos;
259#endif
221} 260}
222/* I thought just fputwc(c, stdout) would work. But no... */ 261/* I thought just fputwc(c, stdout) would work. But no... */
223static void BB_PUTCHAR(wchar_t c) 262static void BB_PUTCHAR(wchar_t c)
224{ 263{
225 char buf[MB_CUR_MAX + 1]; 264 char buf[MB_CUR_MAX + 1];
226 mbstate_t mbst = { 0 }; 265 mbstate_t mbst = { 0 };
227 ssize_t len = wcrtomb(buf, c, &mbst); 266 ssize_t len;
228 267
268 if (unicode_is_inv_wchar(c))
269 c = CONFIG_SUBST_WCHAR;
270 len = wcrtomb(buf, c, &mbst);
229 if (len > 0) { 271 if (len > 0) {
230 buf[len] = '\0'; 272 buf[len] = '\0';
231 fputs(buf, stdout); 273 fputs(buf, stdout);
@@ -238,7 +280,7 @@ static size_t load_string(const char *src, int maxsize)
238 return strlen(command_ps); 280 return strlen(command_ps);
239} 281}
240# if ENABLE_FEATURE_TAB_COMPLETION 282# if ENABLE_FEATURE_TAB_COMPLETION
241static void save_string(char *dst, int maxsize) 283static void save_string(char *dst, unsigned maxsize)
242{ 284{
243 safe_strncpy(dst, command_ps, maxsize); 285 safe_strncpy(dst, command_ps, maxsize);
244} 286}
@@ -1719,13 +1761,11 @@ static int lineedit_read_key(char *read_key_buffer)
1719 pushback: 1761 pushback:
1720 /* Invalid sequence. Save all "bad bytes" except first */ 1762 /* Invalid sequence. Save all "bad bytes" except first */
1721 read_key_ungets(read_key_buffer, unicode_buf + 1, unicode_idx - 1); 1763 read_key_ungets(read_key_buffer, unicode_buf + 1, unicode_idx - 1);
1722 /* 1764# if !ENABLE_UNICODE_PRESERVE_BROKEN
1723 * ic = unicode_buf[0] sounds even better, but currently
1724 * this does not work: wchar_t[] -> char[] conversion
1725 * when lineedit finishes mangles such "raw bytes"
1726 * (by misinterpreting them as unicode chars):
1727 */
1728 ic = CONFIG_SUBST_WCHAR; 1765 ic = CONFIG_SUBST_WCHAR;
1766# else
1767 ic = unicode_mark_inv_wchar(unicode_buf[0]);
1768# endif
1729 } else { 1769 } else {
1730 /* Valid unicode char, return its code */ 1770 /* Valid unicode char, return its code */
1731 ic = wc; 1771 ic = wc;
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 83e70b412..d1c6167c7 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -423,7 +423,6 @@ static int wcwidth(unsigned ucs)
423# if LAST_SUPPORTED_WCHAR >= 0x300 423# if LAST_SUPPORTED_WCHAR >= 0x300
424 /* sorted list of non-overlapping intervals of non-spacing characters */ 424 /* sorted list of non-overlapping intervals of non-spacing characters */
425 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ 425 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
426 static const struct interval combining[] = {
427# define BIG_(a,b) { a, b }, 426# define BIG_(a,b) { a, b },
428# define PAIR(a,b) 427# define PAIR(a,b)
429# define ARRAY /* PAIR if < 0x4000 and no more than 4 chars big */ \ 428# define ARRAY /* PAIR if < 0x4000 and no more than 4 chars big */ \
@@ -557,10 +556,9 @@ static int wcwidth(unsigned ucs)
557 BIG_(0xFE20, 0xFE23) \ 556 BIG_(0xFE20, 0xFE23) \
558 BIG_(0xFEFF, 0xFEFF) \ 557 BIG_(0xFEFF, 0xFEFF) \
559 BIG_(0xFFF9, 0xFFFB) 558 BIG_(0xFFF9, 0xFFFB)
560 ARRAY 559 static const struct interval combining[] = { ARRAY };
561# undef BIG_ 560# undef BIG_
562# undef PAIR 561# undef PAIR
563 };
564# define BIG_(a,b) 562# define BIG_(a,b)
565# define PAIR(a,b) (a << 2) | (b-a), 563# define PAIR(a,b) (a << 2) | (b-a),
566 static const uint16_t combining1[] = { ARRAY }; 564 static const uint16_t combining1[] = { ARRAY };
@@ -668,7 +666,6 @@ int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
668 * http://www.unicode.org/Public/5.2.0/ucd/extracted/DerivedBidiClass.txt 666 * http://www.unicode.org/Public/5.2.0/ucd/extracted/DerivedBidiClass.txt
669 * Bidi_Class=Left_To_Right | Bidi_Class=Arabic_Letter 667 * Bidi_Class=Left_To_Right | Bidi_Class=Arabic_Letter
670 */ 668 */
671 static const struct interval rtl_b[] = {
672# define BIG_(a,b) { a, b }, 669# define BIG_(a,b) { a, b },
673# define PAIR(a,b) 670# define PAIR(a,b)
674# define ARRAY \ 671# define ARRAY \
@@ -723,10 +720,9 @@ int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
723 {0x10E7F, 0x10FFF}, 720 {0x10E7F, 0x10FFF},
724 {0x1E800, 0x1EFFF} 721 {0x1E800, 0x1EFFF}
725 */ 722 */
726 ARRAY 723 static const struct interval rtl_b[] = { ARRAY };
727# undef BIG_ 724# undef BIG_
728# undef PAIR 725# undef PAIR
729 };
730# define BIG_(a,b) 726# define BIG_(a,b)
731# define PAIR(a,b) (a << 2) | (b-a), 727# define PAIR(a,b) (a << 2) | (b-a),
732 static const uint16_t rtl_p[] = { ARRAY }; 728 static const uint16_t rtl_p[] = { ARRAY };
@@ -755,7 +751,6 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
755 * White_Space, Other_Neutral, European_Number, European_Separator, 751 * White_Space, Other_Neutral, European_Number, European_Separator,
756 * European_Terminator, Arabic_Number, Common_Separator 752 * European_Terminator, Arabic_Number, Common_Separator
757 */ 753 */
758 static const struct interval neutral_b[] = {
759# define BIG_(a,b) { a, b }, 754# define BIG_(a,b) { a, b },
760# define PAIR(a,b) 755# define PAIR(a,b)
761# define ARRAY \ 756# define ARRAY \
@@ -929,10 +924,9 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
929 {0x1F030, 0x1F093}, 924 {0x1F030, 0x1F093},
930 {0x1F100, 0x1F10A} 925 {0x1F100, 0x1F10A}
931 */ 926 */
932 ARRAY 927 static const struct interval neutral_b[] = { ARRAY };
933# undef BIG_ 928# undef BIG_
934# undef PAIR 929# undef PAIR
935 };
936# define BIG_(a,b) 930# define BIG_(a,b)
937# define PAIR(a,b) (a << 2) | (b-a), 931# define PAIR(a,b) (a << 2) | (b-a),
938 static const uint16_t neutral_p[] = { ARRAY }; 932 static const uint16_t neutral_p[] = { ARRAY };