diff options
author | Tomas Heinrich <heinrich.tomas@gmail.com> | 2010-04-29 13:43:39 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-04-29 13:43:39 +0200 |
commit | a659b81dfa435aa19130a8c7dd1bfe8fa9a22131 (patch) | |
tree | 6e42922ad325142290898143818bcd819f799c27 /libbb/lineedit.c | |
parent | 25b10d97e66a74d4e5a5571afb1b8369c31eefca (diff) | |
download | busybox-w32-a659b81dfa435aa19130a8c7dd1bfe8fa9a22131.tar.gz busybox-w32-a659b81dfa435aa19130a8c7dd1bfe8fa9a22131.tar.bz2 busybox-w32-a659b81dfa435aa19130a8c7dd1bfe8fa9a22131.zip |
libbb/lineedit: add support for preserving "broken" (non-unicode) chars
Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb/lineedit.c')
-rw-r--r-- | libbb/lineedit.c | 62 |
1 files changed, 51 insertions, 11 deletions
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index dc90846f9..622f9ddfc 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -68,7 +68,7 @@ | |||
68 | 68 | ||
69 | #undef CHAR_T | 69 | #undef CHAR_T |
70 | #if ENABLE_UNICODE_SUPPORT | 70 | #if ENABLE_UNICODE_SUPPORT |
71 | # define BB_NUL L'\0' | 71 | # define BB_NUL ((wchar_t)0) |
72 | # define CHAR_T wchar_t | 72 | # define CHAR_T wchar_t |
73 | static bool BB_isspace(CHAR_T c) { return ((unsigned)c < 256 && isspace(c)); } | 73 | static bool BB_isspace(CHAR_T c) { return ((unsigned)c < 256 && isspace(c)); } |
74 | # if ENABLE_FEATURE_EDITING_VI | 74 | # if ENABLE_FEATURE_EDITING_VI |
@@ -92,6 +92,14 @@ static bool BB_ispunct(CHAR_T c) { return ((unsigned)c < 256 && ispunct(c)); } | |||
92 | #endif | 92 | #endif |
93 | 93 | ||
94 | 94 | ||
95 | # if ENABLE_UNICODE_PRESERVE_BROKEN | ||
96 | # define unicode_mark_inv_wchar(wc) ((wc) | 0x20000000) | ||
97 | # define unicode_is_inv_wchar(wc) ((wc) & 0x20000000) | ||
98 | # else | ||
99 | # define unicode_is_inv_wchar(wc) 0 | ||
100 | # endif | ||
101 | |||
102 | |||
95 | enum { | 103 | enum { |
96 | /* We use int16_t for positions, need to limit line len */ | 104 | /* We use int16_t for positions, need to limit line len */ |
97 | MAX_LINELEN = CONFIG_FEATURE_EDITING_MAX_LEN < 0x7ff0 | 105 | MAX_LINELEN = CONFIG_FEATURE_EDITING_MAX_LEN < 0x7ff0 |
@@ -208,24 +216,58 @@ static size_t load_string(const char *src, int maxsize) | |||
208 | ssize_t len = mbstowcs(command_ps, src, maxsize - 1); | 216 | ssize_t len = mbstowcs(command_ps, src, maxsize - 1); |
209 | if (len < 0) | 217 | if (len < 0) |
210 | len = 0; | 218 | len = 0; |
211 | command_ps[len] = L'\0'; | 219 | command_ps[len] = 0; |
212 | return len; | 220 | return len; |
213 | } | 221 | } |
214 | static size_t save_string(char *dst, int maxsize) | 222 | static unsigned save_string(char *dst, unsigned maxsize) |
215 | { | 223 | { |
224 | #if !ENABLE_UNICODE_PRESERVE_BROKEN | ||
216 | ssize_t len = wcstombs(dst, command_ps, maxsize - 1); | 225 | ssize_t len = wcstombs(dst, command_ps, maxsize - 1); |
217 | if (len < 0) | 226 | if (len < 0) |
218 | len = 0; | 227 | len = 0; |
219 | dst[len] = '\0'; | 228 | dst[len] = '\0'; |
220 | return len; | 229 | return len; |
230 | #else | ||
231 | unsigned dstpos = 0; | ||
232 | unsigned srcpos = 0; | ||
233 | |||
234 | maxsize--; | ||
235 | while (dstpos < maxsize) { | ||
236 | wchar_t wc; | ||
237 | int n = srcpos; | ||
238 | while ((wc = command_ps[srcpos]) != 0 | ||
239 | && !unicode_is_inv_wchar(wc) | ||
240 | ) { | ||
241 | srcpos++; | ||
242 | } | ||
243 | command_ps[srcpos] = 0; | ||
244 | n = wcstombs(dst + dstpos, command_ps + n, maxsize - dstpos); | ||
245 | if (n < 0) /* should not happen */ | ||
246 | break; | ||
247 | dstpos += n; | ||
248 | if (wc == 0) /* usually is */ | ||
249 | break; | ||
250 | /* We do have invalid byte here! */ | ||
251 | command_ps[srcpos] = wc; /* restore it */ | ||
252 | srcpos++; | ||
253 | if (dstpos == maxsize) | ||
254 | break; | ||
255 | dst[dstpos++] = (char) wc; | ||
256 | } | ||
257 | dst[dstpos] = '\0'; | ||
258 | return dstpos; | ||
259 | #endif | ||
221 | } | 260 | } |
222 | /* I thought just fputwc(c, stdout) would work. But no... */ | 261 | /* I thought just fputwc(c, stdout) would work. But no... */ |
223 | static void BB_PUTCHAR(wchar_t c) | 262 | static void BB_PUTCHAR(wchar_t c) |
224 | { | 263 | { |
225 | char buf[MB_CUR_MAX + 1]; | 264 | char buf[MB_CUR_MAX + 1]; |
226 | mbstate_t mbst = { 0 }; | 265 | mbstate_t mbst = { 0 }; |
227 | ssize_t len = wcrtomb(buf, c, &mbst); | 266 | ssize_t len; |
228 | 267 | ||
268 | if (unicode_is_inv_wchar(c)) | ||
269 | c = CONFIG_SUBST_WCHAR; | ||
270 | len = wcrtomb(buf, c, &mbst); | ||
229 | if (len > 0) { | 271 | if (len > 0) { |
230 | buf[len] = '\0'; | 272 | buf[len] = '\0'; |
231 | fputs(buf, stdout); | 273 | fputs(buf, stdout); |
@@ -238,7 +280,7 @@ static size_t load_string(const char *src, int maxsize) | |||
238 | return strlen(command_ps); | 280 | return strlen(command_ps); |
239 | } | 281 | } |
240 | # if ENABLE_FEATURE_TAB_COMPLETION | 282 | # if ENABLE_FEATURE_TAB_COMPLETION |
241 | static void save_string(char *dst, int maxsize) | 283 | static void save_string(char *dst, unsigned maxsize) |
242 | { | 284 | { |
243 | safe_strncpy(dst, command_ps, maxsize); | 285 | safe_strncpy(dst, command_ps, maxsize); |
244 | } | 286 | } |
@@ -1719,13 +1761,11 @@ static int lineedit_read_key(char *read_key_buffer) | |||
1719 | pushback: | 1761 | pushback: |
1720 | /* Invalid sequence. Save all "bad bytes" except first */ | 1762 | /* Invalid sequence. Save all "bad bytes" except first */ |
1721 | read_key_ungets(read_key_buffer, unicode_buf + 1, unicode_idx - 1); | 1763 | read_key_ungets(read_key_buffer, unicode_buf + 1, unicode_idx - 1); |
1722 | /* | 1764 | # if !ENABLE_UNICODE_PRESERVE_BROKEN |
1723 | * ic = unicode_buf[0] sounds even better, but currently | ||
1724 | * this does not work: wchar_t[] -> char[] conversion | ||
1725 | * when lineedit finishes mangles such "raw bytes" | ||
1726 | * (by misinterpreting them as unicode chars): | ||
1727 | */ | ||
1728 | ic = CONFIG_SUBST_WCHAR; | 1765 | ic = CONFIG_SUBST_WCHAR; |
1766 | # else | ||
1767 | ic = unicode_mark_inv_wchar(unicode_buf[0]); | ||
1768 | # endif | ||
1729 | } else { | 1769 | } else { |
1730 | /* Valid unicode char, return its code */ | 1770 | /* Valid unicode char, return its code */ |
1731 | ic = wc; | 1771 | ic = wc; |