aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tomas Heinrich <heinrich.tomas@gmail.com> 2010-03-18 18:35:37 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2010-03-18 18:35:37 +0100
commit c5c006c10c060e7f1a97250d039051b93ed390b2 (patch)
tree b281136c99e6a27a530282a0b1b7eaf938704bb2
parent 98f1dc12f1554aca6c3743bec1c3d8982a077f7c (diff)
download busybox-w32-c5c006c10c060e7f1a97250d039051b93ed390b2.tar.gz
busybox-w32-c5c006c10c060e7f1a97250d039051b93ed390b2.tar.bz2
busybox-w32-c5c006c10c060e7f1a97250d039051b93ed390b2.zip
lineedit: first shot at optional unicode bidi input support
function                                             old     new   delta
read_line_input                                     4886    5003    +117
in_uint16_table                                        -      97     +97
in_interval_table                                      -      78     +78
static.rtl_b                                           -      68     +68
unicode_isrtl                                          -      55     +55
isrtl_str                                              -      51     +51
static.rtl_p                                           -      42     +42
unicode_conv_to_printable2                           633     477    -156
------------------------------------------------------------------------------
(add/remove: 6/0 grow/shrink: 1/1 up/down: 508/-156)           Total: 352 bytes

Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- Config.in 8
-rw-r--r-- include/unicode.h 17
-rw-r--r-- libbb/lineedit.c 44
-rw-r--r-- libbb/unicode.c 132
-rw-r--r-- libbb/unicode_wcwidth.c 6
5 files changed, 189 insertions, 18 deletions
diff --git a/Config.in b/Config.in
index e7bb05dce..e0c01f3ef 100644
--- a/Config.in
+++ b/Config.in
@@ -196,6 +196,14 @@ config UNICODE_WIDE_WCHARS
196 With this option off, any Unicode char with width > 1 196 With this option off, any Unicode char with width > 1
197 is substituted on output. 197 is substituted on output.
198 198
199config UNICODE_BIDI_SUPPORT
200 bool "Bidirectional character-aware line input"
201 default y
202 depends on FEATURE_ASSUME_UNICODE && !LOCALE_SUPPORT
203 help
204 With this option on, right-to-left Unicode characters
205 are treated differently on input (e.g. cursor movement).
206
199config LONG_OPTS 207config LONG_OPTS
200 bool "Support for --long-options" 208 bool "Support for --long-options"
201 default y 209 default y
diff --git a/include/unicode.h b/include/unicode.h
index 857aab138..05bdbca02 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -18,6 +18,8 @@ enum {
18 UNICODE_ON = 2, 18 UNICODE_ON = 2,
19}; 19};
20 20
21#define unicode_isrtl(wc) 0
22
21#if !ENABLE_FEATURE_ASSUME_UNICODE 23#if !ENABLE_FEATURE_ASSUME_UNICODE
22 24
23# define unicode_strlen(string) strlen(string) 25# define unicode_strlen(string) strlen(string)
@@ -26,6 +28,17 @@ enum {
26 28
27#else 29#else
28 30
31# if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000
32# define LAST_SUPPORTED_WCHAR 0x2ffff
33# else
34# define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR
35# endif
36
37# if LAST_SUPPORTED_WCHAR < 0x590
38# undef ENABLE_UNICODE_BIDI_SUPPORT
39# define ENABLE_UNICODE_BIDI_SUPPORT 0
40# endif
41
29size_t FAST_FUNC unicode_strlen(const char *string); 42size_t FAST_FUNC unicode_strlen(const char *string);
30enum { 43enum {
31 UNI_FLAG_PAD = (1 << 0), 44 UNI_FLAG_PAD = (1 << 0),
@@ -78,6 +91,10 @@ size_t wcrtomb(char *s, wchar_t wc, mbstate_t *ps) FAST_FUNC;
78int iswspace(wint_t wc) FAST_FUNC; 91int iswspace(wint_t wc) FAST_FUNC;
79int iswalnum(wint_t wc) FAST_FUNC; 92int iswalnum(wint_t wc) FAST_FUNC;
80int iswpunct(wint_t wc) FAST_FUNC; 93int iswpunct(wint_t wc) FAST_FUNC;
94# if ENABLE_UNICODE_BIDI_SUPPORT
95# undef unicode_isrtl
96int unicode_isrtl(wint_t wc) FAST_FUNC;
97# endif
81 98
82 99
83# endif /* !LOCALE_SUPPORT */ 100# endif /* !LOCALE_SUPPORT */
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index 7c0eef90d..be022e8ae 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -1738,6 +1738,18 @@ static int lineedit_read_key(char *read_key_buffer)
1738 return ic; 1738 return ic;
1739} 1739}
1740 1740
1741#if ENABLE_UNICODE_BIDI_SUPPORT
1742static int isrtl_str(void)
1743{
1744 int idx = cursor;
1745 while (command_ps[idx] >= ' ' && command_ps[idx] < 127 && !isalpha(command_ps[idx]))
1746 idx++;
1747 return unicode_isrtl(command_ps[idx]);
1748}
1749#else
1750# define isrtl_str() 0
1751#endif
1752
1741/* leave out the "vi-mode"-only case labels if vi editing isn't 1753/* leave out the "vi-mode"-only case labels if vi editing isn't
1742 * configured. */ 1754 * configured. */
1743#define vi_case(caselabel) IF_FEATURE_EDITING_VI(case caselabel) 1755#define vi_case(caselabel) IF_FEATURE_EDITING_VI(case caselabel)
@@ -1895,10 +1907,9 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li
1895 break; 1907 break;
1896 case CTRL('B'): 1908 case CTRL('B'):
1897 vi_case('h'|VI_CMDMODE_BIT:) 1909 vi_case('h'|VI_CMDMODE_BIT:)
1898 vi_case('\b'|VI_CMDMODE_BIT:) 1910 vi_case('\b'|VI_CMDMODE_BIT:) /* ^H */
1899 vi_case('\x7f'|VI_CMDMODE_BIT:) /* DEL */ 1911 vi_case('\x7f'|VI_CMDMODE_BIT:) /* DEL */
1900 /* Control-b -- Move back one character */ 1912 input_backward(1); /* Move back one character */
1901 input_backward(1);
1902 break; 1913 break;
1903 case CTRL('E'): 1914 case CTRL('E'):
1904 vi_case('$'|VI_CMDMODE_BIT:) 1915 vi_case('$'|VI_CMDMODE_BIT:)
@@ -1908,13 +1919,20 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li
1908 case CTRL('F'): 1919 case CTRL('F'):
1909 vi_case('l'|VI_CMDMODE_BIT:) 1920 vi_case('l'|VI_CMDMODE_BIT:)
1910 vi_case(' '|VI_CMDMODE_BIT:) 1921 vi_case(' '|VI_CMDMODE_BIT:)
1911 /* Control-f -- Move forward one character */ 1922 input_forward(); /* Move forward one character */
1912 input_forward();
1913 break; 1923 break;
1914 case '\b': 1924 case '\b': /* ^H */
1915 case '\x7f': /* DEL */ 1925 case '\x7f': /* DEL */
1916 /* Control-h and DEL */ 1926 if (!isrtl_str())
1917 input_backspace(); 1927 input_backspace();
1928 else
1929 input_delete(0);
1930 break;
1931 case KEYCODE_DELETE:
1932 if (!isrtl_str())
1933 input_delete(0);
1934 else
1935 input_backspace();
1918 break; 1936 break;
1919#if ENABLE_FEATURE_TAB_COMPLETION 1937#if ENABLE_FEATURE_TAB_COMPLETION
1920 case '\t': 1938 case '\t':
@@ -2137,9 +2155,6 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li
2137 case KEYCODE_CTRL_RIGHT: 2155 case KEYCODE_CTRL_RIGHT:
2138 ctrl_right(); 2156 ctrl_right();
2139 break; 2157 break;
2140 case KEYCODE_DELETE:
2141 input_delete(0);
2142 break;
2143 case KEYCODE_HOME: 2158 case KEYCODE_HOME:
2144 input_backward(cursor); 2159 input_backward(cursor);
2145 break; 2160 break;
@@ -2205,14 +2220,19 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li
2205 command_ps[cursor] = ic; 2220 command_ps[cursor] = ic;
2206 command_ps[cursor + 1] = BB_NUL; 2221 command_ps[cursor + 1] = BB_NUL;
2207 cmdedit_set_out_char(' '); 2222 cmdedit_set_out_char(' ');
2223 if (unicode_isrtl(ic))
2224 input_backward(1);
2208 } else { 2225 } else {
2209 /* In the middle, insert */ 2226 /* In the middle, insert */
2227 /* is char right-to-left, or "neutral" one (e.g. comma) added to rtl text? */
2228 int rtl = ENABLE_UNICODE_BIDI_SUPPORT ? (unicode_isrtl(ic) || (ic < 127 && !isalpha(ic) && isrtl_str())) : 0;
2210 int sc = cursor; 2229 int sc = cursor;
2211 2230
2212 memmove(command_ps + sc + 1, command_ps + sc, 2231 memmove(command_ps + sc + 1, command_ps + sc,
2213 (command_len - sc) * sizeof(command_ps[0])); 2232 (command_len - sc) * sizeof(command_ps[0]));
2214 command_ps[sc] = ic; 2233 command_ps[sc] = ic;
2215 sc++; 2234 if (!rtl)
2235 sc++;
2216 /* rewrite from cursor */ 2236 /* rewrite from cursor */
2217 input_end(); 2237 input_end();
2218 /* to prev x pos + 1 */ 2238 /* to prev x pos + 1 */
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 7c41ef30b..91667ea72 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -241,6 +241,138 @@ int FAST_FUNC iswpunct(wint_t wc)
241 241
242#include "unicode_wcwidth.c" 242#include "unicode_wcwidth.c"
243 243
244# if ENABLE_UNICODE_BIDI_SUPPORT
245int FAST_FUNC unicode_isrtl(wint_t wc)
246{
247 /* ranges taken from
248 * http://www.unicode.org/Public/5.2.0/ucd/extracted/DerivedBidiClass.txt
249 * Bidi_Class=Left_To_Right | Bidi_Class=Arabic_Letter
250 */
251 static const struct interval rtl_b[] = {
252# define BIG_(a,b) { a, b },
253# define PAIR(a,b)
254 PAIR(0x0590, 0x0590)
255 PAIR(0x05BE, 0x05BE)
256 PAIR(0x05C0, 0x05C0)
257 PAIR(0x05C3, 0x05C3)
258 PAIR(0x05C6, 0x05C6)
259 BIG_(0x05C8, 0x05FF)
260 PAIR(0x0604, 0x0605)
261 PAIR(0x0608, 0x0608)
262 PAIR(0x060B, 0x060B)
263 PAIR(0x060D, 0x060D)
264 BIG_(0x061B, 0x064A)
265 PAIR(0x065F, 0x065F)
266 PAIR(0x066D, 0x066F)
267 BIG_(0x0671, 0x06D5)
268 PAIR(0x06E5, 0x06E6)
269 PAIR(0x06EE, 0x06EF)
270 BIG_(0x06FA, 0x070E)
271 PAIR(0x0710, 0x0710)
272 BIG_(0x0712, 0x072F)
273 BIG_(0x074B, 0x07A5)
274 BIG_(0x07B1, 0x07EA)
275 PAIR(0x07F4, 0x07F5)
276 BIG_(0x07FA, 0x0815)
277 PAIR(0x081A, 0x081A)
278 PAIR(0x0824, 0x0824)
279 PAIR(0x0828, 0x0828)
280 BIG_(0x082E, 0x08FF)
281 PAIR(0x200F, 0x200F)
282 PAIR(0x202B, 0x202B)
283 PAIR(0x202E, 0x202E)
284 BIG_(0xFB1D, 0xFB1D)
285 BIG_(0xFB1F, 0xFB28)
286 BIG_(0xFB2A, 0xFD3D)
287 BIG_(0xFD40, 0xFDCF)
288 BIG_(0xFDC8, 0xFDCF)
289 BIG_(0xFDF0, 0xFDFC)
290 BIG_(0xFDFE, 0xFDFF)
291 BIG_(0xFE70, 0xFEFE)
292 /* Probably not necessary
293 {0x10800, 0x1091E},
294 {0x10920, 0x10A00},
295 {0x10A04, 0x10A04},
296 {0x10A07, 0x10A0B},
297 {0x10A10, 0x10A37},
298 {0x10A3B, 0x10A3E},
299 {0x10A40, 0x10A7F},
300 {0x10B36, 0x10B38},
301 {0x10B40, 0x10E5F},
302 {0x10E7F, 0x10FFF},
303 {0x1E800, 0x1EFFF}
304 */
305# undef BIG_
306# undef PAIR
307 };
308
309 static const uint16_t rtl_p[] = {
310# define BIG_(a,b)
311# define PAIR(a,b) (a << 2) | (b-a),
312 /* Exact copy-n-paste of the above: */
313 PAIR(0x0590, 0x0590)
314 PAIR(0x05BE, 0x05BE)
315 PAIR(0x05C0, 0x05C0)
316 PAIR(0x05C3, 0x05C3)
317 PAIR(0x05C6, 0x05C6)
318 BIG_(0x05C8, 0x05FF)
319 PAIR(0x0604, 0x0605)
320 PAIR(0x0608, 0x0608)
321 PAIR(0x060B, 0x060B)
322 PAIR(0x060D, 0x060D)
323 BIG_(0x061B, 0x064A)
324 PAIR(0x065F, 0x065F)
325 PAIR(0x066D, 0x066F)
326 BIG_(0x0671, 0x06D5)
327 PAIR(0x06E5, 0x06E6)
328 PAIR(0x06EE, 0x06EF)
329 BIG_(0x06FA, 0x070E)
330 PAIR(0x0710, 0x0710)
331 BIG_(0x0712, 0x072F)
332 BIG_(0x074B, 0x07A5)
333 BIG_(0x07B1, 0x07EA)
334 PAIR(0x07F4, 0x07F5)
335 BIG_(0x07FA, 0x0815)
336 PAIR(0x081A, 0x081A)
337 PAIR(0x0824, 0x0824)
338 PAIR(0x0828, 0x0828)
339 BIG_(0x082E, 0x08FF)
340 PAIR(0x200F, 0x200F)
341 PAIR(0x202B, 0x202B)
342 PAIR(0x202E, 0x202E)
343 BIG_(0xFB1D, 0xFB1D)
344 BIG_(0xFB1F, 0xFB28)
345 BIG_(0xFB2A, 0xFD3D)
346 BIG_(0xFD40, 0xFDCF)
347 BIG_(0xFDC8, 0xFDCF)
348 BIG_(0xFDF0, 0xFDFC)
349 BIG_(0xFDFE, 0xFDFF)
350 BIG_(0xFE70, 0xFEFE)
351 /* Probably not necessary
352 {0x10800, 0x1091E},
353 {0x10920, 0x10A00},
354 {0x10A04, 0x10A04},
355 {0x10A07, 0x10A0B},
356 {0x10A10, 0x10A37},
357 {0x10A3B, 0x10A3E},
358 {0x10A40, 0x10A7F},
359 {0x10B36, 0x10B38},
360 {0x10B40, 0x10E5F},
361 {0x10E7F, 0x10FFF},
362 {0x1E800, 0x1EFFF}
363 */
364# undef BIG_
365# undef PAIR
366 };
367
368 if (in_interval_table(wc, rtl_b, ARRAY_SIZE(rtl_b) - 1))
369 return 1;
370 if (in_uint16_table(wc, rtl_p, ARRAY_SIZE(rtl_p) - 1))
371 return 1;
372 return 0;
373}
374# endif /* UNICODE_BIDI_SUPPORT */
375
244#endif /* Homegrown Unicode support */ 376#endif /* Homegrown Unicode support */
245 377
246 378
diff --git a/libbb/unicode_wcwidth.c b/libbb/unicode_wcwidth.c
index a81a98038..7eccc394c 100644
--- a/libbb/unicode_wcwidth.c
+++ b/libbb/unicode_wcwidth.c
@@ -90,12 +90,6 @@
90 * until Unicode committee assigns something there. 90 * until Unicode committee assigns something there.
91 */ 91 */
92 92
93#if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000
94# define LAST_SUPPORTED_WCHAR 0x2ffff
95#else
96# define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR
97#endif
98
99#if LAST_SUPPORTED_WCHAR >= 0x300 93#if LAST_SUPPORTED_WCHAR >= 0x300
100struct interval { 94struct interval {
101 uint16_t first; 95 uint16_t first;