diff options
author | Tomas Heinrich <heinrich.tomas@gmail.com> | 2010-03-18 18:35:37 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-03-18 18:35:37 +0100 |
commit | c5c006c10c060e7f1a97250d039051b93ed390b2 (patch) | |
tree | b281136c99e6a27a530282a0b1b7eaf938704bb2 | |
parent | 98f1dc12f1554aca6c3743bec1c3d8982a077f7c (diff) | |
download | busybox-w32-c5c006c10c060e7f1a97250d039051b93ed390b2.tar.gz busybox-w32-c5c006c10c060e7f1a97250d039051b93ed390b2.tar.bz2 busybox-w32-c5c006c10c060e7f1a97250d039051b93ed390b2.zip |
lineedit: first shot at optional unicode bidi input support
function old new delta
read_line_input 4886 5003 +117
in_uint16_table - 97 +97
in_interval_table - 78 +78
static.rtl_b - 68 +68
unicode_isrtl - 55 +55
isrtl_str - 51 +51
static.rtl_p - 42 +42
unicode_conv_to_printable2 633 477 -156
------------------------------------------------------------------------------
(add/remove: 6/0 grow/shrink: 1/1 up/down: 508/-156) Total: 352 bytes
Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | Config.in | 8 | ||||
-rw-r--r-- | include/unicode.h | 17 | ||||
-rw-r--r-- | libbb/lineedit.c | 44 | ||||
-rw-r--r-- | libbb/unicode.c | 132 | ||||
-rw-r--r-- | libbb/unicode_wcwidth.c | 6 |
5 files changed, 189 insertions, 18 deletions
@@ -196,6 +196,14 @@ config UNICODE_WIDE_WCHARS | |||
196 | With this option off, any Unicode char with width > 1 | 196 | With this option off, any Unicode char with width > 1 |
197 | is substituted on output. | 197 | is substituted on output. |
198 | 198 | ||
199 | config UNICODE_BIDI_SUPPORT | ||
200 | bool "Bidirectional character-aware line input" | ||
201 | default y | ||
202 | depends on FEATURE_ASSUME_UNICODE && !LOCALE_SUPPORT | ||
203 | help | ||
204 | With this option on, right-to-left Unicode characters | ||
205 | are treated differently on input (e.g. cursor movement). | ||
206 | |||
199 | config LONG_OPTS | 207 | config LONG_OPTS |
200 | bool "Support for --long-options" | 208 | bool "Support for --long-options" |
201 | default y | 209 | default y |
diff --git a/include/unicode.h b/include/unicode.h index 857aab138..05bdbca02 100644 --- a/include/unicode.h +++ b/include/unicode.h | |||
@@ -18,6 +18,8 @@ enum { | |||
18 | UNICODE_ON = 2, | 18 | UNICODE_ON = 2, |
19 | }; | 19 | }; |
20 | 20 | ||
21 | #define unicode_isrtl(wc) 0 | ||
22 | |||
21 | #if !ENABLE_FEATURE_ASSUME_UNICODE | 23 | #if !ENABLE_FEATURE_ASSUME_UNICODE |
22 | 24 | ||
23 | # define unicode_strlen(string) strlen(string) | 25 | # define unicode_strlen(string) strlen(string) |
@@ -26,6 +28,17 @@ enum { | |||
26 | 28 | ||
27 | #else | 29 | #else |
28 | 30 | ||
31 | # if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000 | ||
32 | # define LAST_SUPPORTED_WCHAR 0x2ffff | ||
33 | # else | ||
34 | # define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR | ||
35 | # endif | ||
36 | |||
37 | # if LAST_SUPPORTED_WCHAR < 0x590 | ||
38 | # undef ENABLE_UNICODE_BIDI_SUPPORT | ||
39 | # define ENABLE_UNICODE_BIDI_SUPPORT 0 | ||
40 | # endif | ||
41 | |||
29 | size_t FAST_FUNC unicode_strlen(const char *string); | 42 | size_t FAST_FUNC unicode_strlen(const char *string); |
30 | enum { | 43 | enum { |
31 | UNI_FLAG_PAD = (1 << 0), | 44 | UNI_FLAG_PAD = (1 << 0), |
@@ -78,6 +91,10 @@ size_t wcrtomb(char *s, wchar_t wc, mbstate_t *ps) FAST_FUNC; | |||
78 | int iswspace(wint_t wc) FAST_FUNC; | 91 | int iswspace(wint_t wc) FAST_FUNC; |
79 | int iswalnum(wint_t wc) FAST_FUNC; | 92 | int iswalnum(wint_t wc) FAST_FUNC; |
80 | int iswpunct(wint_t wc) FAST_FUNC; | 93 | int iswpunct(wint_t wc) FAST_FUNC; |
94 | # if ENABLE_UNICODE_BIDI_SUPPORT | ||
95 | # undef unicode_isrtl | ||
96 | int unicode_isrtl(wint_t wc) FAST_FUNC; | ||
97 | # endif | ||
81 | 98 | ||
82 | 99 | ||
83 | # endif /* !LOCALE_SUPPORT */ | 100 | # endif /* !LOCALE_SUPPORT */ |
diff --git a/libbb/lineedit.c b/libbb/lineedit.c index 7c0eef90d..be022e8ae 100644 --- a/libbb/lineedit.c +++ b/libbb/lineedit.c | |||
@@ -1738,6 +1738,18 @@ static int lineedit_read_key(char *read_key_buffer) | |||
1738 | return ic; | 1738 | return ic; |
1739 | } | 1739 | } |
1740 | 1740 | ||
1741 | #if ENABLE_UNICODE_BIDI_SUPPORT | ||
1742 | static int isrtl_str(void) | ||
1743 | { | ||
1744 | int idx = cursor; | ||
1745 | while (command_ps[idx] >= ' ' && command_ps[idx] < 127 && !isalpha(command_ps[idx])) | ||
1746 | idx++; | ||
1747 | return unicode_isrtl(command_ps[idx]); | ||
1748 | } | ||
1749 | #else | ||
1750 | # define isrtl_str() 0 | ||
1751 | #endif | ||
1752 | |||
1741 | /* leave out the "vi-mode"-only case labels if vi editing isn't | 1753 | /* leave out the "vi-mode"-only case labels if vi editing isn't |
1742 | * configured. */ | 1754 | * configured. */ |
1743 | #define vi_case(caselabel) IF_FEATURE_EDITING_VI(case caselabel) | 1755 | #define vi_case(caselabel) IF_FEATURE_EDITING_VI(case caselabel) |
@@ -1895,10 +1907,9 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li | |||
1895 | break; | 1907 | break; |
1896 | case CTRL('B'): | 1908 | case CTRL('B'): |
1897 | vi_case('h'|VI_CMDMODE_BIT:) | 1909 | vi_case('h'|VI_CMDMODE_BIT:) |
1898 | vi_case('\b'|VI_CMDMODE_BIT:) | 1910 | vi_case('\b'|VI_CMDMODE_BIT:) /* ^H */ |
1899 | vi_case('\x7f'|VI_CMDMODE_BIT:) /* DEL */ | 1911 | vi_case('\x7f'|VI_CMDMODE_BIT:) /* DEL */ |
1900 | /* Control-b -- Move back one character */ | 1912 | input_backward(1); /* Move back one character */ |
1901 | input_backward(1); | ||
1902 | break; | 1913 | break; |
1903 | case CTRL('E'): | 1914 | case CTRL('E'): |
1904 | vi_case('$'|VI_CMDMODE_BIT:) | 1915 | vi_case('$'|VI_CMDMODE_BIT:) |
@@ -1908,13 +1919,20 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li | |||
1908 | case CTRL('F'): | 1919 | case CTRL('F'): |
1909 | vi_case('l'|VI_CMDMODE_BIT:) | 1920 | vi_case('l'|VI_CMDMODE_BIT:) |
1910 | vi_case(' '|VI_CMDMODE_BIT:) | 1921 | vi_case(' '|VI_CMDMODE_BIT:) |
1911 | /* Control-f -- Move forward one character */ | 1922 | input_forward(); /* Move forward one character */ |
1912 | input_forward(); | ||
1913 | break; | 1923 | break; |
1914 | case '\b': | 1924 | case '\b': /* ^H */ |
1915 | case '\x7f': /* DEL */ | 1925 | case '\x7f': /* DEL */ |
1916 | /* Control-h and DEL */ | 1926 | if (!isrtl_str()) |
1917 | input_backspace(); | 1927 | input_backspace(); |
1928 | else | ||
1929 | input_delete(0); | ||
1930 | break; | ||
1931 | case KEYCODE_DELETE: | ||
1932 | if (!isrtl_str()) | ||
1933 | input_delete(0); | ||
1934 | else | ||
1935 | input_backspace(); | ||
1918 | break; | 1936 | break; |
1919 | #if ENABLE_FEATURE_TAB_COMPLETION | 1937 | #if ENABLE_FEATURE_TAB_COMPLETION |
1920 | case '\t': | 1938 | case '\t': |
@@ -2137,9 +2155,6 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li | |||
2137 | case KEYCODE_CTRL_RIGHT: | 2155 | case KEYCODE_CTRL_RIGHT: |
2138 | ctrl_right(); | 2156 | ctrl_right(); |
2139 | break; | 2157 | break; |
2140 | case KEYCODE_DELETE: | ||
2141 | input_delete(0); | ||
2142 | break; | ||
2143 | case KEYCODE_HOME: | 2158 | case KEYCODE_HOME: |
2144 | input_backward(cursor); | 2159 | input_backward(cursor); |
2145 | break; | 2160 | break; |
@@ -2205,14 +2220,19 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li | |||
2205 | command_ps[cursor] = ic; | 2220 | command_ps[cursor] = ic; |
2206 | command_ps[cursor + 1] = BB_NUL; | 2221 | command_ps[cursor + 1] = BB_NUL; |
2207 | cmdedit_set_out_char(' '); | 2222 | cmdedit_set_out_char(' '); |
2223 | if (unicode_isrtl(ic)) | ||
2224 | input_backward(1); | ||
2208 | } else { | 2225 | } else { |
2209 | /* In the middle, insert */ | 2226 | /* In the middle, insert */ |
2227 | /* is char right-to-left, or "neutral" one (e.g. comma) added to rtl text? */ | ||
2228 | int rtl = ENABLE_UNICODE_BIDI_SUPPORT ? (unicode_isrtl(ic) || (ic < 127 && !isalpha(ic) && isrtl_str())) : 0; | ||
2210 | int sc = cursor; | 2229 | int sc = cursor; |
2211 | 2230 | ||
2212 | memmove(command_ps + sc + 1, command_ps + sc, | 2231 | memmove(command_ps + sc + 1, command_ps + sc, |
2213 | (command_len - sc) * sizeof(command_ps[0])); | 2232 | (command_len - sc) * sizeof(command_ps[0])); |
2214 | command_ps[sc] = ic; | 2233 | command_ps[sc] = ic; |
2215 | sc++; | 2234 | if (!rtl) |
2235 | sc++; | ||
2216 | /* rewrite from cursor */ | 2236 | /* rewrite from cursor */ |
2217 | input_end(); | 2237 | input_end(); |
2218 | /* to prev x pos + 1 */ | 2238 | /* to prev x pos + 1 */ |
diff --git a/libbb/unicode.c b/libbb/unicode.c index 7c41ef30b..91667ea72 100644 --- a/libbb/unicode.c +++ b/libbb/unicode.c | |||
@@ -241,6 +241,138 @@ int FAST_FUNC iswpunct(wint_t wc) | |||
241 | 241 | ||
242 | #include "unicode_wcwidth.c" | 242 | #include "unicode_wcwidth.c" |
243 | 243 | ||
244 | # if ENABLE_UNICODE_BIDI_SUPPORT | ||
245 | int FAST_FUNC unicode_isrtl(wint_t wc) | ||
246 | { | ||
247 | /* ranges taken from | ||
248 | * http://www.unicode.org/Public/5.2.0/ucd/extracted/DerivedBidiClass.txt | ||
249 | * Bidi_Class=Right_To_Left | Bidi_Class=Arabic_Letter
250 | */ | ||
251 | static const struct interval rtl_b[] = { | ||
252 | # define BIG_(a,b) { a, b }, | ||
253 | # define PAIR(a,b) | ||
254 | PAIR(0x0590, 0x0590) | ||
255 | PAIR(0x05BE, 0x05BE) | ||
256 | PAIR(0x05C0, 0x05C0) | ||
257 | PAIR(0x05C3, 0x05C3) | ||
258 | PAIR(0x05C6, 0x05C6) | ||
259 | BIG_(0x05C8, 0x05FF) | ||
260 | PAIR(0x0604, 0x0605) | ||
261 | PAIR(0x0608, 0x0608) | ||
262 | PAIR(0x060B, 0x060B) | ||
263 | PAIR(0x060D, 0x060D) | ||
264 | BIG_(0x061B, 0x064A) | ||
265 | PAIR(0x065F, 0x065F) | ||
266 | PAIR(0x066D, 0x066F) | ||
267 | BIG_(0x0671, 0x06D5) | ||
268 | PAIR(0x06E5, 0x06E6) | ||
269 | PAIR(0x06EE, 0x06EF) | ||
270 | BIG_(0x06FA, 0x070E) | ||
271 | PAIR(0x0710, 0x0710) | ||
272 | BIG_(0x0712, 0x072F) | ||
273 | BIG_(0x074B, 0x07A5) | ||
274 | BIG_(0x07B1, 0x07EA) | ||
275 | PAIR(0x07F4, 0x07F5) | ||
276 | BIG_(0x07FA, 0x0815) | ||
277 | PAIR(0x081A, 0x081A) | ||
278 | PAIR(0x0824, 0x0824) | ||
279 | PAIR(0x0828, 0x0828) | ||
280 | BIG_(0x082E, 0x08FF) | ||
281 | PAIR(0x200F, 0x200F) | ||
282 | PAIR(0x202B, 0x202B) | ||
283 | PAIR(0x202E, 0x202E) | ||
284 | BIG_(0xFB1D, 0xFB1D) | ||
285 | BIG_(0xFB1F, 0xFB28) | ||
286 | BIG_(0xFB2A, 0xFD3D) | ||
287 | BIG_(0xFD40, 0xFDCF) | ||
288 | BIG_(0xFDC8, 0xFDCF) | ||
289 | BIG_(0xFDF0, 0xFDFC) | ||
290 | BIG_(0xFDFE, 0xFDFF) | ||
291 | BIG_(0xFE70, 0xFEFE) | ||
292 | /* Probably not necessary | ||
293 | {0x10800, 0x1091E}, | ||
294 | {0x10920, 0x10A00}, | ||
295 | {0x10A04, 0x10A04}, | ||
296 | {0x10A07, 0x10A0B}, | ||
297 | {0x10A10, 0x10A37}, | ||
298 | {0x10A3B, 0x10A3E}, | ||
299 | {0x10A40, 0x10A7F}, | ||
300 | {0x10B36, 0x10B38}, | ||
301 | {0x10B40, 0x10E5F}, | ||
302 | {0x10E7F, 0x10FFF}, | ||
303 | {0x1E800, 0x1EFFF} | ||
304 | */ | ||
305 | # undef BIG_ | ||
306 | # undef PAIR | ||
307 | }; | ||
308 | |||
309 | static const uint16_t rtl_p[] = { | ||
310 | # define BIG_(a,b) | ||
311 | # define PAIR(a,b) (a << 2) | (b-a), | ||
312 | /* Exact copy-n-paste of the above: */ | ||
313 | PAIR(0x0590, 0x0590) | ||
314 | PAIR(0x05BE, 0x05BE) | ||
315 | PAIR(0x05C0, 0x05C0) | ||
316 | PAIR(0x05C3, 0x05C3) | ||
317 | PAIR(0x05C6, 0x05C6) | ||
318 | BIG_(0x05C8, 0x05FF) | ||
319 | PAIR(0x0604, 0x0605) | ||
320 | PAIR(0x0608, 0x0608) | ||
321 | PAIR(0x060B, 0x060B) | ||
322 | PAIR(0x060D, 0x060D) | ||
323 | BIG_(0x061B, 0x064A) | ||
324 | PAIR(0x065F, 0x065F) | ||
325 | PAIR(0x066D, 0x066F) | ||
326 | BIG_(0x0671, 0x06D5) | ||
327 | PAIR(0x06E5, 0x06E6) | ||
328 | PAIR(0x06EE, 0x06EF) | ||
329 | BIG_(0x06FA, 0x070E) | ||
330 | PAIR(0x0710, 0x0710) | ||
331 | BIG_(0x0712, 0x072F) | ||
332 | BIG_(0x074B, 0x07A5) | ||
333 | BIG_(0x07B1, 0x07EA) | ||
334 | PAIR(0x07F4, 0x07F5) | ||
335 | BIG_(0x07FA, 0x0815) | ||
336 | PAIR(0x081A, 0x081A) | ||
337 | PAIR(0x0824, 0x0824) | ||
338 | PAIR(0x0828, 0x0828) | ||
339 | BIG_(0x082E, 0x08FF) | ||
340 | PAIR(0x200F, 0x200F) | ||
341 | PAIR(0x202B, 0x202B) | ||
342 | PAIR(0x202E, 0x202E) | ||
343 | BIG_(0xFB1D, 0xFB1D) | ||
344 | BIG_(0xFB1F, 0xFB28) | ||
345 | BIG_(0xFB2A, 0xFD3D) | ||
346 | BIG_(0xFD40, 0xFDCF) | ||
347 | BIG_(0xFDC8, 0xFDCF) | ||
348 | BIG_(0xFDF0, 0xFDFC) | ||
349 | BIG_(0xFDFE, 0xFDFF) | ||
350 | BIG_(0xFE70, 0xFEFE) | ||
351 | /* Probably not necessary | ||
352 | {0x10800, 0x1091E}, | ||
353 | {0x10920, 0x10A00}, | ||
354 | {0x10A04, 0x10A04}, | ||
355 | {0x10A07, 0x10A0B}, | ||
356 | {0x10A10, 0x10A37}, | ||
357 | {0x10A3B, 0x10A3E}, | ||
358 | {0x10A40, 0x10A7F}, | ||
359 | {0x10B36, 0x10B38}, | ||
360 | {0x10B40, 0x10E5F}, | ||
361 | {0x10E7F, 0x10FFF}, | ||
362 | {0x1E800, 0x1EFFF} | ||
363 | */ | ||
364 | # undef BIG_ | ||
365 | # undef PAIR | ||
366 | }; | ||
367 | |||
368 | if (in_interval_table(wc, rtl_b, ARRAY_SIZE(rtl_b) - 1)) | ||
369 | return 1; | ||
370 | if (in_uint16_table(wc, rtl_p, ARRAY_SIZE(rtl_p) - 1)) | ||
371 | return 1; | ||
372 | return 0; | ||
373 | } | ||
374 | # endif /* UNICODE_BIDI_SUPPORT */ | ||
375 | |||
244 | #endif /* Homegrown Unicode support */ | 376 | #endif /* Homegrown Unicode support */ |
245 | 377 | ||
246 | 378 | ||
diff --git a/libbb/unicode_wcwidth.c b/libbb/unicode_wcwidth.c index a81a98038..7eccc394c 100644 --- a/libbb/unicode_wcwidth.c +++ b/libbb/unicode_wcwidth.c | |||
@@ -90,12 +90,6 @@ | |||
90 | * until Unicode committee assigns something there. | 90 | * until Unicode committee assigns something there. |
91 | */ | 91 | */ |
92 | 92 | ||
93 | #if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000 | ||
94 | # define LAST_SUPPORTED_WCHAR 0x2ffff | ||
95 | #else | ||
96 | # define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR | ||
97 | #endif | ||
98 | |||
99 | #if LAST_SUPPORTED_WCHAR >= 0x300 | 93 | #if LAST_SUPPORTED_WCHAR >= 0x300 |
100 | struct interval { | 94 | struct interval { |
101 | uint16_t first; | 95 | uint16_t first; |