diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2010-01-30 23:16:21 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-01-30 23:16:21 +0100 |
commit | e17764c8fb566f85020217dd8fd05fb6bc227e98 (patch) | |
tree | cf0a42cc23cd4aae92e69924087610a941c712a4 | |
parent | ecd90fd488cd0c519070656f5cfa0b0959979be9 (diff) | |
download | busybox-w32-e17764c8fb566f85020217dd8fd05fb6bc227e98.tar.gz busybox-w32-e17764c8fb566f85020217dd8fd05fb6bc227e98.tar.bz2 busybox-w32-e17764c8fb566f85020217dd8fd05fb6bc227e98.zip |
further work on unicodization
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | TODO_unicode | 45 | ||||
-rw-r--r-- | coreutils/cal.c | 2 | ||||
-rw-r--r-- | coreutils/df.c | 17 | ||||
-rw-r--r-- | include/unicode.h | 16 | ||||
-rw-r--r-- | libbb/unicode.c | 77 | ||||
-rw-r--r-- | modutils/lsmod.c | 23 | ||||
-rw-r--r-- | networking/udhcp/dumpleases.c | 7 |
7 files changed, 146 insertions, 41 deletions
diff --git a/TODO_unicode b/TODO_unicode new file mode 100644 index 000000000..c29fd933b --- /dev/null +++ b/TODO_unicode | |||
@@ -0,0 +1,45 @@ | |||
1 | Already fixed applets: | ||
2 | cal | ||
3 | lsmod | ||
4 | df | ||
5 | dumpleases | ||
6 | |||
7 | Applets which may need unicode handling (more extensive than sanitizing | ||
8 | of filenames in error messages): | ||
9 | |||
10 | ls - uses unicode_strlen, not scrlen | ||
11 | expand, unexpand - uses unicode_strlen, not scrlen | ||
12 | ash, hush through lineedit - uses unicode_strlen, not scrlen | ||
13 | top - need to sanitize process args | ||
14 | ps - need to sanitize process args | ||
15 | less | ||
16 | more | ||
17 | vi | ||
18 | ed | ||
19 | cut | ||
20 | awk | ||
21 | sed | ||
22 | tr | ||
23 | grep egrep fgrep | ||
24 | fold | ||
25 | sort | ||
26 | head, tail | ||
27 | catv - "display nonprinting chars" - what could this mean for unicode? | ||
28 | wc | ||
29 | chat | ||
30 | dumpkmap | ||
31 | last - just line up columns | ||
32 | man | ||
33 | microcom | ||
34 | strings | ||
35 | watch | ||
36 | |||
37 | Unsure, may need fixing: | ||
38 | |||
39 | hostname - do we really want to protect against bad chars in it? | ||
40 | patch | ||
41 | addgroup, adduser, delgroup, deluser | ||
42 | telnet | ||
43 | telnetd | ||
44 | od | ||
45 | printf | ||
diff --git a/coreutils/cal.c b/coreutils/cal.c index 5ecb9131d..207fa967b 100644 --- a/coreutils/cal.c +++ b/coreutils/cal.c | |||
@@ -135,7 +135,7 @@ int cal_main(int argc UNUSED_PARAM, char **argv) | |||
135 | if (julian) | 135 | if (julian) |
136 | *hp++ = ' '; | 136 | *hp++ = ' '; |
137 | { | 137 | { |
138 | char *two_wchars = unicode_cut_nchars(2, buf); | 138 | char *two_wchars = unicode_conv_to_printable_fixedwidth(NULL, buf, 2); |
139 | strcpy(hp, two_wchars); | 139 | strcpy(hp, two_wchars); |
140 | free(two_wchars); | 140 | free(two_wchars); |
141 | } | 141 | } |
diff --git a/coreutils/df.c b/coreutils/df.c index ae68f0831..4b23faa7a 100644 --- a/coreutils/df.c +++ b/coreutils/df.c | |||
@@ -114,9 +114,6 @@ int df_main(int argc UNUSED_PARAM, char **argv) | |||
114 | while (1) { | 114 | while (1) { |
115 | const char *device; | 115 | const char *device; |
116 | const char *mount_point; | 116 | const char *mount_point; |
117 | #if ENABLE_FEATURE_ASSUME_UNICODE | ||
118 | size_t dev_len; | ||
119 | #endif | ||
120 | 117 | ||
121 | if (mount_table) { | 118 | if (mount_table) { |
122 | mount_entry = getmntent(mount_table); | 119 | mount_entry = getmntent(mount_table); |
@@ -178,11 +175,15 @@ int df_main(int argc UNUSED_PARAM, char **argv) | |||
178 | #endif | 175 | #endif |
179 | 176 | ||
180 | #if ENABLE_FEATURE_ASSUME_UNICODE | 177 | #if ENABLE_FEATURE_ASSUME_UNICODE |
181 | dev_len = unicode_strlen(device); | 178 | { |
182 | if (dev_len > 20) { | 179 | uni_stat_t uni_stat; |
183 | printf("%s\n%20s", device, ""); | 180 | char *uni_dev = unicode_conv_to_printable(&uni_stat, device); |
184 | } else { | 181 | if (uni_stat.unicode_width > 20) { |
185 | printf("%s%*s", device, 20 - (int)dev_len, ""); | 182 | printf("%s\n%20s", uni_dev, ""); |
183 | } else { | ||
184 | printf("%s%*s", uni_dev, 20 - (int)uni_stat.unicode_width, ""); | ||
185 | } | ||
186 | free(uni_dev); | ||
186 | } | 187 | } |
187 | #else | 188 | #else |
188 | if (printf("\n%-20s" + 1, device) > 20) | 189 | if (printf("\n%-20s" + 1, device) > 20) |
diff --git a/include/unicode.h b/include/unicode.h index f1a252cc7..f32e56599 100644 --- a/include/unicode.h +++ b/include/unicode.h | |||
@@ -14,15 +14,25 @@ enum { | |||
14 | #if !ENABLE_FEATURE_ASSUME_UNICODE | 14 | #if !ENABLE_FEATURE_ASSUME_UNICODE |
15 | 15 | ||
16 | # define unicode_strlen(string) strlen(string) | 16 | # define unicode_strlen(string) strlen(string) |
17 | # define unicode_scrlen(string) TODO | ||
18 | # define unicode_status UNICODE_OFF | 17 | # define unicode_status UNICODE_OFF |
19 | # define init_unicode() ((void)0) | 18 | # define init_unicode() ((void)0) |
20 | 19 | ||
21 | #else | 20 | #else |
22 | 21 | ||
23 | size_t FAST_FUNC unicode_strlen(const char *string); | 22 | size_t FAST_FUNC unicode_strlen(const char *string); |
24 | char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src); | 23 | enum { |
25 | unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src); | 24 | UNI_FLAG_PAD = (1 << 0), |
25 | }; | ||
26 | typedef struct uni_stat_t { | ||
27 | unsigned byte_count; | ||
28 | unsigned unicode_count; | ||
29 | unsigned unicode_width; | ||
30 | } uni_stat_t; | ||
31 | //UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src); | ||
32 | //UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags); | ||
33 | char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src); | ||
34 | char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth); | ||
35 | char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width); | ||
26 | 36 | ||
27 | # if ENABLE_LOCALE_SUPPORT | 37 | # if ENABLE_LOCALE_SUPPORT |
28 | 38 | ||
diff --git a/libbb/unicode.c b/libbb/unicode.c index 878af84bc..4e7e3a96a 100644 --- a/libbb/unicode.c +++ b/libbb/unicode.c | |||
@@ -246,29 +246,45 @@ size_t FAST_FUNC unicode_strlen(const char *string) | |||
246 | return width; | 246 | return width; |
247 | } | 247 | } |
248 | 248 | ||
249 | char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src) | 249 | static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags) |
250 | { | 250 | { |
251 | char *dst; | 251 | char *dst; |
252 | unsigned dst_len; | 252 | unsigned dst_len; |
253 | unsigned uni_count; | ||
254 | unsigned uni_width; | ||
253 | 255 | ||
254 | if (unicode_status != UNICODE_ON) { | 256 | if (unicode_status != UNICODE_ON) { |
255 | char *d = dst = xmalloc(width + 1); | 257 | char *d; |
256 | while ((int)--width >= 0) { | 258 | if (flags & UNI_FLAG_PAD) { |
257 | unsigned char c = *src; | 259 | d = dst = xmalloc(width + 1); |
258 | if (c == '\0') { | 260 | while ((int)--width >= 0) { |
259 | do | 261 | unsigned char c = *src; |
260 | *d++ = ' '; | 262 | if (c == '\0') { |
261 | while ((int)--width >= 0); | 263 | do |
262 | break; | 264 | *d++ = ' '; |
265 | while ((int)--width >= 0); | ||
266 | break; | ||
267 | } | ||
268 | *d++ = (c >= ' ' && c < 0x7f) ? c : '?'; | ||
269 | src++; | ||
270 | } | ||
271 | *d = '\0'; | ||
272 | } else { | ||
273 | d = dst = xstrndup(src, width); | ||
274 | while (*d) { | ||
275 | unsigned char c = *d; | ||
276 | if (c < ' ' || c >= 0x7f) | ||
277 | *d = '?'; | ||
278 | d++; | ||
263 | } | 279 | } |
264 | *d++ = (c >= ' ' && c < 0x7f) ? c : '?'; | ||
265 | src++; | ||
266 | } | 280 | } |
267 | *d = '\0'; | 281 | if (stats) |
282 | stats->byte_count = stats->unicode_count = (d - dst); | ||
268 | return dst; | 283 | return dst; |
269 | } | 284 | } |
270 | 285 | ||
271 | dst = NULL; | 286 | dst = NULL; |
287 | uni_count = uni_width = 0; | ||
272 | dst_len = 0; | 288 | dst_len = 0; |
273 | while (1) { | 289 | while (1) { |
274 | int w; | 290 | int w; |
@@ -301,7 +317,7 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src) | |||
301 | /* src = NULL: invalid sequence is seen, | 317 | /* src = NULL: invalid sequence is seen, |
302 | * else: wc is set, src is advanced to next mb char | 318 | * else: wc is set, src is advanced to next mb char |
303 | */ | 319 | */ |
304 | if (src1) {/* no error */ | 320 | if (src1) { /* no error */ |
305 | if (wc == 0) /* end-of-string */ | 321 | if (wc == 0) /* end-of-string */ |
306 | break; | 322 | break; |
307 | src = src1; | 323 | src = src1; |
@@ -315,8 +331,8 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src) | |||
315 | goto subst; | 331 | goto subst; |
316 | w = wcwidth(wc); | 332 | w = wcwidth(wc); |
317 | if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */ | 333 | if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */ |
318 | || (!ENABLE_UNICODE_COMBINING_WCHARS && wc <= 0) | 334 | || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0) |
319 | || (!ENABLE_UNICODE_WIDE_WCHARS && wc > 1) | 335 | || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1) |
320 | ) { | 336 | ) { |
321 | subst: | 337 | subst: |
322 | wc = CONFIG_SUBST_WCHAR; | 338 | wc = CONFIG_SUBST_WCHAR; |
@@ -331,6 +347,8 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src) | |||
331 | break; | 347 | break; |
332 | } | 348 | } |
333 | 349 | ||
350 | uni_count++; | ||
351 | uni_width += w; | ||
334 | dst = xrealloc(dst, dst_len + MB_CUR_MAX); | 352 | dst = xrealloc(dst, dst_len + MB_CUR_MAX); |
335 | #if ENABLE_LOCALE_SUPPORT | 353 | #if ENABLE_LOCALE_SUPPORT |
336 | { | 354 | { |
@@ -343,15 +361,37 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src) | |||
343 | } | 361 | } |
344 | 362 | ||
345 | /* Pad to remaining width */ | 363 | /* Pad to remaining width */ |
346 | dst = xrealloc(dst, dst_len + width + 1); | 364 | if (flags & UNI_FLAG_PAD) { |
347 | while ((int)--width >= 0) { | 365 | dst = xrealloc(dst, dst_len + width + 1); |
348 | dst[dst_len++] = ' '; | 366 | uni_count += width; |
367 | uni_width += width; | ||
368 | while ((int)--width >= 0) { | ||
369 | dst[dst_len++] = ' '; | ||
370 | } | ||
349 | } | 371 | } |
350 | dst[dst_len] = '\0'; | 372 | dst[dst_len] = '\0'; |
373 | if (stats) { | ||
374 | stats->byte_count = dst_len; | ||
375 | stats->unicode_count = uni_count; | ||
376 | stats->unicode_width = uni_width; | ||
377 | } | ||
351 | 378 | ||
352 | return dst; | 379 | return dst; |
353 | } | 380 | } |
381 | char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src) | ||
382 | { | ||
383 | return unicode_conv_to_printable2(stats, src, INT_MAX, 0); | ||
384 | } | ||
385 | char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth) | ||
386 | { | ||
387 | return unicode_conv_to_printable2(stats, src, maxwidth, 0); | ||
388 | } | ||
389 | char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width) | ||
390 | { | ||
391 | return unicode_conv_to_printable2(stats, src, width, UNI_FLAG_PAD); | ||
392 | } | ||
354 | 393 | ||
394 | #ifdef UNUSED | ||
355 | unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src) | 395 | unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src) |
356 | { | 396 | { |
357 | if (unicode_status != UNICODE_ON) { | 397 | if (unicode_status != UNICODE_ON) { |
@@ -382,3 +422,4 @@ unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src) | |||
382 | return 0; | 422 | return 0; |
383 | } | 423 | } |
384 | } | 424 | } |
425 | #endif | ||
diff --git a/modutils/lsmod.c b/modutils/lsmod.c index cc6b6162f..50621c245 100644 --- a/modutils/lsmod.c +++ b/modutils/lsmod.c | |||
@@ -46,9 +46,6 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM) | |||
46 | #if ENABLE_FEATURE_LSMOD_PRETTY_2_6_OUTPUT | 46 | #if ENABLE_FEATURE_LSMOD_PRETTY_2_6_OUTPUT |
47 | char *token[4]; | 47 | char *token[4]; |
48 | parser_t *parser = config_open("/proc/modules"); | 48 | parser_t *parser = config_open("/proc/modules"); |
49 | # if ENABLE_FEATURE_ASSUME_UNICODE | ||
50 | size_t name_len; | ||
51 | # endif | ||
52 | init_unicode(); | 49 | init_unicode(); |
53 | 50 | ||
54 | printf("%-24sSize Used by", "Module"); | 51 | printf("%-24sSize Used by", "Module"); |
@@ -64,9 +61,13 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM) | |||
64 | } else | 61 | } else |
65 | token[3] = (char *) ""; | 62 | token[3] = (char *) ""; |
66 | # if ENABLE_FEATURE_ASSUME_UNICODE | 63 | # if ENABLE_FEATURE_ASSUME_UNICODE |
67 | name_len = unicode_strlen(token[0]); | 64 | { |
68 | name_len = (name_len > 19) ? 0 : 19 - name_len; | 65 | uni_stat_t uni_stat; |
69 | printf("%s%*s %8s %2s %s\n", token[0], name_len, "", token[1], token[2], token[3]); | 66 | char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]); |
67 | unsigned pad_len = (uni_stat.unicode_width > 19) ? 0 : 19 - uni_stat.unicode_width; | ||
68 | printf("%s%*s %8s %2s %s\n", uni_name, pad_len, "", token[1], token[2], token[3]); | ||
69 | free(uni_name); | ||
70 | } | ||
70 | # else | 71 | # else |
71 | printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]); | 72 | printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]); |
72 | # endif | 73 | # endif |
@@ -78,9 +79,13 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM) | |||
78 | // so trimming the trailing char is just what we need! | 79 | // so trimming the trailing char is just what we need! |
79 | token[3][strlen(token[3])-1] = '\0'; | 80 | token[3][strlen(token[3])-1] = '\0'; |
80 | # if ENABLE_FEATURE_ASSUME_UNICODE | 81 | # if ENABLE_FEATURE_ASSUME_UNICODE |
81 | name_len = unicode_strlen(token[0]); | 82 | { |
82 | name_len = (name_len > 19) ? 0 : 19 - name_len; | 83 | uni_stat_t uni_stat; |
83 | printf("%s%*s %8s %2s %s\n", token[0], name_len, "", token[1], token[2], token[3]); | 84 | char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]); |
85 | unsigned pad_len = (uni_stat.unicode_width > 19) ? 0 : 19 - uni_stat.unicode_width; | ||
86 | printf("%s%*s %8s %2s %s\n", uni_name, pad_len, "", token[1], token[2], token[3]); | ||
87 | free(uni_name); | ||
88 | } | ||
84 | # else | 89 | # else |
85 | printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]); | 90 | printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]); |
86 | # endif | 91 | # endif |
diff --git a/networking/udhcp/dumpleases.c b/networking/udhcp/dumpleases.c index d8f5da7fb..eab9713f4 100644 --- a/networking/udhcp/dumpleases.c +++ b/networking/udhcp/dumpleases.c | |||
@@ -71,8 +71,11 @@ int dumpleases_main(int argc UNUSED_PARAM, char **argv) | |||
71 | /* actually, 15+1 and 19+1, +1 is a space between columns */ | 71 | /* actually, 15+1 and 19+1, +1 is a space between columns */ |
72 | /* lease.hostname is char[20] and is always NUL terminated */ | 72 | /* lease.hostname is char[20] and is always NUL terminated */ |
73 | #if ENABLE_FEATURE_ASSUME_UNICODE | 73 | #if ENABLE_FEATURE_ASSUME_UNICODE |
74 | printf(" %-16s%s%*s", inet_ntoa(addr), lease.hostname, | 74 | { |
75 | 20 - (int)unicode_strlen(lease.hostname), ""); | 75 | char *uni_name = unicode_conv_to_printable_fixedwidth(NULL, lease.hostname, 20); |
76 | printf(" %-16s%s", inet_ntoa(addr), uni_name); | ||
77 | free(uni_name); | ||
78 | } | ||
76 | #else | 79 | #else |
77 | printf(" %-16s%-20s", inet_ntoa(addr), lease.hostname); | 80 | printf(" %-16s%-20s", inet_ntoa(addr), lease.hostname); |
78 | #endif | 81 | #endif |