aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2010-01-30 23:16:21 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2010-01-30 23:16:21 +0100
commit e17764c8fb566f85020217dd8fd05fb6bc227e98 (patch)
tree cf0a42cc23cd4aae92e69924087610a941c712a4
parent ecd90fd488cd0c519070656f5cfa0b0959979be9 (diff)
download busybox-w32-e17764c8fb566f85020217dd8fd05fb6bc227e98.tar.gz
busybox-w32-e17764c8fb566f85020217dd8fd05fb6bc227e98.tar.bz2
busybox-w32-e17764c8fb566f85020217dd8fd05fb6bc227e98.zip
further work on unicodization
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- TODO_unicode 45
-rw-r--r-- coreutils/cal.c 2
-rw-r--r-- coreutils/df.c 17
-rw-r--r-- include/unicode.h 16
-rw-r--r-- libbb/unicode.c 77
-rw-r--r-- modutils/lsmod.c 23
-rw-r--r-- networking/udhcp/dumpleases.c 7
7 files changed, 146 insertions, 41 deletions
diff --git a/TODO_unicode b/TODO_unicode
new file mode 100644
index 000000000..c29fd933b
--- /dev/null
+++ b/TODO_unicode
@@ -0,0 +1,45 @@
1Already fixed applets:
2cal
3lsmod
4df
5dumpleases
6
7Applets which may need unicode handling (more extensive than sanitizing
8of filenames in error messages):
9
10ls - uses unicode_strlen, not scrlen
11expand, unexpand - uses unicode_strlen, not scrlen
12ash, hush through lineedit - uses unicode_strlen, not scrlen
13top - need to sanitize process args
14ps - need to sanitize process args
15less
16more
17vi
18ed
19cut
20awk
21sed
22tr
23grep egrep fgrep
24fold
25sort
26head, tail
27catv - "display nonprinting chars" - what this could mean for unicode?
28wc
29chat
30dumpkmap
31last - just line up columns
32man
33microcom
34strings
35watch
36
37Unsure, may need fixing:
38
39hostname - do we really want to protect against bad chars in it?
40patch
41addgroup, adduser, delgroup, deluser
42telnet
43telnetd
44od
45printf
diff --git a/coreutils/cal.c b/coreutils/cal.c
index 5ecb9131d..207fa967b 100644
--- a/coreutils/cal.c
+++ b/coreutils/cal.c
@@ -135,7 +135,7 @@ int cal_main(int argc UNUSED_PARAM, char **argv)
135 if (julian) 135 if (julian)
136 *hp++ = ' '; 136 *hp++ = ' ';
137 { 137 {
138 char *two_wchars = unicode_cut_nchars(2, buf); 138 char *two_wchars = unicode_conv_to_printable_fixedwidth(NULL, buf, 2);
139 strcpy(hp, two_wchars); 139 strcpy(hp, two_wchars);
140 free(two_wchars); 140 free(two_wchars);
141 } 141 }
diff --git a/coreutils/df.c b/coreutils/df.c
index ae68f0831..4b23faa7a 100644
--- a/coreutils/df.c
+++ b/coreutils/df.c
@@ -114,9 +114,6 @@ int df_main(int argc UNUSED_PARAM, char **argv)
114 while (1) { 114 while (1) {
115 const char *device; 115 const char *device;
116 const char *mount_point; 116 const char *mount_point;
117#if ENABLE_FEATURE_ASSUME_UNICODE
118 size_t dev_len;
119#endif
120 117
121 if (mount_table) { 118 if (mount_table) {
122 mount_entry = getmntent(mount_table); 119 mount_entry = getmntent(mount_table);
@@ -178,11 +175,15 @@ int df_main(int argc UNUSED_PARAM, char **argv)
178#endif 175#endif
179 176
180#if ENABLE_FEATURE_ASSUME_UNICODE 177#if ENABLE_FEATURE_ASSUME_UNICODE
181 dev_len = unicode_strlen(device); 178 {
182 if (dev_len > 20) { 179 uni_stat_t uni_stat;
183 printf("%s\n%20s", device, ""); 180 char *uni_dev = unicode_conv_to_printable(&uni_stat, device);
184 } else { 181 if (uni_stat.unicode_width > 20) {
185 printf("%s%*s", device, 20 - (int)dev_len, ""); 182 printf("%s\n%20s", uni_dev, "");
183 } else {
184 printf("%s%*s", uni_dev, 20 - (int)uni_stat.unicode_width, "");
185 }
186 free(uni_dev);
186 } 187 }
187#else 188#else
188 if (printf("\n%-20s" + 1, device) > 20) 189 if (printf("\n%-20s" + 1, device) > 20)
diff --git a/include/unicode.h b/include/unicode.h
index f1a252cc7..f32e56599 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -14,15 +14,25 @@ enum {
14#if !ENABLE_FEATURE_ASSUME_UNICODE 14#if !ENABLE_FEATURE_ASSUME_UNICODE
15 15
16# define unicode_strlen(string) strlen(string) 16# define unicode_strlen(string) strlen(string)
17# define unicode_scrlen(string) TODO
18# define unicode_status UNICODE_OFF 17# define unicode_status UNICODE_OFF
19# define init_unicode() ((void)0) 18# define init_unicode() ((void)0)
20 19
21#else 20#else
22 21
23size_t FAST_FUNC unicode_strlen(const char *string); 22size_t FAST_FUNC unicode_strlen(const char *string);
24char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src); 23enum {
25unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src); 24 UNI_FLAG_PAD = (1 << 0),
25};
26typedef struct uni_stat_t {
27 unsigned byte_count;
28 unsigned unicode_count;
29 unsigned unicode_width;
30} uni_stat_t;
31//UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src);
32//UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags);
33char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src);
34char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth);
35char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width);
26 36
27# if ENABLE_LOCALE_SUPPORT 37# if ENABLE_LOCALE_SUPPORT
28 38
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 878af84bc..4e7e3a96a 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -246,29 +246,45 @@ size_t FAST_FUNC unicode_strlen(const char *string)
246 return width; 246 return width;
247} 247}
248 248
249char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src) 249static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
250{ 250{
251 char *dst; 251 char *dst;
252 unsigned dst_len; 252 unsigned dst_len;
253 unsigned uni_count;
254 unsigned uni_width;
253 255
254 if (unicode_status != UNICODE_ON) { 256 if (unicode_status != UNICODE_ON) {
255 char *d = dst = xmalloc(width + 1); 257 char *d;
256 while ((int)--width >= 0) { 258 if (flags & UNI_FLAG_PAD) {
257 unsigned char c = *src; 259 d = dst = xmalloc(width + 1);
258 if (c == '\0') { 260 while ((int)--width >= 0) {
259 do 261 unsigned char c = *src;
260 *d++ = ' '; 262 if (c == '\0') {
261 while ((int)--width >= 0); 263 do
262 break; 264 *d++ = ' ';
265 while ((int)--width >= 0);
266 break;
267 }
268 *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
269 src++;
270 }
271 *d = '\0';
272 } else {
273 d = dst = xstrndup(src, width);
274 while (*d) {
275 unsigned char c = *d;
276 if (c < ' ' || c >= 0x7f)
277 *d = '?';
278 d++;
263 } 279 }
264 *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
265 src++;
266 } 280 }
267 *d = '\0'; 281 if (stats)
282 stats->byte_count = stats->unicode_count = (d - dst);
268 return dst; 283 return dst;
269 } 284 }
270 285
271 dst = NULL; 286 dst = NULL;
287 uni_count = uni_width = 0;
272 dst_len = 0; 288 dst_len = 0;
273 while (1) { 289 while (1) {
274 int w; 290 int w;
@@ -301,7 +317,7 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
301 /* src = NULL: invalid sequence is seen, 317 /* src = NULL: invalid sequence is seen,
302 * else: wc is set, src is advanced to next mb char 318 * else: wc is set, src is advanced to next mb char
303 */ 319 */
304 if (src1) {/* no error */ 320 if (src1) { /* no error */
305 if (wc == 0) /* end-of-string */ 321 if (wc == 0) /* end-of-string */
306 break; 322 break;
307 src = src1; 323 src = src1;
@@ -315,8 +331,8 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
315 goto subst; 331 goto subst;
316 w = wcwidth(wc); 332 w = wcwidth(wc);
317 if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */ 333 if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
318 || (!ENABLE_UNICODE_COMBINING_WCHARS && wc <= 0) 334 || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
319 || (!ENABLE_UNICODE_WIDE_WCHARS && wc > 1) 335 || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
320 ) { 336 ) {
321 subst: 337 subst:
322 wc = CONFIG_SUBST_WCHAR; 338 wc = CONFIG_SUBST_WCHAR;
@@ -331,6 +347,8 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
331 break; 347 break;
332 } 348 }
333 349
350 uni_count++;
351 uni_width += w;
334 dst = xrealloc(dst, dst_len + MB_CUR_MAX); 352 dst = xrealloc(dst, dst_len + MB_CUR_MAX);
335#if ENABLE_LOCALE_SUPPORT 353#if ENABLE_LOCALE_SUPPORT
336 { 354 {
@@ -343,15 +361,37 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
343 } 361 }
344 362
345 /* Pad to remaining width */ 363 /* Pad to remaining width */
346 dst = xrealloc(dst, dst_len + width + 1); 364 if (flags & UNI_FLAG_PAD) {
347 while ((int)--width >= 0) { 365 dst = xrealloc(dst, dst_len + width + 1);
348 dst[dst_len++] = ' '; 366 uni_count += width;
367 uni_width += width;
368 while ((int)--width >= 0) {
369 dst[dst_len++] = ' ';
370 }
349 } 371 }
350 dst[dst_len] = '\0'; 372 dst[dst_len] = '\0';
373 if (stats) {
374 stats->byte_count = dst_len;
375 stats->unicode_count = uni_count;
376 stats->unicode_width = uni_width;
377 }
351 378
352 return dst; 379 return dst;
353} 380}
381char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src)
382{
383 return unicode_conv_to_printable2(stats, src, INT_MAX, 0);
384}
385char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth)
386{
387 return unicode_conv_to_printable2(stats, src, maxwidth, 0);
388}
389char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width)
390{
391 return unicode_conv_to_printable2(stats, src, width, UNI_FLAG_PAD);
392}
354 393
394#ifdef UNUSED
355unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src) 395unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
356{ 396{
357 if (unicode_status != UNICODE_ON) { 397 if (unicode_status != UNICODE_ON) {
@@ -382,3 +422,4 @@ unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
382 return 0; 422 return 0;
383 } 423 }
384} 424}
425#endif
diff --git a/modutils/lsmod.c b/modutils/lsmod.c
index cc6b6162f..50621c245 100644
--- a/modutils/lsmod.c
+++ b/modutils/lsmod.c
@@ -46,9 +46,6 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
46#if ENABLE_FEATURE_LSMOD_PRETTY_2_6_OUTPUT 46#if ENABLE_FEATURE_LSMOD_PRETTY_2_6_OUTPUT
47 char *token[4]; 47 char *token[4];
48 parser_t *parser = config_open("/proc/modules"); 48 parser_t *parser = config_open("/proc/modules");
49# if ENABLE_FEATURE_ASSUME_UNICODE
50 size_t name_len;
51# endif
52 init_unicode(); 49 init_unicode();
53 50
54 printf("%-24sSize Used by", "Module"); 51 printf("%-24sSize Used by", "Module");
@@ -64,9 +61,13 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
64 } else 61 } else
65 token[3] = (char *) ""; 62 token[3] = (char *) "";
66# if ENABLE_FEATURE_ASSUME_UNICODE 63# if ENABLE_FEATURE_ASSUME_UNICODE
67 name_len = unicode_strlen(token[0]); 64 {
68 name_len = (name_len > 19) ? 0 : 19 - name_len; 65 uni_stat_t uni_stat;
69 printf("%s%*s %8s %2s %s\n", token[0], name_len, "", token[1], token[2], token[3]); 66 char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]);
67 unsigned pad_len = (uni_stat.unicode_width > 19) ? 0 : 19 - uni_stat.unicode_width;
68 printf("%s%*s %8s %2s %s\n", uni_name, pad_len, "", token[1], token[2], token[3]);
69 free(uni_name);
70 }
70# else 71# else
71 printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]); 72 printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]);
72# endif 73# endif
@@ -78,9 +79,13 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
78 // so trimming the trailing char is just what we need! 79 // so trimming the trailing char is just what we need!
79 token[3][strlen(token[3])-1] = '\0'; 80 token[3][strlen(token[3])-1] = '\0';
80# if ENABLE_FEATURE_ASSUME_UNICODE 81# if ENABLE_FEATURE_ASSUME_UNICODE
81 name_len = unicode_strlen(token[0]); 82 {
82 name_len = (name_len > 19) ? 0 : 19 - name_len; 83 uni_stat_t uni_stat;
83 printf("%s%*s %8s %2s %s\n", token[0], name_len, "", token[1], token[2], token[3]); 84 char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]);
85 unsigned pad_len = (uni_stat.unicode_width > 19) ? 0 : 19 - uni_stat.unicode_width;
86 printf("%s%*s %8s %2s %s\n", uni_name, pad_len, "", token[1], token[2], token[3]);
87 free(uni_name);
88 }
84# else 89# else
85 printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]); 90 printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]);
86# endif 91# endif
diff --git a/networking/udhcp/dumpleases.c b/networking/udhcp/dumpleases.c
index d8f5da7fb..eab9713f4 100644
--- a/networking/udhcp/dumpleases.c
+++ b/networking/udhcp/dumpleases.c
@@ -71,8 +71,11 @@ int dumpleases_main(int argc UNUSED_PARAM, char **argv)
71 /* actually, 15+1 and 19+1, +1 is a space between columns */ 71 /* actually, 15+1 and 19+1, +1 is a space between columns */
72 /* lease.hostname is char[20] and is always NUL terminated */ 72 /* lease.hostname is char[20] and is always NUL terminated */
73#if ENABLE_FEATURE_ASSUME_UNICODE 73#if ENABLE_FEATURE_ASSUME_UNICODE
74 printf(" %-16s%s%*s", inet_ntoa(addr), lease.hostname, 74 {
75 20 - (int)unicode_strlen(lease.hostname), ""); 75 char *uni_name = unicode_conv_to_printable_fixedwidth(NULL, lease.hostname, 20);
76 printf(" %-16s%s", inet_ntoa(addr), uni_name);
77 free(uni_name);
78 }
76#else 79#else
77 printf(" %-16s%-20s", inet_ntoa(addr), lease.hostname); 80 printf(" %-16s%-20s", inet_ntoa(addr), lease.hostname);
78#endif 81#endif