about | summary | refs | log | tree | commit | diff
path: root/libbb
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2010-01-30 23:16:21 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2010-01-30 23:16:21 +0100
commit: e17764c8fb566f85020217dd8fd05fb6bc227e98 (patch)
tree: cf0a42cc23cd4aae92e69924087610a941c712a4 /libbb
parent: ecd90fd488cd0c519070656f5cfa0b0959979be9 (diff)
download: busybox-w32-e17764c8fb566f85020217dd8fd05fb6bc227e98.tar.gz
busybox-w32-e17764c8fb566f85020217dd8fd05fb6bc227e98.tar.bz2
busybox-w32-e17764c8fb566f85020217dd8fd05fb6bc227e98.zip
further work on unicodization
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
-rw-r--r-- libbb/unicode.c 77
1 files changed, 59 insertions, 18 deletions
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 878af84bc..4e7e3a96a 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -246,29 +246,45 @@ size_t FAST_FUNC unicode_strlen(const char *string)
246 return width; 246 return width;
247} 247}
248 248
249char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src) 249static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
250{ 250{
251 char *dst; 251 char *dst;
252 unsigned dst_len; 252 unsigned dst_len;
253 unsigned uni_count;
254 unsigned uni_width;
253 255
254 if (unicode_status != UNICODE_ON) { 256 if (unicode_status != UNICODE_ON) {
255 char *d = dst = xmalloc(width + 1); 257 char *d;
256 while ((int)--width >= 0) { 258 if (flags & UNI_FLAG_PAD) {
257 unsigned char c = *src; 259 d = dst = xmalloc(width + 1);
258 if (c == '\0') { 260 while ((int)--width >= 0) {
259 do 261 unsigned char c = *src;
260 *d++ = ' '; 262 if (c == '\0') {
261 while ((int)--width >= 0); 263 do
262 break; 264 *d++ = ' ';
265 while ((int)--width >= 0);
266 break;
267 }
268 *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
269 src++;
270 }
271 *d = '\0';
272 } else {
273 d = dst = xstrndup(src, width);
274 while (*d) {
275 unsigned char c = *d;
276 if (c < ' ' || c >= 0x7f)
277 *d = '?';
278 d++;
263 } 279 }
264 *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
265 src++;
266 } 280 }
267 *d = '\0'; 281 if (stats)
282 stats->byte_count = stats->unicode_count = (d - dst);
268 return dst; 283 return dst;
269 } 284 }
270 285
271 dst = NULL; 286 dst = NULL;
287 uni_count = uni_width = 0;
272 dst_len = 0; 288 dst_len = 0;
273 while (1) { 289 while (1) {
274 int w; 290 int w;
@@ -301,7 +317,7 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
301 /* src = NULL: invalid sequence is seen, 317 /* src = NULL: invalid sequence is seen,
302 * else: wc is set, src is advanced to next mb char 318 * else: wc is set, src is advanced to next mb char
303 */ 319 */
304 if (src1) {/* no error */ 320 if (src1) { /* no error */
305 if (wc == 0) /* end-of-string */ 321 if (wc == 0) /* end-of-string */
306 break; 322 break;
307 src = src1; 323 src = src1;
@@ -315,8 +331,8 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
315 goto subst; 331 goto subst;
316 w = wcwidth(wc); 332 w = wcwidth(wc);
317 if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */ 333 if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
318 || (!ENABLE_UNICODE_COMBINING_WCHARS && wc <= 0) 334 || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
319 || (!ENABLE_UNICODE_WIDE_WCHARS && wc > 1) 335 || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
320 ) { 336 ) {
321 subst: 337 subst:
322 wc = CONFIG_SUBST_WCHAR; 338 wc = CONFIG_SUBST_WCHAR;
@@ -331,6 +347,8 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
331 break; 347 break;
332 } 348 }
333 349
350 uni_count++;
351 uni_width += w;
334 dst = xrealloc(dst, dst_len + MB_CUR_MAX); 352 dst = xrealloc(dst, dst_len + MB_CUR_MAX);
335#if ENABLE_LOCALE_SUPPORT 353#if ENABLE_LOCALE_SUPPORT
336 { 354 {
@@ -343,15 +361,37 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
343 } 361 }
344 362
345 /* Pad to remaining width */ 363 /* Pad to remaining width */
346 dst = xrealloc(dst, dst_len + width + 1); 364 if (flags & UNI_FLAG_PAD) {
347 while ((int)--width >= 0) { 365 dst = xrealloc(dst, dst_len + width + 1);
348 dst[dst_len++] = ' '; 366 uni_count += width;
367 uni_width += width;
368 while ((int)--width >= 0) {
369 dst[dst_len++] = ' ';
370 }
349 } 371 }
350 dst[dst_len] = '\0'; 372 dst[dst_len] = '\0';
373 if (stats) {
374 stats->byte_count = dst_len;
375 stats->unicode_count = uni_count;
376 stats->unicode_width = uni_width;
377 }
351 378
352 return dst; 379 return dst;
353} 380}
381char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src)
382{
383 return unicode_conv_to_printable2(stats, src, INT_MAX, 0);
384}
385char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth)
386{
387 return unicode_conv_to_printable2(stats, src, maxwidth, 0);
388}
389char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width)
390{
391 return unicode_conv_to_printable2(stats, src, width, UNI_FLAG_PAD);
392}
354 393
394#ifdef UNUSED
355unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src) 395unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
356{ 396{
357 if (unicode_status != UNICODE_ON) { 397 if (unicode_status != UNICODE_ON) {
@@ -382,3 +422,4 @@ unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
382 return 0; 422 return 0;
383 } 423 }
384} 424}
425#endif