diff options
Diffstat (limited to 'libbb/unicode.c')
-rw-r--r-- | libbb/unicode.c | 86 |
1 files changed, 70 insertions, 16 deletions
diff --git a/libbb/unicode.c b/libbb/unicode.c index 39b173e9c..878af84bc 100644 --- a/libbb/unicode.c +++ b/libbb/unicode.c | |||
@@ -216,8 +216,6 @@ size_t FAST_FUNC mbstowcs(wchar_t *dest, const char *src, size_t n) | |||
216 | return org_n - n; | 216 | return org_n - n; |
217 | } | 217 | } |
218 | 218 | ||
219 | #include "unicode_wcwidth.c" | ||
220 | |||
221 | int FAST_FUNC iswspace(wint_t wc) | 219 | int FAST_FUNC iswspace(wint_t wc) |
222 | { | 220 | { |
223 | return (unsigned)wc <= 0x7f && isspace(wc); | 221 | return (unsigned)wc <= 0x7f && isspace(wc); |
@@ -233,6 +231,8 @@ int FAST_FUNC iswpunct(wint_t wc) | |||
233 | return (unsigned)wc <= 0x7f && ispunct(wc); | 231 | return (unsigned)wc <= 0x7f && ispunct(wc); |
234 | } | 232 | } |
235 | 233 | ||
234 | #include "unicode_wcwidth.c" | ||
235 | |||
236 | #endif /* Homegrown Unicode support */ | 236 | #endif /* Homegrown Unicode support */ |
237 | 237 | ||
238 | 238 | ||
@@ -251,8 +251,22 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src) | |||
251 | char *dst; | 251 | char *dst; |
252 | unsigned dst_len; | 252 | unsigned dst_len; |
253 | 253 | ||
254 | if (unicode_status != UNICODE_ON) | 254 | if (unicode_status != UNICODE_ON) { |
255 | return xasprintf("%-*.*s", width, width, src); | 255 | char *d = dst = xmalloc(width + 1); |
256 | while ((int)--width >= 0) { | ||
257 | unsigned char c = *src; | ||
258 | if (c == '\0') { | ||
259 | do | ||
260 | *d++ = ' '; | ||
261 | while ((int)--width >= 0); | ||
262 | break; | ||
263 | } | ||
264 | *d++ = (c >= ' ' && c < 0x7f) ? c : '?'; | ||
265 | src++; | ||
266 | } | ||
267 | *d = '\0'; | ||
268 | return dst; | ||
269 | } | ||
256 | 270 | ||
257 | dst = NULL; | 271 | dst = NULL; |
258 | dst_len = 0; | 272 | dst_len = 0; |
@@ -260,31 +274,64 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src) | |||
260 | int w; | 274 | int w; |
261 | wchar_t wc; | 275 | wchar_t wc; |
262 | 276 | ||
263 | dst = xrealloc(dst, dst_len + 2 * MB_CUR_MAX); | ||
264 | #if ENABLE_LOCALE_SUPPORT | 277 | #if ENABLE_LOCALE_SUPPORT |
265 | { | 278 | { |
266 | mbstate_t mbst = { 0 }; | 279 | mbstate_t mbst = { 0 }; |
267 | ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst); | 280 | ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst); |
268 | if (rc <= 0) /* error, or end-of-string */ | 281 | /* If invalid sequence is seen: -1 is returned, |
282 | * src points to the invalid sequence, errno = EILSEQ. | ||
283 | * Else number of wchars (excluding terminating L'\0') | ||
284 | * written to dest is returned. | ||
285 | * If len (here: 1) non-L'\0' wchars stored at dest, | ||
286 | * src points to the next char to be converted. | ||
287 | * If string is completely converted: src = NULL. | ||
288 | */ | ||
289 | if (rc == 0) /* end-of-string */ | ||
269 | break; | 290 | break; |
291 | if (rc < 0) { /* error */ | ||
292 | src++; | ||
293 | goto subst; | ||
294 | } | ||
295 | if (!iswprint(wc)) | ||
296 | goto subst; | ||
270 | } | 297 | } |
271 | #else | 298 | #else |
272 | src = mbstowc_internal(&wc, src); | 299 | { |
273 | if (!src || wc == 0) /* error, or end-of-string */ | 300 | const char *src1 = mbstowc_internal(&wc, src); |
274 | break; | 301 | /* src = NULL: invalid sequence is seen, |
302 | * else: wc is set, src is advanced to next mb char | ||
303 | */ | ||
304 | if (src1) {/* no error */ | ||
305 | if (wc == 0) /* end-of-string */ | ||
306 | break; | ||
307 | src = src1; | ||
308 | } else { /* error */ | ||
309 | src++; | ||
310 | goto subst; | ||
311 | } | ||
312 | } | ||
275 | #endif | 313 | #endif |
314 | if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR) | ||
315 | goto subst; | ||
276 | w = wcwidth(wc); | 316 | w = wcwidth(wc); |
277 | if (w < 0) /* non-printable wchar */ | 317 | if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */ |
278 | break; | 318 | || (!ENABLE_UNICODE_COMBINING_WCHARS && wc <= 0) |
319 | || (!ENABLE_UNICODE_WIDE_WCHARS && wc > 1) | ||
320 | ) { | ||
321 | subst: | ||
322 | wc = CONFIG_SUBST_WCHAR; | ||
323 | w = 1; | ||
324 | } | ||
279 | width -= w; | 325 | width -= w; |
280 | if ((int)width < 0) { /* string is longer than width */ | 326 | /* Note: if width == 0, we still may add more chars, |
327 | * they may be zero-width or combining ones */ | ||
328 | if ((int)width < 0) { | ||
329 | /* can't add this wc, string would become longer than width */ | ||
281 | width += w; | 330 | width += w; |
282 | while (width) { | ||
283 | dst[dst_len++] = ' '; | ||
284 | width--; | ||
285 | } | ||
286 | break; | 331 | break; |
287 | } | 332 | } |
333 | |||
334 | dst = xrealloc(dst, dst_len + MB_CUR_MAX); | ||
288 | #if ENABLE_LOCALE_SUPPORT | 335 | #if ENABLE_LOCALE_SUPPORT |
289 | { | 336 | { |
290 | mbstate_t mbst = { 0 }; | 337 | mbstate_t mbst = { 0 }; |
@@ -294,7 +341,14 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src) | |||
294 | dst_len += wcrtomb_internal(&dst[dst_len], wc); | 341 | dst_len += wcrtomb_internal(&dst[dst_len], wc); |
295 | #endif | 342 | #endif |
296 | } | 343 | } |
344 | |||
345 | /* Pad to remaining width */ | ||
346 | dst = xrealloc(dst, dst_len + width + 1); | ||
347 | while ((int)--width >= 0) { | ||
348 | dst[dst_len++] = ' '; | ||
349 | } | ||
297 | dst[dst_len] = '\0'; | 350 | dst[dst_len] = '\0'; |
351 | |||
298 | return dst; | 352 | return dst; |
299 | } | 353 | } |
300 | 354 | ||