diff options
| author | Denys Vlasenko <vda.linux@googlemail.com> | 2010-01-31 16:34:37 +0100 |
|---|---|---|
| committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-01-31 16:34:37 +0100 |
| commit | b1edf20f1848cd741e8a8395afb4a4655a210906 (patch) | |
| tree | ff6f99354d507ae1bb3bcf29ca99e1626cad0733 | |
| parent | 40e4e88a28398c49d326b0fdf0d7f100f08b8f8d (diff) | |
| download | busybox-w32-b1edf20f1848cd741e8a8395afb4a4655a210906.tar.gz busybox-w32-b1edf20f1848cd741e8a8395afb4a4655a210906.tar.bz2 busybox-w32-b1edf20f1848cd741e8a8395afb4a4655a210906.zip | |
unicode: exclude FDD0..FDEF range too
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
| -rw-r--r-- | libbb/unicode_wcwidth.c | 96 | ||||
| -rwxr-xr-x | testsuite/ls.tests | 8 |
2 files changed, 52 insertions, 52 deletions
diff --git a/libbb/unicode_wcwidth.c b/libbb/unicode_wcwidth.c index 410c741ac..c7cc524a6 100644 --- a/libbb/unicode_wcwidth.c +++ b/libbb/unicode_wcwidth.c | |||
| @@ -90,13 +90,13 @@ | |||
| 90 | * until Unicode committee assigns something there. | 90 | * until Unicode committee assigns something there. |
| 91 | */ | 91 | */ |
| 92 | 92 | ||
| 93 | #if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR > 0x30000 | 93 | #if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000 |
| 94 | # define LAST_SUPPORTED_WCHAR 0x30000 | 94 | # define LAST_SUPPORTED_WCHAR 0x2ffff |
| 95 | #else | 95 | #else |
| 96 | # define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR | 96 | # define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR |
| 97 | #endif | 97 | #endif |
| 98 | 98 | ||
| 99 | #if LAST_SUPPORTED_WCHAR >= 0x0300 | 99 | #if LAST_SUPPORTED_WCHAR >= 0x300 |
| 100 | struct interval { | 100 | struct interval { |
| 101 | uint16_t first; | 101 | uint16_t first; |
| 102 | uint16_t last; | 102 | uint16_t last; |
| @@ -185,7 +185,7 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max) | |||
| 185 | */ | 185 | */ |
| 186 | static int wcwidth(unsigned ucs) | 186 | static int wcwidth(unsigned ucs) |
| 187 | { | 187 | { |
| 188 | #if LAST_SUPPORTED_WCHAR >= 0x0300 | 188 | #if LAST_SUPPORTED_WCHAR >= 0x300 |
| 189 | /* sorted list of non-overlapping intervals of non-spacing characters */ | 189 | /* sorted list of non-overlapping intervals of non-spacing characters */ |
| 190 | /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ | 190 | /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ |
| 191 | static const struct interval combining[] = { | 191 | static const struct interval combining[] = { |
| @@ -460,75 +460,75 @@ static int wcwidth(unsigned ucs) | |||
| 460 | #undef BIG_ | 460 | #undef BIG_ |
| 461 | #undef PAIR | 461 | #undef PAIR |
| 462 | }; | 462 | }; |
| 463 | # if LAST_SUPPORTED_WCHAR >= 0x10000 | ||
| 464 | /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */ | ||
| 465 | static const struct interval combining0x10000[] = { | ||
| 466 | { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F }, | ||
| 467 | { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 }, | ||
| 468 | { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD }, | ||
| 469 | { 0xD242, 0xD244 } | ||
| 470 | }; | ||
| 471 | # endif | ||
| 472 | #endif | 463 | #endif |
| 473 | 464 | ||
| 474 | if (ucs == 0) | 465 | if (ucs == 0) |
| 475 | return 0; | 466 | return 0; |
| 476 | /* test for 8-bit control characters (00-1f, 80-9f, 7f) */ | 467 | |
| 468 | /* Test for 8-bit control characters (00-1f, 80-9f, 7f) */ | ||
| 477 | if ((ucs & ~0x80) < 0x20 || ucs == 0x7f) | 469 | if ((ucs & ~0x80) < 0x20 || ucs == 0x7f) |
| 478 | return -1; | 470 | return -1; |
| 479 | if (ucs < 0x0300) /* optimization */ | 471 | /* Quick abort if it is an obviously invalid char */ |
| 472 | if (ucs > LAST_SUPPORTED_WCHAR) | ||
| 473 | return -1; | ||
| 474 | |||
| 475 | /* Optimization: no combining chars below 0x300 */ | ||
| 476 | if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300) | ||
| 480 | return 1; | 477 | return 1; |
| 481 | 478 | ||
| 482 | #if LAST_SUPPORTED_WCHAR < 0x0300 | 479 | #if LAST_SUPPORTED_WCHAR >= 0x300 |
| 483 | return -1; | 480 | /* Binary search in table of non-spacing characters */ |
| 484 | #else | ||
| 485 | /* binary search in table of non-spacing characters */ | ||
| 486 | if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1)) | 481 | if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1)) |
| 487 | return 0; | 482 | return 0; |
| 488 | if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1)) | 483 | if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1)) |
| 489 | return 0; | 484 | return 0; |
| 490 | 485 | ||
| 491 | if (ucs < 0x1100) /* optimization */ | 486 | /* Optimization: all chars below 0x1100 are not double-width */ |
| 487 | if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100) | ||
| 492 | return 1; | 488 | return 1; |
| 493 | 489 | ||
| 494 | # if LAST_SUPPORTED_WCHAR < 0x1100 | 490 | # if LAST_SUPPORTED_WCHAR >= 0x1100 |
| 495 | return -1; | 491 | /* Invalid code points: */ |
| 496 | # else | 492 | /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */ |
| 497 | if (ucs >= LAST_SUPPORTED_WCHAR) | 493 | /* Private Use Area (e000..f8ff) */ |
| 498 | return -1; | 494 | /* Noncharacters fdd0..fdef */ |
| 499 | 495 | if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff) | |
| 500 | /* High (d800..dbff) and low (dc00..dfff) surrogates are invalid (used only by UTF16) */ | 496 | || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef) |
| 501 | /* We also exclude Private Use Area (e000..f8ff) */ | ||
| 502 | if (LAST_SUPPORTED_WCHAR >= 0xd800 | ||
| 503 | && (ucs >= 0xd800 || ucs <= 0xf8ff) | ||
| 504 | ) { | 497 | ) { |
| 505 | return -1; | 498 | return -1; |
| 506 | } | 499 | } |
| 507 | |||
| 508 | /* 0xfffe and 0xffff in every plane are invalid */ | 500 | /* 0xfffe and 0xffff in every plane are invalid */ |
| 509 | if (LAST_SUPPORTED_WCHAR >= 0xfffe | 501 | if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) { |
| 510 | && (ucs & 0xfffe) == 0xfffe | ||
| 511 | ) { | ||
| 512 | return -1; | 502 | return -1; |
| 513 | } | 503 | } |
| 514 | 504 | ||
| 515 | # if LAST_SUPPORTED_WCHAR >= 0x10000 | 505 | # if LAST_SUPPORTED_WCHAR >= 0x10000 |
| 516 | /* binary search in table of non-spacing characters in Supplementary Multilingual Plane */ | 506 | if (ucs >= 0x10000) { |
| 517 | if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1)) | 507 | /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */ |
| 518 | return 0; | 508 | static const struct interval combining0x10000[] = { |
| 519 | # endif | 509 | { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F }, |
| 520 | /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */ | 510 | { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 }, |
| 521 | if (LAST_SUPPORTED_WCHAR >= 0xE0001 | 511 | { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD }, |
| 522 | && ( ucs == 0xE0001 | 512 | { 0xD242, 0xD244 } |
| 523 | || (ucs >= 0xE0020 && ucs <= 0xE007F) | 513 | }; |
| 524 | || (ucs >= 0xE0100 && ucs <= 0xE01EF) | 514 | /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */ |
| 525 | ) | 515 | if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1)) |
| 526 | ) { | 516 | return 0; |
| 527 | return 0; | 517 | /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */ |
| 518 | if (LAST_SUPPORTED_WCHAR >= 0xE0001 | ||
| 519 | && ( ucs == 0xE0001 | ||
| 520 | || (ucs >= 0xE0020 && ucs <= 0xE007F) | ||
| 521 | || (ucs >= 0xE0100 && ucs <= 0xE01EF) | ||
| 522 | ) | ||
| 523 | ) { | ||
| 524 | return 0; | ||
| 525 | } | ||
| 528 | } | 526 | } |
| 527 | # endif | ||
| 529 | 528 | ||
| 530 | /* if we arrive here, ucs is not a combining or C0/C1 control character */ | 529 | /* If we arrive here, ucs is not a combining or C0/C1 control character. |
| 531 | 530 | * Check whether it's 1 char or 2-char wide. |
| 531 | */ | ||
| 532 | return 1 + | 532 | return 1 + |
| 533 | ( (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */ | 533 | ( (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */ |
| 534 | || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */ | 534 | || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */ |
diff --git a/testsuite/ls.tests b/testsuite/ls.tests index e08249ea6..169313a63 100755 --- a/testsuite/ls.tests +++ b/testsuite/ls.tests | |||
| @@ -13,7 +13,7 @@ mkdir ls.testdir || exit 1 | |||
| 13 | 13 | ||
| 14 | # With Unicode provided by libc locale, I'm not sure this test can pass. | 14 | # With Unicode provided by libc locale, I'm not sure this test can pass. |
| 15 | # I suspect we might fail to skip exactly correct number of bytes | 15 | # I suspect we might fail to skip exactly correct number of bytes |
| 16 | # over broken unicode sequences. | 16 | # over broken unicode sequences. |
| 17 | test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \ | 17 | test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \ |
| 18 | && test x"$CONFIG_LOCALE_SUPPORT" != x"y" \ | 18 | && test x"$CONFIG_LOCALE_SUPPORT" != x"y" \ |
| 19 | && test x"$CONFIG_SUBST_WCHAR" = x"63" \ | 19 | && test x"$CONFIG_SUBST_WCHAR" = x"63" \ |
| @@ -144,7 +144,7 @@ test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \ | |||
| 144 | 0003_2.1__First_possible_sequence_of_a_certain_length_____________________| | 144 | 0003_2.1__First_possible_sequence_of_a_certain_length_____________________| |
| 145 | 0004_2.1.2__2_bytes__U-00000080_:________"?"______________________________| | 145 | 0004_2.1.2__2_bytes__U-00000080_:________"?"______________________________| |
| 146 | 0005_2.1.3__3_bytes__U-00000800_:________"ࠀ"______________________________| | 146 | 0005_2.1.3__3_bytes__U-00000800_:________"ࠀ"______________________________| |
| 147 | 0006_2.1.4__4_bytes__U-00010000_:________"?"______________________________| | 147 | 0006_2.1.4__4_bytes__U-00010000_:________"𐀀"______________________________| |
| 148 | 0007_2.1.5__5_bytes__U-00200000_:________"?"______________________________| | 148 | 0007_2.1.5__5_bytes__U-00200000_:________"?"______________________________| |
| 149 | 0008_2.1.6__6_bytes__U-04000000_:________"?"______________________________| | 149 | 0008_2.1.6__6_bytes__U-04000000_:________"?"______________________________| |
| 150 | 0009_2.2__Last_possible_sequence_of_a_certain_length______________________| | 150 | 0009_2.2__Last_possible_sequence_of_a_certain_length______________________| |
| @@ -155,9 +155,9 @@ test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \ | |||
| 155 | 0014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________| | 155 | 0014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________| |
| 156 | 0015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________| | 156 | 0015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________| |
| 157 | 0016_2.3__Other_boundary_conditions_______________________________________| | 157 | 0016_2.3__Other_boundary_conditions_______________________________________| |
| 158 | 0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"?"___________________________________| | 158 | 0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"퟿"___________________________________| |
| 159 | 0018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________| | 159 | 0018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________| |
| 160 | 0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"?"___________________________________| | 160 | 0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"�"___________________________________| |
| 161 | 0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________| | 161 | 0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________| |
| 162 | 0021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________| | 162 | 0021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________| |
| 163 | 0022_3__Malformed_sequences_______________________________________________| | 163 | 0022_3__Malformed_sequences_______________________________________________| |
