diff options
Diffstat (limited to 'libbb/unicode.c')
-rw-r--r-- | libbb/unicode.c | 40 |
1 file changed, 17 insertions, 23 deletions
diff --git a/libbb/unicode.c b/libbb/unicode.c index 83e70b412..b2c28239b 100644 --- a/libbb/unicode.c +++ b/libbb/unicode.c | |||
@@ -240,7 +240,7 @@ int FAST_FUNC iswpunct(wint_t wc) | |||
240 | } | 240 | } |
241 | 241 | ||
242 | 242 | ||
243 | # if LAST_SUPPORTED_WCHAR >= 0x300 | 243 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300 |
244 | struct interval { | 244 | struct interval { |
245 | uint16_t first; | 245 | uint16_t first; |
246 | uint16_t last; | 246 | uint16_t last; |
@@ -418,12 +418,11 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max) | |||
418 | * This implementation assumes that wchar_t characters are encoded | 418 | * This implementation assumes that wchar_t characters are encoded |
419 | * in ISO 10646. | 419 | * in ISO 10646. |
420 | */ | 420 | */ |
421 | static int wcwidth(unsigned ucs) | 421 | int FAST_FUNC wcwidth(unsigned ucs) |
422 | { | 422 | { |
423 | # if LAST_SUPPORTED_WCHAR >= 0x300 | 423 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300 |
424 | /* sorted list of non-overlapping intervals of non-spacing characters */ | 424 | /* sorted list of non-overlapping intervals of non-spacing characters */ |
425 | /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ | 425 | /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ |
426 | static const struct interval combining[] = { | ||
427 | # define BIG_(a,b) { a, b }, | 426 | # define BIG_(a,b) { a, b }, |
428 | # define PAIR(a,b) | 427 | # define PAIR(a,b) |
429 | # define ARRAY /* PAIR if < 0x4000 and no more than 4 chars big */ \ | 428 | # define ARRAY /* PAIR if < 0x4000 and no more than 4 chars big */ \ |
@@ -557,10 +556,9 @@ static int wcwidth(unsigned ucs) | |||
557 | BIG_(0xFE20, 0xFE23) \ | 556 | BIG_(0xFE20, 0xFE23) \ |
558 | BIG_(0xFEFF, 0xFEFF) \ | 557 | BIG_(0xFEFF, 0xFEFF) \ |
559 | BIG_(0xFFF9, 0xFFFB) | 558 | BIG_(0xFFF9, 0xFFFB) |
560 | ARRAY | 559 | static const struct interval combining[] = { ARRAY }; |
561 | # undef BIG_ | 560 | # undef BIG_ |
562 | # undef PAIR | 561 | # undef PAIR |
563 | }; | ||
564 | # define BIG_(a,b) | 562 | # define BIG_(a,b) |
565 | # define PAIR(a,b) (a << 2) | (b-a), | 563 | # define PAIR(a,b) (a << 2) | (b-a), |
566 | static const uint16_t combining1[] = { ARRAY }; | 564 | static const uint16_t combining1[] = { ARRAY }; |
@@ -581,14 +579,14 @@ static int wcwidth(unsigned ucs) | |||
581 | if ((ucs & ~0x80) < 0x20 || ucs == 0x7f) | 579 | if ((ucs & ~0x80) < 0x20 || ucs == 0x7f) |
582 | return -1; | 580 | return -1; |
583 | /* Quick abort if it is an obviously invalid char */ | 581 | /* Quick abort if it is an obviously invalid char */ |
584 | if (ucs > LAST_SUPPORTED_WCHAR) | 582 | if (ucs > CONFIG_LAST_SUPPORTED_WCHAR) |
585 | return -1; | 583 | return -1; |
586 | 584 | ||
587 | /* Optimization: no combining chars below 0x300 */ | 585 | /* Optimization: no combining chars below 0x300 */ |
588 | if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300) | 586 | if (CONFIG_LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300) |
589 | return 1; | 587 | return 1; |
590 | 588 | ||
591 | # if LAST_SUPPORTED_WCHAR >= 0x300 | 589 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300 |
592 | /* Binary search in table of non-spacing characters */ | 590 | /* Binary search in table of non-spacing characters */ |
593 | if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1)) | 591 | if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1)) |
594 | return 0; | 592 | return 0; |
@@ -596,25 +594,25 @@ static int wcwidth(unsigned ucs) | |||
596 | return 0; | 594 | return 0; |
597 | 595 | ||
598 | /* Optimization: all chars below 0x1100 are not double-width */ | 596 | /* Optimization: all chars below 0x1100 are not double-width */ |
599 | if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100) | 597 | if (CONFIG_LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100) |
600 | return 1; | 598 | return 1; |
601 | 599 | ||
602 | # if LAST_SUPPORTED_WCHAR >= 0x1100 | 600 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0x1100 |
603 | /* Invalid code points: */ | 601 | /* Invalid code points: */ |
604 | /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */ | 602 | /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */ |
605 | /* Private Use Area (e000..f8ff) */ | 603 | /* Private Use Area (e000..f8ff) */ |
606 | /* Noncharacters fdd0..fdef */ | 604 | /* Noncharacters fdd0..fdef */ |
607 | if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff) | 605 | if ((CONFIG_LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff) |
608 | || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef) | 606 | || (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef) |
609 | ) { | 607 | ) { |
610 | return -1; | 608 | return -1; |
611 | } | 609 | } |
612 | /* 0xfffe and 0xffff in every plane are invalid */ | 610 | /* 0xfffe and 0xffff in every plane are invalid */ |
613 | if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) { | 611 | if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) { |
614 | return -1; | 612 | return -1; |
615 | } | 613 | } |
616 | 614 | ||
617 | # if LAST_SUPPORTED_WCHAR >= 0x10000 | 615 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000 |
618 | if (ucs >= 0x10000) { | 616 | if (ucs >= 0x10000) { |
619 | /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */ | 617 | /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */ |
620 | static const struct interval combining0x10000[] = { | 618 | static const struct interval combining0x10000[] = { |
@@ -627,7 +625,7 @@ static int wcwidth(unsigned ucs) | |||
627 | if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1)) | 625 | if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1)) |
628 | return 0; | 626 | return 0; |
629 | /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */ | 627 | /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */ |
630 | if (LAST_SUPPORTED_WCHAR >= 0xE0001 | 628 | if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xE0001 |
631 | && ( ucs == 0xE0001 | 629 | && ( ucs == 0xE0001 |
632 | || (ucs >= 0xE0020 && ucs <= 0xE007F) | 630 | || (ucs >= 0xE0020 && ucs <= 0xE007F) |
633 | || (ucs >= 0xE0100 && ucs <= 0xE01EF) | 631 | || (ucs >= 0xE0100 && ucs <= 0xE01EF) |
@@ -646,7 +644,7 @@ static int wcwidth(unsigned ucs) | |||
646 | || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */ | 644 | || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */ |
647 | || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */ | 645 | || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */ |
648 | || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */ | 646 | || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */ |
649 | # if LAST_SUPPORTED_WCHAR >= 0xac00 | 647 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0xac00 |
650 | || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */ | 648 | || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */ |
651 | || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */ | 649 | || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */ |
652 | || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */ | 650 | || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */ |
@@ -668,7 +666,6 @@ int FAST_FUNC unicode_bidi_isrtl(wint_t wc) | |||
668 | * http://www.unicode.org/Public/5.2.0/ucd/extracted/DerivedBidiClass.txt | 666 | * http://www.unicode.org/Public/5.2.0/ucd/extracted/DerivedBidiClass.txt |
669 | * Bidi_Class=Right_To_Left | Bidi_Class=Arabic_Letter | 667 | * Bidi_Class=Right_To_Left | Bidi_Class=Arabic_Letter |
670 | */ | 668 | */ |
671 | static const struct interval rtl_b[] = { | ||
672 | # define BIG_(a,b) { a, b }, | 669 | # define BIG_(a,b) { a, b }, |
673 | # define PAIR(a,b) | 670 | # define PAIR(a,b) |
674 | # define ARRAY \ | 671 | # define ARRAY \ |
@@ -723,10 +720,9 @@ int FAST_FUNC unicode_bidi_isrtl(wint_t wc) | |||
723 | {0x10E7F, 0x10FFF}, | 720 | {0x10E7F, 0x10FFF}, |
724 | {0x1E800, 0x1EFFF} | 721 | {0x1E800, 0x1EFFF} |
725 | */ | 722 | */ |
726 | ARRAY | 723 | static const struct interval rtl_b[] = { ARRAY }; |
727 | # undef BIG_ | 724 | # undef BIG_ |
728 | # undef PAIR | 725 | # undef PAIR |
729 | }; | ||
730 | # define BIG_(a,b) | 726 | # define BIG_(a,b) |
731 | # define PAIR(a,b) (a << 2) | (b-a), | 727 | # define PAIR(a,b) (a << 2) | (b-a), |
732 | static const uint16_t rtl_p[] = { ARRAY }; | 728 | static const uint16_t rtl_p[] = { ARRAY }; |
@@ -755,7 +751,6 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc) | |||
755 | * White_Space, Other_Neutral, European_Number, European_Separator, | 751 | * White_Space, Other_Neutral, European_Number, European_Separator, |
756 | * European_Terminator, Arabic_Number, Common_Separator | 752 | * European_Terminator, Arabic_Number, Common_Separator |
757 | */ | 753 | */ |
758 | static const struct interval neutral_b[] = { | ||
759 | # define BIG_(a,b) { a, b }, | 754 | # define BIG_(a,b) { a, b }, |
760 | # define PAIR(a,b) | 755 | # define PAIR(a,b) |
761 | # define ARRAY \ | 756 | # define ARRAY \ |
@@ -929,10 +924,9 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc) | |||
929 | {0x1F030, 0x1F093}, | 924 | {0x1F030, 0x1F093}, |
930 | {0x1F100, 0x1F10A} | 925 | {0x1F100, 0x1F10A} |
931 | */ | 926 | */ |
932 | ARRAY | 927 | static const struct interval neutral_b[] = { ARRAY }; |
933 | # undef BIG_ | 928 | # undef BIG_ |
934 | # undef PAIR | 929 | # undef PAIR |
935 | }; | ||
936 | # define BIG_(a,b) | 930 | # define BIG_(a,b) |
937 | # define PAIR(a,b) (a << 2) | (b-a), | 931 | # define PAIR(a,b) (a << 2) | (b-a), |
938 | static const uint16_t neutral_p[] = { ARRAY }; | 932 | static const uint16_t neutral_p[] = { ARRAY }; |