summary | refs | log | tree | commit | diff
path: root/libbb/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbb/unicode.c')
-rw-r--r-- libbb/unicode.c 40
1 files changed, 17 insertions, 23 deletions
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 83e70b412..b2c28239b 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -240,7 +240,7 @@ int FAST_FUNC iswpunct(wint_t wc)
240} 240}
241 241
242 242
243# if LAST_SUPPORTED_WCHAR >= 0x300 243# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
244struct interval { 244struct interval {
245 uint16_t first; 245 uint16_t first;
246 uint16_t last; 246 uint16_t last;
@@ -418,12 +418,11 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
418 * This implementation assumes that wchar_t characters are encoded 418 * This implementation assumes that wchar_t characters are encoded
419 * in ISO 10646. 419 * in ISO 10646.
420 */ 420 */
421static int wcwidth(unsigned ucs) 421int FAST_FUNC wcwidth(unsigned ucs)
422{ 422{
423# if LAST_SUPPORTED_WCHAR >= 0x300 423# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
424 /* sorted list of non-overlapping intervals of non-spacing characters */ 424 /* sorted list of non-overlapping intervals of non-spacing characters */
425 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ 425 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
426 static const struct interval combining[] = {
427# define BIG_(a,b) { a, b }, 426# define BIG_(a,b) { a, b },
428# define PAIR(a,b) 427# define PAIR(a,b)
429# define ARRAY /* PAIR if < 0x4000 and no more than 4 chars big */ \ 428# define ARRAY /* PAIR if < 0x4000 and no more than 4 chars big */ \
@@ -557,10 +556,9 @@ static int wcwidth(unsigned ucs)
557 BIG_(0xFE20, 0xFE23) \ 556 BIG_(0xFE20, 0xFE23) \
558 BIG_(0xFEFF, 0xFEFF) \ 557 BIG_(0xFEFF, 0xFEFF) \
559 BIG_(0xFFF9, 0xFFFB) 558 BIG_(0xFFF9, 0xFFFB)
560 ARRAY 559 static const struct interval combining[] = { ARRAY };
561# undef BIG_ 560# undef BIG_
562# undef PAIR 561# undef PAIR
563 };
564# define BIG_(a,b) 562# define BIG_(a,b)
565# define PAIR(a,b) (a << 2) | (b-a), 563# define PAIR(a,b) (a << 2) | (b-a),
566 static const uint16_t combining1[] = { ARRAY }; 564 static const uint16_t combining1[] = { ARRAY };
@@ -581,14 +579,14 @@ static int wcwidth(unsigned ucs)
581 if ((ucs & ~0x80) < 0x20 || ucs == 0x7f) 579 if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
582 return -1; 580 return -1;
583 /* Quick abort if it is an obviously invalid char */ 581 /* Quick abort if it is an obviously invalid char */
584 if (ucs > LAST_SUPPORTED_WCHAR) 582 if (ucs > CONFIG_LAST_SUPPORTED_WCHAR)
585 return -1; 583 return -1;
586 584
587 /* Optimization: no combining chars below 0x300 */ 585 /* Optimization: no combining chars below 0x300 */
588 if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300) 586 if (CONFIG_LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
589 return 1; 587 return 1;
590 588
591# if LAST_SUPPORTED_WCHAR >= 0x300 589# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
592 /* Binary search in table of non-spacing characters */ 590 /* Binary search in table of non-spacing characters */
593 if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1)) 591 if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
594 return 0; 592 return 0;
@@ -596,25 +594,25 @@ static int wcwidth(unsigned ucs)
596 return 0; 594 return 0;
597 595
598 /* Optimization: all chars below 0x1100 are not double-width */ 596 /* Optimization: all chars below 0x1100 are not double-width */
599 if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100) 597 if (CONFIG_LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
600 return 1; 598 return 1;
601 599
602# if LAST_SUPPORTED_WCHAR >= 0x1100 600# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x1100
603 /* Invalid code points: */ 601 /* Invalid code points: */
604 /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */ 602 /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
605 /* Private Use Area (e000..f8ff) */ 603 /* Private Use Area (e000..f8ff) */
606 /* Noncharacters fdd0..fdef */ 604 /* Noncharacters fdd0..fdef */
607 if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff) 605 if ((CONFIG_LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
608 || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef) 606 || (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
609 ) { 607 ) {
610 return -1; 608 return -1;
611 } 609 }
612 /* 0xfffe and 0xffff in every plane are invalid */ 610 /* 0xfffe and 0xffff in every plane are invalid */
613 if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) { 611 if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
614 return -1; 612 return -1;
615 } 613 }
616 614
617# if LAST_SUPPORTED_WCHAR >= 0x10000 615# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000
618 if (ucs >= 0x10000) { 616 if (ucs >= 0x10000) {
619 /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */ 617 /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
620 static const struct interval combining0x10000[] = { 618 static const struct interval combining0x10000[] = {
@@ -627,7 +625,7 @@ static int wcwidth(unsigned ucs)
627 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1)) 625 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
628 return 0; 626 return 0;
629 /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */ 627 /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
630 if (LAST_SUPPORTED_WCHAR >= 0xE0001 628 if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xE0001
631 && ( ucs == 0xE0001 629 && ( ucs == 0xE0001
632 || (ucs >= 0xE0020 && ucs <= 0xE007F) 630 || (ucs >= 0xE0020 && ucs <= 0xE007F)
633 || (ucs >= 0xE0100 && ucs <= 0xE01EF) 631 || (ucs >= 0xE0100 && ucs <= 0xE01EF)
@@ -646,7 +644,7 @@ static int wcwidth(unsigned ucs)
646 || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */ 644 || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
647 || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */ 645 || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
648 || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */ 646 || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
649# if LAST_SUPPORTED_WCHAR >= 0xac00 647# if CONFIG_LAST_SUPPORTED_WCHAR >= 0xac00
650 || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */ 648 || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
651 || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */ 649 || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
652 || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */ 650 || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */
@@ -668,7 +666,6 @@ int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
668 * http://www.unicode.org/Public/5.2.0/ucd/extracted/DerivedBidiClass.txt 666 * http://www.unicode.org/Public/5.2.0/ucd/extracted/DerivedBidiClass.txt
669 * Bidi_Class=Right_To_Left | Bidi_Class=Arabic_Letter 667 * Bidi_Class=Right_To_Left | Bidi_Class=Arabic_Letter
670 */ 668 */
671 static const struct interval rtl_b[] = {
672# define BIG_(a,b) { a, b }, 669# define BIG_(a,b) { a, b },
673# define PAIR(a,b) 670# define PAIR(a,b)
674# define ARRAY \ 671# define ARRAY \
@@ -723,10 +720,9 @@ int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
723 {0x10E7F, 0x10FFF}, 720 {0x10E7F, 0x10FFF},
724 {0x1E800, 0x1EFFF} 721 {0x1E800, 0x1EFFF}
725 */ 722 */
726 ARRAY 723 static const struct interval rtl_b[] = { ARRAY };
727# undef BIG_ 724# undef BIG_
728# undef PAIR 725# undef PAIR
729 };
730# define BIG_(a,b) 726# define BIG_(a,b)
731# define PAIR(a,b) (a << 2) | (b-a), 727# define PAIR(a,b) (a << 2) | (b-a),
732 static const uint16_t rtl_p[] = { ARRAY }; 728 static const uint16_t rtl_p[] = { ARRAY };
@@ -755,7 +751,6 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
755 * White_Space, Other_Neutral, European_Number, European_Separator, 751 * White_Space, Other_Neutral, European_Number, European_Separator,
756 * European_Terminator, Arabic_Number, Common_Separator 752 * European_Terminator, Arabic_Number, Common_Separator
757 */ 753 */
758 static const struct interval neutral_b[] = {
759# define BIG_(a,b) { a, b }, 754# define BIG_(a,b) { a, b },
760# define PAIR(a,b) 755# define PAIR(a,b)
761# define ARRAY \ 756# define ARRAY \
@@ -929,10 +924,9 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
929 {0x1F030, 0x1F093}, 924 {0x1F030, 0x1F093},
930 {0x1F100, 0x1F10A} 925 {0x1F100, 0x1F10A}
931 */ 926 */
932 ARRAY 927 static const struct interval neutral_b[] = { ARRAY };
933# undef BIG_ 928# undef BIG_
934# undef PAIR 929# undef PAIR
935 };
936# define BIG_(a,b) 930# define BIG_(a,b)
937# define PAIR(a,b) (a << 2) | (b-a), 931# define PAIR(a,b) (a << 2) | (b-a),
938 static const uint16_t neutral_p[] = { ARRAY }; 932 static const uint16_t neutral_p[] = { ARRAY };