diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2010-01-31 05:15:38 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-01-31 05:15:38 +0100 |
commit | d8528b8e56bab7643722e4453121882d23c23c07 (patch) | |
tree | c742df066326cd571327b10d4cca3341c798d129 | |
parent | ed910c750d7908a31262488e04d38b7bf3d75322 (diff) | |
download | busybox-w32-d8528b8e56bab7643722e4453121882d23c23c07.tar.gz busybox-w32-d8528b8e56bab7643722e4453121882d23c23c07.tar.bz2 busybox-w32-d8528b8e56bab7643722e4453121882d23c23c07.zip |
ls: unicode fixes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | TODO_unicode | 2 | ||||
-rw-r--r-- | coreutils/ls.c | 412 | ||||
-rw-r--r-- | include/libbb.h | 19 | ||||
-rw-r--r-- | include/unicode.h | 5 | ||||
-rw-r--r-- | libbb/Kbuild | 1 | ||||
-rw-r--r-- | libbb/printable_string.c | 65 | ||||
-rw-r--r-- | testsuite/ls.mk_uni_tests | 111 | ||||
-rwxr-xr-x | testsuite/ls.tests | 136 |
8 files changed, 545 insertions, 206 deletions
diff --git a/TODO_unicode b/TODO_unicode index c29fd933b..b310e8d4d 100644 --- a/TODO_unicode +++ b/TODO_unicode | |||
@@ -7,7 +7,7 @@ dumpleases | |||
7 | Applets which may need unicode handling (more extensive than sanitizing | 7 | Applets which may need unicode handling (more extensive than sanitizing |
8 | of filenames in error messages): | 8 | of filenames in error messages): |
9 | 9 | ||
10 | ls - uses unicode_strlen, not scrlen | 10 | ls - work in progress |
11 | expand, unexpand - uses unicode_strlen, not scrlen | 11 | expand, unexpand - uses unicode_strlen, not scrlen |
12 | ash, hush through lineedit - uses unicode_strlen, not scrlen | 12 | ash, hush through lineedit - uses unicode_strlen, not scrlen |
13 | top - need to sanitize process args | 13 | top - need to sanitize process args |
diff --git a/coreutils/ls.c b/coreutils/ls.c index 6c898b793..d004ce8b1 100644 --- a/coreutils/ls.c +++ b/coreutils/ls.c | |||
@@ -241,9 +241,6 @@ struct dnode { | |||
241 | IF_SELINUX(security_context_t sid;) | 241 | IF_SELINUX(security_context_t sid;) |
242 | }; | 242 | }; |
243 | 243 | ||
244 | static struct dnode **list_dir(const char *, unsigned *); | ||
245 | static unsigned list_single(const struct dnode *); | ||
246 | |||
247 | struct globals { | 244 | struct globals { |
248 | #if ENABLE_FEATURE_LS_COLOR | 245 | #if ENABLE_FEATURE_LS_COLOR |
249 | smallint show_color; | 246 | smallint show_color; |
@@ -528,31 +525,236 @@ static void dnsort(struct dnode **dn, int size) | |||
528 | #endif | 525 | #endif |
529 | 526 | ||
530 | 527 | ||
531 | static void showfiles(struct dnode **dn, unsigned nfiles) | 528 | static unsigned calc_name_len(const char *name) |
529 | { | ||
530 | unsigned len; | ||
531 | uni_stat_t uni_stat; | ||
532 | |||
533 | // TODO: quote tab as \t, etc, if -Q | ||
534 | name = printable_string(&uni_stat, name); | ||
535 | |||
536 | if (!(option_mask32 & OPT_Q)) { | ||
537 | return uni_stat.unicode_width; | ||
538 | } | ||
539 | |||
540 | len = 2 + uni_stat.unicode_width; | ||
541 | while (*name) { | ||
542 | if (*name == '"' || *name == '\\') { | ||
543 | len++; | ||
544 | } | ||
545 | name++; | ||
546 | } | ||
547 | return len; | ||
548 | } | ||
549 | |||
550 | |||
551 | /* Return the number of used columns. | ||
552 | * Note that only STYLE_COLUMNS uses return value. | ||
553 | * STYLE_SINGLE and STYLE_LONG don't care. | ||
554 | * coreutils 7.2 also supports: | ||
555 | * ls -b (--escape) = octal escapes (although it doesn't seem to work) | ||
556 | * ls -N (--literal) = not escape at all | ||
557 | */ | ||
558 | static unsigned print_name(const char *name) | ||
559 | { | ||
560 | unsigned len; | ||
561 | uni_stat_t uni_stat; | ||
562 | |||
563 | // TODO: quote tab as \t, etc, if -Q | ||
564 | name = printable_string(&uni_stat, name); | ||
565 | |||
566 | if (!(option_mask32 & OPT_Q)) { | ||
567 | fputs(name, stdout); | ||
568 | return uni_stat.unicode_width; | ||
569 | } | ||
570 | |||
571 | len = 2 + uni_stat.unicode_width; | ||
572 | putchar('"'); | ||
573 | while (*name) { | ||
574 | if (*name == '"' || *name == '\\') { | ||
575 | putchar('\\'); | ||
576 | len++; | ||
577 | } | ||
578 | putchar(*name++); | ||
579 | } | ||
580 | putchar('"'); | ||
581 | return len; | ||
582 | } | ||
583 | |||
584 | /* Return the number of used columns. | ||
585 | * Note that only STYLE_COLUMNS uses return value, | ||
586 | * STYLE_SINGLE and STYLE_LONG don't care. | ||
587 | */ | ||
588 | static NOINLINE unsigned list_single(const struct dnode *dn) | ||
532 | { | 589 | { |
533 | unsigned i, ncols, nrows, row, nc; | ||
534 | unsigned column = 0; | 590 | unsigned column = 0; |
535 | unsigned nexttab = 0; | 591 | char *lpath = lpath; /* for compiler */ |
536 | unsigned column_width = 0; /* for STYLE_LONG and STYLE_SINGLE not used */ | 592 | #if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR |
593 | struct stat info; | ||
594 | char append; | ||
595 | #endif | ||
537 | 596 | ||
538 | /* Never happens: | 597 | /* Never happens: |
539 | if (dn == NULL || nfiles < 1) | 598 | if (dn->fullname == NULL) |
540 | return; | 599 | return 0; |
541 | */ | 600 | */ |
542 | 601 | ||
543 | if (all_fmt & STYLE_LONG) { | 602 | #if ENABLE_FEATURE_LS_FILETYPES |
603 | append = append_char(dn->dstat.st_mode); | ||
604 | #endif | ||
605 | |||
606 | /* Do readlink early, so that if it fails, error message | ||
607 | * does not appear *inside* the "ls -l" line */ | ||
608 | if (all_fmt & LIST_SYMLINK) | ||
609 | if (S_ISLNK(dn->dstat.st_mode)) | ||
610 | lpath = xmalloc_readlink_or_warn(dn->fullname); | ||
611 | |||
612 | if (all_fmt & LIST_INO) | ||
613 | column += printf("%7llu ", (long long) dn->dstat.st_ino); | ||
614 | if (all_fmt & LIST_BLOCKS) | ||
615 | column += printf("%4"OFF_FMT"u ", (off_t) (dn->dstat.st_blocks >> 1)); | ||
616 | if (all_fmt & LIST_MODEBITS) | ||
617 | column += printf("%-10s ", (char *) bb_mode_string(dn->dstat.st_mode)); | ||
618 | if (all_fmt & LIST_NLINKS) | ||
619 | column += printf("%4lu ", (long) dn->dstat.st_nlink); | ||
620 | #if ENABLE_FEATURE_LS_USERNAME | ||
621 | if (all_fmt & LIST_ID_NAME) { | ||
622 | if (option_mask32 & OPT_g) { | ||
623 | column += printf("%-8.8s ", | ||
624 | get_cached_username(dn->dstat.st_uid)); | ||
625 | } else { | ||
626 | column += printf("%-8.8s %-8.8s ", | ||
627 | get_cached_username(dn->dstat.st_uid), | ||
628 | get_cached_groupname(dn->dstat.st_gid)); | ||
629 | } | ||
630 | } | ||
631 | #endif | ||
632 | if (all_fmt & LIST_ID_NUMERIC) { | ||
633 | if (option_mask32 & OPT_g) | ||
634 | column += printf("%-8u ", (int) dn->dstat.st_uid); | ||
635 | else | ||
636 | column += printf("%-8u %-8u ", | ||
637 | (int) dn->dstat.st_uid, | ||
638 | (int) dn->dstat.st_gid); | ||
639 | } | ||
640 | if (all_fmt & (LIST_SIZE /*|LIST_DEV*/ )) { | ||
641 | if (S_ISBLK(dn->dstat.st_mode) || S_ISCHR(dn->dstat.st_mode)) { | ||
642 | column += printf("%4u, %3u ", | ||
643 | (int) major(dn->dstat.st_rdev), | ||
644 | (int) minor(dn->dstat.st_rdev)); | ||
645 | } else { | ||
646 | if (all_fmt & LS_DISP_HR) { | ||
647 | column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ", | ||
648 | /* print st_size, show one fractional, use suffixes */ | ||
649 | make_human_readable_str(dn->dstat.st_size, 1, 0) | ||
650 | ); | ||
651 | } else { | ||
652 | column += printf("%9"OFF_FMT"u ", (off_t) dn->dstat.st_size); | ||
653 | } | ||
654 | } | ||
655 | } | ||
656 | #if ENABLE_FEATURE_LS_TIMESTAMPS | ||
657 | if (all_fmt & (LIST_FULLTIME|LIST_DATE_TIME)) { | ||
658 | char *filetime; | ||
659 | time_t ttime = dn->dstat.st_mtime; | ||
660 | if (all_fmt & TIME_ACCESS) | ||
661 | ttime = dn->dstat.st_atime; | ||
662 | if (all_fmt & TIME_CHANGE) | ||
663 | ttime = dn->dstat.st_ctime; | ||
664 | filetime = ctime(&ttime); | ||
665 | /* filetime's format: "Wed Jun 30 21:49:08 1993\n" */ | ||
666 | if (all_fmt & LIST_FULLTIME) | ||
667 | column += printf("%.24s ", filetime); | ||
668 | else { /* LIST_DATE_TIME */ | ||
669 | /* current_time_t ~== time(NULL) */ | ||
670 | time_t age = current_time_t - ttime; | ||
671 | printf("%.6s ", filetime + 4); /* "Jun 30" */ | ||
672 | if (age < 3600L * 24 * 365 / 2 && age > -15 * 60) { | ||
673 | /* hh:mm if less than 6 months old */ | ||
674 | printf("%.5s ", filetime + 11); | ||
675 | } else { /* year. buggy if year > 9999 ;) */ | ||
676 | printf(" %.4s ", filetime + 20); | ||
677 | } | ||
678 | column += 13; | ||
679 | } | ||
680 | } | ||
681 | #endif | ||
682 | #if ENABLE_SELINUX | ||
683 | if (all_fmt & LIST_CONTEXT) { | ||
684 | column += printf("%-32s ", dn->sid ? dn->sid : "unknown"); | ||
685 | freecon(dn->sid); | ||
686 | } | ||
687 | #endif | ||
688 | if (all_fmt & LIST_FILENAME) { | ||
689 | #if ENABLE_FEATURE_LS_COLOR | ||
690 | if (show_color) { | ||
691 | info.st_mode = 0; /* for fgcolor() */ | ||
692 | lstat(dn->fullname, &info); | ||
693 | printf("\033[%u;%um", bold(info.st_mode), | ||
694 | fgcolor(info.st_mode)); | ||
695 | } | ||
696 | #endif | ||
697 | column += print_name(dn->name); | ||
698 | if (show_color) { | ||
699 | printf("\033[0m"); | ||
700 | } | ||
701 | } | ||
702 | if (all_fmt & LIST_SYMLINK) { | ||
703 | if (S_ISLNK(dn->dstat.st_mode) && lpath) { | ||
704 | printf(" -> "); | ||
705 | #if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR | ||
706 | #if ENABLE_FEATURE_LS_COLOR | ||
707 | info.st_mode = 0; /* for fgcolor() */ | ||
708 | #endif | ||
709 | if (stat(dn->fullname, &info) == 0) { | ||
710 | append = append_char(info.st_mode); | ||
711 | } | ||
712 | #endif | ||
713 | #if ENABLE_FEATURE_LS_COLOR | ||
714 | if (show_color) { | ||
715 | printf("\033[%u;%um", bold(info.st_mode), | ||
716 | fgcolor(info.st_mode)); | ||
717 | } | ||
718 | #endif | ||
719 | column += print_name(lpath) + 4; | ||
720 | if (show_color) { | ||
721 | printf("\033[0m"); | ||
722 | } | ||
723 | free(lpath); | ||
724 | } | ||
725 | } | ||
726 | #if ENABLE_FEATURE_LS_FILETYPES | ||
727 | if (all_fmt & LIST_FILETYPE) { | ||
728 | if (append) { | ||
729 | putchar(append); | ||
730 | column++; | ||
731 | } | ||
732 | } | ||
733 | #endif | ||
734 | |||
735 | return column; | ||
736 | } | ||
737 | |||
738 | static void showfiles(struct dnode **dn, unsigned nfiles) | ||
739 | { | ||
740 | unsigned i, ncols, nrows, row, nc; | ||
741 | unsigned column = 0; | ||
742 | unsigned nexttab = 0; | ||
743 | unsigned column_width = 0; /* used only by STYLE_COLUMNS */ | ||
744 | |||
745 | if (all_fmt & STYLE_LONG) { /* STYLE_LONG or STYLE_SINGLE */ | ||
544 | ncols = 1; | 746 | ncols = 1; |
545 | } else { | 747 | } else { |
546 | /* find the longest file name, use that as the column width */ | 748 | /* find the longest file name, use that as the column width */ |
547 | for (i = 0; dn[i]; i++) { | 749 | for (i = 0; dn[i]; i++) { |
548 | int len = unicode_strlen(dn[i]->name); | 750 | int len = calc_name_len(dn[i]->name); |
549 | if (column_width < len) | 751 | if (column_width < len) |
550 | column_width = len; | 752 | column_width = len; |
551 | } | 753 | } |
552 | column_width += tabstops + | 754 | column_width += tabstops + |
553 | IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + ) | 755 | IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + ) |
554 | ((all_fmt & LIST_INO) ? 8 : 0) + | 756 | ((all_fmt & LIST_INO) ? 8 : 0) + |
555 | ((all_fmt & LIST_BLOCKS) ? 5 : 0); | 757 | ((all_fmt & LIST_BLOCKS) ? 5 : 0); |
556 | ncols = (int) (terminal_width / column_width); | 758 | ncols = (int) (terminal_width / column_width); |
557 | } | 759 | } |
558 | 760 | ||
@@ -618,6 +820,8 @@ static off_t calculate_blocks(struct dnode **dn) | |||
618 | #endif | 820 | #endif |
619 | 821 | ||
620 | 822 | ||
823 | static struct dnode **list_dir(const char *, unsigned *); | ||
824 | |||
621 | static void showdirs(struct dnode **dn, int first) | 825 | static void showdirs(struct dnode **dn, int first) |
622 | { | 826 | { |
623 | unsigned nfiles; | 827 | unsigned nfiles; |
@@ -733,188 +937,6 @@ static struct dnode **list_dir(const char *path, unsigned *nfiles_p) | |||
733 | } | 937 | } |
734 | 938 | ||
735 | 939 | ||
736 | static int print_name(const char *name) | ||
737 | { | ||
738 | if (option_mask32 & OPT_Q) { | ||
739 | #if ENABLE_FEATURE_ASSUME_UNICODE | ||
740 | unsigned len = 2 + unicode_strlen(name); | ||
741 | #else | ||
742 | unsigned len = 2; | ||
743 | #endif | ||
744 | putchar('"'); | ||
745 | while (*name) { | ||
746 | if (*name == '"') { | ||
747 | putchar('\\'); | ||
748 | len++; | ||
749 | } | ||
750 | putchar(*name++); | ||
751 | if (!ENABLE_FEATURE_ASSUME_UNICODE) | ||
752 | len++; | ||
753 | } | ||
754 | putchar('"'); | ||
755 | return len; | ||
756 | } | ||
757 | /* No -Q: */ | ||
758 | #if ENABLE_FEATURE_ASSUME_UNICODE | ||
759 | fputs(name, stdout); | ||
760 | return unicode_strlen(name); | ||
761 | #else | ||
762 | return printf("%s", name); | ||
763 | #endif | ||
764 | } | ||
765 | |||
766 | |||
767 | static NOINLINE unsigned list_single(const struct dnode *dn) | ||
768 | { | ||
769 | unsigned column = 0; | ||
770 | char *lpath = lpath; /* for compiler */ | ||
771 | #if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR | ||
772 | struct stat info; | ||
773 | char append; | ||
774 | #endif | ||
775 | |||
776 | /* Never happens: | ||
777 | if (dn->fullname == NULL) | ||
778 | return 0; | ||
779 | */ | ||
780 | |||
781 | #if ENABLE_FEATURE_LS_FILETYPES | ||
782 | append = append_char(dn->dstat.st_mode); | ||
783 | #endif | ||
784 | |||
785 | /* Do readlink early, so that if it fails, error message | ||
786 | * does not appear *inside* the "ls -l" line */ | ||
787 | if (all_fmt & LIST_SYMLINK) | ||
788 | if (S_ISLNK(dn->dstat.st_mode)) | ||
789 | lpath = xmalloc_readlink_or_warn(dn->fullname); | ||
790 | |||
791 | if (all_fmt & LIST_INO) | ||
792 | column += printf("%7llu ", (long long) dn->dstat.st_ino); | ||
793 | if (all_fmt & LIST_BLOCKS) | ||
794 | column += printf("%4"OFF_FMT"u ", (off_t) (dn->dstat.st_blocks >> 1)); | ||
795 | if (all_fmt & LIST_MODEBITS) | ||
796 | column += printf("%-10s ", (char *) bb_mode_string(dn->dstat.st_mode)); | ||
797 | if (all_fmt & LIST_NLINKS) | ||
798 | column += printf("%4lu ", (long) dn->dstat.st_nlink); | ||
799 | #if ENABLE_FEATURE_LS_USERNAME | ||
800 | if (all_fmt & LIST_ID_NAME) { | ||
801 | if (option_mask32 & OPT_g) { | ||
802 | column += printf("%-8.8s ", | ||
803 | get_cached_username(dn->dstat.st_uid)); | ||
804 | } else { | ||
805 | column += printf("%-8.8s %-8.8s ", | ||
806 | get_cached_username(dn->dstat.st_uid), | ||
807 | get_cached_groupname(dn->dstat.st_gid)); | ||
808 | } | ||
809 | } | ||
810 | #endif | ||
811 | if (all_fmt & LIST_ID_NUMERIC) { | ||
812 | if (option_mask32 & OPT_g) | ||
813 | column += printf("%-8u ", (int) dn->dstat.st_uid); | ||
814 | else | ||
815 | column += printf("%-8u %-8u ", | ||
816 | (int) dn->dstat.st_uid, | ||
817 | (int) dn->dstat.st_gid); | ||
818 | } | ||
819 | if (all_fmt & (LIST_SIZE /*|LIST_DEV*/ )) { | ||
820 | if (S_ISBLK(dn->dstat.st_mode) || S_ISCHR(dn->dstat.st_mode)) { | ||
821 | column += printf("%4u, %3u ", | ||
822 | (int) major(dn->dstat.st_rdev), | ||
823 | (int) minor(dn->dstat.st_rdev)); | ||
824 | } else { | ||
825 | if (all_fmt & LS_DISP_HR) { | ||
826 | column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ", | ||
827 | /* print st_size, show one fractional, use suffixes */ | ||
828 | make_human_readable_str(dn->dstat.st_size, 1, 0) | ||
829 | ); | ||
830 | } else { | ||
831 | column += printf("%9"OFF_FMT"u ", (off_t) dn->dstat.st_size); | ||
832 | } | ||
833 | } | ||
834 | } | ||
835 | #if ENABLE_FEATURE_LS_TIMESTAMPS | ||
836 | if (all_fmt & (LIST_FULLTIME|LIST_DATE_TIME)) { | ||
837 | char *filetime; | ||
838 | time_t ttime = dn->dstat.st_mtime; | ||
839 | if (all_fmt & TIME_ACCESS) | ||
840 | ttime = dn->dstat.st_atime; | ||
841 | if (all_fmt & TIME_CHANGE) | ||
842 | ttime = dn->dstat.st_ctime; | ||
843 | filetime = ctime(&ttime); | ||
844 | /* filetime's format: "Wed Jun 30 21:49:08 1993\n" */ | ||
845 | if (all_fmt & LIST_FULLTIME) | ||
846 | column += printf("%.24s ", filetime); | ||
847 | else { /* LIST_DATE_TIME */ | ||
848 | /* current_time_t ~== time(NULL) */ | ||
849 | time_t age = current_time_t - ttime; | ||
850 | printf("%.6s ", filetime + 4); /* "Jun 30" */ | ||
851 | if (age < 3600L * 24 * 365 / 2 && age > -15 * 60) { | ||
852 | /* hh:mm if less than 6 months old */ | ||
853 | printf("%.5s ", filetime + 11); | ||
854 | } else { /* year. buggy if year > 9999 ;) */ | ||
855 | printf(" %.4s ", filetime + 20); | ||
856 | } | ||
857 | column += 13; | ||
858 | } | ||
859 | } | ||
860 | #endif | ||
861 | #if ENABLE_SELINUX | ||
862 | if (all_fmt & LIST_CONTEXT) { | ||
863 | column += printf("%-32s ", dn->sid ? dn->sid : "unknown"); | ||
864 | freecon(dn->sid); | ||
865 | } | ||
866 | #endif | ||
867 | if (all_fmt & LIST_FILENAME) { | ||
868 | #if ENABLE_FEATURE_LS_COLOR | ||
869 | if (show_color) { | ||
870 | info.st_mode = 0; /* for fgcolor() */ | ||
871 | lstat(dn->fullname, &info); | ||
872 | printf("\033[%u;%um", bold(info.st_mode), | ||
873 | fgcolor(info.st_mode)); | ||
874 | } | ||
875 | #endif | ||
876 | column += print_name(dn->name); | ||
877 | if (show_color) { | ||
878 | printf("\033[0m"); | ||
879 | } | ||
880 | } | ||
881 | if (all_fmt & LIST_SYMLINK) { | ||
882 | if (S_ISLNK(dn->dstat.st_mode) && lpath) { | ||
883 | printf(" -> "); | ||
884 | #if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR | ||
885 | #if ENABLE_FEATURE_LS_COLOR | ||
886 | info.st_mode = 0; /* for fgcolor() */ | ||
887 | #endif | ||
888 | if (stat(dn->fullname, &info) == 0) { | ||
889 | append = append_char(info.st_mode); | ||
890 | } | ||
891 | #endif | ||
892 | #if ENABLE_FEATURE_LS_COLOR | ||
893 | if (show_color) { | ||
894 | printf("\033[%u;%um", bold(info.st_mode), | ||
895 | fgcolor(info.st_mode)); | ||
896 | } | ||
897 | #endif | ||
898 | column += print_name(lpath) + 4; | ||
899 | if (show_color) { | ||
900 | printf("\033[0m"); | ||
901 | } | ||
902 | free(lpath); | ||
903 | } | ||
904 | } | ||
905 | #if ENABLE_FEATURE_LS_FILETYPES | ||
906 | if (all_fmt & LIST_FILETYPE) { | ||
907 | if (append) { | ||
908 | putchar(append); | ||
909 | column++; | ||
910 | } | ||
911 | } | ||
912 | #endif | ||
913 | |||
914 | return column; | ||
915 | } | ||
916 | |||
917 | |||
918 | int ls_main(int argc UNUSED_PARAM, char **argv) | 940 | int ls_main(int argc UNUSED_PARAM, char **argv) |
919 | { | 941 | { |
920 | struct dnode **dnd; | 942 | struct dnode **dnd; |
diff --git a/include/libbb.h b/include/libbb.h index 73aea409e..a86d64400 100644 --- a/include/libbb.h +++ b/include/libbb.h | |||
@@ -577,11 +577,6 @@ char *strncpy_IFNAMSIZ(char *dst, const char *src) FAST_FUNC; | |||
577 | * But potentially slow, don't use in one-billion-times loops */ | 577 | * But potentially slow, don't use in one-billion-times loops */ |
578 | int bb_putchar(int ch) FAST_FUNC; | 578 | int bb_putchar(int ch) FAST_FUNC; |
579 | char *xasprintf(const char *format, ...) __attribute__ ((format(printf, 1, 2))) FAST_FUNC RETURNS_MALLOC; | 579 | char *xasprintf(const char *format, ...) __attribute__ ((format(printf, 1, 2))) FAST_FUNC RETURNS_MALLOC; |
580 | /* Prints unprintable chars ch as ^C or M-c to file | ||
581 | * (M-c is used only if ch is ORed with PRINTABLE_META), | ||
582 | * else it is printed as-is (except for ch = 0x9b) */ | ||
583 | enum { PRINTABLE_META = 0x100 }; | ||
584 | void fputc_printable(int ch, FILE *file) FAST_FUNC; | ||
585 | // gcc-4.1.1 still isn't good enough at optimizing it | 580 | // gcc-4.1.1 still isn't good enough at optimizing it |
586 | // (+200 bytes compared to macro) | 581 | // (+200 bytes compared to macro) |
587 | //static ALWAYS_INLINE | 582 | //static ALWAYS_INLINE |
@@ -594,6 +589,20 @@ void fputc_printable(int ch, FILE *file) FAST_FUNC; | |||
594 | #define NOT_LONE_CHAR(s,c) ((s)[0] != (c) || (s)[1]) | 589 | #define NOT_LONE_CHAR(s,c) ((s)[0] != (c) || (s)[1]) |
595 | #define DOT_OR_DOTDOT(s) ((s)[0] == '.' && (!(s)[1] || ((s)[1] == '.' && !(s)[2]))) | 590 | #define DOT_OR_DOTDOT(s) ((s)[0] == '.' && (!(s)[1] || ((s)[1] == '.' && !(s)[2]))) |
596 | 591 | ||
592 | typedef struct uni_stat_t { | ||
593 | unsigned byte_count; | ||
594 | unsigned unicode_count; | ||
595 | unsigned unicode_width; | ||
596 | } uni_stat_t; | ||
597 | /* Returns a string with unprintable chars replaced by '?' or | ||
598 | * SUBST_WCHAR. This function is unicode-aware. */ | ||
599 | const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str); | ||
600 | /* Prints unprintable char ch as ^C or M-c to file | ||
601 | * (M-c is used only if ch is ORed with PRINTABLE_META), | ||
602 | * else it is printed as-is (except for ch = 0x9b) */ | ||
603 | enum { PRINTABLE_META = 0x100 }; | ||
604 | void fputc_printable(int ch, FILE *file) FAST_FUNC; | ||
605 | |||
597 | /* dmalloc will redefine these to its own implementation. It is safe | 606 | /* dmalloc will redefine these to its own implementation. It is safe |
598 | * to have the prototypes here unconditionally. */ | 607 | * to have the prototypes here unconditionally. */ |
599 | void *malloc_or_warn(size_t size) FAST_FUNC RETURNS_MALLOC; | 608 | void *malloc_or_warn(size_t size) FAST_FUNC RETURNS_MALLOC; |
diff --git a/include/unicode.h b/include/unicode.h index f32e56599..25ef7407e 100644 --- a/include/unicode.h +++ b/include/unicode.h | |||
@@ -23,11 +23,6 @@ size_t FAST_FUNC unicode_strlen(const char *string); | |||
23 | enum { | 23 | enum { |
24 | UNI_FLAG_PAD = (1 << 0), | 24 | UNI_FLAG_PAD = (1 << 0), |
25 | }; | 25 | }; |
26 | typedef struct uni_stat_t { | ||
27 | unsigned byte_count; | ||
28 | unsigned unicode_count; | ||
29 | unsigned unicode_width; | ||
30 | } uni_stat_t; | ||
31 | //UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src); | 26 | //UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src); |
32 | //UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags); | 27 | //UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags); |
33 | char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src); | 28 | char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src); |
diff --git a/libbb/Kbuild b/libbb/Kbuild index 243626d67..7e793109e 100644 --- a/libbb/Kbuild +++ b/libbb/Kbuild | |||
@@ -73,6 +73,7 @@ lib-y += perror_nomsg_and_die.o | |||
73 | lib-y += pidfile.o | 73 | lib-y += pidfile.o |
74 | lib-y += platform.o | 74 | lib-y += platform.o |
75 | lib-y += printable.o | 75 | lib-y += printable.o |
76 | lib-y += printable_string.o | ||
76 | lib-y += print_flags.o | 77 | lib-y += print_flags.o |
77 | lib-y += process_escape_sequence.o | 78 | lib-y += process_escape_sequence.o |
78 | lib-y += procps.o | 79 | lib-y += procps.o |
diff --git a/libbb/printable_string.c b/libbb/printable_string.c new file mode 100644 index 000000000..47565de0d --- /dev/null +++ b/libbb/printable_string.c | |||
@@ -0,0 +1,65 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
2 | /* | ||
3 | * Unicode support routines. | ||
4 | * | ||
5 | * Copyright (C) 2010 Denys Vlasenko | ||
6 | * | ||
7 | * Licensed under GPL version 2, see file LICENSE in this tarball for details. | ||
8 | */ | ||
9 | #include "libbb.h" | ||
10 | #include "unicode.h" | ||
11 | |||
12 | const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str) | ||
13 | { | ||
14 | static char *saved[4]; | ||
15 | static unsigned cur_saved; /* = 0 */ | ||
16 | |||
17 | char *dst; | ||
18 | const char *s; | ||
19 | |||
20 | s = str; | ||
21 | while (1) { | ||
22 | unsigned char c = *s; | ||
23 | if (c == '\0') { | ||
24 | /* 99+% of inputs do not need conversion */ | ||
25 | if (stats) { | ||
26 | stats->byte_count = (s - str); | ||
27 | stats->unicode_count = (s - str); | ||
28 | stats->unicode_width = (s - str); | ||
29 | } | ||
30 | return str; | ||
31 | } | ||
32 | if (c < ' ') | ||
33 | break; | ||
34 | if (c >= 0x7f) | ||
35 | break; | ||
36 | s++; | ||
37 | } | ||
38 | |||
39 | #if ENABLE_FEATURE_ASSUME_UNICODE | ||
40 | dst = unicode_conv_to_printable(stats, str); | ||
41 | #else | ||
42 | { | ||
43 | char *d = dst = xstrdup(str); | ||
44 | while (1) { | ||
45 | unsigned char c = *d; | ||
46 | if (c == '\0') | ||
47 | break; | ||
48 | if (c < ' ' || c >= 0x7f) | ||
49 | *d = '?'; | ||
50 | d++; | ||
51 | } | ||
52 | if (stats) { | ||
53 | stats->byte_count = (d - dst); | ||
54 | stats->unicode_count = (d - dst); | ||
55 | stats->unicode_width = (d - dst); | ||
56 | } | ||
57 | } | ||
58 | #endif | ||
59 | |||
60 | free(saved[cur_saved]); | ||
61 | saved[cur_saved] = dst; | ||
62 | cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1); | ||
63 | |||
64 | return dst; | ||
65 | } | ||
diff --git a/testsuite/ls.mk_uni_tests b/testsuite/ls.mk_uni_tests new file mode 100644 index 000000000..da0c29f29 --- /dev/null +++ b/testsuite/ls.mk_uni_tests | |||
@@ -0,0 +1,111 @@ | |||
1 | # DO NOT EDIT THIS FILE! MOST TEXT EDITORS WILL DAMAGE IT! | ||
2 | >'0001_1__Some_correct_UTF-8_text___________________________________________|' | ||
3 | >'0002_2__Boundary_condition_test_cases_____________________________________|' | ||
4 | >'0003_2.1__First_possible_sequence_of_a_certain_length_____________________|' | ||
5 | >'0004_2.1.2__2_bytes__U-00000080_:________""______________________________|' | ||
6 | >'0005_2.1.3__3_bytes__U-00000800_:________"ࠀ"______________________________|' | ||
7 | >'0006_2.1.4__4_bytes__U-00010000_:________"𐀀"______________________________|' | ||
8 | >'0007_2.1.5__5_bytes__U-00200000_:________""______________________________|' | ||
9 | >'0008_2.1.6__6_bytes__U-04000000_:________""______________________________|' | ||
10 | >'0009_2.2__Last_possible_sequence_of_a_certain_length______________________|' | ||
11 | >'0010_2.2.1__1_byte___U-0000007F_:________""______________________________|' | ||
12 | >'0011_2.2.2__2_bytes__U-000007FF_:________"߿"______________________________|' | ||
13 | >'0012_2.2.3__3_bytes__U-0000FFFF_:________""______________________________|' | ||
14 | >'0013_2.2.4__4_bytes__U-001FFFFF_:________""______________________________|' | ||
15 | >'0014_2.2.5__5_bytes__U-03FFFFFF_:________""______________________________|' | ||
16 | >'0015_2.2.6__6_bytes__U-7FFFFFFF_:________""______________________________|' | ||
17 | >'0016_2.3__Other_boundary_conditions_______________________________________|' | ||
18 | >'0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_""___________________________________|' | ||
19 | >'0018_2.3.2__U-0000E000_=_ee_80_80_=_""___________________________________|' | ||
20 | >'0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"�"___________________________________|' | ||
21 | >'0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_""________________________________|' | ||
22 | >'0021_2.3.5__U-00110000_=_f4_90_80_80_=_""________________________________|' | ||
23 | >'0022_3__Malformed_sequences_______________________________________________|' | ||
24 | >'0023_3.1__Unexpected_continuation_bytes___________________________________|' | ||
25 | >'0024_3.1.1__First_continuation_byte_0x80:_""_____________________________|' | ||
26 | >'0025_3.1.2__Last__continuation_byte_0xbf:_""_____________________________|' | ||
27 | >'0026_3.1.3__2_continuation_bytes:_""____________________________________|' | ||
28 | >'0027_3.1.4__3_continuation_bytes:_""___________________________________|' | ||
29 | >'0028_3.1.5__4_continuation_bytes:_""__________________________________|' | ||
30 | >'0029_3.1.6__5_continuation_bytes:_""_________________________________|' | ||
31 | >'0030_3.1.7__6_continuation_bytes:_""________________________________|' | ||
32 | >'0031_3.1.8__7_continuation_bytes:_""_______________________________|' | ||
33 | >'0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|' | ||
34 | >'0033____"_________________________________________________|' | ||
35 | >'0034______________________________________________________|' | ||
36 | >'0035______________________________________________________|' | ||
37 | >'0036_____"________________________________________________|' | ||
38 | >'0037_3.2__Lonely_start_characters_________________________________________|' | ||
39 | >'0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|' | ||
40 | >'0039________each_followed_by_a_space_character:___________________________|' | ||
41 | >'0040____"_________________________________________________|' | ||
42 | >'0041_____________________"________________________________|' | ||
43 | >'0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|' | ||
44 | >'0043________each_followed_by_a_space_character:___________________________|' | ||
45 | >'0044____"________________"________________________________|' | ||
46 | >'0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|' | ||
47 | >'0046________each_followed_by_a_space_character:___________________________|' | ||
48 | >'0047____"________"________________________________________________|' | ||
49 | >'0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|' | ||
50 | >'0049________each_followed_by_a_space_character:___________________________|' | ||
51 | >'0050____"____"________________________________________________________|' | ||
52 | >'0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|' | ||
53 | >'0052________each_followed_by_a_space_character:___________________________|' | ||
54 | >'0053____"__"____________________________________________________________|' | ||
55 | >'0054_3.3__Sequences_with_last_continuation_byte_missing___________________|' | ||
56 | >'0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____""______|' | ||
57 | >'0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____""______|' | ||
58 | >'0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____""______|' | ||
59 | >'0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____""______|' | ||
60 | >'0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____""______|' | ||
61 | >'0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_""______|' | ||
62 | >'0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_""______|' | ||
63 | >'0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_""______|' | ||
64 | >'0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_""______|' | ||
65 | >'0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_""______|' | ||
66 | >'0065_3.4__Concatenation_of_incomplete_sequences___________________________|' | ||
67 | >'0066____""______________________________________________________|' | ||
68 | >'0067_3.5__Impossible_bytes________________________________________________|' | ||
69 | >'0068_3.5.1__fe_=_""______________________________________________________|' | ||
70 | >'0069_3.5.2__ff_=_""______________________________________________________|' | ||
71 | >'0070_3.5.3__fe_fe_ff_ff_=_""__________________________________________|' | ||
72 | >'0071_4__Overlong_sequences________________________________________________|' | ||
73 | >'0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|' | ||
74 | >'0073_4.1.1_U+002F_=_c0_af_____________=_""_______________________________|' | ||
75 | >'0074_4.1.2_U+002F_=_e0_80_af__________=_""_______________________________|' | ||
76 | >'0075_4.1.3_U+002F_=_f0_80_80_af_______=_""_______________________________|' | ||
77 | >'0076_4.1.4_U+002F_=_f8_80_80_80_af____=_""_______________________________|' | ||
78 | >'0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_""_______________________________|' | ||
79 | >'0078_4.2__Maximum_overlong_sequences______________________________________|' | ||
80 | >'0079_4.2.1__U-0000007F_=_c1_bf_____________=_""__________________________|' | ||
81 | >'0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_""__________________________|' | ||
82 | >'0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_""__________________________|' | ||
83 | >'0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_""__________________________|' | ||
84 | >'0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_""__________________________|' | ||
85 | >'0084_4.3__Overlong_representation_of_the_NUL_character____________________|' | ||
86 | >'0085_4.3.1__U+0000_=_c0_80_____________=_""______________________________|' | ||
87 | >'0086_4.3.2__U+0000_=_e0_80_80__________=_""______________________________|' | ||
88 | >'0087_4.3.3__U+0000_=_f0_80_80_80_______=_""______________________________|' | ||
89 | >'0088_4.3.4__U+0000_=_f8_80_80_80_80____=_""______________________________|' | ||
90 | >'0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_""______________________________|' | ||
91 | >'0090_5__Illegal_code_positions____________________________________________|' | ||
92 | >'0091_5.1_Single_UTF-16_surrogates_________________________________________|' | ||
93 | >'0092_5.1.1__U+D800_=_ed_a0_80_=_""_______________________________________|' | ||
94 | >'0093_5.1.2__U+DB7F_=_ed_ad_bf_=_""_______________________________________|' | ||
95 | >'0094_5.1.3__U+DB80_=_ed_ae_80_=_""_______________________________________|' | ||
96 | >'0095_5.1.4__U+DBFF_=_ed_af_bf_=_""_______________________________________|' | ||
97 | >'0096_5.1.5__U+DC00_=_ed_b0_80_=_""_______________________________________|' | ||
98 | >'0097_5.1.6__U+DF80_=_ed_be_80_=_""_______________________________________|' | ||
99 | >'0098_5.1.7__U+DFFF_=_ed_bf_bf_=_""_______________________________________|' | ||
100 | >'0099_5.2_Paired_UTF-16_surrogates_________________________________________|' | ||
101 | >'0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_""______________________|' | ||
102 | >'0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_""______________________|' | ||
103 | >'0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_""______________________|' | ||
104 | >'0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_""______________________|' | ||
105 | >'0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_""______________________|' | ||
106 | >'0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_""______________________|' | ||
107 | >'0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_""______________________|' | ||
108 | >'0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_""______________________|' | ||
109 | >'0108_5.3_Other_illegal_code_positions_____________________________________|' | ||
110 | >'0109_5.3.1__U+FFFE_=_ef_bf_be_=_""_______________________________________|' | ||
111 | >'0110_5.3.2__U+FFFF_=_ef_bf_bf_=_""_______________________________________|' | ||
diff --git a/testsuite/ls.tests b/testsuite/ls.tests new file mode 100755 index 000000000..b0c5da7f9 --- /dev/null +++ b/testsuite/ls.tests | |||
@@ -0,0 +1,136 @@ | |||
1 | #!/bin/sh | ||
2 | # Copyright 2010 by Denys Vlasenko | ||
3 | # Licensed under GPL v2, see file LICENSE for details. | ||
4 | |||
5 | . ./testing.sh | ||
6 | |||
7 | test -f "$bindir/.config" && . "$bindir/.config" | ||
8 | |||
9 | rm -rf ls.testdir >/dev/null | ||
10 | mkdir ls.testdir || exit 1 | ||
11 | |||
12 | # testing "test name" "command" "expected result" "file input" "stdin" | ||
13 | |||
14 | # Known issue: the expected output below records the current behavior, which | ||
15 | # is not yet fully correct - in a correct run all | chars would line up perfectly. | ||
16 | test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \ | ||
17 | && test x"$CONFIG_SUBST_WCHAR" = x"63" \ | ||
18 | && test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \ | ||
19 | && testing "ls unicode test" \ | ||
20 | "(cd ls.testdir && sh ../ls.mk_uni_tests) && ls -1 ls.testdir" \ | ||
21 | '0001_1__Some_correct_UTF-8_text___________________________________________| | ||
22 | 0002_2__Boundary_condition_test_cases_____________________________________| | ||
23 | 0003_2.1__First_possible_sequence_of_a_certain_length_____________________| | ||
24 | 0004_2.1.2__2_bytes__U-00000080_:________"?"______________________________| | ||
25 | 0005_2.1.3__3_bytes__U-00000800_:________"?"______________________________| | ||
26 | 0006_2.1.4__4_bytes__U-00010000_:________"?"______________________________| | ||
27 | 0007_2.1.5__5_bytes__U-00200000_:________"?"______________________________| | ||
28 | 0008_2.1.6__6_bytes__U-04000000_:________"?"______________________________| | ||
29 | 0009_2.2__Last_possible_sequence_of_a_certain_length______________________| | ||
30 | 0010_2.2.1__1_byte___U-0000007F_:________"?"______________________________| | ||
31 | 0011_2.2.2__2_bytes__U-000007FF_:________"?"______________________________| | ||
32 | 0012_2.2.3__3_bytes__U-0000FFFF_:________"?"______________________________| | ||
33 | 0013_2.2.4__4_bytes__U-001FFFFF_:________"?"______________________________| | ||
34 | 0014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________| | ||
35 | 0015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________| | ||
36 | 0016_2.3__Other_boundary_conditions_______________________________________| | ||
37 | 0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"?"___________________________________| | ||
38 | 0018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________| | ||
39 | 0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"?"___________________________________| | ||
40 | 0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________| | ||
41 | 0021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________| | ||
42 | 0022_3__Malformed_sequences_______________________________________________| | ||
43 | 0023_3.1__Unexpected_continuation_bytes___________________________________| | ||
44 | 0024_3.1.1__First_continuation_byte_0x80:_"?"_____________________________| | ||
45 | 0025_3.1.2__Last__continuation_byte_0xbf:_"?"_____________________________| | ||
46 | 0026_3.1.3__2_continuation_bytes:_"??"____________________________________| | ||
47 | 0027_3.1.4__3_continuation_bytes:_"???"___________________________________| | ||
48 | 0028_3.1.5__4_continuation_bytes:_"????"__________________________________| | ||
49 | 0029_3.1.6__5_continuation_bytes:_"?????"_________________________________| | ||
50 | 0030_3.1.7__6_continuation_bytes:_"??????"________________________________| | ||
51 | 0031_3.1.8__7_continuation_bytes:_"???????"_______________________________| | ||
52 | 0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___| | ||
53 | 0033____"????????????????_________________________________________________| | ||
54 | 0034_____????????????????_________________________________________________| | ||
55 | 0035_____????????????????_________________________________________________| | ||
56 | 0036_____????????????????"________________________________________________| | ||
57 | 0037_3.2__Lonely_start_characters_________________________________________| | ||
58 | 0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________| | ||
59 | 0039________each_followed_by_a_space_character:___________________________| | ||
60 | 0040____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?__________________________________| | ||
61 | 0041_____?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________| | ||
62 | 0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________| | ||
63 | 0043________each_followed_by_a_space_character:___________________________| | ||
64 | 0044____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________| | ||
65 | 0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________| | ||
66 | 0046________each_followed_by_a_space_character:___________________________| | ||
67 | 0047____"?_?_?_?_?_?_?_?_"________________________________________________| | ||
68 | 0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________| | ||
69 | 0049________each_followed_by_a_space_character:___________________________| | ||
70 | 0050____"?_?_?_?_"________________________________________________________| | ||
71 | 0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________| | ||
72 | 0052________each_followed_by_a_space_character:___________________________| | ||
73 | 0053____"?_?_"____________________________________________________________| | ||
74 | 0054_3.3__Sequences_with_last_continuation_byte_missing___________________| | ||
75 | 0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______| | ||
76 | 0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"??"______| | ||
77 | 0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"???"______| | ||
78 | 0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"????"______| | ||
79 | 0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"?????"______| | ||
80 | 0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"?"______| | ||
81 | 0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"??"______| | ||
82 | 0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"???"______| | ||
83 | 0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"????"______| | ||
84 | 0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"?????"______| | ||
85 | 0065_3.4__Concatenation_of_incomplete_sequences___________________________| | ||
86 | 0066____"??????????????????????????????"______________________________________________________| | ||
87 | 0067_3.5__Impossible_bytes________________________________________________| | ||
88 | 0068_3.5.1__fe_=_"?"______________________________________________________| | ||
89 | 0069_3.5.2__ff_=_"?"______________________________________________________| | ||
90 | 0070_3.5.3__fe_fe_ff_ff_=_"????"__________________________________________| | ||
91 | 0071_4__Overlong_sequences________________________________________________| | ||
92 | 0072_4.1__Examples_of_an_overlong_ASCII_character_________________________| | ||
93 | 0073_4.1.1_U+002F_=_c0_af_____________=_"??"_______________________________| | ||
94 | 0074_4.1.2_U+002F_=_e0_80_af__________=_"???"_______________________________| | ||
95 | 0075_4.1.3_U+002F_=_f0_80_80_af_______=_"????"_______________________________| | ||
96 | 0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"?????"_______________________________| | ||
97 | 0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"??????"_______________________________| | ||
98 | 0078_4.2__Maximum_overlong_sequences______________________________________| | ||
99 | 0079_4.2.1__U-0000007F_=_c1_bf_____________=_"??"__________________________| | ||
100 | 0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"?"__________________________| | ||
101 | 0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"?"__________________________| | ||
102 | 0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"?"__________________________| | ||
103 | 0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"?"__________________________| | ||
104 | 0084_4.3__Overlong_representation_of_the_NUL_character____________________| | ||
105 | 0085_4.3.1__U+0000_=_c0_80_____________=_"??"______________________________| | ||
106 | 0086_4.3.2__U+0000_=_e0_80_80__________=_"???"______________________________| | ||
107 | 0087_4.3.3__U+0000_=_f0_80_80_80_______=_"????"______________________________| | ||
108 | 0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"?????"______________________________| | ||
109 | 0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"??????"______________________________| | ||
110 | 0090_5__Illegal_code_positions____________________________________________| | ||
111 | 0091_5.1_Single_UTF-16_surrogates_________________________________________| | ||
112 | 0092_5.1.1__U+D800_=_ed_a0_80_=_"?"_______________________________________| | ||
113 | 0093_5.1.2__U+DB7F_=_ed_ad_bf_=_"?"_______________________________________| | ||
114 | 0094_5.1.3__U+DB80_=_ed_ae_80_=_"?"_______________________________________| | ||
115 | 0095_5.1.4__U+DBFF_=_ed_af_bf_=_"?"_______________________________________| | ||
116 | 0096_5.1.5__U+DC00_=_ed_b0_80_=_"?"_______________________________________| | ||
117 | 0097_5.1.6__U+DF80_=_ed_be_80_=_"?"_______________________________________| | ||
118 | 0098_5.1.7__U+DFFF_=_ed_bf_bf_=_"?"_______________________________________| | ||
119 | 0099_5.2_Paired_UTF-16_surrogates_________________________________________| | ||
120 | 0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_"??"______________________| | ||
121 | 0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_"??"______________________| | ||
122 | 0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_"??"______________________| | ||
123 | 0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_"??"______________________| | ||
124 | 0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_"??"______________________| | ||
125 | 0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_"??"______________________| | ||
126 | 0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_"??"______________________| | ||
127 | 0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_"??"______________________| | ||
128 | 0108_5.3_Other_illegal_code_positions_____________________________________| | ||
129 | 0109_5.3.1__U+FFFE_=_ef_bf_be_=_"?"_______________________________________| | ||
130 | 0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"?"_______________________________________| | ||
131 | ' "" "" | ||
132 | |||
133 | # Clean up | ||
134 | rm -rf ls.testdir 2>/dev/null | ||
135 | |||
136 | exit $FAILCOUNT | ||