aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2010-01-31 05:15:38 +0100
committerDenys Vlasenko <vda.linux@googlemail.com>2010-01-31 05:15:38 +0100
commitd8528b8e56bab7643722e4453121882d23c23c07 (patch)
treec742df066326cd571327b10d4cca3341c798d129
parented910c750d7908a31262488e04d38b7bf3d75322 (diff)
downloadbusybox-w32-d8528b8e56bab7643722e4453121882d23c23c07.tar.gz
busybox-w32-d8528b8e56bab7643722e4453121882d23c23c07.tar.bz2
busybox-w32-d8528b8e56bab7643722e4453121882d23c23c07.zip
ls: unicode fixes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--TODO_unicode2
-rw-r--r--coreutils/ls.c412
-rw-r--r--include/libbb.h19
-rw-r--r--include/unicode.h5
-rw-r--r--libbb/Kbuild1
-rw-r--r--libbb/printable_string.c65
-rw-r--r--testsuite/ls.mk_uni_tests111
-rwxr-xr-xtestsuite/ls.tests136
8 files changed, 545 insertions, 206 deletions
diff --git a/TODO_unicode b/TODO_unicode
index c29fd933b..b310e8d4d 100644
--- a/TODO_unicode
+++ b/TODO_unicode
@@ -7,7 +7,7 @@ dumpleases
7Applets which may need unicode handling (more extensive than sanitizing 7Applets which may need unicode handling (more extensive than sanitizing
8of filenames in error messages): 8of filenames in error messages):
9 9
10ls - uses unicode_strlen, not scrlen 10ls - work in progress
11expand, unexpand - uses unicode_strlen, not scrlen 11expand, unexpand - uses unicode_strlen, not scrlen
12ash, hush through lineedit - uses unicode_strlen, not scrlen 12ash, hush through lineedit - uses unicode_strlen, not scrlen
13top - need to sanitize process args 13top - need to sanitize process args
diff --git a/coreutils/ls.c b/coreutils/ls.c
index 6c898b793..d004ce8b1 100644
--- a/coreutils/ls.c
+++ b/coreutils/ls.c
@@ -241,9 +241,6 @@ struct dnode {
241 IF_SELINUX(security_context_t sid;) 241 IF_SELINUX(security_context_t sid;)
242}; 242};
243 243
244static struct dnode **list_dir(const char *, unsigned *);
245static unsigned list_single(const struct dnode *);
246
247struct globals { 244struct globals {
248#if ENABLE_FEATURE_LS_COLOR 245#if ENABLE_FEATURE_LS_COLOR
249 smallint show_color; 246 smallint show_color;
@@ -528,31 +525,236 @@ static void dnsort(struct dnode **dn, int size)
528#endif 525#endif
529 526
530 527
531static void showfiles(struct dnode **dn, unsigned nfiles) 528static unsigned calc_name_len(const char *name)
529{
530 unsigned len;
531 uni_stat_t uni_stat;
532
533 // TODO: quote tab as \t, etc, if -Q
534 name = printable_string(&uni_stat, name);
535
536 if (!(option_mask32 & OPT_Q)) {
537 return uni_stat.unicode_width;
538 }
539
540 len = 2 + uni_stat.unicode_width;
541 while (*name) {
542 if (*name == '"' || *name == '\\') {
543 len++;
544 }
545 name++;
546 }
547 return len;
548}
549
550
551/* Return the number of used columns.
552 * Note that only STYLE_COLUMNS uses return value.
553 * STYLE_SINGLE and STYLE_LONG don't care.
554 * coreutils 7.2 also supports:
555 * ls -b (--escape) = octal escapes (although it doesn't look like working)
556 * ls -N (--literal) = not escape at all
557 */
558static unsigned print_name(const char *name)
559{
560 unsigned len;
561 uni_stat_t uni_stat;
562
563 // TODO: quote tab as \t, etc, if -Q
564 name = printable_string(&uni_stat, name);
565
566 if (!(option_mask32 & OPT_Q)) {
567 fputs(name, stdout);
568 return uni_stat.unicode_width;
569 }
570
571 len = 2 + uni_stat.unicode_width;
572 putchar('"');
573 while (*name) {
574 if (*name == '"' || *name == '\\') {
575 putchar('\\');
576 len++;
577 }
578 putchar(*name++);
579 }
580 putchar('"');
581 return len;
582}
583
584/* Return the number of used columns.
585 * Note that only STYLE_COLUMNS uses return value,
586 * STYLE_SINGLE and STYLE_LONG don't care.
587 */
588static NOINLINE unsigned list_single(const struct dnode *dn)
532{ 589{
533 unsigned i, ncols, nrows, row, nc;
534 unsigned column = 0; 590 unsigned column = 0;
535 unsigned nexttab = 0; 591 char *lpath = lpath; /* for compiler */
536 unsigned column_width = 0; /* for STYLE_LONG and STYLE_SINGLE not used */ 592#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
593 struct stat info;
594 char append;
595#endif
537 596
538 /* Never happens: 597 /* Never happens:
539 if (dn == NULL || nfiles < 1) 598 if (dn->fullname == NULL)
540 return; 599 return 0;
541 */ 600 */
542 601
543 if (all_fmt & STYLE_LONG) { 602#if ENABLE_FEATURE_LS_FILETYPES
603 append = append_char(dn->dstat.st_mode);
604#endif
605
606 /* Do readlink early, so that if it fails, error message
607 * does not appear *inside* the "ls -l" line */
608 if (all_fmt & LIST_SYMLINK)
609 if (S_ISLNK(dn->dstat.st_mode))
610 lpath = xmalloc_readlink_or_warn(dn->fullname);
611
612 if (all_fmt & LIST_INO)
613 column += printf("%7llu ", (long long) dn->dstat.st_ino);
614 if (all_fmt & LIST_BLOCKS)
615 column += printf("%4"OFF_FMT"u ", (off_t) (dn->dstat.st_blocks >> 1));
616 if (all_fmt & LIST_MODEBITS)
617 column += printf("%-10s ", (char *) bb_mode_string(dn->dstat.st_mode));
618 if (all_fmt & LIST_NLINKS)
619 column += printf("%4lu ", (long) dn->dstat.st_nlink);
620#if ENABLE_FEATURE_LS_USERNAME
621 if (all_fmt & LIST_ID_NAME) {
622 if (option_mask32 & OPT_g) {
623 column += printf("%-8.8s ",
624 get_cached_username(dn->dstat.st_uid));
625 } else {
626 column += printf("%-8.8s %-8.8s ",
627 get_cached_username(dn->dstat.st_uid),
628 get_cached_groupname(dn->dstat.st_gid));
629 }
630 }
631#endif
632 if (all_fmt & LIST_ID_NUMERIC) {
633 if (option_mask32 & OPT_g)
634 column += printf("%-8u ", (int) dn->dstat.st_uid);
635 else
636 column += printf("%-8u %-8u ",
637 (int) dn->dstat.st_uid,
638 (int) dn->dstat.st_gid);
639 }
640 if (all_fmt & (LIST_SIZE /*|LIST_DEV*/ )) {
641 if (S_ISBLK(dn->dstat.st_mode) || S_ISCHR(dn->dstat.st_mode)) {
642 column += printf("%4u, %3u ",
643 (int) major(dn->dstat.st_rdev),
644 (int) minor(dn->dstat.st_rdev));
645 } else {
646 if (all_fmt & LS_DISP_HR) {
647 column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ",
648 /* print st_size, show one fractional, use suffixes */
649 make_human_readable_str(dn->dstat.st_size, 1, 0)
650 );
651 } else {
652 column += printf("%9"OFF_FMT"u ", (off_t) dn->dstat.st_size);
653 }
654 }
655 }
656#if ENABLE_FEATURE_LS_TIMESTAMPS
657 if (all_fmt & (LIST_FULLTIME|LIST_DATE_TIME)) {
658 char *filetime;
659 time_t ttime = dn->dstat.st_mtime;
660 if (all_fmt & TIME_ACCESS)
661 ttime = dn->dstat.st_atime;
662 if (all_fmt & TIME_CHANGE)
663 ttime = dn->dstat.st_ctime;
664 filetime = ctime(&ttime);
665 /* filetime's format: "Wed Jun 30 21:49:08 1993\n" */
666 if (all_fmt & LIST_FULLTIME)
667 column += printf("%.24s ", filetime);
668 else { /* LIST_DATE_TIME */
669 /* current_time_t ~== time(NULL) */
670 time_t age = current_time_t - ttime;
671 printf("%.6s ", filetime + 4); /* "Jun 30" */
672 if (age < 3600L * 24 * 365 / 2 && age > -15 * 60) {
673 /* hh:mm if less than 6 months old */
674 printf("%.5s ", filetime + 11);
675 } else { /* year. buggy if year > 9999 ;) */
676 printf(" %.4s ", filetime + 20);
677 }
678 column += 13;
679 }
680 }
681#endif
682#if ENABLE_SELINUX
683 if (all_fmt & LIST_CONTEXT) {
684 column += printf("%-32s ", dn->sid ? dn->sid : "unknown");
685 freecon(dn->sid);
686 }
687#endif
688 if (all_fmt & LIST_FILENAME) {
689#if ENABLE_FEATURE_LS_COLOR
690 if (show_color) {
691 info.st_mode = 0; /* for fgcolor() */
692 lstat(dn->fullname, &info);
693 printf("\033[%u;%um", bold(info.st_mode),
694 fgcolor(info.st_mode));
695 }
696#endif
697 column += print_name(dn->name);
698 if (show_color) {
699 printf("\033[0m");
700 }
701 }
702 if (all_fmt & LIST_SYMLINK) {
703 if (S_ISLNK(dn->dstat.st_mode) && lpath) {
704 printf(" -> ");
705#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
706#if ENABLE_FEATURE_LS_COLOR
707 info.st_mode = 0; /* for fgcolor() */
708#endif
709 if (stat(dn->fullname, &info) == 0) {
710 append = append_char(info.st_mode);
711 }
712#endif
713#if ENABLE_FEATURE_LS_COLOR
714 if (show_color) {
715 printf("\033[%u;%um", bold(info.st_mode),
716 fgcolor(info.st_mode));
717 }
718#endif
719 column += print_name(lpath) + 4;
720 if (show_color) {
721 printf("\033[0m");
722 }
723 free(lpath);
724 }
725 }
726#if ENABLE_FEATURE_LS_FILETYPES
727 if (all_fmt & LIST_FILETYPE) {
728 if (append) {
729 putchar(append);
730 column++;
731 }
732 }
733#endif
734
735 return column;
736}
737
738static void showfiles(struct dnode **dn, unsigned nfiles)
739{
740 unsigned i, ncols, nrows, row, nc;
741 unsigned column = 0;
742 unsigned nexttab = 0;
743 unsigned column_width = 0; /* used only by STYLE_COLUMNS */
744
745 if (all_fmt & STYLE_LONG) { /* STYLE_LONG or STYLE_SINGLE */
544 ncols = 1; 746 ncols = 1;
545 } else { 747 } else {
546 /* find the longest file name, use that as the column width */ 748 /* find the longest file name, use that as the column width */
547 for (i = 0; dn[i]; i++) { 749 for (i = 0; dn[i]; i++) {
548 int len = unicode_strlen(dn[i]->name); 750 int len = calc_name_len(dn[i]->name);
549 if (column_width < len) 751 if (column_width < len)
550 column_width = len; 752 column_width = len;
551 } 753 }
552 column_width += tabstops + 754 column_width += tabstops +
553 IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + ) 755 IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + )
554 ((all_fmt & LIST_INO) ? 8 : 0) + 756 ((all_fmt & LIST_INO) ? 8 : 0) +
555 ((all_fmt & LIST_BLOCKS) ? 5 : 0); 757 ((all_fmt & LIST_BLOCKS) ? 5 : 0);
556 ncols = (int) (terminal_width / column_width); 758 ncols = (int) (terminal_width / column_width);
557 } 759 }
558 760
@@ -618,6 +820,8 @@ static off_t calculate_blocks(struct dnode **dn)
618#endif 820#endif
619 821
620 822
823static struct dnode **list_dir(const char *, unsigned *);
824
621static void showdirs(struct dnode **dn, int first) 825static void showdirs(struct dnode **dn, int first)
622{ 826{
623 unsigned nfiles; 827 unsigned nfiles;
@@ -733,188 +937,6 @@ static struct dnode **list_dir(const char *path, unsigned *nfiles_p)
733} 937}
734 938
735 939
736static int print_name(const char *name)
737{
738 if (option_mask32 & OPT_Q) {
739#if ENABLE_FEATURE_ASSUME_UNICODE
740 unsigned len = 2 + unicode_strlen(name);
741#else
742 unsigned len = 2;
743#endif
744 putchar('"');
745 while (*name) {
746 if (*name == '"') {
747 putchar('\\');
748 len++;
749 }
750 putchar(*name++);
751 if (!ENABLE_FEATURE_ASSUME_UNICODE)
752 len++;
753 }
754 putchar('"');
755 return len;
756 }
757 /* No -Q: */
758#if ENABLE_FEATURE_ASSUME_UNICODE
759 fputs(name, stdout);
760 return unicode_strlen(name);
761#else
762 return printf("%s", name);
763#endif
764}
765
766
767static NOINLINE unsigned list_single(const struct dnode *dn)
768{
769 unsigned column = 0;
770 char *lpath = lpath; /* for compiler */
771#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
772 struct stat info;
773 char append;
774#endif
775
776 /* Never happens:
777 if (dn->fullname == NULL)
778 return 0;
779 */
780
781#if ENABLE_FEATURE_LS_FILETYPES
782 append = append_char(dn->dstat.st_mode);
783#endif
784
785 /* Do readlink early, so that if it fails, error message
786 * does not appear *inside* the "ls -l" line */
787 if (all_fmt & LIST_SYMLINK)
788 if (S_ISLNK(dn->dstat.st_mode))
789 lpath = xmalloc_readlink_or_warn(dn->fullname);
790
791 if (all_fmt & LIST_INO)
792 column += printf("%7llu ", (long long) dn->dstat.st_ino);
793 if (all_fmt & LIST_BLOCKS)
794 column += printf("%4"OFF_FMT"u ", (off_t) (dn->dstat.st_blocks >> 1));
795 if (all_fmt & LIST_MODEBITS)
796 column += printf("%-10s ", (char *) bb_mode_string(dn->dstat.st_mode));
797 if (all_fmt & LIST_NLINKS)
798 column += printf("%4lu ", (long) dn->dstat.st_nlink);
799#if ENABLE_FEATURE_LS_USERNAME
800 if (all_fmt & LIST_ID_NAME) {
801 if (option_mask32 & OPT_g) {
802 column += printf("%-8.8s ",
803 get_cached_username(dn->dstat.st_uid));
804 } else {
805 column += printf("%-8.8s %-8.8s ",
806 get_cached_username(dn->dstat.st_uid),
807 get_cached_groupname(dn->dstat.st_gid));
808 }
809 }
810#endif
811 if (all_fmt & LIST_ID_NUMERIC) {
812 if (option_mask32 & OPT_g)
813 column += printf("%-8u ", (int) dn->dstat.st_uid);
814 else
815 column += printf("%-8u %-8u ",
816 (int) dn->dstat.st_uid,
817 (int) dn->dstat.st_gid);
818 }
819 if (all_fmt & (LIST_SIZE /*|LIST_DEV*/ )) {
820 if (S_ISBLK(dn->dstat.st_mode) || S_ISCHR(dn->dstat.st_mode)) {
821 column += printf("%4u, %3u ",
822 (int) major(dn->dstat.st_rdev),
823 (int) minor(dn->dstat.st_rdev));
824 } else {
825 if (all_fmt & LS_DISP_HR) {
826 column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ",
827 /* print st_size, show one fractional, use suffixes */
828 make_human_readable_str(dn->dstat.st_size, 1, 0)
829 );
830 } else {
831 column += printf("%9"OFF_FMT"u ", (off_t) dn->dstat.st_size);
832 }
833 }
834 }
835#if ENABLE_FEATURE_LS_TIMESTAMPS
836 if (all_fmt & (LIST_FULLTIME|LIST_DATE_TIME)) {
837 char *filetime;
838 time_t ttime = dn->dstat.st_mtime;
839 if (all_fmt & TIME_ACCESS)
840 ttime = dn->dstat.st_atime;
841 if (all_fmt & TIME_CHANGE)
842 ttime = dn->dstat.st_ctime;
843 filetime = ctime(&ttime);
844 /* filetime's format: "Wed Jun 30 21:49:08 1993\n" */
845 if (all_fmt & LIST_FULLTIME)
846 column += printf("%.24s ", filetime);
847 else { /* LIST_DATE_TIME */
848 /* current_time_t ~== time(NULL) */
849 time_t age = current_time_t - ttime;
850 printf("%.6s ", filetime + 4); /* "Jun 30" */
851 if (age < 3600L * 24 * 365 / 2 && age > -15 * 60) {
852 /* hh:mm if less than 6 months old */
853 printf("%.5s ", filetime + 11);
854 } else { /* year. buggy if year > 9999 ;) */
855 printf(" %.4s ", filetime + 20);
856 }
857 column += 13;
858 }
859 }
860#endif
861#if ENABLE_SELINUX
862 if (all_fmt & LIST_CONTEXT) {
863 column += printf("%-32s ", dn->sid ? dn->sid : "unknown");
864 freecon(dn->sid);
865 }
866#endif
867 if (all_fmt & LIST_FILENAME) {
868#if ENABLE_FEATURE_LS_COLOR
869 if (show_color) {
870 info.st_mode = 0; /* for fgcolor() */
871 lstat(dn->fullname, &info);
872 printf("\033[%u;%um", bold(info.st_mode),
873 fgcolor(info.st_mode));
874 }
875#endif
876 column += print_name(dn->name);
877 if (show_color) {
878 printf("\033[0m");
879 }
880 }
881 if (all_fmt & LIST_SYMLINK) {
882 if (S_ISLNK(dn->dstat.st_mode) && lpath) {
883 printf(" -> ");
884#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
885#if ENABLE_FEATURE_LS_COLOR
886 info.st_mode = 0; /* for fgcolor() */
887#endif
888 if (stat(dn->fullname, &info) == 0) {
889 append = append_char(info.st_mode);
890 }
891#endif
892#if ENABLE_FEATURE_LS_COLOR
893 if (show_color) {
894 printf("\033[%u;%um", bold(info.st_mode),
895 fgcolor(info.st_mode));
896 }
897#endif
898 column += print_name(lpath) + 4;
899 if (show_color) {
900 printf("\033[0m");
901 }
902 free(lpath);
903 }
904 }
905#if ENABLE_FEATURE_LS_FILETYPES
906 if (all_fmt & LIST_FILETYPE) {
907 if (append) {
908 putchar(append);
909 column++;
910 }
911 }
912#endif
913
914 return column;
915}
916
917
918int ls_main(int argc UNUSED_PARAM, char **argv) 940int ls_main(int argc UNUSED_PARAM, char **argv)
919{ 941{
920 struct dnode **dnd; 942 struct dnode **dnd;
diff --git a/include/libbb.h b/include/libbb.h
index 73aea409e..a86d64400 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -577,11 +577,6 @@ char *strncpy_IFNAMSIZ(char *dst, const char *src) FAST_FUNC;
577 * But potentially slow, don't use in one-billion-times loops */ 577 * But potentially slow, don't use in one-billion-times loops */
578int bb_putchar(int ch) FAST_FUNC; 578int bb_putchar(int ch) FAST_FUNC;
579char *xasprintf(const char *format, ...) __attribute__ ((format(printf, 1, 2))) FAST_FUNC RETURNS_MALLOC; 579char *xasprintf(const char *format, ...) __attribute__ ((format(printf, 1, 2))) FAST_FUNC RETURNS_MALLOC;
580/* Prints unprintable chars ch as ^C or M-c to file
581 * (M-c is used only if ch is ORed with PRINTABLE_META),
582 * else it is printed as-is (except for ch = 0x9b) */
583enum { PRINTABLE_META = 0x100 };
584void fputc_printable(int ch, FILE *file) FAST_FUNC;
585// gcc-4.1.1 still isn't good enough at optimizing it 580// gcc-4.1.1 still isn't good enough at optimizing it
586// (+200 bytes compared to macro) 581// (+200 bytes compared to macro)
587//static ALWAYS_INLINE 582//static ALWAYS_INLINE
@@ -594,6 +589,20 @@ void fputc_printable(int ch, FILE *file) FAST_FUNC;
594#define NOT_LONE_CHAR(s,c) ((s)[0] != (c) || (s)[1]) 589#define NOT_LONE_CHAR(s,c) ((s)[0] != (c) || (s)[1])
595#define DOT_OR_DOTDOT(s) ((s)[0] == '.' && (!(s)[1] || ((s)[1] == '.' && !(s)[2]))) 590#define DOT_OR_DOTDOT(s) ((s)[0] == '.' && (!(s)[1] || ((s)[1] == '.' && !(s)[2])))
596 591
592typedef struct uni_stat_t {
593 unsigned byte_count;
594 unsigned unicode_count;
595 unsigned unicode_width;
596} uni_stat_t;
597/* Returns a string with unprintable chars replaced by '?' or
598 * SUBST_WCHAR. This function is unicode-aware. */
599const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str);
600/* Prints unprintable char ch as ^C or M-c to file
601 * (M-c is used only if ch is ORed with PRINTABLE_META),
602 * else it is printed as-is (except for ch = 0x9b) */
603enum { PRINTABLE_META = 0x100 };
604void fputc_printable(int ch, FILE *file) FAST_FUNC;
605
597/* dmalloc will redefine these to it's own implementation. It is safe 606/* dmalloc will redefine these to it's own implementation. It is safe
598 * to have the prototypes here unconditionally. */ 607 * to have the prototypes here unconditionally. */
599void *malloc_or_warn(size_t size) FAST_FUNC RETURNS_MALLOC; 608void *malloc_or_warn(size_t size) FAST_FUNC RETURNS_MALLOC;
diff --git a/include/unicode.h b/include/unicode.h
index f32e56599..25ef7407e 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -23,11 +23,6 @@ size_t FAST_FUNC unicode_strlen(const char *string);
23enum { 23enum {
24 UNI_FLAG_PAD = (1 << 0), 24 UNI_FLAG_PAD = (1 << 0),
25}; 25};
26typedef struct uni_stat_t {
27 unsigned byte_count;
28 unsigned unicode_count;
29 unsigned unicode_width;
30} uni_stat_t;
31//UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src); 26//UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src);
32//UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags); 27//UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags);
33char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src); 28char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src);
diff --git a/libbb/Kbuild b/libbb/Kbuild
index 243626d67..7e793109e 100644
--- a/libbb/Kbuild
+++ b/libbb/Kbuild
@@ -73,6 +73,7 @@ lib-y += perror_nomsg_and_die.o
73lib-y += pidfile.o 73lib-y += pidfile.o
74lib-y += platform.o 74lib-y += platform.o
75lib-y += printable.o 75lib-y += printable.o
76lib-y += printable_string.o
76lib-y += print_flags.o 77lib-y += print_flags.o
77lib-y += process_escape_sequence.o 78lib-y += process_escape_sequence.o
78lib-y += procps.o 79lib-y += procps.o
diff --git a/libbb/printable_string.c b/libbb/printable_string.c
new file mode 100644
index 000000000..47565de0d
--- /dev/null
+++ b/libbb/printable_string.c
@@ -0,0 +1,65 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Unicode support routines.
4 *
5 * Copyright (C) 2010 Denys Vlasenko
6 *
7 * Licensed under GPL version 2, see file LICENSE in this tarball for details.
8 */
9#include "libbb.h"
10#include "unicode.h"
11
12const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
13{
14 static char *saved[4];
15 static unsigned cur_saved; /* = 0 */
16
17 char *dst;
18 const char *s;
19
20 s = str;
21 while (1) {
22 unsigned char c = *s;
23 if (c == '\0') {
24 /* 99+% of inputs do not need conversion */
25 if (stats) {
26 stats->byte_count = (s - str);
27 stats->unicode_count = (s - str);
28 stats->unicode_width = (s - str);
29 }
30 return str;
31 }
32 if (c < ' ')
33 break;
34 if (c >= 0x7f)
35 break;
36 s++;
37 }
38
39#if ENABLE_FEATURE_ASSUME_UNICODE
40 dst = unicode_conv_to_printable(stats, str);
41#else
42 {
43 char *d = dst = xstrdup(str);
44 while (1) {
45 unsigned char c = *d;
46 if (c == '\0')
47 break;
48 if (c < ' ' || c >= 0x7f)
49 *d = '?';
50 d++;
51 }
52 if (stats) {
53 stats->byte_count = (d - dst);
54 stats->unicode_count = (d - dst);
55 stats->unicode_width = (d - dst);
56 }
57 }
58#endif
59
60 free(saved[cur_saved]);
61 saved[cur_saved] = dst;
62 cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);
63
64 return dst;
65}
diff --git a/testsuite/ls.mk_uni_tests b/testsuite/ls.mk_uni_tests
new file mode 100644
index 000000000..da0c29f29
--- /dev/null
+++ b/testsuite/ls.mk_uni_tests
@@ -0,0 +1,111 @@
1# DO NOT EDIT THIS FILE! MOST TEXT EDITORS WILL DAMAGE IT!
2>'0001_1__Some_correct_UTF-8_text___________________________________________|'
3>'0002_2__Boundary_condition_test_cases_____________________________________|'
4>'0003_2.1__First_possible_sequence_of_a_certain_length_____________________|'
5>'0004_2.1.2__2_bytes__U-00000080_:________"€"______________________________|'
6>'0005_2.1.3__3_bytes__U-00000800_:________"ࠀ"______________________________|'
7>'0006_2.1.4__4_bytes__U-00010000_:________"𐀀"______________________________|'
8>'0007_2.1.5__5_bytes__U-00200000_:________""______________________________|'
9>'0008_2.1.6__6_bytes__U-04000000_:________""______________________________|'
10>'0009_2.2__Last_possible_sequence_of_a_certain_length______________________|'
11>'0010_2.2.1__1_byte___U-0000007F_:________""______________________________|'
12>'0011_2.2.2__2_bytes__U-000007FF_:________"߿"______________________________|'
13>'0012_2.2.3__3_bytes__U-0000FFFF_:________"￿"______________________________|'
14>'0013_2.2.4__4_bytes__U-001FFFFF_:________""______________________________|'
15>'0014_2.2.5__5_bytes__U-03FFFFFF_:________""______________________________|'
16>'0015_2.2.6__6_bytes__U-7FFFFFFF_:________""______________________________|'
17>'0016_2.3__Other_boundary_conditions_______________________________________|'
18>'0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"퟿"___________________________________|'
19>'0018_2.3.2__U-0000E000_=_ee_80_80_=_""___________________________________|'
20>'0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"�"___________________________________|'
21>'0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"􏿿"________________________________|'
22>'0021_2.3.5__U-00110000_=_f4_90_80_80_=_""________________________________|'
23>'0022_3__Malformed_sequences_______________________________________________|'
24>'0023_3.1__Unexpected_continuation_bytes___________________________________|'
25>'0024_3.1.1__First_continuation_byte_0x80:_""_____________________________|'
26>'0025_3.1.2__Last__continuation_byte_0xbf:_""_____________________________|'
27>'0026_3.1.3__2_continuation_bytes:_""____________________________________|'
28>'0027_3.1.4__3_continuation_bytes:_""___________________________________|'
29>'0028_3.1.5__4_continuation_bytes:_""__________________________________|'
30>'0029_3.1.6__5_continuation_bytes:_""_________________________________|'
31>'0030_3.1.7__6_continuation_bytes:_""________________________________|'
32>'0031_3.1.8__7_continuation_bytes:_""_______________________________|'
33>'0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|'
34>'0033____"_________________________________________________|'
35>'0034______________________________________________________|'
36>'0035______________________________________________________|'
37>'0036_____"________________________________________________|'
38>'0037_3.2__Lonely_start_characters_________________________________________|'
39>'0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|'
40>'0039________each_followed_by_a_space_character:___________________________|'
41>'0040____"_________________________________________________|'
42>'0041_____________________"________________________________|'
43>'0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|'
44>'0043________each_followed_by_a_space_character:___________________________|'
45>'0044____"________________"________________________________|'
46>'0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|'
47>'0046________each_followed_by_a_space_character:___________________________|'
48>'0047____"________"________________________________________________|'
49>'0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|'
50>'0049________each_followed_by_a_space_character:___________________________|'
51>'0050____"____"________________________________________________________|'
52>'0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|'
53>'0052________each_followed_by_a_space_character:___________________________|'
54>'0053____"__"____________________________________________________________|'
55>'0054_3.3__Sequences_with_last_continuation_byte_missing___________________|'
56>'0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____""______|'
57>'0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____""______|'
58>'0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____""______|'
59>'0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____""______|'
60>'0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____""______|'
61>'0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_""______|'
62>'0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_""______|'
63>'0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_""______|'
64>'0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_""______|'
65>'0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_""______|'
66>'0065_3.4__Concatenation_of_incomplete_sequences___________________________|'
67>'0066____""______________________________________________________|'
68>'0067_3.5__Impossible_bytes________________________________________________|'
69>'0068_3.5.1__fe_=_""______________________________________________________|'
70>'0069_3.5.2__ff_=_""______________________________________________________|'
71>'0070_3.5.3__fe_fe_ff_ff_=_""__________________________________________|'
72>'0071_4__Overlong_sequences________________________________________________|'
73>'0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|'
74>'0073_4.1.1_U+002F_=_c0_af_____________=_""_______________________________|'
75>'0074_4.1.2_U+002F_=_e0_80_af__________=_""_______________________________|'
76>'0075_4.1.3_U+002F_=_f0_80_80_af_______=_""_______________________________|'
77>'0076_4.1.4_U+002F_=_f8_80_80_80_af____=_""_______________________________|'
78>'0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_""_______________________________|'
79>'0078_4.2__Maximum_overlong_sequences______________________________________|'
80>'0079_4.2.1__U-0000007F_=_c1_bf_____________=_""__________________________|'
81>'0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_""__________________________|'
82>'0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_""__________________________|'
83>'0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_""__________________________|'
84>'0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_""__________________________|'
85>'0084_4.3__Overlong_representation_of_the_NUL_character____________________|'
86>'0085_4.3.1__U+0000_=_c0_80_____________=_""______________________________|'
87>'0086_4.3.2__U+0000_=_e0_80_80__________=_""______________________________|'
88>'0087_4.3.3__U+0000_=_f0_80_80_80_______=_""______________________________|'
89>'0088_4.3.4__U+0000_=_f8_80_80_80_80____=_""______________________________|'
90>'0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_""______________________________|'
91>'0090_5__Illegal_code_positions____________________________________________|'
92>'0091_5.1_Single_UTF-16_surrogates_________________________________________|'
93>'0092_5.1.1__U+D800_=_ed_a0_80_=_""_______________________________________|'
94>'0093_5.1.2__U+DB7F_=_ed_ad_bf_=_""_______________________________________|'
95>'0094_5.1.3__U+DB80_=_ed_ae_80_=_""_______________________________________|'
96>'0095_5.1.4__U+DBFF_=_ed_af_bf_=_""_______________________________________|'
97>'0096_5.1.5__U+DC00_=_ed_b0_80_=_""_______________________________________|'
98>'0097_5.1.6__U+DF80_=_ed_be_80_=_""_______________________________________|'
99>'0098_5.1.7__U+DFFF_=_ed_bf_bf_=_""_______________________________________|'
100>'0099_5.2_Paired_UTF-16_surrogates_________________________________________|'
101>'0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_""______________________|'
102>'0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_""______________________|'
103>'0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_""______________________|'
104>'0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_""______________________|'
105>'0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_""______________________|'
106>'0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_""______________________|'
107>'0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_""______________________|'
108>'0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_""______________________|'
109>'0108_5.3_Other_illegal_code_positions_____________________________________|'
110>'0109_5.3.1__U+FFFE_=_ef_bf_be_=_"￾"_______________________________________|'
111>'0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"￿"_______________________________________|'
diff --git a/testsuite/ls.tests b/testsuite/ls.tests
new file mode 100755
index 000000000..b0c5da7f9
--- /dev/null
+++ b/testsuite/ls.tests
@@ -0,0 +1,136 @@
1#!/bin/sh
2# Copyright 2010 by Denys Vlasenko
3# Licensed under GPL v2, see file LICENSE for details.
4
5. ./testing.sh
6
7test -f "$bindir/.config" && . "$bindir/.config"
8
9rm -rf ls.testdir >/dev/null
10mkdir ls.testdir || exit 1
11
12# testing "test name" "command" "expected result" "file input" "stdin"
13
14# This test does not pass correctly yet - in a correctly passing test,
15# all "|" characters should line up perfectly.
16test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
17&& test x"$CONFIG_SUBST_WCHAR" = x"63" \
18&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \
19&& testing "ls unicode test" \
20"(cd ls.testdir && sh ../ls.mk_uni_tests) && ls -1 ls.testdir" \
21'0001_1__Some_correct_UTF-8_text___________________________________________|
220002_2__Boundary_condition_test_cases_____________________________________|
230003_2.1__First_possible_sequence_of_a_certain_length_____________________|
240004_2.1.2__2_bytes__U-00000080_:________"?"______________________________|
250005_2.1.3__3_bytes__U-00000800_:________"?"______________________________|
260006_2.1.4__4_bytes__U-00010000_:________"?"______________________________|
270007_2.1.5__5_bytes__U-00200000_:________"?"______________________________|
280008_2.1.6__6_bytes__U-04000000_:________"?"______________________________|
290009_2.2__Last_possible_sequence_of_a_certain_length______________________|
300010_2.2.1__1_byte___U-0000007F_:________"?"______________________________|
310011_2.2.2__2_bytes__U-000007FF_:________"?"______________________________|
320012_2.2.3__3_bytes__U-0000FFFF_:________"?"______________________________|
330013_2.2.4__4_bytes__U-001FFFFF_:________"?"______________________________|
340014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________|
350015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________|
360016_2.3__Other_boundary_conditions_______________________________________|
370017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"?"___________________________________|
380018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________|
390019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"?"___________________________________|
400020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________|
410021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________|
420022_3__Malformed_sequences_______________________________________________|
430023_3.1__Unexpected_continuation_bytes___________________________________|
440024_3.1.1__First_continuation_byte_0x80:_"?"_____________________________|
450025_3.1.2__Last__continuation_byte_0xbf:_"?"_____________________________|
460026_3.1.3__2_continuation_bytes:_"??"____________________________________|
470027_3.1.4__3_continuation_bytes:_"???"___________________________________|
480028_3.1.5__4_continuation_bytes:_"????"__________________________________|
490029_3.1.6__5_continuation_bytes:_"?????"_________________________________|
500030_3.1.7__6_continuation_bytes:_"??????"________________________________|
510031_3.1.8__7_continuation_bytes:_"???????"_______________________________|
520032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|
530033____"????????????????_________________________________________________|
540034_____????????????????_________________________________________________|
550035_____????????????????_________________________________________________|
560036_____????????????????"________________________________________________|
570037_3.2__Lonely_start_characters_________________________________________|
580038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|
590039________each_followed_by_a_space_character:___________________________|
600040____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?__________________________________|
610041_____?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________|
620042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|
630043________each_followed_by_a_space_character:___________________________|
640044____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________|
650045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|
660046________each_followed_by_a_space_character:___________________________|
670047____"?_?_?_?_?_?_?_?_"________________________________________________|
680048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|
690049________each_followed_by_a_space_character:___________________________|
700050____"?_?_?_?_"________________________________________________________|
710051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|
720052________each_followed_by_a_space_character:___________________________|
730053____"?_?_"____________________________________________________________|
740054_3.3__Sequences_with_last_continuation_byte_missing___________________|
750055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______|
760056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"??"______|
770057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"???"______|
780058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"????"______|
790059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"?????"______|
800060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"?"______|
810061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"??"______|
820062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"???"______|
830063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"????"______|
840064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"?????"______|
850065_3.4__Concatenation_of_incomplete_sequences___________________________|
860066____"??????????????????????????????"______________________________________________________|
870067_3.5__Impossible_bytes________________________________________________|
880068_3.5.1__fe_=_"?"______________________________________________________|
890069_3.5.2__ff_=_"?"______________________________________________________|
900070_3.5.3__fe_fe_ff_ff_=_"????"__________________________________________|
910071_4__Overlong_sequences________________________________________________|
920072_4.1__Examples_of_an_overlong_ASCII_character_________________________|
930073_4.1.1_U+002F_=_c0_af_____________=_"??"_______________________________|
940074_4.1.2_U+002F_=_e0_80_af__________=_"???"_______________________________|
950075_4.1.3_U+002F_=_f0_80_80_af_______=_"????"_______________________________|
960076_4.1.4_U+002F_=_f8_80_80_80_af____=_"?????"_______________________________|
970077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"??????"_______________________________|
980078_4.2__Maximum_overlong_sequences______________________________________|
990079_4.2.1__U-0000007F_=_c1_bf_____________=_"??"__________________________|
1000080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"?"__________________________|
1010081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"?"__________________________|
1020082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"?"__________________________|
1030083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"?"__________________________|
1040084_4.3__Overlong_representation_of_the_NUL_character____________________|
1050085_4.3.1__U+0000_=_c0_80_____________=_"??"______________________________|
1060086_4.3.2__U+0000_=_e0_80_80__________=_"???"______________________________|
1070087_4.3.3__U+0000_=_f0_80_80_80_______=_"????"______________________________|
1080088_4.3.4__U+0000_=_f8_80_80_80_80____=_"?????"______________________________|
1090089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"??????"______________________________|
1100090_5__Illegal_code_positions____________________________________________|
1110091_5.1_Single_UTF-16_surrogates_________________________________________|
1120092_5.1.1__U+D800_=_ed_a0_80_=_"?"_______________________________________|
1130093_5.1.2__U+DB7F_=_ed_ad_bf_=_"?"_______________________________________|
1140094_5.1.3__U+DB80_=_ed_ae_80_=_"?"_______________________________________|
1150095_5.1.4__U+DBFF_=_ed_af_bf_=_"?"_______________________________________|
1160096_5.1.5__U+DC00_=_ed_b0_80_=_"?"_______________________________________|
1170097_5.1.6__U+DF80_=_ed_be_80_=_"?"_______________________________________|
1180098_5.1.7__U+DFFF_=_ed_bf_bf_=_"?"_______________________________________|
1190099_5.2_Paired_UTF-16_surrogates_________________________________________|
1200100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_"??"______________________|
1210101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_"??"______________________|
1220102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_"??"______________________|
1230103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_"??"______________________|
1240104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_"??"______________________|
1250105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_"??"______________________|
1260106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_"??"______________________|
1270107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_"??"______________________|
1280108_5.3_Other_illegal_code_positions_____________________________________|
1290109_5.3.1__U+FFFE_=_ef_bf_be_=_"?"_______________________________________|
1300110_5.3.2__U+FFFF_=_ef_bf_bf_=_"?"_______________________________________|
131' "" ""
132
133# Clean up
134rm -rf ls.testdir 2>/dev/null
135
136exit $FAILCOUNT