diff options
-rw-r--r-- | coreutils/wc.c | 58 | ||||
-rw-r--r-- | include/usage.src.h | 15 |
2 files changed, 45 insertions, 28 deletions
diff --git a/coreutils/wc.c b/coreutils/wc.c index ae38fd5fe..ecadae59b 100644 --- a/coreutils/wc.c +++ b/coreutils/wc.c | |||
@@ -7,7 +7,7 @@ | |||
7 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. | 7 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | /* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */ | 10 | /* BB_AUDIT SUSv3 compliant. */ |
11 | /* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ | 11 | /* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ |
12 | 12 | ||
13 | /* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) | 13 | /* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) |
@@ -19,10 +19,6 @@ | |||
19 | * 3) no checking of ferror on EOF returns | 19 | * 3) no checking of ferror on EOF returns |
20 | * 4) isprint() wasn't considered when word counting. | 20 | * 4) isprint() wasn't considered when word counting. |
21 | * | 21 | * |
22 | * TODO: | ||
23 | * | ||
24 | * When locale support is enabled, count multibyte chars in the '-m' case. | ||
25 | * | ||
26 | * NOTES: | 22 | * NOTES: |
27 | * | 23 | * |
28 | * The previous busybox wc attempted an optimization using stat for the | 24 | * The previous busybox wc attempted an optimization using stat for the |
@@ -40,8 +36,8 @@ | |||
40 | * | 36 | * |
41 | * for which 'wc -c' should output '0'. | 37 | * for which 'wc -c' should output '0'. |
42 | */ | 38 | */ |
43 | |||
44 | #include "libbb.h" | 39 | #include "libbb.h" |
40 | #include "unicode.h" | ||
45 | 41 | ||
46 | #if !ENABLE_LOCALE_SUPPORT | 42 | #if !ENABLE_LOCALE_SUPPORT |
47 | # undef isprint | 43 | # undef isprint |
@@ -58,12 +54,39 @@ | |||
58 | # define COUNT_FMT "u" | 54 | # define COUNT_FMT "u" |
59 | #endif | 55 | #endif |
60 | 56 | ||
57 | /* We support -m even when UNICODE_SUPPORT is off, | ||
58 | * we just don't advertise it in help text, | ||
59 | * since it is the same as -c in this case. | ||
60 | */ | ||
61 | |||
62 | //usage:#define wc_trivial_usage | ||
63 | //usage: "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..." | ||
64 | //usage: | ||
65 | //usage:#define wc_full_usage "\n\n" | ||
66 | //usage: "Count lines, words, and bytes for each FILE (or stdin)\n" | ||
67 | //usage: "\nOptions:" | ||
68 | //usage: "\n -c Count bytes" | ||
69 | //usage: IF_UNICODE_SUPPORT( | ||
70 | //usage: "\n -m Count characters" | ||
71 | //usage: ) | ||
72 | //usage: "\n -l Count newlines" | ||
73 | //usage: "\n -w Count words" | ||
74 | //usage: "\n -L Print longest line length" | ||
75 | //usage: | ||
76 | //usage:#define wc_example_usage | ||
77 | //usage: "$ wc /etc/passwd\n" | ||
78 | //usage: " 31 46 1365 /etc/passwd\n" | ||
79 | |||
80 | /* Order is important if we want to be compatible with | ||
81 | * column order in "wc -cmlwL" output: | ||
82 | */ | ||
61 | enum { | 83 | enum { |
62 | WC_LINES = 0, | 84 | WC_LINES = 0, |
63 | WC_WORDS = 1, | 85 | WC_WORDS = 1, |
64 | WC_CHARS = 2, | 86 | WC_UNICHARS = 2, |
65 | WC_LENGTH = 3, | 87 | WC_CHARS = 3, |
66 | NUM_WCS = 4, | 88 | WC_LENGTH = 4, |
89 | NUM_WCS = 5, | ||
67 | }; | 90 | }; |
68 | 91 | ||
69 | int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | 92 | int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
@@ -79,7 +102,9 @@ int wc_main(int argc UNUSED_PARAM, char **argv) | |||
79 | smallint status = EXIT_SUCCESS; | 102 | smallint status = EXIT_SUCCESS; |
80 | unsigned print_type; | 103 | unsigned print_type; |
81 | 104 | ||
82 | print_type = getopt32(argv, "lwcL"); | 105 | init_unicode(); |
106 | |||
107 | print_type = getopt32(argv, "lwcmL"); | ||
83 | 108 | ||
84 | if (print_type == 0) { | 109 | if (print_type == 0) { |
85 | print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS); | 110 | print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS); |
@@ -130,9 +155,16 @@ int wc_main(int argc UNUSED_PARAM, char **argv) | |||
130 | } | 155 | } |
131 | goto DO_EOF; /* Treat an EOF as '\r'. */ | 156 | goto DO_EOF; /* Treat an EOF as '\r'. */ |
132 | } | 157 | } |
158 | |||
159 | /* Cater for -c and -m */ | ||
133 | ++counts[WC_CHARS]; | 160 | ++counts[WC_CHARS]; |
161 | if (unicode_status != UNICODE_ON /* every byte is a new char */ | ||
162 | || (c & 0xc0) != 0x80 /* it isn't a continuation (2nd+) byte, 10xxxxxx, of a UTF-8 encoded char */ | ||
163 | ) { | ||
164 | ++counts[WC_UNICHARS]; | ||
165 | } | ||
134 | 166 | ||
135 | if (isprint_asciionly(c)) { | 167 | if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */ |
136 | ++linepos; | 168 | ++linepos; |
137 | if (!isspace(c)) { | 169 | if (!isspace(c)) { |
138 | in_word = 1; | 170 | in_word = 1; |
diff --git a/include/usage.src.h b/include/usage.src.h index 577eb5746..e7e9269e9 100644 --- a/include/usage.src.h +++ b/include/usage.src.h | |||
@@ -4764,21 +4764,6 @@ INSERT | |||
4764 | "\n" \ | 4764 | "\n" \ |
4765 | "\nUse 500ms to specify period in milliseconds" \ | 4765 | "\nUse 500ms to specify period in milliseconds" \ |
4766 | 4766 | ||
4767 | #define wc_trivial_usage \ | ||
4768 | "[OPTIONS] [FILE]..." | ||
4769 | #define wc_full_usage "\n\n" \ | ||
4770 | "Print line, word, and byte counts for each FILE (or stdin),\n" \ | ||
4771 | "and a total line if more than one FILE is specified\n" \ | ||
4772 | "\nOptions:" \ | ||
4773 | "\n -c Print the byte counts" \ | ||
4774 | "\n -l Print the newline counts" \ | ||
4775 | "\n -L Print the length of the longest line" \ | ||
4776 | "\n -w Print the word counts" \ | ||
4777 | |||
4778 | #define wc_example_usage \ | ||
4779 | "$ wc /etc/passwd\n" \ | ||
4780 | " 31 46 1365 /etc/passwd\n" | ||
4781 | |||
4782 | #define wget_trivial_usage \ | 4767 | #define wget_trivial_usage \ |
4783 | IF_FEATURE_WGET_LONG_OPTIONS( \ | 4768 | IF_FEATURE_WGET_LONG_OPTIONS( \ |
4784 | "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n" \ | 4769 | "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n" \ |