diff options
Diffstat (limited to 'coreutils/wc.c')
-rw-r--r-- | coreutils/wc.c | 79 |
1 files changed, 56 insertions, 23 deletions
diff --git a/coreutils/wc.c b/coreutils/wc.c
index 4f14374c3..ecadae59b 100644
--- a/coreutils/wc.c
+++ b/coreutils/wc.c
@@ -7,7 +7,7 @@ | |||
7 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. | 7 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | /* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */ | 10 | /* BB_AUDIT SUSv3 compliant. */ |
11 | /* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ | 11 | /* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ |
12 | 12 | ||
13 | /* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) | 13 | /* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) |
@@ -19,10 +19,6 @@ | |||
19 | * 3) no checking of ferror on EOF returns | 19 | * 3) no checking of ferror on EOF returns |
20 | * 4) isprint() wasn't considered when word counting. | 20 | * 4) isprint() wasn't considered when word counting. |
21 | * | 21 | * |
22 | * TODO: | ||
23 | * | ||
24 | * When locale support is enabled, count multibyte chars in the '-m' case. | ||
25 | * | ||
26 | * NOTES: | 22 | * NOTES: |
27 | * | 23 | * |
28 | * The previous busybox wc attempted an optimization using stat for the | 24 | * The previous busybox wc attempted an optimization using stat for the |
@@ -40,8 +36,8 @@ | |||
40 | * | 36 | * |
41 | * for which 'wc -c' should output '0'. | 37 | * for which 'wc -c' should output '0'. |
42 | */ | 38 | */ |
43 | |||
44 | #include "libbb.h" | 39 | #include "libbb.h" |
40 | #include "unicode.h" | ||
45 | 41 | ||
46 | #if !ENABLE_LOCALE_SUPPORT | 42 | #if !ENABLE_LOCALE_SUPPORT |
47 | # undef isprint | 43 | # undef isprint |
@@ -58,11 +54,39 @@ | |||
58 | # define COUNT_FMT "u" | 54 | # define COUNT_FMT "u" |
59 | #endif | 55 | #endif |
60 | 56 | ||
57 | /* We support -m even when UNICODE_SUPPORT is off, | ||
58 | * we just don't advertise it in help text, | ||
59 | * since it is the same as -c in this case. | ||
60 | */ | ||
61 | |||
62 | //usage:#define wc_trivial_usage | ||
63 | //usage: "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..." | ||
64 | //usage: | ||
65 | //usage:#define wc_full_usage "\n\n" | ||
66 | //usage: "Count lines, words, and bytes for each FILE (or stdin)\n" | ||
67 | //usage: "\nOptions:" | ||
68 | //usage: "\n -c Count bytes" | ||
69 | //usage: IF_UNICODE_SUPPORT( | ||
70 | //usage: "\n -m Count characters" | ||
71 | //usage: ) | ||
72 | //usage: "\n -l Count newlines" | ||
73 | //usage: "\n -w Count words" | ||
74 | //usage: "\n -L Print longest line length" | ||
75 | //usage: | ||
76 | //usage:#define wc_example_usage | ||
77 | //usage: "$ wc /etc/passwd\n" | ||
78 | //usage: " 31 46 1365 /etc/passwd\n" | ||
79 | |||
80 | /* Order is important if we want to be compatible with | ||
81 | * column order in "wc -cmlwL" output: | ||
82 | */ | ||
61 | enum { | 83 | enum { |
62 | WC_LINES = 0, | 84 | WC_LINES = 0, |
63 | WC_WORDS = 1, | 85 | WC_WORDS = 1, |
64 | WC_CHARS = 2, | 86 | WC_UNICHARS = 2, |
65 | WC_LENGTH = 3 | 87 | WC_CHARS = 3, |
88 | WC_LENGTH = 4, | ||
89 | NUM_WCS = 5, | ||
66 | }; | 90 | }; |
67 | 91 | ||
68 | int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | 92 | int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
@@ -72,13 +96,15 @@ int wc_main(int argc UNUSED_PARAM, char **argv) | |||
72 | const char *start_fmt = " %9"COUNT_FMT + 1; | 96 | const char *start_fmt = " %9"COUNT_FMT + 1; |
73 | const char *fname_fmt = " %s\n"; | 97 | const char *fname_fmt = " %s\n"; |
74 | COUNT_T *pcounts; | 98 | COUNT_T *pcounts; |
75 | COUNT_T counts[4]; | 99 | COUNT_T counts[NUM_WCS]; |
76 | COUNT_T totals[4]; | 100 | COUNT_T totals[NUM_WCS]; |
77 | int num_files; | 101 | int num_files; |
78 | smallint status = EXIT_SUCCESS; | 102 | smallint status = EXIT_SUCCESS; |
79 | unsigned print_type; | 103 | unsigned print_type; |
80 | 104 | ||
81 | print_type = getopt32(argv, "lwcL"); | 105 | init_unicode(); |
106 | |||
107 | print_type = getopt32(argv, "lwcmL"); | ||
82 | 108 | ||
83 | if (print_type == 0) { | 109 | if (print_type == 0) { |
84 | print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS); | 110 | print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS); |
@@ -99,7 +125,7 @@ int wc_main(int argc UNUSED_PARAM, char **argv) | |||
99 | pcounts = counts; | 125 | pcounts = counts; |
100 | 126 | ||
101 | num_files = 0; | 127 | num_files = 0; |
102 | while ((arg = *argv++) != 0) { | 128 | while ((arg = *argv++) != NULL) { |
103 | FILE *fp; | 129 | FILE *fp; |
104 | const char *s; | 130 | const char *s; |
105 | unsigned u; | 131 | unsigned u; |
@@ -117,21 +143,28 @@ int wc_main(int argc UNUSED_PARAM, char **argv) | |||
117 | linepos = 0; | 143 | linepos = 0; |
118 | in_word = 0; | 144 | in_word = 0; |
119 | 145 | ||
120 | do { | 146 | while (1) { |
121 | int c; | 147 | int c; |
122 | /* Our -w doesn't match GNU wc exactly... oh well */ | 148 | /* Our -w doesn't match GNU wc exactly... oh well */ |
123 | 149 | ||
124 | ++counts[WC_CHARS]; | ||
125 | c = getc(fp); | 150 | c = getc(fp); |
126 | if (c == EOF) { | 151 | if (c == EOF) { |
127 | if (ferror(fp)) { | 152 | if (ferror(fp)) { |
128 | bb_simple_perror_msg(arg); | 153 | bb_simple_perror_msg(arg); |
129 | status = EXIT_FAILURE; | 154 | status = EXIT_FAILURE; |
130 | } | 155 | } |
131 | --counts[WC_CHARS]; | ||
132 | goto DO_EOF; /* Treat an EOF as '\r'. */ | 156 | goto DO_EOF; /* Treat an EOF as '\r'. */ |
133 | } | 157 | } |
134 | if (isprint_asciionly(c)) { | 158 | |
159 | /* Cater for -c and -m */ | ||
160 | ++counts[WC_CHARS]; | ||
161 | if (unicode_status != UNICODE_ON /* every byte is a new char */ | ||
162 | || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */ | ||
163 | ) { | ||
164 | ++counts[WC_UNICHARS]; | ||
165 | } | ||
166 | |||
167 | if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */ | ||
135 | ++linepos; | 168 | ++linepos; |
136 | if (!isspace(c)) { | 169 | if (!isspace(c)) { |
137 | in_word = 1; | 170 | in_word = 1; |
@@ -167,18 +200,18 @@ int wc_main(int argc UNUSED_PARAM, char **argv) | |||
167 | if (c == EOF) { | 200 | if (c == EOF) { |
168 | break; | 201 | break; |
169 | } | 202 | } |
170 | } while (1); | 203 | } |
204 | |||
205 | fclose_if_not_stdin(fp); | ||
171 | 206 | ||
172 | if (totals[WC_LENGTH] < counts[WC_LENGTH]) { | 207 | if (totals[WC_LENGTH] < counts[WC_LENGTH]) { |
173 | totals[WC_LENGTH] = counts[WC_LENGTH]; | 208 | totals[WC_LENGTH] = counts[WC_LENGTH]; |
174 | } | 209 | } |
175 | totals[WC_LENGTH] -= counts[WC_LENGTH]; | 210 | totals[WC_LENGTH] -= counts[WC_LENGTH]; |
176 | 211 | ||
177 | fclose_if_not_stdin(fp); | ||
178 | |||
179 | OUTPUT: | 212 | OUTPUT: |
180 | /* coreutils wc tries hard to print pretty columns | 213 | /* coreutils wc tries hard to print pretty columns |
181 | * (saves results for all files, find max col len etc...) | 214 | * (saves results for all files, finds max col len etc...) |
182 | * we won't try that hard, it will bloat us too much */ | 215 | * we won't try that hard, it will bloat us too much */ |
183 | s = start_fmt; | 216 | s = start_fmt; |
184 | u = 0; | 217 | u = 0; |
@@ -188,7 +221,7 @@ int wc_main(int argc UNUSED_PARAM, char **argv) | |||
188 | s = " %9"COUNT_FMT; /* Ok... restore the leading space. */ | 221 | s = " %9"COUNT_FMT; /* Ok... restore the leading space. */ |
189 | } | 222 | } |
190 | totals[u] += pcounts[u]; | 223 | totals[u] += pcounts[u]; |
191 | } while (++u < 4); | 224 | } while (++u < NUM_WCS); |
192 | printf(fname_fmt, arg); | 225 | printf(fname_fmt, arg); |
193 | } | 226 | } |
194 | 227 | ||