about summary refs log tree commit diff
path: root/coreutils/wc.c
diff options
context:
space:
mode:
Diffstat (limited to 'coreutils/wc.c')
-rw-r--r-- coreutils/wc.c 79
1 files changed, 56 insertions, 23 deletions
diff --git a/coreutils/wc.c b/coreutils/wc.c
index 4f14374c3..ecadae59b 100644
--- a/coreutils/wc.c
+++ b/coreutils/wc.c
@@ -7,7 +7,7 @@
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree. 7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */ 8 */
9 9
10/* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */ 10/* BB_AUDIT SUSv3 compliant. */
11/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ 11/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
12 12
13/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) 13/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org)
@@ -19,10 +19,6 @@
19 * 3) no checking of ferror on EOF returns 19 * 3) no checking of ferror on EOF returns
20 * 4) isprint() wasn't considered when word counting. 20 * 4) isprint() wasn't considered when word counting.
21 * 21 *
22 * TODO:
23 *
24 * When locale support is enabled, count multibyte chars in the '-m' case.
25 *
26 * NOTES: 22 * NOTES:
27 * 23 *
28 * The previous busybox wc attempted an optimization using stat for the 24 * The previous busybox wc attempted an optimization using stat for the
@@ -40,8 +36,8 @@
40 * 36 *
41 * for which 'wc -c' should output '0'. 37 * for which 'wc -c' should output '0'.
42 */ 38 */
43
44#include "libbb.h" 39#include "libbb.h"
40#include "unicode.h"
45 41
46#if !ENABLE_LOCALE_SUPPORT 42#if !ENABLE_LOCALE_SUPPORT
47# undef isprint 43# undef isprint
@@ -58,11 +54,39 @@
58# define COUNT_FMT "u" 54# define COUNT_FMT "u"
59#endif 55#endif
60 56
57/* We support -m even when UNICODE_SUPPORT is off,
58 * we just don't advertise it in help text,
59 * since it is the same as -c in this case.
60 */
61
62//usage:#define wc_trivial_usage
63//usage: "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..."
64//usage:
65//usage:#define wc_full_usage "\n\n"
66//usage: "Count lines, words, and bytes for each FILE (or stdin)\n"
67//usage: "\nOptions:"
68//usage: "\n -c Count bytes"
69//usage: IF_UNICODE_SUPPORT(
70//usage: "\n -m Count characters"
71//usage: )
72//usage: "\n -l Count newlines"
73//usage: "\n -w Count words"
74//usage: "\n -L Print longest line length"
75//usage:
76//usage:#define wc_example_usage
77//usage: "$ wc /etc/passwd\n"
78//usage: " 31 46 1365 /etc/passwd\n"
79
80/* Order is important if we want to be compatible with
81 * column order in "wc -cmlwL" output:
82 */
61enum { 83enum {
62 WC_LINES = 0, 84 WC_LINES = 0,
63 WC_WORDS = 1, 85 WC_WORDS = 1,
64 WC_CHARS = 2, 86 WC_UNICHARS = 2,
65 WC_LENGTH = 3 87 WC_CHARS = 3,
88 WC_LENGTH = 4,
89 NUM_WCS = 5,
66}; 90};
67 91
68int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 92int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
@@ -72,13 +96,15 @@ int wc_main(int argc UNUSED_PARAM, char **argv)
72 const char *start_fmt = " %9"COUNT_FMT + 1; 96 const char *start_fmt = " %9"COUNT_FMT + 1;
73 const char *fname_fmt = " %s\n"; 97 const char *fname_fmt = " %s\n";
74 COUNT_T *pcounts; 98 COUNT_T *pcounts;
75 COUNT_T counts[4]; 99 COUNT_T counts[NUM_WCS];
76 COUNT_T totals[4]; 100 COUNT_T totals[NUM_WCS];
77 int num_files; 101 int num_files;
78 smallint status = EXIT_SUCCESS; 102 smallint status = EXIT_SUCCESS;
79 unsigned print_type; 103 unsigned print_type;
80 104
81 print_type = getopt32(argv, "lwcL"); 105 init_unicode();
106
107 print_type = getopt32(argv, "lwcmL");
82 108
83 if (print_type == 0) { 109 if (print_type == 0) {
84 print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS); 110 print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS);
@@ -99,7 +125,7 @@ int wc_main(int argc UNUSED_PARAM, char **argv)
99 pcounts = counts; 125 pcounts = counts;
100 126
101 num_files = 0; 127 num_files = 0;
102 while ((arg = *argv++) != 0) { 128 while ((arg = *argv++) != NULL) {
103 FILE *fp; 129 FILE *fp;
104 const char *s; 130 const char *s;
105 unsigned u; 131 unsigned u;
@@ -117,21 +143,28 @@ int wc_main(int argc UNUSED_PARAM, char **argv)
117 linepos = 0; 143 linepos = 0;
118 in_word = 0; 144 in_word = 0;
119 145
120 do { 146 while (1) {
121 int c; 147 int c;
122 /* Our -w doesn't match GNU wc exactly... oh well */ 148 /* Our -w doesn't match GNU wc exactly... oh well */
123 149
124 ++counts[WC_CHARS];
125 c = getc(fp); 150 c = getc(fp);
126 if (c == EOF) { 151 if (c == EOF) {
127 if (ferror(fp)) { 152 if (ferror(fp)) {
128 bb_simple_perror_msg(arg); 153 bb_simple_perror_msg(arg);
129 status = EXIT_FAILURE; 154 status = EXIT_FAILURE;
130 } 155 }
131 --counts[WC_CHARS];
132 goto DO_EOF; /* Treat an EOF as '\r'. */ 156 goto DO_EOF; /* Treat an EOF as '\r'. */
133 } 157 }
134 if (isprint_asciionly(c)) { 158
159 /* Cater for -c and -m */
160 ++counts[WC_CHARS];
161 if (unicode_status != UNICODE_ON /* every byte is a new char */
162 || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */
163 ) {
164 ++counts[WC_UNICHARS];
165 }
166
167 if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */
135 ++linepos; 168 ++linepos;
136 if (!isspace(c)) { 169 if (!isspace(c)) {
137 in_word = 1; 170 in_word = 1;
@@ -167,18 +200,18 @@ int wc_main(int argc UNUSED_PARAM, char **argv)
167 if (c == EOF) { 200 if (c == EOF) {
168 break; 201 break;
169 } 202 }
170 } while (1); 203 }
204
205 fclose_if_not_stdin(fp);
171 206
172 if (totals[WC_LENGTH] < counts[WC_LENGTH]) { 207 if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
173 totals[WC_LENGTH] = counts[WC_LENGTH]; 208 totals[WC_LENGTH] = counts[WC_LENGTH];
174 } 209 }
175 totals[WC_LENGTH] -= counts[WC_LENGTH]; 210 totals[WC_LENGTH] -= counts[WC_LENGTH];
176 211
177 fclose_if_not_stdin(fp);
178
179 OUTPUT: 212 OUTPUT:
180 /* coreutils wc tries hard to print pretty columns 213 /* coreutils wc tries hard to print pretty columns
181 * (saves results for all files, find max col len etc...) 214 * (saves results for all files, finds max col len etc...)
182 * we won't try that hard, it will bloat us too much */ 215 * we won't try that hard, it will bloat us too much */
183 s = start_fmt; 216 s = start_fmt;
184 u = 0; 217 u = 0;
@@ -188,7 +221,7 @@ int wc_main(int argc UNUSED_PARAM, char **argv)
188 s = " %9"COUNT_FMT; /* Ok... restore the leading space. */ 221 s = " %9"COUNT_FMT; /* Ok... restore the leading space. */
189 } 222 }
190 totals[u] += pcounts[u]; 223 totals[u] += pcounts[u];
191 } while (++u < 4); 224 } while (++u < NUM_WCS);
192 printf(fname_fmt, arg); 225 printf(fname_fmt, arg);
193 } 226 }
194 227