diff options
author | Mark Whitley <markw@lineo.com> | 2000-08-02 18:30:11 +0000 |
---|---|---|
committer | Mark Whitley <markw@lineo.com> | 2000-08-02 18:30:11 +0000 |
commit | 807f0fd637ecc25e60c81e41ad969c177c7df327 (patch) | |
tree | f1c0645b6dc9c6c87e41280b822bc3b230e1f168 /coreutils/cut.c | |
parent | d5fa3e3e9a8a1548b2e405ca9684bebbf946fd4f (diff) | |
download | busybox-w32-807f0fd637ecc25e60c81e41ad969c177c7df327.tar.gz busybox-w32-807f0fd637ecc25e60c81e41ad969c177c7df327.tar.bz2 busybox-w32-807f0fd637ecc25e60c81e41ad969c177c7df327.zip |
Brand new version of cut. This fixes the bugs in the old cut, is smaller, and
is a sight easier to understand than the Minix cut.
Diffstat (limited to 'coreutils/cut.c')
-rw-r--r-- | coreutils/cut.c | 479 |
1 file changed, 171 insertions, 308 deletions
diff --git a/coreutils/cut.c b/coreutils/cut.c index 4907ed935..e90a0e2e9 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c | |||
@@ -1,11 +1,8 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
2 | /* | 1 | /* |
3 | * cut implementation for busybox | 2 | * cut.c - minimalist version of cut |
4 | * | 3 | * |
5 | * Copyright (c) Michael J. Holme | 4 | * Copyright (C) 1999,2000 by Lineo, inc. |
6 | * | 5 | * Written by Mark Whitley <markw@lineo.com>, <markw@enol.com> |
7 | * This version of cut is adapted from Minix cut and was modified | ||
8 | * by Erik Andersen <andersee@debian.org> to be used in busybox. | ||
9 | * | 6 | * |
10 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License as published by | 8 | * it under the terms of the GNU General Public License as published by |
@@ -20,351 +17,217 @@ | |||
20 | * You should have received a copy of the GNU General Public License | 17 | * You should have received a copy of the GNU General Public License |
21 | * along with this program; if not, write to the Free Software | 18 | * along with this program; if not, write to the Free Software |
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 | * | 20 | * |
24 | * Original copyright notice is retained at the end of this file. | ||
25 | */ | 21 | */ |
26 | 22 | ||
27 | #include "internal.h" | 23 | #include <stdio.h> |
28 | #include <sys/types.h> | 24 | #include <stdlib.h> |
29 | #include <ctype.h> | 25 | #include <unistd.h> /* getopt */ |
30 | #include <string.h> | 26 | #include <string.h> |
27 | #include <ctype.h> | ||
31 | #include <errno.h> | 28 | #include <errno.h> |
32 | #include <stdlib.h> | 29 | #include "internal.h" |
33 | #include <stdio.h> | ||
34 | #define BB_DECLARE_EXTERN | ||
35 | #define bb_need_help | ||
36 | #include "messages.c" | ||
37 | |||
38 | #define MAX_FIELD 80 /* Pointers to the beginning of each field | ||
39 | * are stored in columns[], if a line holds | ||
40 | * more than MAX_FIELD columns the array | ||
41 | * boundary is exceed. But unlikely at 80 */ | ||
42 | |||
43 | #define MAX_ARGS 32 /* Maximum number of fields following -f or | ||
44 | * -c switches */ | ||
45 | int args[MAX_ARGS * 2]; | ||
46 | int num_args; | ||
47 | |||
48 | /* Lots of new defines, should easen maintainance... */ | ||
49 | #define DUMP_STDIN 0 /* define for mode: no options */ | ||
50 | #define OPTIONF 1 /* define for mode: option -f */ | ||
51 | #define OPTIONC 2 /* define for mode: option -c */ | ||
52 | #define OPTIONB 3 /* define for mode: option -b */ | ||
53 | #define NOTSET 0 /* option not selected */ | ||
54 | #define SET 1 /* option selected */ | ||
55 | #define OPTIONS 1 /*define option -s */ | ||
56 | /* Defines for the warnings */ | ||
57 | #define DELIMITER_NOT_APPLICABLE 0 | ||
58 | #define OVERRIDING_PREVIOUS_MODE 1 | ||
59 | #define OPTION_NOT_APPLICABLE 2 | ||
60 | #define UNKNOWN_OPTION 3 | ||
61 | #define FILE_NOT_READABLE 4 | ||
62 | /* Defines for the fatal errors */ | ||
63 | #define SYNTAX_ERROR 101 | ||
64 | #define POSITION_ERROR 102 | ||
65 | #define LINE_TO_LONG_ERROR 103 | ||
66 | #define RANGE_ERROR 104 | ||
67 | #define MAX_FIELDS_EXEEDED_ERROR 105 | ||
68 | #define MAX_ARGS_EXEEDED_ERROR 106 | ||
69 | |||
70 | |||
71 | int mode; /* 0 = dump stdin to stdout, 1=-f, 2=-c */ | ||
72 | char delim = '\t'; /* default delimiting character */ | ||
73 | FILE *fd; | ||
74 | char line[BUFSIZ]; | ||
75 | int exit_status; | ||
76 | int option = 0; /* for -s option */ | ||
77 | |||
78 | int cut_main(int argc, char **argv); | ||
79 | void warn(int warn_number, char *option); | ||
80 | void cuterror(int err); | ||
81 | void get_args(void); | ||
82 | void cut(void); | ||
83 | 30 | ||
84 | void warn(int warn_number, char *option) | ||
85 | { | ||
86 | static char *warn_msg[] = { | ||
87 | "Option -%s allowed only with -f\n", | ||
88 | "-%s overrides earlier option\n", | ||
89 | "-%s not allowed in current mode\n", | ||
90 | "Cannot open %s\n" | ||
91 | }; | ||
92 | 31 | ||
93 | errorMsg(warn_msg[warn_number], option); | 32 | /* globals from other files */ |
94 | exit_status = warn_number + 1; | 33 | extern int optind; |
34 | extern char *optarg; | ||
95 | 35 | ||
96 | } | ||
97 | |||
98 | void cuterror(int err) | ||
99 | { | ||
100 | static char *err_mes[] = { | ||
101 | "syntax error\n", | ||
102 | "position must be >0\n", | ||
103 | "line longer than BUFSIZ\n", | ||
104 | "range must not decrease from left to right\n", | ||
105 | "MAX_FIELD exceeded\n", | ||
106 | "MAX_ARGS exceeded\n" | ||
107 | }; | ||
108 | 36 | ||
109 | errorMsg(err_mes[err - 101]); | 37 | /* globals in this file only */ |
110 | exit(err); | 38 | static char part = 0; /* (b)yte, (c)har, (f)ields */ |
111 | } | 39 | static int startpos = 1; |
40 | static int endpos = -1; | ||
41 | static char delim = '\t'; /* delimiter, default is tab */ | ||
42 | static unsigned int supress_non_delimited_lines = 0; | ||
112 | 43 | ||
113 | 44 | ||
114 | void get_args() | 45 | /* |
46 | * decompose_list() - parses a list and puts values into startpos and endpos. | ||
47 | * valid list formats: N, N-, N-M, -M | ||
48 | */ | ||
49 | static void decompose_list(const char *list) | ||
115 | { | 50 | { |
116 | int i = 0; | 51 | unsigned int nminus = 0; |
117 | int arg_ptr = 0; | 52 | char *ptr; |
118 | int flag; | ||
119 | 53 | ||
120 | num_args = 0; | 54 | /* the list must contain only digits and no more than one minus sign */ |
121 | do { | 55 | for (ptr = (char *)list; *ptr; ptr++) { |
122 | if (num_args == MAX_ARGS) | 56 | if (!isdigit(*ptr) && *ptr != '-') { |
123 | cuterror(MAX_ARGS_EXEEDED_ERROR); | 57 | fatalError("invalid byte or field list\n"); |
124 | if (!isdigit(line[i]) && line[i] != '-') | 58 | } |
125 | cuterror(SYNTAX_ERROR); | 59 | if (*ptr == '-') { |
60 | nminus++; | ||
61 | if (nminus > 1) { | ||
62 | fatalError("invalid byte or field list\n"); | ||
63 | } | ||
64 | } | ||
65 | } | ||
126 | 66 | ||
127 | args[arg_ptr] = 1; | 67 | /* handle single value 'N' case */ |
128 | args[arg_ptr + 1] = BUFSIZ; | 68 | if (nminus == 0) { |
129 | flag = 1; | 69 | startpos = strtol(list, &ptr, 10); |
70 | if (startpos == 0) { | ||
71 | fatalError("missing list of fields\n"); | ||
72 | } | ||
73 | endpos = startpos; | ||
74 | } | ||
75 | /* handle multi-value cases */ | ||
76 | else if (nminus == 1) { | ||
77 | /* handle 'N-' case */ | ||
78 | if (list[strlen(list) - 1] == '-') { | ||
79 | startpos = strtol(list, &ptr, 10); | ||
80 | } | ||
81 | /* handle '-M' case */ | ||
82 | else if (list[0] == '-') { | ||
83 | endpos = strtol(&list[1], NULL, 10); | ||
84 | } | ||
85 | /* handle 'N-M' case */ | ||
86 | else { | ||
87 | startpos = strtol(list, &ptr, 10); | ||
88 | endpos = strtol(ptr+1, &ptr, 10); | ||
89 | } | ||
130 | 90 | ||
131 | while (line[i] != ',' && line[i] != 0) { | 91 | /* a sanity check */ |
132 | if (isdigit(line[i])) { | 92 | if (startpos == 0) { |
133 | args[arg_ptr] = 0; | 93 | startpos = 1; |
134 | while (isdigit(line[i])) | ||
135 | args[arg_ptr] = 10 * args[arg_ptr] + line[i++] - '0'; | ||
136 | if (!args[arg_ptr]) | ||
137 | cuterror(POSITION_ERROR); | ||
138 | arg_ptr++; | ||
139 | } | ||
140 | if (line[i] == '-') { | ||
141 | arg_ptr |= 1; | ||
142 | i++; | ||
143 | flag = 0; | ||
144 | } | ||
145 | } | 94 | } |
146 | if (flag && arg_ptr & 1) | ||
147 | args[arg_ptr] = args[arg_ptr - 1]; | ||
148 | if (args[num_args * 2] > args[num_args * 2 + 1]) | ||
149 | cuterror(RANGE_ERROR); | ||
150 | num_args++; | ||
151 | arg_ptr = num_args * 2; | ||
152 | } | 95 | } |
153 | while (line[i++]); | ||
154 | } | 96 | } |
155 | 97 | ||
156 | 98 | ||
157 | void cut() | 99 | /* |
100 | * snippy-snip | ||
101 | */ | ||
102 | static void cut_file(FILE *file) | ||
158 | { | 103 | { |
159 | int i, j, length, maxcol=0; | 104 | char *line; |
160 | char *columns[MAX_FIELD]; | 105 | |
161 | 106 | /* go through every line in the file */ | |
162 | while (fgets(line, BUFSIZ, fd)) { | 107 | for (line = NULL; (line = get_line_from_file(file)) != NULL; free(line)) { |
163 | maxcol=0; | 108 | |
164 | length = strlen(line) - 1; | 109 | /* cut based on chars/bytes */ |
165 | *(line + length) = 0; | 110 | if (part == 'c' || part == 'b') { |
166 | switch (mode) { | 111 | int i; |
167 | case DUMP_STDIN: | 112 | /* a valid end position has been specified */ |
168 | printf("%s", line); | 113 | if (endpos > 0) { |
169 | break; | 114 | for (i = startpos-1; i < endpos; i++) { |
170 | case OPTIONF: | 115 | fputc(line[i], stdout); |
171 | columns[maxcol++] = line; | ||
172 | for (i = 0; i < length; i++) { | ||
173 | if (*(line + i) == delim) { | ||
174 | *(line + i) = 0; | ||
175 | if (maxcol == MAX_FIELD) | ||
176 | cuterror(MAX_FIELDS_EXEEDED_ERROR); | ||
177 | columns[maxcol] = line + i + 1; | ||
178 | maxcol++; | ||
179 | } | 116 | } |
117 | fputc('\n', stdout); | ||
180 | } | 118 | } |
181 | if (maxcol != 1) { | 119 | /* otherwise, just go to the end of the line */ |
182 | for (i = 0; i < num_args; i++) { | 120 | else { |
183 | for (j = args[i * 2]; j <= args[i * 2 + 1]; j++) | 121 | for (i = startpos-1; line[i]; i++) { |
184 | if (j <= maxcol) { | 122 | fputc(line[i], stdout); |
185 | |||
186 | printf("%s", columns[j - 1]); | ||
187 | |||
188 | if (i != num_args - 1 || j != args[i * 2 + 1]) | ||
189 | putchar(delim); | ||
190 | } | ||
191 | } | 123 | } |
192 | } else if (option != OPTIONS) { | ||
193 | printf("%s",line); | ||
194 | } | 124 | } |
195 | break; | 125 | } |
196 | case OPTIONC: | 126 | /* cut based on fields */ |
197 | for (i = 0; i < num_args; i++) { | 127 | else if (part == 'f') { |
198 | for (j = args[i * 2]; | 128 | char *ptr; |
199 | j <= (args[i * 2 + 1] > | 129 | char *start = line; |
200 | length ? length : args[i * 2 + 1]); j++) | 130 | unsigned int delims_hit = 0; |
201 | putchar(*(line + j - 1)); | 131 | |
132 | for (ptr = line; (ptr = strchr(ptr, delim)) != NULL; ptr++) { | ||
133 | delims_hit++; | ||
134 | if (delims_hit == (startpos - 1)) { | ||
135 | start = ptr+1; | ||
136 | } | ||
137 | if (delims_hit == endpos) { | ||
138 | break; | ||
139 | } | ||
140 | } | ||
141 | /* we didn't hit any delimeters */ | ||
142 | if (delims_hit == 0 && !supress_non_delimited_lines) { | ||
143 | fputs(line, stdout); | ||
144 | } | ||
145 | /* we =did= hit some delimiters */ | ||
146 | else if (delims_hit > 0) { | ||
147 | /* we have a fixed end point */ | ||
148 | if (ptr) { | ||
149 | while (start < ptr) { | ||
150 | fputc(*start, stdout); | ||
151 | start++; | ||
152 | } | ||
153 | fputc('\n', stdout); | ||
154 | } | ||
155 | /* or we're just going til the end of the line */ | ||
156 | else { | ||
157 | while (*start) { | ||
158 | fputc(*start, stdout); | ||
159 | start++; | ||
160 | } | ||
161 | } | ||
202 | } | 162 | } |
203 | } | 163 | } |
204 | if (maxcol != 1) | ||
205 | putchar('\n'); | ||
206 | } | 164 | } |
207 | } | 165 | } |
208 | 166 | ||
209 | int cut_main(int argc, char **argv) | 167 | extern int cut_main(int argc, char **argv) |
210 | { | 168 | { |
211 | int i = 1; | 169 | int opt; |
212 | int numberFilenames = 0; | ||
213 | 170 | ||
214 | while (i < argc) { | 171 | while ((opt = getopt(argc, argv, "b:c:d:f:ns")) > 0) { |
215 | if (argv[i][0] == '-') { | 172 | switch (opt) { |
216 | switch (argv[i++][1]) { | ||
217 | case 'd': | ||
218 | if (mode == OPTIONC || mode == OPTIONB) | ||
219 | warn(DELIMITER_NOT_APPLICABLE, "d"); | ||
220 | if (argc > i) | ||
221 | delim = argv[i++][0]; | ||
222 | else | ||
223 | cuterror(SYNTAX_ERROR); | ||
224 | break; | ||
225 | case 'f': | ||
226 | sprintf(line, "%s", argv[i++]); | ||
227 | if (mode == OPTIONC || mode == OPTIONB) | ||
228 | warn(OVERRIDING_PREVIOUS_MODE, "f"); | ||
229 | mode = OPTIONF; | ||
230 | break; | ||
231 | case 'b': | 173 | case 'b': |
232 | sprintf(line, "%s", argv[i++]); | ||
233 | if (mode == OPTIONF || mode == OPTIONC) | ||
234 | warn(OVERRIDING_PREVIOUS_MODE, "b"); | ||
235 | mode = OPTIONB; | ||
236 | break; | ||
237 | case 'c': | 174 | case 'c': |
238 | sprintf(line, "%s", argv[i++]); | 175 | case 'f': |
239 | if (mode == OPTIONF || mode == OPTIONB) | 176 | /* make sure they didn't ask for two types of lists */ |
240 | warn(OVERRIDING_PREVIOUS_MODE, "c"); | 177 | if (part != 0) { |
241 | mode = OPTIONC; | 178 | fatalError("only one type of list may be specified"); |
179 | } | ||
180 | part = (char)opt; | ||
181 | decompose_list(optarg); | ||
242 | break; | 182 | break; |
243 | case 's': | 183 | case 'd': |
244 | option = OPTIONS; | 184 | if (strlen(optarg) > 1) { |
245 | 185 | fatalError("the delimiter must be a single character\n"); | |
186 | } | ||
187 | delim = optarg[0]; | ||
246 | break; | 188 | break; |
247 | case '\0': /* - means: read from stdin */ | 189 | case 'n': |
248 | numberFilenames++; | 190 | /* no-op */ |
249 | break; | 191 | break; |
250 | case 'n': /* needed for Posix, but no effect here */ | 192 | case 's': |
251 | if (mode != OPTIONB) | 193 | supress_non_delimited_lines++; |
252 | warn(OPTION_NOT_APPLICABLE, "n"); | ||
253 | break; | 194 | break; |
254 | default: | ||
255 | warn(UNKNOWN_OPTION, &(argv[i - 1][1])); | ||
256 | } | ||
257 | } else { | ||
258 | i++; | ||
259 | numberFilenames++; | ||
260 | } | 195 | } |
261 | } | 196 | } |
262 | 197 | ||
263 | /* Here follow the checks, if the selected options are reasonable. */ | 198 | if (part == 0) { |
264 | if (mode == OPTIONB) /* since in Minix char := byte */ | 199 | fatalError("you must specify a list of bytes, characters, or fields\n"); |
265 | mode = OPTIONC; | 200 | } |
266 | 201 | ||
267 | if (mode != OPTIONF && option == OPTIONS) | 202 | if (supress_non_delimited_lines && part != 'f') { |
268 | warn(DELIMITER_NOT_APPLICABLE,"s"); | 203 | fatalError("suppressing non-delimited lines makes sense |
269 | get_args(); | 204 | only when operating on fields\n"); |
270 | if (numberFilenames != 0) { | 205 | } |
271 | i = 1; | 206 | |
272 | while (i < argc) { | 207 | if (delim != '\t' && part != 'f') { |
273 | if (argv[i][0] == '-') { | 208 | fatalError("a delimiter may be specified only when operating on fields\n"); |
274 | switch (argv[i][1]) { | 209 | } |
275 | case 'f': | 210 | |
276 | case 'c': | 211 | /* argv[(optind)..(argc-1)] should be names of file to process. If no |
277 | case 'b': | 212 | * files were specified or '-' was specified, take input from stdin. |
278 | case 'd': | 213 | * Otherwise, we process all the files specified. */ |
279 | i += 2; | 214 | if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) { |
280 | break; | 215 | cut_file(stdin); |
281 | case 'n': | 216 | } |
282 | case 'i': | 217 | else { |
283 | case 's': | 218 | int i; |
284 | i++; | 219 | FILE *file; |
285 | break; | 220 | for (i = optind; i < argc; i++) { |
286 | case '\0': | 221 | file = fopen(argv[i], "r"); |
287 | fd = stdin; | 222 | if (file == NULL) { |
288 | i++; | 223 | /* errorMsg("%s: %s\n", argv[i], strerror(errno)); */ |
289 | cut(); | 224 | fprintf(stderr, "%s: %s\n", argv[i], strerror(errno)); |
290 | break; | ||
291 | default: | ||
292 | i++; | ||
293 | } | ||
294 | } else { | 225 | } else { |
295 | if ((fd = fopen(argv[i++], "r")) == NULL) { | 226 | cut_file(file); |
296 | warn(FILE_NOT_READABLE, argv[i - 1]); | 227 | fclose(file); |
297 | } else { | ||
298 | cut(); | ||
299 | fclose(fd); | ||
300 | } | ||
301 | } | 228 | } |
302 | } | 229 | } |
303 | } else { | ||
304 | fd = stdin; | ||
305 | cut(); | ||
306 | } | 230 | } |
307 | 231 | ||
308 | return(exit_status); | 232 | return 0; |
309 | } | 233 | } |
310 | |||
311 | /* cut - extract columns from a file or stdin. Author: Michael J. Holme | ||
312 | * | ||
313 | * Copyright 1989, Michael John Holme, All rights reserved. | ||
314 | * This code may be freely distributed, provided that this notice | ||
315 | * remains intact. | ||
316 | * | ||
317 | * V1.1: 6th September 1989 | ||
318 | * | ||
319 | * Bugs, criticisms, etc, | ||
320 | * c/o Mark Powell | ||
321 | * JANET sq79@uk.ac.liv | ||
322 | * ARPA sq79%liv.ac.uk@nsfnet-relay.ac.uk | ||
323 | * UUCP ...!mcvax!ukc!liv.ac.uk!sq79 | ||
324 | *------------------------------------------------------------------------- | ||
325 | * Changed for POSIX1003.2/Draft10 conformance | ||
326 | * Thomas Brupbacher (tobr@mw.lpc.ethz.ch), September 1990. | ||
327 | * Changes: | ||
328 | * - separation of error messages ( stderr) and output (stdout). | ||
329 | * - support for -b and -n (no effect, -b acts as -c) | ||
330 | * - support for -s | ||
331 | *------------------------------------------------------------------------- | ||
332 | */ | ||
333 | |||
334 | /* | ||
335 | * Copyright (c) 1987,1997, Prentice Hall | ||
336 | * All rights reserved. | ||
337 | * | ||
338 | * Redistribution and use of the MINIX operating system in source and | ||
339 | * binary forms, with or without modification, are permitted provided | ||
340 | * that the following conditions are met: | ||
341 | * | ||
342 | * Redistributions of source code must retain the above copyright | ||
343 | * notice, this list of conditions and the following disclaimer. | ||
344 | * | ||
345 | * Redistributions in binary form must reproduce the above | ||
346 | * copyright notice, this list of conditions and the following | ||
347 | * disclaimer in the documentation and/or other materials provided | ||
348 | * with the distribution. | ||
349 | * | ||
350 | * Neither the name of Prentice Hall nor the names of the software | ||
351 | * authors or contributors may be used to endorse or promote | ||
352 | * products derived from this software without specific prior | ||
353 | * written permission. | ||
354 | * | ||
355 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND | ||
356 | * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, | ||
357 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
358 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | ||
359 | * IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR CONTRIBUTORS BE | ||
360 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
361 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
362 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
363 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | ||
364 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE | ||
365 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, | ||
366 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
367 | * | ||
368 | */ | ||
369 | |||
370 | |||