diff options
Diffstat (limited to 'coreutils/sort.c')
-rw-r--r-- | coreutils/sort.c | 357 |
1 files changed, 357 insertions, 0 deletions
diff --git a/coreutils/sort.c b/coreutils/sort.c new file mode 100644 index 000000000..c23bf9c60 --- /dev/null +++ b/coreutils/sort.c | |||
@@ -0,0 +1,357 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
2 | /* | ||
3 | * SuS3 compliant sort implementation for busybox | ||
4 | * | ||
5 | * Copyright (C) 2004 by Rob Landley <rob@landley.net> | ||
6 | * | ||
7 | * MAINTAINER: Rob Landley <rob@landley.net> | ||
8 | * | ||
9 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. | ||
10 | * | ||
11 | * See SuS3 sort standard at: | ||
12 | * http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html | ||
13 | */ | ||
14 | |||
15 | #include "busybox.h" | ||
16 | |||
17 | static int global_flags; | ||
18 | |||
19 | /* | ||
20 | sort [-m][-o output][-bdfinru][-t char][-k keydef]... [file...] | ||
21 | sort -c [-bdfinru][-t char][-k keydef][file] | ||
22 | */ | ||
23 | |||
24 | /* These are sort types */ | ||
25 | #define FLAG_n 1 /* Numeric sort */ | ||
26 | #define FLAG_g 2 /* Sort using strtod() */ | ||
27 | #define FLAG_M 4 /* Sort date */ | ||
28 | /* ucsz apply to root level only, not keys. b at root level implies bb */ | ||
29 | #define FLAG_u 8 /* Unique */ | ||
30 | #define FLAG_c 16 /* Check: no output, exit(!ordered) */ | ||
31 | #define FLAG_s 32 /* Stable sort, no ascii fallback at end */ | ||
32 | #define FLAG_z 64 /* Input is null terminated, not \n */ | ||
33 | /* These can be applied to search keys, the previous four can't */ | ||
34 | #define FLAG_b 128 /* Ignore leading blanks */ | ||
35 | #define FLAG_r 256 /* Reverse */ | ||
36 | #define FLAG_d 512 /* Ignore !(isalnum()|isspace()) */ | ||
37 | #define FLAG_f 1024 /* Force uppercase */ | ||
38 | #define FLAG_i 2048 /* Ignore !isprint() */ | ||
39 | #define FLAG_bb 32768 /* Ignore trailing blanks */ | ||
40 | |||
41 | |||
42 | #if ENABLE_FEATURE_SORT_BIG | ||
43 | static char key_separator; | ||
44 | |||
45 | static struct sort_key { | ||
46 | struct sort_key *next_key; /* linked list */ | ||
47 | unsigned short range[4]; /* start word, start char, end word, end char */ | ||
48 | int flags; | ||
49 | } *key_list; | ||
50 | |||
51 | static char *get_key(char *str, struct sort_key *key, int flags) | ||
52 | { | ||
53 | int start = 0, end = 0, len, i, j; | ||
54 | |||
55 | /* Special case whole string, so we don't have to make a copy */ | ||
56 | if (key->range[0] == 1 && !key->range[1] && !key->range[2] && !key->range[3] | ||
57 | && !(flags & (FLAG_b | FLAG_d | FLAG_f | FLAG_i | FLAG_bb)) | ||
58 | ) { | ||
59 | return str; | ||
60 | } | ||
61 | |||
62 | /* Find start of key on first pass, end on second pass*/ | ||
63 | len = strlen(str); | ||
64 | for (j = 0; j < 2; j++) { | ||
65 | if (!key->range[2*j]) | ||
66 | end = len; | ||
67 | /* Loop through fields */ | ||
68 | else { | ||
69 | end = 0; | ||
70 | for (i = 1; i < key->range[2*j] + j; i++) { | ||
71 | /* Skip leading blanks or first separator */ | ||
72 | if (str[end]) { | ||
73 | if (!key_separator) | ||
74 | while (isspace(str[end])) end++; | ||
75 | } | ||
76 | /* Skip body of key */ | ||
77 | for (; str[end]; end++) { | ||
78 | if (key_separator) { | ||
79 | if (str[end] == key_separator) | ||
80 | break; | ||
81 | } else { | ||
82 | if (isspace(str[end])) | ||
83 | break; | ||
84 | } | ||
85 | } | ||
86 | } | ||
87 | } | ||
88 | if (!j) start = end; | ||
89 | } | ||
90 | /* Key with explicit separator starts after separator */ | ||
91 | if (key_separator && str[start] == key_separator) | ||
92 | start++; | ||
93 | /* Strip leading whitespace if necessary */ | ||
94 | //XXX: skip_whitespace() | ||
95 | if (flags & FLAG_b) | ||
96 | while (isspace(str[start])) start++; | ||
97 | /* Strip trailing whitespace if necessary */ | ||
98 | if (flags & FLAG_bb) | ||
99 | while (end > start && isspace(str[end-1])) end--; | ||
100 | /* Handle offsets on start and end */ | ||
101 | if (key->range[3]) { | ||
102 | end += key->range[3] - 1; | ||
103 | if (end > len) end = len; | ||
104 | } | ||
105 | if (key->range[1]) { | ||
106 | start += key->range[1] - 1; | ||
107 | if (start > len) start = len; | ||
108 | } | ||
109 | /* Make the copy */ | ||
110 | if (end < start) end = start; | ||
111 | str = xstrndup(str+start, end-start); | ||
112 | /* Handle -d */ | ||
113 | if (flags & FLAG_d) { | ||
114 | for (start = end = 0; str[end]; end++) | ||
115 | if (isspace(str[end]) || isalnum(str[end])) | ||
116 | str[start++] = str[end]; | ||
117 | str[start] = '\0'; | ||
118 | } | ||
119 | /* Handle -i */ | ||
120 | if (flags & FLAG_i) { | ||
121 | for (start = end = 0; str[end]; end++) | ||
122 | if (isprint(str[end])) | ||
123 | str[start++] = str[end]; | ||
124 | str[start] = '\0'; | ||
125 | } | ||
126 | /* Handle -f */ | ||
127 | if (flags & FLAG_f) | ||
128 | for (i = 0; str[i]; i++) | ||
129 | str[i] = toupper(str[i]); | ||
130 | |||
131 | return str; | ||
132 | } | ||
133 | |||
134 | static struct sort_key *add_key(void) | ||
135 | { | ||
136 | struct sort_key **pkey = &key_list; | ||
137 | while (*pkey) | ||
138 | pkey = &((*pkey)->next_key); | ||
139 | return *pkey = xzalloc(sizeof(struct sort_key)); | ||
140 | } | ||
141 | |||
142 | #define GET_LINE(fp) \ | ||
143 | ((global_flags & FLAG_z) \ | ||
144 | ? bb_get_chunk_from_file(fp, NULL) \ | ||
145 | : xmalloc_getline(fp)) | ||
146 | #else | ||
147 | #define GET_LINE(fp) xmalloc_getline(fp) | ||
148 | #endif | ||
149 | |||
150 | /* Iterate through keys list and perform comparisons */ | ||
151 | static int compare_keys(const void *xarg, const void *yarg) | ||
152 | { | ||
153 | int flags = global_flags, retval = 0; | ||
154 | char *x, *y; | ||
155 | |||
156 | #if ENABLE_FEATURE_SORT_BIG | ||
157 | struct sort_key *key; | ||
158 | |||
159 | for (key = key_list; !retval && key; key = key->next_key) { | ||
160 | flags = key->flags ? key->flags : global_flags; | ||
161 | /* Chop out and modify key chunks, handling -dfib */ | ||
162 | x = get_key(*(char **)xarg, key, flags); | ||
163 | y = get_key(*(char **)yarg, key, flags); | ||
164 | #else | ||
165 | /* This curly bracket serves no purpose but to match the nesting | ||
166 | level of the for() loop we're not using */ | ||
167 | { | ||
168 | x = *(char **)xarg; | ||
169 | y = *(char **)yarg; | ||
170 | #endif | ||
171 | /* Perform actual comparison */ | ||
172 | switch (flags & 7) { | ||
173 | default: | ||
174 | bb_error_msg_and_die("unknown sort type"); | ||
175 | break; | ||
176 | /* Ascii sort */ | ||
177 | case 0: | ||
178 | retval = strcmp(x, y); | ||
179 | break; | ||
180 | #if ENABLE_FEATURE_SORT_BIG | ||
181 | case FLAG_g: { | ||
182 | char *xx, *yy; | ||
183 | double dx = strtod(x, &xx); | ||
184 | double dy = strtod(y, &yy); | ||
185 | /* not numbers < NaN < -infinity < numbers < +infinity) */ | ||
186 | if (x == xx) | ||
187 | retval = (y == yy ? 0 : -1); | ||
188 | else if (y == yy) | ||
189 | retval = 1; | ||
190 | /* Check for isnan */ | ||
191 | else if (dx != dx) | ||
192 | retval = (dy != dy) ? 0 : -1; | ||
193 | else if (dy != dy) | ||
194 | retval = 1; | ||
195 | /* Check for infinity. Could underflow, but it avoids libm. */ | ||
196 | else if (1.0 / dx == 0.0) { | ||
197 | if (dx < 0) | ||
198 | retval = (1.0 / dy == 0.0 && dy < 0) ? 0 : -1; | ||
199 | else | ||
200 | retval = (1.0 / dy == 0.0 && dy > 0) ? 0 : 1; | ||
201 | } else if (1.0 / dy == 0.0) | ||
202 | retval = (dy < 0) ? 1 : -1; | ||
203 | else | ||
204 | retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0); | ||
205 | break; | ||
206 | } | ||
207 | case FLAG_M: { | ||
208 | struct tm thyme; | ||
209 | int dx; | ||
210 | char *xx, *yy; | ||
211 | |||
212 | xx = strptime(x, "%b", &thyme); | ||
213 | dx = thyme.tm_mon; | ||
214 | yy = strptime(y, "%b", &thyme); | ||
215 | if (!xx) | ||
216 | retval = (!yy) ? 0 : -1; | ||
217 | else if (!yy) | ||
218 | retval = 1; | ||
219 | else | ||
220 | retval = (dx == thyme.tm_mon) ? 0 : dx - thyme.tm_mon; | ||
221 | break; | ||
222 | } | ||
223 | /* Full floating point version of -n */ | ||
224 | case FLAG_n: { | ||
225 | double dx = atof(x); | ||
226 | double dy = atof(y); | ||
227 | retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0); | ||
228 | break; | ||
229 | } | ||
230 | } /* switch */ | ||
231 | /* Free key copies. */ | ||
232 | if (x != *(char **)xarg) free(x); | ||
233 | if (y != *(char **)yarg) free(y); | ||
234 | /* if (retval) break; - done by for() anyway */ | ||
235 | #else | ||
236 | /* Integer version of -n for tiny systems */ | ||
237 | case FLAG_n: | ||
238 | retval = atoi(x) - atoi(y); | ||
239 | break; | ||
240 | } /* switch */ | ||
241 | #endif | ||
242 | } | ||
243 | /* Perform fallback sort if necessary */ | ||
244 | if (!retval && !(global_flags & FLAG_s)) | ||
245 | retval = strcmp(*(char **)xarg, *(char **)yarg); | ||
246 | |||
247 | if (flags & FLAG_r) return -retval; | ||
248 | return retval; | ||
249 | } | ||
250 | |||
251 | int sort_main(int argc, char **argv) | ||
252 | { | ||
253 | FILE *fp, *outfile = NULL; | ||
254 | int linecount = 0, i, flag; | ||
255 | char *line, **lines = NULL, *optlist = "ngMucszbrdfimS:T:o:k:t:"; | ||
256 | int c; | ||
257 | |||
258 | xfunc_error_retval = 2; | ||
259 | /* Parse command line options */ | ||
260 | while ((c = getopt(argc, argv, optlist)) > 0) { | ||
261 | line = strchr(optlist, c); | ||
262 | if (!line) bb_show_usage(); | ||
263 | switch (*line) { | ||
264 | #if ENABLE_FEATURE_SORT_BIG | ||
265 | case 'o': | ||
266 | if (outfile) bb_error_msg_and_die("too many -o"); | ||
267 | outfile = xfopen(optarg, "w"); | ||
268 | break; | ||
269 | case 't': | ||
270 | if (key_separator || optarg[1]) | ||
271 | bb_error_msg_and_die("too many -t"); | ||
272 | key_separator = *optarg; | ||
273 | break; | ||
274 | /* parse sort key */ | ||
275 | case 'k': { | ||
276 | struct sort_key *key = add_key(); | ||
277 | char *temp, *temp2; | ||
278 | |||
279 | temp = optarg; | ||
280 | for (i = 0; *temp;) { | ||
281 | /* Start of range */ | ||
282 | key->range[2*i] = (unsigned short)strtol(temp, &temp, 10); | ||
283 | if (*temp == '.') | ||
284 | key->range[(2*i)+1] = (unsigned short)strtol(temp+1, &temp, 10); | ||
285 | for (; *temp; temp++) { | ||
286 | if (*temp == ',' && !i++) { | ||
287 | temp++; | ||
288 | break; | ||
289 | } /* no else needed: fall through to syntax error | ||
290 | because comma isn't in optlist */ | ||
291 | temp2 = strchr(optlist, *temp); | ||
292 | flag = (1 << (temp2 - optlist)); | ||
293 | if (!temp2 || (flag > FLAG_M && flag < FLAG_b)) | ||
294 | bb_error_msg_and_die("unknown key option"); | ||
295 | /* b after ',' means strip _trailing_ space */ | ||
296 | if (i && flag == FLAG_b) flag = FLAG_bb; | ||
297 | key->flags |= flag; | ||
298 | } | ||
299 | } | ||
300 | break; | ||
301 | } | ||
302 | #endif | ||
303 | default: | ||
304 | global_flags |= (1 << (line - optlist)); | ||
305 | /* global b strips leading and trailing spaces */ | ||
306 | if (global_flags & FLAG_b) global_flags |= FLAG_bb; | ||
307 | break; | ||
308 | } | ||
309 | } | ||
310 | /* Open input files and read data */ | ||
311 | for (i = argv[optind] ? optind : optind-1; argv[i]; i++) { | ||
312 | fp = stdin; | ||
313 | if (i >= optind && (argv[i][0] != '-' || argv[i][1])) | ||
314 | fp = xfopen(argv[i], "r"); | ||
315 | for (;;) { | ||
316 | line = GET_LINE(fp); | ||
317 | if (!line) break; | ||
318 | if (!(linecount & 63)) | ||
319 | lines = xrealloc(lines, sizeof(char *) * (linecount + 64)); | ||
320 | lines[linecount++] = line; | ||
321 | } | ||
322 | fclose(fp); | ||
323 | } | ||
324 | #if ENABLE_FEATURE_SORT_BIG | ||
325 | /* if no key, perform alphabetic sort */ | ||
326 | if (!key_list) | ||
327 | add_key()->range[0] = 1; | ||
328 | /* handle -c */ | ||
329 | if (global_flags & FLAG_c) { | ||
330 | int j = (global_flags & FLAG_u) ? -1 : 0; | ||
331 | for (i = 1; i < linecount; i++) | ||
332 | if (compare_keys(&lines[i-1], &lines[i]) > j) { | ||
333 | fprintf(stderr, "Check line %d\n", i); | ||
334 | return 1; | ||
335 | } | ||
336 | return 0; | ||
337 | } | ||
338 | #endif | ||
339 | /* Perform the actual sort */ | ||
340 | qsort(lines, linecount, sizeof(char *), compare_keys); | ||
341 | /* handle -u */ | ||
342 | if (global_flags & FLAG_u) { | ||
343 | for (flag = 0, i = 1; i < linecount; i++) { | ||
344 | if (!compare_keys(&lines[flag], &lines[i])) | ||
345 | free(lines[i]); | ||
346 | else | ||
347 | lines[++flag] = lines[i]; | ||
348 | } | ||
349 | if (linecount) linecount = flag+1; | ||
350 | } | ||
351 | /* Print it */ | ||
352 | if (!outfile) outfile = stdout; | ||
353 | for (i = 0; i < linecount; i++) | ||
354 | fprintf(outfile, "%s\n", lines[i]); | ||
355 | |||
356 | fflush_stdout_and_exit(EXIT_SUCCESS); | ||
357 | } | ||