aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Landley <rob@landley.net>2005-01-24 07:00:02 +0000
committerRob Landley <rob@landley.net>2005-01-24 07:00:02 +0000
commitc0dedd05e81ac03a1793abd8cbfacf8c546e976f (patch)
treecd761995a05504c6d068ba33ddc62e86f9342e4c
parentf4bb212d6ccc1a14724ba56fa57c3bc3ca66cf22 (diff)
downloadbusybox-w32-c0dedd05e81ac03a1793abd8cbfacf8c546e976f.tar.gz
busybox-w32-c0dedd05e81ac03a1793abd8cbfacf8c546e976f.tar.bz2
busybox-w32-c0dedd05e81ac03a1793abd8cbfacf8c546e976f.zip
Sort rewrite to be SUSv3 compliant. New config option, updated help, and
a couple of infrastructure bits.
-rw-r--r--coreutils/Config.in12
-rw-r--r--coreutils/sort.c350
-rw-r--r--include/libbb.h1
-rw-r--r--include/usage.h50
-rw-r--r--libbb/get_line_from_file.c8
5 files changed, 351 insertions, 70 deletions
diff --git a/coreutils/Config.in b/coreutils/Config.in
index e1f0516fd..4aff5ce69 100644
--- a/coreutils/Config.in
+++ b/coreutils/Config.in
@@ -398,6 +398,18 @@ config CONFIG_SORT
398 help 398 help
399 sort is used to sort lines of text in specified files. 399 sort is used to sort lines of text in specified files.
400 400
401config CONFIG_SORT_BIG
402 bool " full SuSv3 compliant sort (Support -ktcsbdfiozgM)"
403 default y
404 depends on CONFIG_SORT
405 help
406 Without this, sort only supports -r, -u, and an integer version
407 of -n. Selecting this adds sort keys, floating point support, and
408 more. This adds a little over 3k to a nonstatic build on x86.
409
410 The SuSv3 sort standard is available at:
411 http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html
412
401config CONFIG_STTY 413config CONFIG_STTY
402 bool "stty" 414 bool "stty"
403 default n 415 default n
diff --git a/coreutils/sort.c b/coreutils/sort.c
index 8cc4d8886..c701b5e4f 100644
--- a/coreutils/sort.c
+++ b/coreutils/sort.c
@@ -1,8 +1,8 @@
1/* vi: set sw=4 ts=4: */ 1/* vi: set sw=4 ts=4: */
2/* 2/*
3 * Mini sort implementation for busybox 3 * SuS3 compliant sort implementation for busybox
4 * 4 *
5 * Copyright (C) 2000 by Matt Kraai <kraai@alumni.carnegiemellon.edu> 5 * Copyright (C) 2004 by Rob Landley <rob@landley.net>
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
@@ -18,83 +18,321 @@
18 * along with this program; if not, write to the Free Software 18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * 20 *
21 * See SuS3 sort standard at:
22 * http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html
21 */ 23 */
22 24
23/* BB_AUDIT SUSv3 _NOT_ compliant -- a number of options are not supported. */ 25#include <ctype.h>
24/* http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html */ 26#include <math.h>
25
26/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org)
27 *
28 * Now does proper error checking on i/o. Plus some space savings.
29 */
30
31#include <stdio.h> 27#include <stdio.h>
32#include <stdlib.h> 28#include <stdlib.h>
33#include <string.h> 29#include <string.h>
30#include <time.h>
34#include <unistd.h> 31#include <unistd.h>
35#include "busybox.h" 32#include "busybox.h"
36#include "libcoreutils/coreutils.h"
37 33
38static int compare_ascii(const void *x, const void *y) 34int global_flags;
35
36/*
37 sort [-m][-o output][-bdfinru][-t char][-k keydef]... [file...]
38 sort -c [-bdfinru][-t char][-k keydef][file]
39*/
40
41/* These are sort types */
42#define FLAG_n 1 /* Numeric sort */
43#define FLAG_g 2 /* Sort using strtod() */
44#define FLAG_M 4 /* Sort date */
45/* ucsz apply to root level only, not keys. b at root level implies bb */
46#define FLAG_u 8 /* Unique */
47#define FLAG_c 16 /* Check: no output, exit(!ordered) */
48#define FLAG_s 32 /* Stable sort, no ascii fallback at end */
49#define FLAG_z 64 /* Input is null terminated, not \n */
50/* These can be applied to search keys, the previous four can't */
51#define FLAG_b 128 /* Ignore leading blanks */
52#define FLAG_r 256 /* Reverse */
53#define FLAG_d 512 /* Ignore !(isalnum()|isspace()) */
54#define FLAG_f 1024 /* Force uppercase */
55#define FLAG_i 2048 /* Ignore !isprint() */
56#define FLAG_bb 32768 /* Ignore trailing blanks */
57
58
59#ifdef CONFIG_SORT_BIG
60char key_separator;
61
62struct sort_key
39{ 63{
40 return strcmp(*(char **)x, *(char **)y); 64 struct sort_key *next_key; /* linked list */
41} 65 unsigned short range[4]; /* start word, start char, end word, end char */
66 int flags;
67} *key_list;
42 68
43static int compare_numeric(const void *x, const void *y) 69static char *get_key(char *str, struct sort_key *key, int flags)
44{ 70{
45 int z = atoi(*(char **)x) - atoi(*(char **)y); 71 int start=0,end,len,i,j;
46 return z ? z : strcmp(*(char **)x, *(char **)y); 72
73 /* Special case whole string, so we don't have to make a copy */
74 if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
75 && !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str;
76 /* Find start of key on first pass, end on second pass*/
77 len=strlen(str);
78
79 for(j=0;j<2;j++) {
80 if(!key->range[2*j]) end=len;
81 /* Loop through fields */
82 else {
83 end=0;
84 for(i=1;i<key->range[2*j]+j;i++) {
85 /* Skip leading blanks or first separator */
86 if(str[end]) {
87 if(key_separator) {
88 if(str[end]==key_separator) end++;
89 } else if(isspace(str[end]))
90 while(isspace(str[end])) end++;
91 }
92 /* Skip body of key */
93 for(;str[end];end++) {
94 if(key_separator) {
95 if(str[end]==key_separator) break;
96 } else if(isspace(str[end])) break;
97 }
98 }
99 }
100 if(!j) start=end;
101 }
102 /* Key with explicit separator starts after separator */
103 if(key_separator && str[start]==key_separator) start++;
104 /* Strip leading whitespace if necessary */
105 if(flags&FLAG_b) while(isspace(str[start])) start++;
106 /* Strip trailing whitespace if necessary */
107 if(flags&FLAG_bb) while(end>start && isspace(str[end-1])) end--;
108 /* Handle offsets on start and end */
109 if(key->range[3]) {
110 end+=key->range[3]-1;
111 if(end>len) end=len;
112 }
113 if(key->range[1]) {
114 start+=key->range[1]-1;
115 if(start>len) start=len;
116 }
117 /* Make the copy */
118 if(end<start) end=start;
119 str=bb_xstrndup(str+start,end-start);
120 /* Handle -d */
121 if(flags&FLAG_d) {
122 for(start=end=0;str[end];end++)
123 if(isspace(str[end]) || isalnum(str[end])) str[start++]=str[end];
124 str[start]=0;
125 }
126 /* Handle -i */
127 if(flags&FLAG_i) {
128 for(start=end=0;str[end];end++)
129 if(isprint(str[end])) str[start++]=str[end];
130 str[start]=0;
131 }
132 /* Handle -f */
133 if(flags*FLAG_f) for(i=0;str[i];i++) str[i]=toupper(str[i]);
134
135 return str;
47} 136}
48 137
49int sort_main(int argc, char **argv) 138static struct sort_key *add_key(void)
50{ 139{
51 FILE *fp; 140 struct sort_key **pkey=&key_list;
52 char *line, **lines = NULL; 141 while(*pkey) pkey=&((*pkey)->next_key);
53 int i, nlines = 0, inc; 142 return *pkey=xcalloc(1,sizeof(struct sort_key));
54 int (*compare)(const void *, const void *) = compare_ascii; 143}
55 144
56 int flags; 145#define GET_LINE(fp) (global_flags&FLAG_z) ? bb_get_chunk_from_file(fp) \
146 : bb_get_chomped_line_from_file(fp)
147#else
148#define GET_LINE(fp) bb_get_chomped_line_from_file(fp)
149#endif
57 150
58 bb_default_error_retval = 2; 151/* Iterate through keys list and perform comparisons */
152static int compare_keys(const void *xarg, const void *yarg)
153{
154 int flags=global_flags,retval=0;
155 char *x,*y;
59 156
60 flags = bb_getopt_ulflags(argc, argv, "nru"); 157#ifdef CONFIG_SORT_BIG
61 if (flags & 1) { 158 struct sort_key *key;
62 compare = compare_numeric; 159
63 } 160 for(key=key_list;!retval && key;key=key->next_key) {
161 flags=(key->flags) ? key->flags : global_flags;
162 /* Chop out and modify key chunks, handling -dfib */
163 x=get_key(*(char **)xarg,key,flags);
164 y=get_key(*(char **)yarg,key,flags);
165#else
166 /* This curly bracket serves no purpose but to match the nesting
167 level of the for() loop we're not using */
168 {
169 x=*(char **)xarg;
170 y=*(char **)yarg;
171#endif
172 /* Perform actual comparison */
173 switch(flags&7) {
174 default:
175 bb_error_msg_and_die("Unknown sort type.");
176 break;
177 /* Ascii sort */
178 case 0:
179 retval=strcmp(x,y);
180 break;
181#ifdef CONFIG_SORT_BIG
182 case FLAG_g:
183 {
184 char *xx,*yy;
185 double dx=strtod(x,&xx), dy=strtod(y,&yy);
186 /* not numbers < NaN < -infinity < numbers < +infinity) */
187 if(x==xx) retval=(y==yy ? 0 : -1);
188 else if(y==yy) retval=1;
189 else if(isnan(dx)) retval=isnan(dy) ? 0 : -1;
190 else if(isnan(dy)) retval=1;
191 else if(isinf(dx)) {
192 if(dx<0) retval=((isinf(dy) && dy<0) ? 0 : -1);
193 else retval=((isinf(dy) && dy>0) ? 0 : 1);
194 } else if(isinf(dy)) retval=dy<0 ? 1 : -1;
195 else retval=dx>dy ? 1 : (dx<dy ? -1 : 0);
196 break;
197 }
198 case FLAG_M:
199 {
200 struct tm thyme;
201 int dx;
202 char *xx,*yy;
64 203
65 argv += optind; 204 xx=strptime(x,"%b",&thyme);
66 if (!*argv) { 205 dx=thyme.tm_mon;
67 *--argv = "-"; 206 yy=strptime(y,"%b",&thyme);
207 if(!xx) retval=(!yy ? 0 : -1);
208 else if(!yy) retval=1;
209 else retval=(dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon);
210 break;
211 }
212 /* Full floating point version of -n */
213 case FLAG_n:
214 {
215 double dx=atof(x),dy=atof(y);
216 retval=dx>dy ? 1 : (dx<dy ? -1 : 0);
217 break;
218 }
219 }
220 /* Free key copies. */
221 if(x!=*(char **)xarg) free(x);
222 if(y!=*(char **)yarg) free(y);
223 if(retval) break;
224#else
225 /* Integer version of -n for tiny systems */
226 case FLAG_n:
227 retval=atoi(x)-atoi(y);
228 break;
229 }
230#endif
68 } 231 }
232 /* Perform fallback sort if necessary */
233 if(!retval && !(global_flags&FLAG_s))
234 retval=strcmp(*(char **)xarg, *(char **)yarg);
235 return ((flags&FLAG_r)?-1:1)*retval;
236}
69 237
70 do { 238int sort_main(int argc, char **argv)
71 fp = xgetoptfile_sort_uniq(argv, "r"); 239{
72 while ((line = bb_get_chomped_line_from_file(fp)) != NULL) { 240 FILE *fp,*outfile=NULL;
73 lines = xrealloc(lines, sizeof(char *) * (nlines + 1)); 241 int linecount=0,i,flag;
74 lines[nlines++] = line; 242 char *line,**lines=NULL,c,*optlist="ngMucszbrdfimS:T:o:k:t:";
243
244 bb_default_error_retval = 2;
245 /* Parse command line options */
246 while((c=getopt(argc,argv,optlist))>0) {
247 line=index(optlist,c);
248 if(!line) bb_show_usage();
249 switch(*line) {
250#ifdef CONFIG_SORT_BIG
251 case 'o':
252 if(outfile) bb_error_msg_and_die("Too many -o.");
253 outfile=bb_xfopen(optarg,"w");
254 break;
255 case 't':
256 if(key_separator || optarg[1])
257 bb_error_msg_and_die("Too many -t.");
258 key_separator=*optarg;
259 break;
260 /* parse sort key */
261 case 'k':
262 {
263 struct sort_key *key=add_key();
264 char *temp, *temp2;
265
266 temp=optarg;
267 for(i=0;*temp;) {
268 /* Start of range */
269 key->range[2*i]=(unsigned short)strtol(temp,&temp,10);
270 if(*temp=='.')
271 key->range[(2*i)+1]=(unsigned short)strtol(temp+1,&temp,10);
272 for(;*temp;temp++) {
273 if(*temp==',' && !i++) {
274 temp++;
275 break;
276 } /* no else needed: fall through to syntax error
277 because comma isn't in optlist */
278 temp2=index(optlist,*temp);
279 flag=(1<<(temp2-optlist));
280 if(!temp2 || (flag>FLAG_M && flag<FLAG_b))
281 bb_error_msg_and_die("Unknown key option.");
282 /* b after , means strip _trailing_ space */
283 if(i && flag==FLAG_b) flag=FLAG_bb;
284 key->flags|=flag;
285 }
286 }
287 break;
288 }
289#endif
290 default:
291 global_flags|=(1<<(line-optlist));
292 /* global b strips leading and trailing spaces */
293 if(global_flags&FLAG_b) global_flags|=FLAG_bb;
294 break;
75 } 295 }
76 bb_xferror(fp, *argv);
77 bb_fclose_nonstdin(fp);
78 } while (*++argv);
79
80 /* sort it */
81 qsort(lines, nlines, sizeof(char *), compare);
82
83 /* print it */
84 i = 0;
85 --nlines;
86 if ((inc = 1 - (flags & 2)) < 0) { /* reverse */
87 i = nlines;
88 } 296 }
89 flags &= 4; 297 /* Open input files and read data */
90 298 for(i=argv[optind] ? optind : optind-1;argv[i];i++) {
91 while (nlines >= 0) { 299 if(i<optind || (*argv[i]=='-' && !argv[i][1])) fp=stdin;
92 if (!flags || !nlines || strcmp(lines[i+inc], lines[i])) { 300 else fp=bb_xfopen(argv[i],"r");
93 puts(lines[i]); 301 for(;;) {
302 line=GET_LINE(fp);
303 if(!line) break;
304 if(!(linecount&63))
305 lines=xrealloc(lines, sizeof(char *)*(linecount+64));
306 lines[linecount++]=line;
94 } 307 }
95 i += inc; 308 fclose(fp);
96 --nlines;
97 } 309 }
98 310#ifdef CONFIG_SORT_BIG
311 /* if no key, perform alphabetic sort */
312 if(!key_list) add_key()->range[0]=1;
313 /* handle -c */
314 if(global_flags&FLAG_c) {
315 int j=(global_flags&FLAG_u) ? -1 : 0;
316 for(i=1;i<linecount;i++)
317 if(compare_keys(&lines[i-1],&lines[i])>j) {
318 fprintf(stderr,"Check line %d\n",i);
319 return 1;
320 }
321 return 0;
322 }
323#endif
324 /* Perform the actual sort */
325 qsort(lines,linecount,sizeof(char *),compare_keys);
326 /* handle -u */
327 if(global_flags&FLAG_u) {
328 for(flag=0,i=1;i<linecount;i++) {
329 if(!compare_keys(&lines[flag],&lines[i])) free(lines[i]);
330 else lines[++flag]=lines[i];
331 }
332 if(linecount) linecount=flag+1;
333 }
334 /* Print it */
335 if(!outfile) outfile=stdout;
336 for(i=0;i<linecount;i++) fprintf(outfile,"%s\n",lines[i]);
99 bb_fflush_stdout_and_exit(EXIT_SUCCESS); 337 bb_fflush_stdout_and_exit(EXIT_SUCCESS);
100} 338}
diff --git a/include/libbb.h b/include/libbb.h
index 93ab5375c..0119aabe4 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -137,6 +137,7 @@ extern long *find_pid_by_name( const char* pidName);
137extern char *find_real_root_device_name(void); 137extern char *find_real_root_device_name(void);
138extern char *bb_get_line_from_file(FILE *file); 138extern char *bb_get_line_from_file(FILE *file);
139extern char *bb_get_chomped_line_from_file(FILE *file); 139extern char *bb_get_chomped_line_from_file(FILE *file);
140extern char *bb_get_chunk_from_file(FILE *file);
140extern int bb_copyfd_size(int fd1, int fd2, const off_t size); 141extern int bb_copyfd_size(int fd1, int fd2, const off_t size);
141extern int bb_copyfd_eof(int fd1, int fd2); 142extern int bb_copyfd_eof(int fd1, int fd2);
142extern void bb_xprint_and_close_file(FILE *file); 143extern void bb_xprint_and_close_file(FILE *file);
diff --git a/include/usage.h b/include/usage.h
index 7cf44d74a..c53ead0c7 100644
--- a/include/usage.h
+++ b/include/usage.h
@@ -2193,24 +2193,40 @@
2193 USAGE_FANCY_SLEEP("$ sleep 1d 3h 22m 8s\n" \ 2193 USAGE_FANCY_SLEEP("$ sleep 1d 3h 22m 8s\n" \
2194 "[98528 second delay results]\n") 2194 "[98528 second delay results]\n")
2195 2195
2196#ifdef CONFIG_FEATURE_SORT_UNIQUE 2196#ifdef CONFIG_SORT_BIG
2197 #define USAGE_SORT_UNIQUE(a) a 2197 #define USAGE_SORT_BIG(a) a
2198#else 2198#else
2199 #define USAGE_SORT_UNIQUE(a) 2199 #define USAGE_SORT_BIG(a)
2200#endif
2201#ifdef CONFIG_FEATURE_SORT_REVERSE
2202 #define USAGE_SORT_REVERSE(a) a
2203#else
2204 #define USAGE_SORT_REVERSE(a)
2205#endif 2200#endif
2201
2202
2206#define sort_trivial_usage \ 2203#define sort_trivial_usage \
2207 "[-n" USAGE_SORT_REVERSE("r") USAGE_SORT_UNIQUE("u") "] [FILE]..." 2204 "[-nru" USAGE_SORT_BIG("gMcszbdfimSTokt] [-o outfile] [-k start[.offset][opts][,end[.offset][opts]] [-t char") "] [FILE]..."
2208#define sort_full_usage \ 2205#define sort_full_usage \
2209 "Sorts lines of text in the specified files\n\n"\ 2206 "Sorts lines of text in the specified files\n\n"\
2210 "Options:\n" \ 2207 "Options:\n" \
2211 USAGE_SORT_UNIQUE("\t-u\tsuppress duplicate lines\n") \ 2208 USAGE_SORT_BIG( \
2212 USAGE_SORT_REVERSE("\t-r\tsort in reverse order\n") \ 2209 "\t-b\tignore leading blanks\n" \
2213 "\t-n\tsort numerics" 2210 "\t-c\tcheck whether input is sorted\n" \
2211 "\t-d\tdictionary order (blank or alphanumeric only)\n" \
2212 "\t-f\tignore case\n" \
2213 "\t-g\tgeneral numerical sort\n" \
2214 "\t-i\tignore unprintable characters\n" \
2215 "\t-k\tspecify sort key\n" \
2216 "\t-M\tsort month\n" \
2217 ) \
2218 "\t-n\tsort numbers\n" \
2219 USAGE_SORT_BIG( \
2220 "\t-o\toutput to file\n" \
2221 "\t-k\tsort by key\n" \
2222 "\t-t\tuse key separator other than whitespace\n" \
2223 ) \
2224 "\t-r\treverse sort order\n" \
2225 USAGE_SORT_BIG("\t-s\tstable (don't sort ties alphabetically)\n") \
2226 "\t-u\tsuppress duplicate lines" \
2227 USAGE_SORT_BIG("\n\t-z\tinput terminated by nulls, not newlines\n") \
2228 USAGE_SORT_BIG("\t-mST\tignored for GNU compatability") \
2229 ""
2214#define sort_example_usage \ 2230#define sort_example_usage \
2215 "$ echo -e \"e\\nf\\nb\\nd\\nc\\na\" | sort\n" \ 2231 "$ echo -e \"e\\nf\\nb\\nd\\nc\\na\" | sort\n" \
2216 "a\n" \ 2232 "a\n" \
@@ -2218,7 +2234,15 @@
2218 "c\n" \ 2234 "c\n" \
2219 "d\n" \ 2235 "d\n" \
2220 "e\n" \ 2236 "e\n" \
2221 "f\n" 2237 "f\n" \
2238 USAGE_SORT_BIG( \
2239 "$ echo -e \"c 3\\nb 2\\nd 2\" | $SORT -k 2,2n -k 1,1r\n" \
2240 "d 2\n" \
2241 "b 2\n" \
2242 "c 3\n" \
2243 ) \
2244 ""
2245
2222 2246
2223#define start_stop_daemon_trivial_usage \ 2247#define start_stop_daemon_trivial_usage \
2224 "[OPTIONS] [--start|--stop] ... [-- arguments...]\n" 2248 "[OPTIONS] [--start|--stop] ... [-- arguments...]\n"
diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c
index 6d12b21c4..a27edc3bd 100644
--- a/libbb/get_line_from_file.c
+++ b/libbb/get_line_from_file.c
@@ -44,7 +44,8 @@ static char *private_get_line_from_file(FILE *file, int c)
44 linebuf = xrealloc(linebuf, linebufsz += GROWBY); 44 linebuf = xrealloc(linebuf, linebufsz += GROWBY);
45 } 45 }
46 linebuf[idx++] = (char)ch; 46 linebuf[idx++] = (char)ch;
47 if (ch == '\n' || ch == '\0') { 47 if (!ch) return linebuf;
48 if (c<2 && ch == '\n') {
48 if (c) { 49 if (c) {
49 --idx; 50 --idx;
50 } 51 }
@@ -71,6 +72,11 @@ extern char *bb_get_chomped_line_from_file(FILE *file)
71 return private_get_line_from_file(file, 1); 72 return private_get_line_from_file(file, 1);
72} 73}
73 74
75extern char *bb_get_chunk_from_file(FILE *file)
76{
77 return private_get_line_from_file(file, 2);
78}
79
74 80
75/* END CODE */ 81/* END CODE */
76/* 82/*