diff options
author | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2007-03-30 14:43:27 +0000 |
---|---|---|
committer | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2007-03-30 14:43:27 +0000 |
commit | eceecea568a483a3acec2d4c03228b39ae2d6501 (patch) | |
tree | 7faa56bbba59529be6f671026ae97bda84267917 /coreutils/tr.c | |
parent | 52a9db6bb874c9ce7a8b7e935ba3ebbefdf3f7d4 (diff) | |
download | busybox-w32-eceecea568a483a3acec2d4c03228b39ae2d6501.tar.gz busybox-w32-eceecea568a483a3acec2d4c03228b39ae2d6501.tar.bz2 busybox-w32-eceecea568a483a3acec2d4c03228b39ae2d6501.zip |
- fix bug where we did not reject invalid classes like '[[:alpha'
- debloat while at it:
   text    data     bss     dec     hex filename
   1554       0      19    1573     625 tr.o.oorig
   1357       0      16    1373     55d tr.o
Diffstat (limited to 'coreutils/tr.c')
-rw-r--r-- | coreutils/tr.c | 161 |
1 files changed, 82 insertions, 79 deletions
diff --git a/coreutils/tr.c b/coreutils/tr.c index cea3b05b6..f423ab0cf 100644 --- a/coreutils/tr.c +++ b/coreutils/tr.c | |||
@@ -15,64 +15,61 @@ | |||
15 | * | 15 | * |
16 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. | 16 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
17 | */ | 17 | */ |
18 | 18 | /* http://www.opengroup.org/onlinepubs/009695399/utilities/tr.html | |
19 | * TODO: xdigit, graph, print | ||
20 | */ | ||
19 | #include "busybox.h" | 21 | #include "busybox.h" |
20 | 22 | ||
21 | // Even with -funsigned-char, gcc still complains about char as an array index. | ||
22 | |||
23 | #define GCC4_IS_STUPID int | ||
24 | |||
25 | #define ASCII 0377 | 23 | #define ASCII 0377 |
26 | 24 | ||
25 | #define TR_OPT_complement (1<<0) | ||
26 | #define TR_OPT_delete (1<<1) | ||
27 | #define TR_OPT_squeeze_reps (1<<2) | ||
27 | /* some "globals" shared across this file */ | 28 | /* some "globals" shared across this file */ |
28 | static char com_fl, del_fl, sq_fl; | ||
29 | /* these last are pointers to static buffers declared in tr_main */ | 29 | /* these last are pointers to static buffers declared in tr_main */ |
30 | static char *poutput, *pvector, *pinvec, *poutvec; | 30 | static char *poutput, *pvector, *pinvec, *poutvec; |
31 | 31 | ||
32 | static void convert(void) | 32 | static void ATTRIBUTE_NORETURN convert(const smalluint flags) |
33 | { | 33 | { |
34 | int read_chars = 0, in_index = 0, out_index = 0, c, coded, last = -1; | 34 | size_t read_chars = 0, in_index = 0, out_index = 0, c, coded, last = -1; |
35 | 35 | ||
36 | for (;;) { | 36 | for (;;) { |
37 | // If we're out of input, flush output and read more input. | 37 | /* If we're out of input, flush output and read more input. */ |
38 | |||
39 | if (in_index == read_chars) { | 38 | if (in_index == read_chars) { |
40 | if (out_index) { | 39 | if (out_index) { |
41 | if (write(1, (char *) poutput, out_index) != out_index) | 40 | xwrite(STDOUT_FILENO, (char *)poutput, out_index); |
42 | bb_error_msg_and_die(bb_msg_write_error); | ||
43 | out_index = 0; | 41 | out_index = 0; |
44 | } | 42 | } |
45 | 43 | if ((read_chars = read(STDIN_FILENO, bb_common_bufsiz1, BUFSIZ)) <= 0) { | |
46 | if ((read_chars = read(0, bb_common_bufsiz1, BUFSIZ)) <= 0) { | 44 | if (write(STDOUT_FILENO, (char *)poutput, out_index) != out_index) |
47 | if (write(1, (char *) poutput, out_index) != out_index) | 45 | bb_perror_msg(bb_msg_write_error); |
48 | bb_error_msg(bb_msg_write_error); | 46 | exit(EXIT_SUCCESS); |
49 | exit(0); | ||
50 | } | 47 | } |
51 | in_index = 0; | 48 | in_index = 0; |
52 | } | 49 | } |
53 | c = bb_common_bufsiz1[in_index++]; | 50 | c = bb_common_bufsiz1[in_index++]; |
54 | coded = pvector[c]; | 51 | coded = pvector[c]; |
55 | if (del_fl && pinvec[c]) | 52 | if ((flags & TR_OPT_delete) && pinvec[c]) |
56 | continue; | 53 | continue; |
57 | if (sq_fl && last == coded && (pinvec[c] || poutvec[coded])) | 54 | if ((flags & TR_OPT_squeeze_reps) && last == coded && |
55 | (pinvec[c] || poutvec[coded])) | ||
58 | continue; | 56 | continue; |
59 | poutput[out_index++] = last = coded; | 57 | poutput[out_index++] = last = coded; |
60 | } | 58 | } |
61 | |||
62 | /* NOTREACHED */ | 59 | /* NOTREACHED */ |
63 | } | 60 | } |
64 | 61 | ||
65 | static void map(char *string1, unsigned int string1_len, | 62 | static void map(unsigned char *string1, unsigned int string1_len, |
66 | char *string2, unsigned int string2_len) | 63 | unsigned char *string2, unsigned int string2_len) |
67 | { | 64 | { |
68 | char last = '0'; | 65 | char last = '0'; |
69 | unsigned int i, j; | 66 | unsigned int i, j; |
70 | 67 | ||
71 | for (j = 0, i = 0; i < string1_len; i++) { | 68 | for (j = 0, i = 0; i < string1_len; i++) { |
72 | if (string2_len <= j) | 69 | if (string2_len <= j) |
73 | pvector[(GCC4_IS_STUPID)string1[i]] = last; | 70 | pvector[string1[i]] = last; |
74 | else | 71 | else |
75 | pvector[(GCC4_IS_STUPID)string1[i]] = last = string2[j++]; | 72 | pvector[string1[i]] = last = string2[j++]; |
76 | } | 73 | } |
77 | } | 74 | } |
78 | 75 | ||
@@ -84,15 +81,35 @@ static void map(char *string1, unsigned int string1_len, | |||
84 | static unsigned int expand(const char *arg, char *buffer) | 81 | static unsigned int expand(const char *arg, char *buffer) |
85 | { | 82 | { |
86 | char *buffer_start = buffer; | 83 | char *buffer_start = buffer; |
87 | int i, ac; | 84 | unsigned i; /* XXX: FIXME: use unsigned char? */ |
88 | 85 | unsigned char ac; | |
86 | #if ENABLE_FEATURE_TR_CLASSES | ||
87 | #define CLO ":]" | ||
88 | const char * const classes[] = { | ||
89 | "alpha"CLO, "alnum"CLO, "digit"CLO, "lower"CLO, "upper"CLO, "space"CLO, | ||
90 | "blank"CLO, "punct"CLO, "cntrl"CLO, NULL | ||
91 | }; | ||
92 | #define CLASS_invalid 0 /* we increment the retval */ | ||
93 | #define CLASS_alpha 1 | ||
94 | #define CLASS_alnum 2 | ||
95 | #define CLASS_digit 3 | ||
96 | #define CLASS_lower 4 | ||
97 | #define CLASS_upper 5 | ||
98 | #define CLASS_space 6 | ||
99 | #define CLASS_blank 7 | ||
100 | #define CLASS_punct 8 | ||
101 | #define CLASS_cntrl 9 | ||
102 | //#define CLASS_xdigit 10 | ||
103 | //#define CLASS_graph 11 | ||
104 | //#define CLASS_print 12 | ||
105 | #endif | ||
89 | while (*arg) { | 106 | while (*arg) { |
90 | if (*arg == '\\') { | 107 | if (*arg == '\\') { |
91 | arg++; | 108 | arg++; |
92 | *buffer++ = bb_process_escape_sequence(&arg); | 109 | *buffer++ = bb_process_escape_sequence(&arg); |
93 | } else if (*(arg+1) == '-') { | 110 | } else if (*(arg+1) == '-') { |
94 | ac = *(arg+2); | 111 | ac = *(arg+2); |
95 | if(ac == 0) { | 112 | if (ac == 0) { |
96 | *buffer++ = *arg++; | 113 | *buffer++ = *arg++; |
97 | continue; | 114 | continue; |
98 | } | 115 | } |
@@ -104,50 +121,42 @@ static unsigned int expand(const char *arg, char *buffer) | |||
104 | arg++; | 121 | arg++; |
105 | i = *arg++; | 122 | i = *arg++; |
106 | if (ENABLE_FEATURE_TR_CLASSES && i == ':') { | 123 | if (ENABLE_FEATURE_TR_CLASSES && i == ':') { |
107 | if (strncmp(arg, "alpha", 5) == 0) { | 124 | smalluint j; |
108 | for (i = 'A'; i <= 'Z'; i++) | 125 | { /* not really pretty.. */ |
109 | *buffer++ = i; | 126 | char *tmp = xstrndup(arg, 7); // warning: xdigit needs 8, not 7 |
110 | for (i = 'a'; i <= 'z'; i++) | 127 | j = index_in_str_array(classes, tmp) + 1; |
111 | *buffer++ = i; | 128 | free(tmp); |
112 | } | 129 | } |
113 | else if (strncmp(arg, "alnum", 5) == 0) { | 130 | if (j == CLASS_alnum || j == CLASS_digit) { |
114 | for (i = '0'; i <= '9'; i++) | 131 | for (i = '0'; i <= '9'; i++) |
115 | *buffer++ = i; | 132 | *buffer++ = i; |
133 | } | ||
134 | if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_upper) { | ||
116 | for (i = 'A'; i <= 'Z'; i++) | 135 | for (i = 'A'; i <= 'Z'; i++) |
117 | *buffer++ = i; | 136 | *buffer++ = i; |
118 | for (i = 'a'; i <= 'z'; i++) | ||
119 | *buffer++ = i; | ||
120 | } | 137 | } |
121 | else if (strncmp(arg, "digit", 5) == 0) | 138 | if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_lower) { |
122 | for (i = '0'; i <= '9'; i++) | ||
123 | *buffer++ = i; | ||
124 | else if (strncmp(arg, "lower", 5) == 0) | ||
125 | for (i = 'a'; i <= 'z'; i++) | 139 | for (i = 'a'; i <= 'z'; i++) |
126 | *buffer++ = i; | 140 | *buffer++ = i; |
127 | else if (strncmp(arg, "upper", 5) == 0) | ||
128 | for (i = 'A'; i <= 'Z'; i++) | ||
129 | *buffer++ = i; | ||
130 | else if (strncmp(arg, "space", 5) == 0) { | ||
131 | const char s[] = "\t\n\v\f\r "; | ||
132 | strcat((char*)buffer, s); | ||
133 | buffer += sizeof(s) - 1; | ||
134 | } | 141 | } |
135 | else if (strncmp(arg, "blank", 5) == 0) { | 142 | if (j == CLASS_space || j == CLASS_blank) { |
136 | *buffer++ = '\t'; | 143 | *buffer++ = '\t'; |
144 | if (j == CLASS_space) { | ||
145 | *buffer++ = '\n'; | ||
146 | *buffer++ = '\v'; | ||
147 | *buffer++ = '\f'; | ||
148 | *buffer++ = '\r'; | ||
149 | } | ||
137 | *buffer++ = ' '; | 150 | *buffer++ = ' '; |
138 | } | 151 | } |
139 | /* gcc gives a warning if braces aren't used here */ | 152 | if (j == CLASS_punct || j == CLASS_cntrl) { |
140 | else if (strncmp(arg, "punct", 5) == 0) { | ||
141 | for (i = 0; i <= ASCII; i++) | 153 | for (i = 0; i <= ASCII; i++) |
142 | if (isprint(i) && (!isalnum(i)) && (!isspace(i))) | 154 | if ((j == CLASS_punct && |
155 | isprint(i) && (!isalnum(i)) && (!isspace(i))) || | ||
156 | (j == CLASS_cntrl && iscntrl(i))) | ||
143 | *buffer++ = i; | 157 | *buffer++ = i; |
144 | } | 158 | } |
145 | else if (strncmp(arg, "cntrl", 5) == 0) { | 159 | if (j == CLASS_invalid) { |
146 | for (i = 0; i <= ASCII; i++) | ||
147 | if (iscntrl(i)) | ||
148 | *buffer++ = i; | ||
149 | } | ||
150 | else { | ||
151 | *buffer++ = '['; | 160 | *buffer++ = '['; |
152 | *buffer++ = ':'; | 161 | *buffer++ = ':'; |
153 | continue; | 162 | continue; |
@@ -156,8 +165,7 @@ static unsigned int expand(const char *arg, char *buffer) | |||
156 | } | 165 | } |
157 | if (ENABLE_FEATURE_TR_EQUIV && i == '=') { | 166 | if (ENABLE_FEATURE_TR_EQUIV && i == '=') { |
158 | *buffer++ = *arg; | 167 | *buffer++ = *arg; |
159 | /* skip the closing =] */ | 168 | arg += 3; /* Skip the closing =] */ |
160 | arg += 3; | ||
161 | continue; | 169 | continue; |
162 | } | 170 | } |
163 | if (*arg++ != '-') { | 171 | if (*arg++ != '-') { |
@@ -168,11 +176,10 @@ static unsigned int expand(const char *arg, char *buffer) | |||
168 | ac = *arg++; | 176 | ac = *arg++; |
169 | while (i <= ac) | 177 | while (i <= ac) |
170 | *buffer++ = i++; | 178 | *buffer++ = i++; |
171 | arg++; /* Skip the assumed ']' */ | 179 | arg++; /* Skip the assumed ']' */ |
172 | } else | 180 | } else |
173 | *buffer++ = *arg++; | 181 | *buffer++ = *arg++; |
174 | } | 182 | } |
175 | |||
176 | return (buffer - buffer_start); | 183 | return (buffer - buffer_start); |
177 | } | 184 | } |
178 | 185 | ||
@@ -197,10 +204,11 @@ int tr_main(int argc, char **argv); | |||
197 | int tr_main(int argc, char **argv) | 204 | int tr_main(int argc, char **argv) |
198 | { | 205 | { |
199 | unsigned char *ptr; | 206 | unsigned char *ptr; |
200 | int output_length=0, input_length; | 207 | int output_length = 0, input_length; |
201 | int idx = 1; | 208 | int idx = 1; |
202 | int i; | 209 | int i; |
203 | RESERVE_CONFIG_BUFFER(output, BUFSIZ); | 210 | smalluint flags = 0; |
211 | RESERVE_CONFIG_UBUFFER(output, BUFSIZ); | ||
204 | RESERVE_CONFIG_BUFFER(vector, ASCII+1); | 212 | RESERVE_CONFIG_BUFFER(vector, ASCII+1); |
205 | RESERVE_CONFIG_BUFFER(invec, ASCII+1); | 213 | RESERVE_CONFIG_BUFFER(invec, ASCII+1); |
206 | RESERVE_CONFIG_BUFFER(outvec, ASCII+1); | 214 | RESERVE_CONFIG_BUFFER(outvec, ASCII+1); |
@@ -213,19 +221,14 @@ int tr_main(int argc, char **argv) | |||
213 | 221 | ||
214 | if (argc > 1 && argv[idx][0] == '-') { | 222 | if (argc > 1 && argv[idx][0] == '-') { |
215 | for (ptr = (unsigned char *) &argv[idx][1]; *ptr; ptr++) { | 223 | for (ptr = (unsigned char *) &argv[idx][1]; *ptr; ptr++) { |
216 | switch (*ptr) { | 224 | if (*ptr == 'c') |
217 | case 'c': | 225 | flags |= TR_OPT_complement; |
218 | com_fl = TRUE; | 226 | else if (*ptr == 'd') |
219 | break; | 227 | flags |= TR_OPT_delete; |
220 | case 'd': | 228 | else if (*ptr == 's') |
221 | del_fl = TRUE; | 229 | flags |= TR_OPT_squeeze_reps; |
222 | break; | 230 | else |
223 | case 's': | ||
224 | sq_fl = TRUE; | ||
225 | break; | ||
226 | default: | ||
227 | bb_show_usage(); | 231 | bb_show_usage(); |
228 | } | ||
229 | } | 232 | } |
230 | idx++; | 233 | idx++; |
231 | } | 234 | } |
@@ -236,7 +239,7 @@ int tr_main(int argc, char **argv) | |||
236 | 239 | ||
237 | if (argv[idx] != NULL) { | 240 | if (argv[idx] != NULL) { |
238 | input_length = expand(argv[idx++], bb_common_bufsiz1); | 241 | input_length = expand(argv[idx++], bb_common_bufsiz1); |
239 | if (com_fl) | 242 | if (flags & TR_OPT_complement) |
240 | input_length = complement(bb_common_bufsiz1, input_length); | 243 | input_length = complement(bb_common_bufsiz1, input_length); |
241 | if (argv[idx] != NULL) { | 244 | if (argv[idx] != NULL) { |
242 | if (*argv[idx] == '\0') | 245 | if (*argv[idx] == '\0') |
@@ -245,10 +248,10 @@ int tr_main(int argc, char **argv) | |||
245 | map(bb_common_bufsiz1, input_length, output, output_length); | 248 | map(bb_common_bufsiz1, input_length, output, output_length); |
246 | } | 249 | } |
247 | for (i = 0; i < input_length; i++) | 250 | for (i = 0; i < input_length; i++) |
248 | invec[(GCC4_IS_STUPID)bb_common_bufsiz1[i]] = TRUE; | 251 | invec[(unsigned char)bb_common_bufsiz1[i]] = TRUE; |
249 | for (i = 0; i < output_length; i++) | 252 | for (i = 0; i < output_length; i++) |
250 | outvec[(GCC4_IS_STUPID)output[i]] = TRUE; | 253 | outvec[output[i]] = TRUE; |
251 | } | 254 | } |
252 | convert(); | 255 | convert(flags); |
253 | return 0; | 256 | return EXIT_SUCCESS; |
254 | } | 257 | } |