aboutsummaryrefslogtreecommitdiff
path: root/coreutils/tr.c
diff options
context:
space:
mode:
author: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> 2007-03-30 14:43:27 +0000
committer: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> 2007-03-30 14:43:27 +0000
commit: eceecea568a483a3acec2d4c03228b39ae2d6501 (patch)
tree: 7faa56bbba59529be6f671026ae97bda84267917 /coreutils/tr.c
parent: 52a9db6bb874c9ce7a8b7e935ba3ebbefdf3f7d4 (diff)
download: busybox-w32-eceecea568a483a3acec2d4c03228b39ae2d6501.tar.gz
busybox-w32-eceecea568a483a3acec2d4c03228b39ae2d6501.tar.bz2
busybox-w32-eceecea568a483a3acec2d4c03228b39ae2d6501.zip
- fix bug where we did not reject invalid classes like '[[:alpha'
- debloat while at it:
   text    data     bss     dec     hex filename
   1554       0      19    1573     625 tr.o.oorig
   1357       0      16    1373     55d tr.o
Diffstat (limited to 'coreutils/tr.c')
-rw-r--r-- coreutils/tr.c 161
1 files changed, 82 insertions, 79 deletions
diff --git a/coreutils/tr.c b/coreutils/tr.c
index cea3b05b6..f423ab0cf 100644
--- a/coreutils/tr.c
+++ b/coreutils/tr.c
@@ -15,64 +15,61 @@
15 * 15 *
16 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. 16 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
17 */ 17 */
18 18/* http://www.opengroup.org/onlinepubs/009695399/utilities/tr.html
19 * TODO: xdigit, graph, print
20 */
19#include "busybox.h" 21#include "busybox.h"
20 22
21// Even with -funsigned-char, gcc still complains about char as an array index.
22
23#define GCC4_IS_STUPID int
24
25#define ASCII 0377 23#define ASCII 0377
26 24
25#define TR_OPT_complement (1<<0)
26#define TR_OPT_delete (1<<1)
27#define TR_OPT_squeeze_reps (1<<2)
27/* some "globals" shared across this file */ 28/* some "globals" shared across this file */
28static char com_fl, del_fl, sq_fl;
29/* these last are pointers to static buffers declared in tr_main */ 29/* these last are pointers to static buffers declared in tr_main */
30static char *poutput, *pvector, *pinvec, *poutvec; 30static char *poutput, *pvector, *pinvec, *poutvec;
31 31
32static void convert(void) 32static void ATTRIBUTE_NORETURN convert(const smalluint flags)
33{ 33{
34 int read_chars = 0, in_index = 0, out_index = 0, c, coded, last = -1; 34 size_t read_chars = 0, in_index = 0, out_index = 0, c, coded, last = -1;
35 35
36 for (;;) { 36 for (;;) {
37 // If we're out of input, flush output and read more input. 37 /* If we're out of input, flush output and read more input. */
38
39 if (in_index == read_chars) { 38 if (in_index == read_chars) {
40 if (out_index) { 39 if (out_index) {
41 if (write(1, (char *) poutput, out_index) != out_index) 40 xwrite(STDOUT_FILENO, (char *)poutput, out_index);
42 bb_error_msg_and_die(bb_msg_write_error);
43 out_index = 0; 41 out_index = 0;
44 } 42 }
45 43 if ((read_chars = read(STDIN_FILENO, bb_common_bufsiz1, BUFSIZ)) <= 0) {
46 if ((read_chars = read(0, bb_common_bufsiz1, BUFSIZ)) <= 0) { 44 if (write(STDOUT_FILENO, (char *)poutput, out_index) != out_index)
47 if (write(1, (char *) poutput, out_index) != out_index) 45 bb_perror_msg(bb_msg_write_error);
48 bb_error_msg(bb_msg_write_error); 46 exit(EXIT_SUCCESS);
49 exit(0);
50 } 47 }
51 in_index = 0; 48 in_index = 0;
52 } 49 }
53 c = bb_common_bufsiz1[in_index++]; 50 c = bb_common_bufsiz1[in_index++];
54 coded = pvector[c]; 51 coded = pvector[c];
55 if (del_fl && pinvec[c]) 52 if ((flags & TR_OPT_delete) && pinvec[c])
56 continue; 53 continue;
57 if (sq_fl && last == coded && (pinvec[c] || poutvec[coded])) 54 if ((flags & TR_OPT_squeeze_reps) && last == coded &&
55 (pinvec[c] || poutvec[coded]))
58 continue; 56 continue;
59 poutput[out_index++] = last = coded; 57 poutput[out_index++] = last = coded;
60 } 58 }
61
62 /* NOTREACHED */ 59 /* NOTREACHED */
63} 60}
64 61
65static void map(char *string1, unsigned int string1_len, 62static void map(unsigned char *string1, unsigned int string1_len,
66 char *string2, unsigned int string2_len) 63 unsigned char *string2, unsigned int string2_len)
67{ 64{
68 char last = '0'; 65 char last = '0';
69 unsigned int i, j; 66 unsigned int i, j;
70 67
71 for (j = 0, i = 0; i < string1_len; i++) { 68 for (j = 0, i = 0; i < string1_len; i++) {
72 if (string2_len <= j) 69 if (string2_len <= j)
73 pvector[(GCC4_IS_STUPID)string1[i]] = last; 70 pvector[string1[i]] = last;
74 else 71 else
75 pvector[(GCC4_IS_STUPID)string1[i]] = last = string2[j++]; 72 pvector[string1[i]] = last = string2[j++];
76 } 73 }
77} 74}
78 75
@@ -84,15 +81,35 @@ static void map(char *string1, unsigned int string1_len,
84static unsigned int expand(const char *arg, char *buffer) 81static unsigned int expand(const char *arg, char *buffer)
85{ 82{
86 char *buffer_start = buffer; 83 char *buffer_start = buffer;
87 int i, ac; 84 unsigned i; /* XXX: FIXME: use unsigned char? */
88 85 unsigned char ac;
86#if ENABLE_FEATURE_TR_CLASSES
87#define CLO ":]"
88 const char * const classes[] = {
89 "alpha"CLO, "alnum"CLO, "digit"CLO, "lower"CLO, "upper"CLO, "space"CLO,
90 "blank"CLO, "punct"CLO, "cntrl"CLO, NULL
91 };
92#define CLASS_invalid 0 /* we increment the retval */
93#define CLASS_alpha 1
94#define CLASS_alnum 2
95#define CLASS_digit 3
96#define CLASS_lower 4
97#define CLASS_upper 5
98#define CLASS_space 6
99#define CLASS_blank 7
100#define CLASS_punct 8
101#define CLASS_cntrl 9
102//#define CLASS_xdigit 10
103//#define CLASS_graph 11
104//#define CLASS_print 12
105#endif
89 while (*arg) { 106 while (*arg) {
90 if (*arg == '\\') { 107 if (*arg == '\\') {
91 arg++; 108 arg++;
92 *buffer++ = bb_process_escape_sequence(&arg); 109 *buffer++ = bb_process_escape_sequence(&arg);
93 } else if (*(arg+1) == '-') { 110 } else if (*(arg+1) == '-') {
94 ac = *(arg+2); 111 ac = *(arg+2);
95 if(ac == 0) { 112 if (ac == 0) {
96 *buffer++ = *arg++; 113 *buffer++ = *arg++;
97 continue; 114 continue;
98 } 115 }
@@ -104,50 +121,42 @@ static unsigned int expand(const char *arg, char *buffer)
104 arg++; 121 arg++;
105 i = *arg++; 122 i = *arg++;
106 if (ENABLE_FEATURE_TR_CLASSES && i == ':') { 123 if (ENABLE_FEATURE_TR_CLASSES && i == ':') {
107 if (strncmp(arg, "alpha", 5) == 0) { 124 smalluint j;
108 for (i = 'A'; i <= 'Z'; i++) 125 { /* not really pretty.. */
109 *buffer++ = i; 126 char *tmp = xstrndup(arg, 7); // warning: xdigit needs 8, not 7
110 for (i = 'a'; i <= 'z'; i++) 127 j = index_in_str_array(classes, tmp) + 1;
111 *buffer++ = i; 128 free(tmp);
112 } 129 }
113 else if (strncmp(arg, "alnum", 5) == 0) { 130 if (j == CLASS_alnum || j == CLASS_digit) {
114 for (i = '0'; i <= '9'; i++) 131 for (i = '0'; i <= '9'; i++)
115 *buffer++ = i; 132 *buffer++ = i;
133 }
134 if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_upper) {
116 for (i = 'A'; i <= 'Z'; i++) 135 for (i = 'A'; i <= 'Z'; i++)
117 *buffer++ = i; 136 *buffer++ = i;
118 for (i = 'a'; i <= 'z'; i++)
119 *buffer++ = i;
120 } 137 }
121 else if (strncmp(arg, "digit", 5) == 0) 138 if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_lower) {
122 for (i = '0'; i <= '9'; i++)
123 *buffer++ = i;
124 else if (strncmp(arg, "lower", 5) == 0)
125 for (i = 'a'; i <= 'z'; i++) 139 for (i = 'a'; i <= 'z'; i++)
126 *buffer++ = i; 140 *buffer++ = i;
127 else if (strncmp(arg, "upper", 5) == 0)
128 for (i = 'A'; i <= 'Z'; i++)
129 *buffer++ = i;
130 else if (strncmp(arg, "space", 5) == 0) {
131 const char s[] = "\t\n\v\f\r ";
132 strcat((char*)buffer, s);
133 buffer += sizeof(s) - 1;
134 } 141 }
135 else if (strncmp(arg, "blank", 5) == 0) { 142 if (j == CLASS_space || j == CLASS_blank) {
136 *buffer++ = '\t'; 143 *buffer++ = '\t';
144 if (j == CLASS_space) {
145 *buffer++ = '\n';
146 *buffer++ = '\v';
147 *buffer++ = '\f';
148 *buffer++ = '\r';
149 }
137 *buffer++ = ' '; 150 *buffer++ = ' ';
138 } 151 }
139 /* gcc gives a warning if braces aren't used here */ 152 if (j == CLASS_punct || j == CLASS_cntrl) {
140 else if (strncmp(arg, "punct", 5) == 0) {
141 for (i = 0; i <= ASCII; i++) 153 for (i = 0; i <= ASCII; i++)
142 if (isprint(i) && (!isalnum(i)) && (!isspace(i))) 154 if ((j == CLASS_punct &&
155 isprint(i) && (!isalnum(i)) && (!isspace(i))) ||
156 (j == CLASS_cntrl && iscntrl(i)))
143 *buffer++ = i; 157 *buffer++ = i;
144 } 158 }
145 else if (strncmp(arg, "cntrl", 5) == 0) { 159 if (j == CLASS_invalid) {
146 for (i = 0; i <= ASCII; i++)
147 if (iscntrl(i))
148 *buffer++ = i;
149 }
150 else {
151 *buffer++ = '['; 160 *buffer++ = '[';
152 *buffer++ = ':'; 161 *buffer++ = ':';
153 continue; 162 continue;
@@ -156,8 +165,7 @@ static unsigned int expand(const char *arg, char *buffer)
156 } 165 }
157 if (ENABLE_FEATURE_TR_EQUIV && i == '=') { 166 if (ENABLE_FEATURE_TR_EQUIV && i == '=') {
158 *buffer++ = *arg; 167 *buffer++ = *arg;
159 /* skip the closing =] */ 168 arg += 3; /* Skip the closing =] */
160 arg += 3;
161 continue; 169 continue;
162 } 170 }
163 if (*arg++ != '-') { 171 if (*arg++ != '-') {
@@ -168,11 +176,10 @@ static unsigned int expand(const char *arg, char *buffer)
168 ac = *arg++; 176 ac = *arg++;
169 while (i <= ac) 177 while (i <= ac)
170 *buffer++ = i++; 178 *buffer++ = i++;
171 arg++; /* Skip the assumed ']' */ 179 arg++; /* Skip the assumed ']' */
172 } else 180 } else
173 *buffer++ = *arg++; 181 *buffer++ = *arg++;
174 } 182 }
175
176 return (buffer - buffer_start); 183 return (buffer - buffer_start);
177} 184}
178 185
@@ -197,10 +204,11 @@ int tr_main(int argc, char **argv);
197int tr_main(int argc, char **argv) 204int tr_main(int argc, char **argv)
198{ 205{
199 unsigned char *ptr; 206 unsigned char *ptr;
200 int output_length=0, input_length; 207 int output_length = 0, input_length;
201 int idx = 1; 208 int idx = 1;
202 int i; 209 int i;
203 RESERVE_CONFIG_BUFFER(output, BUFSIZ); 210 smalluint flags = 0;
211 RESERVE_CONFIG_UBUFFER(output, BUFSIZ);
204 RESERVE_CONFIG_BUFFER(vector, ASCII+1); 212 RESERVE_CONFIG_BUFFER(vector, ASCII+1);
205 RESERVE_CONFIG_BUFFER(invec, ASCII+1); 213 RESERVE_CONFIG_BUFFER(invec, ASCII+1);
206 RESERVE_CONFIG_BUFFER(outvec, ASCII+1); 214 RESERVE_CONFIG_BUFFER(outvec, ASCII+1);
@@ -213,19 +221,14 @@ int tr_main(int argc, char **argv)
213 221
214 if (argc > 1 && argv[idx][0] == '-') { 222 if (argc > 1 && argv[idx][0] == '-') {
215 for (ptr = (unsigned char *) &argv[idx][1]; *ptr; ptr++) { 223 for (ptr = (unsigned char *) &argv[idx][1]; *ptr; ptr++) {
216 switch (*ptr) { 224 if (*ptr == 'c')
217 case 'c': 225 flags |= TR_OPT_complement;
218 com_fl = TRUE; 226 else if (*ptr == 'd')
219 break; 227 flags |= TR_OPT_delete;
220 case 'd': 228 else if (*ptr == 's')
221 del_fl = TRUE; 229 flags |= TR_OPT_squeeze_reps;
222 break; 230 else
223 case 's':
224 sq_fl = TRUE;
225 break;
226 default:
227 bb_show_usage(); 231 bb_show_usage();
228 }
229 } 232 }
230 idx++; 233 idx++;
231 } 234 }
@@ -236,7 +239,7 @@ int tr_main(int argc, char **argv)
236 239
237 if (argv[idx] != NULL) { 240 if (argv[idx] != NULL) {
238 input_length = expand(argv[idx++], bb_common_bufsiz1); 241 input_length = expand(argv[idx++], bb_common_bufsiz1);
239 if (com_fl) 242 if (flags & TR_OPT_complement)
240 input_length = complement(bb_common_bufsiz1, input_length); 243 input_length = complement(bb_common_bufsiz1, input_length);
241 if (argv[idx] != NULL) { 244 if (argv[idx] != NULL) {
242 if (*argv[idx] == '\0') 245 if (*argv[idx] == '\0')
@@ -245,10 +248,10 @@ int tr_main(int argc, char **argv)
245 map(bb_common_bufsiz1, input_length, output, output_length); 248 map(bb_common_bufsiz1, input_length, output, output_length);
246 } 249 }
247 for (i = 0; i < input_length; i++) 250 for (i = 0; i < input_length; i++)
248 invec[(GCC4_IS_STUPID)bb_common_bufsiz1[i]] = TRUE; 251 invec[(unsigned char)bb_common_bufsiz1[i]] = TRUE;
249 for (i = 0; i < output_length; i++) 252 for (i = 0; i < output_length; i++)
250 outvec[(GCC4_IS_STUPID)output[i]] = TRUE; 253 outvec[output[i]] = TRUE;
251 } 254 }
252 convert(); 255 convert(flags);
253 return 0; 256 return EXIT_SUCCESS;
254} 257}