diff options
author | Denis Vlasenko <vda.linux@googlemail.com> | 2009-03-01 04:50:18 +0000 |
---|---|---|
committer | Denis Vlasenko <vda.linux@googlemail.com> | 2009-03-01 04:50:18 +0000 |
commit | 1bfcc8b08e511be7228f93079f669d95392ef184 (patch) | |
tree | 72a5d206c0c7707927f5f58c118518330dbd3c73 | |
parent | 31773b71e89f79cb5afda834ce15538a8818067d (diff) | |
download | busybox-w32-1bfcc8b08e511be7228f93079f669d95392ef184.tar.gz busybox-w32-1bfcc8b08e511be7228f93079f669d95392ef184.tar.bz2 busybox-w32-1bfcc8b08e511be7228f93079f669d95392ef184.zip |
tr: support [:xdigit:]; fix handling of ranges and [x]'s.
Add a testsuite entry for each of the 3 bugs fixed.
function old new delta
static.classes 73 82 +9
expand 1738 1743 +5
complement 74 72 -2
tr_main 472 463 -9
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/2 up/down: 14/-11) Total: 3 bytes
-rw-r--r-- | coreutils/tr.c | 85 | ||||
-rw-r--r-- | testsuite/tr.tests | 22 |
2 files changed, 69 insertions, 38 deletions
diff --git a/coreutils/tr.c b/coreutils/tr.c index c736c716b..cd31b8550 100644 --- a/coreutils/tr.c +++ b/coreutils/tr.c | |||
@@ -23,11 +23,11 @@ | |||
23 | #define ASCII 0377 | 23 | #define ASCII 0377 |
24 | 24 | ||
25 | static void map(char *pvector, | 25 | static void map(char *pvector, |
26 | unsigned char *string1, unsigned int string1_len, | 26 | unsigned char *string1, unsigned string1_len, |
27 | unsigned char *string2, unsigned int string2_len) | 27 | unsigned char *string2, unsigned string2_len) |
28 | { | 28 | { |
29 | char last = '0'; | 29 | char last = '0'; |
30 | unsigned int i, j; | 30 | unsigned i, j; |
31 | 31 | ||
32 | for (j = 0, i = 0; i < string1_len; i++) { | 32 | for (j = 0, i = 0; i < string1_len; i++) { |
33 | if (string2_len <= j) | 33 | if (string2_len <= j) |
@@ -39,12 +39,11 @@ static void map(char *pvector, | |||
39 | 39 | ||
40 | /* supported constructs: | 40 | /* supported constructs: |
41 | * Ranges, e.g., 0-9 ==> 0123456789 | 41 | * Ranges, e.g., 0-9 ==> 0123456789 |
42 | * Ranges, e.g., [0-9] ==> 0123456789 | ||
43 | * Escapes, e.g., \a ==> Control-G | 42 | * Escapes, e.g., \a ==> Control-G |
44 | * Character classes, e.g. [:upper:] ==> A...Z | 43 | * Character classes, e.g. [:upper:] ==> A...Z |
45 | * Equiv classess, e.g. [=A=] ==> A (hmmmmmmm?) | 44 | * Equiv classess, e.g. [=A=] ==> A (hmmmmmmm?) |
46 | */ | 45 | */ |
47 | static unsigned int expand(const char *arg, char *buffer) | 46 | static unsigned expand(const char *arg, char *buffer) |
48 | { | 47 | { |
49 | char *buffer_start = buffer; | 48 | char *buffer_start = buffer; |
50 | unsigned i; /* can't be unsigned char: must be able to hold 256 */ | 49 | unsigned i; /* can't be unsigned char: must be able to hold 256 */ |
@@ -77,7 +76,8 @@ static unsigned int expand(const char *arg, char *buffer) | |||
77 | static const char classes[] ALIGN1 = | 76 | static const char classes[] ALIGN1 = |
78 | "alpha"CLO "alnum"CLO "digit"CLO | 77 | "alpha"CLO "alnum"CLO "digit"CLO |
79 | "lower"CLO "upper"CLO "space"CLO | 78 | "lower"CLO "upper"CLO "space"CLO |
80 | "blank"CLO "punct"CLO "cntrl"CLO; | 79 | "blank"CLO "punct"CLO "cntrl"CLO |
80 | "xdigit"CLO; | ||
81 | #define CLASS_invalid 0 /* we increment the retval */ | 81 | #define CLASS_invalid 0 /* we increment the retval */ |
82 | #define CLASS_alpha 1 | 82 | #define CLASS_alpha 1 |
83 | #define CLASS_alnum 2 | 83 | #define CLASS_alnum 2 |
@@ -88,16 +88,17 @@ static unsigned int expand(const char *arg, char *buffer) | |||
88 | #define CLASS_blank 7 | 88 | #define CLASS_blank 7 |
89 | #define CLASS_punct 8 | 89 | #define CLASS_punct 8 |
90 | #define CLASS_cntrl 9 | 90 | #define CLASS_cntrl 9 |
91 | //#define CLASS_xdigit 10 | 91 | #define CLASS_xdigit 10 |
92 | //#define CLASS_graph 11 | 92 | //#define CLASS_graph 11 |
93 | //#define CLASS_print 12 | 93 | //#define CLASS_print 12 |
94 | smalluint j; | 94 | smalluint j; |
95 | { /* not really pretty.. */ | 95 | { |
96 | char *tmp = xstrndup(arg, 7); // warning: xdigit would need 8, not 7 | 96 | /* xdigit needs 8, not 7 */ |
97 | char *tmp = xstrndup(arg, 7 + (arg[0]=='x')); | ||
97 | j = index_in_strings(classes, tmp) + 1; | 98 | j = index_in_strings(classes, tmp) + 1; |
98 | free(tmp); | 99 | free(tmp); |
99 | } | 100 | } |
100 | if (j == CLASS_alnum || j == CLASS_digit) { | 101 | if (j == CLASS_alnum || j == CLASS_digit || j == CLASS_xdigit) { |
101 | for (i = '0'; i <= '9'; i++) | 102 | for (i = '0'; i <= '9'; i++) |
102 | *buffer++ = i; | 103 | *buffer++ = i; |
103 | } | 104 | } |
@@ -125,6 +126,12 @@ static unsigned int expand(const char *arg, char *buffer) | |||
125 | || (j == CLASS_cntrl && iscntrl(i))) | 126 | || (j == CLASS_cntrl && iscntrl(i))) |
126 | *buffer++ = i; | 127 | *buffer++ = i; |
127 | } | 128 | } |
129 | if (j == CLASS_xdigit) { | ||
130 | for (i = 'A'; i <= 'F'; i++) { | ||
131 | *buffer++ = i; | ||
132 | *buffer++ = i | 0x20; | ||
133 | } | ||
134 | } | ||
128 | if (j == CLASS_invalid) { | 135 | if (j == CLASS_invalid) { |
129 | *buffer++ = '['; | 136 | *buffer++ = '['; |
130 | *buffer++ = ':'; | 137 | *buffer++ = ':'; |
@@ -140,19 +147,14 @@ static unsigned int expand(const char *arg, char *buffer) | |||
140 | arg += 3; /* skip CHAR=] */ | 147 | arg += 3; /* skip CHAR=] */ |
141 | continue; | 148 | continue; |
142 | } | 149 | } |
143 | if (i == '\0' || *arg != '-') { /* not [x-...] - copy verbatim */ | 150 | /* The rest of [xyz... cases is treated as normal |
144 | *buffer++ = '['; | 151 | * string, '[' has no special meaning here: |
145 | arg--; /* points to x */ | 152 | * tr "[a-z]" "[A-Z]" can be written as tr "a-z" "A-Z", |
146 | continue; /* copy all, including eventual ']' */ | 153 | * also try tr "[a-z]" "_A-Z+" and you'll see that |
147 | } | 154 | * [] is not special here. |
148 | /* [x-z] */ | 155 | */ |
149 | arg++; /* skip - */ | 156 | *buffer++ = '['; |
150 | if (arg[0] == '\0' || arg[1] != ']') | 157 | arg--; /* points to x */ |
151 | bb_show_usage(); | ||
152 | ac = *arg++; | ||
153 | while (i <= ac) | ||
154 | *buffer++ = i++; | ||
155 | arg++; /* skip ] */ | ||
156 | continue; | 158 | continue; |
157 | } | 159 | } |
158 | *buffer++ = *arg++; | 160 | *buffer++ = *arg++; |
@@ -162,29 +164,30 @@ static unsigned int expand(const char *arg, char *buffer) | |||
162 | 164 | ||
163 | static int complement(char *buffer, int buffer_len) | 165 | static int complement(char *buffer, int buffer_len) |
164 | { | 166 | { |
165 | int i, j, ix; | 167 | int ch, j, len; |
166 | char conv[ASCII + 2]; | 168 | char conv[ASCII + 2]; |
167 | 169 | ||
168 | ix = 0; | 170 | len = 0; |
169 | for (i = '\0'; i <= ASCII; i++) { | 171 | for (ch = '\0'; ch <= ASCII; ch++) { |
170 | for (j = 0; j < buffer_len; j++) | 172 | for (j = 0; j < buffer_len; j++) |
171 | if (buffer[j] == i) | 173 | if (buffer[j] == ch) |
172 | break; | 174 | goto next_ch; |
173 | if (j == buffer_len) | 175 | /* Didn't find it */ |
174 | conv[ix++] = i & ASCII; | 176 | conv[len++] = (char) ch; |
177 | next_ch: | ||
178 | continue; | ||
175 | } | 179 | } |
176 | memcpy(buffer, conv, ix); | 180 | memcpy(buffer, conv, len); |
177 | return ix; | 181 | return len; |
178 | } | 182 | } |
179 | 183 | ||
180 | int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | 184 | int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
181 | int tr_main(int argc UNUSED_PARAM, char **argv) | 185 | int tr_main(int argc UNUSED_PARAM, char **argv) |
182 | { | 186 | { |
183 | int output_length = 0, input_length; | ||
184 | int i; | 187 | int i; |
185 | smalluint flags; | 188 | smalluint flags; |
186 | ssize_t read_chars = 0; | 189 | ssize_t read_chars; |
187 | size_t in_index = 0, out_index = 0; | 190 | size_t in_index, out_index; |
188 | unsigned last = UCHAR_MAX + 1; /* not equal to any char */ | 191 | unsigned last = UCHAR_MAX + 1; /* not equal to any char */ |
189 | unsigned char coded, c; | 192 | unsigned char coded, c; |
190 | unsigned char *output = xmalloc(BUFSIZ); | 193 | unsigned char *output = xmalloc(BUFSIZ); |
@@ -206,6 +209,9 @@ int tr_main(int argc UNUSED_PARAM, char **argv) | |||
206 | 209 | ||
207 | #define tr_buf bb_common_bufsiz1 | 210 | #define tr_buf bb_common_bufsiz1 |
208 | if (*argv != NULL) { | 211 | if (*argv != NULL) { |
212 | int output_length = 0; | ||
213 | int input_length; | ||
214 | |||
209 | input_length = expand(*argv++, tr_buf); | 215 | input_length = expand(*argv++, tr_buf); |
210 | if (flags & TR_OPT_complement) | 216 | if (flags & TR_OPT_complement) |
211 | input_length = complement(tr_buf, input_length); | 217 | input_length = complement(tr_buf, input_length); |
@@ -221,30 +227,33 @@ int tr_main(int argc UNUSED_PARAM, char **argv) | |||
221 | outvec[output[i]] = TRUE; | 227 | outvec[output[i]] = TRUE; |
222 | } | 228 | } |
223 | 229 | ||
230 | goto start_from; | ||
231 | |||
224 | for (;;) { | 232 | for (;;) { |
225 | /* If we're out of input, flush output and read more input. */ | 233 | /* If we're out of input, flush output and read more input. */ |
226 | if ((ssize_t)in_index == read_chars) { | 234 | if ((ssize_t)in_index == read_chars) { |
227 | if (out_index) { | 235 | if (out_index) { |
228 | xwrite(STDOUT_FILENO, (char *)output, out_index); | 236 | xwrite(STDOUT_FILENO, (char *)output, out_index); |
237 | start_from: | ||
229 | out_index = 0; | 238 | out_index = 0; |
230 | } | 239 | } |
231 | read_chars = safe_read(STDIN_FILENO, tr_buf, BUFSIZ); | 240 | read_chars = safe_read(STDIN_FILENO, tr_buf, BUFSIZ); |
232 | if (read_chars <= 0) { | 241 | if (read_chars <= 0) { |
233 | if (read_chars < 0) | 242 | if (read_chars < 0) |
234 | bb_perror_msg_and_die(bb_msg_read_error); | 243 | bb_perror_msg_and_die(bb_msg_read_error); |
235 | exit(EXIT_SUCCESS); | 244 | break; |
236 | } | 245 | } |
237 | in_index = 0; | 246 | in_index = 0; |
238 | } | 247 | } |
239 | c = tr_buf[in_index++]; | 248 | c = tr_buf[in_index++]; |
240 | coded = vector[c]; | ||
241 | if ((flags & TR_OPT_delete) && invec[c]) | 249 | if ((flags & TR_OPT_delete) && invec[c]) |
242 | continue; | 250 | continue; |
251 | coded = vector[c]; | ||
243 | if ((flags & TR_OPT_squeeze_reps) && last == coded | 252 | if ((flags & TR_OPT_squeeze_reps) && last == coded |
244 | && (invec[c] || outvec[coded])) | 253 | && (invec[c] || outvec[coded])) |
245 | continue; | 254 | continue; |
246 | output[out_index++] = last = coded; | 255 | output[out_index++] = last = coded; |
247 | } | 256 | } |
248 | /* NOTREACHED */ | 257 | |
249 | return EXIT_SUCCESS; | 258 | return EXIT_SUCCESS; |
250 | } | 259 | } |
diff --git a/testsuite/tr.tests b/testsuite/tr.tests new file mode 100644 index 000000000..7339ccff2 --- /dev/null +++ b/testsuite/tr.tests | |||
@@ -0,0 +1,22 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | # Copyright 2009 by Denys Vlasenko <vda.linux@googlemail.com> | ||
4 | # Licensed under GPL v2, see file LICENSE for details. | ||
5 | |||
6 | . testing.sh | ||
7 | |||
8 | # testing "description" "arguments" "result" "infile" "stdin" | ||
9 | |||
10 | testing "tr does not treat [] in [a-z] as special" \ | ||
11 | "tr '[q-z]' '_Q-Z+'" \ | ||
12 | "_QWe+" "" "[qwe]" | ||
13 | |||
14 | testing "tr understands 0-9A-F" \ | ||
15 | "tr -cd '[0-9A-F]'" \ | ||
16 | "19AF" "" "19AFH\n" | ||
17 | |||
18 | testing "tr understands [:xdigit:]" \ | ||
19 | "tr -cd '[:xdigit:]'" \ | ||
20 | "19AF" "" "19AFH\n" | ||
21 | |||
22 | exit $FAILCOUNT | ||