diff options
| author | Denis Vlasenko <vda.linux@googlemail.com> | 2009-03-01 04:50:18 +0000 |
|---|---|---|
| committer | Denis Vlasenko <vda.linux@googlemail.com> | 2009-03-01 04:50:18 +0000 |
| commit | 1bfcc8b08e511be7228f93079f669d95392ef184 (patch) | |
| tree | 72a5d206c0c7707927f5f58c118518330dbd3c73 /coreutils | |
| parent | 31773b71e89f79cb5afda834ce15538a8818067d (diff) | |
| download | busybox-w32-1bfcc8b08e511be7228f93079f669d95392ef184.tar.gz busybox-w32-1bfcc8b08e511be7228f93079f669d95392ef184.tar.bz2 busybox-w32-1bfcc8b08e511be7228f93079f669d95392ef184.zip | |
tr: support [:xdigit:], fix handling of ranges and [x]'s.
add testsuite entry for each of 3 bugs fixed.
function old new delta
static.classes 73 82 +9
expand 1738 1743 +5
complement 74 72 -2
tr_main 472 463 -9
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/2 up/down: 14/-11) Total: 3 bytes
Diffstat (limited to 'coreutils')
| -rw-r--r-- | coreutils/tr.c | 85 |
1 files changed, 47 insertions, 38 deletions
diff --git a/coreutils/tr.c b/coreutils/tr.c index c736c716b..cd31b8550 100644 --- a/coreutils/tr.c +++ b/coreutils/tr.c | |||
| @@ -23,11 +23,11 @@ | |||
| 23 | #define ASCII 0377 | 23 | #define ASCII 0377 |
| 24 | 24 | ||
| 25 | static void map(char *pvector, | 25 | static void map(char *pvector, |
| 26 | unsigned char *string1, unsigned int string1_len, | 26 | unsigned char *string1, unsigned string1_len, |
| 27 | unsigned char *string2, unsigned int string2_len) | 27 | unsigned char *string2, unsigned string2_len) |
| 28 | { | 28 | { |
| 29 | char last = '0'; | 29 | char last = '0'; |
| 30 | unsigned int i, j; | 30 | unsigned i, j; |
| 31 | 31 | ||
| 32 | for (j = 0, i = 0; i < string1_len; i++) { | 32 | for (j = 0, i = 0; i < string1_len; i++) { |
| 33 | if (string2_len <= j) | 33 | if (string2_len <= j) |
| @@ -39,12 +39,11 @@ static void map(char *pvector, | |||
| 39 | 39 | ||
| 40 | /* supported constructs: | 40 | /* supported constructs: |
| 41 | * Ranges, e.g., 0-9 ==> 0123456789 | 41 | * Ranges, e.g., 0-9 ==> 0123456789 |
| 42 | * Ranges, e.g., [0-9] ==> 0123456789 | ||
| 43 | * Escapes, e.g., \a ==> Control-G | 42 | * Escapes, e.g., \a ==> Control-G |
| 44 | * Character classes, e.g. [:upper:] ==> A...Z | 43 | * Character classes, e.g. [:upper:] ==> A...Z |
| 45 | * Equiv classes, e.g. [=A=] ==> A (hmmmmmmm?) | 44 | * Equiv classes, e.g. [=A=] ==> A (hmmmmmmm?) |
| 46 | */ | 45 | */ |
| 47 | static unsigned int expand(const char *arg, char *buffer) | 46 | static unsigned expand(const char *arg, char *buffer) |
| 48 | { | 47 | { |
| 49 | char *buffer_start = buffer; | 48 | char *buffer_start = buffer; |
| 50 | unsigned i; /* can't be unsigned char: must be able to hold 256 */ | 49 | unsigned i; /* can't be unsigned char: must be able to hold 256 */ |
| @@ -77,7 +76,8 @@ static unsigned int expand(const char *arg, char *buffer) | |||
| 77 | static const char classes[] ALIGN1 = | 76 | static const char classes[] ALIGN1 = |
| 78 | "alpha"CLO "alnum"CLO "digit"CLO | 77 | "alpha"CLO "alnum"CLO "digit"CLO |
| 79 | "lower"CLO "upper"CLO "space"CLO | 78 | "lower"CLO "upper"CLO "space"CLO |
| 80 | "blank"CLO "punct"CLO "cntrl"CLO; | 79 | "blank"CLO "punct"CLO "cntrl"CLO |
| 80 | "xdigit"CLO; | ||
| 81 | #define CLASS_invalid 0 /* we increment the retval */ | 81 | #define CLASS_invalid 0 /* we increment the retval */ |
| 82 | #define CLASS_alpha 1 | 82 | #define CLASS_alpha 1 |
| 83 | #define CLASS_alnum 2 | 83 | #define CLASS_alnum 2 |
| @@ -88,16 +88,17 @@ static unsigned int expand(const char *arg, char *buffer) | |||
| 88 | #define CLASS_blank 7 | 88 | #define CLASS_blank 7 |
| 89 | #define CLASS_punct 8 | 89 | #define CLASS_punct 8 |
| 90 | #define CLASS_cntrl 9 | 90 | #define CLASS_cntrl 9 |
| 91 | //#define CLASS_xdigit 10 | 91 | #define CLASS_xdigit 10 |
| 92 | //#define CLASS_graph 11 | 92 | //#define CLASS_graph 11 |
| 93 | //#define CLASS_print 12 | 93 | //#define CLASS_print 12 |
| 94 | smalluint j; | 94 | smalluint j; |
| 95 | { /* not really pretty.. */ | 95 | { |
| 96 | char *tmp = xstrndup(arg, 7); // warning: xdigit would need 8, not 7 | 96 | /* xdigit needs 8, not 7 */ |
| 97 | char *tmp = xstrndup(arg, 7 + (arg[0]=='x')); | ||
| 97 | j = index_in_strings(classes, tmp) + 1; | 98 | j = index_in_strings(classes, tmp) + 1; |
| 98 | free(tmp); | 99 | free(tmp); |
| 99 | } | 100 | } |
| 100 | if (j == CLASS_alnum || j == CLASS_digit) { | 101 | if (j == CLASS_alnum || j == CLASS_digit || j == CLASS_xdigit) { |
| 101 | for (i = '0'; i <= '9'; i++) | 102 | for (i = '0'; i <= '9'; i++) |
| 102 | *buffer++ = i; | 103 | *buffer++ = i; |
| 103 | } | 104 | } |
| @@ -125,6 +126,12 @@ static unsigned int expand(const char *arg, char *buffer) | |||
| 125 | || (j == CLASS_cntrl && iscntrl(i))) | 126 | || (j == CLASS_cntrl && iscntrl(i))) |
| 126 | *buffer++ = i; | 127 | *buffer++ = i; |
| 127 | } | 128 | } |
| 129 | if (j == CLASS_xdigit) { | ||
| 130 | for (i = 'A'; i <= 'F'; i++) { | ||
| 131 | *buffer++ = i; | ||
| 132 | *buffer++ = i | 0x20; | ||
| 133 | } | ||
| 134 | } | ||
| 128 | if (j == CLASS_invalid) { | 135 | if (j == CLASS_invalid) { |
| 129 | *buffer++ = '['; | 136 | *buffer++ = '['; |
| 130 | *buffer++ = ':'; | 137 | *buffer++ = ':'; |
| @@ -140,19 +147,14 @@ static unsigned int expand(const char *arg, char *buffer) | |||
| 140 | arg += 3; /* skip CHAR=] */ | 147 | arg += 3; /* skip CHAR=] */ |
| 141 | continue; | 148 | continue; |
| 142 | } | 149 | } |
| 143 | if (i == '\0' || *arg != '-') { /* not [x-...] - copy verbatim */ | 150 | /* The rest of [xyz... cases is treated as normal |
| 144 | *buffer++ = '['; | 151 | * string, '[' has no special meaning here: |
| 145 | arg--; /* points to x */ | 152 | * tr "[a-z]" "[A-Z]" can be written as tr "a-z" "A-Z", |
| 146 | continue; /* copy all, including eventual ']' */ | 153 | * also try tr "[a-z]" "_A-Z+" and you'll see that |
| 147 | } | 154 | * [] is not special here. |
| 148 | /* [x-z] */ | 155 | */ |
| 149 | arg++; /* skip - */ | 156 | *buffer++ = '['; |
| 150 | if (arg[0] == '\0' || arg[1] != ']') | 157 | arg--; /* points to x */ |
| 151 | bb_show_usage(); | ||
| 152 | ac = *arg++; | ||
| 153 | while (i <= ac) | ||
| 154 | *buffer++ = i++; | ||
| 155 | arg++; /* skip ] */ | ||
| 156 | continue; | 158 | continue; |
| 157 | } | 159 | } |
| 158 | *buffer++ = *arg++; | 160 | *buffer++ = *arg++; |
| @@ -162,29 +164,30 @@ static unsigned int expand(const char *arg, char *buffer) | |||
| 162 | 164 | ||
| 163 | static int complement(char *buffer, int buffer_len) | 165 | static int complement(char *buffer, int buffer_len) |
| 164 | { | 166 | { |
| 165 | int i, j, ix; | 167 | int ch, j, len; |
| 166 | char conv[ASCII + 2]; | 168 | char conv[ASCII + 2]; |
| 167 | 169 | ||
| 168 | ix = 0; | 170 | len = 0; |
| 169 | for (i = '\0'; i <= ASCII; i++) { | 171 | for (ch = '\0'; ch <= ASCII; ch++) { |
| 170 | for (j = 0; j < buffer_len; j++) | 172 | for (j = 0; j < buffer_len; j++) |
| 171 | if (buffer[j] == i) | 173 | if (buffer[j] == ch) |
| 172 | break; | 174 | goto next_ch; |
| 173 | if (j == buffer_len) | 175 | /* Didn't find it */ |
| 174 | conv[ix++] = i & ASCII; | 176 | conv[len++] = (char) ch; |
| 177 | next_ch: | ||
| 178 | continue; | ||
| 175 | } | 179 | } |
| 176 | memcpy(buffer, conv, ix); | 180 | memcpy(buffer, conv, len); |
| 177 | return ix; | 181 | return len; |
| 178 | } | 182 | } |
| 179 | 183 | ||
| 180 | int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | 184 | int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
| 181 | int tr_main(int argc UNUSED_PARAM, char **argv) | 185 | int tr_main(int argc UNUSED_PARAM, char **argv) |
| 182 | { | 186 | { |
| 183 | int output_length = 0, input_length; | ||
| 184 | int i; | 187 | int i; |
| 185 | smalluint flags; | 188 | smalluint flags; |
| 186 | ssize_t read_chars = 0; | 189 | ssize_t read_chars; |
| 187 | size_t in_index = 0, out_index = 0; | 190 | size_t in_index, out_index; |
| 188 | unsigned last = UCHAR_MAX + 1; /* not equal to any char */ | 191 | unsigned last = UCHAR_MAX + 1; /* not equal to any char */ |
| 189 | unsigned char coded, c; | 192 | unsigned char coded, c; |
| 190 | unsigned char *output = xmalloc(BUFSIZ); | 193 | unsigned char *output = xmalloc(BUFSIZ); |
| @@ -206,6 +209,9 @@ int tr_main(int argc UNUSED_PARAM, char **argv) | |||
| 206 | 209 | ||
| 207 | #define tr_buf bb_common_bufsiz1 | 210 | #define tr_buf bb_common_bufsiz1 |
| 208 | if (*argv != NULL) { | 211 | if (*argv != NULL) { |
| 212 | int output_length = 0; | ||
| 213 | int input_length; | ||
| 214 | |||
| 209 | input_length = expand(*argv++, tr_buf); | 215 | input_length = expand(*argv++, tr_buf); |
| 210 | if (flags & TR_OPT_complement) | 216 | if (flags & TR_OPT_complement) |
| 211 | input_length = complement(tr_buf, input_length); | 217 | input_length = complement(tr_buf, input_length); |
| @@ -221,30 +227,33 @@ int tr_main(int argc UNUSED_PARAM, char **argv) | |||
| 221 | outvec[output[i]] = TRUE; | 227 | outvec[output[i]] = TRUE; |
| 222 | } | 228 | } |
| 223 | 229 | ||
| 230 | goto start_from; | ||
| 231 | |||
| 224 | for (;;) { | 232 | for (;;) { |
| 225 | /* If we're out of input, flush output and read more input. */ | 233 | /* If we're out of input, flush output and read more input. */ |
| 226 | if ((ssize_t)in_index == read_chars) { | 234 | if ((ssize_t)in_index == read_chars) { |
| 227 | if (out_index) { | 235 | if (out_index) { |
| 228 | xwrite(STDOUT_FILENO, (char *)output, out_index); | 236 | xwrite(STDOUT_FILENO, (char *)output, out_index); |
| 237 | start_from: | ||
| 229 | out_index = 0; | 238 | out_index = 0; |
| 230 | } | 239 | } |
| 231 | read_chars = safe_read(STDIN_FILENO, tr_buf, BUFSIZ); | 240 | read_chars = safe_read(STDIN_FILENO, tr_buf, BUFSIZ); |
| 232 | if (read_chars <= 0) { | 241 | if (read_chars <= 0) { |
| 233 | if (read_chars < 0) | 242 | if (read_chars < 0) |
| 234 | bb_perror_msg_and_die(bb_msg_read_error); | 243 | bb_perror_msg_and_die(bb_msg_read_error); |
| 235 | exit(EXIT_SUCCESS); | 244 | break; |
| 236 | } | 245 | } |
| 237 | in_index = 0; | 246 | in_index = 0; |
| 238 | } | 247 | } |
| 239 | c = tr_buf[in_index++]; | 248 | c = tr_buf[in_index++]; |
| 240 | coded = vector[c]; | ||
| 241 | if ((flags & TR_OPT_delete) && invec[c]) | 249 | if ((flags & TR_OPT_delete) && invec[c]) |
| 242 | continue; | 250 | continue; |
| 251 | coded = vector[c]; | ||
| 243 | if ((flags & TR_OPT_squeeze_reps) && last == coded | 252 | if ((flags & TR_OPT_squeeze_reps) && last == coded |
| 244 | && (invec[c] || outvec[coded])) | 253 | && (invec[c] || outvec[coded])) |
| 245 | continue; | 254 | continue; |
| 246 | output[out_index++] = last = coded; | 255 | output[out_index++] = last = coded; |
| 247 | } | 256 | } |
| 248 | /* NOTREACHED */ | 257 | |
| 249 | return EXIT_SUCCESS; | 258 | return EXIT_SUCCESS; |
| 250 | } | 259 | } |
