diff options
Diffstat (limited to 'coreutils')
| -rw-r--r-- | coreutils/tr.c | 715 |
1 files changed, 190 insertions, 525 deletions
diff --git a/coreutils/tr.c b/coreutils/tr.c index 5f4938028..079c252d3 100644 --- a/coreutils/tr.c +++ b/coreutils/tr.c | |||
| @@ -1,574 +1,239 @@ | |||
| 1 | /* vi: set sw=4 ts=4: */ | 1 | /* vi: set sw=4 ts=4: */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 1988, 1993 | 3 | * Mini tr implementation for busybox |
| 4 | * The Regents of the University of California. All rights reserved. | ||
| 5 | * | 4 | * |
| 6 | * Redistribution and use in source and binary forms, with or without | 5 | * This version of tr is adapted from Minix tr |
| 7 | * modification, are permitted provided that the following conditions | 6 | * Author: Michiel Huisjes |
| 8 | * are met: | ||
| 9 | * 1. Redistributions of source code must retain the above copyright | ||
| 10 | * notice, this list of conditions and the following disclaimer. | ||
| 11 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 12 | * notice, this list of conditions and the following disclaimer in the | ||
| 13 | * documentation and/or other materials provided with the distribution. | ||
| 14 | * 3. All advertising materials mentioning features or use of this software | ||
| 15 | * must display the following acknowledgement: | ||
| 16 | * This product includes software developed by the University of | ||
| 17 | * California, Berkeley and its contributors. | ||
| 18 | * 4. Neither the name of the University nor the names of its contributors | ||
| 19 | * may be used to endorse or promote products derived from this software | ||
| 20 | * without specific prior written permission. | ||
| 21 | * | 7 | * |
| 22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 8 | * This program is free software; you can redistribute it and/or modify |
| 23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 9 | * it under the terms of the GNU General Public License as published by |
| 24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 10 | * the Free Software Foundation; either version 2 of the License, or |
| 25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 11 | * (at your option) any later version. |
| 26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 12 | * |
| 27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 13 | * This program is distributed in the hope that it will be useful, |
| 28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 16 | * General Public License for more details. |
| 31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 17 | * |
| 32 | * SUCH DAMAGE. | 18 | * You should have received a copy of the GNU General Public License |
| 19 | * along with this program; if not, write to the Free Software | ||
| 20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
| 21 | * | ||
| 22 | * Original copyright notice is retained at the end of this file. | ||
| 33 | */ | 23 | */ |
| 34 | 24 | ||
| 35 | #if 0 | ||
| 36 | #ifndef lint | ||
| 37 | static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\ | ||
| 38 | The Regents of the University of California. All rights reserved.\n"; | ||
| 39 | #endif /* not lint */ | ||
| 40 | |||
| 41 | #ifndef lint | ||
| 42 | #if 0 | ||
| 43 | static char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; | ||
| 44 | #endif | ||
| 45 | static const char rcsid[] = | ||
| 46 | |||
| 47 | "$Id: tr.c,v 1.4 2000/04/17 16:44:46 erik Exp $"; | ||
| 48 | #endif /* not lint */ | ||
| 49 | #endif /* #if 0 */ | ||
| 50 | |||
| 51 | #include "internal.h" | 25 | #include "internal.h" |
| 52 | #include <locale.h> | ||
| 53 | #include <sys/types.h> | ||
| 54 | #include <sys/cdefs.h> | ||
| 55 | #include <sys/types.h> | ||
| 56 | |||
| 57 | #include <err.h> | ||
| 58 | #include <stdio.h> | 26 | #include <stdio.h> |
| 59 | #include <stdlib.h> | ||
| 60 | #include <string.h> | 27 | #include <string.h> |
| 28 | #include <stdlib.h> | ||
| 61 | #include <unistd.h> | 29 | #include <unistd.h> |
| 30 | #include <sys/types.h> | ||
| 62 | 31 | ||
| 63 | #include <ctype.h> | ||
| 64 | #include <err.h> | ||
| 65 | #include <stddef.h> | ||
| 66 | |||
| 67 | typedef struct { | ||
| 68 | enum { STRING1, STRING2 } which; | ||
| 69 | enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state; | ||
| 70 | int cnt; /* character count */ | ||
| 71 | int lastch; /* last character */ | ||
| 72 | int equiv[2]; /* equivalence set */ | ||
| 73 | int *set; /* set of characters */ | ||
| 74 | char *str; /* user's string */ | ||
| 75 | } STR; | ||
| 76 | |||
| 77 | #include <limits.h> | ||
| 78 | #define NCHARS (UCHAR_MAX + 1) /* Number of possible characters. */ | ||
| 79 | #define OOBCH (UCHAR_MAX + 1) /* Out of band character value. */ | ||
| 80 | |||
| 81 | static int next __P((STR *)); | ||
| 82 | |||
| 83 | static int string1[NCHARS] = { | ||
| 84 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */ | ||
| 85 | 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, | ||
| 86 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | ||
| 87 | 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, | ||
| 88 | 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, | ||
| 89 | 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, | ||
| 90 | 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, | ||
| 91 | 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, | ||
| 92 | 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, | ||
| 93 | 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, | ||
| 94 | 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, | ||
| 95 | 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, | ||
| 96 | 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, | ||
| 97 | 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, | ||
| 98 | 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, | ||
| 99 | 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, | ||
| 100 | 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, | ||
| 101 | 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, | ||
| 102 | 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, | ||
| 103 | 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, | ||
| 104 | 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, | ||
| 105 | 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, | ||
| 106 | 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, | ||
| 107 | 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, | ||
| 108 | 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, | ||
| 109 | 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, | ||
| 110 | 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, | ||
| 111 | 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, | ||
| 112 | 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, | ||
| 113 | 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, | ||
| 114 | 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, | ||
| 115 | 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, | ||
| 116 | }, string2[NCHARS]; | ||
| 117 | |||
| 118 | STR s1 = { STRING1, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL }; | ||
| 119 | STR s2 = { STRING2, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL }; | ||
| 120 | 32 | ||
| 121 | static void setup(string, arg, str, cflag) | ||
| 122 | int *string; | ||
| 123 | char *arg; | ||
| 124 | STR *str; | ||
| 125 | int cflag; | ||
| 126 | { | ||
| 127 | register int cnt, *p; | ||
| 128 | 33 | ||
| 129 | str->str = arg; | 34 | #ifdef TRUE |
| 130 | bzero(string, NCHARS * sizeof(int)); | 35 | #undef TRUE |
| 36 | #undef FALSE | ||
| 37 | #define TRUE 1 | ||
| 38 | #define FALSE 0 | ||
| 39 | #endif | ||
| 131 | 40 | ||
| 132 | while (next(str)) | 41 | #define ASCII 0377 |
| 133 | string[str->lastch] = 1; | ||
| 134 | if (cflag) | ||
| 135 | for (p = string, cnt = NCHARS; cnt--; ++p) | ||
| 136 | *p = !*p; | ||
| 137 | } | ||
| 138 | 42 | ||
| 139 | static void tr_usage() | 43 | /* some globals shared across this file */ |
| 140 | { | 44 | static char com_fl, del_fl, sq_fl; |
| 141 | usage( "\ttr [-cdsu] string1 [string2]\n\n" | 45 | static unsigned char output[BUFSIZ], input[BUFSIZ]; |
| 142 | "Translate, squeeze, and/or delete characters from standard\n" | 46 | static unsigned char vector[ASCII + 1]; |
| 143 | "input, writing to standard output.\n"); | 47 | static char invec[ASCII + 1], outvec[ASCII + 1]; |
| 144 | } | 48 | static short in_index, out_index; |
| 145 | 49 | ||
| 146 | 50 | ||
| 147 | extern int tr_main(argc, argv) | 51 | static void convert() |
| 148 | int argc; | ||
| 149 | char **argv; | ||
| 150 | { | 52 | { |
| 151 | register int ch, cnt, lastch, *p; | 53 | short read_chars = 0; |
| 152 | int cflag, dflag, sflag, isstring2; | 54 | short c, coded; |
| 153 | 55 | short last = -1; | |
| 154 | (void) setlocale(LC_CTYPE, ""); | 56 | |
| 155 | 57 | for (;;) { | |
| 156 | cflag = dflag = sflag = 0; | 58 | if (in_index == read_chars) { |
| 157 | while ((ch = getopt(argc, argv, "cdsu")) != -1) | 59 | if ((read_chars = read(0, (char *) input, BUFSIZ)) <= 0) { |
| 158 | switch ((char) ch) { | 60 | if (write(1, (char *) output, out_index) != out_index) |
| 159 | case 'c': | 61 | write(2, "Bad write\n", 10); |
| 160 | cflag = 1; | 62 | exit(0); |
| 161 | break; | ||
| 162 | case 'd': | ||
| 163 | dflag = 1; | ||
| 164 | break; | ||
| 165 | case 's': | ||
| 166 | sflag = 1; | ||
| 167 | break; | ||
| 168 | case 'u': | ||
| 169 | setbuf(stdout, (char *) NULL); | ||
| 170 | break; | ||
| 171 | case '?': | ||
| 172 | default: | ||
| 173 | tr_usage(); | ||
| 174 | } | ||
| 175 | argc -= optind; | ||
| 176 | argv += optind; | ||
| 177 | |||
| 178 | switch (argc) { | ||
| 179 | case 0: | ||
| 180 | default: | ||
| 181 | tr_usage(); | ||
| 182 | /* NOTREACHED */ | ||
| 183 | case 1: | ||
| 184 | isstring2 = 0; | ||
| 185 | break; | ||
| 186 | case 2: | ||
| 187 | isstring2 = 1; | ||
| 188 | break; | ||
| 189 | } | ||
| 190 | |||
| 191 | /* | ||
| 192 | * tr -ds [-c] string1 string2 | ||
| 193 | * Delete all characters (or complemented characters) in string1. | ||
| 194 | * Squeeze all characters in string2. | ||
| 195 | */ | ||
| 196 | if (dflag && sflag) { | ||
| 197 | if (!isstring2) | ||
| 198 | tr_usage(); | ||
| 199 | |||
| 200 | setup(string1, argv[0], &s1, cflag); | ||
| 201 | setup(string2, argv[1], &s2, 0); | ||
| 202 | |||
| 203 | for (lastch = OOBCH; (ch = getchar()) != EOF;) | ||
| 204 | if (!string1[ch] && (!string2[ch] || lastch != ch)) { | ||
| 205 | lastch = ch; | ||
| 206 | (void) putchar(ch); | ||
| 207 | } | 63 | } |
| 208 | exit(0); | 64 | in_index = 0; |
| 209 | } | ||
| 210 | |||
| 211 | /* | ||
| 212 | * tr -d [-c] string1 | ||
| 213 | * Delete all characters (or complemented characters) in string1. | ||
| 214 | */ | ||
| 215 | if (dflag) { | ||
| 216 | if (isstring2) | ||
| 217 | tr_usage(); | ||
| 218 | |||
| 219 | setup(string1, argv[0], &s1, cflag); | ||
| 220 | |||
| 221 | while ((ch = getchar()) != EOF) | ||
| 222 | if (!string1[ch]) | ||
| 223 | (void) putchar(ch); | ||
| 224 | exit(0); | ||
| 225 | } | ||
| 226 | |||
| 227 | /* | ||
| 228 | * tr -s [-c] string1 | ||
| 229 | * Squeeze all characters (or complemented characters) in string1. | ||
| 230 | */ | ||
| 231 | if (sflag && !isstring2) { | ||
| 232 | setup(string1, argv[0], &s1, cflag); | ||
| 233 | |||
| 234 | for (lastch = OOBCH; (ch = getchar()) != EOF;) | ||
| 235 | if (!string1[ch] || lastch != ch) { | ||
| 236 | lastch = ch; | ||
| 237 | (void) putchar(ch); | ||
| 238 | } | ||
| 239 | exit(0); | ||
| 240 | } | ||
| 241 | |||
| 242 | /* | ||
| 243 | * tr [-cs] string1 string2 | ||
| 244 | * Replace all characters (or complemented characters) in string1 with | ||
| 245 | * the character in the same position in string2. If the -s option is | ||
| 246 | * specified, squeeze all the characters in string2. | ||
| 247 | */ | ||
| 248 | if (!isstring2) | ||
| 249 | tr_usage(); | ||
| 250 | |||
| 251 | s1.str = argv[0]; | ||
| 252 | s2.str = argv[1]; | ||
| 253 | |||
| 254 | if (cflag) | ||
| 255 | for (cnt = NCHARS, p = string1; cnt--;) | ||
| 256 | *p++ = OOBCH; | ||
| 257 | |||
| 258 | if (!next(&s2)) | ||
| 259 | errx(1, "empty string2"); | ||
| 260 | |||
| 261 | /* If string2 runs out of characters, use the last one specified. */ | ||
| 262 | if (sflag) | ||
| 263 | while (next(&s1)) { | ||
| 264 | string1[s1.lastch] = ch = s2.lastch; | ||
| 265 | string2[ch] = 1; | ||
| 266 | (void) next(&s2); | ||
| 267 | } else | ||
| 268 | while (next(&s1)) { | ||
| 269 | string1[s1.lastch] = ch = s2.lastch; | ||
| 270 | (void) next(&s2); | ||
| 271 | } | 65 | } |
| 272 | 66 | c = input[in_index++]; | |
| 273 | if (cflag) | 67 | coded = vector[c]; |
| 274 | for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt) | 68 | if (del_fl && invec[c]) |
| 275 | *p = *p == OOBCH ? ch : cnt; | 69 | continue; |
| 276 | 70 | if (sq_fl && last == coded && outvec[coded]) | |
| 277 | if (sflag) | 71 | continue; |
| 278 | for (lastch = OOBCH; (ch = getchar()) != EOF;) { | 72 | output[out_index++] = last = coded; |
| 279 | ch = string1[ch]; | 73 | if (out_index == BUFSIZ) { |
| 280 | if (!string2[ch] || lastch != ch) { | 74 | if (write(1, (char *) output, out_index) != out_index) { |
| 281 | lastch = ch; | 75 | write(2, "Bad write\n", 10); |
| 282 | (void) putchar(ch); | 76 | exit(1); |
| 283 | } | 77 | } |
| 284 | } else | 78 | out_index = 0; |
| 285 | while ((ch = getchar()) != EOF) | ||
| 286 | (void) putchar(string1[ch]); | ||
| 287 | exit(0); | ||
| 288 | } | ||
| 289 | |||
| 290 | static int backslash __P((STR *)); | ||
| 291 | static int bracket __P((STR *)); | ||
| 292 | static int c_class __P((const void *, const void *)); | ||
| 293 | static void genclass __P((STR *)); | ||
| 294 | static void genequiv __P((STR *)); | ||
| 295 | static int genrange __P((STR *)); | ||
| 296 | static void genseq __P((STR *)); | ||
| 297 | |||
| 298 | static int next(s) | ||
| 299 | register STR *s; | ||
| 300 | { | ||
| 301 | register int ch; | ||
| 302 | |||
| 303 | switch (s->state) { | ||
| 304 | case EOS: | ||
| 305 | return (0); | ||
| 306 | case INFINITE: | ||
| 307 | return (1); | ||
| 308 | case NORMAL: | ||
| 309 | switch (ch = (u_char) * s->str) { | ||
| 310 | case '\0': | ||
| 311 | s->state = EOS; | ||
| 312 | return (0); | ||
| 313 | case '\\': | ||
| 314 | s->lastch = backslash(s); | ||
| 315 | break; | ||
| 316 | case '[': | ||
| 317 | if (bracket(s)) | ||
| 318 | return (next(s)); | ||
| 319 | /* FALLTHROUGH */ | ||
| 320 | default: | ||
| 321 | ++s->str; | ||
| 322 | s->lastch = ch; | ||
| 323 | break; | ||
| 324 | } | ||
| 325 | |||
| 326 | /* We can start a range at any time. */ | ||
| 327 | if (s->str[0] == '-' && genrange(s)) | ||
| 328 | return (next(s)); | ||
| 329 | return (1); | ||
| 330 | case RANGE: | ||
| 331 | if (s->cnt-- == 0) { | ||
| 332 | s->state = NORMAL; | ||
| 333 | return (next(s)); | ||
| 334 | } | ||
| 335 | ++s->lastch; | ||
| 336 | return (1); | ||
| 337 | case SEQUENCE: | ||
| 338 | if (s->cnt-- == 0) { | ||
| 339 | s->state = NORMAL; | ||
| 340 | return (next(s)); | ||
| 341 | } | ||
| 342 | return (1); | ||
| 343 | case SET: | ||
| 344 | if ((s->lastch = s->set[s->cnt++]) == OOBCH) { | ||
| 345 | s->state = NORMAL; | ||
| 346 | return (next(s)); | ||
| 347 | } | 79 | } |
| 348 | return (1); | ||
| 349 | } | 80 | } |
| 350 | /* NOTREACHED */ | ||
| 351 | return (0); | ||
| 352 | } | ||
| 353 | 81 | ||
| 354 | static int bracket(s) | ||
| 355 | register STR *s; | ||
| 356 | { | ||
| 357 | register char *p; | ||
| 358 | |||
| 359 | switch (s->str[1]) { | ||
| 360 | case ':': /* "[:class:]" */ | ||
| 361 | if ((p = strstr(s->str + 2, ":]")) == NULL) | ||
| 362 | return (0); | ||
| 363 | *p = '\0'; | ||
| 364 | s->str += 2; | ||
| 365 | genclass(s); | ||
| 366 | s->str = p + 2; | ||
| 367 | return (1); | ||
| 368 | case '=': /* "[=equiv=]" */ | ||
| 369 | if ((p = strstr(s->str + 2, "=]")) == NULL) | ||
| 370 | return (0); | ||
| 371 | s->str += 2; | ||
| 372 | genequiv(s); | ||
| 373 | return (1); | ||
| 374 | default: /* "[\###*n]" or "[#*n]" */ | ||
| 375 | if ((p = strpbrk(s->str + 2, "*]")) == NULL) | ||
| 376 | return (0); | ||
| 377 | if (p[0] != '*' || index(p, ']') == NULL) | ||
| 378 | return (0); | ||
| 379 | s->str += 1; | ||
| 380 | genseq(s); | ||
| 381 | return (1); | ||
| 382 | } | ||
| 383 | /* NOTREACHED */ | 82 | /* NOTREACHED */ |
| 384 | } | 83 | } |
| 385 | 84 | ||
| 386 | typedef struct { | 85 | static void map(register unsigned char *string1, register unsigned char *string2) |
| 387 | char *name; | ||
| 388 | int (*func) __P((int)); | ||
| 389 | int *set; | ||
| 390 | } CLASS; | ||
| 391 | |||
| 392 | static CLASS classes[] = { | ||
| 393 | #undef isalnum | ||
| 394 | {"alnum", isalnum,}, | ||
| 395 | #undef isalpha | ||
| 396 | {"alpha", isalpha,}, | ||
| 397 | /*#undef isblank | ||
| 398 | { "blank", isblank, },*/ | ||
| 399 | #undef iscntrl | ||
| 400 | {"cntrl", iscntrl,}, | ||
| 401 | #undef isdigit | ||
| 402 | {"digit", isdigit,}, | ||
| 403 | #undef isgraph | ||
| 404 | {"graph", isgraph,}, | ||
| 405 | #undef islower | ||
| 406 | {"lower", islower,}, | ||
| 407 | #undef isprint | ||
| 408 | {"print", isprint,}, | ||
| 409 | #undef ispunct | ||
| 410 | {"punct", ispunct,}, | ||
| 411 | #undef isspace | ||
| 412 | {"space", isspace,}, | ||
| 413 | #undef isupper | ||
| 414 | {"upper", isupper,}, | ||
| 415 | #undef isxdigit | ||
| 416 | {"xdigit", isxdigit,}, | ||
| 417 | }; | ||
| 418 | |||
| 419 | static void genclass(s) | ||
| 420 | STR *s; | ||
| 421 | { | 86 | { |
| 422 | register int cnt, (*func) __P((int)); | 87 | unsigned char last = '0'; |
| 423 | CLASS *cp, tmp; | 88 | |
| 424 | int *p; | 89 | while (*string1) { |
| 425 | 90 | if (*string2 == '\0') | |
| 426 | tmp.name = s->str; | 91 | vector[*string1] = last; |
| 427 | if ((cp = (CLASS *) bsearch(&tmp, classes, sizeof(classes) / | 92 | else |
| 428 | sizeof(CLASS), sizeof(CLASS), | 93 | vector[*string1] = last = *string2++; |
| 429 | c_class)) == NULL) errx(1, | 94 | string1++; |
| 430 | "unknown class %s", | 95 | } |
| 431 | s->str); | ||
| 432 | |||
| 433 | cp->set = p = xmalloc((NCHARS + 1) * sizeof(int)); | ||
| 434 | bzero(p, (NCHARS + 1) * sizeof(int)); | ||
| 435 | |||
| 436 | for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt) | ||
| 437 | if ((func) (cnt)) | ||
| 438 | *p++ = cnt; | ||
| 439 | *p = OOBCH; | ||
| 440 | |||
| 441 | s->cnt = 0; | ||
| 442 | s->state = SET; | ||
| 443 | s->set = cp->set; | ||
| 444 | } | ||
| 445 | |||
| 446 | static int c_class(a, b) | ||
| 447 | const void *a, *b; | ||
| 448 | { | ||
| 449 | return (strcmp(((CLASS *) a)->name, ((CLASS *) b)->name)); | ||
| 450 | } | 96 | } |
| 451 | 97 | ||
| 452 | /* | 98 | static void expand(register char *arg, register unsigned char *buffer) |
| 453 | * English doesn't have any equivalence classes, so for now | ||
| 454 | * we just syntax check and grab the character. | ||
| 455 | */ | ||
| 456 | static void genequiv(s) | ||
| 457 | STR *s; | ||
| 458 | { | 99 | { |
| 459 | if (*s->str == '\\') { | 100 | int i, ac; |
| 460 | s->equiv[0] = backslash(s); | 101 | |
| 461 | if (*s->str != '=') | 102 | while (*arg) { |
| 462 | errx(1, "misplaced equivalence equals sign"); | 103 | if (*arg == '\\') { |
| 463 | } else { | 104 | arg++; |
| 464 | s->equiv[0] = s->str[0]; | 105 | i = ac = 0; |
| 465 | if (s->str[1] != '=') | 106 | if (*arg >= '0' && *arg <= '7') { |
| 466 | errx(1, "misplaced equivalence equals sign"); | 107 | do { |
| 108 | ac = (ac << 3) + *arg++ - '0'; | ||
| 109 | i++; | ||
| 110 | } while (i < 4 && *arg >= '0' && *arg <= '7'); | ||
| 111 | *buffer++ = ac; | ||
| 112 | } else if (*arg != '\0') | ||
| 113 | *buffer++ = *arg++; | ||
| 114 | } else if (*arg == '[') { | ||
| 115 | arg++; | ||
| 116 | i = *arg++; | ||
| 117 | if (*arg++ != '-') { | ||
| 118 | *buffer++ = '['; | ||
| 119 | arg -= 2; | ||
| 120 | continue; | ||
| 121 | } | ||
| 122 | ac = *arg++; | ||
| 123 | while (i <= ac) | ||
| 124 | *buffer++ = i++; | ||
| 125 | arg++; /* Skip ']' */ | ||
| 126 | } else | ||
| 127 | *buffer++ = *arg++; | ||
| 467 | } | 128 | } |
| 468 | s->str += 2; | ||
| 469 | s->cnt = 0; | ||
| 470 | s->state = SET; | ||
| 471 | s->set = s->equiv; | ||
| 472 | } | 129 | } |
| 473 | 130 | ||
| 474 | static int genrange(s) | 131 | static void complement(unsigned char *buffer) |
| 475 | STR *s; | ||
| 476 | { | 132 | { |
| 477 | int stopval; | 133 | register unsigned char *ptr; |
| 478 | char *savestart; | 134 | register short i, index; |
| 479 | 135 | unsigned char conv[ASCII + 2]; | |
| 480 | savestart = s->str; | 136 | |
| 481 | stopval = *++s->str == '\\' ? backslash(s) : (u_char) * s->str++; | 137 | index = 0; |
| 482 | if (stopval < (u_char) s->lastch) { | 138 | for (i = 1; i <= ASCII; i++) { |
| 483 | s->str = savestart; | 139 | for (ptr = buffer; *ptr; ptr++) |
| 484 | return (0); | 140 | if (*ptr == i) |
| 141 | break; | ||
| 142 | if (*ptr == '\0') | ||
| 143 | conv[index++] = i & ASCII; | ||
| 485 | } | 144 | } |
| 486 | s->cnt = stopval - s->lastch + 1; | 145 | conv[index] = '\0'; |
| 487 | s->state = RANGE; | 146 | strcpy((char *) buffer, (char *) conv); |
| 488 | --s->lastch; | ||
| 489 | return (1); | ||
| 490 | } | 147 | } |
| 491 | 148 | ||
| 492 | static void genseq(s) | 149 | extern int tr_main(int argc, char **argv) |
| 493 | STR *s; | ||
| 494 | { | 150 | { |
| 495 | char *ep; | 151 | register unsigned char *ptr; |
| 496 | 152 | int index = 1; | |
| 497 | if (s->which == STRING1) | 153 | short i; |
| 498 | errx(1, "sequences only valid in string2"); | 154 | |
| 499 | 155 | if (argc > 1 && argv[index][0] == '-') { | |
| 500 | if (*s->str == '\\') | 156 | for (ptr = (unsigned char *) &argv[index][1]; *ptr; ptr++) { |
| 501 | s->lastch = backslash(s); | 157 | switch (*ptr) { |
| 502 | else | 158 | case 'c': |
| 503 | s->lastch = *s->str++; | 159 | com_fl = TRUE; |
| 504 | if (*s->str != '*') | 160 | break; |
| 505 | errx(1, "misplaced sequence asterisk"); | 161 | case 'd': |
| 506 | 162 | del_fl = TRUE; | |
| 507 | switch (*++s->str) { | ||
| 508 | case '\\': | ||
| 509 | s->cnt = backslash(s); | ||
| 510 | break; | ||
| 511 | case ']': | ||
| 512 | s->cnt = 0; | ||
| 513 | ++s->str; | ||
| 514 | break; | ||
| 515 | default: | ||
| 516 | if (isdigit((u_char) * s->str)) { | ||
| 517 | s->cnt = strtol(s->str, &ep, 0); | ||
| 518 | if (*ep == ']') { | ||
| 519 | s->str = ep + 1; | ||
| 520 | break; | 163 | break; |
| 164 | case 's': | ||
| 165 | sq_fl = TRUE; | ||
| 166 | break; | ||
| 167 | default: | ||
| 168 | usage("tr [-cds] STRING1 [STRING2]\n" | ||
| 169 | #ifndef BB_FEATURE_TRIVIAL_HELP | ||
| 170 | "\nTranslate, squeeze, and/or delete characters from\n" | ||
| 171 | "standard input, writing to standard output.\n\n" | ||
| 172 | "Options:\n" | ||
| 173 | "\t-c\ttake complement of STRING1\n" | ||
| 174 | "\t-d\tdelete input characters coded STRING1\n" | ||
| 175 | "\t-s\tsqueeze multiple output characters of STRING2 into one character\n" | ||
| 176 | #endif | ||
| 177 | ); | ||
| 521 | } | 178 | } |
| 522 | } | 179 | } |
| 523 | errx(1, "illegal sequence count"); | 180 | index++; |
| 524 | /* NOTREACHED */ | 181 | } |
| 182 | for (i = 0; i <= ASCII; i++) { | ||
| 183 | vector[i] = i; | ||
| 184 | invec[i] = outvec[i] = FALSE; | ||
| 525 | } | 185 | } |
| 526 | 186 | ||
| 527 | s->state = s->cnt ? SEQUENCE : INFINITE; | 187 | if (argv[index] != NULL) { |
| 188 | expand(argv[index++], input); | ||
| 189 | if (com_fl) | ||
| 190 | complement(input); | ||
| 191 | if (argv[index] != NULL) | ||
| 192 | expand(argv[index], output); | ||
| 193 | if (argv[index] != NULL) | ||
| 194 | map(input, output); | ||
| 195 | for (ptr = input; *ptr; ptr++) | ||
| 196 | invec[*ptr] = TRUE; | ||
| 197 | for (ptr = output; *ptr; ptr++) | ||
| 198 | outvec[*ptr] = TRUE; | ||
| 199 | } | ||
| 200 | convert(); | ||
| 201 | return (0); | ||
| 528 | } | 202 | } |
| 529 | 203 | ||
| 530 | /* | 204 | /* |
| 531 | * Translate \??? into a character. Up to 3 octal digits, if no digits either | 205 | * Copyright (c) 1987,1997, Prentice Hall |
| 532 | * an escape code or a literal character. | 206 | * All rights reserved. |
| 207 | * | ||
| 208 | * Redistribution and use of the MINIX operating system in source and | ||
| 209 | * binary forms, with or without modification, are permitted provided | ||
| 210 | * that the following conditions are met: | ||
| 211 | * | ||
| 212 | * Redistributions of source code must retain the above copyright | ||
| 213 | * notice, this list of conditions and the following disclaimer. | ||
| 214 | * | ||
| 215 | * Redistributions in binary form must reproduce the above | ||
| 216 | * copyright notice, this list of conditions and the following | ||
| 217 | * disclaimer in the documentation and/or other materials provided | ||
| 218 | * with the distribution. | ||
| 219 | * | ||
| 220 | * Neither the name of Prentice Hall nor the names of the software | ||
| 221 | * authors or contributors may be used to endorse or promote | ||
| 222 | * products derived from this software without specific prior | ||
| 223 | * written permission. | ||
| 224 | * | ||
| 225 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND | ||
| 226 | * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, | ||
| 227 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
| 228 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | ||
| 229 | * IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR CONTRIBUTORS BE | ||
| 230 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 231 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 232 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
| 233 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | ||
| 234 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE | ||
| 235 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, | ||
| 236 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 237 | * | ||
| 533 | */ | 238 | */ |
| 534 | static int backslash(s) | ||
| 535 | register STR *s; | ||
| 536 | { | ||
| 537 | register int ch, cnt, val; | ||
| 538 | 239 | ||
| 539 | for (cnt = val = 0;;) { | ||
| 540 | ch = (u_char) * ++s->str; | ||
| 541 | if (!isascii(ch) || !isdigit(ch)) | ||
| 542 | break; | ||
| 543 | val = val * 8 + ch - '0'; | ||
| 544 | if (++cnt == 3) { | ||
| 545 | ++s->str; | ||
| 546 | break; | ||
| 547 | } | ||
| 548 | } | ||
| 549 | if (cnt) | ||
| 550 | return (val); | ||
| 551 | if (ch != '\0') | ||
| 552 | ++s->str; | ||
| 553 | switch (ch) { | ||
| 554 | case 'a': /* escape characters */ | ||
| 555 | return ('\7'); | ||
| 556 | case 'b': | ||
| 557 | return ('\b'); | ||
| 558 | case 'f': | ||
| 559 | return ('\f'); | ||
| 560 | case 'n': | ||
| 561 | return ('\n'); | ||
| 562 | case 'r': | ||
| 563 | return ('\r'); | ||
| 564 | case 't': | ||
| 565 | return ('\t'); | ||
| 566 | case 'v': | ||
| 567 | return ('\13'); | ||
| 568 | case '\0': /* \" -> \ */ | ||
| 569 | s->state = EOS; | ||
| 570 | return ('\\'); | ||
| 571 | default: /* \x" -> x */ | ||
| 572 | return (ch); | ||
| 573 | } | ||
| 574 | } | ||
