diff options
Diffstat (limited to 'regexp.c')
-rw-r--r-- | regexp.c | 826 |
1 files changed, 826 insertions, 0 deletions
diff --git a/regexp.c b/regexp.c new file mode 100644 index 000000000..6017d79b4 --- /dev/null +++ b/regexp.c | |||
@@ -0,0 +1,826 @@ | |||
1 | /* regexp.c */ | ||
2 | |||
3 | #include "internal.h" | ||
4 | #include "regexp.h" | ||
5 | #include <setjmp.h> | ||
6 | #include <stdio.h> | ||
7 | #include <ctype.h> | ||
8 | |||
9 | |||
10 | #if ( defined BB_GREP || defined BB_FIND ) | ||
11 | |||
12 | /* This also tries to find a needle in a haystack, but uses | ||
13 | * real regular expressions.... The fake regular expression | ||
14 | * version of find_match lives in utility.c. Using this version | ||
15 | * will add 3.9k to busybox... | ||
16 | * -Erik Andersen | ||
17 | */ | ||
18 | extern int find_match(char *haystack, char *needle, int ignoreCase) | ||
19 | { | ||
20 | int status; | ||
21 | struct regexp* re; | ||
22 | re = regcomp( needle); | ||
23 | status = regexec(re, haystack, FALSE, ignoreCase); | ||
24 | free( re); | ||
25 | return( status); | ||
26 | } | ||
27 | |||
28 | |||
29 | /* code swiped from elvis-tiny 1.4 (a clone of vi) and adjusted to | ||
30 | * suit the needs of busybox by Erik Andersen. | ||
31 | * | ||
32 | * From the README: | ||
33 | * "Elvis is freely redistributable, in either source form or executable form. | ||
34 | * There are no restrictions on how you may use it". | ||
35 | * Elvis was written by Steve Kirkendall <kirkenda@cs.pdx.edu> | ||
36 | * | ||
37 | * | ||
38 | * This file contains the code that compiles regular expressions and executes | ||
39 | * them. It supports the same syntax and features as vi's regular expression | ||
40 | * code. Specifically, the meta characters are: | ||
41 | * ^ matches the beginning of a line | ||
42 | * $ matches the end of a line | ||
43 | * \< matches the beginning of a word | ||
44 | * \> matches the end of a word | ||
45 | * . matches any single character | ||
46 | * [] matches any character in a character class | ||
47 | * \( delimits the start of a subexpression | ||
48 | * \) delimits the end of a subexpression | ||
49 | * * repeats the preceding 0 or more times | ||
50 | * NOTE: You cannot follow a \) with a *. | ||
51 | * | ||
52 | * The physical structure of a compiled RE is as follows: | ||
53 | * - First, there is a one-byte value that says how many character classes | ||
54 | * are used in this regular expression | ||
55 | * - Next, each character class is stored as a bitmap that is 256 bits | ||
56 | * (32 bytes) long. | ||
57 | * - A mixture of literal characters and compiled meta characters follows. | ||
58 | * This begins with M_BEGIN(0) and ends with M_END(0). All meta chars | ||
59 | * are stored as a \n followed by a one-byte code, so they take up two | ||
60 | * bytes apiece. Literal characters take up one byte apiece. \n can't | ||
61 | * be used as a literal character. | ||
62 | * | ||
63 | */ | ||
64 | |||
65 | |||
66 | |||
67 | static char *previous; /* the previous regexp, used when null regexp is given */ | ||
68 | static char *previous1; /* a copy of the text from the previous substitution for regsub()*/ | ||
69 | |||
70 | |||
71 | /* These are used to classify or recognize meta-characters */ | ||
72 | #define META '\0' | ||
73 | #define BASE_META(m) ((m) - 256) | ||
74 | #define INT_META(c) ((c) + 256) | ||
75 | #define IS_META(m) ((m) >= 256) | ||
76 | #define IS_CLASS(m) ((m) >= M_CLASS(0) && (m) <= M_CLASS(9)) | ||
77 | #define IS_START(m) ((m) >= M_START(0) && (m) <= M_START(9)) | ||
78 | #define IS_END(m) ((m) >= M_END(0) && (m) <= M_END(9)) | ||
79 | #define IS_CLOSURE(m) ((m) >= M_SPLAT && (m) <= M_QMARK) | ||
80 | #define ADD_META(s,m) (*(s)++ = META, *(s)++ = BASE_META(m)) | ||
81 | #define GET_META(s) (*(s) == META ? INT_META(*++(s)) : *s) | ||
82 | |||
83 | /* These are the internal codes used for each type of meta-character */ | ||
84 | #define M_BEGLINE 256 /* internal code for ^ */ | ||
85 | #define M_ENDLINE 257 /* internal code for $ */ | ||
86 | #define M_BEGWORD 258 /* internal code for \< */ | ||
87 | #define M_ENDWORD 259 /* internal code for \> */ | ||
88 | #define M_ANY 260 /* internal code for . */ | ||
89 | #define M_SPLAT 261 /* internal code for * */ | ||
90 | #define M_PLUS 262 /* internal code for \+ */ | ||
91 | #define M_QMARK 263 /* internal code for \? */ | ||
92 | #define M_CLASS(n) (264+(n)) /* internal code for [] */ | ||
93 | #define M_START(n) (274+(n)) /* internal code for \( */ | ||
94 | #define M_END(n) (284+(n)) /* internal code for \) */ | ||
95 | |||
96 | /* These are used during compilation */ | ||
97 | static int class_cnt; /* used to assign class IDs */ | ||
98 | static int start_cnt; /* used to assign start IDs */ | ||
99 | static int end_stk[NSUBEXP];/* used to assign end IDs */ | ||
100 | static int end_sp; | ||
101 | static char *retext; /* points to the text being compiled */ | ||
102 | |||
103 | /* error-handling stuff */ | ||
104 | jmp_buf errorhandler; | ||
105 | #define FAIL(why) fprintf(stderr, why); longjmp(errorhandler, 1) | ||
106 | |||
107 | |||
108 | |||
109 | |||
110 | /* This function builds a bitmap for a particular class */ | ||
111 | /* text -- start of the class */ | ||
112 | /* bmap -- the bitmap */ | ||
113 | static char *makeclass(char* text, char* bmap) | ||
114 | { | ||
115 | int i; | ||
116 | int complement = 0; | ||
117 | |||
118 | |||
119 | /* zero the bitmap */ | ||
120 | for (i = 0; bmap && i < 32; i++) | ||
121 | { | ||
122 | bmap[i] = 0; | ||
123 | } | ||
124 | |||
125 | /* see if we're going to complement this class */ | ||
126 | if (*text == '^') | ||
127 | { | ||
128 | text++; | ||
129 | complement = 1; | ||
130 | } | ||
131 | |||
132 | /* add in the characters */ | ||
133 | while (*text && *text != ']') | ||
134 | { | ||
135 | /* is this a span of characters? */ | ||
136 | if (text[1] == '-' && text[2]) | ||
137 | { | ||
138 | /* spans can't be backwards */ | ||
139 | if (text[0] > text[2]) | ||
140 | { | ||
141 | FAIL("Backwards span in []"); | ||
142 | } | ||
143 | |||
144 | /* add each character in the span to the bitmap */ | ||
145 | for (i = text[0]; bmap && i <= text[2]; i++) | ||
146 | { | ||
147 | bmap[i >> 3] |= (1 << (i & 7)); | ||
148 | } | ||
149 | |||
150 | /* move past this span */ | ||
151 | text += 3; | ||
152 | } | ||
153 | else | ||
154 | { | ||
155 | /* add this single character to the span */ | ||
156 | i = *text++; | ||
157 | if (bmap) | ||
158 | { | ||
159 | bmap[i >> 3] |= (1 << (i & 7)); | ||
160 | } | ||
161 | } | ||
162 | } | ||
163 | |||
164 | /* make sure the closing ] is missing */ | ||
165 | if (*text++ != ']') | ||
166 | { | ||
167 | FAIL("] missing"); | ||
168 | } | ||
169 | |||
170 | /* if we're supposed to complement this class, then do so */ | ||
171 | if (complement && bmap) | ||
172 | { | ||
173 | for (i = 0; i < 32; i++) | ||
174 | { | ||
175 | bmap[i] = ~bmap[i]; | ||
176 | } | ||
177 | } | ||
178 | |||
179 | return text; | ||
180 | } | ||
181 | |||
182 | |||
183 | |||
184 | |||
185 | /* This function gets the next character or meta character from a string. | ||
186 | * The pointer is incremented by 1, or by 2 for \-quoted characters. For [], | ||
187 | * a bitmap is generated via makeclass() (if re is given), and the | ||
188 | * character-class text is skipped. | ||
189 | */ | ||
190 | static int gettoken(sptr, re) | ||
191 | char **sptr; | ||
192 | regexp *re; | ||
193 | { | ||
194 | int c; | ||
195 | |||
196 | c = **sptr; | ||
197 | ++*sptr; | ||
198 | if (c == '\\') | ||
199 | { | ||
200 | c = **sptr; | ||
201 | ++*sptr; | ||
202 | switch (c) | ||
203 | { | ||
204 | case '<': | ||
205 | return M_BEGWORD; | ||
206 | |||
207 | case '>': | ||
208 | return M_ENDWORD; | ||
209 | |||
210 | case '(': | ||
211 | if (start_cnt >= NSUBEXP) | ||
212 | { | ||
213 | FAIL("Too many \\(s"); | ||
214 | } | ||
215 | end_stk[end_sp++] = start_cnt; | ||
216 | return M_START(start_cnt++); | ||
217 | |||
218 | case ')': | ||
219 | if (end_sp <= 0) | ||
220 | { | ||
221 | FAIL("Mismatched \\)"); | ||
222 | } | ||
223 | return M_END(end_stk[--end_sp]); | ||
224 | |||
225 | case '*': | ||
226 | return M_SPLAT; | ||
227 | |||
228 | case '.': | ||
229 | return M_ANY; | ||
230 | |||
231 | case '+': | ||
232 | return M_PLUS; | ||
233 | |||
234 | case '?': | ||
235 | return M_QMARK; | ||
236 | |||
237 | default: | ||
238 | return c; | ||
239 | } | ||
240 | } | ||
241 | else { | ||
242 | switch (c) | ||
243 | { | ||
244 | case '^': | ||
245 | if (*sptr == retext + 1) | ||
246 | { | ||
247 | return M_BEGLINE; | ||
248 | } | ||
249 | return c; | ||
250 | |||
251 | case '$': | ||
252 | if (!**sptr) | ||
253 | { | ||
254 | return M_ENDLINE; | ||
255 | } | ||
256 | return c; | ||
257 | |||
258 | case '.': | ||
259 | return M_ANY; | ||
260 | |||
261 | case '*': | ||
262 | return M_SPLAT; | ||
263 | |||
264 | case '[': | ||
265 | /* make sure we don't have too many classes */ | ||
266 | if (class_cnt >= 10) | ||
267 | { | ||
268 | FAIL("Too many []s"); | ||
269 | } | ||
270 | |||
271 | /* process the character list for this class */ | ||
272 | if (re) | ||
273 | { | ||
274 | /* generate the bitmap for this class */ | ||
275 | *sptr = makeclass(*sptr, re->program + 1 + 32 * class_cnt); | ||
276 | } | ||
277 | else | ||
278 | { | ||
279 | /* skip to end of the class */ | ||
280 | *sptr = makeclass(*sptr, (char *)0); | ||
281 | } | ||
282 | return M_CLASS(class_cnt++); | ||
283 | |||
284 | default: | ||
285 | return c; | ||
286 | } | ||
287 | } | ||
288 | /*NOTREACHED*/ | ||
289 | } | ||
290 | |||
291 | |||
292 | |||
293 | |||
294 | /* This function calculates the number of bytes that will be needed for a | ||
295 | * compiled RE. Its argument is the uncompiled version. It is not clever | ||
296 | * about catching syntax errors; that is done in a later pass. | ||
297 | */ | ||
298 | static unsigned calcsize(text) | ||
299 | char *text; | ||
300 | { | ||
301 | unsigned size; | ||
302 | int token; | ||
303 | |||
304 | retext = text; | ||
305 | class_cnt = 0; | ||
306 | start_cnt = 1; | ||
307 | end_sp = 0; | ||
308 | size = 5; | ||
309 | while ((token = gettoken(&text, (regexp *)0)) != 0) | ||
310 | { | ||
311 | if (IS_CLASS(token)) | ||
312 | { | ||
313 | size += 34; | ||
314 | } | ||
315 | else if (IS_META(token)) | ||
316 | { | ||
317 | size += 2; | ||
318 | } | ||
319 | else | ||
320 | { | ||
321 | size++; | ||
322 | } | ||
323 | } | ||
324 | |||
325 | return size; | ||
326 | } | ||
327 | |||
328 | |||
329 | |||
330 | /*---------------------------------------------------------------------------*/ | ||
331 | |||
332 | |||
333 | /* This function checks for a match between a character and a token which is | ||
334 | * known to represent a single character. It returns 0 if they match, or | ||
335 | * 1 if they don't. | ||
336 | */ | ||
337 | static int match1(regexp* re, char ch, int token, int ignoreCase) | ||
338 | { | ||
339 | if (!ch) | ||
340 | { | ||
341 | /* the end of a line can't match any RE of width 1 */ | ||
342 | return 1; | ||
343 | } | ||
344 | if (token == M_ANY) | ||
345 | { | ||
346 | return 0; | ||
347 | } | ||
348 | else if (IS_CLASS(token)) | ||
349 | { | ||
350 | if (re->program[1 + 32 * (token - M_CLASS(0)) + (ch >> 3)] & (1 << (ch & 7))) | ||
351 | return 0; | ||
352 | } | ||
353 | else if (ch == token | ||
354 | || (ignoreCase==TRUE && isupper(ch) && tolower(ch) == token)) | ||
355 | { | ||
356 | return 0; | ||
357 | } | ||
358 | return 1; | ||
359 | } | ||
360 | |||
361 | |||
362 | |||
363 | /* This function checks characters up to and including the next closure, at | ||
364 | * which point it does a recursive call to check the rest of it. This function | ||
365 | * returns 0 if everything matches, or 1 if something doesn't match. | ||
366 | */ | ||
367 | /* re -- the regular expression */ | ||
368 | /* str -- the string */ | ||
369 | /* prog -- a portion of re->program, an compiled RE */ | ||
370 | /* here -- a portion of str, the string to compare it to */ | ||
371 | static int match(regexp* re, char* str, char* prog, char* here, int ignoreCase) | ||
372 | { | ||
373 | int token; | ||
374 | int nmatched; | ||
375 | int closure; | ||
376 | |||
377 | for (token = GET_META(prog); !IS_CLOSURE(token); prog++, token = GET_META(prog)) | ||
378 | { | ||
379 | switch (token) | ||
380 | { | ||
381 | /*case M_BEGLINE: can't happen; re->bol is used instead */ | ||
382 | case M_ENDLINE: | ||
383 | if (*here) | ||
384 | return 1; | ||
385 | break; | ||
386 | |||
387 | case M_BEGWORD: | ||
388 | if (here != str && | ||
389 | (here[-1] == '_' || | ||
390 | (isascii(here[-1]) && isalnum(here[-1])))) | ||
391 | return 1; | ||
392 | break; | ||
393 | |||
394 | case M_ENDWORD: | ||
395 | if ((here[0] == '_' || isascii(here[0])) && isalnum(here[0])) | ||
396 | return 1; | ||
397 | break; | ||
398 | |||
399 | case M_START(0): | ||
400 | case M_START(1): | ||
401 | case M_START(2): | ||
402 | case M_START(3): | ||
403 | case M_START(4): | ||
404 | case M_START(5): | ||
405 | case M_START(6): | ||
406 | case M_START(7): | ||
407 | case M_START(8): | ||
408 | case M_START(9): | ||
409 | re->startp[token - M_START(0)] = (char *)here; | ||
410 | break; | ||
411 | |||
412 | case M_END(0): | ||
413 | case M_END(1): | ||
414 | case M_END(2): | ||
415 | case M_END(3): | ||
416 | case M_END(4): | ||
417 | case M_END(5): | ||
418 | case M_END(6): | ||
419 | case M_END(7): | ||
420 | case M_END(8): | ||
421 | case M_END(9): | ||
422 | re->endp[token - M_END(0)] = (char *)here; | ||
423 | if (token == M_END(0)) | ||
424 | { | ||
425 | return 0; | ||
426 | } | ||
427 | break; | ||
428 | |||
429 | default: /* literal, M_CLASS(n), or M_ANY */ | ||
430 | if (match1(re, *here, token, ignoreCase) != 0) | ||
431 | return 1; | ||
432 | here++; | ||
433 | } | ||
434 | } | ||
435 | |||
436 | /* C L O S U R E */ | ||
437 | |||
438 | /* step 1: see what we have to match against, and move "prog" to point | ||
439 | * the the remainder of the compiled RE. | ||
440 | */ | ||
441 | closure = token; | ||
442 | prog++, token = GET_META(prog); | ||
443 | prog++; | ||
444 | |||
445 | /* step 2: see how many times we can match that token against the string */ | ||
446 | for (nmatched = 0; | ||
447 | (closure != M_QMARK || nmatched < 1) && *here && match1(re, *here, token, ignoreCase) == 0; | ||
448 | nmatched++, here++) | ||
449 | { | ||
450 | } | ||
451 | |||
452 | /* step 3: try to match the remainder, and back off if it doesn't */ | ||
453 | while (nmatched >= 0 && match(re, str, prog, here, ignoreCase) != 0) | ||
454 | { | ||
455 | nmatched--; | ||
456 | here--; | ||
457 | } | ||
458 | |||
459 | /* so how did it work out? */ | ||
460 | if (nmatched >= ((closure == M_PLUS) ? 1 : 0)) | ||
461 | return 0; | ||
462 | return 1; | ||
463 | } | ||
464 | |||
465 | |||
466 | /* This function compiles a regexp. */ | ||
467 | extern regexp *regcomp(char* text) | ||
468 | { | ||
469 | int needfirst; | ||
470 | unsigned size; | ||
471 | int token; | ||
472 | int peek; | ||
473 | char *build; | ||
474 | regexp *re; | ||
475 | |||
476 | |||
477 | /* prepare for error handling */ | ||
478 | re = (regexp *)0; | ||
479 | if (setjmp(errorhandler)) | ||
480 | { | ||
481 | if (re) | ||
482 | { | ||
483 | free(re); | ||
484 | } | ||
485 | return (regexp *)0; | ||
486 | } | ||
487 | |||
488 | /* if an empty regexp string was given, use the previous one */ | ||
489 | if (*text == 0) | ||
490 | { | ||
491 | if (!previous) | ||
492 | { | ||
493 | FAIL("No previous RE"); | ||
494 | } | ||
495 | text = previous; | ||
496 | } | ||
497 | else /* non-empty regexp given, so remember it */ | ||
498 | { | ||
499 | if (previous) | ||
500 | free(previous); | ||
501 | previous = (char *)malloc((unsigned)(strlen(text) + 1)); | ||
502 | if (previous) | ||
503 | strcpy(previous, text); | ||
504 | } | ||
505 | |||
506 | /* allocate memory */ | ||
507 | class_cnt = 0; | ||
508 | start_cnt = 1; | ||
509 | end_sp = 0; | ||
510 | retext = text; | ||
511 | size = calcsize(text) + sizeof(regexp); | ||
512 | re = (regexp *)malloc((unsigned)size); | ||
513 | |||
514 | if (!re) | ||
515 | { | ||
516 | FAIL("Not enough memory for this RE"); | ||
517 | } | ||
518 | |||
519 | /* compile it */ | ||
520 | build = &re->program[1 + 32 * class_cnt]; | ||
521 | re->program[0] = class_cnt; | ||
522 | for (token = 0; token < NSUBEXP; token++) | ||
523 | { | ||
524 | re->startp[token] = re->endp[token] = (char *)0; | ||
525 | } | ||
526 | re->first = 0; | ||
527 | re->bol = 0; | ||
528 | re->minlen = 0; | ||
529 | needfirst = 1; | ||
530 | class_cnt = 0; | ||
531 | start_cnt = 1; | ||
532 | end_sp = 0; | ||
533 | retext = text; | ||
534 | for (token = M_START(0), peek = gettoken(&text, re); | ||
535 | token; | ||
536 | token = peek, peek = gettoken(&text, re)) | ||
537 | { | ||
538 | /* special processing for the closure operator */ | ||
539 | if (IS_CLOSURE(peek)) | ||
540 | { | ||
541 | /* detect misuse of closure operator */ | ||
542 | if (IS_START(token)) | ||
543 | { | ||
544 | FAIL("* or \\+ or \\? follows nothing"); | ||
545 | } | ||
546 | else if (IS_META(token) && token != M_ANY && !IS_CLASS(token)) | ||
547 | { | ||
548 | FAIL("* or \\+ or \\? can only follow a normal character or . or []"); | ||
549 | } | ||
550 | |||
551 | /* it is okay -- make it prefix instead of postfix */ | ||
552 | ADD_META(build, peek); | ||
553 | |||
554 | /* take care of "needfirst" - is this the first char? */ | ||
555 | if (needfirst && peek == M_PLUS && !IS_META(token)) | ||
556 | { | ||
557 | re->first = token; | ||
558 | } | ||
559 | needfirst = 0; | ||
560 | |||
561 | /* we used "peek" -- need to refill it */ | ||
562 | peek = gettoken(&text, re); | ||
563 | if (IS_CLOSURE(peek)) | ||
564 | { | ||
565 | FAIL("* or \\+ or \\? doubled up"); | ||
566 | } | ||
567 | } | ||
568 | else if (!IS_META(token)) | ||
569 | { | ||
570 | /* normal char is NOT argument of closure */ | ||
571 | if (needfirst) | ||
572 | { | ||
573 | re->first = token; | ||
574 | needfirst = 0; | ||
575 | } | ||
576 | re->minlen++; | ||
577 | } | ||
578 | else if (token == M_ANY || IS_CLASS(token)) | ||
579 | { | ||
580 | /* . or [] is NOT argument of closure */ | ||
581 | needfirst = 0; | ||
582 | re->minlen++; | ||
583 | } | ||
584 | |||
585 | /* the "token" character is not closure -- process it normally */ | ||
586 | if (token == M_BEGLINE) | ||
587 | { | ||
588 | /* set the BOL flag instead of storing M_BEGLINE */ | ||
589 | re->bol = 1; | ||
590 | } | ||
591 | else if (IS_META(token)) | ||
592 | { | ||
593 | ADD_META(build, token); | ||
594 | } | ||
595 | else | ||
596 | { | ||
597 | *build++ = token; | ||
598 | } | ||
599 | } | ||
600 | |||
601 | /* end it with a \) which MUST MATCH the opening \( */ | ||
602 | ADD_META(build, M_END(0)); | ||
603 | if (end_sp > 0) | ||
604 | { | ||
605 | FAIL("Not enough \\)s"); | ||
606 | } | ||
607 | |||
608 | return re; | ||
609 | } | ||
610 | |||
611 | |||
612 | |||
613 | |||
614 | /* This function searches through a string for text that matches an RE. */ | ||
615 | /* re -- the compiled regexp to search for */ | ||
616 | /* str -- the string to search through */ | ||
617 | /* bol -- does str start at the beginning of a line? (boolean) */ | ||
618 | /* ignoreCase -- ignoreCase or not */ | ||
619 | extern int regexec(struct regexp* re, char* str, int bol, int ignoreCase) | ||
620 | { | ||
621 | char *prog; /* the entry point of re->program */ | ||
622 | int len; /* length of the string */ | ||
623 | char *here; | ||
624 | |||
625 | /* if must start at the beginning of a line, and this isn't, then fail */ | ||
626 | if (re->bol && bol==TRUE) | ||
627 | { | ||
628 | return FALSE; | ||
629 | } | ||
630 | |||
631 | len = strlen(str); | ||
632 | prog = re->program + 1 + 32 * re->program[0]; | ||
633 | |||
634 | /* search for the RE in the string */ | ||
635 | if (re->bol) | ||
636 | { | ||
637 | /* must occur at BOL */ | ||
638 | if ((re->first | ||
639 | && match1(re, *(char *)str, re->first, ignoreCase))/* wrong first letter? */ | ||
640 | || len < re->minlen /* not long enough? */ | ||
641 | || match(re, (char *)str, prog, str, ignoreCase)) /* doesn't match? */ | ||
642 | return FALSE; /* THEN FAIL! */ | ||
643 | } | ||
644 | else if (ignoreCase == FALSE) | ||
645 | { | ||
646 | /* can occur anywhere in the line, noignorecase */ | ||
647 | for (here = (char *)str; | ||
648 | (re->first && re->first != *here) | ||
649 | || match(re, (char *)str, prog, here, ignoreCase); | ||
650 | here++, len--) | ||
651 | { | ||
652 | if (len < re->minlen) | ||
653 | return FALSE; | ||
654 | } | ||
655 | } | ||
656 | else | ||
657 | { | ||
658 | /* can occur anywhere in the line, ignorecase */ | ||
659 | for (here = (char *)str; | ||
660 | (re->first && match1(re, *here, (int)re->first, ignoreCase)) | ||
661 | || match(re, (char *)str, prog, here, ignoreCase); | ||
662 | here++, len--) | ||
663 | { | ||
664 | if (len < re->minlen) | ||
665 | return FALSE; | ||
666 | } | ||
667 | } | ||
668 | |||
669 | /* if we didn't fail, then we must have succeeded */ | ||
670 | return TRUE; | ||
671 | } | ||
672 | |||
673 | |||
674 | |||
675 | |||
676 | |||
677 | /* This performs substitutions after a regexp match has been found. */ | ||
678 | extern void regsub(regexp* re, char* src, char* dst) | ||
679 | { | ||
680 | char *cpy; | ||
681 | char *end; | ||
682 | char c; | ||
683 | char *start; | ||
684 | int mod; | ||
685 | |||
686 | mod = 0; | ||
687 | |||
688 | start = src; | ||
689 | while ((c = *src++) != '\0') | ||
690 | { | ||
691 | /* recognize any meta characters */ | ||
692 | if (c == '&') | ||
693 | { | ||
694 | cpy = re->startp[0]; | ||
695 | end = re->endp[0]; | ||
696 | } | ||
697 | else if (c == '~') | ||
698 | { | ||
699 | cpy = previous1; | ||
700 | if (cpy) | ||
701 | end = cpy + strlen(cpy); | ||
702 | } | ||
703 | else | ||
704 | if (c == '\\') | ||
705 | { | ||
706 | c = *src++; | ||
707 | switch (c) | ||
708 | { | ||
709 | case '0': | ||
710 | case '1': | ||
711 | case '2': | ||
712 | case '3': | ||
713 | case '4': | ||
714 | case '5': | ||
715 | case '6': | ||
716 | case '7': | ||
717 | case '8': | ||
718 | case '9': | ||
719 | /* \0 thru \9 mean "copy subexpression" */ | ||
720 | c -= '0'; | ||
721 | cpy = re->startp[(int)c]; | ||
722 | end = re->endp[(int)c]; | ||
723 | break; | ||
724 | case 'U': | ||
725 | case 'u': | ||
726 | case 'L': | ||
727 | case 'l': | ||
728 | /* \U and \L mean "convert to upper/lowercase" */ | ||
729 | mod = c; | ||
730 | continue; | ||
731 | |||
732 | case 'E': | ||
733 | case 'e': | ||
734 | /* \E ends the \U or \L */ | ||
735 | mod = 0; | ||
736 | continue; | ||
737 | case '&': | ||
738 | /* "\&" means "original text" */ | ||
739 | *dst++ = c; | ||
740 | continue; | ||
741 | |||
742 | case '~': | ||
743 | /* "\~" means "previous text, if any" */ | ||
744 | *dst++ = c; | ||
745 | continue; | ||
746 | default: | ||
747 | /* ordinary char preceded by backslash */ | ||
748 | *dst++ = c; | ||
749 | continue; | ||
750 | } | ||
751 | } | ||
752 | else | ||
753 | { | ||
754 | /* ordinary character, so just copy it */ | ||
755 | *dst++ = c; | ||
756 | continue; | ||
757 | } | ||
758 | |||
759 | /* Note: to reach this point in the code, we must have evaded | ||
760 | * all "continue" statements. To do that, we must have hit | ||
761 | * a metacharacter that involves copying. | ||
762 | */ | ||
763 | |||
764 | /* if there is nothing to copy, loop */ | ||
765 | if (!cpy) | ||
766 | continue; | ||
767 | |||
768 | /* copy over a portion of the original */ | ||
769 | while (cpy < end) | ||
770 | { | ||
771 | switch (mod) | ||
772 | { | ||
773 | case 'U': | ||
774 | case 'u': | ||
775 | /* convert to uppercase */ | ||
776 | if (isascii(*cpy) && islower(*cpy)) | ||
777 | { | ||
778 | *dst++ = toupper(*cpy); | ||
779 | cpy++; | ||
780 | } | ||
781 | else | ||
782 | { | ||
783 | *dst++ = *cpy++; | ||
784 | } | ||
785 | break; | ||
786 | |||
787 | case 'L': | ||
788 | case 'l': | ||
789 | /* convert to lowercase */ | ||
790 | if (isascii(*cpy) && isupper(*cpy)) | ||
791 | { | ||
792 | *dst++ = tolower(*cpy); | ||
793 | cpy++; | ||
794 | } | ||
795 | else | ||
796 | { | ||
797 | *dst++ = *cpy++; | ||
798 | } | ||
799 | break; | ||
800 | |||
801 | default: | ||
802 | /* copy without any conversion */ | ||
803 | *dst++ = *cpy++; | ||
804 | } | ||
805 | |||
806 | /* \u and \l end automatically after the first char */ | ||
807 | if (mod && (mod == 'u' || mod == 'l')) | ||
808 | { | ||
809 | mod = 0; | ||
810 | } | ||
811 | } | ||
812 | } | ||
813 | *dst = '\0'; | ||
814 | |||
815 | /* remember what text we inserted this time */ | ||
816 | if (previous1) | ||
817 | free(previous1); | ||
818 | previous1 = (char *)malloc((unsigned)(strlen(start) + 1)); | ||
819 | if (previous1) | ||
820 | strcpy(previous1, start); | ||
821 | } | ||
822 | |||
823 | |||
824 | #endif /* BB_REGEXP */ | ||
825 | |||
826 | |||