summary | refs | log | tree | commit | diff
path: root/regexp.c
diff options
context:
space:
mode:
author: Erik Andersen <andersen@codepoet.org> 2000-02-08 19:58:47 +0000
committer: Erik Andersen <andersen@codepoet.org> 2000-02-08 19:58:47 +0000
commit: e49d5ecbbe51718fa925b6890a735e5937cc2aa2 (patch)
tree: c90bda10731ad9333ce3b404f993354c9fc104b8 /regexp.c
parent: c0bf817bbc5c7867fbe8fb76d5c39f8ee802692f (diff)
download: busybox-w32-e49d5ecbbe51718fa925b6890a735e5937cc2aa2.tar.gz
download: busybox-w32-e49d5ecbbe51718fa925b6890a735e5937cc2aa2.tar.bz2
download: busybox-w32-e49d5ecbbe51718fa925b6890a735e5937cc2aa2.zip
Some formatting updates (ran the code through indent)
-Erik
Diffstat (limited to 'regexp.c')
-rw-r--r-- regexp.c 602
1 files changed, 258 insertions, 344 deletions
diff --git a/regexp.c b/regexp.c
index 11b46c720..f4a8aa1b0 100644
--- a/regexp.c
+++ b/regexp.c
@@ -1,3 +1,4 @@
1/* vi: set sw=4 ts=4: */
1/* regexp.c */ 2/* regexp.c */
2 3
3#include "internal.h" 4#include "internal.h"
@@ -17,44 +18,47 @@
17 */ 18 */
18extern int find_match(char *haystack, char *needle, int ignoreCase) 19extern int find_match(char *haystack, char *needle, int ignoreCase)
19{ 20{
20 int status; 21 int status;
21 struct regexp* re; 22 struct regexp *re;
22 re = regcomp( needle); 23
23 status = regexec(re, haystack, FALSE, ignoreCase); 24 re = regcomp(needle);
24 free( re); 25 status = regexec(re, haystack, FALSE, ignoreCase);
25 return( status); 26 free(re);
27 return (status);
26} 28}
27 29
28#if defined BB_SED 30#if defined BB_SED
29/* This performs substitutions after a regexp match has been found. 31/* This performs substitutions after a regexp match has been found.
30 * The new string is returned. It is malloc'ed, and do must be freed. */ 32 * The new string is returned. It is malloc'ed, and do must be freed. */
31extern int replace_match(char *haystack, char *needle, char *newNeedle, int ignoreCase) 33extern int replace_match(char *haystack, char *needle, char *newNeedle,
34 int ignoreCase)
32{ 35{
33 int status; 36 int status;
34 struct regexp* re; 37 struct regexp *re;
35 char *s, buf[BUF_SIZE], *d = buf; 38 char *s, buf[BUF_SIZE], *d = buf;
36 39
37 re = regcomp( needle); 40 re = regcomp(needle);
38 status = regexec(re, haystack, FALSE, ignoreCase); 41 status = regexec(re, haystack, FALSE, ignoreCase);
39 if (status==TRUE) { 42 if (status == TRUE) {
40 s=haystack; 43 s = haystack;
41 44
42 do { 45 do {
43 /* copy stuff from before the match */ 46 /* copy stuff from before the match */
44 while (s < re->startp[0]) 47 while (s < re->startp[0])
45 *d++ = *s++; 48 *d++ = *s++;
46 /* substitute for the matched part */ 49 /* substitute for the matched part */
47 regsub(re, newNeedle, d); 50 regsub(re, newNeedle, d);
48 s = re->endp[0]; 51 s = re->endp[0];
49 d += strlen(d); 52 d += strlen(d);
50 } while (regexec(re, s, FALSE, ignoreCase) == TRUE); 53 } while (regexec(re, s, FALSE, ignoreCase) == TRUE);
51 /* copy stuff from after the match */ 54 /* copy stuff from after the match */
52 while ( (*d++ = *s++) ) {} 55 while ((*d++ = *s++)) {
53 d[0] = '\0'; 56 }
54 strcpy(haystack, buf); 57 d[0] = '\0';
55 } 58 strcpy(haystack, buf);
56 free( re); 59 }
57 return( status); 60 free(re);
61 return (status);
58} 62}
59#endif 63#endif
60 64
@@ -97,9 +101,10 @@ extern int replace_match(char *haystack, char *needle, char *newNeedle, int igno
97 101
98 102
99 103
100static char *previous; /* the previous regexp, used when null regexp is given */ 104static char *previous; /* the previous regexp, used when null regexp is given */
105
101#if defined BB_SED 106#if defined BB_SED
102static char *previous1; /* a copy of the text from the previous substitution for regsub()*/ 107static char *previous1; /* a copy of the text from the previous substitution for regsub() */
103#endif 108#endif
104 109
105 110
@@ -116,27 +121,28 @@ static char *previous1; /* a copy of the text from the previous substitution for
116#define GET_META(s) (*(s) == META ? INT_META(*++(s)) : *s) 121#define GET_META(s) (*(s) == META ? INT_META(*++(s)) : *s)
117 122
118/* These are the internal codes used for each type of meta-character */ 123/* These are the internal codes used for each type of meta-character */
119#define M_BEGLINE 256 /* internal code for ^ */ 124#define M_BEGLINE 256 /* internal code for ^ */
120#define M_ENDLINE 257 /* internal code for $ */ 125#define M_ENDLINE 257 /* internal code for $ */
121#define M_BEGWORD 258 /* internal code for \< */ 126#define M_BEGWORD 258 /* internal code for \< */
122#define M_ENDWORD 259 /* internal code for \> */ 127#define M_ENDWORD 259 /* internal code for \> */
123#define M_ANY 260 /* internal code for . */ 128#define M_ANY 260 /* internal code for . */
124#define M_SPLAT 261 /* internal code for * */ 129#define M_SPLAT 261 /* internal code for * */
125#define M_PLUS 262 /* internal code for \+ */ 130#define M_PLUS 262 /* internal code for \+ */
126#define M_QMARK 263 /* internal code for \? */ 131#define M_QMARK 263 /* internal code for \? */
127#define M_CLASS(n) (264+(n)) /* internal code for [] */ 132#define M_CLASS(n) (264+(n)) /* internal code for [] */
128#define M_START(n) (274+(n)) /* internal code for \( */ 133#define M_START(n) (274+(n)) /* internal code for \( */
129#define M_END(n) (284+(n)) /* internal code for \) */ 134#define M_END(n) (284+(n)) /* internal code for \) */
130 135
131/* These are used during compilation */ 136/* These are used during compilation */
132static int class_cnt; /* used to assign class IDs */ 137static int class_cnt; /* used to assign class IDs */
133static int start_cnt; /* used to assign start IDs */ 138static int start_cnt; /* used to assign start IDs */
134static int end_stk[NSUBEXP];/* used to assign end IDs */ 139static int end_stk[NSUBEXP]; /* used to assign end IDs */
135static int end_sp; 140static int end_sp;
136static char *retext; /* points to the text being compiled */ 141static char *retext; /* points to the text being compiled */
137 142
138/* error-handling stuff */ 143/* error-handling stuff */
139jmp_buf errorhandler; 144jmp_buf errorhandler;
145
140#define FAIL(why) do {fprintf(stderr, why); longjmp(errorhandler, 1);} while (0) 146#define FAIL(why) do {fprintf(stderr, why); longjmp(errorhandler, 1);} while (0)
141 147
142 148
@@ -145,68 +151,56 @@ jmp_buf errorhandler;
145/* This function builds a bitmap for a particular class */ 151/* This function builds a bitmap for a particular class */
146/* text -- start of the class */ 152/* text -- start of the class */
147/* bmap -- the bitmap */ 153/* bmap -- the bitmap */
148static char *makeclass(char* text, char* bmap) 154static char *makeclass(char *text, char *bmap)
149{ 155{
150 int i; 156 int i;
151 int complement = 0; 157 int complement = 0;
152 158
153 159
154 /* zero the bitmap */ 160 /* zero the bitmap */
155 for (i = 0; bmap && i < 32; i++) 161 for (i = 0; bmap && i < 32; i++) {
156 {
157 bmap[i] = 0; 162 bmap[i] = 0;
158 } 163 }
159 164
160 /* see if we're going to complement this class */ 165 /* see if we're going to complement this class */
161 if (*text == '^') 166 if (*text == '^') {
162 {
163 text++; 167 text++;
164 complement = 1; 168 complement = 1;
165 } 169 }
166 170
167 /* add in the characters */ 171 /* add in the characters */
168 while (*text && *text != ']') 172 while (*text && *text != ']') {
169 {
170 /* is this a span of characters? */ 173 /* is this a span of characters? */
171 if (text[1] == '-' && text[2]) 174 if (text[1] == '-' && text[2]) {
172 {
173 /* spans can't be backwards */ 175 /* spans can't be backwards */
174 if (text[0] > text[2]) 176 if (text[0] > text[2]) {
175 {
176 FAIL("Backwards span in []"); 177 FAIL("Backwards span in []");
177 } 178 }
178 179
179 /* add each character in the span to the bitmap */ 180 /* add each character in the span to the bitmap */
180 for (i = text[0]; bmap && i <= text[2]; i++) 181 for (i = text[0]; bmap && i <= text[2]; i++) {
181 {
182 bmap[i >> 3] |= (1 << (i & 7)); 182 bmap[i >> 3] |= (1 << (i & 7));
183 } 183 }
184 184
185 /* move past this span */ 185 /* move past this span */
186 text += 3; 186 text += 3;
187 } 187 } else {
188 else
189 {
190 /* add this single character to the span */ 188 /* add this single character to the span */
191 i = *text++; 189 i = *text++;
192 if (bmap) 190 if (bmap) {
193 {
194 bmap[i >> 3] |= (1 << (i & 7)); 191 bmap[i >> 3] |= (1 << (i & 7));
195 } 192 }
196 } 193 }
197 } 194 }
198 195
199 /* make sure the closing ] is missing */ 196 /* make sure the closing ] is missing */
200 if (*text++ != ']') 197 if (*text++ != ']') {
201 {
202 FAIL("] missing"); 198 FAIL("] missing");
203 } 199 }
204 200
205 /* if we're supposed to complement this class, then do so */ 201 /* if we're supposed to complement this class, then do so */
206 if (complement && bmap) 202 if (complement && bmap) {
207 { 203 for (i = 0; i < 32; i++) {
208 for (i = 0; i < 32; i++)
209 {
210 bmap[i] = ~bmap[i]; 204 bmap[i] = ~bmap[i];
211 } 205 }
212 } 206 }
@@ -223,105 +217,92 @@ static char *makeclass(char* text, char* bmap)
223 * character-class text is skipped. 217 * character-class text is skipped.
224 */ 218 */
225static int gettoken(sptr, re) 219static int gettoken(sptr, re)
226 char **sptr; 220char **sptr;
227 regexp *re; 221regexp *re;
228{ 222{
229 int c; 223 int c;
230 224
231 c = **sptr; 225 c = **sptr;
232 ++*sptr; 226 ++*sptr;
233 if (c == '\\') 227 if (c == '\\') {
234 {
235 c = **sptr; 228 c = **sptr;
236 ++*sptr; 229 ++*sptr;
237 switch (c) 230 switch (c) {
238 { 231 case '<':
239 case '<':
240 return M_BEGWORD; 232 return M_BEGWORD;
241 233
242 case '>': 234 case '>':
243 return M_ENDWORD; 235 return M_ENDWORD;
244 236
245 case '(': 237 case '(':
246 if (start_cnt >= NSUBEXP) 238 if (start_cnt >= NSUBEXP) {
247 {
248 FAIL("Too many \\(s"); 239 FAIL("Too many \\(s");
249 } 240 }
250 end_stk[end_sp++] = start_cnt; 241 end_stk[end_sp++] = start_cnt;
251 return M_START(start_cnt++); 242 return M_START(start_cnt++);
252 243
253 case ')': 244 case ')':
254 if (end_sp <= 0) 245 if (end_sp <= 0) {
255 {
256 FAIL("Mismatched \\)"); 246 FAIL("Mismatched \\)");
257 } 247 }
258 return M_END(end_stk[--end_sp]); 248 return M_END(end_stk[--end_sp]);
259 249
260 case '*': 250 case '*':
261 return M_SPLAT; 251 return M_SPLAT;
262 252
263 case '.': 253 case '.':
264 return M_ANY; 254 return M_ANY;
265 255
266 case '+': 256 case '+':
267 return M_PLUS; 257 return M_PLUS;
268 258
269 case '?': 259 case '?':
270 return M_QMARK; 260 return M_QMARK;
271 261
272 default: 262 default:
273 return c; 263 return c;
274 } 264 }
275 } 265 } else {
276 else { 266 switch (c) {
277 switch (c) 267 case '^':
278 { 268 if (*sptr == retext + 1) {
279 case '^':
280 if (*sptr == retext + 1)
281 {
282 return M_BEGLINE; 269 return M_BEGLINE;
283 } 270 }
284 return c; 271 return c;
285 272
286 case '$': 273 case '$':
287 if (!**sptr) 274 if (!**sptr) {
288 {
289 return M_ENDLINE; 275 return M_ENDLINE;
290 } 276 }
291 return c; 277 return c;
292 278
293 case '.': 279 case '.':
294 return M_ANY; 280 return M_ANY;
295 281
296 case '*': 282 case '*':
297 return M_SPLAT; 283 return M_SPLAT;
298 284
299 case '[': 285 case '[':
300 /* make sure we don't have too many classes */ 286 /* make sure we don't have too many classes */
301 if (class_cnt >= 10) 287 if (class_cnt >= 10) {
302 {
303 FAIL("Too many []s"); 288 FAIL("Too many []s");
304 } 289 }
305 290
306 /* process the character list for this class */ 291 /* process the character list for this class */
307 if (re) 292 if (re) {
308 {
309 /* generate the bitmap for this class */ 293 /* generate the bitmap for this class */
310 *sptr = makeclass(*sptr, re->program + 1 + 32 * class_cnt); 294 *sptr = makeclass(*sptr, re->program + 1 + 32 * class_cnt);
311 } 295 } else {
312 else
313 {
314 /* skip to end of the class */ 296 /* skip to end of the class */
315 *sptr = makeclass(*sptr, (char *)0); 297 *sptr = makeclass(*sptr, (char *) 0);
316 } 298 }
317 return M_CLASS(class_cnt++); 299 return M_CLASS(class_cnt++);
318 300
319 default: 301 default:
320 return c; 302 return c;
321 } 303 }
322 } 304 }
323 /*NOTREACHED*/ 305 /*NOTREACHED*/}
324}
325 306
326 307
327 308
@@ -331,28 +312,22 @@ static int gettoken(sptr, re)
331 * about catching syntax errors; that is done in a later pass. 312 * about catching syntax errors; that is done in a later pass.
332 */ 313 */
333static unsigned calcsize(text) 314static unsigned calcsize(text)
334 char *text; 315char *text;
335{ 316{
336 unsigned size; 317 unsigned size;
337 int token; 318 int token;
338 319
339 retext = text; 320 retext = text;
340 class_cnt = 0; 321 class_cnt = 0;
341 start_cnt = 1; 322 start_cnt = 1;
342 end_sp = 0; 323 end_sp = 0;
343 size = 5; 324 size = 5;
344 while ((token = gettoken(&text, (regexp *)0)) != 0) 325 while ((token = gettoken(&text, (regexp *) 0)) != 0) {
345 { 326 if (IS_CLASS(token)) {
346 if (IS_CLASS(token))
347 {
348 size += 34; 327 size += 34;
349 } 328 } else if (IS_META(token)) {
350 else if (IS_META(token))
351 {
352 size += 2; 329 size += 2;
353 } 330 } else {
354 else
355 {
356 size++; 331 size++;
357 } 332 }
358 } 333 }
@@ -369,26 +344,23 @@ static unsigned calcsize(text)
369 * known to represent a single character. It returns 0 if they match, or 344 * known to represent a single character. It returns 0 if they match, or
370 * 1 if they don't. 345 * 1 if they don't.
371 */ 346 */
372static int match1(regexp* re, char ch, int token, int ignoreCase) 347static int match1(regexp * re, char ch, int token, int ignoreCase)
373{ 348{
374 if (!ch) 349 if (!ch) {
375 {
376 /* the end of a line can't match any RE of width 1 */ 350 /* the end of a line can't match any RE of width 1 */
377 return 1; 351 return 1;
378 } 352 }
379 if (token == M_ANY) 353 if (token == M_ANY) {
380 {
381 return 0; 354 return 0;
382 } 355 } else if (IS_CLASS(token)) {
383 else if (IS_CLASS(token)) 356 if (re->
384 { 357 program[1 + 32 * (token - M_CLASS(0)) +
385 if (re->program[1 + 32 * (token - M_CLASS(0)) + (ch >> 3)] & (1 << (ch & 7))) 358 (ch >> 3)] & (1 << (ch & 7)))
386 return 0; 359 return 0;
387 } 360 }
388//fprintf(stderr, "match1: ch='%c' token='%c': ", ch, token); 361//fprintf(stderr, "match1: ch='%c' token='%c': ", ch, token);
389 if (ch == token 362 if (ch == token
390 || (ignoreCase==TRUE && tolower(ch) == tolower(token))) 363 || (ignoreCase == TRUE && tolower(ch) == tolower(token))) {
391 {
392//fprintf(stderr, "match\n"); 364//fprintf(stderr, "match\n");
393 return 0; 365 return 0;
394 } 366 }
@@ -406,65 +378,63 @@ static int match1(regexp* re, char ch, int token, int ignoreCase)
406/* str -- the string */ 378/* str -- the string */
407/* prog -- a portion of re->program, an compiled RE */ 379/* prog -- a portion of re->program, an compiled RE */
408/* here -- a portion of str, the string to compare it to */ 380/* here -- a portion of str, the string to compare it to */
409static int match(regexp* re, char* str, char* prog, char* here, int ignoreCase) 381static int match(regexp * re, char *str, char *prog, char *here,
382 int ignoreCase)
410{ 383{
411 int token; 384 int token;
412 int nmatched; 385 int nmatched;
413 int closure; 386 int closure;
414 387
415 for (token = GET_META(prog); !IS_CLOSURE(token); prog++, token = GET_META(prog)) 388 for (token = GET_META(prog); !IS_CLOSURE(token);
416 { 389 prog++, token = GET_META(prog)) {
417 switch (token) 390 switch (token) {
418 { 391 /*case M_BEGLINE: can't happen; re->bol is used instead */
419 /*case M_BEGLINE: can't happen; re->bol is used instead */ 392 case M_ENDLINE:
420 case M_ENDLINE:
421 if (*here) 393 if (*here)
422 return 1; 394 return 1;
423 break; 395 break;
424 396
425 case M_BEGWORD: 397 case M_BEGWORD:
426 if (here != str && 398 if (here != str &&
427 (here[-1] == '_' || 399 (here[-1] == '_' ||
428 (isascii(here[-1]) && isalnum(here[-1])))) 400 (isascii(here[-1]) && isalnum(here[-1])))) return 1;
429 return 1;
430 break; 401 break;
431 402
432 case M_ENDWORD: 403 case M_ENDWORD:
433 if ((here[0] == '_' || isascii(here[0])) && isalnum(here[0])) 404 if ((here[0] == '_' || isascii(here[0])) && isalnum(here[0]))
434 return 1; 405 return 1;
435 break; 406 break;
436 407
437 case M_START(0): 408 case M_START(0):
438 case M_START(1): 409 case M_START(1):
439 case M_START(2): 410 case M_START(2):
440 case M_START(3): 411 case M_START(3):
441 case M_START(4): 412 case M_START(4):
442 case M_START(5): 413 case M_START(5):
443 case M_START(6): 414 case M_START(6):
444 case M_START(7): 415 case M_START(7):
445 case M_START(8): 416 case M_START(8):
446 case M_START(9): 417 case M_START(9):
447 re->startp[token - M_START(0)] = (char *)here; 418 re->startp[token - M_START(0)] = (char *) here;
448 break; 419 break;
449 420
450 case M_END(0): 421 case M_END(0):
451 case M_END(1): 422 case M_END(1):
452 case M_END(2): 423 case M_END(2):
453 case M_END(3): 424 case M_END(3):
454 case M_END(4): 425 case M_END(4):
455 case M_END(5): 426 case M_END(5):
456 case M_END(6): 427 case M_END(6):
457 case M_END(7): 428 case M_END(7):
458 case M_END(8): 429 case M_END(8):
459 case M_END(9): 430 case M_END(9):
460 re->endp[token - M_END(0)] = (char *)here; 431 re->endp[token - M_END(0)] = (char *) here;
461 if (token == M_END(0)) 432 if (token == M_END(0)) {
462 {
463 return 0; 433 return 0;
464 } 434 }
465 break; 435 break;
466 436
467 default: /* literal, M_CLASS(n), or M_ANY */ 437 default: /* literal, M_CLASS(n), or M_ANY */
468 if (match1(re, *here, token, ignoreCase) != 0) 438 if (match1(re, *here, token, ignoreCase) != 0)
469 return 1; 439 return 1;
470 here++; 440 here++;
@@ -482,14 +452,12 @@ static int match(regexp* re, char* str, char* prog, char* here, int ignoreCase)
482 452
483 /* step 2: see how many times we can match that token against the string */ 453 /* step 2: see how many times we can match that token against the string */
484 for (nmatched = 0; 454 for (nmatched = 0;
485 (closure != M_QMARK || nmatched < 1) && *here && match1(re, *here, token, ignoreCase) == 0; 455 (closure != M_QMARK || nmatched < 1) && *here
486 nmatched++, here++) 456 && match1(re, *here, token, ignoreCase) == 0; nmatched++, here++) {
487 {
488 } 457 }
489 458
490 /* step 3: try to match the remainder, and back off if it doesn't */ 459 /* step 3: try to match the remainder, and back off if it doesn't */
491 while (nmatched >= 0 && match(re, str, prog, here, ignoreCase) != 0) 460 while (nmatched >= 0 && match(re, str, prog, here, ignoreCase) != 0) {
492 {
493 nmatched--; 461 nmatched--;
494 here--; 462 here--;
495 } 463 }
@@ -502,41 +470,36 @@ static int match(regexp* re, char* str, char* prog, char* here, int ignoreCase)
502 470
503 471
504/* This function compiles a regexp. */ 472/* This function compiles a regexp. */
505extern regexp *regcomp(char* text) 473extern regexp *regcomp(char *text)
506{ 474{
507 int needfirst; 475 int needfirst;
508 unsigned size; 476 unsigned size;
509 int token; 477 int token;
510 int peek; 478 int peek;
511 char *build; 479 char *build;
512 regexp *re; // Ignore compiler whining. If we longjmp, we don't use re anymore. 480 regexp *re; // Ignore compiler whining. If we longjmp, we don't use re anymore.
513 481
514 482
515 /* prepare for error handling */ 483 /* prepare for error handling */
516 re = (regexp *)0; 484 re = (regexp *) 0;
517 if (setjmp(errorhandler)) 485 if (setjmp(errorhandler)) {
518 { 486 if (re) {
519 if (re)
520 {
521 free(re); 487 free(re);
522 } 488 }
523 return (regexp *)0; 489 return (regexp *) 0;
524 } 490 }
525 491
526 /* if an empty regexp string was given, use the previous one */ 492 /* if an empty regexp string was given, use the previous one */
527 if (*text == 0) 493 if (*text == 0) {
528 { 494 if (!previous) {
529 if (!previous)
530 {
531 FAIL("No previous RE"); 495 FAIL("No previous RE");
532 } 496 }
533 text = previous; 497 text = previous;
534 } 498 } else { /* non-empty regexp given, so remember it */
535 else /* non-empty regexp given, so remember it */ 499
536 {
537 if (previous) 500 if (previous)
538 free(previous); 501 free(previous);
539 previous = (char *)malloc((unsigned)(strlen(text) + 1)); 502 previous = (char *) malloc((unsigned) (strlen(text) + 1));
540 if (previous) 503 if (previous)
541 strcpy(previous, text); 504 strcpy(previous, text);
542 } 505 }
@@ -547,19 +510,17 @@ extern regexp *regcomp(char* text)
547 end_sp = 0; 510 end_sp = 0;
548 retext = text; 511 retext = text;
549 size = calcsize(text) + sizeof(regexp); 512 size = calcsize(text) + sizeof(regexp);
550 re = (regexp *)malloc((unsigned)size); 513 re = (regexp *) malloc((unsigned) size);
551 514
552 if (!re) 515 if (!re) {
553 {
554 FAIL("Not enough memory for this RE"); 516 FAIL("Not enough memory for this RE");
555 } 517 }
556 518
557 /* compile it */ 519 /* compile it */
558 build = &re->program[1 + 32 * class_cnt]; 520 build = &re->program[1 + 32 * class_cnt];
559 re->program[0] = class_cnt; 521 re->program[0] = class_cnt;
560 for (token = 0; token < NSUBEXP; token++) 522 for (token = 0; token < NSUBEXP; token++) {
561 { 523 re->startp[token] = re->endp[token] = (char *) 0;
562 re->startp[token] = re->endp[token] = (char *)0;
563 } 524 }
564 re->first = 0; 525 re->first = 0;
565 re->bol = 0; 526 re->bol = 0;
@@ -570,76 +531,60 @@ extern regexp *regcomp(char* text)
570 end_sp = 0; 531 end_sp = 0;
571 retext = text; 532 retext = text;
572 for (token = M_START(0), peek = gettoken(&text, re); 533 for (token = M_START(0), peek = gettoken(&text, re);
573 token; 534 token; token = peek, peek = gettoken(&text, re)) {
574 token = peek, peek = gettoken(&text, re))
575 {
576 /* special processing for the closure operator */ 535 /* special processing for the closure operator */
577 if (IS_CLOSURE(peek)) 536 if (IS_CLOSURE(peek)) {
578 {
579 /* detect misuse of closure operator */ 537 /* detect misuse of closure operator */
580 if (IS_START(token)) 538 if (IS_START(token)) {
581 {
582 FAIL("* or \\+ or \\? follows nothing"); 539 FAIL("* or \\+ or \\? follows nothing");
583 } 540 }
584 else if (IS_META(token) && token != M_ANY && !IS_CLASS(token)) 541 else if (IS_META(token) && token != M_ANY
585 { 542 && !IS_CLASS(token)) {
586 FAIL("* or \\+ or \\? can only follow a normal character or . or []"); 543 FAIL
544 ("* or \\+ or \\? can only follow a normal character or . or []");
587 } 545 }
588 546
589 /* it is okay -- make it prefix instead of postfix */ 547 /* it is okay -- make it prefix instead of postfix */
590 ADD_META(build, peek); 548 ADD_META(build, peek);
591 549
592 /* take care of "needfirst" - is this the first char? */ 550 /* take care of "needfirst" - is this the first char? */
593 if (needfirst && peek == M_PLUS && !IS_META(token)) 551 if (needfirst && peek == M_PLUS && !IS_META(token)) {
594 {
595 re->first = token; 552 re->first = token;
596 } 553 }
597 needfirst = 0; 554 needfirst = 0;
598 555
599 /* we used "peek" -- need to refill it */ 556 /* we used "peek" -- need to refill it */
600 peek = gettoken(&text, re); 557 peek = gettoken(&text, re);
601 if (IS_CLOSURE(peek)) 558 if (IS_CLOSURE(peek)) {
602 {
603 FAIL("* or \\+ or \\? doubled up"); 559 FAIL("* or \\+ or \\? doubled up");
604 } 560 }
605 } 561 } else if (!IS_META(token)) {
606 else if (!IS_META(token))
607 {
608 /* normal char is NOT argument of closure */ 562 /* normal char is NOT argument of closure */
609 if (needfirst) 563 if (needfirst) {
610 {
611 re->first = token; 564 re->first = token;
612 needfirst = 0; 565 needfirst = 0;
613 } 566 }
614 re->minlen++; 567 re->minlen++;
615 } 568 } else if (token == M_ANY || IS_CLASS(token)) {
616 else if (token == M_ANY || IS_CLASS(token))
617 {
618 /* . or [] is NOT argument of closure */ 569 /* . or [] is NOT argument of closure */
619 needfirst = 0; 570 needfirst = 0;
620 re->minlen++; 571 re->minlen++;
621 } 572 }
622 573
623 /* the "token" character is not closure -- process it normally */ 574 /* the "token" character is not closure -- process it normally */
624 if (token == M_BEGLINE) 575 if (token == M_BEGLINE) {
625 {
626 /* set the BOL flag instead of storing M_BEGLINE */ 576 /* set the BOL flag instead of storing M_BEGLINE */
627 re->bol = 1; 577 re->bol = 1;
628 } 578 } else if (IS_META(token)) {
629 else if (IS_META(token))
630 {
631 ADD_META(build, token); 579 ADD_META(build, token);
632 } 580 } else {
633 else
634 {
635 *build++ = token; 581 *build++ = token;
636 } 582 }
637 } 583 }
638 584
639 /* end it with a \) which MUST MATCH the opening \( */ 585 /* end it with a \) which MUST MATCH the opening \( */
640 ADD_META(build, M_END(0)); 586 ADD_META(build, M_END(0));
641 if (end_sp > 0) 587 if (end_sp > 0) {
642 {
643 FAIL("Not enough \\)s"); 588 FAIL("Not enough \\)s");
644 } 589 }
645 590
@@ -654,15 +599,14 @@ extern regexp *regcomp(char* text)
654/* str -- the string to search through */ 599/* str -- the string to search through */
655/* bol -- does str start at the beginning of a line? (boolean) */ 600/* bol -- does str start at the beginning of a line? (boolean) */
656/* ignoreCase -- ignoreCase or not */ 601/* ignoreCase -- ignoreCase or not */
657extern int regexec(struct regexp* re, char* str, int bol, int ignoreCase) 602extern int regexec(struct regexp *re, char *str, int bol, int ignoreCase)
658{ 603{
659 char *prog; /* the entry point of re->program */ 604 char *prog; /* the entry point of re->program */
660 int len; /* length of the string */ 605 int len; /* length of the string */
661 char *here; 606 char *here;
662 607
663 /* if must start at the beginning of a line, and this isn't, then fail */ 608 /* if must start at the beginning of a line, and this isn't, then fail */
664 if (re->bol && bol==TRUE) 609 if (re->bol && bol == TRUE) {
665 {
666 return FALSE; 610 return FALSE;
667 } 611 }
668 612
@@ -670,35 +614,26 @@ extern int regexec(struct regexp* re, char* str, int bol, int ignoreCase)
670 prog = re->program + 1 + 32 * re->program[0]; 614 prog = re->program + 1 + 32 * re->program[0];
671 615
672 /* search for the RE in the string */ 616 /* search for the RE in the string */
673 if (re->bol) 617 if (re->bol) {
674 {
675 /* must occur at BOL */ 618 /* must occur at BOL */
676 if ((re->first 619 if ((re->first && match1(re, *(char *) str, re->first, ignoreCase)) /* wrong first letter? */
677 && match1(re, *(char *)str, re->first, ignoreCase))/* wrong first letter? */ 620 ||len < re->minlen /* not long enough? */
678 || len < re->minlen /* not long enough? */ 621 || match(re, (char *) str, prog, str, ignoreCase)) /* doesn't match? */
679 || match(re, (char *)str, prog, str, ignoreCase)) /* doesn't match? */ 622 return FALSE; /* THEN FAIL! */
680 return FALSE; /* THEN FAIL! */ 623 } else if (ignoreCase == FALSE) {
681 }
682 else if (ignoreCase == FALSE)
683 {
684 /* can occur anywhere in the line, noignorecase */ 624 /* can occur anywhere in the line, noignorecase */
685 for (here = (char *)str; 625 for (here = (char *) str; (re->first && re->first != *here)
686 (re->first && re->first != *here) 626 || match(re, (char *) str, prog, here, ignoreCase);
687 || match(re, (char *)str, prog, here, ignoreCase); 627 here++, len--) {
688 here++, len--)
689 {
690 if (len < re->minlen) 628 if (len < re->minlen)
691 return FALSE; 629 return FALSE;
692 } 630 }
693 } 631 } else {
694 else
695 {
696 /* can occur anywhere in the line, ignorecase */ 632 /* can occur anywhere in the line, ignorecase */
697 for (here = (char *)str; 633 for (here = (char *) str;
698 (re->first && match1(re, *here, (int)re->first, ignoreCase)) 634 (re->first && match1(re, *here, (int) re->first, ignoreCase))
699 || match(re, (char *)str, prog, here, ignoreCase); 635 || match(re, (char *) str, prog, here, ignoreCase);
700 here++, len--) 636 here++, len--) {
701 {
702 if (len < re->minlen) 637 if (len < re->minlen)
703 return FALSE; 638 return FALSE;
704 } 639 }
@@ -713,82 +648,72 @@ extern int regexec(struct regexp* re, char* str, int bol, int ignoreCase)
713 648
714#if defined BB_SED 649#if defined BB_SED
715/* This performs substitutions after a regexp match has been found. */ 650/* This performs substitutions after a regexp match has been found. */
716extern void regsub(regexp* re, char* src, char* dst) 651extern void regsub(regexp * re, char *src, char *dst)
717{ 652{
718 char *cpy; 653 char *cpy;
719 char *end; 654 char *end;
720 char c; 655 char c;
721 char *start; 656 char *start;
722 int mod; 657 int mod;
723 658
724 mod = 0; 659 mod = 0;
725 660
726 start = src; 661 start = src;
727 while ((c = *src++) != '\0') 662 while ((c = *src++) != '\0') {
728 {
729 /* recognize any meta characters */ 663 /* recognize any meta characters */
730 if (c == '&') 664 if (c == '&') {
731 {
732 cpy = re->startp[0]; 665 cpy = re->startp[0];
733 end = re->endp[0]; 666 end = re->endp[0];
734 } 667 } else if (c == '~') {
735 else if (c == '~')
736 {
737 cpy = previous1; 668 cpy = previous1;
738 if (cpy) 669 if (cpy)
739 end = cpy + strlen(cpy); 670 end = cpy + strlen(cpy);
740 } 671 } else if (c == '\\') {
741 else
742 if (c == '\\')
743 {
744 c = *src++; 672 c = *src++;
745 switch (c) 673 switch (c) {
746 { 674 case '0':
747 case '0': 675 case '1':
748 case '1': 676 case '2':
749 case '2': 677 case '3':
750 case '3': 678 case '4':
751 case '4': 679 case '5':
752 case '5': 680 case '6':
753 case '6': 681 case '7':
754 case '7': 682 case '8':
755 case '8': 683 case '9':
756 case '9':
757 /* \0 thru \9 mean "copy subexpression" */ 684 /* \0 thru \9 mean "copy subexpression" */
758 c -= '0'; 685 c -= '0';
759 cpy = re->startp[(int)c]; 686 cpy = re->startp[(int) c];
760 end = re->endp[(int)c]; 687 end = re->endp[(int) c];
761 break; 688 break;
762 case 'U': 689 case 'U':
763 case 'u': 690 case 'u':
764 case 'L': 691 case 'L':
765 case 'l': 692 case 'l':
766 /* \U and \L mean "convert to upper/lowercase" */ 693 /* \U and \L mean "convert to upper/lowercase" */
767 mod = c; 694 mod = c;
768 continue; 695 continue;
769 696
770 case 'E': 697 case 'E':
771 case 'e': 698 case 'e':
772 /* \E ends the \U or \L */ 699 /* \E ends the \U or \L */
773 mod = 0; 700 mod = 0;
774 continue; 701 continue;
775 case '&': 702 case '&':
776 /* "\&" means "original text" */ 703 /* "\&" means "original text" */
777 *dst++ = c; 704 *dst++ = c;
778 continue; 705 continue;
779 706
780 case '~': 707 case '~':
781 /* "\~" means "previous text, if any" */ 708 /* "\~" means "previous text, if any" */
782 *dst++ = c; 709 *dst++ = c;
783 continue; 710 continue;
784 default: 711 default:
785 /* ordinary char preceded by backslash */ 712 /* ordinary char preceded by backslash */
786 *dst++ = c; 713 *dst++ = c;
787 continue; 714 continue;
788 } 715 }
789 } 716 } else {
790 else
791 {
792 /* ordinary character, so just copy it */ 717 /* ordinary character, so just copy it */
793 *dst++ = c; 718 *dst++ = c;
794 continue; 719 continue;
@@ -804,46 +729,37 @@ extern void regsub(regexp* re, char* src, char* dst)
804 continue; 729 continue;
805 730
806 /* copy over a portion of the original */ 731 /* copy over a portion of the original */
807 while (cpy < end) 732 while (cpy < end) {
808 { 733 switch (mod) {
809 switch (mod) 734 case 'U':
810 { 735 case 'u':
811 case 'U':
812 case 'u':
813 /* convert to uppercase */ 736 /* convert to uppercase */
814 if (isascii(*cpy) && islower(*cpy)) 737 if (isascii(*cpy) && islower(*cpy)) {
815 {
816 *dst++ = toupper(*cpy); 738 *dst++ = toupper(*cpy);
817 cpy++; 739 cpy++;
818 } 740 } else {
819 else
820 {
821 *dst++ = *cpy++; 741 *dst++ = *cpy++;
822 } 742 }
823 break; 743 break;
824 744
825 case 'L': 745 case 'L':
826 case 'l': 746 case 'l':
827 /* convert to lowercase */ 747 /* convert to lowercase */
828 if (isascii(*cpy) && isupper(*cpy)) 748 if (isascii(*cpy) && isupper(*cpy)) {
829 {
830 *dst++ = tolower(*cpy); 749 *dst++ = tolower(*cpy);
831 cpy++; 750 cpy++;
832 } 751 } else {
833 else
834 {
835 *dst++ = *cpy++; 752 *dst++ = *cpy++;
836 } 753 }
837 break; 754 break;
838 755
839 default: 756 default:
840 /* copy without any conversion */ 757 /* copy without any conversion */
841 *dst++ = *cpy++; 758 *dst++ = *cpy++;
842 } 759 }
843 760
844 /* \u and \l end automatically after the first char */ 761 /* \u and \l end automatically after the first char */
845 if (mod && (mod == 'u' || mod == 'l')) 762 if (mod && (mod == 'u' || mod == 'l')) {
846 {
847 mod = 0; 763 mod = 0;
848 } 764 }
849 } 765 }
@@ -853,12 +769,10 @@ extern void regsub(regexp* re, char* src, char* dst)
853 /* remember what text we inserted this time */ 769 /* remember what text we inserted this time */
854 if (previous1) 770 if (previous1)
855 free(previous1); 771 free(previous1);
856 previous1 = (char *)malloc((unsigned)(strlen(start) + 1)); 772 previous1 = (char *) malloc((unsigned) (strlen(start) + 1));
857 if (previous1) 773 if (previous1)
858 strcpy(previous1, start); 774 strcpy(previous1, start);
859} 775}
860#endif 776#endif
861 777
862#endif /* BB_REGEXP */ 778#endif /* BB_REGEXP */
863
864