diff options
author | Erik Andersen <andersen@codepoet.org> | 2000-02-08 19:58:47 +0000 |
---|---|---|
committer | Erik Andersen <andersen@codepoet.org> | 2000-02-08 19:58:47 +0000 |
commit | e49d5ecbbe51718fa925b6890a735e5937cc2aa2 (patch) | |
tree | c90bda10731ad9333ce3b404f993354c9fc104b8 /regexp.c | |
parent | c0bf817bbc5c7867fbe8fb76d5c39f8ee802692f (diff) | |
download | busybox-w32-e49d5ecbbe51718fa925b6890a735e5937cc2aa2.tar.gz busybox-w32-e49d5ecbbe51718fa925b6890a735e5937cc2aa2.tar.bz2 busybox-w32-e49d5ecbbe51718fa925b6890a735e5937cc2aa2.zip |
Some formatting updates (ran the code through indent)
-Erik
Diffstat (limited to 'regexp.c')
-rw-r--r-- | regexp.c | 602 |
1 files changed, 258 insertions, 344 deletions
@@ -1,3 +1,4 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
1 | /* regexp.c */ | 2 | /* regexp.c */ |
2 | 3 | ||
3 | #include "internal.h" | 4 | #include "internal.h" |
@@ -17,44 +18,47 @@ | |||
17 | */ | 18 | */ |
18 | extern int find_match(char *haystack, char *needle, int ignoreCase) | 19 | extern int find_match(char *haystack, char *needle, int ignoreCase) |
19 | { | 20 | { |
20 | int status; | 21 | int status; |
21 | struct regexp* re; | 22 | struct regexp *re; |
22 | re = regcomp( needle); | 23 | |
23 | status = regexec(re, haystack, FALSE, ignoreCase); | 24 | re = regcomp(needle); |
24 | free( re); | 25 | status = regexec(re, haystack, FALSE, ignoreCase); |
25 | return( status); | 26 | free(re); |
27 | return (status); | ||
26 | } | 28 | } |
27 | 29 | ||
28 | #if defined BB_SED | 30 | #if defined BB_SED |
29 | /* This performs substitutions after a regexp match has been found. | 31 | /* This performs substitutions after a regexp match has been found. |
30 | * The new string is returned. It is malloc'ed, and so must be freed. */ | 32 | * The new string is returned. It is malloc'ed, and so must be freed. */ |
31 | extern int replace_match(char *haystack, char *needle, char *newNeedle, int ignoreCase) | 33 | extern int replace_match(char *haystack, char *needle, char *newNeedle, |
34 | int ignoreCase) | ||
32 | { | 35 | { |
33 | int status; | 36 | int status; |
34 | struct regexp* re; | 37 | struct regexp *re; |
35 | char *s, buf[BUF_SIZE], *d = buf; | 38 | char *s, buf[BUF_SIZE], *d = buf; |
36 | 39 | ||
37 | re = regcomp( needle); | 40 | re = regcomp(needle); |
38 | status = regexec(re, haystack, FALSE, ignoreCase); | 41 | status = regexec(re, haystack, FALSE, ignoreCase); |
39 | if (status==TRUE) { | 42 | if (status == TRUE) { |
40 | s=haystack; | 43 | s = haystack; |
41 | 44 | ||
42 | do { | 45 | do { |
43 | /* copy stuff from before the match */ | 46 | /* copy stuff from before the match */ |
44 | while (s < re->startp[0]) | 47 | while (s < re->startp[0]) |
45 | *d++ = *s++; | 48 | *d++ = *s++; |
46 | /* substitute for the matched part */ | 49 | /* substitute for the matched part */ |
47 | regsub(re, newNeedle, d); | 50 | regsub(re, newNeedle, d); |
48 | s = re->endp[0]; | 51 | s = re->endp[0]; |
49 | d += strlen(d); | 52 | d += strlen(d); |
50 | } while (regexec(re, s, FALSE, ignoreCase) == TRUE); | 53 | } while (regexec(re, s, FALSE, ignoreCase) == TRUE); |
51 | /* copy stuff from after the match */ | 54 | /* copy stuff from after the match */ |
52 | while ( (*d++ = *s++) ) {} | 55 | while ((*d++ = *s++)) { |
53 | d[0] = '\0'; | 56 | } |
54 | strcpy(haystack, buf); | 57 | d[0] = '\0'; |
55 | } | 58 | strcpy(haystack, buf); |
56 | free( re); | 59 | } |
57 | return( status); | 60 | free(re); |
61 | return (status); | ||
58 | } | 62 | } |
59 | #endif | 63 | #endif |
60 | 64 | ||
@@ -97,9 +101,10 @@ extern int replace_match(char *haystack, char *needle, char *newNeedle, int igno | |||
97 | 101 | ||
98 | 102 | ||
99 | 103 | ||
100 | static char *previous; /* the previous regexp, used when null regexp is given */ | 104 | static char *previous; /* the previous regexp, used when null regexp is given */ |
105 | |||
101 | #if defined BB_SED | 106 | #if defined BB_SED |
102 | static char *previous1; /* a copy of the text from the previous substitution for regsub()*/ | 107 | static char *previous1; /* a copy of the text from the previous substitution for regsub() */ |
103 | #endif | 108 | #endif |
104 | 109 | ||
105 | 110 | ||
@@ -116,27 +121,28 @@ static char *previous1; /* a copy of the text from the previous substitution for | |||
116 | #define GET_META(s) (*(s) == META ? INT_META(*++(s)) : *s) | 121 | #define GET_META(s) (*(s) == META ? INT_META(*++(s)) : *s) |
117 | 122 | ||
118 | /* These are the internal codes used for each type of meta-character */ | 123 | /* These are the internal codes used for each type of meta-character */ |
119 | #define M_BEGLINE 256 /* internal code for ^ */ | 124 | #define M_BEGLINE 256 /* internal code for ^ */ |
120 | #define M_ENDLINE 257 /* internal code for $ */ | 125 | #define M_ENDLINE 257 /* internal code for $ */ |
121 | #define M_BEGWORD 258 /* internal code for \< */ | 126 | #define M_BEGWORD 258 /* internal code for \< */ |
122 | #define M_ENDWORD 259 /* internal code for \> */ | 127 | #define M_ENDWORD 259 /* internal code for \> */ |
123 | #define M_ANY 260 /* internal code for . */ | 128 | #define M_ANY 260 /* internal code for . */ |
124 | #define M_SPLAT 261 /* internal code for * */ | 129 | #define M_SPLAT 261 /* internal code for * */ |
125 | #define M_PLUS 262 /* internal code for \+ */ | 130 | #define M_PLUS 262 /* internal code for \+ */ |
126 | #define M_QMARK 263 /* internal code for \? */ | 131 | #define M_QMARK 263 /* internal code for \? */ |
127 | #define M_CLASS(n) (264+(n)) /* internal code for [] */ | 132 | #define M_CLASS(n) (264+(n)) /* internal code for [] */ |
128 | #define M_START(n) (274+(n)) /* internal code for \( */ | 133 | #define M_START(n) (274+(n)) /* internal code for \( */ |
129 | #define M_END(n) (284+(n)) /* internal code for \) */ | 134 | #define M_END(n) (284+(n)) /* internal code for \) */ |
130 | 135 | ||
131 | /* These are used during compilation */ | 136 | /* These are used during compilation */ |
132 | static int class_cnt; /* used to assign class IDs */ | 137 | static int class_cnt; /* used to assign class IDs */ |
133 | static int start_cnt; /* used to assign start IDs */ | 138 | static int start_cnt; /* used to assign start IDs */ |
134 | static int end_stk[NSUBEXP];/* used to assign end IDs */ | 139 | static int end_stk[NSUBEXP]; /* used to assign end IDs */ |
135 | static int end_sp; | 140 | static int end_sp; |
136 | static char *retext; /* points to the text being compiled */ | 141 | static char *retext; /* points to the text being compiled */ |
137 | 142 | ||
138 | /* error-handling stuff */ | 143 | /* error-handling stuff */ |
139 | jmp_buf errorhandler; | 144 | jmp_buf errorhandler; |
145 | |||
140 | #define FAIL(why) do {fprintf(stderr, why); longjmp(errorhandler, 1);} while (0) | 146 | #define FAIL(why) do {fprintf(stderr, why); longjmp(errorhandler, 1);} while (0) |
141 | 147 | ||
142 | 148 | ||
@@ -145,68 +151,56 @@ jmp_buf errorhandler; | |||
145 | /* This function builds a bitmap for a particular class */ | 151 | /* This function builds a bitmap for a particular class */ |
146 | /* text -- start of the class */ | 152 | /* text -- start of the class */ |
147 | /* bmap -- the bitmap */ | 153 | /* bmap -- the bitmap */ |
148 | static char *makeclass(char* text, char* bmap) | 154 | static char *makeclass(char *text, char *bmap) |
149 | { | 155 | { |
150 | int i; | 156 | int i; |
151 | int complement = 0; | 157 | int complement = 0; |
152 | 158 | ||
153 | 159 | ||
154 | /* zero the bitmap */ | 160 | /* zero the bitmap */ |
155 | for (i = 0; bmap && i < 32; i++) | 161 | for (i = 0; bmap && i < 32; i++) { |
156 | { | ||
157 | bmap[i] = 0; | 162 | bmap[i] = 0; |
158 | } | 163 | } |
159 | 164 | ||
160 | /* see if we're going to complement this class */ | 165 | /* see if we're going to complement this class */ |
161 | if (*text == '^') | 166 | if (*text == '^') { |
162 | { | ||
163 | text++; | 167 | text++; |
164 | complement = 1; | 168 | complement = 1; |
165 | } | 169 | } |
166 | 170 | ||
167 | /* add in the characters */ | 171 | /* add in the characters */ |
168 | while (*text && *text != ']') | 172 | while (*text && *text != ']') { |
169 | { | ||
170 | /* is this a span of characters? */ | 173 | /* is this a span of characters? */ |
171 | if (text[1] == '-' && text[2]) | 174 | if (text[1] == '-' && text[2]) { |
172 | { | ||
173 | /* spans can't be backwards */ | 175 | /* spans can't be backwards */ |
174 | if (text[0] > text[2]) | 176 | if (text[0] > text[2]) { |
175 | { | ||
176 | FAIL("Backwards span in []"); | 177 | FAIL("Backwards span in []"); |
177 | } | 178 | } |
178 | 179 | ||
179 | /* add each character in the span to the bitmap */ | 180 | /* add each character in the span to the bitmap */ |
180 | for (i = text[0]; bmap && i <= text[2]; i++) | 181 | for (i = text[0]; bmap && i <= text[2]; i++) { |
181 | { | ||
182 | bmap[i >> 3] |= (1 << (i & 7)); | 182 | bmap[i >> 3] |= (1 << (i & 7)); |
183 | } | 183 | } |
184 | 184 | ||
185 | /* move past this span */ | 185 | /* move past this span */ |
186 | text += 3; | 186 | text += 3; |
187 | } | 187 | } else { |
188 | else | ||
189 | { | ||
190 | /* add this single character to the span */ | 188 | /* add this single character to the span */ |
191 | i = *text++; | 189 | i = *text++; |
192 | if (bmap) | 190 | if (bmap) { |
193 | { | ||
194 | bmap[i >> 3] |= (1 << (i & 7)); | 191 | bmap[i >> 3] |= (1 << (i & 7)); |
195 | } | 192 | } |
196 | } | 193 | } |
197 | } | 194 | } |
198 | 195 | ||
199 | /* make sure the closing ] is not missing */ | 196 | /* make sure the closing ] is not missing */ |
200 | if (*text++ != ']') | 197 | if (*text++ != ']') { |
201 | { | ||
202 | FAIL("] missing"); | 198 | FAIL("] missing"); |
203 | } | 199 | } |
204 | 200 | ||
205 | /* if we're supposed to complement this class, then do so */ | 201 | /* if we're supposed to complement this class, then do so */ |
206 | if (complement && bmap) | 202 | if (complement && bmap) { |
207 | { | 203 | for (i = 0; i < 32; i++) { |
208 | for (i = 0; i < 32; i++) | ||
209 | { | ||
210 | bmap[i] = ~bmap[i]; | 204 | bmap[i] = ~bmap[i]; |
211 | } | 205 | } |
212 | } | 206 | } |
@@ -223,105 +217,92 @@ static char *makeclass(char* text, char* bmap) | |||
223 | * character-class text is skipped. | 217 | * character-class text is skipped. |
224 | */ | 218 | */ |
225 | static int gettoken(sptr, re) | 219 | static int gettoken(sptr, re) |
226 | char **sptr; | 220 | char **sptr; |
227 | regexp *re; | 221 | regexp *re; |
228 | { | 222 | { |
229 | int c; | 223 | int c; |
230 | 224 | ||
231 | c = **sptr; | 225 | c = **sptr; |
232 | ++*sptr; | 226 | ++*sptr; |
233 | if (c == '\\') | 227 | if (c == '\\') { |
234 | { | ||
235 | c = **sptr; | 228 | c = **sptr; |
236 | ++*sptr; | 229 | ++*sptr; |
237 | switch (c) | 230 | switch (c) { |
238 | { | 231 | case '<': |
239 | case '<': | ||
240 | return M_BEGWORD; | 232 | return M_BEGWORD; |
241 | 233 | ||
242 | case '>': | 234 | case '>': |
243 | return M_ENDWORD; | 235 | return M_ENDWORD; |
244 | 236 | ||
245 | case '(': | 237 | case '(': |
246 | if (start_cnt >= NSUBEXP) | 238 | if (start_cnt >= NSUBEXP) { |
247 | { | ||
248 | FAIL("Too many \\(s"); | 239 | FAIL("Too many \\(s"); |
249 | } | 240 | } |
250 | end_stk[end_sp++] = start_cnt; | 241 | end_stk[end_sp++] = start_cnt; |
251 | return M_START(start_cnt++); | 242 | return M_START(start_cnt++); |
252 | 243 | ||
253 | case ')': | 244 | case ')': |
254 | if (end_sp <= 0) | 245 | if (end_sp <= 0) { |
255 | { | ||
256 | FAIL("Mismatched \\)"); | 246 | FAIL("Mismatched \\)"); |
257 | } | 247 | } |
258 | return M_END(end_stk[--end_sp]); | 248 | return M_END(end_stk[--end_sp]); |
259 | 249 | ||
260 | case '*': | 250 | case '*': |
261 | return M_SPLAT; | 251 | return M_SPLAT; |
262 | 252 | ||
263 | case '.': | 253 | case '.': |
264 | return M_ANY; | 254 | return M_ANY; |
265 | 255 | ||
266 | case '+': | 256 | case '+': |
267 | return M_PLUS; | 257 | return M_PLUS; |
268 | 258 | ||
269 | case '?': | 259 | case '?': |
270 | return M_QMARK; | 260 | return M_QMARK; |
271 | 261 | ||
272 | default: | 262 | default: |
273 | return c; | 263 | return c; |
274 | } | 264 | } |
275 | } | 265 | } else { |
276 | else { | 266 | switch (c) { |
277 | switch (c) | 267 | case '^': |
278 | { | 268 | if (*sptr == retext + 1) { |
279 | case '^': | ||
280 | if (*sptr == retext + 1) | ||
281 | { | ||
282 | return M_BEGLINE; | 269 | return M_BEGLINE; |
283 | } | 270 | } |
284 | return c; | 271 | return c; |
285 | 272 | ||
286 | case '$': | 273 | case '$': |
287 | if (!**sptr) | 274 | if (!**sptr) { |
288 | { | ||
289 | return M_ENDLINE; | 275 | return M_ENDLINE; |
290 | } | 276 | } |
291 | return c; | 277 | return c; |
292 | 278 | ||
293 | case '.': | 279 | case '.': |
294 | return M_ANY; | 280 | return M_ANY; |
295 | 281 | ||
296 | case '*': | 282 | case '*': |
297 | return M_SPLAT; | 283 | return M_SPLAT; |
298 | 284 | ||
299 | case '[': | 285 | case '[': |
300 | /* make sure we don't have too many classes */ | 286 | /* make sure we don't have too many classes */ |
301 | if (class_cnt >= 10) | 287 | if (class_cnt >= 10) { |
302 | { | ||
303 | FAIL("Too many []s"); | 288 | FAIL("Too many []s"); |
304 | } | 289 | } |
305 | 290 | ||
306 | /* process the character list for this class */ | 291 | /* process the character list for this class */ |
307 | if (re) | 292 | if (re) { |
308 | { | ||
309 | /* generate the bitmap for this class */ | 293 | /* generate the bitmap for this class */ |
310 | *sptr = makeclass(*sptr, re->program + 1 + 32 * class_cnt); | 294 | *sptr = makeclass(*sptr, re->program + 1 + 32 * class_cnt); |
311 | } | 295 | } else { |
312 | else | ||
313 | { | ||
314 | /* skip to end of the class */ | 296 | /* skip to end of the class */ |
315 | *sptr = makeclass(*sptr, (char *)0); | 297 | *sptr = makeclass(*sptr, (char *) 0); |
316 | } | 298 | } |
317 | return M_CLASS(class_cnt++); | 299 | return M_CLASS(class_cnt++); |
318 | 300 | ||
319 | default: | 301 | default: |
320 | return c; | 302 | return c; |
321 | } | 303 | } |
322 | } | 304 | } |
323 | /*NOTREACHED*/ | 305 | /*NOTREACHED*/} |
324 | } | ||
325 | 306 | ||
326 | 307 | ||
327 | 308 | ||
@@ -331,28 +312,22 @@ static int gettoken(sptr, re) | |||
331 | * about catching syntax errors; that is done in a later pass. | 312 | * about catching syntax errors; that is done in a later pass. |
332 | */ | 313 | */ |
333 | static unsigned calcsize(text) | 314 | static unsigned calcsize(text) |
334 | char *text; | 315 | char *text; |
335 | { | 316 | { |
336 | unsigned size; | 317 | unsigned size; |
337 | int token; | 318 | int token; |
338 | 319 | ||
339 | retext = text; | 320 | retext = text; |
340 | class_cnt = 0; | 321 | class_cnt = 0; |
341 | start_cnt = 1; | 322 | start_cnt = 1; |
342 | end_sp = 0; | 323 | end_sp = 0; |
343 | size = 5; | 324 | size = 5; |
344 | while ((token = gettoken(&text, (regexp *)0)) != 0) | 325 | while ((token = gettoken(&text, (regexp *) 0)) != 0) { |
345 | { | 326 | if (IS_CLASS(token)) { |
346 | if (IS_CLASS(token)) | ||
347 | { | ||
348 | size += 34; | 327 | size += 34; |
349 | } | 328 | } else if (IS_META(token)) { |
350 | else if (IS_META(token)) | ||
351 | { | ||
352 | size += 2; | 329 | size += 2; |
353 | } | 330 | } else { |
354 | else | ||
355 | { | ||
356 | size++; | 331 | size++; |
357 | } | 332 | } |
358 | } | 333 | } |
@@ -369,26 +344,23 @@ static unsigned calcsize(text) | |||
369 | * known to represent a single character. It returns 0 if they match, or | 344 | * known to represent a single character. It returns 0 if they match, or |
370 | * 1 if they don't. | 345 | * 1 if they don't. |
371 | */ | 346 | */ |
372 | static int match1(regexp* re, char ch, int token, int ignoreCase) | 347 | static int match1(regexp * re, char ch, int token, int ignoreCase) |
373 | { | 348 | { |
374 | if (!ch) | 349 | if (!ch) { |
375 | { | ||
376 | /* the end of a line can't match any RE of width 1 */ | 350 | /* the end of a line can't match any RE of width 1 */ |
377 | return 1; | 351 | return 1; |
378 | } | 352 | } |
379 | if (token == M_ANY) | 353 | if (token == M_ANY) { |
380 | { | ||
381 | return 0; | 354 | return 0; |
382 | } | 355 | } else if (IS_CLASS(token)) { |
383 | else if (IS_CLASS(token)) | 356 | if (re-> |
384 | { | 357 | program[1 + 32 * (token - M_CLASS(0)) + |
385 | if (re->program[1 + 32 * (token - M_CLASS(0)) + (ch >> 3)] & (1 << (ch & 7))) | 358 | (ch >> 3)] & (1 << (ch & 7))) |
386 | return 0; | 359 | return 0; |
387 | } | 360 | } |
388 | //fprintf(stderr, "match1: ch='%c' token='%c': ", ch, token); | 361 | //fprintf(stderr, "match1: ch='%c' token='%c': ", ch, token); |
389 | if (ch == token | 362 | if (ch == token |
390 | || (ignoreCase==TRUE && tolower(ch) == tolower(token))) | 363 | || (ignoreCase == TRUE && tolower(ch) == tolower(token))) { |
391 | { | ||
392 | //fprintf(stderr, "match\n"); | 364 | //fprintf(stderr, "match\n"); |
393 | return 0; | 365 | return 0; |
394 | } | 366 | } |
@@ -406,65 +378,63 @@ static int match1(regexp* re, char ch, int token, int ignoreCase) | |||
406 | /* str -- the string */ | 378 | /* str -- the string */ |
407 | /* prog -- a portion of re->program, a compiled RE */ | 379 | /* prog -- a portion of re->program, a compiled RE */ |
408 | /* here -- a portion of str, the string to compare it to */ | 380 | /* here -- a portion of str, the string to compare it to */ |
409 | static int match(regexp* re, char* str, char* prog, char* here, int ignoreCase) | 381 | static int match(regexp * re, char *str, char *prog, char *here, |
382 | int ignoreCase) | ||
410 | { | 383 | { |
411 | int token; | 384 | int token; |
412 | int nmatched; | 385 | int nmatched; |
413 | int closure; | 386 | int closure; |
414 | 387 | ||
415 | for (token = GET_META(prog); !IS_CLOSURE(token); prog++, token = GET_META(prog)) | 388 | for (token = GET_META(prog); !IS_CLOSURE(token); |
416 | { | 389 | prog++, token = GET_META(prog)) { |
417 | switch (token) | 390 | switch (token) { |
418 | { | 391 | /*case M_BEGLINE: can't happen; re->bol is used instead */ |
419 | /*case M_BEGLINE: can't happen; re->bol is used instead */ | 392 | case M_ENDLINE: |
420 | case M_ENDLINE: | ||
421 | if (*here) | 393 | if (*here) |
422 | return 1; | 394 | return 1; |
423 | break; | 395 | break; |
424 | 396 | ||
425 | case M_BEGWORD: | 397 | case M_BEGWORD: |
426 | if (here != str && | 398 | if (here != str && |
427 | (here[-1] == '_' || | 399 | (here[-1] == '_' || |
428 | (isascii(here[-1]) && isalnum(here[-1])))) | 400 | (isascii(here[-1]) && isalnum(here[-1])))) return 1; |
429 | return 1; | ||
430 | break; | 401 | break; |
431 | 402 | ||
432 | case M_ENDWORD: | 403 | case M_ENDWORD: |
433 | if ((here[0] == '_' || isascii(here[0])) && isalnum(here[0])) | 404 | if ((here[0] == '_' || isascii(here[0])) && isalnum(here[0])) |
434 | return 1; | 405 | return 1; |
435 | break; | 406 | break; |
436 | 407 | ||
437 | case M_START(0): | 408 | case M_START(0): |
438 | case M_START(1): | 409 | case M_START(1): |
439 | case M_START(2): | 410 | case M_START(2): |
440 | case M_START(3): | 411 | case M_START(3): |
441 | case M_START(4): | 412 | case M_START(4): |
442 | case M_START(5): | 413 | case M_START(5): |
443 | case M_START(6): | 414 | case M_START(6): |
444 | case M_START(7): | 415 | case M_START(7): |
445 | case M_START(8): | 416 | case M_START(8): |
446 | case M_START(9): | 417 | case M_START(9): |
447 | re->startp[token - M_START(0)] = (char *)here; | 418 | re->startp[token - M_START(0)] = (char *) here; |
448 | break; | 419 | break; |
449 | 420 | ||
450 | case M_END(0): | 421 | case M_END(0): |
451 | case M_END(1): | 422 | case M_END(1): |
452 | case M_END(2): | 423 | case M_END(2): |
453 | case M_END(3): | 424 | case M_END(3): |
454 | case M_END(4): | 425 | case M_END(4): |
455 | case M_END(5): | 426 | case M_END(5): |
456 | case M_END(6): | 427 | case M_END(6): |
457 | case M_END(7): | 428 | case M_END(7): |
458 | case M_END(8): | 429 | case M_END(8): |
459 | case M_END(9): | 430 | case M_END(9): |
460 | re->endp[token - M_END(0)] = (char *)here; | 431 | re->endp[token - M_END(0)] = (char *) here; |
461 | if (token == M_END(0)) | 432 | if (token == M_END(0)) { |
462 | { | ||
463 | return 0; | 433 | return 0; |
464 | } | 434 | } |
465 | break; | 435 | break; |
466 | 436 | ||
467 | default: /* literal, M_CLASS(n), or M_ANY */ | 437 | default: /* literal, M_CLASS(n), or M_ANY */ |
468 | if (match1(re, *here, token, ignoreCase) != 0) | 438 | if (match1(re, *here, token, ignoreCase) != 0) |
469 | return 1; | 439 | return 1; |
470 | here++; | 440 | here++; |
@@ -482,14 +452,12 @@ static int match(regexp* re, char* str, char* prog, char* here, int ignoreCase) | |||
482 | 452 | ||
483 | /* step 2: see how many times we can match that token against the string */ | 453 | /* step 2: see how many times we can match that token against the string */ |
484 | for (nmatched = 0; | 454 | for (nmatched = 0; |
485 | (closure != M_QMARK || nmatched < 1) && *here && match1(re, *here, token, ignoreCase) == 0; | 455 | (closure != M_QMARK || nmatched < 1) && *here |
486 | nmatched++, here++) | 456 | && match1(re, *here, token, ignoreCase) == 0; nmatched++, here++) { |
487 | { | ||
488 | } | 457 | } |
489 | 458 | ||
490 | /* step 3: try to match the remainder, and back off if it doesn't */ | 459 | /* step 3: try to match the remainder, and back off if it doesn't */ |
491 | while (nmatched >= 0 && match(re, str, prog, here, ignoreCase) != 0) | 460 | while (nmatched >= 0 && match(re, str, prog, here, ignoreCase) != 0) { |
492 | { | ||
493 | nmatched--; | 461 | nmatched--; |
494 | here--; | 462 | here--; |
495 | } | 463 | } |
@@ -502,41 +470,36 @@ static int match(regexp* re, char* str, char* prog, char* here, int ignoreCase) | |||
502 | 470 | ||
503 | 471 | ||
504 | /* This function compiles a regexp. */ | 472 | /* This function compiles a regexp. */ |
505 | extern regexp *regcomp(char* text) | 473 | extern regexp *regcomp(char *text) |
506 | { | 474 | { |
507 | int needfirst; | 475 | int needfirst; |
508 | unsigned size; | 476 | unsigned size; |
509 | int token; | 477 | int token; |
510 | int peek; | 478 | int peek; |
511 | char *build; | 479 | char *build; |
512 | regexp *re; // Ignore compiler whining. If we longjmp, we don't use re anymore. | 480 | regexp *re; // Ignore compiler whining. If we longjmp, we don't use re anymore. |
513 | 481 | ||
514 | 482 | ||
515 | /* prepare for error handling */ | 483 | /* prepare for error handling */ |
516 | re = (regexp *)0; | 484 | re = (regexp *) 0; |
517 | if (setjmp(errorhandler)) | 485 | if (setjmp(errorhandler)) { |
518 | { | 486 | if (re) { |
519 | if (re) | ||
520 | { | ||
521 | free(re); | 487 | free(re); |
522 | } | 488 | } |
523 | return (regexp *)0; | 489 | return (regexp *) 0; |
524 | } | 490 | } |
525 | 491 | ||
526 | /* if an empty regexp string was given, use the previous one */ | 492 | /* if an empty regexp string was given, use the previous one */ |
527 | if (*text == 0) | 493 | if (*text == 0) { |
528 | { | 494 | if (!previous) { |
529 | if (!previous) | ||
530 | { | ||
531 | FAIL("No previous RE"); | 495 | FAIL("No previous RE"); |
532 | } | 496 | } |
533 | text = previous; | 497 | text = previous; |
534 | } | 498 | } else { /* non-empty regexp given, so remember it */ |
535 | else /* non-empty regexp given, so remember it */ | 499 | |
536 | { | ||
537 | if (previous) | 500 | if (previous) |
538 | free(previous); | 501 | free(previous); |
539 | previous = (char *)malloc((unsigned)(strlen(text) + 1)); | 502 | previous = (char *) malloc((unsigned) (strlen(text) + 1)); |
540 | if (previous) | 503 | if (previous) |
541 | strcpy(previous, text); | 504 | strcpy(previous, text); |
542 | } | 505 | } |
@@ -547,19 +510,17 @@ extern regexp *regcomp(char* text) | |||
547 | end_sp = 0; | 510 | end_sp = 0; |
548 | retext = text; | 511 | retext = text; |
549 | size = calcsize(text) + sizeof(regexp); | 512 | size = calcsize(text) + sizeof(regexp); |
550 | re = (regexp *)malloc((unsigned)size); | 513 | re = (regexp *) malloc((unsigned) size); |
551 | 514 | ||
552 | if (!re) | 515 | if (!re) { |
553 | { | ||
554 | FAIL("Not enough memory for this RE"); | 516 | FAIL("Not enough memory for this RE"); |
555 | } | 517 | } |
556 | 518 | ||
557 | /* compile it */ | 519 | /* compile it */ |
558 | build = &re->program[1 + 32 * class_cnt]; | 520 | build = &re->program[1 + 32 * class_cnt]; |
559 | re->program[0] = class_cnt; | 521 | re->program[0] = class_cnt; |
560 | for (token = 0; token < NSUBEXP; token++) | 522 | for (token = 0; token < NSUBEXP; token++) { |
561 | { | 523 | re->startp[token] = re->endp[token] = (char *) 0; |
562 | re->startp[token] = re->endp[token] = (char *)0; | ||
563 | } | 524 | } |
564 | re->first = 0; | 525 | re->first = 0; |
565 | re->bol = 0; | 526 | re->bol = 0; |
@@ -570,76 +531,60 @@ extern regexp *regcomp(char* text) | |||
570 | end_sp = 0; | 531 | end_sp = 0; |
571 | retext = text; | 532 | retext = text; |
572 | for (token = M_START(0), peek = gettoken(&text, re); | 533 | for (token = M_START(0), peek = gettoken(&text, re); |
573 | token; | 534 | token; token = peek, peek = gettoken(&text, re)) { |
574 | token = peek, peek = gettoken(&text, re)) | ||
575 | { | ||
576 | /* special processing for the closure operator */ | 535 | /* special processing for the closure operator */ |
577 | if (IS_CLOSURE(peek)) | 536 | if (IS_CLOSURE(peek)) { |
578 | { | ||
579 | /* detect misuse of closure operator */ | 537 | /* detect misuse of closure operator */ |
580 | if (IS_START(token)) | 538 | if (IS_START(token)) { |
581 | { | ||
582 | FAIL("* or \\+ or \\? follows nothing"); | 539 | FAIL("* or \\+ or \\? follows nothing"); |
583 | } | 540 | } |
584 | else if (IS_META(token) && token != M_ANY && !IS_CLASS(token)) | 541 | else if (IS_META(token) && token != M_ANY |
585 | { | 542 | && !IS_CLASS(token)) { |
586 | FAIL("* or \\+ or \\? can only follow a normal character or . or []"); | 543 | FAIL |
544 | ("* or \\+ or \\? can only follow a normal character or . or []"); | ||
587 | } | 545 | } |
588 | 546 | ||
589 | /* it is okay -- make it prefix instead of postfix */ | 547 | /* it is okay -- make it prefix instead of postfix */ |
590 | ADD_META(build, peek); | 548 | ADD_META(build, peek); |
591 | 549 | ||
592 | /* take care of "needfirst" - is this the first char? */ | 550 | /* take care of "needfirst" - is this the first char? */ |
593 | if (needfirst && peek == M_PLUS && !IS_META(token)) | 551 | if (needfirst && peek == M_PLUS && !IS_META(token)) { |
594 | { | ||
595 | re->first = token; | 552 | re->first = token; |
596 | } | 553 | } |
597 | needfirst = 0; | 554 | needfirst = 0; |
598 | 555 | ||
599 | /* we used "peek" -- need to refill it */ | 556 | /* we used "peek" -- need to refill it */ |
600 | peek = gettoken(&text, re); | 557 | peek = gettoken(&text, re); |
601 | if (IS_CLOSURE(peek)) | 558 | if (IS_CLOSURE(peek)) { |
602 | { | ||
603 | FAIL("* or \\+ or \\? doubled up"); | 559 | FAIL("* or \\+ or \\? doubled up"); |
604 | } | 560 | } |
605 | } | 561 | } else if (!IS_META(token)) { |
606 | else if (!IS_META(token)) | ||
607 | { | ||
608 | /* normal char is NOT argument of closure */ | 562 | /* normal char is NOT argument of closure */ |
609 | if (needfirst) | 563 | if (needfirst) { |
610 | { | ||
611 | re->first = token; | 564 | re->first = token; |
612 | needfirst = 0; | 565 | needfirst = 0; |
613 | } | 566 | } |
614 | re->minlen++; | 567 | re->minlen++; |
615 | } | 568 | } else if (token == M_ANY || IS_CLASS(token)) { |
616 | else if (token == M_ANY || IS_CLASS(token)) | ||
617 | { | ||
618 | /* . or [] is NOT argument of closure */ | 569 | /* . or [] is NOT argument of closure */ |
619 | needfirst = 0; | 570 | needfirst = 0; |
620 | re->minlen++; | 571 | re->minlen++; |
621 | } | 572 | } |
622 | 573 | ||
623 | /* the "token" character is not closure -- process it normally */ | 574 | /* the "token" character is not closure -- process it normally */ |
624 | if (token == M_BEGLINE) | 575 | if (token == M_BEGLINE) { |
625 | { | ||
626 | /* set the BOL flag instead of storing M_BEGLINE */ | 576 | /* set the BOL flag instead of storing M_BEGLINE */ |
627 | re->bol = 1; | 577 | re->bol = 1; |
628 | } | 578 | } else if (IS_META(token)) { |
629 | else if (IS_META(token)) | ||
630 | { | ||
631 | ADD_META(build, token); | 579 | ADD_META(build, token); |
632 | } | 580 | } else { |
633 | else | ||
634 | { | ||
635 | *build++ = token; | 581 | *build++ = token; |
636 | } | 582 | } |
637 | } | 583 | } |
638 | 584 | ||
639 | /* end it with a \) which MUST MATCH the opening \( */ | 585 | /* end it with a \) which MUST MATCH the opening \( */ |
640 | ADD_META(build, M_END(0)); | 586 | ADD_META(build, M_END(0)); |
641 | if (end_sp > 0) | 587 | if (end_sp > 0) { |
642 | { | ||
643 | FAIL("Not enough \\)s"); | 588 | FAIL("Not enough \\)s"); |
644 | } | 589 | } |
645 | 590 | ||
@@ -654,15 +599,14 @@ extern regexp *regcomp(char* text) | |||
654 | /* str -- the string to search through */ | 599 | /* str -- the string to search through */ |
655 | /* bol -- does str start at the beginning of a line? (boolean) */ | 600 | /* bol -- does str start at the beginning of a line? (boolean) */ |
656 | /* ignoreCase -- ignoreCase or not */ | 601 | /* ignoreCase -- ignoreCase or not */ |
657 | extern int regexec(struct regexp* re, char* str, int bol, int ignoreCase) | 602 | extern int regexec(struct regexp *re, char *str, int bol, int ignoreCase) |
658 | { | 603 | { |
659 | char *prog; /* the entry point of re->program */ | 604 | char *prog; /* the entry point of re->program */ |
660 | int len; /* length of the string */ | 605 | int len; /* length of the string */ |
661 | char *here; | 606 | char *here; |
662 | 607 | ||
663 | /* if must start at the beginning of a line, and this isn't, then fail */ | 608 | /* if must start at the beginning of a line, and this isn't, then fail */ |
664 | if (re->bol && bol==TRUE) | 609 | if (re->bol && bol == TRUE) { |
665 | { | ||
666 | return FALSE; | 610 | return FALSE; |
667 | } | 611 | } |
668 | 612 | ||
@@ -670,35 +614,26 @@ extern int regexec(struct regexp* re, char* str, int bol, int ignoreCase) | |||
670 | prog = re->program + 1 + 32 * re->program[0]; | 614 | prog = re->program + 1 + 32 * re->program[0]; |
671 | 615 | ||
672 | /* search for the RE in the string */ | 616 | /* search for the RE in the string */ |
673 | if (re->bol) | 617 | if (re->bol) { |
674 | { | ||
675 | /* must occur at BOL */ | 618 | /* must occur at BOL */ |
676 | if ((re->first | 619 | if ((re->first && match1(re, *(char *) str, re->first, ignoreCase)) /* wrong first letter? */ |
677 | && match1(re, *(char *)str, re->first, ignoreCase))/* wrong first letter? */ | 620 | ||len < re->minlen /* not long enough? */ |
678 | || len < re->minlen /* not long enough? */ | 621 | || match(re, (char *) str, prog, str, ignoreCase)) /* doesn't match? */ |
679 | || match(re, (char *)str, prog, str, ignoreCase)) /* doesn't match? */ | 622 | return FALSE; /* THEN FAIL! */ |
680 | return FALSE; /* THEN FAIL! */ | 623 | } else if (ignoreCase == FALSE) { |
681 | } | ||
682 | else if (ignoreCase == FALSE) | ||
683 | { | ||
684 | /* can occur anywhere in the line, noignorecase */ | 624 | /* can occur anywhere in the line, noignorecase */ |
685 | for (here = (char *)str; | 625 | for (here = (char *) str; (re->first && re->first != *here) |
686 | (re->first && re->first != *here) | 626 | || match(re, (char *) str, prog, here, ignoreCase); |
687 | || match(re, (char *)str, prog, here, ignoreCase); | 627 | here++, len--) { |
688 | here++, len--) | ||
689 | { | ||
690 | if (len < re->minlen) | 628 | if (len < re->minlen) |
691 | return FALSE; | 629 | return FALSE; |
692 | } | 630 | } |
693 | } | 631 | } else { |
694 | else | ||
695 | { | ||
696 | /* can occur anywhere in the line, ignorecase */ | 632 | /* can occur anywhere in the line, ignorecase */ |
697 | for (here = (char *)str; | 633 | for (here = (char *) str; |
698 | (re->first && match1(re, *here, (int)re->first, ignoreCase)) | 634 | (re->first && match1(re, *here, (int) re->first, ignoreCase)) |
699 | || match(re, (char *)str, prog, here, ignoreCase); | 635 | || match(re, (char *) str, prog, here, ignoreCase); |
700 | here++, len--) | 636 | here++, len--) { |
701 | { | ||
702 | if (len < re->minlen) | 637 | if (len < re->minlen) |
703 | return FALSE; | 638 | return FALSE; |
704 | } | 639 | } |
@@ -713,82 +648,72 @@ extern int regexec(struct regexp* re, char* str, int bol, int ignoreCase) | |||
713 | 648 | ||
714 | #if defined BB_SED | 649 | #if defined BB_SED |
715 | /* This performs substitutions after a regexp match has been found. */ | 650 | /* This performs substitutions after a regexp match has been found. */ |
716 | extern void regsub(regexp* re, char* src, char* dst) | 651 | extern void regsub(regexp * re, char *src, char *dst) |
717 | { | 652 | { |
718 | char *cpy; | 653 | char *cpy; |
719 | char *end; | 654 | char *end; |
720 | char c; | 655 | char c; |
721 | char *start; | 656 | char *start; |
722 | int mod; | 657 | int mod; |
723 | 658 | ||
724 | mod = 0; | 659 | mod = 0; |
725 | 660 | ||
726 | start = src; | 661 | start = src; |
727 | while ((c = *src++) != '\0') | 662 | while ((c = *src++) != '\0') { |
728 | { | ||
729 | /* recognize any meta characters */ | 663 | /* recognize any meta characters */ |
730 | if (c == '&') | 664 | if (c == '&') { |
731 | { | ||
732 | cpy = re->startp[0]; | 665 | cpy = re->startp[0]; |
733 | end = re->endp[0]; | 666 | end = re->endp[0]; |
734 | } | 667 | } else if (c == '~') { |
735 | else if (c == '~') | ||
736 | { | ||
737 | cpy = previous1; | 668 | cpy = previous1; |
738 | if (cpy) | 669 | if (cpy) |
739 | end = cpy + strlen(cpy); | 670 | end = cpy + strlen(cpy); |
740 | } | 671 | } else if (c == '\\') { |
741 | else | ||
742 | if (c == '\\') | ||
743 | { | ||
744 | c = *src++; | 672 | c = *src++; |
745 | switch (c) | 673 | switch (c) { |
746 | { | 674 | case '0': |
747 | case '0': | 675 | case '1': |
748 | case '1': | 676 | case '2': |
749 | case '2': | 677 | case '3': |
750 | case '3': | 678 | case '4': |
751 | case '4': | 679 | case '5': |
752 | case '5': | 680 | case '6': |
753 | case '6': | 681 | case '7': |
754 | case '7': | 682 | case '8': |
755 | case '8': | 683 | case '9': |
756 | case '9': | ||
757 | /* \0 thru \9 mean "copy subexpression" */ | 684 | /* \0 thru \9 mean "copy subexpression" */ |
758 | c -= '0'; | 685 | c -= '0'; |
759 | cpy = re->startp[(int)c]; | 686 | cpy = re->startp[(int) c]; |
760 | end = re->endp[(int)c]; | 687 | end = re->endp[(int) c]; |
761 | break; | 688 | break; |
762 | case 'U': | 689 | case 'U': |
763 | case 'u': | 690 | case 'u': |
764 | case 'L': | 691 | case 'L': |
765 | case 'l': | 692 | case 'l': |
766 | /* \U and \L mean "convert to upper/lowercase" */ | 693 | /* \U and \L mean "convert to upper/lowercase" */ |
767 | mod = c; | 694 | mod = c; |
768 | continue; | 695 | continue; |
769 | 696 | ||
770 | case 'E': | 697 | case 'E': |
771 | case 'e': | 698 | case 'e': |
772 | /* \E ends the \U or \L */ | 699 | /* \E ends the \U or \L */ |
773 | mod = 0; | 700 | mod = 0; |
774 | continue; | 701 | continue; |
775 | case '&': | 702 | case '&': |
776 | /* "\&" means "original text" */ | 703 | /* "\&" means "original text" */ |
777 | *dst++ = c; | 704 | *dst++ = c; |
778 | continue; | 705 | continue; |
779 | 706 | ||
780 | case '~': | 707 | case '~': |
781 | /* "\~" means "previous text, if any" */ | 708 | /* "\~" means "previous text, if any" */ |
782 | *dst++ = c; | 709 | *dst++ = c; |
783 | continue; | 710 | continue; |
784 | default: | 711 | default: |
785 | /* ordinary char preceded by backslash */ | 712 | /* ordinary char preceded by backslash */ |
786 | *dst++ = c; | 713 | *dst++ = c; |
787 | continue; | 714 | continue; |
788 | } | 715 | } |
789 | } | 716 | } else { |
790 | else | ||
791 | { | ||
792 | /* ordinary character, so just copy it */ | 717 | /* ordinary character, so just copy it */ |
793 | *dst++ = c; | 718 | *dst++ = c; |
794 | continue; | 719 | continue; |
@@ -804,46 +729,37 @@ extern void regsub(regexp* re, char* src, char* dst) | |||
804 | continue; | 729 | continue; |
805 | 730 | ||
806 | /* copy over a portion of the original */ | 731 | /* copy over a portion of the original */ |
807 | while (cpy < end) | 732 | while (cpy < end) { |
808 | { | 733 | switch (mod) { |
809 | switch (mod) | 734 | case 'U': |
810 | { | 735 | case 'u': |
811 | case 'U': | ||
812 | case 'u': | ||
813 | /* convert to uppercase */ | 736 | /* convert to uppercase */ |
814 | if (isascii(*cpy) && islower(*cpy)) | 737 | if (isascii(*cpy) && islower(*cpy)) { |
815 | { | ||
816 | *dst++ = toupper(*cpy); | 738 | *dst++ = toupper(*cpy); |
817 | cpy++; | 739 | cpy++; |
818 | } | 740 | } else { |
819 | else | ||
820 | { | ||
821 | *dst++ = *cpy++; | 741 | *dst++ = *cpy++; |
822 | } | 742 | } |
823 | break; | 743 | break; |
824 | 744 | ||
825 | case 'L': | 745 | case 'L': |
826 | case 'l': | 746 | case 'l': |
827 | /* convert to lowercase */ | 747 | /* convert to lowercase */ |
828 | if (isascii(*cpy) && isupper(*cpy)) | 748 | if (isascii(*cpy) && isupper(*cpy)) { |
829 | { | ||
830 | *dst++ = tolower(*cpy); | 749 | *dst++ = tolower(*cpy); |
831 | cpy++; | 750 | cpy++; |
832 | } | 751 | } else { |
833 | else | ||
834 | { | ||
835 | *dst++ = *cpy++; | 752 | *dst++ = *cpy++; |
836 | } | 753 | } |
837 | break; | 754 | break; |
838 | 755 | ||
839 | default: | 756 | default: |
840 | /* copy without any conversion */ | 757 | /* copy without any conversion */ |
841 | *dst++ = *cpy++; | 758 | *dst++ = *cpy++; |
842 | } | 759 | } |
843 | 760 | ||
844 | /* \u and \l end automatically after the first char */ | 761 | /* \u and \l end automatically after the first char */ |
845 | if (mod && (mod == 'u' || mod == 'l')) | 762 | if (mod && (mod == 'u' || mod == 'l')) { |
846 | { | ||
847 | mod = 0; | 763 | mod = 0; |
848 | } | 764 | } |
849 | } | 765 | } |
@@ -853,12 +769,10 @@ extern void regsub(regexp* re, char* src, char* dst) | |||
853 | /* remember what text we inserted this time */ | 769 | /* remember what text we inserted this time */ |
854 | if (previous1) | 770 | if (previous1) |
855 | free(previous1); | 771 | free(previous1); |
856 | previous1 = (char *)malloc((unsigned)(strlen(start) + 1)); | 772 | previous1 = (char *) malloc((unsigned) (strlen(start) + 1)); |
857 | if (previous1) | 773 | if (previous1) |
858 | strcpy(previous1, start); | 774 | strcpy(previous1, start); |
859 | } | 775 | } |
860 | #endif | 776 | #endif |
861 | 777 | ||
862 | #endif /* BB_REGEXP */ | 778 | #endif /* BB_REGEXP */ |
863 | |||
864 | |||