aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNguyễn Thái Ngọc Duy <pclouds@gmail.com>2010-04-14 02:04:07 +0200
committerNguyễn Thái Ngọc Duy <pclouds@gmail.com>2010-04-20 19:14:11 +0200
commita1b05ef61f4d1c4ba64f758292cece2fdf2828bf (patch)
tree5b9749ac8264948cda6b7c5a7d365f1cc98a20fe
parent27dadd087befac1f51eac50f886185a62c7d5860 (diff)
downloadbusybox-w32-a1b05ef61f4d1c4ba64f758292cece2fdf2828bf.tar.gz
busybox-w32-a1b05ef61f4d1c4ba64f758292cece2fdf2828bf.tar.bz2
busybox-w32-a1b05ef61f4d1c4ba64f758292cece2fdf2828bf.zip
win32: Import regex source
These were extracted from commit e56b799d6ad8afba4168fffa7218d44c041a72d2 in Git repository. Changes from the original version: > diff --git a/tmp/regex.c b/win32/regex.c > index 87b33e4..2cca169 100644 > --- a/tmp/regex.c > +++ b/win32/regex.c > @@ -24,7 +24,9 @@ > #pragma alloca > #endif > > +#ifndef _GNU_SOURCE > #define _GNU_SOURCE > +#endif > > /* We need this for `regex.h', and perhaps for the Emacs include files. */ > #include <sys/types.h> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
-rw-r--r--win32/regex.c4929
-rw-r--r--win32/regex.h490
2 files changed, 5419 insertions, 0 deletions
diff --git a/win32/regex.c b/win32/regex.c
new file mode 100644
index 000000000..2cca16934
--- /dev/null
+++ b/win32/regex.c
@@ -0,0 +1,4929 @@
1/* Extended regular expression matching and search library,
2 version 0.12.
3 (Implements POSIX draft P10003.2/D11.2, except for
4 internationalization features.)
5
6 Copyright (C) 1993 Free Software Foundation, Inc.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
21
22/* AIX requires this to be the first thing in the file. */
23#if defined (_AIX) && !defined (REGEX_MALLOC)
24 #pragma alloca
25#endif
26
27#ifndef _GNU_SOURCE
28#define _GNU_SOURCE
29#endif
30
31/* We need this for `regex.h', and perhaps for the Emacs include files. */
32#include <sys/types.h>
33
34/* We used to test for `BSTRING' here, but only GCC and Emacs define
35 `BSTRING', as far as I know, and neither of them use this code. */
36#include <string.h>
37#ifndef bcmp
38#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
39#endif
40#ifndef bcopy
41#define bcopy(s, d, n) memcpy ((d), (s), (n))
42#endif
43#ifndef bzero
44#define bzero(s, n) memset ((s), 0, (n))
45#endif
46
47#include <stdlib.h>
48
49
50/* Define the syntax stuff for \<, \>, etc. */
51
52/* This must be nonzero for the wordchar and notwordchar pattern
53 commands in re_match_2. */
54#ifndef Sword
55#define Sword 1
56#endif
57
58#ifdef SYNTAX_TABLE
59
60extern char *re_syntax_table;
61
62#else /* not SYNTAX_TABLE */
63
64/* How many characters in the character set. */
65#define CHAR_SET_SIZE 256
66
67static char re_syntax_table[CHAR_SET_SIZE];
68
69static void
70init_syntax_once ()
71{
72 register int c;
73 static int done = 0;
74
75 if (done)
76 return;
77
78 bzero (re_syntax_table, sizeof re_syntax_table);
79
80 for (c = 'a'; c <= 'z'; c++)
81 re_syntax_table[c] = Sword;
82
83 for (c = 'A'; c <= 'Z'; c++)
84 re_syntax_table[c] = Sword;
85
86 for (c = '0'; c <= '9'; c++)
87 re_syntax_table[c] = Sword;
88
89 re_syntax_table['_'] = Sword;
90
91 done = 1;
92}
93
94#endif /* not SYNTAX_TABLE */
95
96#define SYNTAX(c) re_syntax_table[c]
97
98
99/* Get the interface, including the syntax bits. */
100#include "regex.h"
101
102/* isalpha etc. are used for the character classes. */
103#include <ctype.h>
104
105#ifndef isascii
106#define isascii(c) 1
107#endif
108
109#ifdef isblank
110#define ISBLANK(c) (isascii (c) && isblank (c))
111#else
112#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
113#endif
114#ifdef isgraph
115#define ISGRAPH(c) (isascii (c) && isgraph (c))
116#else
117#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
118#endif
119
120#define ISPRINT(c) (isascii (c) && isprint (c))
121#define ISDIGIT(c) (isascii (c) && isdigit (c))
122#define ISALNUM(c) (isascii (c) && isalnum (c))
123#define ISALPHA(c) (isascii (c) && isalpha (c))
124#define ISCNTRL(c) (isascii (c) && iscntrl (c))
125#define ISLOWER(c) (isascii (c) && islower (c))
126#define ISPUNCT(c) (isascii (c) && ispunct (c))
127#define ISSPACE(c) (isascii (c) && isspace (c))
128#define ISUPPER(c) (isascii (c) && isupper (c))
129#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
130
131#ifndef NULL
132#define NULL 0
133#endif
134
135/* We remove any previous definition of `SIGN_EXTEND_CHAR',
136 since ours (we hope) works properly with all combinations of
137 machines, compilers, `char' and `unsigned char' argument types.
138 (Per Bothner suggested the basic approach.) */
139#undef SIGN_EXTEND_CHAR
140#if __STDC__
141#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
142#else /* not __STDC__ */
143/* As in Harbison and Steele. */
144#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
145#endif
146
147/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
148 use `alloca' instead of `malloc'. This is because using malloc in
149 re_search* or re_match* could cause memory leaks when C-g is used in
150 Emacs; also, malloc is slower and causes storage fragmentation. On
151 the other hand, malloc is more portable, and easier to debug.
152
153 Because we sometimes use alloca, some routines have to be macros,
154 not functions -- `alloca'-allocated space disappears at the end of the
155 function it is called in. */
156
157#ifdef REGEX_MALLOC
158
159#define REGEX_ALLOCATE malloc
160#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
161
162#else /* not REGEX_MALLOC */
163
164/* Emacs already defines alloca, sometimes. */
165#ifndef alloca
166
167/* Make alloca work the best possible way. */
168#ifdef __GNUC__
169#define alloca __builtin_alloca
170#else /* not __GNUC__ */
171#if HAVE_ALLOCA_H
172#include <alloca.h>
173#else /* not __GNUC__ or HAVE_ALLOCA_H */
174#ifndef _AIX /* Already did AIX, up at the top. */
175char *alloca ();
176#endif /* not _AIX */
177#endif /* not HAVE_ALLOCA_H */
178#endif /* not __GNUC__ */
179
180#endif /* not alloca */
181
182#define REGEX_ALLOCATE alloca
183
184/* Assumes a `char *destination' variable. */
185#define REGEX_REALLOCATE(source, osize, nsize) \
186 (destination = (char *) alloca (nsize), \
187 bcopy (source, destination, osize), \
188 destination)
189
190#endif /* not REGEX_MALLOC */
191
192
193/* True if `size1' is non-NULL and PTR is pointing anywhere inside
194 `string1' or just past its end. This works if PTR is NULL, which is
195 a good thing. */
196#define FIRST_STRING_P(ptr) \
197 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
198
199/* (Re)Allocate N items of type T using malloc, or fail. */
200#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
201#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
202#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
203
204#define BYTEWIDTH 8 /* In bits. */
205
206#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
207
208#define MAX(a, b) ((a) > (b) ? (a) : (b))
209#define MIN(a, b) ((a) < (b) ? (a) : (b))
210
211typedef char boolean;
212#define false 0
213#define true 1
214
215/* These are the command codes that appear in compiled regular
216 expressions. Some opcodes are followed by argument bytes. A
217 command code can specify any interpretation whatsoever for its
218 arguments. Zero bytes may appear in the compiled regular expression.
219
220 The value of `exactn' is needed in search.c (search_buffer) in Emacs.
221 So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
222 `exactn' we use here must also be 1. */
223
224typedef enum
225{
226 no_op = 0,
227
228 /* Followed by one byte giving n, then by n literal bytes. */
229 exactn = 1,
230
231 /* Matches any (more or less) character. */
232 anychar,
233
234 /* Matches any one char belonging to specified set. First
235 following byte is number of bitmap bytes. Then come bytes
236 for a bitmap saying which chars are in. Bits in each byte
237 are ordered low-bit-first. A character is in the set if its
238 bit is 1. A character too large to have a bit in the map is
239 automatically not in the set. */
240 charset,
241
242 /* Same parameters as charset, but match any character that is
243 not one of those specified. */
244 charset_not,
245
246 /* Start remembering the text that is matched, for storing in a
247 register. Followed by one byte with the register number, in
248 the range 0 to one less than the pattern buffer's re_nsub
249 field. Then followed by one byte with the number of groups
250 inner to this one. (This last has to be part of the
251 start_memory only because we need it in the on_failure_jump
252 of re_match_2.) */
253 start_memory,
254
255 /* Stop remembering the text that is matched and store it in a
256 memory register. Followed by one byte with the register
257 number, in the range 0 to one less than `re_nsub' in the
258 pattern buffer, and one byte with the number of inner groups,
259 just like `start_memory'. (We need the number of inner
260 groups here because we don't have any easy way of finding the
261 corresponding start_memory when we're at a stop_memory.) */
262 stop_memory,
263
264 /* Match a duplicate of something remembered. Followed by one
265 byte containing the register number. */
266 duplicate,
267
268 /* Fail unless at beginning of line. */
269 begline,
270
271 /* Fail unless at end of line. */
272 endline,
273
274 /* Succeeds if at beginning of buffer (if emacs) or at beginning
275 of string to be matched (if not). */
276 begbuf,
277
278 /* Analogously, for end of buffer/string. */
279 endbuf,
280
281 /* Followed by two byte relative address to which to jump. */
282 jump,
283
284 /* Same as jump, but marks the end of an alternative. */
285 jump_past_alt,
286
287 /* Followed by two-byte relative address of place to resume at
288 in case of failure. */
289 on_failure_jump,
290
291 /* Like on_failure_jump, but pushes a placeholder instead of the
292 current string position when executed. */
293 on_failure_keep_string_jump,
294
295 /* Throw away latest failure point and then jump to following
296 two-byte relative address. */
297 pop_failure_jump,
298
299 /* Change to pop_failure_jump if know won't have to backtrack to
300 match; otherwise change to jump. This is used to jump
301 back to the beginning of a repeat. If what follows this jump
302 clearly won't match what the repeat does, such that we can be
303 sure that there is no use backtracking out of repetitions
304 already matched, then we change it to a pop_failure_jump.
305 Followed by two-byte address. */
306 maybe_pop_jump,
307
308 /* Jump to following two-byte address, and push a dummy failure
309 point. This failure point will be thrown away if an attempt
310 is made to use it for a failure. A `+' construct makes this
311 before the first repeat. Also used as an intermediary kind
312 of jump when compiling an alternative. */
313 dummy_failure_jump,
314
315 /* Push a dummy failure point and continue. Used at the end of
316 alternatives. */
317 push_dummy_failure,
318
319 /* Followed by two-byte relative address and two-byte number n.
320 After matching N times, jump to the address upon failure. */
321 succeed_n,
322
323 /* Followed by two-byte relative address, and two-byte number n.
324 Jump to the address N times, then fail. */
325 jump_n,
326
327 /* Set the following two-byte relative address to the
328 subsequent two-byte number. The address *includes* the two
329 bytes of number. */
330 set_number_at,
331
332 wordchar, /* Matches any word-constituent character. */
333 notwordchar, /* Matches any char that is not a word-constituent. */
334
335 wordbeg, /* Succeeds if at word beginning. */
336 wordend, /* Succeeds if at word end. */
337
338 wordbound, /* Succeeds if at a word boundary. */
339 notwordbound /* Succeeds if not at a word boundary. */
340
341#ifdef emacs
342 ,before_dot, /* Succeeds if before point. */
343 at_dot, /* Succeeds if at point. */
344 after_dot, /* Succeeds if after point. */
345
346 /* Matches any character whose syntax is specified. Followed by
347 a byte which contains a syntax code, e.g., Sword. */
348 syntaxspec,
349
350 /* Matches any character whose syntax is not that specified. */
351 notsyntaxspec
352#endif /* emacs */
353} re_opcode_t;
354
355/* Common operations on the compiled pattern. */
356
357/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
358
359#define STORE_NUMBER(destination, number) \
360 do { \
361 (destination)[0] = (number) & 0377; \
362 (destination)[1] = (number) >> 8; \
363 } while (0)
364
365/* Same as STORE_NUMBER, except increment DESTINATION to
366 the byte after where the number is stored. Therefore, DESTINATION
367 must be an lvalue. */
368
369#define STORE_NUMBER_AND_INCR(destination, number) \
370 do { \
371 STORE_NUMBER (destination, number); \
372 (destination) += 2; \
373 } while (0)
374
375/* Put into DESTINATION a number stored in two contiguous bytes starting
376 at SOURCE. */
377
378#define EXTRACT_NUMBER(destination, source) \
379 do { \
380 (destination) = *(source) & 0377; \
381 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
382 } while (0)
383
384#ifdef DEBUG
385static void
386extract_number (dest, source)
387 int *dest;
388 unsigned char *source;
389{
390 int temp = SIGN_EXTEND_CHAR (*(source + 1));
391 *dest = *source & 0377;
392 *dest += temp << 8;
393}
394
395#ifndef EXTRACT_MACROS /* To debug the macros. */
396#undef EXTRACT_NUMBER
397#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
398#endif /* not EXTRACT_MACROS */
399
400#endif /* DEBUG */
401
402/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
403 SOURCE must be an lvalue. */
404
405#define EXTRACT_NUMBER_AND_INCR(destination, source) \
406 do { \
407 EXTRACT_NUMBER (destination, source); \
408 (source) += 2; \
409 } while (0)
410
411#ifdef DEBUG
412static void
413extract_number_and_incr (destination, source)
414 int *destination;
415 unsigned char **source;
416{
417 extract_number (destination, *source);
418 *source += 2;
419}
420
421#ifndef EXTRACT_MACROS
422#undef EXTRACT_NUMBER_AND_INCR
423#define EXTRACT_NUMBER_AND_INCR(dest, src) \
424 extract_number_and_incr (&dest, &src)
425#endif /* not EXTRACT_MACROS */
426
427#endif /* DEBUG */
428
429/* If DEBUG is defined, Regex prints many voluminous messages about what
430 it is doing (if the variable `debug' is nonzero). If linked with the
431 main program in `iregex.c', you can enter patterns and strings
432 interactively. And if linked with the main program in `main.c' and
433 the other test files, you can run the already-written tests. */
434
435#ifdef DEBUG
436
437/* We use standard I/O for debugging. */
438#include <stdio.h>
439
440/* It is useful to test things that ``must'' be true when debugging. */
441#include <assert.h>
442
443static int debug = 0;
444
445#define DEBUG_STATEMENT(e) e
446#define DEBUG_PRINT1(x) if (debug) printf (x)
447#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
448#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
449#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
450#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
451 if (debug) print_partial_compiled_pattern (s, e)
452#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
453 if (debug) print_double_string (w, s1, sz1, s2, sz2)
454
455
456extern void printchar ();
457
458/* Print the fastmap in human-readable form. */
459
460void
461print_fastmap (fastmap)
462 char *fastmap;
463{
464 unsigned was_a_range = 0;
465 unsigned i = 0;
466
467 while (i < (1 << BYTEWIDTH))
468 {
469 if (fastmap[i++])
470 {
471 was_a_range = 0;
472 printchar (i - 1);
473 while (i < (1 << BYTEWIDTH) && fastmap[i])
474 {
475 was_a_range = 1;
476 i++;
477 }
478 if (was_a_range)
479 {
480 printf ("-");
481 printchar (i - 1);
482 }
483 }
484 }
485 putchar ('\n');
486}
487
488
489/* Print a compiled pattern string in human-readable form, starting at
490 the START pointer into it and ending just before the pointer END. */
491
492void
493print_partial_compiled_pattern (start, end)
494 unsigned char *start;
495 unsigned char *end;
496{
497 int mcnt, mcnt2;
498 unsigned char *p = start;
499 unsigned char *pend = end;
500
501 if (start == NULL)
502 {
503 printf ("(null)\n");
504 return;
505 }
506
507 /* Loop over pattern commands. */
508 while (p < pend)
509 {
510 switch ((re_opcode_t) *p++)
511 {
512 case no_op:
513 printf ("/no_op");
514 break;
515
516 case exactn:
517 mcnt = *p++;
518 printf ("/exactn/%d", mcnt);
519 do
520 {
521 putchar ('/');
522 printchar (*p++);
523 }
524 while (--mcnt);
525 break;
526
527 case start_memory:
528 mcnt = *p++;
529 printf ("/start_memory/%d/%d", mcnt, *p++);
530 break;
531
532 case stop_memory:
533 mcnt = *p++;
534 printf ("/stop_memory/%d/%d", mcnt, *p++);
535 break;
536
537 case duplicate:
538 printf ("/duplicate/%d", *p++);
539 break;
540
541 case anychar:
542 printf ("/anychar");
543 break;
544
545 case charset:
546 case charset_not:
547 {
548 register int c;
549
550 printf ("/charset%s",
551 (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
552
553 assert (p + *p < pend);
554
555 for (c = 0; c < *p; c++)
556 {
557 unsigned bit;
558 unsigned char map_byte = p[1 + c];
559
560 putchar ('/');
561
562 for (bit = 0; bit < BYTEWIDTH; bit++)
563 if (map_byte & (1 << bit))
564 printchar (c * BYTEWIDTH + bit);
565 }
566 p += 1 + *p;
567 break;
568 }
569
570 case begline:
571 printf ("/begline");
572 break;
573
574 case endline:
575 printf ("/endline");
576 break;
577
578 case on_failure_jump:
579 extract_number_and_incr (&mcnt, &p);
580 printf ("/on_failure_jump/0/%d", mcnt);
581 break;
582
583 case on_failure_keep_string_jump:
584 extract_number_and_incr (&mcnt, &p);
585 printf ("/on_failure_keep_string_jump/0/%d", mcnt);
586 break;
587
588 case dummy_failure_jump:
589 extract_number_and_incr (&mcnt, &p);
590 printf ("/dummy_failure_jump/0/%d", mcnt);
591 break;
592
593 case push_dummy_failure:
594 printf ("/push_dummy_failure");
595 break;
596
597 case maybe_pop_jump:
598 extract_number_and_incr (&mcnt, &p);
599 printf ("/maybe_pop_jump/0/%d", mcnt);
600 break;
601
602 case pop_failure_jump:
603 extract_number_and_incr (&mcnt, &p);
604 printf ("/pop_failure_jump/0/%d", mcnt);
605 break;
606
607 case jump_past_alt:
608 extract_number_and_incr (&mcnt, &p);
609 printf ("/jump_past_alt/0/%d", mcnt);
610 break;
611
612 case jump:
613 extract_number_and_incr (&mcnt, &p);
614 printf ("/jump/0/%d", mcnt);
615 break;
616
617 case succeed_n:
618 extract_number_and_incr (&mcnt, &p);
619 extract_number_and_incr (&mcnt2, &p);
620 printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
621 break;
622
623 case jump_n:
624 extract_number_and_incr (&mcnt, &p);
625 extract_number_and_incr (&mcnt2, &p);
626 printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
627 break;
628
629 case set_number_at:
630 extract_number_and_incr (&mcnt, &p);
631 extract_number_and_incr (&mcnt2, &p);
632 printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
633 break;
634
635 case wordbound:
636 printf ("/wordbound");
637 break;
638
639 case notwordbound:
640 printf ("/notwordbound");
641 break;
642
643 case wordbeg:
644 printf ("/wordbeg");
645 break;
646
647 case wordend:
648 printf ("/wordend");
649
650#ifdef emacs
651 case before_dot:
652 printf ("/before_dot");
653 break;
654
655 case at_dot:
656 printf ("/at_dot");
657 break;
658
659 case after_dot:
660 printf ("/after_dot");
661 break;
662
663 case syntaxspec:
664 printf ("/syntaxspec");
665 mcnt = *p++;
666 printf ("/%d", mcnt);
667 break;
668
669 case notsyntaxspec:
670 printf ("/notsyntaxspec");
671 mcnt = *p++;
672 printf ("/%d", mcnt);
673 break;
674#endif /* emacs */
675
676 case wordchar:
677 printf ("/wordchar");
678 break;
679
680 case notwordchar:
681 printf ("/notwordchar");
682 break;
683
684 case begbuf:
685 printf ("/begbuf");
686 break;
687
688 case endbuf:
689 printf ("/endbuf");
690 break;
691
692 default:
693 printf ("?%d", *(p-1));
694 }
695 }
696 printf ("/\n");
697}
698
699
700void
701print_compiled_pattern (bufp)
702 struct re_pattern_buffer *bufp;
703{
704 unsigned char *buffer = bufp->buffer;
705
706 print_partial_compiled_pattern (buffer, buffer + bufp->used);
707 printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
708
709 if (bufp->fastmap_accurate && bufp->fastmap)
710 {
711 printf ("fastmap: ");
712 print_fastmap (bufp->fastmap);
713 }
714
715 printf ("re_nsub: %d\t", bufp->re_nsub);
716 printf ("regs_alloc: %d\t", bufp->regs_allocated);
717 printf ("can_be_null: %d\t", bufp->can_be_null);
718 printf ("newline_anchor: %d\n", bufp->newline_anchor);
719 printf ("no_sub: %d\t", bufp->no_sub);
720 printf ("not_bol: %d\t", bufp->not_bol);
721 printf ("not_eol: %d\t", bufp->not_eol);
722 printf ("syntax: %d\n", bufp->syntax);
723 /* Perhaps we should print the translate table? */
724}
725
726
727void
728print_double_string (where, string1, size1, string2, size2)
729 const char *where;
730 const char *string1;
731 const char *string2;
732 int size1;
733 int size2;
734{
735 unsigned this_char;
736
737 if (where == NULL)
738 printf ("(null)");
739 else
740 {
741 if (FIRST_STRING_P (where))
742 {
743 for (this_char = where - string1; this_char < size1; this_char++)
744 printchar (string1[this_char]);
745
746 where = string2;
747 }
748
749 for (this_char = where - string2; this_char < size2; this_char++)
750 printchar (string2[this_char]);
751 }
752}
753
754#else /* not DEBUG */
755
756#undef assert
757#define assert(e)
758
759#define DEBUG_STATEMENT(e)
760#define DEBUG_PRINT1(x)
761#define DEBUG_PRINT2(x1, x2)
762#define DEBUG_PRINT3(x1, x2, x3)
763#define DEBUG_PRINT4(x1, x2, x3, x4)
764#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
765#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
766
767#endif /* not DEBUG */
768
769/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
770 also be assigned to arbitrarily: each pattern buffer stores its own
771 syntax, so it can be changed between regex compilations. */
772reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
773
774
775/* Specify the precise syntax of regexps for compilation. This provides
776 for compatibility for various utilities which historically have
777 different, incompatible syntaxes.
778
779 The argument SYNTAX is a bit mask comprised of the various bits
780 defined in regex.h. We return the old syntax. */
781
782reg_syntax_t
783re_set_syntax (syntax)
784 reg_syntax_t syntax;
785{
786 reg_syntax_t ret = re_syntax_options;
787
788 re_syntax_options = syntax;
789 return ret;
790}
791
792/* This table gives an error message for each of the error codes listed
793 in regex.h. Obviously the order here has to be same as there. */
794
795static const char *re_error_msg[] =
796 { NULL, /* REG_NOERROR */
797 "No match", /* REG_NOMATCH */
798 "Invalid regular expression", /* REG_BADPAT */
799 "Invalid collation character", /* REG_ECOLLATE */
800 "Invalid character class name", /* REG_ECTYPE */
801 "Trailing backslash", /* REG_EESCAPE */
802 "Invalid back reference", /* REG_ESUBREG */
803 "Unmatched [ or [^", /* REG_EBRACK */
804 "Unmatched ( or \\(", /* REG_EPAREN */
805 "Unmatched \\{", /* REG_EBRACE */
806 "Invalid content of \\{\\}", /* REG_BADBR */
807 "Invalid range end", /* REG_ERANGE */
808 "Memory exhausted", /* REG_ESPACE */
809 "Invalid preceding regular expression", /* REG_BADRPT */
810 "Premature end of regular expression", /* REG_EEND */
811 "Regular expression too big", /* REG_ESIZE */
812 "Unmatched ) or \\)", /* REG_ERPAREN */
813 };
814
815/* Subroutine declarations and macros for regex_compile. */
816
817static void store_op1 (), store_op2 ();
818static void insert_op1 (), insert_op2 ();
819static boolean at_begline_loc_p (), at_endline_loc_p ();
820static boolean group_in_compile_stack ();
821static reg_errcode_t compile_range ();
822
823/* Fetch the next character in the uncompiled pattern---translating it
824 if necessary. Also cast from a signed character in the constant
825 string passed to us by the user to an unsigned char that we can use
826 as an array index (in, e.g., `translate'). */
827#define PATFETCH(c) \
828 do {if (p == pend) return REG_EEND; \
829 c = (unsigned char) *p++; \
830 if (translate) c = translate[c]; \
831 } while (0)
832
833/* Fetch the next character in the uncompiled pattern, with no
834 translation. */
835#define PATFETCH_RAW(c) \
836 do {if (p == pend) return REG_EEND; \
837 c = (unsigned char) *p++; \
838 } while (0)
839
840/* Go backwards one character in the pattern. */
841#define PATUNFETCH p--
842
843
844/* If `translate' is non-null, return translate[D], else just D. We
845 cast the subscript to translate because some data is declared as
846 `char *', to avoid warnings when a string constant is passed. But
847 when we use a character as a subscript we must make it unsigned. */
848#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
849
850
851/* Macros for outputting the compiled pattern into `buffer'. */
852
853/* If the buffer isn't allocated when it comes in, use this. */
854#define INIT_BUF_SIZE 32
855
856/* Make sure we have at least N more bytes of space in buffer. */
857#define GET_BUFFER_SPACE(n) \
858 while (b - bufp->buffer + (n) > bufp->allocated) \
859 EXTEND_BUFFER ()
860
861/* Make sure we have one more byte of buffer space and then add C to it. */
862#define BUF_PUSH(c) \
863 do { \
864 GET_BUFFER_SPACE (1); \
865 *b++ = (unsigned char) (c); \
866 } while (0)
867
868
869/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
870#define BUF_PUSH_2(c1, c2) \
871 do { \
872 GET_BUFFER_SPACE (2); \
873 *b++ = (unsigned char) (c1); \
874 *b++ = (unsigned char) (c2); \
875 } while (0)
876
877
878/* As with BUF_PUSH_2, except for three bytes. */
879#define BUF_PUSH_3(c1, c2, c3) \
880 do { \
881 GET_BUFFER_SPACE (3); \
882 *b++ = (unsigned char) (c1); \
883 *b++ = (unsigned char) (c2); \
884 *b++ = (unsigned char) (c3); \
885 } while (0)
886
887
888/* Store a jump with opcode OP at LOC to location TO. We store a
889 relative address offset by the three bytes the jump itself occupies. */
890#define STORE_JUMP(op, loc, to) \
891 store_op1 (op, loc, (to) - (loc) - 3)
892
893/* Likewise, for a two-argument jump. */
894#define STORE_JUMP2(op, loc, to, arg) \
895 store_op2 (op, loc, (to) - (loc) - 3, arg)
896
897/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
898#define INSERT_JUMP(op, loc, to) \
899 insert_op1 (op, loc, (to) - (loc) - 3, b)
900
901/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
902#define INSERT_JUMP2(op, loc, to, arg) \
903 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
904
905
906/* This is not an arbitrary limit: the arguments which represent offsets
907 into the pattern are two bytes long. So if 2^16 bytes turns out to
908 be too small, many things would have to change. */
909#define MAX_BUF_SIZE (1L << 16)
910
911
912/* Extend the buffer by twice its current size via realloc and
913 reset the pointers that pointed into the old block to point to the
914 correct places in the new one. If extending the buffer results in it
915 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
916#define EXTEND_BUFFER() \
917 do { \
918 unsigned char *old_buffer = bufp->buffer; \
919 if (bufp->allocated == MAX_BUF_SIZE) \
920 return REG_ESIZE; \
921 bufp->allocated <<= 1; \
922 if (bufp->allocated > MAX_BUF_SIZE) \
923 bufp->allocated = MAX_BUF_SIZE; \
924 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
925 if (bufp->buffer == NULL) \
926 return REG_ESPACE; \
927 /* If the buffer moved, move all the pointers into it. */ \
928 if (old_buffer != bufp->buffer) \
929 { \
930 b = (b - old_buffer) + bufp->buffer; \
931 begalt = (begalt - old_buffer) + bufp->buffer; \
932 if (fixup_alt_jump) \
933 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
934 if (laststart) \
935 laststart = (laststart - old_buffer) + bufp->buffer; \
936 if (pending_exact) \
937 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
938 } \
939 } while (0)
940
941
942/* Since we have one byte reserved for the register number argument to
943 {start,stop}_memory, the maximum number of groups we can report
944 things about is what fits in that byte. */
945#define MAX_REGNUM 255
946
947/* But patterns can have more than `MAX_REGNUM' registers. We just
948 ignore the excess. */
949typedef unsigned regnum_t;
950
951
952/* Macros for the compile stack. */
953
954/* Since offsets can go either forwards or backwards, this type needs to
955 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
956typedef int pattern_offset_t;
957
958typedef struct
959{
960 pattern_offset_t begalt_offset;
961 pattern_offset_t fixup_alt_jump;
962 pattern_offset_t inner_group_offset;
963 pattern_offset_t laststart_offset;
964 regnum_t regnum;
965} compile_stack_elt_t;
966
967
968typedef struct
969{
970 compile_stack_elt_t *stack;
971 unsigned size;
972 unsigned avail; /* Offset of next open position. */
973} compile_stack_type;
974
975
976#define INIT_COMPILE_STACK_SIZE 32
977
978#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
979#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
980
981/* The next available element. */
982#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
983
984
985/* Set the bit for character C in a list. */
986#define SET_LIST_BIT(c) \
987 (b[((unsigned char) (c)) / BYTEWIDTH] \
988 |= 1 << (((unsigned char) c) % BYTEWIDTH))
989
990
991/* Get the next unsigned number in the uncompiled pattern. */
992#define GET_UNSIGNED_NUMBER(num) \
993 { if (p != pend) \
994 { \
995 PATFETCH (c); \
996 while (ISDIGIT (c)) \
997 { \
998 if (num < 0) \
999 num = 0; \
1000 num = num * 10 + c - '0'; \
1001 if (p == pend) \
1002 break; \
1003 PATFETCH (c); \
1004 } \
1005 } \
1006 }
1007
1008#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
1009
1010#define IS_CHAR_CLASS(string) \
1011 (STREQ (string, "alpha") || STREQ (string, "upper") \
1012 || STREQ (string, "lower") || STREQ (string, "digit") \
1013 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
1014 || STREQ (string, "space") || STREQ (string, "print") \
1015 || STREQ (string, "punct") || STREQ (string, "graph") \
1016 || STREQ (string, "cntrl") || STREQ (string, "blank"))
1017
1018/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1019 Returns one of error codes defined in `regex.h', or zero for success.
1020
1021 Assumes the `allocated' (and perhaps `buffer') and `translate'
1022 fields are set in BUFP on entry.
1023
1024 If it succeeds, results are put in BUFP (if it returns an error, the
1025 contents of BUFP are undefined):
1026 `buffer' is the compiled pattern;
1027 `syntax' is set to SYNTAX;
1028 `used' is set to the length of the compiled pattern;
1029 `fastmap_accurate' is zero;
1030 `re_nsub' is the number of subexpressions in PATTERN;
1031 `not_bol' and `not_eol' are zero;
1032
1033 The `fastmap' and `newline_anchor' fields are neither
1034 examined nor set. */
1035
1036static reg_errcode_t
1037regex_compile (pattern, size, syntax, bufp)
1038 const char *pattern;
1039 int size;
1040 reg_syntax_t syntax;
1041 struct re_pattern_buffer *bufp;
1042{
1043 /* We fetch characters from PATTERN here. Even though PATTERN is
1044 `char *' (i.e., signed), we declare these variables as unsigned, so
1045 they can be reliably used as array indices. */
1046 register unsigned char c, c1;
1047
1048 /* A random tempory spot in PATTERN. */
1049 const char *p1;
1050
1051 /* Points to the end of the buffer, where we should append. */
1052 register unsigned char *b;
1053
1054 /* Keeps track of unclosed groups. */
1055 compile_stack_type compile_stack;
1056
1057 /* Points to the current (ending) position in the pattern. */
1058 const char *p = pattern;
1059 const char *pend = pattern + size;
1060
1061 /* How to translate the characters in the pattern. */
1062 char *translate = bufp->translate;
1063
1064 /* Address of the count-byte of the most recently inserted `exactn'
1065 command. This makes it possible to tell if a new exact-match
1066 character can be added to that command or if the character requires
1067 a new `exactn' command. */
1068 unsigned char *pending_exact = 0;
1069
1070 /* Address of start of the most recently finished expression.
1071 This tells, e.g., postfix * where to find the start of its
1072 operand. Reset at the beginning of groups and alternatives. */
1073 unsigned char *laststart = 0;
1074
1075 /* Address of beginning of regexp, or inside of last group. */
1076 unsigned char *begalt;
1077
1078 /* Place in the uncompiled pattern (i.e., the {) to
1079 which to go back if the interval is invalid. */
1080 const char *beg_interval;
1081
1082 /* Address of the place where a forward jump should go to the end of
1083 the containing expression. Each alternative of an `or' -- except the
1084 last -- ends with a forward jump of this sort. */
1085 unsigned char *fixup_alt_jump = 0;
1086
1087 /* Counts open-groups as they are encountered. Remembered for the
1088 matching close-group on the compile stack, so the same register
1089 number is put in the stop_memory as the start_memory. */
1090 regnum_t regnum = 0;
1091
1092#ifdef DEBUG
1093 DEBUG_PRINT1 ("\nCompiling pattern: ");
1094 if (debug)
1095 {
1096 unsigned debug_count;
1097
1098 for (debug_count = 0; debug_count < size; debug_count++)
1099 printchar (pattern[debug_count]);
1100 putchar ('\n');
1101 }
1102#endif /* DEBUG */
1103
1104 /* Initialize the compile stack. */
1105 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1106 if (compile_stack.stack == NULL)
1107 return REG_ESPACE;
1108
1109 compile_stack.size = INIT_COMPILE_STACK_SIZE;
1110 compile_stack.avail = 0;
1111
1112 /* Initialize the pattern buffer. */
1113 bufp->syntax = syntax;
1114 bufp->fastmap_accurate = 0;
1115 bufp->not_bol = bufp->not_eol = 0;
1116
1117 /* Set `used' to zero, so that if we return an error, the pattern
1118 printer (for debugging) will think there's no pattern. We reset it
1119 at the end. */
1120 bufp->used = 0;
1121
1122 /* Always count groups, whether or not bufp->no_sub is set. */
1123 bufp->re_nsub = 0;
1124
1125#if !defined (emacs) && !defined (SYNTAX_TABLE)
1126 /* Initialize the syntax table. */
1127 init_syntax_once ();
1128#endif
1129
1130 if (bufp->allocated == 0)
1131 {
1132 if (bufp->buffer)
1133 { /* If zero allocated, but buffer is non-null, try to realloc
1134 enough space. This loses if buffer's address is bogus, but
1135 that is the user's responsibility. */
1136 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1137 }
1138 else
1139 { /* Caller did not allocate a buffer. Do it for them. */
1140 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1141 }
1142 if (!bufp->buffer) return REG_ESPACE;
1143
1144 bufp->allocated = INIT_BUF_SIZE;
1145 }
1146
1147 begalt = b = bufp->buffer;
1148
1149 /* Loop through the uncompiled pattern until we're at the end. */
1150 while (p != pend)
1151 {
1152 PATFETCH (c);
1153
1154 switch (c)
1155 {
1156 case '^':
1157 {
1158 if ( /* If at start of pattern, it's an operator. */
1159 p == pattern + 1
1160 /* If context independent, it's an operator. */
1161 || syntax & RE_CONTEXT_INDEP_ANCHORS
1162 /* Otherwise, depends on what's come before. */
1163 || at_begline_loc_p (pattern, p, syntax))
1164 BUF_PUSH (begline);
1165 else
1166 goto normal_char;
1167 }
1168 break;
1169
1170
1171 case '$':
1172 {
1173 if ( /* If at end of pattern, it's an operator. */
1174 p == pend
1175 /* If context independent, it's an operator. */
1176 || syntax & RE_CONTEXT_INDEP_ANCHORS
1177 /* Otherwise, depends on what's next. */
1178 || at_endline_loc_p (p, pend, syntax))
1179 BUF_PUSH (endline);
1180 else
1181 goto normal_char;
1182 }
1183 break;
1184
1185
1186 case '+':
1187 case '?':
1188 if ((syntax & RE_BK_PLUS_QM)
1189 || (syntax & RE_LIMITED_OPS))
1190 goto normal_char;
1191 handle_plus:
1192 case '*':
1193 /* If there is no previous pattern... */
1194 if (!laststart)
1195 {
1196 if (syntax & RE_CONTEXT_INVALID_OPS)
1197 return REG_BADRPT;
1198 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1199 goto normal_char;
1200 }
1201
1202 {
1203 /* Are we optimizing this jump? */
1204 boolean keep_string_p = false;
1205
1206 /* 1 means zero (many) matches is allowed. */
1207 char zero_times_ok = 0, many_times_ok = 0;
1208
1209 /* If there is a sequence of repetition chars, collapse it
1210 down to just one (the right one). We can't combine
1211 interval operators with these because of, e.g., `a{2}*',
1212 which should only match an even number of `a's. */
1213
1214 for (;;)
1215 {
1216 zero_times_ok |= c != '+';
1217 many_times_ok |= c != '?';
1218
1219 if (p == pend)
1220 break;
1221
1222 PATFETCH (c);
1223
1224 if (c == '*'
1225 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1226 ;
1227
1228 else if (syntax & RE_BK_PLUS_QM && c == '\\')
1229 {
1230 if (p == pend) return REG_EESCAPE;
1231
1232 PATFETCH (c1);
1233 if (!(c1 == '+' || c1 == '?'))
1234 {
1235 PATUNFETCH;
1236 PATUNFETCH;
1237 break;
1238 }
1239
1240 c = c1;
1241 }
1242 else
1243 {
1244 PATUNFETCH;
1245 break;
1246 }
1247
1248 /* If we get here, we found another repeat character. */
1249 }
1250
1251 /* Star, etc. applied to an empty pattern is equivalent
1252 to an empty pattern. */
1253 if (!laststart)
1254 break;
1255
1256 /* Now we know whether or not zero matches is allowed
1257 and also whether or not two or more matches is allowed. */
1258 if (many_times_ok)
1259 { /* More than one repetition is allowed, so put in at the
1260 end a backward relative jump from `b' to before the next
1261 jump we're going to put in below (which jumps from
1262 laststart to after this jump).
1263
1264 But if we are at the `*' in the exact sequence `.*\n',
1265 insert an unconditional jump backwards to the .,
1266 instead of the beginning of the loop. This way we only
1267 push a failure point once, instead of every time
1268 through the loop. */
1269 assert (p - 1 > pattern);
1270
1271 /* Allocate the space for the jump. */
1272 GET_BUFFER_SPACE (3);
1273
1274 /* We know we are not at the first character of the pattern,
1275 because laststart was nonzero. And we've already
1276 incremented `p', by the way, to be the character after
1277 the `*'. Do we have to do something analogous here
1278 for null bytes, because of RE_DOT_NOT_NULL? */
1279 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1280 && zero_times_ok
1281 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1282 && !(syntax & RE_DOT_NEWLINE))
1283 { /* We have .*\n. */
1284 STORE_JUMP (jump, b, laststart);
1285 keep_string_p = true;
1286 }
1287 else
1288 /* Anything else. */
1289 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1290
1291 /* We've added more stuff to the buffer. */
1292 b += 3;
1293 }
1294
1295 /* On failure, jump from laststart to b + 3, which will be the
1296 end of the buffer after this jump is inserted. */
1297 GET_BUFFER_SPACE (3);
1298 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1299 : on_failure_jump,
1300 laststart, b + 3);
1301 pending_exact = 0;
1302 b += 3;
1303
1304 if (!zero_times_ok)
1305 {
1306 /* At least one repetition is required, so insert a
1307 `dummy_failure_jump' before the initial
1308 `on_failure_jump' instruction of the loop. This
1309 effects a skip over that instruction the first time
1310 we hit that loop. */
1311 GET_BUFFER_SPACE (3);
1312 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1313 b += 3;
1314 }
1315 }
1316 break;
1317
1318
1319 case '.':
1320 laststart = b;
1321 BUF_PUSH (anychar);
1322 break;
1323
1324
1325 case '[':
1326 {
1327 boolean had_char_class = false;
1328
1329 if (p == pend) return REG_EBRACK;
1330
1331 /* Ensure that we have enough space to push a charset: the
1332 opcode, the length count, and the bitset; 34 bytes in all. */
1333 GET_BUFFER_SPACE (34);
1334
1335 laststart = b;
1336
1337 /* We test `*p == '^' twice, instead of using an if
1338 statement, so we only need one BUF_PUSH. */
1339 BUF_PUSH (*p == '^' ? charset_not : charset);
1340 if (*p == '^')
1341 p++;
1342
1343 /* Remember the first position in the bracket expression. */
1344 p1 = p;
1345
1346 /* Push the number of bytes in the bitmap. */
1347 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1348
1349 /* Clear the whole map. */
1350 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1351
1352 /* charset_not matches newline according to a syntax bit. */
1353 if ((re_opcode_t) b[-2] == charset_not
1354 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1355 SET_LIST_BIT ('\n');
1356
1357 /* Read in characters and ranges, setting map bits. */
1358 for (;;)
1359 {
1360 if (p == pend) return REG_EBRACK;
1361
1362 PATFETCH (c);
1363
1364 /* \ might escape characters inside [...] and [^...]. */
1365 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1366 {
1367 if (p == pend) return REG_EESCAPE;
1368
1369 PATFETCH (c1);
1370 SET_LIST_BIT (c1);
1371 continue;
1372 }
1373
1374 /* Could be the end of the bracket expression. If it's
1375 not (i.e., when the bracket expression is `[]' so
1376 far), the ']' character bit gets set way below. */
1377 if (c == ']' && p != p1 + 1)
1378 break;
1379
1380 /* Look ahead to see if it's a range when the last thing
1381 was a character class. */
1382 if (had_char_class && c == '-' && *p != ']')
1383 return REG_ERANGE;
1384
1385 /* Look ahead to see if it's a range when the last thing
1386 was a character: if this is a hyphen not at the
1387 beginning or the end of a list, then it's the range
1388 operator. */
1389 if (c == '-'
1390 && !(p - 2 >= pattern && p[-2] == '[')
1391 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1392 && *p != ']')
1393 {
1394 reg_errcode_t ret
1395 = compile_range (&p, pend, translate, syntax, b);
1396 if (ret != REG_NOERROR) return ret;
1397 }
1398
1399 else if (p[0] == '-' && p[1] != ']')
1400 { /* This handles ranges made up of characters only. */
1401 reg_errcode_t ret;
1402
1403 /* Move past the `-'. */
1404 PATFETCH (c1);
1405
1406 ret = compile_range (&p, pend, translate, syntax, b);
1407 if (ret != REG_NOERROR) return ret;
1408 }
1409
1410 /* See if we're at the beginning of a possible character
1411 class. */
1412
1413 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1414 { /* Leave room for the null. */
1415 char str[CHAR_CLASS_MAX_LENGTH + 1];
1416
1417 PATFETCH (c);
1418 c1 = 0;
1419
1420 /* If pattern is `[[:'. */
1421 if (p == pend) return REG_EBRACK;
1422
1423 for (;;)
1424 {
1425 PATFETCH (c);
1426 if (c == ':' || c == ']' || p == pend
1427 || c1 == CHAR_CLASS_MAX_LENGTH)
1428 break;
1429 str[c1++] = c;
1430 }
1431 str[c1] = '\0';
1432
1433 /* If isn't a word bracketed by `[:' and:`]':
1434 undo the ending character, the letters, and leave
1435 the leading `:' and `[' (but set bits for them). */
1436 if (c == ':' && *p == ']')
1437 {
1438 int ch;
1439 boolean is_alnum = STREQ (str, "alnum");
1440 boolean is_alpha = STREQ (str, "alpha");
1441 boolean is_blank = STREQ (str, "blank");
1442 boolean is_cntrl = STREQ (str, "cntrl");
1443 boolean is_digit = STREQ (str, "digit");
1444 boolean is_graph = STREQ (str, "graph");
1445 boolean is_lower = STREQ (str, "lower");
1446 boolean is_print = STREQ (str, "print");
1447 boolean is_punct = STREQ (str, "punct");
1448 boolean is_space = STREQ (str, "space");
1449 boolean is_upper = STREQ (str, "upper");
1450 boolean is_xdigit = STREQ (str, "xdigit");
1451
1452 if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1453
1454 /* Throw away the ] at the end of the character
1455 class. */
1456 PATFETCH (c);
1457
1458 if (p == pend) return REG_EBRACK;
1459
1460 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1461 {
1462 if ( (is_alnum && ISALNUM (ch))
1463 || (is_alpha && ISALPHA (ch))
1464 || (is_blank && ISBLANK (ch))
1465 || (is_cntrl && ISCNTRL (ch))
1466 || (is_digit && ISDIGIT (ch))
1467 || (is_graph && ISGRAPH (ch))
1468 || (is_lower && ISLOWER (ch))
1469 || (is_print && ISPRINT (ch))
1470 || (is_punct && ISPUNCT (ch))
1471 || (is_space && ISSPACE (ch))
1472 || (is_upper && ISUPPER (ch))
1473 || (is_xdigit && ISXDIGIT (ch)))
1474 SET_LIST_BIT (ch);
1475 }
1476 had_char_class = true;
1477 }
1478 else
1479 {
1480 c1++;
1481 while (c1--)
1482 PATUNFETCH;
1483 SET_LIST_BIT ('[');
1484 SET_LIST_BIT (':');
1485 had_char_class = false;
1486 }
1487 }
1488 else
1489 {
1490 had_char_class = false;
1491 SET_LIST_BIT (c);
1492 }
1493 }
1494
1495 /* Discard any (non)matching list bytes that are all 0 at the
1496 end of the map. Decrease the map-length byte too. */
1497 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1498 b[-1]--;
1499 b += b[-1];
1500 }
1501 break;
1502
1503
1504 case '(':
1505 if (syntax & RE_NO_BK_PARENS)
1506 goto handle_open;
1507 else
1508 goto normal_char;
1509
1510
1511 case ')':
1512 if (syntax & RE_NO_BK_PARENS)
1513 goto handle_close;
1514 else
1515 goto normal_char;
1516
1517
1518 case '\n':
1519 if (syntax & RE_NEWLINE_ALT)
1520 goto handle_alt;
1521 else
1522 goto normal_char;
1523
1524
1525 case '|':
1526 if (syntax & RE_NO_BK_VBAR)
1527 goto handle_alt;
1528 else
1529 goto normal_char;
1530
1531
1532 case '{':
1533 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1534 goto handle_interval;
1535 else
1536 goto normal_char;
1537
1538
1539 case '\\':
1540 if (p == pend) return REG_EESCAPE;
1541
1542 /* Do not translate the character after the \, so that we can
1543 distinguish, e.g., \B from \b, even if we normally would
1544 translate, e.g., B to b. */
1545 PATFETCH_RAW (c);
1546
1547 switch (c)
1548 {
1549 case '(':
1550 if (syntax & RE_NO_BK_PARENS)
1551 goto normal_backslash;
1552
1553 handle_open:
1554 bufp->re_nsub++;
1555 regnum++;
1556
1557 if (COMPILE_STACK_FULL)
1558 {
1559 RETALLOC (compile_stack.stack, compile_stack.size << 1,
1560 compile_stack_elt_t);
1561 if (compile_stack.stack == NULL) return REG_ESPACE;
1562
1563 compile_stack.size <<= 1;
1564 }
1565
1566 /* These are the values to restore when we hit end of this
1567 group. They are all relative offsets, so that if the
1568 whole pattern moves because of realloc, they will still
1569 be valid. */
1570 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1571 COMPILE_STACK_TOP.fixup_alt_jump
1572 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1573 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1574 COMPILE_STACK_TOP.regnum = regnum;
1575
1576 /* We will eventually replace the 0 with the number of
1577 groups inner to this one. But do not push a
1578 start_memory for groups beyond the last one we can
1579 represent in the compiled pattern. */
1580 if (regnum <= MAX_REGNUM)
1581 {
1582 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1583 BUF_PUSH_3 (start_memory, regnum, 0);
1584 }
1585
1586 compile_stack.avail++;
1587
1588 fixup_alt_jump = 0;
1589 laststart = 0;
1590 begalt = b;
1591 /* If we've reached MAX_REGNUM groups, then this open
1592 won't actually generate any code, so we'll have to
1593 clear pending_exact explicitly. */
1594 pending_exact = 0;
1595 break;
1596
1597
1598 case ')':
1599 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1600
1601 if (COMPILE_STACK_EMPTY)
1602 {
1603 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1604 goto normal_backslash;
1605 else
1606 return REG_ERPAREN;
1607 }
1608
1609 handle_close:
1610 if (fixup_alt_jump)
1611 { /* Push a dummy failure point at the end of the
1612 alternative for a possible future
1613 `pop_failure_jump' to pop. See comments at
1614 `push_dummy_failure' in `re_match_2'. */
1615 BUF_PUSH (push_dummy_failure);
1616
1617 /* We allocated space for this jump when we assigned
1618 to `fixup_alt_jump', in the `handle_alt' case below. */
1619 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1620 }
1621
1622 /* See similar code for backslashed left paren above. */
1623 if (COMPILE_STACK_EMPTY)
1624 {
1625 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1626 goto normal_char;
1627 else
1628 return REG_ERPAREN;
1629 }
1630
1631 /* Since we just checked for an empty stack above, this
1632 ``can't happen''. */
1633 assert (compile_stack.avail != 0);
1634 {
1635 /* We don't just want to restore into `regnum', because
1636 later groups should continue to be numbered higher,
1637 as in `(ab)c(de)' -- the second group is #2. */
1638 regnum_t this_group_regnum;
1639
1640 compile_stack.avail--;
1641 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1642 fixup_alt_jump
1643 = COMPILE_STACK_TOP.fixup_alt_jump
1644 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1645 : 0;
1646 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1647 this_group_regnum = COMPILE_STACK_TOP.regnum;
1648 /* If we've reached MAX_REGNUM groups, then this open
1649 won't actually generate any code, so we'll have to
1650 clear pending_exact explicitly. */
1651 pending_exact = 0;
1652
1653 /* We're at the end of the group, so now we know how many
1654 groups were inside this one. */
1655 if (this_group_regnum <= MAX_REGNUM)
1656 {
1657 unsigned char *inner_group_loc
1658 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1659
1660 *inner_group_loc = regnum - this_group_regnum;
1661 BUF_PUSH_3 (stop_memory, this_group_regnum,
1662 regnum - this_group_regnum);
1663 }
1664 }
1665 break;
1666
1667
1668 case '|': /* `\|'. */
1669 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1670 goto normal_backslash;
1671 handle_alt:
1672 if (syntax & RE_LIMITED_OPS)
1673 goto normal_char;
1674
1675 /* Insert before the previous alternative a jump which
1676 jumps to this alternative if the former fails. */
1677 GET_BUFFER_SPACE (3);
1678 INSERT_JUMP (on_failure_jump, begalt, b + 6);
1679 pending_exact = 0;
1680 b += 3;
1681
1682 /* The alternative before this one has a jump after it
1683 which gets executed if it gets matched. Adjust that
1684 jump so it will jump to this alternative's analogous
1685 jump (put in below, which in turn will jump to the next
1686 (if any) alternative's such jump, etc.). The last such
1687 jump jumps to the correct final destination. A picture:
1688 _____ _____
1689 | | | |
1690 | v | v
1691 a | b | c
1692
1693 If we are at `b', then fixup_alt_jump right now points to a
1694 three-byte space after `a'. We'll put in the jump, set
1695 fixup_alt_jump to right after `b', and leave behind three
1696 bytes which we'll fill in when we get to after `c'. */
1697
1698 if (fixup_alt_jump)
1699 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1700
1701 /* Mark and leave space for a jump after this alternative,
1702 to be filled in later either by next alternative or
1703 when know we're at the end of a series of alternatives. */
1704 fixup_alt_jump = b;
1705 GET_BUFFER_SPACE (3);
1706 b += 3;
1707
1708 laststart = 0;
1709 begalt = b;
1710 break;
1711
1712
1713 case '{':
1714 /* If \{ is a literal. */
1715 if (!(syntax & RE_INTERVALS)
1716 /* If we're at `\{' and it's not the open-interval
1717 operator. */
1718 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1719 || (p - 2 == pattern && p == pend))
1720 goto normal_backslash;
1721
1722 handle_interval:
1723 {
1724 /* If got here, then the syntax allows intervals. */
1725
1726 /* At least (most) this many matches must be made. */
1727 int lower_bound = -1, upper_bound = -1;
1728
1729 beg_interval = p - 1;
1730
1731 if (p == pend)
1732 {
1733 if (syntax & RE_NO_BK_BRACES)
1734 goto unfetch_interval;
1735 else
1736 return REG_EBRACE;
1737 }
1738
1739 GET_UNSIGNED_NUMBER (lower_bound);
1740
1741 if (c == ',')
1742 {
1743 GET_UNSIGNED_NUMBER (upper_bound);
1744 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1745 }
1746 else
1747 /* Interval such as `{1}' => match exactly once. */
1748 upper_bound = lower_bound;
1749
1750 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1751 || lower_bound > upper_bound)
1752 {
1753 if (syntax & RE_NO_BK_BRACES)
1754 goto unfetch_interval;
1755 else
1756 return REG_BADBR;
1757 }
1758
1759 if (!(syntax & RE_NO_BK_BRACES))
1760 {
1761 if (c != '\\') return REG_EBRACE;
1762
1763 PATFETCH (c);
1764 }
1765
1766 if (c != '}')
1767 {
1768 if (syntax & RE_NO_BK_BRACES)
1769 goto unfetch_interval;
1770 else
1771 return REG_BADBR;
1772 }
1773
1774 /* We just parsed a valid interval. */
1775
1776 /* If it's invalid to have no preceding re. */
1777 if (!laststart)
1778 {
1779 if (syntax & RE_CONTEXT_INVALID_OPS)
1780 return REG_BADRPT;
1781 else if (syntax & RE_CONTEXT_INDEP_OPS)
1782 laststart = b;
1783 else
1784 goto unfetch_interval;
1785 }
1786
1787 /* If the upper bound is zero, don't want to succeed at
1788 all; jump from `laststart' to `b + 3', which will be
1789 the end of the buffer after we insert the jump. */
1790 if (upper_bound == 0)
1791 {
1792 GET_BUFFER_SPACE (3);
1793 INSERT_JUMP (jump, laststart, b + 3);
1794 b += 3;
1795 }
1796
1797 /* Otherwise, we have a nontrivial interval. When
1798 we're all done, the pattern will look like:
1799 set_number_at <jump count> <upper bound>
1800 set_number_at <succeed_n count> <lower bound>
1801 succeed_n <after jump addr> <succed_n count>
1802 <body of loop>
1803 jump_n <succeed_n addr> <jump count>
1804 (The upper bound and `jump_n' are omitted if
1805 `upper_bound' is 1, though.) */
1806 else
1807 { /* If the upper bound is > 1, we need to insert
1808 more at the end of the loop. */
1809 unsigned nbytes = 10 + (upper_bound > 1) * 10;
1810
1811 GET_BUFFER_SPACE (nbytes);
1812
1813 /* Initialize lower bound of the `succeed_n', even
1814 though it will be set during matching by its
1815 attendant `set_number_at' (inserted next),
1816 because `re_compile_fastmap' needs to know.
1817 Jump to the `jump_n' we might insert below. */
1818 INSERT_JUMP2 (succeed_n, laststart,
1819 b + 5 + (upper_bound > 1) * 5,
1820 lower_bound);
1821 b += 5;
1822
1823 /* Code to initialize the lower bound. Insert
1824 before the `succeed_n'. The `5' is the last two
1825 bytes of this `set_number_at', plus 3 bytes of
1826 the following `succeed_n'. */
1827 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1828 b += 5;
1829
1830 if (upper_bound > 1)
1831 { /* More than one repetition is allowed, so
1832 append a backward jump to the `succeed_n'
1833 that starts this interval.
1834
1835 When we've reached this during matching,
1836 we'll have matched the interval once, so
1837 jump back only `upper_bound - 1' times. */
1838 STORE_JUMP2 (jump_n, b, laststart + 5,
1839 upper_bound - 1);
1840 b += 5;
1841
1842 /* The location we want to set is the second
1843 parameter of the `jump_n'; that is `b-2' as
1844 an absolute address. `laststart' will be
1845 the `set_number_at' we're about to insert;
1846 `laststart+3' the number to set, the source
1847 for the relative address. But we are
1848 inserting into the middle of the pattern --
1849 so everything is getting moved up by 5.
1850 Conclusion: (b - 2) - (laststart + 3) + 5,
1851 i.e., b - laststart.
1852
1853 We insert this at the beginning of the loop
1854 so that if we fail during matching, we'll
1855 reinitialize the bounds. */
1856 insert_op2 (set_number_at, laststart, b - laststart,
1857 upper_bound - 1, b);
1858 b += 5;
1859 }
1860 }
1861 pending_exact = 0;
1862 beg_interval = NULL;
1863 }
1864 break;
1865
1866 unfetch_interval:
1867 /* If an invalid interval, match the characters as literals. */
1868 assert (beg_interval);
1869 p = beg_interval;
1870 beg_interval = NULL;
1871
1872 /* normal_char and normal_backslash need `c'. */
1873 PATFETCH (c);
1874
1875 if (!(syntax & RE_NO_BK_BRACES))
1876 {
1877 if (p > pattern && p[-1] == '\\')
1878 goto normal_backslash;
1879 }
1880 goto normal_char;
1881
1882#ifdef emacs
1883 /* There is no way to specify the before_dot and after_dot
1884 operators. rms says this is ok. --karl */
1885 case '=':
1886 BUF_PUSH (at_dot);
1887 break;
1888
1889 case 's':
1890 laststart = b;
1891 PATFETCH (c);
1892 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1893 break;
1894
1895 case 'S':
1896 laststart = b;
1897 PATFETCH (c);
1898 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1899 break;
1900#endif /* emacs */
1901
1902
1903 case 'w':
1904 laststart = b;
1905 BUF_PUSH (wordchar);
1906 break;
1907
1908
1909 case 'W':
1910 laststart = b;
1911 BUF_PUSH (notwordchar);
1912 break;
1913
1914
1915 case '<':
1916 BUF_PUSH (wordbeg);
1917 break;
1918
1919 case '>':
1920 BUF_PUSH (wordend);
1921 break;
1922
1923 case 'b':
1924 BUF_PUSH (wordbound);
1925 break;
1926
1927 case 'B':
1928 BUF_PUSH (notwordbound);
1929 break;
1930
1931 case '`':
1932 BUF_PUSH (begbuf);
1933 break;
1934
1935 case '\'':
1936 BUF_PUSH (endbuf);
1937 break;
1938
1939 case '1': case '2': case '3': case '4': case '5':
1940 case '6': case '7': case '8': case '9':
1941 if (syntax & RE_NO_BK_REFS)
1942 goto normal_char;
1943
1944 c1 = c - '0';
1945
1946 if (c1 > regnum)
1947 return REG_ESUBREG;
1948
1949 /* Can't back reference to a subexpression if inside of it. */
1950 if (group_in_compile_stack (compile_stack, c1))
1951 goto normal_char;
1952
1953 laststart = b;
1954 BUF_PUSH_2 (duplicate, c1);
1955 break;
1956
1957
1958 case '+':
1959 case '?':
1960 if (syntax & RE_BK_PLUS_QM)
1961 goto handle_plus;
1962 else
1963 goto normal_backslash;
1964
1965 default:
1966 normal_backslash:
1967 /* You might think it would be useful for \ to mean
1968 not to translate; but if we don't translate it
1969 it will never match anything. */
1970 c = TRANSLATE (c);
1971 goto normal_char;
1972 }
1973 break;
1974
1975
1976 default:
1977 /* Expects the character in `c'. */
1978 normal_char:
1979 /* If no exactn currently being built. */
1980 if (!pending_exact
1981
1982 /* If last exactn not at current position. */
1983 || pending_exact + *pending_exact + 1 != b
1984
1985 /* We have only one byte following the exactn for the count. */
1986 || *pending_exact == (1 << BYTEWIDTH) - 1
1987
1988 /* If followed by a repetition operator. */
1989 || *p == '*' || *p == '^'
1990 || ((syntax & RE_BK_PLUS_QM)
1991 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
1992 : (*p == '+' || *p == '?'))
1993 || ((syntax & RE_INTERVALS)
1994 && ((syntax & RE_NO_BK_BRACES)
1995 ? *p == '{'
1996 : (p[0] == '\\' && p[1] == '{'))))
1997 {
1998 /* Start building a new exactn. */
1999
2000 laststart = b;
2001
2002 BUF_PUSH_2 (exactn, 0);
2003 pending_exact = b - 1;
2004 }
2005
2006 BUF_PUSH (c);
2007 (*pending_exact)++;
2008 break;
2009 } /* switch (c) */
2010 } /* while p != pend */
2011
2012
2013 /* Through the pattern now. */
2014
2015 if (fixup_alt_jump)
2016 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2017
2018 if (!COMPILE_STACK_EMPTY)
2019 return REG_EPAREN;
2020
2021 free (compile_stack.stack);
2022
2023 /* We have succeeded; set the length of the buffer. */
2024 bufp->used = b - bufp->buffer;
2025
2026#ifdef DEBUG
2027 if (debug)
2028 {
2029 DEBUG_PRINT1 ("\nCompiled pattern: ");
2030 print_compiled_pattern (bufp);
2031 }
2032#endif /* DEBUG */
2033
2034 return REG_NOERROR;
2035} /* regex_compile */
2036
2037/* Subroutines for `regex_compile'. */
2038
2039/* Store OP at LOC followed by two-byte integer parameter ARG. */
2040
2041static void
2042store_op1 (op, loc, arg)
2043 re_opcode_t op;
2044 unsigned char *loc;
2045 int arg;
2046{
2047 *loc = (unsigned char) op;
2048 STORE_NUMBER (loc + 1, arg);
2049}
2050
2051
2052/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
2053
2054static void
2055store_op2 (op, loc, arg1, arg2)
2056 re_opcode_t op;
2057 unsigned char *loc;
2058 int arg1, arg2;
2059{
2060 *loc = (unsigned char) op;
2061 STORE_NUMBER (loc + 1, arg1);
2062 STORE_NUMBER (loc + 3, arg2);
2063}
2064
2065
2066/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2067 for OP followed by two-byte integer parameter ARG. */
2068
2069static void
2070insert_op1 (op, loc, arg, end)
2071 re_opcode_t op;
2072 unsigned char *loc;
2073 int arg;
2074 unsigned char *end;
2075{
2076 register unsigned char *pfrom = end;
2077 register unsigned char *pto = end + 3;
2078
2079 while (pfrom != loc)
2080 *--pto = *--pfrom;
2081
2082 store_op1 (op, loc, arg);
2083}
2084
2085
2086/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
2087
2088static void
2089insert_op2 (op, loc, arg1, arg2, end)
2090 re_opcode_t op;
2091 unsigned char *loc;
2092 int arg1, arg2;
2093 unsigned char *end;
2094{
2095 register unsigned char *pfrom = end;
2096 register unsigned char *pto = end + 5;
2097
2098 while (pfrom != loc)
2099 *--pto = *--pfrom;
2100
2101 store_op2 (op, loc, arg1, arg2);
2102}
2103
2104
2105/* P points to just after a ^ in PATTERN. Return true if that ^ comes
2106 after an alternative or a begin-subexpression. We assume there is at
2107 least one character before the ^. */
2108
2109static boolean
2110at_begline_loc_p (pattern, p, syntax)
2111 const char *pattern, *p;
2112 reg_syntax_t syntax;
2113{
2114 const char *prev = p - 2;
2115 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2116
2117 return
2118 /* After a subexpression? */
2119 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2120 /* After an alternative? */
2121 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2122}
2123
2124
2125/* The dual of at_begline_loc_p. This one is for $. We assume there is
2126 at least one character after the $, i.e., `P < PEND'. */
2127
2128static boolean
2129at_endline_loc_p (p, pend, syntax)
2130 const char *p, *pend;
2131 int syntax;
2132{
2133 const char *next = p;
2134 boolean next_backslash = *next == '\\';
2135 const char *next_next = p + 1 < pend ? p + 1 : NULL;
2136
2137 return
2138 /* Before a subexpression? */
2139 (syntax & RE_NO_BK_PARENS ? *next == ')'
2140 : next_backslash && next_next && *next_next == ')')
2141 /* Before an alternative? */
2142 || (syntax & RE_NO_BK_VBAR ? *next == '|'
2143 : next_backslash && next_next && *next_next == '|');
2144}
2145
2146
2147/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2148 false if it's not. */
2149
2150static boolean
2151group_in_compile_stack (compile_stack, regnum)
2152 compile_stack_type compile_stack;
2153 regnum_t regnum;
2154{
2155 int this_element;
2156
2157 for (this_element = compile_stack.avail - 1;
2158 this_element >= 0;
2159 this_element--)
2160 if (compile_stack.stack[this_element].regnum == regnum)
2161 return true;
2162
2163 return false;
2164}
2165
2166
2167/* Read the ending character of a range (in a bracket expression) from the
2168 uncompiled pattern *P_PTR (which ends at PEND). We assume the
2169 starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
2170 Then we set the translation of all bits between the starting and
2171 ending characters (inclusive) in the compiled pattern B.
2172
2173 Return an error code.
2174
2175 We use these short variable names so we can use the same macros as
2176 `regex_compile' itself. */
2177
2178static reg_errcode_t
2179compile_range (p_ptr, pend, translate, syntax, b)
2180 const char **p_ptr, *pend;
2181 char *translate;
2182 reg_syntax_t syntax;
2183 unsigned char *b;
2184{
2185 unsigned this_char;
2186
2187 const char *p = *p_ptr;
2188 int range_start, range_end;
2189
2190 if (p == pend)
2191 return REG_ERANGE;
2192
2193 /* Even though the pattern is a signed `char *', we need to fetch
2194 with unsigned char *'s; if the high bit of the pattern character
2195 is set, the range endpoints will be negative if we fetch using a
2196 signed char *.
2197
2198 We also want to fetch the endpoints without translating them; the
2199 appropriate translation is done in the bit-setting loop below. */
2200 range_start = ((unsigned char *) p)[-2];
2201 range_end = ((unsigned char *) p)[0];
2202
2203 /* Have to increment the pointer into the pattern string, so the
2204 caller isn't still at the ending character. */
2205 (*p_ptr)++;
2206
2207 /* If the start is after the end, the range is empty. */
2208 if (range_start > range_end)
2209 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2210
2211 /* Here we see why `this_char' has to be larger than an `unsigned
2212 char' -- the range is inclusive, so if `range_end' == 0xff
2213 (assuming 8-bit characters), we would otherwise go into an infinite
2214 loop, since all characters <= 0xff. */
2215 for (this_char = range_start; this_char <= range_end; this_char++)
2216 {
2217 SET_LIST_BIT (TRANSLATE (this_char));
2218 }
2219
2220 return REG_NOERROR;
2221}
2222
2223/* Failure stack declarations and macros; both re_compile_fastmap and
2224 re_match_2 use a failure stack. These have to be macros because of
2225 REGEX_ALLOCATE. */
2226
2227
2228/* Number of failure points for which to initially allocate space
2229 when matching. If this number is exceeded, we allocate more
2230 space, so it is not a hard limit. */
2231#ifndef INIT_FAILURE_ALLOC
2232#define INIT_FAILURE_ALLOC 5
2233#endif
2234
2235/* Roughly the maximum number of failure points on the stack. Would be
2236 exactly that if always used MAX_FAILURE_SPACE each time we failed.
2237 This is a variable only so users of regex can assign to it; we never
2238 change it ourselves. */
2239int re_max_failures = 2000;
2240
2241typedef const unsigned char *fail_stack_elt_t;
2242
2243typedef struct
2244{
2245 fail_stack_elt_t *stack;
2246 unsigned size;
2247 unsigned avail; /* Offset of next open position. */
2248} fail_stack_type;
2249
2250#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
2251#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2252#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
2253#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
2254
2255
2256/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
2257
2258#define INIT_FAIL_STACK() \
2259 do { \
2260 fail_stack.stack = (fail_stack_elt_t *) \
2261 REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
2262 \
2263 if (fail_stack.stack == NULL) \
2264 return -2; \
2265 \
2266 fail_stack.size = INIT_FAILURE_ALLOC; \
2267 fail_stack.avail = 0; \
2268 } while (0)
2269
2270
2271/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2272
2273 Return 1 if succeeds, and 0 if either ran out of memory
2274 allocating space for it or it was already too large.
2275
2276 REGEX_REALLOCATE requires `destination' be declared. */
2277
2278#define DOUBLE_FAIL_STACK(fail_stack) \
2279 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
2280 ? 0 \
2281 : ((fail_stack).stack = (fail_stack_elt_t *) \
2282 REGEX_REALLOCATE ((fail_stack).stack, \
2283 (fail_stack).size * sizeof (fail_stack_elt_t), \
2284 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
2285 \
2286 (fail_stack).stack == NULL \
2287 ? 0 \
2288 : ((fail_stack).size <<= 1, \
2289 1)))
2290
2291
2292/* Push PATTERN_OP on FAIL_STACK.
2293
2294 Return 1 if was able to do so and 0 if ran out of memory allocating
2295 space to do so. */
2296#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
2297 ((FAIL_STACK_FULL () \
2298 && !DOUBLE_FAIL_STACK (fail_stack)) \
2299 ? 0 \
2300 : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
2301 1))
2302
2303/* This pushes an item onto the failure stack. Must be a four-byte
2304 value. Assumes the variable `fail_stack'. Probably should only
2305 be called from within `PUSH_FAILURE_POINT'. */
2306#define PUSH_FAILURE_ITEM(item) \
2307 fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2308
2309/* The complement operation. Assumes `fail_stack' is nonempty. */
2310#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2311
2312/* Used to omit pushing failure point id's when we're not debugging. */
2313#ifdef DEBUG
2314#define DEBUG_PUSH PUSH_FAILURE_ITEM
2315#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2316#else
2317#define DEBUG_PUSH(item)
2318#define DEBUG_POP(item_addr)
2319#endif
2320
2321
2322/* Push the information about the state we will need
2323 if we ever fail back to it.
2324
2325 Requires variables fail_stack, regstart, regend, reg_info, and
2326 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
2327 declared.
2328
2329 Does `return FAILURE_CODE' if runs out of memory. */
2330
2331#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
2332 do { \
2333 char *destination; \
2334 /* Must be int, so when we don't save any registers, the arithmetic \
2335 of 0 + -1 isn't done as unsigned. */ \
2336 int this_reg; \
2337 \
2338 DEBUG_STATEMENT (failure_id++); \
2339 DEBUG_STATEMENT (nfailure_points_pushed++); \
2340 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
2341 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
2342 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
2343 \
2344 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
2345 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
2346 \
2347 /* Ensure we have enough space allocated for what we will push. */ \
2348 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
2349 { \
2350 if (!DOUBLE_FAIL_STACK (fail_stack)) \
2351 return failure_code; \
2352 \
2353 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
2354 (fail_stack).size); \
2355 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2356 } \
2357 \
2358 /* Push the info, starting with the registers. */ \
2359 DEBUG_PRINT1 ("\n"); \
2360 \
2361 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
2362 this_reg++) \
2363 { \
2364 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
2365 DEBUG_STATEMENT (num_regs_pushed++); \
2366 \
2367 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2368 PUSH_FAILURE_ITEM (regstart[this_reg]); \
2369 \
2370 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
2371 PUSH_FAILURE_ITEM (regend[this_reg]); \
2372 \
2373 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
2374 DEBUG_PRINT2 (" match_null=%d", \
2375 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
2376 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
2377 DEBUG_PRINT2 (" matched_something=%d", \
2378 MATCHED_SOMETHING (reg_info[this_reg])); \
2379 DEBUG_PRINT2 (" ever_matched=%d", \
2380 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
2381 DEBUG_PRINT1 ("\n"); \
2382 PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
2383 } \
2384 \
2385 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
2386 PUSH_FAILURE_ITEM (lowest_active_reg); \
2387 \
2388 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
2389 PUSH_FAILURE_ITEM (highest_active_reg); \
2390 \
2391 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
2392 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
2393 PUSH_FAILURE_ITEM (pattern_place); \
2394 \
2395 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
2396 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
2397 size2); \
2398 DEBUG_PRINT1 ("'\n"); \
2399 PUSH_FAILURE_ITEM (string_place); \
2400 \
2401 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
2402 DEBUG_PUSH (failure_id); \
2403 } while (0)
2404
2405/* This is the number of items that are pushed and popped on the stack
2406 for each register. */
2407#define NUM_REG_ITEMS 3
2408
2409/* Individual items aside from the registers. */
2410#ifdef DEBUG
2411#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
2412#else
2413#define NUM_NONREG_ITEMS 4
2414#endif
2415
2416/* We push at most this many items on the stack. */
2417#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2418
2419/* We actually push this many items. */
2420#define NUM_FAILURE_ITEMS \
2421 ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
2422 + NUM_NONREG_ITEMS)
2423
2424/* How many items can still be added to the stack without overflowing it. */
2425#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2426
2427
2428/* Pops what PUSH_FAIL_STACK pushes.
2429
2430 We restore into the parameters, all of which should be lvalues:
2431 STR -- the saved data position.
2432 PAT -- the saved pattern position.
2433 LOW_REG, HIGH_REG -- the highest and lowest active registers.
2434 REGSTART, REGEND -- arrays of string positions.
2435 REG_INFO -- array of information about each subexpression.
2436
2437 Also assumes the variables `fail_stack' and (if debugging), `bufp',
2438 `pend', `string1', `size1', `string2', and `size2'. */
2439
2440#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2441{ \
2442 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
2443 int this_reg; \
2444 const unsigned char *string_temp; \
2445 \
2446 assert (!FAIL_STACK_EMPTY ()); \
2447 \
2448 /* Remove failure points and point to how many regs pushed. */ \
2449 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
2450 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
2451 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
2452 \
2453 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
2454 \
2455 DEBUG_POP (&failure_id); \
2456 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
2457 \
2458 /* If the saved string location is NULL, it came from an \
2459 on_failure_keep_string_jump opcode, and we want to throw away the \
2460 saved NULL, thus retaining our current position in the string. */ \
2461 string_temp = POP_FAILURE_ITEM (); \
2462 if (string_temp != NULL) \
2463 str = (const char *) string_temp; \
2464 \
2465 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
2466 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
2467 DEBUG_PRINT1 ("'\n"); \
2468 \
2469 pat = (unsigned char *) POP_FAILURE_ITEM (); \
2470 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
2471 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
2472 \
2473 /* Restore register info. */ \
2474 high_reg = (unsigned) POP_FAILURE_ITEM (); \
2475 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
2476 \
2477 low_reg = (unsigned) POP_FAILURE_ITEM (); \
2478 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
2479 \
2480 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
2481 { \
2482 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
2483 \
2484 reg_info[this_reg].word = POP_FAILURE_ITEM (); \
2485 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
2486 \
2487 regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2488 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
2489 \
2490 regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2491 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2492 } \
2493 \
2494 DEBUG_STATEMENT (nfailure_points_popped++); \
2495} /* POP_FAILURE_POINT */
2496
2497/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2498 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
2499 characters can start a string that matches the pattern. This fastmap
2500 is used by re_search to skip quickly over impossible starting points.
2501
2502 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2503 area as BUFP->fastmap.
2504
2505 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2506 the pattern buffer.
2507
2508 Returns 0 if we succeed, -2 if an internal error. */
2509
2510int
2511re_compile_fastmap (bufp)
2512 struct re_pattern_buffer *bufp;
2513{
2514 int j, k;
2515 fail_stack_type fail_stack;
2516#ifndef REGEX_MALLOC
2517 char *destination;
2518#endif
2519 /* We don't push any register information onto the failure stack. */
2520 unsigned num_regs = 0;
2521
2522 register char *fastmap = bufp->fastmap;
2523 unsigned char *pattern = bufp->buffer;
2524 unsigned long size = bufp->used;
2525 const unsigned char *p = pattern;
2526 register unsigned char *pend = pattern + size;
2527
2528 /* Assume that each path through the pattern can be null until
2529 proven otherwise. We set this false at the bottom of switch
2530 statement, to which we get only if a particular path doesn't
2531 match the empty string. */
2532 boolean path_can_be_null = true;
2533
2534 /* We aren't doing a `succeed_n' to begin with. */
2535 boolean succeed_n_p = false;
2536
2537 assert (fastmap != NULL && p != NULL);
2538
2539 INIT_FAIL_STACK ();
2540 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
2541 bufp->fastmap_accurate = 1; /* It will be when we're done. */
2542 bufp->can_be_null = 0;
2543
2544 while (p != pend || !FAIL_STACK_EMPTY ())
2545 {
2546 if (p == pend)
2547 {
2548 bufp->can_be_null |= path_can_be_null;
2549
2550 /* Reset for next path. */
2551 path_can_be_null = true;
2552
2553 p = fail_stack.stack[--fail_stack.avail];
2554 }
2555
2556 /* We should never be about to go beyond the end of the pattern. */
2557 assert (p < pend);
2558
2559#ifdef SWITCH_ENUM_BUG
2560 switch ((int) ((re_opcode_t) *p++))
2561#else
2562 switch ((re_opcode_t) *p++)
2563#endif
2564 {
2565
2566 /* I guess the idea here is to simply not bother with a fastmap
2567 if a backreference is used, since it's too hard to figure out
2568 the fastmap for the corresponding group. Setting
2569 `can_be_null' stops `re_search_2' from using the fastmap, so
2570 that is all we do. */
2571 case duplicate:
2572 bufp->can_be_null = 1;
2573 return 0;
2574
2575
2576 /* Following are the cases which match a character. These end
2577 with `break'. */
2578
2579 case exactn:
2580 fastmap[p[1]] = 1;
2581 break;
2582
2583
2584 case charset:
2585 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2586 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2587 fastmap[j] = 1;
2588 break;
2589
2590
2591 case charset_not:
2592 /* Chars beyond end of map must be allowed. */
2593 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2594 fastmap[j] = 1;
2595
2596 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2597 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2598 fastmap[j] = 1;
2599 break;
2600
2601
2602 case wordchar:
2603 for (j = 0; j < (1 << BYTEWIDTH); j++)
2604 if (SYNTAX (j) == Sword)
2605 fastmap[j] = 1;
2606 break;
2607
2608
2609 case notwordchar:
2610 for (j = 0; j < (1 << BYTEWIDTH); j++)
2611 if (SYNTAX (j) != Sword)
2612 fastmap[j] = 1;
2613 break;
2614
2615
2616 case anychar:
2617 /* `.' matches anything ... */
2618 for (j = 0; j < (1 << BYTEWIDTH); j++)
2619 fastmap[j] = 1;
2620
2621 /* ... except perhaps newline. */
2622 if (!(bufp->syntax & RE_DOT_NEWLINE))
2623 fastmap['\n'] = 0;
2624
2625 /* Return if we have already set `can_be_null'; if we have,
2626 then the fastmap is irrelevant. Something's wrong here. */
2627 else if (bufp->can_be_null)
2628 return 0;
2629
2630 /* Otherwise, have to check alternative paths. */
2631 break;
2632
2633
2634#ifdef emacs
2635 case syntaxspec:
2636 k = *p++;
2637 for (j = 0; j < (1 << BYTEWIDTH); j++)
2638 if (SYNTAX (j) == (enum syntaxcode) k)
2639 fastmap[j] = 1;
2640 break;
2641
2642
2643 case notsyntaxspec:
2644 k = *p++;
2645 for (j = 0; j < (1 << BYTEWIDTH); j++)
2646 if (SYNTAX (j) != (enum syntaxcode) k)
2647 fastmap[j] = 1;
2648 break;
2649
2650
2651 /* All cases after this match the empty string. These end with
2652 `continue'. */
2653
2654
2655 case before_dot:
2656 case at_dot:
2657 case after_dot:
2658 continue;
2659#endif /* not emacs */
2660
2661
2662 case no_op:
2663 case begline:
2664 case endline:
2665 case begbuf:
2666 case endbuf:
2667 case wordbound:
2668 case notwordbound:
2669 case wordbeg:
2670 case wordend:
2671 case push_dummy_failure:
2672 continue;
2673
2674
2675 case jump_n:
2676 case pop_failure_jump:
2677 case maybe_pop_jump:
2678 case jump:
2679 case jump_past_alt:
2680 case dummy_failure_jump:
2681 EXTRACT_NUMBER_AND_INCR (j, p);
2682 p += j;
2683 if (j > 0)
2684 continue;
2685
2686 /* Jump backward implies we just went through the body of a
2687 loop and matched nothing. Opcode jumped to should be
2688 `on_failure_jump' or `succeed_n'. Just treat it like an
2689 ordinary jump. For a * loop, it has pushed its failure
2690 point already; if so, discard that as redundant. */
2691 if ((re_opcode_t) *p != on_failure_jump
2692 && (re_opcode_t) *p != succeed_n)
2693 continue;
2694
2695 p++;
2696 EXTRACT_NUMBER_AND_INCR (j, p);
2697 p += j;
2698
2699 /* If what's on the stack is where we are now, pop it. */
2700 if (!FAIL_STACK_EMPTY ()
2701 && fail_stack.stack[fail_stack.avail - 1] == p)
2702 fail_stack.avail--;
2703
2704 continue;
2705
2706
2707 case on_failure_jump:
2708 case on_failure_keep_string_jump:
2709 handle_on_failure_jump:
2710 EXTRACT_NUMBER_AND_INCR (j, p);
2711
2712 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2713 end of the pattern. We don't want to push such a point,
2714 since when we restore it above, entering the switch will
2715 increment `p' past the end of the pattern. We don't need
2716 to push such a point since we obviously won't find any more
2717 fastmap entries beyond `pend'. Such a pattern can match
2718 the null string, though. */
2719 if (p + j < pend)
2720 {
2721 if (!PUSH_PATTERN_OP (p + j, fail_stack))
2722 return -2;
2723 }
2724 else
2725 bufp->can_be_null = 1;
2726
2727 if (succeed_n_p)
2728 {
2729 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
2730 succeed_n_p = false;
2731 }
2732
2733 continue;
2734
2735
2736 case succeed_n:
2737 /* Get to the number of times to succeed. */
2738 p += 2;
2739
2740 /* Increment p past the n for when k != 0. */
2741 EXTRACT_NUMBER_AND_INCR (k, p);
2742 if (k == 0)
2743 {
2744 p -= 4;
2745 succeed_n_p = true; /* Spaghetti code alert. */
2746 goto handle_on_failure_jump;
2747 }
2748 continue;
2749
2750
2751 case set_number_at:
2752 p += 4;
2753 continue;
2754
2755
2756 case start_memory:
2757 case stop_memory:
2758 p += 2;
2759 continue;
2760
2761
2762 default:
2763 abort (); /* We have listed all the cases. */
2764 } /* switch *p++ */
2765
2766 /* Getting here means we have found the possible starting
2767 characters for one path of the pattern -- and that the empty
2768 string does not match. We need not follow this path further.
2769 Instead, look at the next alternative (remembered on the
2770 stack), or quit if no more. The test at the top of the loop
2771 does these things. */
2772 path_can_be_null = false;
2773 p = pend;
2774 } /* while p */
2775
2776 /* Set `can_be_null' for the last path (also the first path, if the
2777 pattern is empty). */
2778 bufp->can_be_null |= path_can_be_null;
2779 return 0;
2780} /* re_compile_fastmap */
2781
2782/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2783 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
2784 this memory for recording register information. STARTS and ENDS
2785 must be allocated using the malloc library routine, and must each
2786 be at least NUM_REGS * sizeof (regoff_t) bytes long.
2787
2788 If NUM_REGS == 0, then subsequent matches should allocate their own
2789 register data.
2790
2791 Unless this function is called, the first search or match using
2792 PATTERN_BUFFER will allocate its own register data, without
2793 freeing the old data. */
2794
2795void
2796re_set_registers (bufp, regs, num_regs, starts, ends)
2797 struct re_pattern_buffer *bufp;
2798 struct re_registers *regs;
2799 unsigned num_regs;
2800 regoff_t *starts, *ends;
2801{
2802 if (num_regs)
2803 {
2804 bufp->regs_allocated = REGS_REALLOCATE;
2805 regs->num_regs = num_regs;
2806 regs->start = starts;
2807 regs->end = ends;
2808 }
2809 else
2810 {
2811 bufp->regs_allocated = REGS_UNALLOCATED;
2812 regs->num_regs = 0;
2813 regs->start = regs->end = (regoff_t) 0;
2814 }
2815}
2816
2817/* Searching routines. */
2818
2819/* Like re_search_2, below, but only one string is specified, and
2820 doesn't let you say where to stop matching. */
2821
2822int
2823re_search (bufp, string, size, startpos, range, regs)
2824 struct re_pattern_buffer *bufp;
2825 const char *string;
2826 int size, startpos, range;
2827 struct re_registers *regs;
2828{
2829 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2830 regs, size);
2831}
2832
2833
2834/* Using the compiled pattern in BUFP->buffer, first tries to match the
2835 virtual concatenation of STRING1 and STRING2, starting first at index
2836 STARTPOS, then at STARTPOS + 1, and so on.
2837
2838 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2839
2840 RANGE is how far to scan while trying to match. RANGE = 0 means try
2841 only at STARTPOS; in general, the last start tried is STARTPOS +
2842 RANGE.
2843
2844 In REGS, return the indices of the virtual concatenation of STRING1
2845 and STRING2 that matched the entire BUFP->buffer and its contained
2846 subexpressions.
2847
2848 Do not consider matching one past the index STOP in the virtual
2849 concatenation of STRING1 and STRING2.
2850
2851 We return either the position in the strings at which the match was
2852 found, -1 if no match, or -2 if error (such as failure
2853 stack overflow). */
2854
2855int
2856re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2857 struct re_pattern_buffer *bufp;
2858 const char *string1, *string2;
2859 int size1, size2;
2860 int startpos;
2861 int range;
2862 struct re_registers *regs;
2863 int stop;
2864{
2865 int val;
2866 register char *fastmap = bufp->fastmap;
2867 register char *translate = bufp->translate;
2868 int total_size = size1 + size2;
2869 int endpos = startpos + range;
2870
2871 /* Check for out-of-range STARTPOS. */
2872 if (startpos < 0 || startpos > total_size)
2873 return -1;
2874
2875 /* Fix up RANGE if it might eventually take us outside
2876 the virtual concatenation of STRING1 and STRING2. */
2877 if (endpos < -1)
2878 range = -1 - startpos;
2879 else if (endpos > total_size)
2880 range = total_size - startpos;
2881
2882 /* If the search isn't to be a backwards one, don't waste time in a
2883 search for a pattern that must be anchored. */
2884 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2885 {
2886 if (startpos > 0)
2887 return -1;
2888 else
2889 range = 1;
2890 }
2891
2892 /* Update the fastmap now if not correct already. */
2893 if (fastmap && !bufp->fastmap_accurate)
2894 if (re_compile_fastmap (bufp) == -2)
2895 return -2;
2896
2897 /* Loop through the string, looking for a place to start matching. */
2898 for (;;)
2899 {
2900 /* If a fastmap is supplied, skip quickly over characters that
2901 cannot be the start of a match. If the pattern can match the
2902 null string, however, we don't need to skip characters; we want
2903 the first null string. */
2904 if (fastmap && startpos < total_size && !bufp->can_be_null)
2905 {
2906 if (range > 0) /* Searching forwards. */
2907 {
2908 register const char *d;
2909 register int lim = 0;
2910 int irange = range;
2911
2912 if (startpos < size1 && startpos + range >= size1)
2913 lim = range - (size1 - startpos);
2914
2915 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2916
2917 /* Written out as an if-else to avoid testing `translate'
2918 inside the loop. */
2919 if (translate)
2920 while (range > lim
2921 && !fastmap[(unsigned char)
2922 translate[(unsigned char) *d++]])
2923 range--;
2924 else
2925 while (range > lim && !fastmap[(unsigned char) *d++])
2926 range--;
2927
2928 startpos += irange - range;
2929 }
2930 else /* Searching backwards. */
2931 {
2932 register char c = (size1 == 0 || startpos >= size1
2933 ? string2[startpos - size1]
2934 : string1[startpos]);
2935
2936 if (!fastmap[(unsigned char) TRANSLATE (c)])
2937 goto advance;
2938 }
2939 }
2940
2941 /* If can't match the null string, and that's all we have left, fail. */
2942 if (range >= 0 && startpos == total_size && fastmap
2943 && !bufp->can_be_null)
2944 return -1;
2945
2946 val = re_match_2 (bufp, string1, size1, string2, size2,
2947 startpos, regs, stop);
2948 if (val >= 0)
2949 return startpos;
2950
2951 if (val == -2)
2952 return -2;
2953
2954 advance:
2955 if (!range)
2956 break;
2957 else if (range > 0)
2958 {
2959 range--;
2960 startpos++;
2961 }
2962 else
2963 {
2964 range++;
2965 startpos--;
2966 }
2967 }
2968 return -1;
2969} /* re_search_2 */
2970
2971/* Declarations and macros for re_match_2. */
2972
2973static int bcmp_translate ();
2974static boolean alt_match_null_string_p (),
2975 common_op_match_null_string_p (),
2976 group_match_null_string_p ();
2977
2978/* Structure for per-register (a.k.a. per-group) information.
2979 This must not be longer than one word, because we push this value
2980 onto the failure stack. Other register information, such as the
2981 starting and ending positions (which are addresses), and the list of
2982 inner groups (which is a bits list) are maintained in separate
2983 variables.
2984
2985 We are making a (strictly speaking) nonportable assumption here: that
2986 the compiler will pack our bit fields into something that fits into
2987 the type of `word', i.e., is something that fits into one item on the
2988 failure stack. */
2989typedef union
2990{
2991 fail_stack_elt_t word;
2992 struct
2993 {
2994 /* This field is one if this group can match the empty string,
2995 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
2996#define MATCH_NULL_UNSET_VALUE 3
2997 unsigned match_null_string_p : 2;
2998 unsigned is_active : 1;
2999 unsigned matched_something : 1;
3000 unsigned ever_matched_something : 1;
3001 } bits;
3002} register_info_type;
3003
3004#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
3005#define IS_ACTIVE(R) ((R).bits.is_active)
3006#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
3007#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
3008
3009
3010/* Call this when have matched a real character; it sets `matched' flags
3011 for the subexpressions which we are currently inside. Also records
3012 that those subexprs have matched. */
3013#define SET_REGS_MATCHED() \
3014 do \
3015 { \
3016 unsigned r; \
3017 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
3018 { \
3019 MATCHED_SOMETHING (reg_info[r]) \
3020 = EVER_MATCHED_SOMETHING (reg_info[r]) \
3021 = 1; \
3022 } \
3023 } \
3024 while (0)
3025
3026
3027/* This converts PTR, a pointer into one of the search strings `string1'
3028 and `string2' into an offset from the beginning of that string. */
3029#define POINTER_TO_OFFSET(ptr) \
3030 (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3031
3032/* Registers are set to a sentinel when they haven't yet matched. */
3033#define REG_UNSET_VALUE ((char *) -1)
3034#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3035
3036
3037/* Macros for dealing with the split strings in re_match_2. */
3038
3039#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
3040
3041/* Call before fetching a character with *d. This switches over to
3042 string2 if necessary. */
3043#define PREFETCH() \
3044 while (d == dend) \
3045 { \
3046 /* End of string2 => fail. */ \
3047 if (dend == end_match_2) \
3048 goto fail; \
3049 /* End of string1 => advance to string2. */ \
3050 d = string2; \
3051 dend = end_match_2; \
3052 }
3053
3054
3055/* Test if at very beginning or at very end of the virtual concatenation
3056 of `string1' and `string2'. If only one string, it's `string2'. */
3057#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3058#define AT_STRINGS_END(d) ((d) == end2)
3059
3060
3061/* Test if D points to a character which is word-constituent. We have
3062 two special cases to check for: if past the end of string1, look at
3063 the first character in string2; and if before the beginning of
3064 string2, look at the last character in string1. */
3065#define WORDCHAR_P(d) \
3066 (SYNTAX ((d) == end1 ? *string2 \
3067 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
3068 == Sword)
3069
3070/* Test if the character before D and the one at D differ with respect
3071 to being word-constituent. */
3072#define AT_WORD_BOUNDARY(d) \
3073 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
3074 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3075
3076
3077/* Free everything we malloc. */
3078#ifdef REGEX_MALLOC
3079#define FREE_VAR(var) if (var) free (var); var = NULL
3080#define FREE_VARIABLES() \
3081 do { \
3082 FREE_VAR (fail_stack.stack); \
3083 FREE_VAR (regstart); \
3084 FREE_VAR (regend); \
3085 FREE_VAR (old_regstart); \
3086 FREE_VAR (old_regend); \
3087 FREE_VAR (best_regstart); \
3088 FREE_VAR (best_regend); \
3089 FREE_VAR (reg_info); \
3090 FREE_VAR (reg_dummy); \
3091 FREE_VAR (reg_info_dummy); \
3092 } while (0)
3093#else /* not REGEX_MALLOC */
3094/* Some MIPS systems (at least) want this to free alloca'd storage. */
3095#define FREE_VARIABLES() alloca (0)
3096#endif /* not REGEX_MALLOC */
3097
3098
3099/* These values must meet several constraints. They must not be valid
3100 register values; since we have a limit of 255 registers (because
3101 we use only one byte in the pattern for the register number), we can
3102 use numbers larger than 255. They must differ by 1, because of
3103 NUM_FAILURE_ITEMS above. And the value for the lowest register must
3104 be larger than the value for the highest register, so we do not try
3105 to actually save any registers when none are active. */
3106#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3107#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3108
3109/* Matching routines. */
3110
3111#ifndef emacs /* Emacs never uses this. */
3112/* re_match is like re_match_2 except it takes only a single string. */
3113
3114int
3115re_match (bufp, string, size, pos, regs)
3116 struct re_pattern_buffer *bufp;
3117 const char *string;
3118 int size, pos;
3119 struct re_registers *regs;
3120 {
3121 return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3122}
3123#endif /* not emacs */
3124
3125
3126/* re_match_2 matches the compiled pattern in BUFP against the
3127 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3128 and SIZE2, respectively). We start matching at POS, and stop
3129 matching at STOP.
3130
3131 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3132 store offsets for the substring each group matched in REGS. See the
3133 documentation for exactly how many groups we fill.
3134
3135 We return -1 if no match, -2 if an internal error (such as the
3136 failure stack overflowing). Otherwise, we return the length of the
3137 matched substring. */
3138
3139int
3140re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3141 struct re_pattern_buffer *bufp;
3142 const char *string1, *string2;
3143 int size1, size2;
3144 int pos;
3145 struct re_registers *regs;
3146 int stop;
3147{
3148 /* General temporaries. */
3149 int mcnt;
3150 unsigned char *p1;
3151
3152 /* Just past the end of the corresponding string. */
3153 const char *end1, *end2;
3154
3155 /* Pointers into string1 and string2, just past the last characters in
3156 each to consider matching. */
3157 const char *end_match_1, *end_match_2;
3158
3159 /* Where we are in the data, and the end of the current string. */
3160 const char *d, *dend;
3161
3162 /* Where we are in the pattern, and the end of the pattern. */
3163 unsigned char *p = bufp->buffer;
3164 register unsigned char *pend = p + bufp->used;
3165
3166 /* We use this to map every character in the string. */
3167 char *translate = bufp->translate;
3168
3169 /* Failure point stack. Each place that can handle a failure further
3170 down the line pushes a failure point on this stack. It consists of
3171 restart, regend, and reg_info for all registers corresponding to
3172 the subexpressions we're currently inside, plus the number of such
3173 registers, and, finally, two char *'s. The first char * is where
3174 to resume scanning the pattern; the second one is where to resume
3175 scanning the strings. If the latter is zero, the failure point is
3176 a ``dummy''; if a failure happens and the failure point is a dummy,
3177 it gets discarded and the next next one is tried. */
3178 fail_stack_type fail_stack;
3179#ifdef DEBUG
3180 static unsigned failure_id = 0;
3181 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3182#endif
3183
3184 /* We fill all the registers internally, independent of what we
3185 return, for use in backreferences. The number here includes
3186 an element for register zero. */
3187 unsigned num_regs = bufp->re_nsub + 1;
3188
3189 /* The currently active registers. */
3190 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3191 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3192
3193 /* Information on the contents of registers. These are pointers into
3194 the input strings; they record just what was matched (on this
3195 attempt) by a subexpression part of the pattern, that is, the
3196 regnum-th regstart pointer points to where in the pattern we began
3197 matching and the regnum-th regend points to right after where we
3198 stopped matching the regnum-th subexpression. (The zeroth register
3199 keeps track of what the whole pattern matches.) */
3200 const char **regstart = NULL, **regend = NULL;
3201
3202 /* If a group that's operated upon by a repetition operator fails to
3203 match anything, then the register for its start will need to be
3204 restored because it will have been set to wherever in the string we
3205 are when we last see its open-group operator. Similarly for a
3206 register's end. */
3207 const char **old_regstart = NULL, **old_regend = NULL;
3208
3209 /* The is_active field of reg_info helps us keep track of which (possibly
3210 nested) subexpressions we are currently in. The matched_something
3211 field of reg_info[reg_num] helps us tell whether or not we have
3212 matched any of the pattern so far this time through the reg_num-th
3213 subexpression. These two fields get reset each time through any
3214 loop their register is in. */
3215 register_info_type *reg_info = NULL;
3216
3217 /* The following record the register info as found in the above
3218 variables when we find a match better than any we've seen before.
3219 This happens as we backtrack through the failure points, which in
3220 turn happens only if we have not yet matched the entire string. */
3221 unsigned best_regs_set = false;
3222 const char **best_regstart = NULL, **best_regend = NULL;
3223
3224 /* Logically, this is `best_regend[0]'. But we don't want to have to
3225 allocate space for that if we're not allocating space for anything
3226 else (see below). Also, we never need info about register 0 for
3227 any of the other register vectors, and it seems rather a kludge to
3228 treat `best_regend' differently than the rest. So we keep track of
3229 the end of the best match so far in a separate variable. We
3230 initialize this to NULL so that when we backtrack the first time
3231 and need to test it, it's not garbage. */
3232 const char *match_end = NULL;
3233
3234 /* Used when we pop values we don't care about. */
3235 const char **reg_dummy = NULL;
3236 register_info_type *reg_info_dummy = NULL;
3237
3238#ifdef DEBUG
3239 /* Counts the total number of registers pushed. */
3240 unsigned num_regs_pushed = 0;
3241#endif
3242
3243 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3244
3245 INIT_FAIL_STACK ();
3246
3247 /* Do not bother to initialize all the register variables if there are
3248 no groups in the pattern, as it takes a fair amount of time. If
3249 there are groups, we include space for register 0 (the whole
3250 pattern), even though we never use it, since it simplifies the
3251 array indexing. We should fix this. */
3252 if (bufp->re_nsub)
3253 {
3254 regstart = REGEX_TALLOC (num_regs, const char *);
3255 regend = REGEX_TALLOC (num_regs, const char *);
3256 old_regstart = REGEX_TALLOC (num_regs, const char *);
3257 old_regend = REGEX_TALLOC (num_regs, const char *);
3258 best_regstart = REGEX_TALLOC (num_regs, const char *);
3259 best_regend = REGEX_TALLOC (num_regs, const char *);
3260 reg_info = REGEX_TALLOC (num_regs, register_info_type);
3261 reg_dummy = REGEX_TALLOC (num_regs, const char *);
3262 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3263
3264 if (!(regstart && regend && old_regstart && old_regend && reg_info
3265 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3266 {
3267 FREE_VARIABLES ();
3268 return -2;
3269 }
3270 }
3271#ifdef REGEX_MALLOC
3272 else
3273 {
3274 /* We must initialize all our variables to NULL, so that
3275 `FREE_VARIABLES' doesn't try to free them. */
3276 regstart = regend = old_regstart = old_regend = best_regstart
3277 = best_regend = reg_dummy = NULL;
3278 reg_info = reg_info_dummy = (register_info_type *) NULL;
3279 }
3280#endif /* REGEX_MALLOC */
3281
3282 /* The starting position is bogus. */
3283 if (pos < 0 || pos > size1 + size2)
3284 {
3285 FREE_VARIABLES ();
3286 return -1;
3287 }
3288
3289 /* Initialize subexpression text positions to -1 to mark ones that no
3290 start_memory/stop_memory has been seen for. Also initialize the
3291 register information struct. */
3292 for (mcnt = 1; mcnt < num_regs; mcnt++)
3293 {
3294 regstart[mcnt] = regend[mcnt]
3295 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3296
3297 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3298 IS_ACTIVE (reg_info[mcnt]) = 0;
3299 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3300 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3301 }
3302
3303 /* We move `string1' into `string2' if the latter's empty -- but not if
3304 `string1' is null. */
3305 if (size2 == 0 && string1 != NULL)
3306 {
3307 string2 = string1;
3308 size2 = size1;
3309 string1 = 0;
3310 size1 = 0;
3311 }
3312 end1 = string1 + size1;
3313 end2 = string2 + size2;
3314
3315 /* Compute where to stop matching, within the two strings. */
3316 if (stop <= size1)
3317 {
3318 end_match_1 = string1 + stop;
3319 end_match_2 = string2;
3320 }
3321 else
3322 {
3323 end_match_1 = end1;
3324 end_match_2 = string2 + stop - size1;
3325 }
3326
3327 /* `p' scans through the pattern as `d' scans through the data.
3328 `dend' is the end of the input string that `d' points within. `d'
3329 is advanced into the following input string whenever necessary, but
3330 this happens before fetching; therefore, at the beginning of the
3331 loop, `d' can be pointing at the end of a string, but it cannot
3332 equal `string2'. */
3333 if (size1 > 0 && pos <= size1)
3334 {
3335 d = string1 + pos;
3336 dend = end_match_1;
3337 }
3338 else
3339 {
3340 d = string2 + pos - size1;
3341 dend = end_match_2;
3342 }
3343
3344 DEBUG_PRINT1 ("The compiled pattern is: ");
3345 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3346 DEBUG_PRINT1 ("The string to match is: `");
3347 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3348 DEBUG_PRINT1 ("'\n");
3349
3350 /* This loops over pattern commands. It exits by returning from the
3351 function if the match is complete, or it drops through if the match
3352 fails at this starting point in the input data. */
3353 for (;;)
3354 {
3355 DEBUG_PRINT2 ("\n0x%x: ", p);
3356
3357 if (p == pend)
3358 { /* End of pattern means we might have succeeded. */
3359 DEBUG_PRINT1 ("end of pattern ... ");
3360
3361 /* If we haven't matched the entire string, and we want the
3362 longest match, try backtracking. */
3363 if (d != end_match_2)
3364 {
3365 DEBUG_PRINT1 ("backtracking.\n");
3366
3367 if (!FAIL_STACK_EMPTY ())
3368 { /* More failure points to try. */
3369 boolean same_str_p = (FIRST_STRING_P (match_end)
3370 == MATCHING_IN_FIRST_STRING);
3371
3372 /* If exceeds best match so far, save it. */
3373 if (!best_regs_set
3374 || (same_str_p && d > match_end)
3375 || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3376 {
3377 best_regs_set = true;
3378 match_end = d;
3379
3380 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3381
3382 for (mcnt = 1; mcnt < num_regs; mcnt++)
3383 {
3384 best_regstart[mcnt] = regstart[mcnt];
3385 best_regend[mcnt] = regend[mcnt];
3386 }
3387 }
3388 goto fail;
3389 }
3390
3391 /* If no failure points, don't restore garbage. */
3392 else if (best_regs_set)
3393 {
3394 restore_best_regs:
3395 /* Restore best match. It may happen that `dend ==
3396 end_match_1' while the restored d is in string2.
3397 For example, the pattern `x.*y.*z' against the
3398 strings `x-' and `y-z-', if the two strings are
3399 not consecutive in memory. */
3400 DEBUG_PRINT1 ("Restoring best registers.\n");
3401
3402 d = match_end;
3403 dend = ((d >= string1 && d <= end1)
3404 ? end_match_1 : end_match_2);
3405
3406 for (mcnt = 1; mcnt < num_regs; mcnt++)
3407 {
3408 regstart[mcnt] = best_regstart[mcnt];
3409 regend[mcnt] = best_regend[mcnt];
3410 }
3411 }
3412 } /* d != end_match_2 */
3413
3414 DEBUG_PRINT1 ("Accepting match.\n");
3415
3416 /* If caller wants register contents data back, do it. */
3417 if (regs && !bufp->no_sub)
3418 {
3419 /* Have the register data arrays been allocated? */
3420 if (bufp->regs_allocated == REGS_UNALLOCATED)
3421 { /* No. So allocate them with malloc. We need one
3422 extra element beyond `num_regs' for the `-1' marker
3423 GNU code uses. */
3424 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3425 regs->start = TALLOC (regs->num_regs, regoff_t);
3426 regs->end = TALLOC (regs->num_regs, regoff_t);
3427 if (regs->start == NULL || regs->end == NULL)
3428 return -2;
3429 bufp->regs_allocated = REGS_REALLOCATE;
3430 }
3431 else if (bufp->regs_allocated == REGS_REALLOCATE)
3432 { /* Yes. If we need more elements than were already
3433 allocated, reallocate them. If we need fewer, just
3434 leave it alone. */
3435 if (regs->num_regs < num_regs + 1)
3436 {
3437 regs->num_regs = num_regs + 1;
3438 RETALLOC (regs->start, regs->num_regs, regoff_t);
3439 RETALLOC (regs->end, regs->num_regs, regoff_t);
3440 if (regs->start == NULL || regs->end == NULL)
3441 return -2;
3442 }
3443 }
3444 else
3445 assert (bufp->regs_allocated == REGS_FIXED);
3446
3447 /* Convert the pointer data in `regstart' and `regend' to
3448 indices. Register zero has to be set differently,
3449 since we haven't kept track of any info for it. */
3450 if (regs->num_regs > 0)
3451 {
3452 regs->start[0] = pos;
3453 regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3454 : d - string2 + size1);
3455 }
3456
3457 /* Go through the first `min (num_regs, regs->num_regs)'
3458 registers, since that is all we initialized. */
3459 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3460 {
3461 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3462 regs->start[mcnt] = regs->end[mcnt] = -1;
3463 else
3464 {
3465 regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3466 regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3467 }
3468 }
3469
3470 /* If the regs structure we return has more elements than
3471 were in the pattern, set the extra elements to -1. If
3472 we (re)allocated the registers, this is the case,
3473 because we always allocate enough to have at least one
3474 -1 at the end. */
3475 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3476 regs->start[mcnt] = regs->end[mcnt] = -1;
3477 } /* regs && !bufp->no_sub */
3478
3479 FREE_VARIABLES ();
3480 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3481 nfailure_points_pushed, nfailure_points_popped,
3482 nfailure_points_pushed - nfailure_points_popped);
3483 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3484
3485 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3486 ? string1
3487 : string2 - size1);
3488
3489 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3490
3491 return mcnt;
3492 }
3493
3494 /* Otherwise match next pattern command. */
3495#ifdef SWITCH_ENUM_BUG
3496 switch ((int) ((re_opcode_t) *p++))
3497#else
3498 switch ((re_opcode_t) *p++)
3499#endif
3500 {
3501 /* Ignore these. Used to ignore the n of succeed_n's which
3502 currently have n == 0. */
3503 case no_op:
3504 DEBUG_PRINT1 ("EXECUTING no_op.\n");
3505 break;
3506
3507
3508 /* Match the next n pattern characters exactly. The following
3509 byte in the pattern defines n, and the n bytes after that
3510 are the characters to match. */
3511 case exactn:
3512 mcnt = *p++;
3513 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3514
3515 /* This is written out as an if-else so we don't waste time
3516 testing `translate' inside the loop. */
3517 if (translate)
3518 {
3519 do
3520 {
3521 PREFETCH ();
3522 if (translate[(unsigned char) *d++] != (char) *p++)
3523 goto fail;
3524 }
3525 while (--mcnt);
3526 }
3527 else
3528 {
3529 do
3530 {
3531 PREFETCH ();
3532 if (*d++ != (char) *p++) goto fail;
3533 }
3534 while (--mcnt);
3535 }
3536 SET_REGS_MATCHED ();
3537 break;
3538
3539
3540 /* Match any character except possibly a newline or a null. */
3541 case anychar:
3542 DEBUG_PRINT1 ("EXECUTING anychar.\n");
3543
3544 PREFETCH ();
3545
3546 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3547 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3548 goto fail;
3549
3550 SET_REGS_MATCHED ();
3551 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
3552 d++;
3553 break;
3554
3555
3556 case charset:
3557 case charset_not:
3558 {
3559 register unsigned char c;
3560 boolean not = (re_opcode_t) *(p - 1) == charset_not;
3561
3562 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3563
3564 PREFETCH ();
3565 c = TRANSLATE (*d); /* The character to match. */
3566
3567 /* Cast to `unsigned' instead of `unsigned char' in case the
3568 bit list is a full 32 bytes long. */
3569 if (c < (unsigned) (*p * BYTEWIDTH)
3570 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3571 not = !not;
3572
3573 p += 1 + *p;
3574
3575 if (!not) goto fail;
3576
3577 SET_REGS_MATCHED ();
3578 d++;
3579 break;
3580 }
3581
3582
3583 /* The beginning of a group is represented by start_memory.
3584 The arguments are the register number in the next byte, and the
3585 number of groups inner to this one in the next. The text
3586 matched within the group is recorded (in the internal
3587 registers data structure) under the register number. */
3588 case start_memory:
3589 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3590
3591 /* Find out if this group can match the empty string. */
3592 p1 = p; /* To send to group_match_null_string_p. */
3593
3594 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3595 REG_MATCH_NULL_STRING_P (reg_info[*p])
3596 = group_match_null_string_p (&p1, pend, reg_info);
3597
3598 /* Save the position in the string where we were the last time
3599 we were at this open-group operator in case the group is
3600 operated upon by a repetition operator, e.g., with `(a*)*b'
3601 against `ab'; then we want to ignore where we are now in
3602 the string in case this attempt to match fails. */
3603 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3604 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3605 : regstart[*p];
3606 DEBUG_PRINT2 (" old_regstart: %d\n",
3607 POINTER_TO_OFFSET (old_regstart[*p]));
3608
3609 regstart[*p] = d;
3610 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3611
3612 IS_ACTIVE (reg_info[*p]) = 1;
3613 MATCHED_SOMETHING (reg_info[*p]) = 0;
3614
3615 /* This is the new highest active register. */
3616 highest_active_reg = *p;
3617
3618 /* If nothing was active before, this is the new lowest active
3619 register. */
3620 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3621 lowest_active_reg = *p;
3622
3623 /* Move past the register number and inner group count. */
3624 p += 2;
3625 break;
3626
3627
3628 /* The stop_memory opcode represents the end of a group. Its
3629 arguments are the same as start_memory's: the register
3630 number, and the number of inner groups. */
3631 case stop_memory:
3632 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3633
3634 /* We need to save the string position the last time we were at
3635 this close-group operator in case the group is operated
3636 upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3637 against `aba'; then we want to ignore where we are now in
3638 the string in case this attempt to match fails. */
3639 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3640 ? REG_UNSET (regend[*p]) ? d : regend[*p]
3641 : regend[*p];
3642 DEBUG_PRINT2 (" old_regend: %d\n",
3643 POINTER_TO_OFFSET (old_regend[*p]));
3644
3645 regend[*p] = d;
3646 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3647
3648 /* This register isn't active anymore. */
3649 IS_ACTIVE (reg_info[*p]) = 0;
3650
3651 /* If this was the only register active, nothing is active
3652 anymore. */
3653 if (lowest_active_reg == highest_active_reg)
3654 {
3655 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3656 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3657 }
3658 else
3659 { /* We must scan for the new highest active register, since
3660 it isn't necessarily one less than now: consider
3661 (a(b)c(d(e)f)g). When group 3 ends, after the f), the
3662 new highest active register is 1. */
3663 unsigned char r = *p - 1;
3664 while (r > 0 && !IS_ACTIVE (reg_info[r]))
3665 r--;
3666
3667 /* If we end up at register zero, that means that we saved
3668 the registers as the result of an `on_failure_jump', not
3669 a `start_memory', and we jumped to past the innermost
3670 `stop_memory'. For example, in ((.)*) we save
3671 registers 1 and 2 as a result of the *, but when we pop
3672 back to the second ), we are at the stop_memory 1.
3673 Thus, nothing is active. */
3674 if (r == 0)
3675 {
3676 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3677 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3678 }
3679 else
3680 highest_active_reg = r;
3681 }
3682
3683 /* If just failed to match something this time around with a
3684 group that's operated on by a repetition operator, try to
3685 force exit from the ``loop'', and restore the register
3686 information for this group that we had before trying this
3687 last match. */
3688 if ((!MATCHED_SOMETHING (reg_info[*p])
3689 || (re_opcode_t) p[-3] == start_memory)
3690 && (p + 2) < pend)
3691 {
3692 boolean is_a_jump_n = false;
3693
3694 p1 = p + 2;
3695 mcnt = 0;
3696 switch ((re_opcode_t) *p1++)
3697 {
3698 case jump_n:
3699 is_a_jump_n = true;
3700 case pop_failure_jump:
3701 case maybe_pop_jump:
3702 case jump:
3703 case dummy_failure_jump:
3704 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3705 if (is_a_jump_n)
3706 p1 += 2;
3707 break;
3708
3709 default:
3710 /* do nothing */ ;
3711 }
3712 p1 += mcnt;
3713
3714 /* If the next operation is a jump backwards in the pattern
3715 to an on_failure_jump right before the start_memory
3716 corresponding to this stop_memory, exit from the loop
3717 by forcing a failure after pushing on the stack the
3718 on_failure_jump's jump in the pattern, and d. */
3719 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3720 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3721 {
3722 /* If this group ever matched anything, then restore
3723 what its registers were before trying this last
3724 failed match, e.g., with `(a*)*b' against `ab' for
3725 regstart[1], and, e.g., with `((a*)*(b*)*)*'
3726 against `aba' for regend[3].
3727
3728 Also restore the registers for inner groups for,
3729 e.g., `((a*)(b*))*' against `aba' (register 3 would
3730 otherwise get trashed). */
3731
3732 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3733 {
3734 unsigned r;
3735
3736 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3737
3738 /* Restore this and inner groups' (if any) registers. */
3739 for (r = *p; r < *p + *(p + 1); r++)
3740 {
3741 regstart[r] = old_regstart[r];
3742
3743 /* xx why this test? */
3744 if ((int) old_regend[r] >= (int) regstart[r])
3745 regend[r] = old_regend[r];
3746 }
3747 }
3748 p1++;
3749 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3750 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3751
3752 goto fail;
3753 }
3754 }
3755
3756 /* Move past the register number and the inner group count. */
3757 p += 2;
3758 break;
3759
3760
3761 /* \<digit> has been turned into a `duplicate' command which is
3762 followed by the numeric value of <digit> as the register number. */
3763 case duplicate:
3764 {
3765 register const char *d2, *dend2;
3766 int regno = *p++; /* Get which register to match against. */
3767 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3768
3769 /* Can't back reference a group which we've never matched. */
3770 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3771 goto fail;
3772
3773 /* Where in input to try to start matching. */
3774 d2 = regstart[regno];
3775
3776 /* Where to stop matching; if both the place to start and
3777 the place to stop matching are in the same string, then
3778 set to the place to stop, otherwise, for now have to use
3779 the end of the first string. */
3780
3781 dend2 = ((FIRST_STRING_P (regstart[regno])
3782 == FIRST_STRING_P (regend[regno]))
3783 ? regend[regno] : end_match_1);
3784 for (;;)
3785 {
3786 /* If necessary, advance to next segment in register
3787 contents. */
3788 while (d2 == dend2)
3789 {
3790 if (dend2 == end_match_2) break;
3791 if (dend2 == regend[regno]) break;
3792
3793 /* End of string1 => advance to string2. */
3794 d2 = string2;
3795 dend2 = regend[regno];
3796 }
3797 /* At end of register contents => success */
3798 if (d2 == dend2) break;
3799
3800 /* If necessary, advance to next segment in data. */
3801 PREFETCH ();
3802
3803 /* How many characters left in this segment to match. */
3804 mcnt = dend - d;
3805
3806 /* Want how many consecutive characters we can match in
3807 one shot, so, if necessary, adjust the count. */
3808 if (mcnt > dend2 - d2)
3809 mcnt = dend2 - d2;
3810
3811 /* Compare that many; failure if mismatch, else move
3812 past them. */
3813 if (translate
3814 ? bcmp_translate (d, d2, mcnt, translate)
3815 : bcmp (d, d2, mcnt))
3816 goto fail;
3817 d += mcnt, d2 += mcnt;
3818 }
3819 }
3820 break;
3821
3822
3823 /* begline matches the empty string at the beginning of the string
3824 (unless `not_bol' is set in `bufp'), and, if
3825 `newline_anchor' is set, after newlines. */
3826 case begline:
3827 DEBUG_PRINT1 ("EXECUTING begline.\n");
3828
3829 if (AT_STRINGS_BEG (d))
3830 {
3831 if (!bufp->not_bol) break;
3832 }
3833 else if (d[-1] == '\n' && bufp->newline_anchor)
3834 {
3835 break;
3836 }
3837 /* In all other cases, we fail. */
3838 goto fail;
3839
3840
3841 /* endline is the dual of begline. */
3842 case endline:
3843 DEBUG_PRINT1 ("EXECUTING endline.\n");
3844
3845 if (AT_STRINGS_END (d))
3846 {
3847 if (!bufp->not_eol) break;
3848 }
3849
3850 /* We have to ``prefetch'' the next character. */
3851 else if ((d == end1 ? *string2 : *d) == '\n'
3852 && bufp->newline_anchor)
3853 {
3854 break;
3855 }
3856 goto fail;
3857
3858
3859 /* Match at the very beginning of the data. */
3860 case begbuf:
3861 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3862 if (AT_STRINGS_BEG (d))
3863 break;
3864 goto fail;
3865
3866
3867 /* Match at the very end of the data. */
3868 case endbuf:
3869 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3870 if (AT_STRINGS_END (d))
3871 break;
3872 goto fail;
3873
3874
3875 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
3876 pushes NULL as the value for the string on the stack. Then
3877 `pop_failure_point' will keep the current value for the
3878 string, instead of restoring it. To see why, consider
3879 matching `foo\nbar' against `.*\n'. The .* matches the foo;
3880 then the . fails against the \n. But the next thing we want
3881 to do is match the \n against the \n; if we restored the
3882 string value, we would be back at the foo.
3883
3884 Because this is used only in specific cases, we don't need to
3885 check all the things that `on_failure_jump' does, to make
3886 sure the right things get saved on the stack. Hence we don't
3887 share its code. The only reason to push anything on the
3888 stack at all is that otherwise we would have to change
3889 `anychar's code to do something besides goto fail in this
3890 case; that seems worse than this. */
3891 case on_failure_keep_string_jump:
3892 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3893
3894 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3895 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3896
3897 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3898 break;
3899
3900
3901 /* Uses of on_failure_jump:
3902
3903 Each alternative starts with an on_failure_jump that points
3904 to the beginning of the next alternative. Each alternative
3905 except the last ends with a jump that in effect jumps past
3906 the rest of the alternatives. (They really jump to the
3907 ending jump of the following alternative, because tensioning
3908 these jumps is a hassle.)
3909
3910 Repeats start with an on_failure_jump that points past both
3911 the repetition text and either the following jump or
3912 pop_failure_jump back to this on_failure_jump. */
3913 case on_failure_jump:
3914 on_failure:
3915 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3916
3917 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3918 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3919
3920 /* If this on_failure_jump comes right before a group (i.e.,
3921 the original * applied to a group), save the information
3922 for that group and all inner ones, so that if we fail back
3923 to this point, the group's information will be correct.
3924 For example, in \(a*\)*\1, we need the preceding group,
3925 and in \(\(a*\)b*\)\2, we need the inner group. */
3926
3927 /* We can't use `p' to check ahead because we push
3928 a failure point to `p + mcnt' after we do this. */
3929 p1 = p;
3930
3931 /* We need to skip no_op's before we look for the
3932 start_memory in case this on_failure_jump is happening as
3933 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3934 against aba. */
3935 while (p1 < pend && (re_opcode_t) *p1 == no_op)
3936 p1++;
3937
3938 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3939 {
3940 /* We have a new highest active register now. This will
3941 get reset at the start_memory we are about to get to,
3942 but we will have saved all the registers relevant to
3943 this repetition op, as described above. */
3944 highest_active_reg = *(p1 + 1) + *(p1 + 2);
3945 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3946 lowest_active_reg = *(p1 + 1);
3947 }
3948
3949 DEBUG_PRINT1 (":\n");
3950 PUSH_FAILURE_POINT (p + mcnt, d, -2);
3951 break;
3952
3953
3954 /* A smart repeat ends with `maybe_pop_jump'.
3955 We change it to either `pop_failure_jump' or `jump'. */
3956 case maybe_pop_jump:
3957 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3958 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3959 {
3960 register unsigned char *p2 = p;
3961
3962 /* Compare the beginning of the repeat with what in the
3963 pattern follows its end. If we can establish that there
3964 is nothing that they would both match, i.e., that we
3965 would have to backtrack because of (as in, e.g., `a*a')
3966 then we can change to pop_failure_jump, because we'll
3967 never have to backtrack.
3968
3969 This is not true in the case of alternatives: in
3970 `(a|ab)*' we do need to backtrack to the `ab' alternative
3971 (e.g., if the string was `ab'). But instead of trying to
3972 detect that here, the alternative has put on a dummy
3973 failure point which is what we will end up popping. */
3974
3975 /* Skip over open/close-group commands. */
3976 while (p2 + 2 < pend
3977 && ((re_opcode_t) *p2 == stop_memory
3978 || (re_opcode_t) *p2 == start_memory))
3979 p2 += 3; /* Skip over args, too. */
3980
3981 /* If we're at the end of the pattern, we can change. */
3982 if (p2 == pend)
3983 {
3984 /* Consider what happens when matching ":\(.*\)"
3985 against ":/". I don't really understand this code
3986 yet. */
3987 p[-3] = (unsigned char) pop_failure_jump;
3988 DEBUG_PRINT1
3989 (" End of pattern: change to `pop_failure_jump'.\n");
3990 }
3991
3992 else if ((re_opcode_t) *p2 == exactn
3993 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
3994 {
3995 register unsigned char c
3996 = *p2 == (unsigned char) endline ? '\n' : p2[2];
3997 p1 = p + mcnt;
3998
3999 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4000 to the `maybe_finalize_jump' of this case. Examine what
4001 follows. */
4002 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4003 {
4004 p[-3] = (unsigned char) pop_failure_jump;
4005 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
4006 c, p1[5]);
4007 }
4008
4009 else if ((re_opcode_t) p1[3] == charset
4010 || (re_opcode_t) p1[3] == charset_not)
4011 {
4012 int not = (re_opcode_t) p1[3] == charset_not;
4013
4014 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4015 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4016 not = !not;
4017
4018 /* `not' is equal to 1 if c would match, which means
4019 that we can't change to pop_failure_jump. */
4020 if (!not)
4021 {
4022 p[-3] = (unsigned char) pop_failure_jump;
4023 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
4024 }
4025 }
4026 }
4027 }
4028 p -= 2; /* Point at relative address again. */
4029 if ((re_opcode_t) p[-1] != pop_failure_jump)
4030 {
4031 p[-1] = (unsigned char) jump;
4032 DEBUG_PRINT1 (" Match => jump.\n");
4033 goto unconditional_jump;
4034 }
4035 /* Note fall through. */
4036
4037
4038 /* The end of a simple repeat has a pop_failure_jump back to
4039 its matching on_failure_jump, where the latter will push a
4040 failure point. The pop_failure_jump takes off failure
4041 points put on by this pop_failure_jump's matching
4042 on_failure_jump; we got through the pattern to here from the
4043 matching on_failure_jump, so didn't fail. */
4044 case pop_failure_jump:
4045 {
4046 /* We need to pass separate storage for the lowest and
4047 highest registers, even though we don't care about the
4048 actual values. Otherwise, we will restore only one
4049 register from the stack, since lowest will == highest in
4050 `pop_failure_point'. */
4051 unsigned dummy_low_reg, dummy_high_reg;
4052 unsigned char *pdummy;
4053 const char *sdummy;
4054
4055 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4056 POP_FAILURE_POINT (sdummy, pdummy,
4057 dummy_low_reg, dummy_high_reg,
4058 reg_dummy, reg_dummy, reg_info_dummy);
4059 }
4060 /* Note fall through. */
4061
4062
4063 /* Unconditionally jump (without popping any failure points). */
4064 case jump:
4065 unconditional_jump:
4066 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
4067 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4068 p += mcnt; /* Do the jump. */
4069 DEBUG_PRINT2 ("(to 0x%x).\n", p);
4070 break;
4071
4072
4073 /* We need this opcode so we can detect where alternatives end
4074 in `group_match_null_string_p' et al. */
4075 case jump_past_alt:
4076 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4077 goto unconditional_jump;
4078
4079
4080 /* Normally, the on_failure_jump pushes a failure point, which
4081 then gets popped at pop_failure_jump. We will end up at
4082 pop_failure_jump, also, and with a pattern of, say, `a+', we
4083 are skipping over the on_failure_jump, so we have to push
4084 something meaningless for pop_failure_jump to pop. */
4085 case dummy_failure_jump:
4086 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4087 /* It doesn't matter what we push for the string here. What
4088 the code at `fail' tests is the value for the pattern. */
4089 PUSH_FAILURE_POINT (0, 0, -2);
4090 goto unconditional_jump;
4091
4092
4093 /* At the end of an alternative, we need to push a dummy failure
4094 point in case we are followed by a `pop_failure_jump', because
4095 we don't want the failure point for the alternative to be
4096 popped. For example, matching `(a|ab)*' against `aab'
4097 requires that we match the `ab' alternative. */
4098 case push_dummy_failure:
4099 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4100 /* See comments just above at `dummy_failure_jump' about the
4101 two zeroes. */
4102 PUSH_FAILURE_POINT (0, 0, -2);
4103 break;
4104
4105 /* Have to succeed matching what follows at least n times.
4106 After that, handle like `on_failure_jump'. */
4107 case succeed_n:
4108 EXTRACT_NUMBER (mcnt, p + 2);
4109 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4110
4111 assert (mcnt >= 0);
4112 /* Originally, this is how many times we HAVE to succeed. */
4113 if (mcnt > 0)
4114 {
4115 mcnt--;
4116 p += 2;
4117 STORE_NUMBER_AND_INCR (p, mcnt);
4118 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
4119 }
4120 else if (mcnt == 0)
4121 {
4122 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
4123 p[2] = (unsigned char) no_op;
4124 p[3] = (unsigned char) no_op;
4125 goto on_failure;
4126 }
4127 break;
4128
4129 case jump_n:
4130 EXTRACT_NUMBER (mcnt, p + 2);
4131 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4132
4133 /* Originally, this is how many times we CAN jump. */
4134 if (mcnt)
4135 {
4136 mcnt--;
4137 STORE_NUMBER (p + 2, mcnt);
4138 goto unconditional_jump;
4139 }
4140 /* If don't have to jump any more, skip over the rest of command. */
4141 else
4142 p += 4;
4143 break;
4144
4145 case set_number_at:
4146 {
4147 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4148
4149 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4150 p1 = p + mcnt;
4151 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4152 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
4153 STORE_NUMBER (p1, mcnt);
4154 break;
4155 }
4156
4157 case wordbound:
4158 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4159 if (AT_WORD_BOUNDARY (d))
4160 break;
4161 goto fail;
4162
4163 case notwordbound:
4164 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4165 if (AT_WORD_BOUNDARY (d))
4166 goto fail;
4167 break;
4168
4169 case wordbeg:
4170 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4171 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4172 break;
4173 goto fail;
4174
4175 case wordend:
4176 DEBUG_PRINT1 ("EXECUTING wordend.\n");
4177 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4178 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4179 break;
4180 goto fail;
4181
4182#ifdef emacs
4183#ifdef emacs19
4184 case before_dot:
4185 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4186 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4187 goto fail;
4188 break;
4189
4190 case at_dot:
4191 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4192 if (PTR_CHAR_POS ((unsigned char *) d) != point)
4193 goto fail;
4194 break;
4195
4196 case after_dot:
4197 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4198 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4199 goto fail;
4200 break;
4201#else /* not emacs19 */
4202 case at_dot:
4203 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4204 if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4205 goto fail;
4206 break;
4207#endif /* not emacs19 */
4208
4209 case syntaxspec:
4210 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4211 mcnt = *p++;
4212 goto matchsyntax;
4213
4214 case wordchar:
4215 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4216 mcnt = (int) Sword;
4217 matchsyntax:
4218 PREFETCH ();
4219 if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4220 goto fail;
4221 SET_REGS_MATCHED ();
4222 break;
4223
4224 case notsyntaxspec:
4225 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4226 mcnt = *p++;
4227 goto matchnotsyntax;
4228
4229 case notwordchar:
4230 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4231 mcnt = (int) Sword;
4232 matchnotsyntax:
4233 PREFETCH ();
4234 if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4235 goto fail;
4236 SET_REGS_MATCHED ();
4237 break;
4238
4239#else /* not emacs */
4240 case wordchar:
4241 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4242 PREFETCH ();
4243 if (!WORDCHAR_P (d))
4244 goto fail;
4245 SET_REGS_MATCHED ();
4246 d++;
4247 break;
4248
4249 case notwordchar:
4250 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4251 PREFETCH ();
4252 if (WORDCHAR_P (d))
4253 goto fail;
4254 SET_REGS_MATCHED ();
4255 d++;
4256 break;
4257#endif /* not emacs */
4258
4259 default:
4260 abort ();
4261 }
4262 continue; /* Successfully executed one pattern command; keep going. */
4263
4264
4265 /* We goto here if a matching operation fails. */
4266 fail:
4267 if (!FAIL_STACK_EMPTY ())
4268 { /* A restart point is known. Restore to that state. */
4269 DEBUG_PRINT1 ("\nFAIL:\n");
4270 POP_FAILURE_POINT (d, p,
4271 lowest_active_reg, highest_active_reg,
4272 regstart, regend, reg_info);
4273
4274 /* If this failure point is a dummy, try the next one. */
4275 if (!p)
4276 goto fail;
4277
4278 /* If we failed to the end of the pattern, don't examine *p. */
4279 assert (p <= pend);
4280 if (p < pend)
4281 {
4282 boolean is_a_jump_n = false;
4283
4284 /* If failed to a backwards jump that's part of a repetition
4285 loop, need to pop this failure point and use the next one. */
4286 switch ((re_opcode_t) *p)
4287 {
4288 case jump_n:
4289 is_a_jump_n = true;
4290 case maybe_pop_jump:
4291 case pop_failure_jump:
4292 case jump:
4293 p1 = p + 1;
4294 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4295 p1 += mcnt;
4296
4297 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4298 || (!is_a_jump_n
4299 && (re_opcode_t) *p1 == on_failure_jump))
4300 goto fail;
4301 break;
4302 default:
4303 /* do nothing */ ;
4304 }
4305 }
4306
4307 if (d >= string1 && d <= end1)
4308 dend = end_match_1;
4309 }
4310 else
4311 break; /* Matching at this starting point really fails. */
4312 } /* for (;;) */
4313
4314 if (best_regs_set)
4315 goto restore_best_regs;
4316
4317 FREE_VARIABLES ();
4318
4319 return -1; /* Failure to match. */
4320} /* re_match_2 */
4321
4322/* Subroutine definitions for re_match_2. */
4323
4324
4325/* We are passed P pointing to a register number after a start_memory.
4326
4327 Return true if the pattern up to the corresponding stop_memory can
4328 match the empty string, and false otherwise.
4329
4330 If we find the matching stop_memory, sets P to point to one past its number.
4331 Otherwise, sets P to an undefined byte less than or equal to END.
4332
4333 We don't handle duplicates properly (yet). */
4334
4335static boolean
4336group_match_null_string_p (p, end, reg_info)
4337 unsigned char **p, *end;
4338 register_info_type *reg_info;
4339{
4340 int mcnt;
4341 /* Point to after the args to the start_memory. */
4342 unsigned char *p1 = *p + 2;
4343
4344 while (p1 < end)
4345 {
4346 /* Skip over opcodes that can match nothing, and return true or
4347 false, as appropriate, when we get to one that can't, or to the
4348 matching stop_memory. */
4349
4350 switch ((re_opcode_t) *p1)
4351 {
4352 /* Could be either a loop or a series of alternatives. */
4353 case on_failure_jump:
4354 p1++;
4355 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4356
4357 /* If the next operation is not a jump backwards in the
4358 pattern. */
4359
4360 if (mcnt >= 0)
4361 {
4362 /* Go through the on_failure_jumps of the alternatives,
4363 seeing if any of the alternatives cannot match nothing.
4364 The last alternative starts with only a jump,
4365 whereas the rest start with on_failure_jump and end
4366 with a jump, e.g., here is the pattern for `a|b|c':
4367
4368 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4369 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4370 /exactn/1/c
4371
4372 So, we have to first go through the first (n-1)
4373 alternatives and then deal with the last one separately. */
4374
4375
4376 /* Deal with the first (n-1) alternatives, which start
4377 with an on_failure_jump (see above) that jumps to right
4378 past a jump_past_alt. */
4379
4380 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4381 {
4382 /* `mcnt' holds how many bytes long the alternative
4383 is, including the ending `jump_past_alt' and
4384 its number. */
4385
4386 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4387 reg_info))
4388 return false;
4389
4390 /* Move to right after this alternative, including the
4391 jump_past_alt. */
4392 p1 += mcnt;
4393
4394 /* Break if it's the beginning of an n-th alternative
4395 that doesn't begin with an on_failure_jump. */
4396 if ((re_opcode_t) *p1 != on_failure_jump)
4397 break;
4398
4399 /* Still have to check that it's not an n-th
4400 alternative that starts with an on_failure_jump. */
4401 p1++;
4402 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4403 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4404 {
4405 /* Get to the beginning of the n-th alternative. */
4406 p1 -= 3;
4407 break;
4408 }
4409 }
4410
4411 /* Deal with the last alternative: go back and get number
4412 of the `jump_past_alt' just before it. `mcnt' contains
4413 the length of the alternative. */
4414 EXTRACT_NUMBER (mcnt, p1 - 2);
4415
4416 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4417 return false;
4418
4419 p1 += mcnt; /* Get past the n-th alternative. */
4420 } /* if mcnt > 0 */
4421 break;
4422
4423
4424 case stop_memory:
4425 assert (p1[1] == **p);
4426 *p = p1 + 2;
4427 return true;
4428
4429
4430 default:
4431 if (!common_op_match_null_string_p (&p1, end, reg_info))
4432 return false;
4433 }
4434 } /* while p1 < end */
4435
4436 return false;
4437} /* group_match_null_string_p */
4438
4439
4440/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4441 It expects P to be the first byte of a single alternative and END one
4442 byte past the last. The alternative can contain groups. */
4443
4444static boolean
4445alt_match_null_string_p (p, end, reg_info)
4446 unsigned char *p, *end;
4447 register_info_type *reg_info;
4448{
4449 int mcnt;
4450 unsigned char *p1 = p;
4451
4452 while (p1 < end)
4453 {
4454 /* Skip over opcodes that can match nothing, and break when we get
4455 to one that can't. */
4456
4457 switch ((re_opcode_t) *p1)
4458 {
4459 /* It's a loop. */
4460 case on_failure_jump:
4461 p1++;
4462 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4463 p1 += mcnt;
4464 break;
4465
4466 default:
4467 if (!common_op_match_null_string_p (&p1, end, reg_info))
4468 return false;
4469 }
4470 } /* while p1 < end */
4471
4472 return true;
4473} /* alt_match_null_string_p */
4474
4475
4476/* Deals with the ops common to group_match_null_string_p and
4477 alt_match_null_string_p.
4478
4479 Sets P to one after the op and its arguments, if any. */
4480
4481static boolean
4482common_op_match_null_string_p (p, end, reg_info)
4483 unsigned char **p, *end;
4484 register_info_type *reg_info;
4485{
4486 int mcnt;
4487 boolean ret;
4488 int reg_no;
4489 unsigned char *p1 = *p;
4490
4491 switch ((re_opcode_t) *p1++)
4492 {
4493 case no_op:
4494 case begline:
4495 case endline:
4496 case begbuf:
4497 case endbuf:
4498 case wordbeg:
4499 case wordend:
4500 case wordbound:
4501 case notwordbound:
4502#ifdef emacs
4503 case before_dot:
4504 case at_dot:
4505 case after_dot:
4506#endif
4507 break;
4508
4509 case start_memory:
4510 reg_no = *p1;
4511 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4512 ret = group_match_null_string_p (&p1, end, reg_info);
4513
4514 /* Have to set this here in case we're checking a group which
4515 contains a group and a back reference to it. */
4516
4517 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4518 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4519
4520 if (!ret)
4521 return false;
4522 break;
4523
4524 /* If this is an optimized succeed_n for zero times, make the jump. */
4525 case jump:
4526 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4527 if (mcnt >= 0)
4528 p1 += mcnt;
4529 else
4530 return false;
4531 break;
4532
4533 case succeed_n:
4534 /* Get to the number of times to succeed. */
4535 p1 += 2;
4536 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4537
4538 if (mcnt == 0)
4539 {
4540 p1 -= 4;
4541 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4542 p1 += mcnt;
4543 }
4544 else
4545 return false;
4546 break;
4547
4548 case duplicate:
4549 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4550 return false;
4551 break;
4552
4553 case set_number_at:
4554 p1 += 4;
4555
4556 default:
4557 /* All other opcodes mean we cannot match the empty string. */
4558 return false;
4559 }
4560
4561 *p = p1;
4562 return true;
4563} /* common_op_match_null_string_p */
4564
4565
4566/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4567 bytes; nonzero otherwise. */
4568
4569static int
4570bcmp_translate(
4571 unsigned char *s1,
4572 unsigned char *s2,
4573 int len,
4574 char *translate
4575)
4576{
4577 register unsigned char *p1 = s1, *p2 = s2;
4578 while (len)
4579 {
4580 if (translate[*p1++] != translate[*p2++]) return 1;
4581 len--;
4582 }
4583 return 0;
4584}
4585
4586/* Entry points for GNU code. */
4587
4588/* re_compile_pattern is the GNU regular expression compiler: it
4589 compiles PATTERN (of length SIZE) and puts the result in BUFP.
4590 Returns 0 if the pattern was valid, otherwise an error string.
4591
4592 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4593 are set in BUFP on entry.
4594
4595 We call regex_compile to do the actual compilation. */
4596
4597const char *
4598re_compile_pattern (pattern, length, bufp)
4599 const char *pattern;
4600 int length;
4601 struct re_pattern_buffer *bufp;
4602{
4603 reg_errcode_t ret;
4604
4605 /* GNU code is written to assume at least RE_NREGS registers will be set
4606 (and at least one extra will be -1). */
4607 bufp->regs_allocated = REGS_UNALLOCATED;
4608
4609 /* And GNU code determines whether or not to get register information
4610 by passing null for the REGS argument to re_match, etc., not by
4611 setting no_sub. */
4612 bufp->no_sub = 0;
4613
4614 /* Match anchors at newline. */
4615 bufp->newline_anchor = 1;
4616
4617 ret = regex_compile (pattern, length, re_syntax_options, bufp);
4618
4619 return re_error_msg[(int) ret];
4620}
4621
4622/* Entry points compatible with 4.2 BSD regex library. We don't define
4623 them if this is an Emacs or POSIX compilation. */
4624
4625#if !defined (emacs) && !defined (_POSIX_SOURCE)
4626
4627/* BSD has one and only one pattern buffer. */
4628static struct re_pattern_buffer re_comp_buf;
4629
4630char *
4631re_comp (s)
4632 const char *s;
4633{
4634 reg_errcode_t ret;
4635
4636 if (!s)
4637 {
4638 if (!re_comp_buf.buffer)
4639 return "No previous regular expression";
4640 return 0;
4641 }
4642
4643 if (!re_comp_buf.buffer)
4644 {
4645 re_comp_buf.buffer = (unsigned char *) malloc (200);
4646 if (re_comp_buf.buffer == NULL)
4647 return "Memory exhausted";
4648 re_comp_buf.allocated = 200;
4649
4650 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4651 if (re_comp_buf.fastmap == NULL)
4652 return "Memory exhausted";
4653 }
4654
4655 /* Since `re_exec' always passes NULL for the `regs' argument, we
4656 don't need to initialize the pattern buffer fields which affect it. */
4657
4658 /* Match anchors at newlines. */
4659 re_comp_buf.newline_anchor = 1;
4660
4661 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4662
4663 /* Yes, we're discarding `const' here. */
4664 return (char *) re_error_msg[(int) ret];
4665}
4666
4667
4668int
4669re_exec (s)
4670 const char *s;
4671{
4672 const int len = strlen (s);
4673 return
4674 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4675}
4676#endif /* not emacs and not _POSIX_SOURCE */
4677
4678/* POSIX.2 functions. Don't define these for Emacs. */
4679
4680#ifndef emacs
4681
4682/* regcomp takes a regular expression as a string and compiles it.
4683
4684 PREG is a regex_t *. We do not expect any fields to be initialized,
4685 since POSIX says we shouldn't. Thus, we set
4686
4687 `buffer' to the compiled pattern;
4688 `used' to the length of the compiled pattern;
4689 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4690 REG_EXTENDED bit in CFLAGS is set; otherwise, to
4691 RE_SYNTAX_POSIX_BASIC;
4692 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4693 `fastmap' and `fastmap_accurate' to zero;
4694 `re_nsub' to the number of subexpressions in PATTERN.
4695
4696 PATTERN is the address of the pattern string.
4697
4698 CFLAGS is a series of bits which affect compilation.
4699
4700 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4701 use POSIX basic syntax.
4702
4703 If REG_NEWLINE is set, then . and [^...] don't match newline.
4704 Also, regexec will try a match beginning after every newline.
4705
4706 If REG_ICASE is set, then we considers upper- and lowercase
4707 versions of letters to be equivalent when matching.
4708
4709 If REG_NOSUB is set, then when PREG is passed to regexec, that
4710 routine will report only success or failure, and nothing about the
4711 registers.
4712
4713 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
4714 the return codes and their meanings.) */
4715
4716int
4717regcomp (preg, pattern, cflags)
4718 regex_t *preg;
4719 const char *pattern;
4720 int cflags;
4721{
4722 reg_errcode_t ret;
4723 unsigned syntax
4724 = (cflags & REG_EXTENDED) ?
4725 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4726
4727 /* regex_compile will allocate the space for the compiled pattern. */
4728 preg->buffer = 0;
4729 preg->allocated = 0;
4730
4731 /* Don't bother to use a fastmap when searching. This simplifies the
4732 REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4733 characters after newlines into the fastmap. This way, we just try
4734 every character. */
4735 preg->fastmap = 0;
4736
4737 if (cflags & REG_ICASE)
4738 {
4739 unsigned i;
4740
4741 preg->translate = (char *) malloc (CHAR_SET_SIZE);
4742 if (preg->translate == NULL)
4743 return (int) REG_ESPACE;
4744
4745 /* Map uppercase characters to corresponding lowercase ones. */
4746 for (i = 0; i < CHAR_SET_SIZE; i++)
4747 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4748 }
4749 else
4750 preg->translate = NULL;
4751
4752 /* If REG_NEWLINE is set, newlines are treated differently. */
4753 if (cflags & REG_NEWLINE)
4754 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
4755 syntax &= ~RE_DOT_NEWLINE;
4756 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4757 /* It also changes the matching behavior. */
4758 preg->newline_anchor = 1;
4759 }
4760 else
4761 preg->newline_anchor = 0;
4762
4763 preg->no_sub = !!(cflags & REG_NOSUB);
4764
4765 /* POSIX says a null character in the pattern terminates it, so we
4766 can use strlen here in compiling the pattern. */
4767 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4768
4769 /* POSIX doesn't distinguish between an unmatched open-group and an
4770 unmatched close-group: both are REG_EPAREN. */
4771 if (ret == REG_ERPAREN) ret = REG_EPAREN;
4772
4773 return (int) ret;
4774}
4775
4776
4777/* regexec searches for a given pattern, specified by PREG, in the
4778 string STRING.
4779
4780 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4781 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
4782 least NMATCH elements, and we set them to the offsets of the
4783 corresponding matched substrings.
4784
4785 EFLAGS specifies `execution flags' which affect matching: if
4786 REG_NOTBOL is set, then ^ does not match at the beginning of the
4787 string; if REG_NOTEOL is set, then $ does not match at the end.
4788
4789 We return 0 if we find a match and REG_NOMATCH if not. */
4790
4791int
4792regexec (preg, string, nmatch, pmatch, eflags)
4793 const regex_t *preg;
4794 const char *string;
4795 size_t nmatch;
4796 regmatch_t pmatch[];
4797 int eflags;
4798{
4799 int ret;
4800 struct re_registers regs;
4801 regex_t private_preg;
4802 int len = strlen (string);
4803 boolean want_reg_info = !preg->no_sub && nmatch > 0;
4804
4805 private_preg = *preg;
4806
4807 private_preg.not_bol = !!(eflags & REG_NOTBOL);
4808 private_preg.not_eol = !!(eflags & REG_NOTEOL);
4809
4810 /* The user has told us exactly how many registers to return
4811 information about, via `nmatch'. We have to pass that on to the
4812 matching routines. */
4813 private_preg.regs_allocated = REGS_FIXED;
4814
4815 if (want_reg_info)
4816 {
4817 regs.num_regs = nmatch;
4818 regs.start = TALLOC (nmatch, regoff_t);
4819 regs.end = TALLOC (nmatch, regoff_t);
4820 if (regs.start == NULL || regs.end == NULL)
4821 return (int) REG_NOMATCH;
4822 }
4823
4824 /* Perform the searching operation. */
4825 ret = re_search (&private_preg, string, len,
4826 /* start: */ 0, /* range: */ len,
4827 want_reg_info ? &regs : (struct re_registers *) 0);
4828
4829 /* Copy the register information to the POSIX structure. */
4830 if (want_reg_info)
4831 {
4832 if (ret >= 0)
4833 {
4834 unsigned r;
4835
4836 for (r = 0; r < nmatch; r++)
4837 {
4838 pmatch[r].rm_so = regs.start[r];
4839 pmatch[r].rm_eo = regs.end[r];
4840 }
4841 }
4842
4843 /* If we needed the temporary register info, free the space now. */
4844 free (regs.start);
4845 free (regs.end);
4846 }
4847
4848 /* We want zero return to mean success, unlike `re_search'. */
4849 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4850}
4851
4852
4853/* Returns a message corresponding to an error code, ERRCODE, returned
4854 from either regcomp or regexec. We don't use PREG here. */
4855
4856size_t
4857regerror (errcode, preg, errbuf, errbuf_size)
4858 int errcode;
4859 const regex_t *preg;
4860 char *errbuf;
4861 size_t errbuf_size;
4862{
4863 const char *msg;
4864 size_t msg_size;
4865
4866 if (errcode < 0
4867 || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4868 /* Only error codes returned by the rest of the code should be passed
4869 to this routine. If we are given anything else, or if other regex
4870 code generates an invalid error code, then the program has a bug.
4871 Dump core so we can fix it. */
4872 abort ();
4873
4874 msg = re_error_msg[errcode];
4875
4876 /* POSIX doesn't require that we do anything in this case, but why
4877 not be nice. */
4878 if (! msg)
4879 msg = "Success";
4880
4881 msg_size = strlen (msg) + 1; /* Includes the null. */
4882
4883 if (errbuf_size != 0)
4884 {
4885 if (msg_size > errbuf_size)
4886 {
4887 strncpy (errbuf, msg, errbuf_size - 1);
4888 errbuf[errbuf_size - 1] = 0;
4889 }
4890 else
4891 strcpy (errbuf, msg);
4892 }
4893
4894 return msg_size;
4895}
4896
4897
4898/* Free dynamically allocated space used by PREG. */
4899
4900void
4901regfree (preg)
4902 regex_t *preg;
4903{
4904 if (preg->buffer != NULL)
4905 free (preg->buffer);
4906 preg->buffer = NULL;
4907
4908 preg->allocated = 0;
4909 preg->used = 0;
4910
4911 if (preg->fastmap != NULL)
4912 free (preg->fastmap);
4913 preg->fastmap = NULL;
4914 preg->fastmap_accurate = 0;
4915
4916 if (preg->translate != NULL)
4917 free (preg->translate);
4918 preg->translate = NULL;
4919}
4920
4921#endif /* not emacs */
4922
4923/*
4924Local variables:
4925make-backup-files: t
4926version-control: t
4927trim-versions-without-asking: nil
4928End:
4929*/
diff --git a/win32/regex.h b/win32/regex.h
new file mode 100644
index 000000000..6eb64f140
--- /dev/null
+++ b/win32/regex.h
@@ -0,0 +1,490 @@
1/* Definitions for data structures and routines for the regular
2 expression library, version 0.12.
3
4 Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20#ifndef __REGEXP_LIBRARY_H__
21#define __REGEXP_LIBRARY_H__
22
23/* POSIX says that <sys/types.h> must be included (by the caller) before
24 <regex.h>. */
25
26#ifdef VMS
27/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
28 should be there. */
29#include <stddef.h>
30#endif
31
32
33/* The following bits are used to determine the regexp syntax we
34 recognize. The set/not-set meanings are chosen so that Emacs syntax
35 remains the value 0. The bits are given in alphabetical order, and
36 the definitions shifted by one from the previous bit; thus, when we
37 add or remove a bit, only one other definition need change. */
38typedef unsigned reg_syntax_t;
39
40/* If this bit is not set, then \ inside a bracket expression is literal.
41 If set, then such a \ quotes the following character. */
42#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
43
44/* If this bit is not set, then + and ? are operators, and \+ and \? are
45 literals.
46 If set, then \+ and \? are operators and + and ? are literals. */
47#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
48
49/* If this bit is set, then character classes are supported. They are:
50 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
51 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
52 If not set, then character classes are not supported. */
53#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
54
55/* If this bit is set, then ^ and $ are always anchors (outside bracket
56 expressions, of course).
57 If this bit is not set, then it depends:
58 ^ is an anchor if it is at the beginning of a regular
59 expression or after an open-group or an alternation operator;
60 $ is an anchor if it is at the end of a regular expression, or
61 before a close-group or an alternation operator.
62
63 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
64 POSIX draft 11.2 says that * etc. in leading positions is undefined.
65 We already implemented a previous draft which made those constructs
66 invalid, though, so we haven't changed the code back. */
67#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
68
69/* If this bit is set, then special characters are always special
70 regardless of where they are in the pattern.
71 If this bit is not set, then special characters are special only in
72 some contexts; otherwise they are ordinary. Specifically,
73 * + ? and intervals are only special when not after the beginning,
74 open-group, or alternation operator. */
75#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
76
77/* If this bit is set, then *, +, ?, and { cannot be first in an re or
78 immediately after an alternation or begin-group operator. */
79#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
80
81/* If this bit is set, then . matches newline.
82 If not set, then it doesn't. */
83#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
84
85/* If this bit is set, then . doesn't match NUL.
86 If not set, then it does. */
87#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
88
89/* If this bit is set, nonmatching lists [^...] do not match newline.
90 If not set, they do. */
91#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
92
93/* If this bit is set, either \{...\} or {...} defines an
94 interval, depending on RE_NO_BK_BRACES.
95 If not set, \{, \}, {, and } are literals. */
96#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
97
98/* If this bit is set, +, ? and | aren't recognized as operators.
99 If not set, they are. */
100#define RE_LIMITED_OPS (RE_INTERVALS << 1)
101
102/* If this bit is set, newline is an alternation operator.
103 If not set, newline is literal. */
104#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
105
106/* If this bit is set, then `{...}' defines an interval, and \{ and \}
107 are literals.
108 If not set, then `\{...\}' defines an interval. */
109#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
110
111/* If this bit is set, (...) defines a group, and \( and \) are literals.
112 If not set, \(...\) defines a group, and ( and ) are literals. */
113#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
114
115/* If this bit is set, then \<digit> matches <digit>.
116 If not set, then \<digit> is a back-reference. */
117#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
118
119/* If this bit is set, then | is an alternation operator, and \| is literal.
120 If not set, then \| is an alternation operator, and | is literal. */
121#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
122
123/* If this bit is set, then an ending range point collating higher
124 than the starting range point, as in [z-a], is invalid.
125 If not set, then when ending range point collates higher than the
126 starting range point, the range is ignored. */
127#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
128
129/* If this bit is set, then an unmatched ) is ordinary.
130 If not set, then an unmatched ) is invalid. */
131#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
132
133/* This global variable defines the particular regexp syntax to use (for
134 some interfaces). When a regexp is compiled, the syntax used is
135 stored in the pattern buffer, so changing this does not affect
136 already-compiled regexps. */
137extern reg_syntax_t re_syntax_options;
138
139/* Define combinations of the above bits for the standard possibilities.
140 (The [[[ comments delimit what gets put into the Texinfo file, so
141 don't delete them!) */
142/* [[[begin syntaxes]]] */
143#define RE_SYNTAX_EMACS 0
144
145#define RE_SYNTAX_AWK \
146 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
147 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
148 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
149 | RE_UNMATCHED_RIGHT_PAREN_ORD)
150
151#define RE_SYNTAX_POSIX_AWK \
152 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
153
154#define RE_SYNTAX_GREP \
155 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
156 | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
157 | RE_NEWLINE_ALT)
158
159#define RE_SYNTAX_EGREP \
160 (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
161 | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
162 | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
163 | RE_NO_BK_VBAR)
164
165#define RE_SYNTAX_POSIX_EGREP \
166 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
167
168/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
169#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
170
171#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
172
173/* Syntax bits common to both basic and extended POSIX regex syntax. */
174#define _RE_SYNTAX_POSIX_COMMON \
175 (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
176 | RE_INTERVALS | RE_NO_EMPTY_RANGES)
177
178#define RE_SYNTAX_POSIX_BASIC \
179 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
180
181/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
182 RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
183 isn't minimal, since other operators, such as \`, aren't disabled. */
184#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
185 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
186
187#define RE_SYNTAX_POSIX_EXTENDED \
188 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
189 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
190 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
191 | RE_UNMATCHED_RIGHT_PAREN_ORD)
192
193/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
194 replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
195#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
196 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
197 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
198 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
199 | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
200/* [[[end syntaxes]]] */
201
202/* Maximum number of duplicates an interval can allow. Some systems
203 (erroneously) define this in other header files, but we want our
204 value, so remove any previous define. */
205#ifdef RE_DUP_MAX
206#undef RE_DUP_MAX
207#endif
208#define RE_DUP_MAX ((1 << 15) - 1)
209
210
211/* POSIX `cflags' bits (i.e., information for `regcomp'). */
212
213/* If this bit is set, then use extended regular expression syntax.
214 If not set, then use basic regular expression syntax. */
215#define REG_EXTENDED 1
216
217/* If this bit is set, then ignore case when matching.
218 If not set, then case is significant. */
219#define REG_ICASE (REG_EXTENDED << 1)
220
221/* If this bit is set, then anchors do not match at newline
222 characters in the string.
223 If not set, then anchors do match at newlines. */
224#define REG_NEWLINE (REG_ICASE << 1)
225
226/* If this bit is set, then report only success or fail in regexec.
227 If not set, then returns differ between not matching and errors. */
228#define REG_NOSUB (REG_NEWLINE << 1)
229
230
231/* POSIX `eflags' bits (i.e., information for regexec). */
232
233/* If this bit is set, then the beginning-of-line operator doesn't match
234 the beginning of the string (presumably because it's not the
235 beginning of a line).
236 If not set, then the beginning-of-line operator does match the
237 beginning of the string. */
238#define REG_NOTBOL 1
239
240/* Like REG_NOTBOL, except for the end-of-line. */
241#define REG_NOTEOL (1 << 1)
242
243
244/* If any error codes are removed, changed, or added, update the
245 `re_error_msg' table in regex.c. */
246typedef enum
247{
248 REG_NOERROR = 0, /* Success. */
249 REG_NOMATCH, /* Didn't find a match (for regexec). */
250
251 /* POSIX regcomp return error codes. (In the order listed in the
252 standard.) */
253 REG_BADPAT, /* Invalid pattern. */
254 REG_ECOLLATE, /* Not implemented. */
255 REG_ECTYPE, /* Invalid character class name. */
256 REG_EESCAPE, /* Trailing backslash. */
257 REG_ESUBREG, /* Invalid back reference. */
258 REG_EBRACK, /* Unmatched left bracket. */
259 REG_EPAREN, /* Parenthesis imbalance. */
260 REG_EBRACE, /* Unmatched \{. */
261 REG_BADBR, /* Invalid contents of \{\}. */
262 REG_ERANGE, /* Invalid range end. */
263 REG_ESPACE, /* Ran out of memory. */
264 REG_BADRPT, /* No preceding re for repetition op. */
265
266 /* Error codes we've added. */
267 REG_EEND, /* Premature end. */
268 REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
269 REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
270} reg_errcode_t;
271
272/* This data structure represents a compiled pattern. Before calling
273 the pattern compiler, the fields `buffer', `allocated', `fastmap',
274 `translate', and `no_sub' can be set. After the pattern has been
275 compiled, the `re_nsub' field is available. All other fields are
276 private to the regex routines. */
277
278struct re_pattern_buffer
279{
280/* [[[begin pattern_buffer]]] */
281 /* Space that holds the compiled pattern. It is declared as
282 `unsigned char *' because its elements are
283 sometimes used as array indexes. */
284 unsigned char *buffer;
285
286 /* Number of bytes to which `buffer' points. */
287 unsigned long allocated;
288
289 /* Number of bytes actually used in `buffer'. */
290 unsigned long used;
291
292 /* Syntax setting with which the pattern was compiled. */
293 reg_syntax_t syntax;
294
295 /* Pointer to a fastmap, if any, otherwise zero. re_search uses
296 the fastmap, if there is one, to skip over impossible
297 starting points for matches. */
298 char *fastmap;
299
300 /* Either a translate table to apply to all characters before
301 comparing them, or zero for no translation. The translation
302 is applied to a pattern when it is compiled and to a string
303 when it is matched. */
304 char *translate;
305
306 /* Number of subexpressions found by the compiler. */
307 size_t re_nsub;
308
309 /* Zero if this pattern cannot match the empty string, one else.
310 Well, in truth it's used only in `re_search_2', to see
311 whether or not we should use the fastmap, so we don't set
312 this absolutely perfectly; see `re_compile_fastmap' (the
313 `duplicate' case). */
314 unsigned can_be_null : 1;
315
316 /* If REGS_UNALLOCATED, allocate space in the `regs' structure
317 for `max (RE_NREGS, re_nsub + 1)' groups.
318 If REGS_REALLOCATE, reallocate space if necessary.
319 If REGS_FIXED, use what's there. */
320#define REGS_UNALLOCATED 0
321#define REGS_REALLOCATE 1
322#define REGS_FIXED 2
323 unsigned regs_allocated : 2;
324
325 /* Set to zero when `regex_compile' compiles a pattern; set to one
326 by `re_compile_fastmap' if it updates the fastmap. */
327 unsigned fastmap_accurate : 1;
328
329 /* If set, `re_match_2' does not return information about
330 subexpressions. */
331 unsigned no_sub : 1;
332
333 /* If set, a beginning-of-line anchor doesn't match at the
334 beginning of the string. */
335 unsigned not_bol : 1;
336
337 /* Similarly for an end-of-line anchor. */
338 unsigned not_eol : 1;
339
340 /* If true, an anchor at a newline matches. */
341 unsigned newline_anchor : 1;
342
343/* [[[end pattern_buffer]]] */
344};
345
346typedef struct re_pattern_buffer regex_t;
347
348
349/* search.c (search_buffer) in Emacs needs this one opcode value. It is
350 defined both in `regex.c' and here. */
351#define RE_EXACTN_VALUE 1
352
353/* Type for byte offsets within the string. POSIX mandates this. */
354typedef int regoff_t;
355
356
357/* This is the structure we store register match data in. See
358 regex.texinfo for a full description of what registers match. */
359struct re_registers
360{
361 unsigned num_regs;
362 regoff_t *start;
363 regoff_t *end;
364};
365
366
367/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
368 `re_match_2' returns information about at least this many registers
369 the first time a `regs' structure is passed. */
370#ifndef RE_NREGS
371#define RE_NREGS 30
372#endif
373
374
375/* POSIX specification for registers. Aside from the different names than
376 `re_registers', POSIX uses an array of structures, instead of a
377 structure of arrays. */
378typedef struct
379{
380 regoff_t rm_so; /* Byte offset from string's start to substring's start. */
381 regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
382} regmatch_t;
383
384/* Declarations for routines. */
385
386/* To avoid duplicating every routine declaration -- once with a
387 prototype (if we are ANSI), and once without (if we aren't) -- we
388 use the following macro to declare argument types. This
389 unfortunately clutters up the declarations a bit, but I think it's
390 worth it. */
391
392#if __STDC__
393
394#define _RE_ARGS(args) args
395
396#else /* not __STDC__ */
397
398#define _RE_ARGS(args) ()
399
400#endif /* not __STDC__ */
401
402/* Sets the current default syntax to SYNTAX, and return the old syntax.
403 You can also simply assign to the `re_syntax_options' variable. */
404extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
405
406/* Compile the regular expression PATTERN, with length LENGTH
407 and syntax given by the global `re_syntax_options', into the buffer
408 BUFFER. Return NULL if successful, and an error string if not. */
409extern const char *re_compile_pattern
410 _RE_ARGS ((const char *pattern, int length,
411 struct re_pattern_buffer *buffer));
412
413
414/* Compile a fastmap for the compiled pattern in BUFFER; used to
415 accelerate searches. Return 0 if successful and -2 if was an
416 internal error. */
417extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
418
419
420/* Search in the string STRING (with length LENGTH) for the pattern
421 compiled into BUFFER. Start searching at position START, for RANGE
422 characters. Return the starting position of the match, -1 for no
423 match, or -2 for an internal error. Also return register
424 information in REGS (if REGS and BUFFER->no_sub are nonzero). */
425extern int re_search
426 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
427 int length, int start, int range, struct re_registers *regs));
428
429
430/* Like `re_search', but search in the concatenation of STRING1 and
431 STRING2. Also, stop searching at index START + STOP. */
432extern int re_search_2
433 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
434 int length1, const char *string2, int length2,
435 int start, int range, struct re_registers *regs, int stop));
436
437
438/* Like `re_search', but return how many characters in STRING the regexp
439 in BUFFER matched, starting at position START. */
440extern int re_match
441 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
442 int length, int start, struct re_registers *regs));
443
444
445/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
446extern int re_match_2
447 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
448 int length1, const char *string2, int length2,
449 int start, struct re_registers *regs, int stop));
450
451
452/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
453 ENDS. Subsequent matches using BUFFER and REGS will use this memory
454 for recording register information. STARTS and ENDS must be
455 allocated with malloc, and must each be at least `NUM_REGS * sizeof
456 (regoff_t)' bytes long.
457
458 If NUM_REGS == 0, then subsequent matches should allocate their own
459 register data.
460
461 Unless this function is called, the first search or match using
462 PATTERN_BUFFER will allocate its own register data, without
463 freeing the old data. */
464extern void re_set_registers
465 _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
466 unsigned num_regs, regoff_t *starts, regoff_t *ends));
467
468/* 4.2 bsd compatibility. */
469extern char *re_comp _RE_ARGS ((const char *));
470extern int re_exec _RE_ARGS ((const char *));
471
472/* POSIX compatibility. */
473extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
474extern int regexec
475 _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
476 regmatch_t pmatch[], int eflags));
477extern size_t regerror
478 _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
479 size_t errbuf_size));
480extern void regfree _RE_ARGS ((regex_t *preg));
481
482#endif /* not __REGEXP_LIBRARY_H__ */
483
484/*
485Local variables:
486make-backup-files: t
487version-control: t
488trim-versions-without-asking: nil
489End:
490*/