aboutsummaryrefslogtreecommitdiff
path: root/win32/regex.h
diff options
context:
space:
mode:
Diffstat (limited to 'win32/regex.h')
-rw-r--r-- win32/regex.h 462
1 files changed, 277 insertions, 185 deletions
diff --git a/win32/regex.h b/win32/regex.h
index 6eb64f140..61c968387 100644
--- a/win32/regex.h
+++ b/win32/regex.h
@@ -1,70 +1,90 @@
1/* Definitions for data structures and routines for the regular 1#include <stdio.h>
2 expression library, version 0.12. 2#include <stddef.h>
3 3
4 Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. 4/* Definitions for data structures and routines for the regular
5 expression library.
6 Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008
7 Free Software Foundation, Inc.
8 This file is part of the GNU C Library.
5 9
6 This program is free software; you can redistribute it and/or modify 10 The GNU C Library is free software; you can redistribute it and/or
7 it under the terms of the GNU General Public License as published by 11 modify it under the terms of the GNU Lesser General Public
8 the Free Software Foundation; either version 2, or (at your option) 12 License as published by the Free Software Foundation; either
9 any later version. 13 version 2.1 of the License, or (at your option) any later version.
10 14
11 This program is distributed in the hope that it will be useful, 15 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 GNU General Public License for more details. 18 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
19 19
20#ifndef __REGEXP_LIBRARY_H__ 20 You should have received a copy of the GNU Lesser General Public
21#define __REGEXP_LIBRARY_H__ 21 License along with the GNU C Library; if not, write to the Free
22 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 02110-1301 USA. */
22 24
23/* POSIX says that <sys/types.h> must be included (by the caller) before 25#ifndef _REGEX_H
24 <regex.h>. */ 26#define _REGEX_H 1
25 27
26#ifdef VMS 28#ifdef HAVE_STDDEF_H
27/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
28 should be there. */
29#include <stddef.h> 29#include <stddef.h>
30#endif 30#endif
31 31
32#ifdef HAVE_SYS_TYPES_H
33#include <sys/types.h>
34#endif
35
36#ifndef _LIBC
37#define __USE_GNU 1
38#endif
39
40/* Allow the use in C++ code. */
41#ifdef __cplusplus
42extern "C" {
43#endif
44
45/* The following two types have to be signed and unsigned integer type
46 wide enough to hold a value of a pointer. For most ANSI compilers
47 ptrdiff_t and size_t should be likely OK. Still size of these two
48 types is 2 for Microsoft C. Ugh... */
49typedef long int s_reg_t;
50typedef unsigned long int active_reg_t;
32 51
33/* The following bits are used to determine the regexp syntax we 52/* The following bits are used to determine the regexp syntax we
34 recognize. The set/not-set meanings are chosen so that Emacs syntax 53 recognize. The set/not-set meanings are chosen so that Emacs syntax
35 remains the value 0. The bits are given in alphabetical order, and 54 remains the value 0. The bits are given in alphabetical order, and
36 the definitions shifted by one from the previous bit; thus, when we 55 the definitions shifted by one from the previous bit; thus, when we
37 add or remove a bit, only one other definition need change. */ 56 add or remove a bit, only one other definition need change. */
38typedef unsigned reg_syntax_t; 57typedef unsigned long int reg_syntax_t;
39 58
59#ifdef __USE_GNU
40/* If this bit is not set, then \ inside a bracket expression is literal. 60/* If this bit is not set, then \ inside a bracket expression is literal.
41 If set, then such a \ quotes the following character. */ 61 If set, then such a \ quotes the following character. */
42#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) 62# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
43 63
44/* If this bit is not set, then + and ? are operators, and \+ and \? are 64/* If this bit is not set, then + and ? are operators, and \+ and \? are
45 literals. 65 literals.
46 If set, then \+ and \? are operators and + and ? are literals. */ 66 If set, then \+ and \? are operators and + and ? are literals. */
47#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) 67# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
48 68
49/* If this bit is set, then character classes are supported. They are: 69/* If this bit is set, then character classes are supported. They are:
50 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], 70 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
51 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. 71 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
52 If not set, then character classes are not supported. */ 72 If not set, then character classes are not supported. */
53#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) 73# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
54 74
55/* If this bit is set, then ^ and $ are always anchors (outside bracket 75/* If this bit is set, then ^ and $ are always anchors (outside bracket
56 expressions, of course). 76 expressions, of course).
57 If this bit is not set, then it depends: 77 If this bit is not set, then it depends:
58 ^ is an anchor if it is at the beginning of a regular 78 ^ is an anchor if it is at the beginning of a regular
59 expression or after an open-group or an alternation operator; 79 expression or after an open-group or an alternation operator;
60 $ is an anchor if it is at the end of a regular expression, or 80 $ is an anchor if it is at the end of a regular expression, or
61 before a close-group or an alternation operator. 81 before a close-group or an alternation operator.
62 82
63 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because 83 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
64 POSIX draft 11.2 says that * etc. in leading positions is undefined. 84 POSIX draft 11.2 says that * etc. in leading positions is undefined.
65 We already implemented a previous draft which made those constructs 85 We already implemented a previous draft which made those constructs
66 invalid, though, so we haven't changed the code back. */ 86 invalid, though, so we haven't changed the code back. */
67#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) 87# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
68 88
69/* If this bit is set, then special characters are always special 89/* If this bit is set, then special characters are always special
70 regardless of where they are in the pattern. 90 regardless of where they are in the pattern.
@@ -72,63 +92,94 @@ typedef unsigned reg_syntax_t;
72 some contexts; otherwise they are ordinary. Specifically, 92 some contexts; otherwise they are ordinary. Specifically,
73 * + ? and intervals are only special when not after the beginning, 93 * + ? and intervals are only special when not after the beginning,
74 open-group, or alternation operator. */ 94 open-group, or alternation operator. */
75#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) 95# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
76 96
77/* If this bit is set, then *, +, ?, and { cannot be first in an re or 97/* If this bit is set, then *, +, ?, and { cannot be first in an re or
78 immediately after an alternation or begin-group operator. */ 98 immediately after an alternation or begin-group operator. */
79#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) 99# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
80 100
81/* If this bit is set, then . matches newline. 101/* If this bit is set, then . matches newline.
82 If not set, then it doesn't. */ 102 If not set, then it doesn't. */
83#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) 103# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
84 104
85/* If this bit is set, then . doesn't match NUL. 105/* If this bit is set, then . doesn't match NUL.
86 If not set, then it does. */ 106 If not set, then it does. */
87#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) 107# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
88 108
89/* If this bit is set, nonmatching lists [^...] do not match newline. 109/* If this bit is set, nonmatching lists [^...] do not match newline.
90 If not set, they do. */ 110 If not set, they do. */
91#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) 111# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
92 112
93/* If this bit is set, either \{...\} or {...} defines an 113/* If this bit is set, either \{...\} or {...} defines an
94 interval, depending on RE_NO_BK_BRACES. 114 interval, depending on RE_NO_BK_BRACES.
95 If not set, \{, \}, {, and } are literals. */ 115 If not set, \{, \}, {, and } are literals. */
96#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) 116# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
97 117
98/* If this bit is set, +, ? and | aren't recognized as operators. 118/* If this bit is set, +, ? and | aren't recognized as operators.
99 If not set, they are. */ 119 If not set, they are. */
100#define RE_LIMITED_OPS (RE_INTERVALS << 1) 120# define RE_LIMITED_OPS (RE_INTERVALS << 1)
101 121
102/* If this bit is set, newline is an alternation operator. 122/* If this bit is set, newline is an alternation operator.
103 If not set, newline is literal. */ 123 If not set, newline is literal. */
104#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) 124# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
105 125
106/* If this bit is set, then `{...}' defines an interval, and \{ and \} 126/* If this bit is set, then `{...}' defines an interval, and \{ and \}
107 are literals. 127 are literals.
108 If not set, then `\{...\}' defines an interval. */ 128 If not set, then `\{...\}' defines an interval. */
109#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) 129# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
110 130
111/* If this bit is set, (...) defines a group, and \( and \) are literals. 131/* If this bit is set, (...) defines a group, and \( and \) are literals.
112 If not set, \(...\) defines a group, and ( and ) are literals. */ 132 If not set, \(...\) defines a group, and ( and ) are literals. */
113#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) 133# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
114 134
115/* If this bit is set, then \<digit> matches <digit>. 135/* If this bit is set, then \<digit> matches <digit>.
116 If not set, then \<digit> is a back-reference. */ 136 If not set, then \<digit> is a back-reference. */
117#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) 137# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
118 138
119/* If this bit is set, then | is an alternation operator, and \| is literal. 139/* If this bit is set, then | is an alternation operator, and \| is literal.
120 If not set, then \| is an alternation operator, and | is literal. */ 140 If not set, then \| is an alternation operator, and | is literal. */
121#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) 141# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
122 142
123/* If this bit is set, then an ending range point collating higher 143/* If this bit is set, then an ending range point collating higher
124 than the starting range point, as in [z-a], is invalid. 144 than the starting range point, as in [z-a], is invalid.
125 If not set, then when ending range point collates higher than the 145 If not set, then when ending range point collates higher than the
126 starting range point, the range is ignored. */ 146 starting range point, the range is ignored. */
127#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) 147# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
128 148
129/* If this bit is set, then an unmatched ) is ordinary. 149/* If this bit is set, then an unmatched ) is ordinary.
130 If not set, then an unmatched ) is invalid. */ 150 If not set, then an unmatched ) is invalid. */
131#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) 151# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
152
153/* If this bit is set, succeed as soon as we match the whole pattern,
154 without further backtracking. */
155# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
156
157/* If this bit is set, do not process the GNU regex operators.
158 If not set, then the GNU regex operators are recognized. */
159# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
160
161/* If this bit is set, a syntactically invalid interval is treated as
162 a string of ordinary characters. For example, the ERE 'a{1' is
163 treated as 'a\{1'. */
164# define RE_INVALID_INTERVAL_ORD (RE_NO_GNU_OPS << 1)
165
166/* If this bit is set, then ignore case when matching.
167 If not set, then case is significant. */
168# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
169
170/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
171 for ^, because it is difficult to scan the regex backwards to find
172 whether ^ should be special. */
173# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
174
175/* If this bit is set, then \{ cannot be first in a BRE or
176 immediately after an alternation or begin-group operator. */
177# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
178
179/* If this bit is set, then no_sub will be set to 1 during
180 re_compile_pattern. */
181#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
182#endif
132 183
133/* This global variable defines the particular regexp syntax to use (for 184/* This global variable defines the particular regexp syntax to use (for
134 some interfaces). When a regexp is compiled, the syntax used is 185 some interfaces). When a regexp is compiled, the syntax used is
@@ -136,6 +187,7 @@ typedef unsigned reg_syntax_t;
136 already-compiled regexps. */ 187 already-compiled regexps. */
137extern reg_syntax_t re_syntax_options; 188extern reg_syntax_t re_syntax_options;
138 189
190#ifdef __USE_GNU
139/* Define combinations of the above bits for the standard possibilities. 191/* Define combinations of the above bits for the standard possibilities.
140 (The [[[ comments delimit what gets put into the Texinfo file, so 192 (The [[[ comments delimit what gets put into the Texinfo file, so
141 don't delete them!) */ 193 don't delete them!) */
@@ -143,13 +195,22 @@ extern reg_syntax_t re_syntax_options;
143#define RE_SYNTAX_EMACS 0 195#define RE_SYNTAX_EMACS 0
144 196
145#define RE_SYNTAX_AWK \ 197#define RE_SYNTAX_AWK \
146 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ 198 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
147 | RE_NO_BK_PARENS | RE_NO_BK_REFS \ 199 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
148 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ 200 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
149 | RE_UNMATCHED_RIGHT_PAREN_ORD) 201 | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
150 202 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
151#define RE_SYNTAX_POSIX_AWK \ 203
152 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) 204#define RE_SYNTAX_GNU_AWK \
205 ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
206 | RE_INVALID_INTERVAL_ORD) \
207 & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \
208 | RE_CONTEXT_INVALID_OPS ))
209
210#define RE_SYNTAX_POSIX_AWK \
211 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
212 | RE_INTERVALS | RE_NO_GNU_OPS \
213 | RE_INVALID_INTERVAL_ORD)
153 214
154#define RE_SYNTAX_GREP \ 215#define RE_SYNTAX_GREP \
155 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ 216 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
@@ -163,7 +224,8 @@ extern reg_syntax_t re_syntax_options;
163 | RE_NO_BK_VBAR) 224 | RE_NO_BK_VBAR)
164 225
165#define RE_SYNTAX_POSIX_EGREP \ 226#define RE_SYNTAX_POSIX_EGREP \
166 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) 227 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
228 | RE_INVALID_INTERVAL_ORD)
167 229
168/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ 230/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
169#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC 231#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
@@ -176,7 +238,7 @@ extern reg_syntax_t re_syntax_options;
176 | RE_INTERVALS | RE_NO_EMPTY_RANGES) 238 | RE_INTERVALS | RE_NO_EMPTY_RANGES)
177 239
178#define RE_SYNTAX_POSIX_BASIC \ 240#define RE_SYNTAX_POSIX_BASIC \
179 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) 241 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
180 242
181/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes 243/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
182 RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this 244 RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
@@ -185,13 +247,13 @@ extern reg_syntax_t re_syntax_options;
185 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) 247 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
186 248
187#define RE_SYNTAX_POSIX_EXTENDED \ 249#define RE_SYNTAX_POSIX_EXTENDED \
188 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ 250 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
189 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ 251 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
190 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ 252 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
191 | RE_UNMATCHED_RIGHT_PAREN_ORD) 253 | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
192 254
193/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS 255/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
194 replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ 256 removed and RE_NO_BK_REFS is added. */
195#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ 257#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
196 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ 258 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
197 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ 259 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
@@ -202,10 +264,12 @@ extern reg_syntax_t re_syntax_options;
202/* Maximum number of duplicates an interval can allow. Some systems 264/* Maximum number of duplicates an interval can allow. Some systems
203 (erroneously) define this in other header files, but we want our 265 (erroneously) define this in other header files, but we want our
204 value, so remove any previous define. */ 266 value, so remove any previous define. */
205#ifdef RE_DUP_MAX 267# ifdef RE_DUP_MAX
206#undef RE_DUP_MAX 268# undef RE_DUP_MAX
269# endif
270/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
271# define RE_DUP_MAX (0x7fff)
207#endif 272#endif
208#define RE_DUP_MAX ((1 << 15) - 1)
209 273
210 274
211/* POSIX `cflags' bits (i.e., information for `regcomp'). */ 275/* POSIX `cflags' bits (i.e., information for `regcomp'). */
@@ -240,18 +304,26 @@ extern reg_syntax_t re_syntax_options;
240/* Like REG_NOTBOL, except for the end-of-line. */ 304/* Like REG_NOTBOL, except for the end-of-line. */
241#define REG_NOTEOL (1 << 1) 305#define REG_NOTEOL (1 << 1)
242 306
307/* Use PMATCH[0] to delimit the start and end of the search in the
308 buffer. */
309#define REG_STARTEND (1 << 2)
310
243 311
244/* If any error codes are removed, changed, or added, update the 312/* If any error codes are removed, changed, or added, update the
245 `re_error_msg' table in regex.c. */ 313 `re_error_msg' table in regex.c. */
246typedef enum 314typedef enum
247{ 315{
316#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K
317 REG_ENOSYS = -1, /* This will never happen for this implementation. */
318#endif
319
248 REG_NOERROR = 0, /* Success. */ 320 REG_NOERROR = 0, /* Success. */
249 REG_NOMATCH, /* Didn't find a match (for regexec). */ 321 REG_NOMATCH, /* Didn't find a match (for regexec). */
250 322
251 /* POSIX regcomp return error codes. (In the order listed in the 323 /* POSIX regcomp return error codes. (In the order listed in the
252 standard.) */ 324 standard.) */
253 REG_BADPAT, /* Invalid pattern. */ 325 REG_BADPAT, /* Invalid pattern. */
254 REG_ECOLLATE, /* Not implemented. */ 326 REG_ECOLLATE, /* Invalid collating element. */
255 REG_ECTYPE, /* Invalid character class name. */ 327 REG_ECTYPE, /* Invalid character class name. */
256 REG_EESCAPE, /* Trailing backslash. */ 328 REG_EESCAPE, /* Trailing backslash. */
257 REG_ESUBREG, /* Invalid back reference. */ 329 REG_ESUBREG, /* Invalid back reference. */
@@ -275,85 +347,92 @@ typedef enum
275 compiled, the `re_nsub' field is available. All other fields are 347 compiled, the `re_nsub' field is available. All other fields are
276 private to the regex routines. */ 348 private to the regex routines. */
277 349
350#ifndef RE_TRANSLATE_TYPE
351# define __RE_TRANSLATE_TYPE unsigned char *
352# ifdef __USE_GNU
353# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
354# endif
355#endif
356
357#ifdef __USE_GNU
358# define __REPB_PREFIX(name) name
359#else
360# define __REPB_PREFIX(name) __##name
361#endif
362
278struct re_pattern_buffer 363struct re_pattern_buffer
279{ 364{
280/* [[[begin pattern_buffer]]] */ 365 /* Space that holds the compiled pattern. It is declared as
281 /* Space that holds the compiled pattern. It is declared as 366 `unsigned char *' because its elements are sometimes used as
282 `unsigned char *' because its elements are 367 array indexes. */
283 sometimes used as array indexes. */ 368 unsigned char *__REPB_PREFIX(buffer);
284 unsigned char *buffer;
285 369
286 /* Number of bytes to which `buffer' points. */ 370 /* Number of bytes to which `buffer' points. */
287 unsigned long allocated; 371 unsigned long int __REPB_PREFIX(allocated);
288 372
289 /* Number of bytes actually used in `buffer'. */ 373 /* Number of bytes actually used in `buffer'. */
290 unsigned long used; 374 unsigned long int __REPB_PREFIX(used);
291 375
292 /* Syntax setting with which the pattern was compiled. */ 376 /* Syntax setting with which the pattern was compiled. */
293 reg_syntax_t syntax; 377 reg_syntax_t __REPB_PREFIX(syntax);
294 378
295 /* Pointer to a fastmap, if any, otherwise zero. re_search uses 379 /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
296 the fastmap, if there is one, to skip over impossible 380 fastmap, if there is one, to skip over impossible starting points
297 starting points for matches. */ 381 for matches. */
298 char *fastmap; 382 char *__REPB_PREFIX(fastmap);
299 383
300 /* Either a translate table to apply to all characters before 384 /* Either a translate table to apply to all characters before
301 comparing them, or zero for no translation. The translation 385 comparing them, or zero for no translation. The translation is
302 is applied to a pattern when it is compiled and to a string 386 applied to a pattern when it is compiled and to a string when it
303 when it is matched. */ 387 is matched. */
304 char *translate; 388 __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
305 389
306 /* Number of subexpressions found by the compiler. */ 390 /* Number of subexpressions found by the compiler. */
307 size_t re_nsub; 391 size_t re_nsub;
308 392
309 /* Zero if this pattern cannot match the empty string, one else. 393 /* Zero if this pattern cannot match the empty string, one else.
310 Well, in truth it's used only in `re_search_2', to see 394 Well, in truth it's used only in `re_search_2', to see whether or
311 whether or not we should use the fastmap, so we don't set 395 not we should use the fastmap, so we don't set this absolutely
312 this absolutely perfectly; see `re_compile_fastmap' (the 396 perfectly; see `re_compile_fastmap' (the `duplicate' case). */
313 `duplicate' case). */ 397 unsigned __REPB_PREFIX(can_be_null) : 1;
314 unsigned can_be_null : 1; 398
315 399 /* If REGS_UNALLOCATED, allocate space in the `regs' structure
316 /* If REGS_UNALLOCATED, allocate space in the `regs' structure 400 for `max (RE_NREGS, re_nsub + 1)' groups.
317 for `max (RE_NREGS, re_nsub + 1)' groups. 401 If REGS_REALLOCATE, reallocate space if necessary.
318 If REGS_REALLOCATE, reallocate space if necessary. 402 If REGS_FIXED, use what's there. */
319 If REGS_FIXED, use what's there. */ 403#ifdef __USE_GNU
320#define REGS_UNALLOCATED 0 404# define REGS_UNALLOCATED 0
321#define REGS_REALLOCATE 1 405# define REGS_REALLOCATE 1
322#define REGS_FIXED 2 406# define REGS_FIXED 2
323 unsigned regs_allocated : 2; 407#endif
324 408 unsigned __REPB_PREFIX(regs_allocated) : 2;
325 /* Set to zero when `regex_compile' compiles a pattern; set to one
326 by `re_compile_fastmap' if it updates the fastmap. */
327 unsigned fastmap_accurate : 1;
328
329 /* If set, `re_match_2' does not return information about
330 subexpressions. */
331 unsigned no_sub : 1;
332
333 /* If set, a beginning-of-line anchor doesn't match at the
334 beginning of the string. */
335 unsigned not_bol : 1;
336
337 /* Similarly for an end-of-line anchor. */
338 unsigned not_eol : 1;
339
340 /* If true, an anchor at a newline matches. */
341 unsigned newline_anchor : 1;
342
343/* [[[end pattern_buffer]]] */
344};
345 409
346typedef struct re_pattern_buffer regex_t; 410 /* Set to zero when `regex_compile' compiles a pattern; set to one
411 by `re_compile_fastmap' if it updates the fastmap. */
412 unsigned __REPB_PREFIX(fastmap_accurate) : 1;
413
414 /* If set, `re_match_2' does not return information about
415 subexpressions. */
416 unsigned __REPB_PREFIX(no_sub) : 1;
417
418 /* If set, a beginning-of-line anchor doesn't match at the beginning
419 of the string. */
420 unsigned __REPB_PREFIX(not_bol) : 1;
421
422 /* Similarly for an end-of-line anchor. */
423 unsigned __REPB_PREFIX(not_eol) : 1;
347 424
425 /* If true, an anchor at a newline matches. */
426 unsigned __REPB_PREFIX(newline_anchor) : 1;
427};
348 428
349/* search.c (search_buffer) in Emacs needs this one opcode value. It is 429typedef struct re_pattern_buffer regex_t;
350 defined both in `regex.c' and here. */
351#define RE_EXACTN_VALUE 1
352 430
353/* Type for byte offsets within the string. POSIX mandates this. */ 431/* Type for byte offsets within the string. POSIX mandates this. */
354typedef int regoff_t; 432typedef int regoff_t;
355 433
356 434
435#ifdef __USE_GNU
357/* This is the structure we store register match data in. See 436/* This is the structure we store register match data in. See
358 regex.texinfo for a full description of what registers match. */ 437 regex.texinfo for a full description of what registers match. */
359struct re_registers 438struct re_registers
@@ -367,8 +446,9 @@ struct re_registers
367/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, 446/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
368 `re_match_2' returns information about at least this many registers 447 `re_match_2' returns information about at least this many registers
369 the first time a `regs' structure is passed. */ 448 the first time a `regs' structure is passed. */
370#ifndef RE_NREGS 449# ifndef RE_NREGS
371#define RE_NREGS 30 450# define RE_NREGS 30
451# endif
372#endif 452#endif
373 453
374 454
@@ -383,38 +463,22 @@ typedef struct
383 463
384/* Declarations for routines. */ 464/* Declarations for routines. */
385 465
386/* To avoid duplicating every routine declaration -- once with a 466#ifdef __USE_GNU
387 prototype (if we are ANSI), and once without (if we aren't) -- we
388 use the following macro to declare argument types. This
389 unfortunately clutters up the declarations a bit, but I think it's
390 worth it. */
391
392#if __STDC__
393
394#define _RE_ARGS(args) args
395
396#else /* not __STDC__ */
397
398#define _RE_ARGS(args) ()
399
400#endif /* not __STDC__ */
401
402/* Sets the current default syntax to SYNTAX, and return the old syntax. 467/* Sets the current default syntax to SYNTAX, and return the old syntax.
403 You can also simply assign to the `re_syntax_options' variable. */ 468 You can also simply assign to the `re_syntax_options' variable. */
404extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); 469extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
405 470
406/* Compile the regular expression PATTERN, with length LENGTH 471/* Compile the regular expression PATTERN, with length LENGTH
407 and syntax given by the global `re_syntax_options', into the buffer 472 and syntax given by the global `re_syntax_options', into the buffer
408 BUFFER. Return NULL if successful, and an error string if not. */ 473 BUFFER. Return NULL if successful, and an error string if not. */
409extern const char *re_compile_pattern 474extern const char *re_compile_pattern (const char *__pattern, size_t __length,
410 _RE_ARGS ((const char *pattern, int length, 475 struct re_pattern_buffer *__buffer);
411 struct re_pattern_buffer *buffer));
412 476
413 477
414/* Compile a fastmap for the compiled pattern in BUFFER; used to 478/* Compile a fastmap for the compiled pattern in BUFFER; used to
415 accelerate searches. Return 0 if successful and -2 if there was an 479 accelerate searches. Return 0 if successful and -2 if there was an
416 internal error. */ 480 internal error. */
417extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); 481extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
418 482
419 483
420/* Search in the string STRING (with length LENGTH) for the pattern 484/* Search in the string STRING (with length LENGTH) for the pattern
@@ -422,31 +486,30 @@ extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
422 characters. Return the starting position of the match, -1 for no 486 characters. Return the starting position of the match, -1 for no
423 match, or -2 for an internal error. Also return register 487 match, or -2 for an internal error. Also return register
424 information in REGS (if REGS and BUFFER->no_sub are nonzero). */ 488 information in REGS (if REGS and BUFFER->no_sub are nonzero). */
425extern int re_search 489extern int re_search (struct re_pattern_buffer *__buffer, const char *__cstring,
426 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, 490 int __length, int __start, int __range,
427 int length, int start, int range, struct re_registers *regs)); 491 struct re_registers *__regs);
428 492
429 493
430/* Like `re_search', but search in the concatenation of STRING1 and 494/* Like `re_search', but search in the concatenation of STRING1 and
431 STRING2. Also, stop searching at index START + STOP. */ 495 STRING2. Also, stop searching at index START + STOP. */
432extern int re_search_2 496extern int re_search_2 (struct re_pattern_buffer *__buffer,
433 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, 497 const char *__string1, int __length1,
434 int length1, const char *string2, int length2, 498 const char *__string2, int __length2, int __start,
435 int start, int range, struct re_registers *regs, int stop)); 499 int __range, struct re_registers *__regs, int __stop);
436 500
437 501
438/* Like `re_search', but return how many characters in STRING the regexp 502/* Like `re_search', but return how many characters in STRING the regexp
439 in BUFFER matched, starting at position START. */ 503 in BUFFER matched, starting at position START. */
440extern int re_match 504extern int re_match (struct re_pattern_buffer *__buffer, const char *__cstring,
441 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, 505 int __length, int __start, struct re_registers *__regs);
442 int length, int start, struct re_registers *regs));
443 506
444 507
445/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ 508/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
446extern int re_match_2 509extern int re_match_2 (struct re_pattern_buffer *__buffer,
447 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, 510 const char *__string1, int __length1,
448 int length1, const char *string2, int length2, 511 const char *__string2, int __length2, int __start,
449 int start, struct re_registers *regs, int stop)); 512 struct re_registers *__regs, int __stop);
450 513
451 514
452/* Set REGS to hold NUM_REGS registers, storing them in STARTS and 515/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
@@ -461,30 +524,59 @@ extern int re_match_2
461 Unless this function is called, the first search or match using 524 Unless this function is called, the first search or match using
462 PATTERN_BUFFER will allocate its own register data, without 525 PATTERN_BUFFER will allocate its own register data, without
463 freeing the old data. */ 526 freeing the old data. */
464extern void re_set_registers 527extern void re_set_registers (struct re_pattern_buffer *__buffer,
465 _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, 528 struct re_registers *__regs,
466 unsigned num_regs, regoff_t *starts, regoff_t *ends)); 529 unsigned int __num_regs,
467 530 regoff_t *__starts, regoff_t *__ends);
531#endif /* Use GNU */
532
533#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD)
534# ifndef _CRAY
468/* 4.2 bsd compatibility. */ 535/* 4.2 bsd compatibility. */
469extern char *re_comp _RE_ARGS ((const char *)); 536extern char *re_comp (const char *);
470extern int re_exec _RE_ARGS ((const char *)); 537extern int re_exec (const char *);
538# endif
539#endif
540
541/* GCC 2.95 and later have "__restrict"; C99 compilers have
542 "restrict", and "configure" may have defined "restrict". */
543#ifndef __restrict
544# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
545# if defined restrict || 199901L <= __STDC_VERSION__
546# define __restrict restrict
547# else
548# define __restrict
549# endif
550# endif
551#endif
552/* gcc 3.1 and up support the [restrict] syntax. */
553#ifndef __restrict_arr
554# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
555 && !defined __GNUG__
556# define __restrict_arr __restrict
557# else
558# define __restrict_arr
559# endif
560#endif
471 561
472/* POSIX compatibility. */ 562/* POSIX compatibility. */
473extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); 563extern int regcomp (regex_t *__restrict __preg,
474extern int regexec 564 const char *__restrict __pattern,
475 _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, 565 int __cflags);
476 regmatch_t pmatch[], int eflags)); 566
477extern size_t regerror 567extern int regexec (const regex_t *__restrict __preg,
478 _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, 568 const char *__restrict __cstring, size_t __nmatch,
479 size_t errbuf_size)); 569 regmatch_t __pmatch[__restrict_arr],
480extern void regfree _RE_ARGS ((regex_t *preg)); 570 int __eflags);
481 571
482#endif /* not __REGEXP_LIBRARY_H__ */ 572extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
483 573 char *__restrict __errbuf, size_t __errbuf_size);
484/* 574
485Local variables: 575extern void regfree (regex_t *__preg);
486make-backup-files: t 576
487version-control: t 577
488trim-versions-without-asking: nil 578#ifdef __cplusplus
489End: 579}
490*/ 580#endif /* C++ */
581
582#endif /* regex.h */