aboutsummaryrefslogtreecommitdiff
path: root/win32
diff options
context:
space:
mode:
Diffstat (limited to 'win32')
-rw-r--r--win32/Kbuild25
-rw-r--r--win32/arpa/inet.h0
-rw-r--r--win32/env.c189
-rw-r--r--win32/fnmatch.c488
-rw-r--r--win32/fnmatch.h84
-rw-r--r--win32/grp.h0
-rw-r--r--win32/inet_pton.c255
-rw-r--r--win32/ioctl.c24
-rw-r--r--win32/mempcpy.c27
-rw-r--r--win32/mingw.c1064
-rw-r--r--win32/mntent.c69
-rw-r--r--win32/mntent.h19
-rw-r--r--win32/net.c101
-rw-r--r--win32/net/if.h0
-rw-r--r--win32/netdb.h0
-rw-r--r--win32/netinet/in.h0
-rw-r--r--win32/paths.h0
-rw-r--r--win32/poll.c606
-rw-r--r--win32/poll.h53
-rw-r--r--win32/popen.c318
-rw-r--r--win32/process.c425
-rw-r--r--win32/pwd.h0
-rw-r--r--win32/regcomp.c3886
-rw-r--r--win32/regex.c90
-rw-r--r--win32/regex.h582
-rw-r--r--win32/regex_internal.c1744
-rw-r--r--win32/regex_internal.h810
-rw-r--r--win32/regexec.c4369
-rw-r--r--win32/sched.h1
-rw-r--r--win32/select.c573
-rw-r--r--win32/statfs.c80
-rw-r--r--win32/strptime.c646
-rw-r--r--win32/sys/ioctl.h0
-rw-r--r--win32/sys/mman.h0
-rw-r--r--win32/sys/prctl.h0
-rw-r--r--win32/sys/resource.h0
-rw-r--r--win32/sys/socket.h0
-rw-r--r--win32/sys/statfs.h22
-rw-r--r--win32/sys/syscall.h0
-rw-r--r--win32/sys/sysmacros.h0
-rw-r--r--win32/sys/times.h0
-rw-r--r--win32/sys/un.h0
-rw-r--r--win32/sys/utsname.h66
-rw-r--r--win32/sys/vfs.h1
-rw-r--r--win32/sys/wait.h0
-rw-r--r--win32/system.c22
-rw-r--r--win32/termios.c83
-rw-r--r--win32/termios.h129
-rw-r--r--win32/uname.c48
-rw-r--r--win32/winansi.c735
50 files changed, 17634 insertions, 0 deletions
diff --git a/win32/Kbuild b/win32/Kbuild
new file mode 100644
index 000000000..ba361f1ca
--- /dev/null
+++ b/win32/Kbuild
@@ -0,0 +1,25 @@
1# Makefile for busybox
2#
3# Licensed under the GPL v2, see the file LICENSE in this tarball.
4
5lib-y:=
6
7lib-$(CONFIG_PLATFORM_MINGW32) += env.o
8lib-$(CONFIG_PLATFORM_MINGW32) += fnmatch.o
9lib-$(CONFIG_PLATFORM_MINGW32) += ioctl.o
10lib-$(CONFIG_PLATFORM_MINGW32) += mingw.o
11lib-$(CONFIG_PLATFORM_MINGW32) += process.o
12lib-$(CONFIG_PLATFORM_MINGW32) += regex.o
13lib-$(CONFIG_PLATFORM_MINGW32) += net.o
14lib-$(CONFIG_PLATFORM_MINGW32) += inet_pton.o
15lib-$(CONFIG_PLATFORM_MINGW32) += poll.o
16lib-$(CONFIG_PLATFORM_MINGW32) += select.o
17lib-$(CONFIG_PLATFORM_MINGW32) += popen.o
18lib-$(CONFIG_PLATFORM_MINGW32) += statfs.o
19lib-$(CONFIG_PLATFORM_MINGW32) += mempcpy.o
20lib-$(CONFIG_PLATFORM_MINGW32) += mntent.o
21lib-$(CONFIG_PLATFORM_MINGW32) += strptime.o
22lib-$(CONFIG_PLATFORM_MINGW32) += system.o
23lib-$(CONFIG_PLATFORM_MINGW32) += termios.o
24lib-$(CONFIG_PLATFORM_MINGW32) += uname.o
25lib-$(CONFIG_PLATFORM_MINGW32) += winansi.o
diff --git a/win32/arpa/inet.h b/win32/arpa/inet.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/arpa/inet.h
diff --git a/win32/env.c b/win32/env.c
new file mode 100644
index 000000000..48b86c555
--- /dev/null
+++ b/win32/env.c
@@ -0,0 +1,189 @@
1#include "libbb.h"
2
3char **copy_environ(const char *const *envp)
4{
5 char **env;
6 int i = 0;
7 while (envp[i])
8 i++;
9 env = xmalloc((i+1)*sizeof(*env));
10 for (i = 0; envp[i]; i++)
11 env[i] = xstrdup(envp[i]);
12 env[i] = NULL;
13 return env;
14}
15
16void free_environ(char **env)
17{
18 int i;
19 for (i = 0; env[i]; i++)
20 free(env[i]);
21 free(env);
22}
23
24static int lookup_env(char **env, const char *name, size_t nmln)
25{
26 int i;
27
28 for (i = 0; env[i]; i++) {
29 if (0 == strncmp(env[i], name, nmln)
30 && '=' == env[i][nmln])
31 /* matches */
32 return i;
33 }
34 return -1;
35}
36
37#undef getenv
38char *mingw_getenv(const char *name)
39{
40 char *result = getenv(name);
41 if (!result && !strcmp(name, "TMPDIR")) {
42 /* on Windows it is TMP and TEMP */
43 result = getenv("TMP");
44 if (!result)
45 result = getenv("TEMP");
46 }
47 return result;
48}
49
50int setenv(const char *name, const char *value, int replace)
51{
52 int out;
53 size_t namelen, valuelen;
54 char *envstr;
55
56 if (!name || !value) return -1;
57 if (!replace) {
58 char *oldval = NULL;
59 oldval = getenv(name);
60 if (oldval) return 0;
61 }
62
63 namelen = strlen(name);
64 valuelen = strlen(value);
65 envstr = malloc((namelen + valuelen + 2));
66 if (!envstr) return -1;
67
68 memcpy(envstr, name, namelen);
69 envstr[namelen] = '=';
70 memcpy(envstr + namelen + 1, value, valuelen);
71 envstr[namelen + valuelen + 1] = 0;
72
73 out = putenv(envstr);
74 /* putenv(3) makes the argument string part of the environment,
75 * and changing that string modifies the environment --- which
76 * means we do not own that storage anymore. Do not free
77 * envstr.
78 */
79
80 return out;
81}
82
83/*
84 * If name contains '=', then sets the variable, otherwise it unsets it
85 */
86char **env_setenv(char **env, const char *name)
87{
88 char *eq = strchrnul(name, '=');
89 int i = lookup_env(env, name, eq-name);
90
91 if (i < 0) {
92 if (*eq) {
93 for (i = 0; env[i]; i++)
94 ;
95 env = xrealloc(env, (i+2)*sizeof(*env));
96 env[i] = xstrdup(name);
97 env[i+1] = NULL;
98 }
99 }
100 else {
101 free(env[i]);
102 if (*eq)
103 env[i] = xstrdup(name);
104 else {
105 for (; env[i]; i++)
106 env[i] = env[i+1];
107#if !ENABLE_SAFE_ENV
108 SetEnvironmentVariable(name, NULL);
109#endif
110 }
111 }
112 return env;
113}
114
115#if ENABLE_SAFE_ENV
116/*
117 * Removing an environment variable with WIN32 putenv requires an argument
118 * like "NAME="; glibc omits the '='. The implementations of unsetenv and
119 * clearenv allow for this.
120 *
121 * It isn't possible to create an environment variable with an empty value
122 * using WIN32 putenv.
123 */
124#undef putenv
125int unsetenv(const char *env)
126{
127 char *name;
128 int ret;
129
130 name = xmalloc(strlen(env)+2);
131 strcat(strcpy(name, env), "=");
132 ret = putenv(name);
133 free(name);
134
135 return ret;
136}
137
138int clearenv(void)
139{
140 char *name, *s;
141
142 while ( environ && *environ ) {
143 if ( (s=strchr(*environ, '=')) != NULL ) {
144 name = xstrndup(*environ, s-*environ+1);
145 putenv(name);
146 free(name);
147 }
148 else {
149 return -1;
150 }
151 }
152 return 0;
153}
154
155int mingw_putenv(const char *env)
156{
157 char *s;
158
159 if ( (s=strchr(env, '=')) == NULL ) {
160 return unsetenv(env);
161 }
162
163 if ( s[1] != '\0' ) {
164 return putenv(env);
165 }
166
167 /* can't set empty value */
168 return 0;
169}
170#else
171void unsetenv(const char *env)
172{
173 env_setenv(environ, env);
174}
175
176int clearenv(void)
177{
178 char **env = environ;
179 if (!env)
180 return 0;
181 while (*env) {
182 free(*env);
183 env++;
184 }
185 free(env);
186 environ = NULL;
187 return 0;
188}
189#endif
diff --git a/win32/fnmatch.c b/win32/fnmatch.c
new file mode 100644
index 000000000..1f4ead5f9
--- /dev/null
+++ b/win32/fnmatch.c
@@ -0,0 +1,488 @@
1/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public
15 License along with this library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
18
19#if HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23/* Enable GNU extensions in fnmatch.h. */
24#ifndef _GNU_SOURCE
25# define _GNU_SOURCE 1
26#endif
27
28#include <errno.h>
29#include <fnmatch.h>
30#include <ctype.h>
31
32#if HAVE_STRING_H || defined _LIBC
33# include <string.h>
34#else
35# include <strings.h>
36#endif
37
38#if defined STDC_HEADERS || defined _LIBC
39# include <stdlib.h>
40#endif
41
42/* For platform which support the ISO C amendement 1 functionality we
43 support user defined character classes. */
44#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
45/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
46# include <wchar.h>
47# include <wctype.h>
48#endif
49
50/* Comment out all this code if we are using the GNU C Library, and are not
51 actually compiling the library itself. This code is part of the GNU C
52 Library, but also included in many other GNU distributions. Compiling
53 and linking in this code is a waste when using the GNU C library
54 (especially if it is a shared library). Rather than having every GNU
55 program understand `configure --with-gnu-libc' and omit the object files,
56 it is simpler to just do this in the source for each such file. */
57
58#if defined _LIBC || !defined __GNU_LIBRARY__
59
60
61# if defined STDC_HEADERS || !defined isascii
62# define ISASCII(c) 1
63# else
64# define ISASCII(c) isascii(c)
65# endif
66
67# ifdef isblank
68# define ISBLANK(c) (ISASCII (c) && isblank (c))
69# else
70# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
71# endif
72# ifdef isgraph
73# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
74# else
75# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
76# endif
77
78# define ISPRINT(c) (ISASCII (c) && isprint (c))
79# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
80# define ISALNUM(c) (ISASCII (c) && isalnum (c))
81# define ISALPHA(c) (ISASCII (c) && isalpha (c))
82# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
83# define ISLOWER(c) (ISASCII (c) && islower (c))
84# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
85# define ISSPACE(c) (ISASCII (c) && isspace (c))
86# define ISUPPER(c) (ISASCII (c) && isupper (c))
87# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
88
89# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
90
91# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
92/* The GNU C library provides support for user-defined character classes
93 and the functions from ISO C amendement 1. */
94# ifdef CHARCLASS_NAME_MAX
95# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
96# else
97/* This shouldn't happen but some implementation might still have this
98 problem. Use a reasonable default value. */
99# define CHAR_CLASS_MAX_LENGTH 256
100# endif
101
102# ifdef _LIBC
103# define IS_CHAR_CLASS(string) __wctype (string)
104# else
105# define IS_CHAR_CLASS(string) wctype (string)
106# endif
107# else
108# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
109
110# define IS_CHAR_CLASS(string) \
111 (STREQ (string, "alpha") || STREQ (string, "upper") \
112 || STREQ (string, "lower") || STREQ (string, "digit") \
113 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
114 || STREQ (string, "space") || STREQ (string, "print") \
115 || STREQ (string, "punct") || STREQ (string, "graph") \
116 || STREQ (string, "cntrl") || STREQ (string, "blank"))
117# endif
118
119/* Avoid depending on library functions or files
120 whose names are inconsistent. */
121
122# if !defined _LIBC && !defined getenv
123extern char *getenv ();
124# endif
125
126# ifndef errno
127extern int errno;
128# endif
129
130/* This function doesn't exist on most systems. */
131
132# if !defined HAVE___STRCHRNUL && !defined _LIBC
133static char *
134__strchrnul (s, c)
135 const char *s;
136 int c;
137{
138 char *result = strchr (s, c);
139 if (result == NULL)
140 result = strchr (s, '\0');
141 return result;
142}
143# endif
144
145# ifndef internal_function
146/* Inside GNU libc we mark some function in a special way. In other
147 environments simply ignore the marking. */
148# define internal_function
149# endif
150
151/* Match STRING against the filename pattern PATTERN, returning zero if
152 it matches, nonzero if not. */
153static int internal_fnmatch __P ((const char *pattern, const char *string,
154 int no_leading_period, int flags))
155 internal_function;
156static int
157internal_function
158internal_fnmatch (pattern, string, no_leading_period, flags)
159 const char *pattern;
160 const char *string;
161 int no_leading_period;
162 int flags;
163{
164 register const char *p = pattern, *n = string;
165 register unsigned char c;
166
167/* Note that this evaluates C many times. */
168# ifdef _LIBC
169# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
170# else
171# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
172# endif
173
174 while ((c = *p++) != '\0')
175 {
176 c = FOLD (c);
177
178 switch (c)
179 {
180 case '?':
181 if (*n == '\0')
182 return FNM_NOMATCH;
183 else if (*n == '/' && (flags & FNM_FILE_NAME))
184 return FNM_NOMATCH;
185 else if (*n == '.' && no_leading_period
186 && (n == string
187 || (n[-1] == '/' && (flags & FNM_FILE_NAME))))
188 return FNM_NOMATCH;
189 break;
190
191 case '\\':
192 if (!(flags & FNM_NOESCAPE))
193 {
194 c = *p++;
195 if (c == '\0')
196 /* Trailing \ loses. */
197 return FNM_NOMATCH;
198 c = FOLD (c);
199 }
200 if (FOLD ((unsigned char) *n) != c)
201 return FNM_NOMATCH;
202 break;
203
204 case '*':
205 if (*n == '.' && no_leading_period
206 && (n == string
207 || (n[-1] == '/' && (flags & FNM_FILE_NAME))))
208 return FNM_NOMATCH;
209
210 for (c = *p++; c == '?' || c == '*'; c = *p++)
211 {
212 if (*n == '/' && (flags & FNM_FILE_NAME))
213 /* A slash does not match a wildcard under FNM_FILE_NAME. */
214 return FNM_NOMATCH;
215 else if (c == '?')
216 {
217 /* A ? needs to match one character. */
218 if (*n == '\0')
219 /* There isn't another character; no match. */
220 return FNM_NOMATCH;
221 else
222 /* One character of the string is consumed in matching
223 this ? wildcard, so *??? won't match if there are
224 less than three characters. */
225 ++n;
226 }
227 }
228
229 if (c == '\0')
230 /* The wildcard(s) is/are the last element of the pattern.
231 If the name is a file name and contains another slash
232 this does mean it cannot match. */
233 return ((flags & FNM_FILE_NAME) && strchr (n, '/') != NULL
234 ? FNM_NOMATCH : 0);
235 else
236 {
237 const char *endp;
238
239 endp = __strchrnul (n, (flags & FNM_FILE_NAME) ? '/' : '\0');
240
241 if (c == '[')
242 {
243 int flags2 = ((flags & FNM_FILE_NAME)
244 ? flags : (flags & ~FNM_PERIOD));
245
246 for (--p; n < endp; ++n)
247 if (internal_fnmatch (p, n,
248 (no_leading_period
249 && (n == string
250 || (n[-1] == '/'
251 && (flags
252 & FNM_FILE_NAME)))),
253 flags2)
254 == 0)
255 return 0;
256 }
257 else if (c == '/' && (flags & FNM_FILE_NAME))
258 {
259 while (*n != '\0' && *n != '/')
260 ++n;
261 if (*n == '/'
262 && (internal_fnmatch (p, n + 1, flags & FNM_PERIOD,
263 flags) == 0))
264 return 0;
265 }
266 else
267 {
268 int flags2 = ((flags & FNM_FILE_NAME)
269 ? flags : (flags & ~FNM_PERIOD));
270
271 if (c == '\\' && !(flags & FNM_NOESCAPE))
272 c = *p;
273 c = FOLD (c);
274 for (--p; n < endp; ++n)
275 if (FOLD ((unsigned char) *n) == c
276 && (internal_fnmatch (p, n,
277 (no_leading_period
278 && (n == string
279 || (n[-1] == '/'
280 && (flags
281 & FNM_FILE_NAME)))),
282 flags2) == 0))
283 return 0;
284 }
285 }
286
287 /* If we come here no match is possible with the wildcard. */
288 return FNM_NOMATCH;
289
290 case '[':
291 {
292 /* Nonzero if the sense of the character class is inverted. */
293 static int posixly_correct;
294 register int not;
295 char cold;
296
297 if (posixly_correct == 0)
298 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
299
300 if (*n == '\0')
301 return FNM_NOMATCH;
302
303 if (*n == '.' && no_leading_period && (n == string
304 || (n[-1] == '/'
305 && (flags
306 & FNM_FILE_NAME))))
307 return FNM_NOMATCH;
308
309 if (*n == '/' && (flags & FNM_FILE_NAME))
310 /* `/' cannot be matched. */
311 return FNM_NOMATCH;
312
313 not = (*p == '!' || (posixly_correct < 0 && *p == '^'));
314 if (not)
315 ++p;
316
317 c = *p++;
318 for (;;)
319 {
320 unsigned char fn = FOLD ((unsigned char) *n);
321
322 if (!(flags & FNM_NOESCAPE) && c == '\\')
323 {
324 if (*p == '\0')
325 return FNM_NOMATCH;
326 c = FOLD ((unsigned char) *p);
327 ++p;
328
329 if (c == fn)
330 goto matched;
331 }
332 else if (c == '[' && *p == ':')
333 {
334 /* Leave room for the null. */
335 char str[CHAR_CLASS_MAX_LENGTH + 1];
336 size_t c1 = 0;
337# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
338 wctype_t wt;
339# endif
340 const char *startp = p;
341
342 for (;;)
343 {
344 if (c1 == CHAR_CLASS_MAX_LENGTH)
345 /* The name is too long and therefore the pattern
346 is ill-formed. */
347 return FNM_NOMATCH;
348
349 c = *++p;
350 if (c == ':' && p[1] == ']')
351 {
352 p += 2;
353 break;
354 }
355 if (c < 'a' || c >= 'z')
356 {
357 /* This cannot possibly be a character class name.
358 Match it as a normal range. */
359 p = startp;
360 c = '[';
361 goto normal_bracket;
362 }
363 str[c1++] = c;
364 }
365 str[c1] = '\0';
366
367# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
368 wt = IS_CHAR_CLASS (str);
369 if (wt == 0)
370 /* Invalid character class name. */
371 return FNM_NOMATCH;
372
373 if (__iswctype (__btowc ((unsigned char) *n), wt))
374 goto matched;
375# else
376 if ((STREQ (str, "alnum") && ISALNUM ((unsigned char) *n))
377 || (STREQ (str, "alpha") && ISALPHA ((unsigned char) *n))
378 || (STREQ (str, "blank") && ISBLANK ((unsigned char) *n))
379 || (STREQ (str, "cntrl") && ISCNTRL ((unsigned char) *n))
380 || (STREQ (str, "digit") && ISDIGIT ((unsigned char) *n))
381 || (STREQ (str, "graph") && ISGRAPH ((unsigned char) *n))
382 || (STREQ (str, "lower") && ISLOWER ((unsigned char) *n))
383 || (STREQ (str, "print") && ISPRINT ((unsigned char) *n))
384 || (STREQ (str, "punct") && ISPUNCT ((unsigned char) *n))
385 || (STREQ (str, "space") && ISSPACE ((unsigned char) *n))
386 || (STREQ (str, "upper") && ISUPPER ((unsigned char) *n))
387 || (STREQ (str, "xdigit") && ISXDIGIT ((unsigned char) *n)))
388 goto matched;
389# endif
390 }
391 else if (c == '\0')
392 /* [ (unterminated) loses. */
393 return FNM_NOMATCH;
394 else
395 {
396 normal_bracket:
397 if (FOLD (c) == fn)
398 goto matched;
399
400 cold = c;
401 c = *p++;
402
403 if (c == '-' && *p != ']')
404 {
405 /* It is a range. */
406 unsigned char cend = *p++;
407 if (!(flags & FNM_NOESCAPE) && cend == '\\')
408 cend = *p++;
409 if (cend == '\0')
410 return FNM_NOMATCH;
411
412 if (cold <= fn && fn <= FOLD (cend))
413 goto matched;
414
415 c = *p++;
416 }
417 }
418
419 if (c == ']')
420 break;
421 }
422
423 if (!not)
424 return FNM_NOMATCH;
425 break;
426
427 matched:
428 /* Skip the rest of the [...] that already matched. */
429 while (c != ']')
430 {
431 if (c == '\0')
432 /* [... (unterminated) loses. */
433 return FNM_NOMATCH;
434
435 c = *p++;
436 if (!(flags & FNM_NOESCAPE) && c == '\\')
437 {
438 if (*p == '\0')
439 return FNM_NOMATCH;
440 /* XXX 1003.2d11 is unclear if this is right. */
441 ++p;
442 }
443 else if (c == '[' && *p == ':')
444 {
445 do
446 if (*++p == '\0')
447 return FNM_NOMATCH;
448 while (*p != ':' || p[1] == ']');
449 p += 2;
450 c = *p;
451 }
452 }
453 if (not)
454 return FNM_NOMATCH;
455 }
456 break;
457
458 default:
459 if (c != FOLD ((unsigned char) *n))
460 return FNM_NOMATCH;
461 }
462
463 ++n;
464 }
465
466 if (*n == '\0')
467 return 0;
468
469 if ((flags & FNM_LEADING_DIR) && *n == '/')
470 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
471 return 0;
472
473 return FNM_NOMATCH;
474
475# undef FOLD
476}
477
478
479int
480fnmatch (pattern, string, flags)
481 const char *pattern;
482 const char *string;
483 int flags;
484{
485 return internal_fnmatch (pattern, string, flags & FNM_PERIOD, flags);
486}
487
488#endif /* _LIBC or not __GNU_LIBRARY__. */
diff --git a/win32/fnmatch.h b/win32/fnmatch.h
new file mode 100644
index 000000000..cc3ec3794
--- /dev/null
+++ b/win32/fnmatch.h
@@ -0,0 +1,84 @@
1/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
18
19#ifndef _FNMATCH_H
20#define _FNMATCH_H 1
21
22#ifdef __cplusplus
23extern "C" {
24#endif
25
26#if defined __cplusplus || (defined __STDC__ && __STDC__) || defined WINDOWS32
27# if !defined __GLIBC__ || !defined __P
28# undef __P
29# define __P(protos) protos
30# endif
31#else /* Not C++ or ANSI C. */
32# undef __P
33# define __P(protos) ()
34/* We can get away without defining `const' here only because in this file
35 it is used only inside the prototype for `fnmatch', which is elided in
36 non-ANSI C where `const' is problematical. */
37#endif /* C++ or ANSI C. */
38
39#ifndef const
40# if (defined __STDC__ && __STDC__) || defined __cplusplus
41# define __const const
42# else
43# define __const
44# endif
45#endif
46
47/* We #undef these before defining them because some losing systems
48 (HP-UX A.08.07 for example) define these in <unistd.h>. */
49#undef FNM_PATHNAME
50#undef FNM_NOESCAPE
51#undef FNM_PERIOD
52
53/* Bits set in the FLAGS argument to `fnmatch'. */
54#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */
55#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */
56#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */
57
58#if !defined _POSIX_C_SOURCE || _POSIX_C_SOURCE < 2 || defined _GNU_SOURCE
59# define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */
60# define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */
61# define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */
62#endif
63
64/* Value returned by `fnmatch' if STRING does not match PATTERN. */
65#define FNM_NOMATCH 1
66
67/* This value is returned if the implementation does not support
68 `fnmatch'. Since this is not the case here it will never be
69 returned but the conformance test suites still require the symbol
70 to be defined. */
71#ifdef _XOPEN_SOURCE
72# define FNM_NOSYS (-1)
73#endif
74
75/* Match NAME against the filename pattern PATTERN,
76 returning zero if it matches, FNM_NOMATCH if not. */
77extern int fnmatch __P ((__const char *__pattern, __const char *__name,
78 int __flags));
79
80#ifdef __cplusplus
81}
82#endif
83
84#endif /* fnmatch.h */
diff --git a/win32/grp.h b/win32/grp.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/grp.h
diff --git a/win32/inet_pton.c b/win32/inet_pton.c
new file mode 100644
index 000000000..ec87abec5
--- /dev/null
+++ b/win32/inet_pton.c
@@ -0,0 +1,255 @@
1/* inet_pton.c -- convert IPv4 and IPv6 addresses from text to binary form
2
3 Copyright (C) 2006, 2008-2015 Free Software Foundation, Inc.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17
18/*
19 * Copyright (c) 1996,1999 by Internet Software Consortium.
20 *
21 * Permission to use, copy, modify, and distribute this software for any
22 * purpose with or without fee is hereby granted, provided that the above
23 * copyright notice and this permission notice appear in all copies.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
26 * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
28 * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
29 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
30 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
32 * SOFTWARE.
33 */
34
35#include "libbb.h"
36
37/* Specification. */
38#include <arpa/inet.h>
39
40# include <ctype.h>
41# include <string.h>
42# include <errno.h>
43
44# define NS_INADDRSZ 4
45# define NS_IN6ADDRSZ 16
46# define NS_INT16SZ 2
47# define HAVE_IPV6 1
48
49/*
50 * WARNING: Don't even consider trying to compile this on a system where
51 * sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX.
52 */
53
54static int inet_pton4 (const char *src, unsigned char *dst);
55# if HAVE_IPV6
56static int inet_pton6 (const char *src, unsigned char *dst);
57# endif
58
59/* int
60 * inet_pton(af, src, dst)
61 * convert from presentation format (which usually means ASCII printable)
62 * to network format (which is usually some kind of binary format).
63 * return:
64 * 1 if the address was valid for the specified address family
65 * 0 if the address wasn't valid ('dst' is untouched in this case)
66 * -1 if some other error occurred ('dst' is untouched in this case, too)
67 * author:
68 * Paul Vixie, 1996.
69 */
70int
71inet_pton (int af, const char *restrict src, void *restrict dst)
72{
73 switch (af)
74 {
75 case AF_INET:
76 return (inet_pton4 (src, dst));
77
78# if HAVE_IPV6
79 case AF_INET6:
80 return (inet_pton6 (src, dst));
81# endif
82
83 default:
84 errno = EAFNOSUPPORT;
85 return (-1);
86 }
87 /* NOTREACHED */
88}
89
90/* int
91 * inet_pton4(src, dst)
92 * like inet_aton() but without all the hexadecimal, octal (with the
93 * exception of 0) and shorthand.
94 * return:
95 * 1 if 'src' is a valid dotted quad, else 0.
96 * notice:
97 * does not touch 'dst' unless it's returning 1.
98 * author:
99 * Paul Vixie, 1996.
100 */
101static int
102inet_pton4 (const char *restrict src, unsigned char *restrict dst)
103{
104 int saw_digit, octets, ch;
105 unsigned char tmp[NS_INADDRSZ], *tp;
106
107 saw_digit = 0;
108 octets = 0;
109 *(tp = tmp) = 0;
110 while ((ch = *src++) != '\0')
111 {
112
113 if (ch >= '0' && ch <= '9')
114 {
115 unsigned new = *tp * 10 + (ch - '0');
116
117 if (saw_digit && *tp == 0)
118 return (0);
119 if (new > 255)
120 return (0);
121 *tp = new;
122 if (!saw_digit)
123 {
124 if (++octets > 4)
125 return (0);
126 saw_digit = 1;
127 }
128 }
129 else if (ch == '.' && saw_digit)
130 {
131 if (octets == 4)
132 return (0);
133 *++tp = 0;
134 saw_digit = 0;
135 }
136 else
137 return (0);
138 }
139 if (octets < 4)
140 return (0);
141 memcpy (dst, tmp, NS_INADDRSZ);
142 return (1);
143}
144
145# if HAVE_IPV6
146
147/* int
148 * inet_pton6(src, dst)
149 * convert presentation level address to network order binary form.
150 * return:
151 * 1 if 'src' is a valid [RFC1884 2.2] address, else 0.
152 * notice:
153 * (1) does not touch 'dst' unless it's returning 1.
154 * (2) :: in a full address is silently ignored.
155 * credit:
156 * inspired by Mark Andrews.
157 * author:
158 * Paul Vixie, 1996.
159 */
160static int
161inet_pton6 (const char *restrict src, unsigned char *restrict dst)
162{
163 static const char xdigits[] = "0123456789abcdef";
164 unsigned char tmp[NS_IN6ADDRSZ], *tp, *endp, *colonp;
165 const char *curtok;
166 int ch, saw_xdigit;
167 unsigned val;
168
169 tp = memset (tmp, '\0', NS_IN6ADDRSZ);
170 endp = tp + NS_IN6ADDRSZ;
171 colonp = NULL;
172 /* Leading :: requires some special handling. */
173 if (*src == ':')
174 if (*++src != ':')
175 return (0);
176 curtok = src;
177 saw_xdigit = 0;
178 val = 0;
179 while ((ch = tolower (*src++)) != '\0')
180 {
181 const char *pch;
182
183 pch = strchr (xdigits, ch);
184 if (pch != NULL)
185 {
186 val <<= 4;
187 val |= (pch - xdigits);
188 if (val > 0xffff)
189 return (0);
190 saw_xdigit = 1;
191 continue;
192 }
193 if (ch == ':')
194 {
195 curtok = src;
196 if (!saw_xdigit)
197 {
198 if (colonp)
199 return (0);
200 colonp = tp;
201 continue;
202 }
203 else if (*src == '\0')
204 {
205 return (0);
206 }
207 if (tp + NS_INT16SZ > endp)
208 return (0);
209 *tp++ = (u_char) (val >> 8) & 0xff;
210 *tp++ = (u_char) val & 0xff;
211 saw_xdigit = 0;
212 val = 0;
213 continue;
214 }
215 if (ch == '.' && ((tp + NS_INADDRSZ) <= endp) &&
216 inet_pton4 (curtok, tp) > 0)
217 {
218 tp += NS_INADDRSZ;
219 saw_xdigit = 0;
220 break; /* '\0' was seen by inet_pton4(). */
221 }
222 return (0);
223 }
224 if (saw_xdigit)
225 {
226 if (tp + NS_INT16SZ > endp)
227 return (0);
228 *tp++ = (u_char) (val >> 8) & 0xff;
229 *tp++ = (u_char) val & 0xff;
230 }
231 if (colonp != NULL)
232 {
233 /*
234 * Since some memmove()'s erroneously fail to handle
235 * overlapping regions, we'll do the shift by hand.
236 */
237 const int n = tp - colonp;
238 int i;
239
240 if (tp == endp)
241 return (0);
242 for (i = 1; i <= n; i++)
243 {
244 endp[-i] = colonp[n - i];
245 colonp[n - i] = 0;
246 }
247 tp = endp;
248 }
249 if (tp != endp)
250 return (0);
251 memcpy (dst, tmp, NS_IN6ADDRSZ);
252 return (1);
253}
254
255# endif
diff --git a/win32/ioctl.c b/win32/ioctl.c
new file mode 100644
index 000000000..73ceeedec
--- /dev/null
+++ b/win32/ioctl.c
@@ -0,0 +1,24 @@
1#include "libbb.h"
2
3int ioctl(int fd UNUSED_PARAM, int code, ...)
4{
5 va_list ap;
6 void *arg;
7 int ret = -1;
8
9 va_start(ap, code);
10
11 switch (code) {
12 case TIOCGWINSZ:
13 arg = va_arg(ap, void *);
14 ret = winansi_get_terminal_width_height((struct winsize *)arg);
15 break;
16 default:
17 ret = -1;
18 errno = EINVAL;
19 break;
20 }
21
22 va_end(ap);
23 return ret;
24}
diff --git a/win32/mempcpy.c b/win32/mempcpy.c
new file mode 100644
index 000000000..732a6f4b2
--- /dev/null
+++ b/win32/mempcpy.c
@@ -0,0 +1,27 @@
1/* Copy memory area and return pointer after last written byte.
2 Copyright (C) 2003, 2007, 2009-2014 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, see <http://www.gnu.org/licenses/>. */
16
17#include "libbb.h"
18/* Specification. */
19#include <string.h>
20
21/* Copy N bytes of SRC to DEST, return pointer to bytes after the
22 last written byte. */
23void *
24mempcpy (void *dest, const void *src, size_t n)
25{
26 return (char *) memcpy (dest, src, n) + n;
27}
diff --git a/win32/mingw.c b/win32/mingw.c
new file mode 100644
index 000000000..1170cd9d5
--- /dev/null
+++ b/win32/mingw.c
@@ -0,0 +1,1064 @@
1#include "libbb.h"
2#include <userenv.h>
3
4#if defined(__MINGW64_VERSION_MAJOR)
5#if ENABLE_GLOBBING
6int _dowildcard = -1;
7#else
8int _dowildcard = 0;
9#endif
10
11#undef _fmode
12int _fmode = _O_BINARY;
13#endif
14
15#if !defined(__MINGW64_VERSION_MAJOR)
16#if ENABLE_GLOBBING
17int _CRT_glob = 1;
18#else
19int _CRT_glob = 0;
20#endif
21
22unsigned int _CRT_fmode = _O_BINARY;
23#endif
24
25smallint bb_got_signal;
26
27int err_win_to_posix(DWORD winerr)
28{
29 int error = ENOSYS;
30 switch(winerr) {
31 case ERROR_ACCESS_DENIED: error = EACCES; break;
32 case ERROR_ACCOUNT_DISABLED: error = EACCES; break;
33 case ERROR_ACCOUNT_RESTRICTION: error = EACCES; break;
34 case ERROR_ALREADY_ASSIGNED: error = EBUSY; break;
35 case ERROR_ALREADY_EXISTS: error = EEXIST; break;
36 case ERROR_ARITHMETIC_OVERFLOW: error = ERANGE; break;
37 case ERROR_BAD_COMMAND: error = EIO; break;
38 case ERROR_BAD_DEVICE: error = ENODEV; break;
39 case ERROR_BAD_DRIVER_LEVEL: error = ENXIO; break;
40 case ERROR_BAD_EXE_FORMAT: error = ENOEXEC; break;
41 case ERROR_BAD_FORMAT: error = ENOEXEC; break;
42 case ERROR_BAD_LENGTH: error = EINVAL; break;
43 case ERROR_BAD_PATHNAME: error = ENOENT; break;
44 case ERROR_BAD_PIPE: error = EPIPE; break;
45 case ERROR_BAD_UNIT: error = ENODEV; break;
46 case ERROR_BAD_USERNAME: error = EINVAL; break;
47 case ERROR_BROKEN_PIPE: error = EPIPE; break;
48 case ERROR_BUFFER_OVERFLOW: error = ENAMETOOLONG; break;
49 case ERROR_BUSY: error = EBUSY; break;
50 case ERROR_BUSY_DRIVE: error = EBUSY; break;
51 case ERROR_CALL_NOT_IMPLEMENTED: error = ENOSYS; break;
52 case ERROR_CANNOT_MAKE: error = EACCES; break;
53 case ERROR_CANTOPEN: error = EIO; break;
54 case ERROR_CANTREAD: error = EIO; break;
55 case ERROR_CANTWRITE: error = EIO; break;
56 case ERROR_CRC: error = EIO; break;
57 case ERROR_CURRENT_DIRECTORY: error = EACCES; break;
58 case ERROR_DEVICE_IN_USE: error = EBUSY; break;
59 case ERROR_DEV_NOT_EXIST: error = ENODEV; break;
60 case ERROR_DIRECTORY: error = EINVAL; break;
61 case ERROR_DIR_NOT_EMPTY: error = ENOTEMPTY; break;
62 case ERROR_DISK_CHANGE: error = EIO; break;
63 case ERROR_DISK_FULL: error = ENOSPC; break;
64 case ERROR_DRIVE_LOCKED: error = EBUSY; break;
65 case ERROR_ENVVAR_NOT_FOUND: error = EINVAL; break;
66 case ERROR_EXE_MARKED_INVALID: error = ENOEXEC; break;
67 case ERROR_FILENAME_EXCED_RANGE: error = ENAMETOOLONG; break;
68 case ERROR_FILE_EXISTS: error = EEXIST; break;
69 case ERROR_FILE_INVALID: error = ENODEV; break;
70 case ERROR_FILE_NOT_FOUND: error = ENOENT; break;
71 case ERROR_GEN_FAILURE: error = EIO; break;
72 case ERROR_HANDLE_DISK_FULL: error = ENOSPC; break;
73 case ERROR_INSUFFICIENT_BUFFER: error = ENOMEM; break;
74 case ERROR_INVALID_ACCESS: error = EACCES; break;
75 case ERROR_INVALID_ADDRESS: error = EFAULT; break;
76 case ERROR_INVALID_BLOCK: error = EFAULT; break;
77 case ERROR_INVALID_DATA: error = EINVAL; break;
78 case ERROR_INVALID_DRIVE: error = ENODEV; break;
79 case ERROR_INVALID_EXE_SIGNATURE: error = ENOEXEC; break;
80 case ERROR_INVALID_FLAGS: error = EINVAL; break;
81 case ERROR_INVALID_FUNCTION: error = ENOSYS; break;
82 case ERROR_INVALID_HANDLE: error = EBADF; break;
83 case ERROR_INVALID_LOGON_HOURS: error = EACCES; break;
84 case ERROR_INVALID_NAME: error = EINVAL; break;
85 case ERROR_INVALID_OWNER: error = EINVAL; break;
86 case ERROR_INVALID_PARAMETER: error = EINVAL; break;
87 case ERROR_INVALID_PASSWORD: error = EPERM; break;
88 case ERROR_INVALID_PRIMARY_GROUP: error = EINVAL; break;
89 case ERROR_INVALID_SIGNAL_NUMBER: error = EINVAL; break;
90 case ERROR_INVALID_TARGET_HANDLE: error = EIO; break;
91 case ERROR_INVALID_WORKSTATION: error = EACCES; break;
92 case ERROR_IO_DEVICE: error = EIO; break;
93 case ERROR_IO_INCOMPLETE: error = EINTR; break;
94 case ERROR_LOCKED: error = EBUSY; break;
95 case ERROR_LOCK_VIOLATION: error = EACCES; break;
96 case ERROR_LOGON_FAILURE: error = EACCES; break;
97 case ERROR_MAPPED_ALIGNMENT: error = EINVAL; break;
98 case ERROR_META_EXPANSION_TOO_LONG: error = E2BIG; break;
99 case ERROR_MORE_DATA: error = EPIPE; break;
100 case ERROR_NEGATIVE_SEEK: error = ESPIPE; break;
101 case ERROR_NOACCESS: error = EFAULT; break;
102 case ERROR_NONE_MAPPED: error = EINVAL; break;
103 case ERROR_NOT_ENOUGH_MEMORY: error = ENOMEM; break;
104 case ERROR_NOT_READY: error = EAGAIN; break;
105 case ERROR_NOT_SAME_DEVICE: error = EXDEV; break;
106 case ERROR_NO_DATA: error = EPIPE; break;
107 case ERROR_NO_MORE_SEARCH_HANDLES: error = EIO; break;
108 case ERROR_NO_PROC_SLOTS: error = EAGAIN; break;
109 case ERROR_NO_SUCH_PRIVILEGE: error = EACCES; break;
110 case ERROR_OPEN_FAILED: error = EIO; break;
111 case ERROR_OPEN_FILES: error = EBUSY; break;
112 case ERROR_OPERATION_ABORTED: error = EINTR; break;
113 case ERROR_OUTOFMEMORY: error = ENOMEM; break;
114 case ERROR_PASSWORD_EXPIRED: error = EACCES; break;
115 case ERROR_PATH_BUSY: error = EBUSY; break;
116 case ERROR_PATH_NOT_FOUND: error = ENOENT; break;
117 case ERROR_PIPE_BUSY: error = EBUSY; break;
118 case ERROR_PIPE_CONNECTED: error = EPIPE; break;
119 case ERROR_PIPE_LISTENING: error = EPIPE; break;
120 case ERROR_PIPE_NOT_CONNECTED: error = EPIPE; break;
121 case ERROR_PRIVILEGE_NOT_HELD: error = EACCES; break;
122 case ERROR_READ_FAULT: error = EIO; break;
123 case ERROR_SEEK: error = EIO; break;
124 case ERROR_SEEK_ON_DEVICE: error = ESPIPE; break;
125 case ERROR_SHARING_BUFFER_EXCEEDED: error = ENFILE; break;
126 case ERROR_SHARING_VIOLATION: error = EACCES; break;
127 case ERROR_STACK_OVERFLOW: error = ENOMEM; break;
128 case ERROR_SWAPERROR: error = ENOENT; break;
129 case ERROR_TOO_MANY_LINKS: error = EMLINK; break;
130 case ERROR_TOO_MANY_MODULES: error = EMFILE; break;
131 case ERROR_TOO_MANY_OPEN_FILES: error = EMFILE; break;
132 case ERROR_UNRECOGNIZED_MEDIA: error = ENXIO; break;
133 case ERROR_UNRECOGNIZED_VOLUME: error = ENODEV; break;
134 case ERROR_WAIT_NO_CHILDREN: error = ECHILD; break;
135 case ERROR_WRITE_FAULT: error = EIO; break;
136 case ERROR_WRITE_PROTECT: error = EROFS; break;
137 }
138 return error;
139}
140
141#undef open
142int mingw_open (const char *filename, int oflags, ...)
143{
144 va_list args;
145 unsigned mode;
146 int fd;
147
148 va_start(args, oflags);
149 mode = va_arg(args, int);
150 va_end(args);
151
152 if (oflags & O_NONBLOCK) {
153 oflags &= ~O_NONBLOCK;
154 }
155 if (filename && !strcmp(filename, "/dev/null"))
156 filename = "nul";
157 fd = open(filename, oflags, mode);
158 if (fd < 0 && (oflags & O_ACCMODE) != O_RDONLY && errno == EACCES) {
159 DWORD attrs = GetFileAttributes(filename);
160 if (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY))
161 errno = EISDIR;
162 }
163 return fd;
164}
165
166#undef fopen
167FILE *mingw_fopen (const char *filename, const char *otype)
168{
169 if (filename && !strcmp(filename, "/dev/null"))
170 filename = "nul";
171 return fopen(filename, otype);
172}
173
174#undef dup2
175int mingw_dup2 (int fd, int fdto)
176{
177 int ret = dup2(fd, fdto);
178 return ret != -1 ? fdto : -1;
179}
180
181/*
182 * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC.
183 * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch.
184 */
185static inline long long filetime_to_hnsec(const FILETIME *ft)
186{
187 long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime;
188 /* Windows to Unix Epoch conversion */
189 return winTime - 116444736000000000LL;
190}
191
192static inline time_t filetime_to_time_t(const FILETIME *ft)
193{
194 return (time_t)(filetime_to_hnsec(ft) / 10000000);
195}
196
197static inline int file_attr_to_st_mode (DWORD attr)
198{
199 int fMode = S_IREAD;
200 if (attr & FILE_ATTRIBUTE_DIRECTORY)
201 fMode |= S_IFDIR|S_IWRITE|S_IEXEC;
202 else
203 fMode |= S_IFREG;
204 if (!(attr & FILE_ATTRIBUTE_READONLY))
205 fMode |= S_IWRITE;
206 return fMode;
207}
208
209static inline int get_file_attr(const char *fname, WIN32_FILE_ATTRIBUTE_DATA *fdata)
210{
211 if (GetFileAttributesExA(fname, GetFileExInfoStandard, fdata))
212 return 0;
213
214 switch (GetLastError()) {
215 case ERROR_ACCESS_DENIED:
216 case ERROR_SHARING_VIOLATION:
217 case ERROR_LOCK_VIOLATION:
218 case ERROR_SHARING_BUFFER_EXCEEDED:
219 return EACCES;
220 case ERROR_BUFFER_OVERFLOW:
221 return ENAMETOOLONG;
222 case ERROR_NOT_ENOUGH_MEMORY:
223 return ENOMEM;
224 default:
225 return ENOENT;
226 }
227}
228
229/* We keep the do_lstat code in a separate function to avoid recursion.
230 * When a path ends with a slash, the stat will fail with ENOENT. In
231 * this case, we strip the trailing slashes and stat again.
232 *
233 * If follow is true then act like stat() and report on the link
234 * target. Otherwise report on the link itself.
235 */
236static int do_lstat(int follow, const char *file_name, struct mingw_stat *buf)
237{
238 int err;
239 WIN32_FILE_ATTRIBUTE_DATA fdata;
240 mode_t usermode;
241
242 if (!(err = get_file_attr(file_name, &fdata))) {
243 int len = strlen(file_name);
244
245 buf->st_ino = 0;
246 buf->st_uid = DEFAULT_UID;
247 buf->st_gid = DEFAULT_GID;
248 buf->st_nlink = 1;
249 buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes);
250 if (len > 4 && (!strcasecmp(file_name+len-4, ".exe") ||
251 !strcasecmp(file_name+len-4, ".com")))
252 buf->st_mode |= S_IEXEC;
253 buf->st_size = fdata.nFileSizeLow |
254 (((off64_t)fdata.nFileSizeHigh)<<32);
255 buf->st_dev = buf->st_rdev = 0; /* not used by Git */
256 buf->st_atime = filetime_to_time_t(&(fdata.ftLastAccessTime));
257 buf->st_mtime = filetime_to_time_t(&(fdata.ftLastWriteTime));
258 buf->st_ctime = filetime_to_time_t(&(fdata.ftCreationTime));
259 if (fdata.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) {
260 WIN32_FIND_DATAA findbuf;
261 HANDLE handle = FindFirstFileA(file_name, &findbuf);
262 if (handle != INVALID_HANDLE_VALUE) {
263 if ((findbuf.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
264 (findbuf.dwReserved0 == IO_REPARSE_TAG_SYMLINK)) {
265 if (follow) {
266 char buffer[MAXIMUM_REPARSE_DATA_BUFFER_SIZE];
267 buf->st_size = readlink(file_name, buffer, MAXIMUM_REPARSE_DATA_BUFFER_SIZE);
268 } else {
269 buf->st_mode = S_IFLNK;
270 }
271 buf->st_mode |= S_IREAD;
272 if (!(findbuf.dwFileAttributes & FILE_ATTRIBUTE_READONLY))
273 buf->st_mode |= S_IWRITE;
274 }
275 FindClose(handle);
276 }
277 }
278 usermode = buf->st_mode & S_IRWXU;
279 buf->st_mode |= (usermode >> 3) | ((usermode >> 6) & ~S_IWOTH);
280
281 /*
282 * Assume a block is 4096 bytes and calculate number of 512 byte
283 * sectors.
284 */
285 buf->st_blksize = 4096;
286 buf->st_blocks = ((buf->st_size+4095)>>12)<<3;
287 return 0;
288 }
289 errno = err;
290 return -1;
291}
292
293/* We provide our own lstat/fstat functions, since the provided
294 * lstat/fstat functions are so slow. These stat functions are
295 * tailored for Git's usage (read: fast), and are not meant to be
296 * complete. Note that Git stat()s are redirected to mingw_lstat()
297 * too, since Windows doesn't really handle symlinks that well.
298 */
299static int do_stat_internal(int follow, const char *file_name, struct mingw_stat *buf)
300{
301 int namelen;
302 char alt_name[PATH_MAX];
303
304 if (!do_lstat(follow, file_name, buf))
305 return 0;
306
307 /* if file_name ended in a '/', Windows returned ENOENT;
308 * try again without trailing slashes
309 */
310 if (errno != ENOENT)
311 return -1;
312
313 namelen = strlen(file_name);
314 if (namelen && file_name[namelen-1] != '/')
315 return -1;
316 while (namelen && file_name[namelen-1] == '/')
317 --namelen;
318 if (!namelen || namelen >= PATH_MAX)
319 return -1;
320
321 memcpy(alt_name, file_name, namelen);
322 alt_name[namelen] = 0;
323 return do_lstat(follow, alt_name, buf);
324}
325
326int mingw_lstat(const char *file_name, struct mingw_stat *buf)
327{
328 return do_stat_internal(0, file_name, buf);
329}
330int mingw_stat(const char *file_name, struct mingw_stat *buf)
331{
332 return do_stat_internal(1, file_name, buf);
333}
334
335int mingw_fstat(int fd, struct mingw_stat *buf)
336{
337 HANDLE fh = (HANDLE)_get_osfhandle(fd);
338 BY_HANDLE_FILE_INFORMATION fdata;
339
340 if (fh == INVALID_HANDLE_VALUE) {
341 errno = EBADF;
342 return -1;
343 }
344 /* direct non-file handles to MS's fstat() */
345 if (GetFileType(fh) != FILE_TYPE_DISK) {
346 struct _stati64 buf64;
347
348 if ( _fstati64(fd, &buf64) != 0 ) {
349 return -1;
350 }
351 buf->st_dev = 0;
352 buf->st_ino = 0;
353 buf->st_mode = S_IREAD|S_IWRITE;
354 buf->st_nlink = 1;
355 buf->st_uid = DEFAULT_UID;
356 buf->st_gid = DEFAULT_GID;
357 buf->st_rdev = 0;
358 buf->st_size = buf64.st_size;
359 buf->st_atime = buf64.st_atime;
360 buf->st_mtime = buf64.st_mtime;
361 buf->st_ctime = buf64.st_ctime;
362 buf->st_blksize = 4096;
363 buf->st_blocks = ((buf64.st_size+4095)>>12)<<3;
364 }
365
366 if (GetFileInformationByHandle(fh, &fdata)) {
367 buf->st_ino = 0;
368 buf->st_uid = DEFAULT_UID;
369 buf->st_gid = DEFAULT_GID;
370 /* could use fdata.nNumberOfLinks but it's inconsistent with stat */
371 buf->st_nlink = 1;
372 buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes);
373 buf->st_size = fdata.nFileSizeLow |
374 (((off64_t)fdata.nFileSizeHigh)<<32);
375 buf->st_dev = buf->st_rdev = 0; /* not used by Git */
376 buf->st_atime = filetime_to_time_t(&(fdata.ftLastAccessTime));
377 buf->st_mtime = filetime_to_time_t(&(fdata.ftLastWriteTime));
378 buf->st_ctime = filetime_to_time_t(&(fdata.ftCreationTime));
379 buf->st_blksize = 4096;
380 buf->st_blocks = ((buf->st_size+4095)>>12)<<3;
381 return 0;
382 }
383 errno = EBADF;
384 return -1;
385}
386
387static inline void timeval_to_filetime(const struct timeval tv, FILETIME *ft)
388{
389 long long winTime = ((tv.tv_sec * 1000000LL) + tv.tv_usec) * 10LL + 116444736000000000LL;
390 ft->dwLowDateTime = winTime;
391 ft->dwHighDateTime = winTime >> 32;
392}
393
394int utimes(const char *file_name, const struct timeval tims[2])
395{
396 FILETIME mft, aft;
397 HANDLE fh;
398 DWORD flags, attrs;
399 int rc;
400
401 flags = FILE_ATTRIBUTE_NORMAL;
402
403 /* must have write permission */
404 attrs = GetFileAttributes(file_name);
405 if ( attrs != INVALID_FILE_ATTRIBUTES ) {
406 if ( attrs & FILE_ATTRIBUTE_READONLY ) {
407 /* ignore errors here; open() will report them */
408 SetFileAttributes(file_name, attrs & ~FILE_ATTRIBUTE_READONLY);
409 }
410
411 if ( attrs & FILE_ATTRIBUTE_DIRECTORY ) {
412 flags = FILE_FLAG_BACKUP_SEMANTICS;
413 }
414 }
415
416 fh = CreateFile(file_name, GENERIC_READ|GENERIC_WRITE,
417 FILE_SHARE_READ|FILE_SHARE_WRITE,
418 NULL, OPEN_EXISTING, flags, NULL);
419 if ( fh == INVALID_HANDLE_VALUE ) {
420 errno = err_win_to_posix(GetLastError());
421 rc = -1;
422 goto revert_attrs;
423 }
424
425 if (tims) {
426 timeval_to_filetime(tims[0], &aft);
427 timeval_to_filetime(tims[1], &mft);
428 }
429 else {
430 GetSystemTimeAsFileTime(&mft);
431 aft = mft;
432 }
433 if (!SetFileTime(fh, NULL, &aft, &mft)) {
434 errno = EINVAL;
435 rc = -1;
436 } else
437 rc = 0;
438 CloseHandle(fh);
439
440revert_attrs:
441 if (attrs != INVALID_FILE_ATTRIBUTES &&
442 (attrs & FILE_ATTRIBUTE_READONLY)) {
443 /* ignore errors again */
444 SetFileAttributes(file_name, attrs);
445 }
446 return rc;
447}
448
449unsigned int sleep (unsigned int seconds)
450{
451 Sleep(seconds*1000);
452 return 0;
453}
454
455int nanosleep(const struct timespec *req, struct timespec *rem)
456{
457 if (req->tv_nsec < 0 || 1000000000 <= req->tv_nsec) {
458 errno = EINVAL;
459 return -1;
460 }
461
462 Sleep(req->tv_sec*1000 + req->tv_nsec/1000000);
463
464 /* Sleep is not interruptible. So there is no remaining delay. */
465 if (rem != NULL) {
466 rem->tv_sec = 0;
467 rem->tv_nsec = 0;
468 }
469
470 return 0;
471}
472
473/*
474 * Windows' mktemp returns NULL on error whereas POSIX always returns the
475 * template and signals an error by making it an empty string.
476 */
477#undef mktemp
478char *mingw_mktemp(char *template)
479{
480 if ( mktemp(template) == NULL ) {
481 template[0] = '\0';
482 }
483
484 return template;
485}
486
487int mkstemp(char *template)
488{
489 char *filename = mktemp(template);
490 if (filename == NULL)
491 return -1;
492 return open(filename, O_RDWR | O_CREAT, 0600);
493}
494
495int gettimeofday(struct timeval *tv, void *tz UNUSED_PARAM)
496{
497 FILETIME ft;
498 long long hnsec;
499
500 GetSystemTimeAsFileTime(&ft);
501 hnsec = filetime_to_hnsec(&ft);
502 tv->tv_sec = hnsec / 10000000;
503 tv->tv_usec = (hnsec % 10000000) / 10;
504 return 0;
505}
506
507int pipe(int filedes[2])
508{
509 if (_pipe(filedes, PIPE_BUF, 0) < 0)
510 return -1;
511 return 0;
512}
513
514struct tm *gmtime_r(const time_t *timep, struct tm *result)
515{
516 /* gmtime() in MSVCRT.DLL is thread-safe, but not reentrant */
517 memcpy(result, gmtime(timep), sizeof(struct tm));
518 return result;
519}
520
521struct tm *localtime_r(const time_t *timep, struct tm *result)
522{
523 /* localtime() in MSVCRT.DLL is thread-safe, but not reentrant */
524 memcpy(result, localtime(timep), sizeof(struct tm));
525 return result;
526}
527
528#undef getcwd
529char *mingw_getcwd(char *pointer, int len)
530{
531 int i;
532 char *ret = getcwd(pointer, len);
533 if (!ret)
534 return ret;
535 for (i = 0; ret[i]; i++)
536 if (ret[i] == '\\')
537 ret[i] = '/';
538 return ret;
539}
540
541#undef rename
542int mingw_rename(const char *pold, const char *pnew)
543{
544 DWORD attrs;
545
546 /*
547 * Try native rename() first to get errno right.
548 * It is based on MoveFile(), which cannot overwrite existing files.
549 */
550 if (!rename(pold, pnew))
551 return 0;
552 if (errno != EEXIST)
553 return -1;
554 if (MoveFileEx(pold, pnew, MOVEFILE_REPLACE_EXISTING))
555 return 0;
556 /* TODO: translate more errors */
557 if (GetLastError() == ERROR_ACCESS_DENIED &&
558 (attrs = GetFileAttributes(pnew)) != INVALID_FILE_ATTRIBUTES) {
559 if (attrs & FILE_ATTRIBUTE_DIRECTORY) {
560 errno = EISDIR;
561 return -1;
562 }
563 if ((attrs & FILE_ATTRIBUTE_READONLY) &&
564 SetFileAttributes(pnew, attrs & ~FILE_ATTRIBUTE_READONLY)) {
565 if (MoveFileEx(pold, pnew, MOVEFILE_REPLACE_EXISTING))
566 return 0;
567 /* revert file attributes on failure */
568 SetFileAttributes(pnew, attrs);
569 }
570 }
571 errno = EACCES;
572 return -1;
573}
574
575static char *gethomedir(void)
576{
577 static char buf[PATH_MAX];
578 DWORD len = sizeof(buf);
579 HANDLE h;
580 char *s;
581
582 buf[0] = '\0';
583 if ( !OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &h) )
584 return buf;
585
586 if ( !GetUserProfileDirectory(h, buf, &len) ) {
587 CloseHandle(h);
588 return buf;
589 }
590
591 CloseHandle(h);
592
593 for ( s=buf; *s; ++s ) {
594 if ( *s == '\\' ) {
595 *s = '/';
596 }
597 }
598
599 return buf;
600}
601
602static char *get_user_name(void)
603{
604 static char user_name[100] = "";
605 char *s;
606 DWORD len = sizeof(user_name);
607
608 if ( user_name[0] != '\0' ) {
609 return user_name;
610 }
611
612 if ( !GetUserName(user_name, &len) ) {
613 return NULL;
614 }
615
616 for ( s=user_name; *s; ++s ) {
617 if ( *s == ' ' ) {
618 *s = '_';
619 }
620 }
621
622 return user_name;
623}
624
625struct passwd *getpwnam(const char *name)
626{
627 const char *myname;
628
629 if ( (myname=get_user_name()) != NULL &&
630 strcmp(myname, name) == 0 ) {
631 return getpwuid(DEFAULT_UID);
632 }
633
634 return NULL;
635}
636
637struct passwd *getpwuid(uid_t uid UNUSED_PARAM)
638{
639 static struct passwd p;
640
641 if ( (p.pw_name=get_user_name()) == NULL ) {
642 return NULL;
643 }
644 p.pw_passwd = (char *)"secret";
645 p.pw_gecos = (char *)"unknown";
646 p.pw_dir = gethomedir();
647 p.pw_shell = NULL;
648 p.pw_uid = DEFAULT_UID;
649 p.pw_gid = DEFAULT_GID;
650
651 return &p;
652}
653
654struct group *getgrgid(gid_t gid UNUSED_PARAM)
655{
656 static char *members[2] = { NULL, NULL };
657 static struct group g;
658
659 if ( (g.gr_name=get_user_name()) == NULL ) {
660 return NULL;
661 }
662 g.gr_passwd = (char *)"secret";
663 g.gr_gid = DEFAULT_GID;
664 members[0] = g.gr_name;
665 g.gr_mem = members;
666
667 return &g;
668}
669
670int getgrouplist(const char *user UNUSED_PARAM, gid_t group UNUSED_PARAM,
671 gid_t *groups, int *ngroups)
672{
673 if ( *ngroups == 0 ) {
674 *ngroups = 1;
675 return -1;
676 }
677
678 *ngroups = 1;
679 groups[0] = DEFAULT_GID;
680 return 1;
681}
682
683int getgroups(int n, gid_t *groups)
684{
685 if ( n == 0 ) {
686 return 1;
687 }
688
689 groups[0] = DEFAULT_GID;
690 return 1;
691}
692
693int getlogin_r(char *buf, size_t len)
694{
695 char *name;
696
697 if ( (name=get_user_name()) == NULL ) {
698 return -1;
699 }
700
701 if ( strlen(name) >= len ) {
702 errno = ERANGE;
703 return -1;
704 }
705
706 strcpy(buf, name);
707 return 0;
708}
709
710long sysconf(int name)
711{
712 if ( name == _SC_CLK_TCK ) {
713 return 100;
714 }
715 errno = EINVAL;
716 return -1;
717}
718
719clock_t times(struct tms *buf)
720{
721 buf->tms_utime = 0;
722 buf->tms_stime = 0;
723 buf->tms_cutime = 0;
724 buf->tms_cstime = 0;
725
726 return 0;
727}
728
729int link(const char *oldpath, const char *newpath)
730{
731 typedef BOOL (WINAPI *T)(const char*, const char*, LPSECURITY_ATTRIBUTES);
732 static T create_hard_link = NULL;
733 if (!create_hard_link) {
734 create_hard_link = (T) GetProcAddress(
735 GetModuleHandle("kernel32.dll"), "CreateHardLinkA");
736 if (!create_hard_link)
737 create_hard_link = (T)-1;
738 }
739 if (create_hard_link == (T)-1) {
740 errno = ENOSYS;
741 return -1;
742 }
743 if (!create_hard_link(newpath, oldpath, NULL)) {
744 errno = err_win_to_posix(GetLastError());
745 return -1;
746 }
747 return 0;
748}
749
750char *realpath(const char *path, char *resolved_path)
751{
752 /* FIXME: need normalization */
753 return strcpy(resolved_path, path);
754}
755
756const char *get_busybox_exec_path(void)
757{
758 static char path[PATH_MAX] = "";
759
760 if (!*path)
761 GetModuleFileName(NULL, path, PATH_MAX);
762 return path;
763}
764
765#undef mkdir
766int mingw_mkdir(const char *path, int mode UNUSED_PARAM)
767{
768 int ret;
769 struct stat st;
770 int lerrno = 0;
771
772 if ( (ret=mkdir(path)) < 0 ) {
773 lerrno = errno;
774 if ( lerrno == EACCES && stat(path, &st) == 0 ) {
775 ret = 0;
776 lerrno = 0;
777 }
778 }
779
780 errno = lerrno;
781 return ret;
782}
783
784#undef chmod
785int mingw_chmod(const char *path, int mode)
786{
787 WIN32_FILE_ATTRIBUTE_DATA fdata;
788
789 if ( get_file_attr(path, &fdata) == 0 &&
790 fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ) {
791 mode |= 0222;
792 }
793
794 return chmod(path, mode);
795}
796
797int fcntl(int fd, int cmd, ...)
798{
799 va_list arg;
800 int result = -1;
801 char *fds;
802 int target, i, newfd;
803
804 va_start(arg, cmd);
805
806 switch (cmd) {
807 case F_GETFD:
808 case F_SETFD:
809 case F_GETFL:
810 /*
811 * Our fake F_GETFL won't matter if the return value is used as
812 * fcntl(fd, F_SETFL, ret|something);
813 * because F_SETFL isn't supported either.
814 */
815 result = 0;
816 break;
817 case F_DUPFD:
818 target = va_arg(arg, int);
819 fds = xzalloc(target);
820 while ((newfd = dup(fd)) < target && newfd >= 0) {
821 fds[newfd] = 1;
822 }
823 for (i = 0; i < target; ++i) {
824 if (fds[i]) {
825 close(i);
826 }
827 }
828 free(fds);
829 result = newfd;
830 break;
831 default:
832 errno = ENOSYS;
833 break;
834 }
835
836 va_end(arg);
837 return result;
838}
839
840#undef unlink
841int mingw_unlink(const char *pathname)
842{
843 /* read-only files cannot be removed */
844 chmod(pathname, 0666);
845 return unlink(pathname);
846}
847
848#undef strftime
849size_t mingw_strftime(char *buf, size_t max, const char *format, const struct tm *tm)
850{
851 size_t ret;
852 char day[3];
853 char *t;
854 char *fmt, *newfmt;
855 struct tm tm2;
856 int m;
857
858 /*
859 * Emulate the '%e' and '%s' formats that Windows' strftime lacks.
860 * Happily, the string that replaces '%e' is two characters long.
861 * '%s' is a bit more complicated.
862 */
863 fmt = xstrdup(format);
864 for ( t=fmt; *t; ++t ) {
865 if ( *t == '%' ) {
866 if ( t[1] == 'e' ) {
867 if ( tm->tm_mday >= 0 && tm->tm_mday <= 99 ) {
868 sprintf(day, "%2d", tm->tm_mday);
869 }
870 else {
871 strcpy(day, " ");
872 }
873 memcpy(t++, day, 2);
874 }
875 else if ( t[1] == 's' ) {
876 *t = '\0';
877 m = t - fmt;
878 tm2 = *tm;
879 newfmt = xasprintf("%s%d%s", fmt, (int)mktime(&tm2), t+2);
880 free(fmt);
881 t = newfmt + m + 1;
882 fmt = newfmt;
883 }
884 else if ( t[1] == 'z' ) {
885 char buffer[16] = "";
886
887 *t = '\0';
888 m = t - fmt;
889 _tzset();
890 if ( tm->tm_isdst >= 0 ) {
891 int offset = (int)_timezone - (tm->tm_isdst > 0 ? 3600 : 0);
892 int hr, min;
893
894 if ( offset > 0 ) {
895 buffer[0] = '-';
896 }
897 else {
898 buffer[0] = '+';
899 offset = -offset;
900 }
901
902 hr = offset / 3600;
903 min = (offset % 3600) / 60;
904 sprintf(buffer+1, "%02d%02d", hr, min);
905 }
906 newfmt = xasprintf("%s%s%s", fmt, buffer, t+2);
907 free(fmt);
908 t = newfmt + m + 1;
909 fmt = newfmt;
910 }
911 else if ( t[1] != '\0' ) {
912 ++t;
913 }
914 }
915 }
916
917 ret = strftime(buf, max, fmt, tm);
918 free(fmt);
919
920 return ret;
921}
922
923int stime(time_t *t UNUSED_PARAM)
924{
925 errno = EPERM;
926 return -1;
927}
928
929#undef access
930int mingw_access(const char *name, int mode)
931{
932 int ret;
933 struct stat s;
934 int fd, n, sig;
935 unsigned int offset;
936 unsigned char buf[1024];
937
938 /* Windows can only handle test for existence, read or write */
939 if (mode == F_OK || (mode & ~X_OK)) {
940 ret = _access(name, mode & ~X_OK);
941 if (ret < 0 || !(mode & X_OK)) {
942 return ret;
943 }
944 }
945
946 if (!mingw_stat(name, &s) && S_ISREG(s.st_mode)) {
947
948 /* stat marks .exe and .com files as executable */
949 if ((s.st_mode&S_IEXEC)) {
950 return 0;
951 }
952
953 fd = open(name, O_RDONLY);
954 if (fd < 0)
955 return -1;
956 n = read(fd, buf, sizeof(buf)-1);
957 close(fd);
958 if (n < 4) /* at least '#!/x' and not error */
959 return -1;
960
961 /* shell script */
962 if (buf[0] == '#' && buf[1] == '!') {
963 return 0;
964 }
965
966 /*
967 * Poke about in file to see if it's a PE binary. I've just copied
968 * the magic from the file command.
969 */
970 if (buf[0] == 'M' && buf[1] == 'Z') {
971 offset = (buf[0x19] << 8) + buf[0x18];
972 if (offset > 0x3f) {
973 offset = (buf[0x3f] << 24) + (buf[0x3e] << 16) +
974 (buf[0x3d] << 8) + buf[0x3c];
975 if (offset < sizeof(buf)-100) {
976 if (memcmp(buf+offset, "PE\0\0", 4) == 0) {
977 sig = (buf[offset+25] << 8) + buf[offset+24];
978 if (sig == 0x10b || sig == 0x20b) {
979 sig = (buf[offset+23] << 8) + buf[offset+22];
980 if ((sig & 0x2000) != 0) {
981 /* DLL */
982 return -1;
983 }
984 sig = buf[offset+92];
985 return !(sig == 1 || sig == 2 ||
986 sig == 3 || sig == 7);
987 }
988 }
989 }
990 }
991 }
992 }
993
994 return -1;
995}
996
997#undef rmdir
998int mingw_rmdir(const char *path)
999{
1000 /* read-only directories cannot be removed */
1001 chmod(path, 0666);
1002 return rmdir(path);
1003}
1004
1005/* check if path can be made into an executable by adding a suffix;
1006 * return an allocated string containing the path if it can;
1007 * return NULL if not.
1008 *
1009 * if path already has a suffix don't even bother trying
1010 */
1011char *file_is_win32_executable(const char *p)
1012{
1013 char *path;
1014 int len = strlen(p);
1015
1016 if (len > 4 && (!strcasecmp(p+len-4, ".exe") ||
1017 !strcasecmp(p+len-4, ".com"))) {
1018 return NULL;
1019 }
1020
1021 if ( (path=malloc(len+5)) != NULL ) {
1022 memcpy(path, p, len);
1023 memcpy(path+len, ".exe", 5);
1024 if (file_is_executable(path)) {
1025 return path;
1026 }
1027 memcpy(path+len, ".com", 5);
1028 if (file_is_executable(path)) {
1029 return path;
1030 }
1031 free(path);
1032 }
1033
1034 return NULL;
1035}
1036
1037#undef opendir
1038DIR *mingw_opendir(const char *path)
1039{
1040 char name[4];
1041
1042 if (isalpha(path[0]) && path[1] == ':' && path[2] == '\0') {
1043 strcpy(name, path);
1044 name[2] = '/';
1045 name[3] = '\0';
1046 path = name;
1047 }
1048
1049 return opendir(path);
1050}
1051
1052off_t mingw_lseek(int fd, off_t offset, int whence)
1053{
1054 HANDLE h = (HANDLE)_get_osfhandle(fd);
1055 if (h == INVALID_HANDLE_VALUE) {
1056 errno = EBADF;
1057 return -1;
1058 }
1059 if (GetFileType(h) != FILE_TYPE_DISK) {
1060 errno = ESPIPE;
1061 return -1;
1062 }
1063 return _lseeki64(fd, offset, whence);
1064}
diff --git a/win32/mntent.c b/win32/mntent.c
new file mode 100644
index 000000000..9b04a9c5e
--- /dev/null
+++ b/win32/mntent.c
@@ -0,0 +1,69 @@
1/*
2 * A simple WIN32 implementation of mntent routines. It only handles
3 * fixed logical drives.
4 */
5#include "libbb.h"
6
7struct mntdata {
8 DWORD flags;
9 int index;
10};
11
12FILE *setmntent(const char *file UNUSED_PARAM, const char *mode UNUSED_PARAM)
13{
14 struct mntdata *data;
15
16 if ( (data=malloc(sizeof(struct mntdata))) == NULL ) {
17 return NULL;
18 }
19
20 data->flags = GetLogicalDrives();
21 data->index = -1;
22
23 return (FILE *)data;
24}
25
26struct mntent *getmntent(FILE *stream)
27{
28 struct mntdata *data = (struct mntdata *)stream;
29 static char mnt_fsname[4];
30 static char mnt_dir[4];
31 static char mnt_type[100];
32 static char mnt_opts[4];
33 static struct mntent my_mount_entry =
34 { mnt_fsname, mnt_dir, mnt_type, mnt_opts, 0, 0 };
35 struct mntent *entry;
36
37 entry = NULL;
38 while ( ++data->index < 26 ) {
39 if ( (data->flags & 1<<data->index) != 0 ) {
40 mnt_fsname[0] = 'A' + data->index;
41 mnt_fsname[1] = ':';
42 mnt_fsname[2] = '\0';
43 mnt_dir[0] = 'A' + data->index;
44 mnt_dir[1] = ':';
45 mnt_dir[2] = '\\';
46 mnt_dir[3] = '\0';
47 mnt_type[0] = '\0';
48 mnt_opts[0] = '\0';
49
50 if ( GetDriveType(mnt_dir) == DRIVE_FIXED ) {
51 if ( !GetVolumeInformation(mnt_dir, NULL, 0, NULL, NULL,
52 NULL, mnt_type, 100) ) {
53 mnt_type[0] = '\0';
54 }
55
56 entry = &my_mount_entry;
57 break;
58 }
59 }
60 }
61
62 return entry;
63}
64
65int endmntent(FILE *stream)
66{
67 free(stream);
68 return 0;
69}
diff --git a/win32/mntent.h b/win32/mntent.h
new file mode 100644
index 000000000..b035bfa9c
--- /dev/null
+++ b/win32/mntent.h
@@ -0,0 +1,19 @@
1#ifndef MNTENT_H
2#define MNTENT_H
3
4#include <stdio.h>
5
6struct mntent {
7 char *mnt_fsname; /* Device or server for filesystem. */
8 char *mnt_dir; /* Directory mounted on. */
9 char *mnt_type; /* Type of filesystem: ufs, nfs, etc. */
10 char *mnt_opts; /* Comma-separated options for fs. */
11 int mnt_freq; /* Dump frequency (in days). */
12 int mnt_passno; /* Pass number for `fsck'. */
13};
14
15extern FILE *setmntent(const char *file, const char *mode);
16extern struct mntent *getmntent(FILE *stream);
17extern int endmntent(FILE *stream);
18
19#endif
diff --git a/win32/net.c b/win32/net.c
new file mode 100644
index 000000000..2341119b0
--- /dev/null
+++ b/win32/net.c
@@ -0,0 +1,101 @@
1#include "libbb.h"
2
3int inet_aton(const char *cp, struct in_addr *inp)
4{
5 unsigned long val = inet_addr(cp);
6
7 if (val == INADDR_NONE)
8 return 0;
9 inp->S_un.S_addr = val;
10 return 1;
11}
12
13void init_winsock(void)
14{
15 WSADATA wsa;
16 if (WSAStartup(MAKEWORD(2,2), &wsa))
17 bb_error_msg_and_die("unable to initialize winsock subsystem, error %d",
18 WSAGetLastError());
19 atexit((void(*)(void)) WSACleanup); /* may conflict with other atexit? */
20}
21
22int mingw_socket(int domain, int type, int protocol)
23{
24 int sockfd;
25 SOCKET s = WSASocket(domain, type, protocol, NULL, 0, 0);
26 if (s == INVALID_SOCKET) {
27 /*
28 * WSAGetLastError() values are regular BSD error codes
29 * biased by WSABASEERR.
30 * However, strerror() does not know about networking
31 * specific errors, which are values beginning at 38 or so.
32 * Therefore, we choose to leave the biased error code
33 * in errno so that _if_ someone looks up the code somewhere,
34 * then it is at least the number that are usually listed.
35 */
36 errno = WSAGetLastError();
37 return -1;
38 }
39 /* convert into a file descriptor */
40 if ((sockfd = _open_osfhandle((intptr_t)s, O_RDWR|O_BINARY)) < 0) {
41 closesocket(s);
42 bb_error_msg("unable to make a socket file descriptor: %s",
43 strerror(errno));
44 return -1;
45 }
46 return sockfd;
47}
48
49#undef connect
50int mingw_connect(int sockfd, const struct sockaddr *sa, size_t sz)
51{
52 SOCKET s = (SOCKET)_get_osfhandle(sockfd);
53 return connect(s, sa, sz);
54}
55
56#undef bind
57int mingw_bind(int sockfd, struct sockaddr *sa, size_t sz)
58{
59 SOCKET s = (SOCKET)_get_osfhandle(sockfd);
60 return bind(s, sa, sz);
61}
62
63#undef setsockopt
64int mingw_setsockopt(int sockfd, int lvl, int optname, void *optval, int optlen)
65{
66 SOCKET s = (SOCKET)_get_osfhandle(sockfd);
67 return setsockopt(s, lvl, optname, (const char*)optval, optlen);
68}
69
70#undef shutdown
71int mingw_shutdown(int sockfd, int how)
72{
73 SOCKET s = (SOCKET)_get_osfhandle(sockfd);
74 return shutdown(s, how);
75}
76
77#undef listen
78int mingw_listen(int sockfd, int backlog)
79{
80 SOCKET s = (SOCKET)_get_osfhandle(sockfd);
81 return listen(s, backlog);
82}
83
84#undef accept
85int mingw_accept(int sockfd1, struct sockaddr *sa, socklen_t *sz)
86{
87 int sockfd2;
88
89 SOCKET s1 = (SOCKET)_get_osfhandle(sockfd1);
90 SOCKET s2 = accept(s1, sa, sz);
91
92 /* convert into a file descriptor */
93 if ((sockfd2 = _open_osfhandle((intptr_t)s2, O_RDWR|O_BINARY)) < 0) {
94 int err = errno;
95 closesocket(s2);
96 bb_error_msg("unable to make a socket file descriptor: %s",
97 strerror(err));
98 return -1;
99 }
100 return sockfd2;
101}
diff --git a/win32/net/if.h b/win32/net/if.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/net/if.h
diff --git a/win32/netdb.h b/win32/netdb.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/netdb.h
diff --git a/win32/netinet/in.h b/win32/netinet/in.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/netinet/in.h
diff --git a/win32/paths.h b/win32/paths.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/paths.h
diff --git a/win32/poll.c b/win32/poll.c
new file mode 100644
index 000000000..403eaa7a3
--- /dev/null
+++ b/win32/poll.c
@@ -0,0 +1,606 @@
1/* Emulation for poll(2)
2 Contributed by Paolo Bonzini.
3
4 Copyright 2001-2003, 2006-2011 Free Software Foundation, Inc.
5
6 This file is part of gnulib.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation,
20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22/* Tell gcc not to warn about the (nfd < 0) tests, below. */
23#if (__GNUC__ == 4 && 3 <= __GNUC_MINOR__) || 4 < __GNUC__
24# pragma GCC diagnostic ignored "-Wtype-limits"
25#endif
26
27#include <malloc.h>
28
29#include <sys/types.h>
30
31/* Specification. */
32#include <poll.h>
33
34#include <errno.h>
35#include <limits.h>
36#include <assert.h>
37
38#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
39# define WIN32_NATIVE
40# if defined (_MSC_VER)
41# define _WIN32_WINNT 0x0502
42# endif
43# include <winsock2.h>
44# include <windows.h>
45# include <io.h>
46# include <stdio.h>
47# include <conio.h>
48#else
49# include <sys/time.h>
50# include <sys/socket.h>
51# include <sys/select.h>
52# include <unistd.h>
53#endif
54
55#ifdef HAVE_SYS_IOCTL_H
56# include <sys/ioctl.h>
57#endif
58#ifdef HAVE_SYS_FILIO_H
59# include <sys/filio.h>
60#endif
61
62#include <time.h>
63
64#ifndef INFTIM
65# define INFTIM (-1)
66#endif
67
68/* BeOS does not have MSG_PEEK. */
69#ifndef MSG_PEEK
70# define MSG_PEEK 0
71#endif
72
73#ifdef WIN32_NATIVE
74
75#define IsConsoleHandle(h) (((long) (h) & 3) == 3)
76
77static BOOL
78IsSocketHandle (HANDLE h)
79{
80 WSANETWORKEVENTS ev;
81
82 if (IsConsoleHandle (h))
83 return FALSE;
84
85 /* Under Wine, it seems that getsockopt returns 0 for pipes too.
86 WSAEnumNetworkEvents instead distinguishes the two correctly. */
87 ev.lNetworkEvents = 0xDEADBEEF;
88 WSAEnumNetworkEvents ((SOCKET) h, NULL, &ev);
89 return ev.lNetworkEvents != 0xDEADBEEF;
90}
91
92/* Declare data structures for ntdll functions. */
93typedef struct _FILE_PIPE_LOCAL_INFORMATION {
94 ULONG NamedPipeType;
95 ULONG NamedPipeConfiguration;
96 ULONG MaximumInstances;
97 ULONG CurrentInstances;
98 ULONG InboundQuota;
99 ULONG ReadDataAvailable;
100 ULONG OutboundQuota;
101 ULONG WriteQuotaAvailable;
102 ULONG NamedPipeState;
103 ULONG NamedPipeEnd;
104} FILE_PIPE_LOCAL_INFORMATION, *PFILE_PIPE_LOCAL_INFORMATION;
105
106typedef struct _IO_STATUS_BLOCK
107{
108 union {
109 DWORD Status;
110 PVOID Pointer;
111 } u;
112 ULONG_PTR Information;
113} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;
114
115typedef enum _FILE_INFORMATION_CLASS {
116 FilePipeLocalInformation = 24
117} FILE_INFORMATION_CLASS, *PFILE_INFORMATION_CLASS;
118
119typedef DWORD (WINAPI *PNtQueryInformationFile)
120 (HANDLE, IO_STATUS_BLOCK *, VOID *, ULONG, FILE_INFORMATION_CLASS);
121
122# ifndef PIPE_BUF
123# define PIPE_BUF 512
124# endif
125
126/* Compute revents values for file handle H. If some events cannot happen
127 for the handle, eliminate them from *P_SOUGHT. */
128
129static int
130win32_compute_revents (HANDLE h, int *p_sought)
131{
132 int i, ret, happened;
133 INPUT_RECORD *irbuffer;
134 DWORD avail, nbuffer;
135 BOOL bRet;
136 IO_STATUS_BLOCK iosb;
137 FILE_PIPE_LOCAL_INFORMATION fpli;
138 static PNtQueryInformationFile NtQueryInformationFile;
139 static BOOL once_only;
140
141 switch (GetFileType (h))
142 {
143 case FILE_TYPE_PIPE:
144 if (!once_only)
145 {
146 NtQueryInformationFile = (PNtQueryInformationFile)
147 GetProcAddress (GetModuleHandle ("ntdll.dll"),
148 "NtQueryInformationFile");
149 once_only = TRUE;
150 }
151
152 happened = 0;
153 if (PeekNamedPipe (h, NULL, 0, NULL, &avail, NULL) != 0)
154 {
155 if (avail)
156 happened |= *p_sought & (POLLIN | POLLRDNORM);
157 }
158 else if (GetLastError () == ERROR_BROKEN_PIPE)
159 happened |= POLLHUP;
160
161 else
162 {
163 /* It was the write-end of the pipe. Check if it is writable.
164 If NtQueryInformationFile fails, optimistically assume the pipe is
165 writable. This could happen on Win9x, where NtQueryInformationFile
166 is not available, or if we inherit a pipe that doesn't permit
167 FILE_READ_ATTRIBUTES access on the write end (I think this should
168 not happen since WinXP SP2; WINE seems fine too). Otherwise,
169 ensure that enough space is available for atomic writes. */
170 memset (&iosb, 0, sizeof (iosb));
171 memset (&fpli, 0, sizeof (fpli));
172
173 if (!NtQueryInformationFile
174 || NtQueryInformationFile (h, &iosb, &fpli, sizeof (fpli),
175 FilePipeLocalInformation)
176 || fpli.WriteQuotaAvailable >= PIPE_BUF
177 || (fpli.OutboundQuota < PIPE_BUF &&
178 fpli.WriteQuotaAvailable == fpli.OutboundQuota))
179 happened |= *p_sought & (POLLOUT | POLLWRNORM | POLLWRBAND);
180 }
181 return happened;
182
183 case FILE_TYPE_CHAR:
184 ret = WaitForSingleObject (h, 0);
185 if (!IsConsoleHandle (h))
186 return ret == WAIT_OBJECT_0 ? *p_sought & ~(POLLPRI | POLLRDBAND) : 0;
187
188 nbuffer = avail = 0;
189 bRet = GetNumberOfConsoleInputEvents (h, &nbuffer);
190 if (bRet)
191 {
192 /* Input buffer. */
193 *p_sought &= POLLIN | POLLRDNORM;
194 if (nbuffer == 0)
195 return POLLHUP;
196 if (!*p_sought)
197 return 0;
198
199 irbuffer = (INPUT_RECORD *) alloca (nbuffer * sizeof (INPUT_RECORD));
200 bRet = PeekConsoleInput (h, irbuffer, nbuffer, &avail);
201 if (!bRet || avail == 0)
202 return POLLHUP;
203
204 for (i = 0; i < avail; i++)
205 if (irbuffer[i].EventType == KEY_EVENT)
206 return *p_sought;
207 return 0;
208 }
209 else
210 {
211 /* Screen buffer. */
212 *p_sought &= POLLOUT | POLLWRNORM | POLLWRBAND;
213 return *p_sought;
214 }
215
216 default:
217 ret = WaitForSingleObject (h, 0);
218 if (ret == WAIT_OBJECT_0)
219 return *p_sought & ~(POLLPRI | POLLRDBAND);
220
221 return *p_sought & (POLLOUT | POLLWRNORM | POLLWRBAND);
222 }
223}
224
225/* Convert fd_sets returned by select into revents values. */
226
227static int
228win32_compute_revents_socket (SOCKET h, int sought, long lNetworkEvents)
229{
230 int happened = 0;
231
232 if ((lNetworkEvents & (FD_READ | FD_ACCEPT | FD_CLOSE)) == FD_ACCEPT)
233 happened |= (POLLIN | POLLRDNORM) & sought;
234
235 else if (lNetworkEvents & (FD_READ | FD_ACCEPT | FD_CLOSE))
236 {
237 int r, error;
238
239 char data[64];
240 WSASetLastError (0);
241 r = recv (h, data, sizeof (data), MSG_PEEK);
242 error = WSAGetLastError ();
243 WSASetLastError (0);
244
245 if (r > 0 || error == WSAENOTCONN)
246 happened |= (POLLIN | POLLRDNORM) & sought;
247
248 /* Distinguish hung-up sockets from other errors. */
249 else if (r == 0 || error == WSAESHUTDOWN || error == WSAECONNRESET
250 || error == WSAECONNABORTED || error == WSAENETRESET)
251 happened |= POLLHUP;
252
253 else
254 happened |= POLLERR;
255 }
256
257 if (lNetworkEvents & (FD_WRITE | FD_CONNECT))
258 happened |= (POLLOUT | POLLWRNORM | POLLWRBAND) & sought;
259
260 if (lNetworkEvents & FD_OOB)
261 happened |= (POLLPRI | POLLRDBAND) & sought;
262
263 return happened;
264}
265
266#else /* !MinGW */
267
268/* Convert select(2) returned fd_sets into poll(2) revents values. */
269static int
270compute_revents (int fd, int sought, fd_set *rfds, fd_set *wfds, fd_set *efds)
271{
272 int happened = 0;
273 if (FD_ISSET (fd, rfds))
274 {
275 int r;
276 int socket_errno;
277
278# if defined __MACH__ && defined __APPLE__
279 /* There is a bug in Mac OS X that causes it to ignore MSG_PEEK
280 for some kinds of descriptors. Detect if this descriptor is a
281 connected socket, a server socket, or something else using a
282 0-byte recv, and use ioctl(2) to detect POLLHUP. */
283 r = recv (fd, NULL, 0, MSG_PEEK);
284 socket_errno = (r < 0) ? errno : 0;
285 if (r == 0 || socket_errno == ENOTSOCK)
286 ioctl (fd, FIONREAD, &r);
287# else
288 char data[64];
289 r = recv (fd, data, sizeof (data), MSG_PEEK);
290 socket_errno = (r < 0) ? errno : 0;
291# endif
292 if (r == 0)
293 happened |= POLLHUP;
294
295 /* If the event happened on an unconnected server socket,
296 that's fine. */
297 else if (r > 0 || ( /* (r == -1) && */ socket_errno == ENOTCONN))
298 happened |= (POLLIN | POLLRDNORM) & sought;
299
300 /* Distinguish hung-up sockets from other errors. */
301 else if (socket_errno == ESHUTDOWN || socket_errno == ECONNRESET
302 || socket_errno == ECONNABORTED || socket_errno == ENETRESET)
303 happened |= POLLHUP;
304
305 else
306 happened |= POLLERR;
307 }
308
309 if (FD_ISSET (fd, wfds))
310 happened |= (POLLOUT | POLLWRNORM | POLLWRBAND) & sought;
311
312 if (FD_ISSET (fd, efds))
313 happened |= (POLLPRI | POLLRDBAND) & sought;
314
315 return happened;
316}
317#endif /* !MinGW */
318
319int
320poll (struct pollfd *pfd, nfds_t nfd, int timeout)
321{
322#ifndef WIN32_NATIVE
323 fd_set rfds, wfds, efds;
324 struct timeval tv;
325 struct timeval *ptv;
326 int maxfd, rc;
327 nfds_t i;
328
329# ifdef _SC_OPEN_MAX
330 static int sc_open_max = -1;
331
332 if (nfd < 0
333 || (nfd > sc_open_max
334 && (sc_open_max != -1
335 || nfd > (sc_open_max = sysconf (_SC_OPEN_MAX)))))
336 {
337 errno = EINVAL;
338 return -1;
339 }
340# else /* !_SC_OPEN_MAX */
341# ifdef OPEN_MAX
342 if (nfd < 0 || nfd > OPEN_MAX)
343 {
344 errno = EINVAL;
345 return -1;
346 }
347# endif /* OPEN_MAX -- else, no check is needed */
348# endif /* !_SC_OPEN_MAX */
349
350 /* EFAULT is not necessary to implement, but let's do it in the
351 simplest case. */
352 if (!pfd)
353 {
354 errno = EFAULT;
355 return -1;
356 }
357
358 /* convert timeout number into a timeval structure */
359 if (timeout == 0)
360 {
361 ptv = &tv;
362 ptv->tv_sec = 0;
363 ptv->tv_usec = 0;
364 }
365 else if (timeout > 0)
366 {
367 ptv = &tv;
368 ptv->tv_sec = timeout / 1000;
369 ptv->tv_usec = (timeout % 1000) * 1000;
370 }
371 else if (timeout == INFTIM)
372 /* wait forever */
373 ptv = NULL;
374 else
375 {
376 errno = EINVAL;
377 return -1;
378 }
379
380 /* create fd sets and determine max fd */
381 maxfd = -1;
382 FD_ZERO (&rfds);
383 FD_ZERO (&wfds);
384 FD_ZERO (&efds);
385 for (i = 0; i < nfd; i++)
386 {
387 if (pfd[i].fd < 0)
388 continue;
389
390 if (pfd[i].events & (POLLIN | POLLRDNORM))
391 FD_SET (pfd[i].fd, &rfds);
392
393 /* see select(2): "the only exceptional condition detectable
394 is out-of-band data received on a socket", hence we push
395 POLLWRBAND events onto wfds instead of efds. */
396 if (pfd[i].events & (POLLOUT | POLLWRNORM | POLLWRBAND))
397 FD_SET (pfd[i].fd, &wfds);
398 if (pfd[i].events & (POLLPRI | POLLRDBAND))
399 FD_SET (pfd[i].fd, &efds);
400 if (pfd[i].fd >= maxfd
401 && (pfd[i].events & (POLLIN | POLLOUT | POLLPRI
402 | POLLRDNORM | POLLRDBAND
403 | POLLWRNORM | POLLWRBAND)))
404 {
405 maxfd = pfd[i].fd;
406 if (maxfd > FD_SETSIZE)
407 {
408 errno = EOVERFLOW;
409 return -1;
410 }
411 }
412 }
413
414 /* examine fd sets */
415 rc = select (maxfd + 1, &rfds, &wfds, &efds, ptv);
416 if (rc < 0)
417 return rc;
418
419 /* establish results */
420 rc = 0;
421 for (i = 0; i < nfd; i++)
422 if (pfd[i].fd < 0)
423 pfd[i].revents = 0;
424 else
425 {
426 int happened = compute_revents (pfd[i].fd, pfd[i].events,
427 &rfds, &wfds, &efds);
428 if (happened)
429 {
430 pfd[i].revents = happened;
431 rc++;
432 }
433 }
434
435 return rc;
436#else
437 static struct timeval tv0;
438 static HANDLE hEvent;
439 WSANETWORKEVENTS ev;
440 HANDLE h, handle_array[FD_SETSIZE + 2];
441 DWORD ret, wait_timeout, nhandles;
442 fd_set rfds, wfds, xfds;
443 BOOL poll_again;
444 MSG msg;
445 int rc = 0;
446 nfds_t i;
447
448 if (nfd < 0 || timeout < -1)
449 {
450 errno = EINVAL;
451 return -1;
452 }
453
454 if (!hEvent)
455 hEvent = CreateEvent (NULL, FALSE, FALSE, NULL);
456
457restart:
458 handle_array[0] = hEvent;
459 nhandles = 1;
460 FD_ZERO (&rfds);
461 FD_ZERO (&wfds);
462 FD_ZERO (&xfds);
463
464 /* Classify socket handles and create fd sets. */
465 for (i = 0; i < nfd; i++)
466 {
467 int sought = pfd[i].events;
468 pfd[i].revents = 0;
469 if (pfd[i].fd < 0)
470 continue;
471 if (!(sought & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM | POLLWRBAND
472 | POLLPRI | POLLRDBAND)))
473 continue;
474
475 h = (HANDLE) _get_osfhandle (pfd[i].fd);
476 assert (h != NULL);
477 if (IsSocketHandle (h))
478 {
479 int requested = FD_CLOSE;
480
481 /* see above; socket handles are mapped onto select. */
482 if (sought & (POLLIN | POLLRDNORM))
483 {
484 requested |= FD_READ | FD_ACCEPT;
485 FD_SET ((SOCKET) h, &rfds);
486 }
487 if (sought & (POLLOUT | POLLWRNORM | POLLWRBAND))
488 {
489 requested |= FD_WRITE | FD_CONNECT;
490 FD_SET ((SOCKET) h, &wfds);
491 }
492 if (sought & (POLLPRI | POLLRDBAND))
493 {
494 requested |= FD_OOB;
495 FD_SET ((SOCKET) h, &xfds);
496 }
497
498 if (requested)
499 WSAEventSelect ((SOCKET) h, hEvent, requested);
500 }
501 else
502 {
503 /* Poll now. If we get an event, do not poll again. Also,
504 screen buffer handles are waitable, and they'll block until
505 a character is available. win32_compute_revents eliminates
506 bits for the "wrong" direction. */
507 pfd[i].revents = win32_compute_revents (h, &sought);
508 if (sought)
509 handle_array[nhandles++] = h;
510 if (pfd[i].revents)
511 timeout = 0;
512 }
513 }
514
515 if (select (0, &rfds, &wfds, &xfds, &tv0) > 0)
516 {
517 /* Do MsgWaitForMultipleObjects anyway to dispatch messages, but
518 no need to call select again. */
519 poll_again = FALSE;
520 wait_timeout = 0;
521 }
522 else
523 {
524 poll_again = TRUE;
525 if (timeout == INFTIM)
526 wait_timeout = INFINITE;
527 else
528 wait_timeout = timeout;
529 }
530
531 for (;;)
532 {
533 ret = MsgWaitForMultipleObjects (nhandles, handle_array, FALSE,
534 wait_timeout, QS_ALLINPUT);
535
536 if (ret == WAIT_OBJECT_0 + nhandles)
537 {
538 /* new input of some other kind */
539 BOOL bRet;
540 while ((bRet = PeekMessage (&msg, NULL, 0, 0, PM_REMOVE)) != 0)
541 {
542 TranslateMessage (&msg);
543 DispatchMessage (&msg);
544 }
545 }
546 else
547 break;
548 }
549
550 if (poll_again)
551 select (0, &rfds, &wfds, &xfds, &tv0);
552
553 /* Place a sentinel at the end of the array. */
554 handle_array[nhandles] = NULL;
555 nhandles = 1;
556 for (i = 0; i < nfd; i++)
557 {
558 int happened;
559
560 if (pfd[i].fd < 0)
561 continue;
562 if (!(pfd[i].events & (POLLIN | POLLRDNORM |
563 POLLOUT | POLLWRNORM | POLLWRBAND)))
564 continue;
565
566 h = (HANDLE) _get_osfhandle (pfd[i].fd);
567 if (h != handle_array[nhandles])
568 {
569 /* It's a socket. */
570 WSAEnumNetworkEvents ((SOCKET) h, NULL, &ev);
571 WSAEventSelect ((SOCKET) h, 0, 0);
572
573 /* If we're lucky, WSAEnumNetworkEvents already provided a way
574 to distinguish FD_READ and FD_ACCEPT; this saves a recv later. */
575 if (FD_ISSET ((SOCKET) h, &rfds)
576 && !(ev.lNetworkEvents & (FD_READ | FD_ACCEPT)))
577 ev.lNetworkEvents |= FD_READ | FD_ACCEPT;
578 if (FD_ISSET ((SOCKET) h, &wfds))
579 ev.lNetworkEvents |= FD_WRITE | FD_CONNECT;
580 if (FD_ISSET ((SOCKET) h, &xfds))
581 ev.lNetworkEvents |= FD_OOB;
582
583 happened = win32_compute_revents_socket ((SOCKET) h, pfd[i].events,
584 ev.lNetworkEvents);
585 }
586 else
587 {
588 /* Not a socket. */
589 int sought = pfd[i].events;
590 happened = win32_compute_revents (h, &sought);
591 nhandles++;
592 }
593
594 if ((pfd[i].revents |= happened) != 0)
595 rc++;
596 }
597
598 if (!rc && timeout == INFTIM)
599 {
600 SwitchToThread();
601 goto restart;
602 }
603
604 return rc;
605#endif
606}
diff --git a/win32/poll.h b/win32/poll.h
new file mode 100644
index 000000000..b7aa59d97
--- /dev/null
+++ b/win32/poll.h
@@ -0,0 +1,53 @@
1/* Header for poll(2) emulation
2 Contributed by Paolo Bonzini.
3
4 Copyright 2001, 2002, 2003, 2007, 2009, 2010 Free Software Foundation, Inc.
5
6 This file is part of gnulib.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation,
20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22#ifndef _GL_POLL_H
23#define _GL_POLL_H
24
25/* fake a poll(2) environment */
26#define POLLIN 0x0001 /* any readable data available */
27#define POLLPRI 0x0002 /* OOB/Urgent readable data */
28#define POLLOUT 0x0004 /* file descriptor is writeable */
29#define POLLERR 0x0008 /* some poll error occurred */
30#define POLLHUP 0x0010 /* file descriptor was "hung up" */
31#define POLLNVAL 0x0020 /* requested events "invalid" */
32#define POLLRDNORM 0x0040
33#define POLLRDBAND 0x0080
34#define POLLWRNORM 0x0100
35#define POLLWRBAND 0x0200
36
37struct pollfd
38{
39 int fd; /* which file descriptor to poll */
40 short events; /* events we are interested in */
41 short revents; /* events found on return */
42};
43
44typedef unsigned long nfds_t;
45
46extern int poll (struct pollfd *pfd, nfds_t nfd, int timeout);
47
48/* Define INFTIM only if doing so conforms to POSIX. */
49#if !defined (_POSIX_C_SOURCE) && !defined (_XOPEN_SOURCE)
50#define INFTIM (-1)
51#endif
52
53#endif /* _GL_POLL_H */
diff --git a/win32/popen.c b/win32/popen.c
new file mode 100644
index 000000000..6b8e52ca9
--- /dev/null
+++ b/win32/popen.c
@@ -0,0 +1,318 @@
1#include <fcntl.h>
2#include "libbb.h"
3
4typedef struct {
5 PROCESS_INFORMATION piProcInfo;
6 HANDLE pipe[2];
7 char mode;
8 int fd;
9} pipe_data;
10
11static pipe_data *pipes = NULL;
12static int num_pipes = 0;
13
14static int mingw_pipe(HANDLE *readwrite)
15{
16 SECURITY_ATTRIBUTES sa;
17
18 sa.nLength = sizeof(sa); /* Length in bytes */
19 sa.bInheritHandle = 1; /* the child must inherit these handles */
20 sa.lpSecurityDescriptor = NULL;
21
22 if ( !CreatePipe (&readwrite[0], &readwrite[1], &sa, 1 << 13) ) {
23 return -1;
24 }
25
26 return 0;
27}
28
29static pipe_data *find_pipe(void)
30{
31 int i;
32 pipe_data *p = NULL;
33
34 /* find an unused pipe structure */
35 for ( i=0; i<num_pipes; ++i ) {
36 if ( pipes[i].mode == '\0' ) {
37 p = pipes+i;
38 break;
39 }
40 }
41
42 if ( p == NULL ) {
43 /* need to extend array */
44 if ( (p=realloc(pipes, sizeof(pipe_data)*(num_pipes+10))) == NULL ) {
45 return NULL;
46 }
47
48 pipes = p;
49 for ( i=0; i<10; ++i ) {
50 memset(pipes+num_pipes+i, 0, sizeof(pipe_data));
51 }
52 p = pipes + num_pipes;
53 num_pipes += 10;
54 }
55
56 p->pipe[0] = INVALID_HANDLE_VALUE;
57 p->pipe[1] = INVALID_HANDLE_VALUE;
58
59 return p;
60}
61
62FILE *mingw_popen(const char *cmd, const char *mode)
63{
64 pipe_data *p;
65 FILE *fptr = NULL;
66 STARTUPINFO siStartInfo;
67 int success;
68 int fd;
69 int len, count;
70 int ip, ic;
71 char *cmd_buff = NULL;
72 const char *s;
73 char *t;
74
75 if ( cmd == NULL || *cmd == '\0' || mode == NULL ||
76 (*mode != 'r' && *mode != 'w') ) {
77 return NULL;
78 }
79
80 /* find an unused pipe structure */
81 if ( (p=find_pipe()) == NULL ) {
82 return NULL;
83 }
84
85 /* count double quotes */
86 count = 0;
87 for ( s=cmd; *s; ++s ) {
88 if ( *s == '"' ) {
89 ++count;
90 }
91 }
92
93 len = strlen(cmd) + 10 + count;
94 if ( (cmd_buff=malloc(len)) == NULL ) {
95 return NULL;
96 }
97
98 /* escape double quotes */
99 strcpy(cmd_buff, "sh -c \"");
100 for ( s=cmd,t=cmd_buff+strlen(cmd_buff); *s; ++s ) {
101 if ( *s == '"' ) {
102 *t++ = '\\';
103 }
104 *t++ = *s;
105 }
106 *t++ = '"';
107 *t = '\0';
108
109 /* Create the pipe */
110 if ( mingw_pipe(p->pipe) == -1 ) {
111 goto finito;
112 }
113
114 /* index of parent end of pipe */
115 ip = !(*mode == 'r');
116 /* index of child end of pipe */
117 ic = (*mode == 'r');
118
119 /* Make the parent end of the pipe non-inheritable */
120 SetHandleInformation(p->pipe[ip], HANDLE_FLAG_INHERIT, 0);
121
122 /* Now create the child process */
123 ZeroMemory(&siStartInfo, sizeof(STARTUPINFO));
124 siStartInfo.cb = sizeof(STARTUPINFO);
125 if ( *mode == 'r' ) {
126 siStartInfo.hStdInput = GetStdHandle(STD_INPUT_HANDLE);
127 siStartInfo.hStdOutput = p->pipe[ic];
128 }
129 else {
130 siStartInfo.hStdInput = p->pipe[ic];
131 siStartInfo.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE);
132 }
133 siStartInfo.hStdError = GetStdHandle(STD_ERROR_HANDLE);
134 siStartInfo.wShowWindow = SW_HIDE;
135 siStartInfo.dwFlags = STARTF_USESTDHANDLES|STARTF_USESHOWWINDOW;
136
137 success = CreateProcess(NULL,
138 (LPTSTR)cmd_buff, /* command line */
139 NULL, /* process security attributes */
140 NULL, /* primary thread security attributes */
141 TRUE, /* handles are inherited */
142 0, /* creation flags */
143 NULL, /* use parent's environment */
144 NULL, /* use parent's current directory */
145 &siStartInfo, /* STARTUPINFO pointer */
146 &p->piProcInfo); /* receives PROCESS_INFORMATION */
147
148 if ( !success ) {
149 goto finito;
150 }
151
152 /* close child end of pipe */
153 CloseHandle(p->pipe[ic]);
154 p->pipe[ic] = INVALID_HANDLE_VALUE;
155
156 if ( *mode == 'r' ) {
157 fd = _open_osfhandle((intptr_t)p->pipe[ip], _O_RDONLY|_O_BINARY);
158 fptr = _fdopen(fd, "rb");
159 }
160 else {
161 fd = _open_osfhandle((intptr_t)p->pipe[ip], _O_WRONLY|_O_BINARY);
162 fptr = _fdopen(fd, "wb");
163 }
164
165finito:
166 if ( !fptr ) {
167 if ( p->pipe[0] != INVALID_HANDLE_VALUE ) {
168 CloseHandle(p->pipe[0]);
169 }
170 if ( p->pipe[1] != INVALID_HANDLE_VALUE ) {
171 CloseHandle(p->pipe[1]);
172 }
173 }
174 else {
175 p->mode = *mode;
176 p->fd = fd;
177 }
178 free(cmd_buff);
179
180 return fptr;
181}
182
183/*
184 * Open a pipe to a command where the file descriptor fd0 is used
185 * as input to the command (read mode) or as the destination of the
186 * output from the command (write mode). The pid of the command is
187 * returned in the variable pid, which can be NULL.
188 */
189int mingw_popen_fd(const char *cmd, const char *mode, int fd0, pid_t *pid)
190{
191 pipe_data *p;
192 STARTUPINFO siStartInfo;
193 int success;
194 int fd = -1;
195 int ip, ic;
196
197 if ( cmd == NULL || *cmd == '\0' || mode == NULL ||
198 (*mode != 'r' && *mode != 'w') ) {
199 return -1;
200 }
201
202 /* find an unused pipe structure */
203 if ( (p=find_pipe()) == NULL ) {
204 return -1;
205 }
206
207 /* Create the pipe */
208 if ( mingw_pipe(p->pipe) == -1 ) {
209 goto finito;
210 }
211
212 /* index of parent end of pipe */
213 ip = !(*mode == 'r');
214 /* index of child end of pipe */
215 ic = (*mode == 'r');
216
217 /* Make the parent end of the pipe non-inheritable */
218 SetHandleInformation(p->pipe[ip], HANDLE_FLAG_INHERIT, 0);
219
220 /* Now create the child process */
221 ZeroMemory(&siStartInfo, sizeof(STARTUPINFO));
222 siStartInfo.cb = sizeof(STARTUPINFO);
223 if ( *mode == 'r' ) {
224 siStartInfo.hStdInput = (HANDLE)_get_osfhandle(fd0);
225 siStartInfo.hStdOutput = p->pipe[ic];
226 }
227 else {
228 siStartInfo.hStdInput = p->pipe[ic];
229 siStartInfo.hStdOutput = (HANDLE)_get_osfhandle(fd0);
230 }
231 siStartInfo.hStdError = GetStdHandle(STD_ERROR_HANDLE);
232 siStartInfo.wShowWindow = SW_HIDE;
233 siStartInfo.dwFlags = STARTF_USESTDHANDLES|STARTF_USESHOWWINDOW;
234
235 success = CreateProcess(NULL,
236 (LPTSTR)cmd, /* command line */
237 NULL, /* process security attributes */
238 NULL, /* primary thread security attributes */
239 TRUE, /* handles are inherited */
240 0, /* creation flags */
241 NULL, /* use parent's environment */
242 NULL, /* use parent's current directory */
243 &siStartInfo, /* STARTUPINFO pointer */
244 &p->piProcInfo); /* receives PROCESS_INFORMATION */
245
246 if ( !success ) {
247 goto finito;
248 }
249
250 /* close child end of pipe */
251 CloseHandle(p->pipe[ic]);
252 p->pipe[ic] = INVALID_HANDLE_VALUE;
253
254 if ( *mode == 'r' ) {
255 fd = _open_osfhandle((intptr_t)p->pipe[ip], _O_RDONLY|_O_BINARY);
256 }
257 else {
258 fd = _open_osfhandle((intptr_t)p->pipe[ip], _O_WRONLY|_O_BINARY);
259 }
260
261finito:
262 if ( fd == -1 ) {
263 if ( p->pipe[0] != INVALID_HANDLE_VALUE ) {
264 CloseHandle(p->pipe[0]);
265 }
266 if ( p->pipe[1] != INVALID_HANDLE_VALUE ) {
267 CloseHandle(p->pipe[1]);
268 }
269 }
270 else {
271 p->mode = *mode;
272 p->fd = fd;
273 if ( pid ) {
274 *pid = (pid_t)p->piProcInfo.dwProcessId;
275 }
276 }
277
278 return fd;
279}
280
281int mingw_pclose(FILE *fp)
282{
283 int i, ip, fd;
284 pipe_data *p = NULL;
285 DWORD ret;
286
287 if ( fp == NULL ) {
288 return -1;
289 }
290
291 /* find struct containing fd */
292 fd = fileno(fp);
293 for ( i=0; i<num_pipes; ++i ) {
294 if ( pipes[i].mode && pipes[i].fd == fd ) {
295 p = pipes+i;
296 break;
297 }
298 }
299
300 if ( p == NULL ) {
301 /* no pipe data, maybe fd isn't a pipe? */
302 return -1;
303 }
304
305 fclose(fp);
306
307 ip = !(p->mode == 'r');
308 CloseHandle(p->pipe[ip]);
309
310 ret = WaitForSingleObject(p->piProcInfo.hProcess, INFINITE);
311
312 CloseHandle(p->piProcInfo.hProcess);
313 CloseHandle(p->piProcInfo.hThread);
314
315 p->mode = '\0';
316
317 return (ret == WAIT_OBJECT_0) ? 0 : -1;
318}
diff --git a/win32/process.c b/win32/process.c
new file mode 100644
index 000000000..968ea9afd
--- /dev/null
+++ b/win32/process.c
@@ -0,0 +1,425 @@
1#include "libbb.h"
2#include <tlhelp32.h>
3
4int waitpid(pid_t pid, int *status, int options)
5{
6 HANDLE proc;
7 intptr_t ret;
8
9 /* Windows does not understand parent-child */
10 if (pid > 0 && options == 0) {
11 if ( (proc=OpenProcess(SYNCHRONIZE|PROCESS_QUERY_INFORMATION,
12 FALSE, pid)) != NULL ) {
13 ret = _cwait(status, (intptr_t)proc, 0);
14 CloseHandle(proc);
15 return ret == -1 ? -1 : pid;
16 }
17 }
18 errno = EINVAL;
19 return -1;
20}
21
22const char *
23next_path_sep(const char *path)
24{
25 static const char *from = NULL, *to;
26 static int has_semicolon;
27 int len = strlen(path);
28
29 if (!from || !(path >= from && path+len <= to)) {
30 from = path;
31 to = from+len;
32 has_semicolon = strchr(path, ';') != NULL;
33 }
34
35 /* Semicolons take precedence, it's Windows PATH */
36 if (has_semicolon)
37 return strchr(path, ';');
38 /* PATH=C:, not really a separator */
39 return strchr(has_dos_drive_prefix(path) ? path+2 : path, ':');
40}
41
42#define MAX_OPT 10
43
44static const char *
45parse_interpreter(const char *cmd, char ***opts, int *nopts)
46{
47 static char buf[100], *opt[MAX_OPT];
48 char *p, *s, *t;
49 int n, fd;
50
51 *nopts = 0;
52 *opts = opt;
53
54 /* don't even try a .exe */
55 n = strlen(cmd);
56 if (n >= 4 &&
57 (!strcasecmp(cmd+n-4, ".exe") ||
58 !strcasecmp(cmd+n-4, ".com")))
59 return NULL;
60
61 fd = open(cmd, O_RDONLY);
62 if (fd < 0)
63 return NULL;
64 n = read(fd, buf, sizeof(buf)-1);
65 close(fd);
66 if (n < 4) /* at least '#!/x' and not error */
67 return NULL;
68
69 /*
70 * See http://www.in-ulm.de/~mascheck/various/shebang/ for trivia
71 * relating to '#!'.
72 */
73 if (buf[0] != '#' || buf[1] != '!')
74 return NULL;
75 buf[n] = '\0';
76 p = strchr(buf, '\n');
77 if (!p)
78 return NULL;
79 *p = '\0';
80
81 /* remove trailing whitespace */
82 while ( isspace(*--p) ) {
83 *p = '\0';
84 }
85
86 /* skip whitespace after '#!' */
87 for ( s=buf+2; *s && isspace(*s); ++s ) {
88 }
89
90 /* move to end of interpreter path (which may not contain spaces) */
91 for ( ; *s && !isspace(*s); ++s ) {
92 }
93
94 n = 0;
95 if ( *s != '\0' ) {
96 /* there are options */
97 *s++ = '\0';
98
99 while ( (t=strtok(s, " \t")) && n < MAX_OPT ) {
100 s = NULL;
101 opt[n++] = t;
102 }
103 }
104
105 /* find interpreter name */
106 if (!(p = strrchr(buf+2, '/')))
107 return NULL;
108
109 *nopts = n;
110 *opts = opt;
111
112 return p+1;
113}
114
115/*
116 * See http://msdn2.microsoft.com/en-us/library/17w5ykft(vs.71).aspx
117 * (Parsing C++ Command-Line Arguments)
118 */
119static char *
120quote_arg(const char *arg)
121{
122 int len = 0, n = 0;
123 int force_quotes = 0;
124 char *q, *d;
125 const char *p = arg;
126
127 /* empty arguments must be quoted */
128 if (!*p) {
129 force_quotes = 1;
130 }
131
132 while (*p) {
133 if (isspace(*p)) {
134 /* arguments containing whitespace must be quoted */
135 force_quotes = 1;
136 }
137 else if (*p == '"') {
138 /* double quotes in arguments need to be escaped */
139 n++;
140 }
141 else if (*p == '\\') {
142 /* count contiguous backslashes */
143 int count = 0;
144 while (*p == '\\') {
145 count++;
146 p++;
147 len++;
148 }
149
150 /*
151 * Only escape backslashes before explicit double quotes or
152 * or where the backslashes are at the end of an argument
153 * that is scheduled to be quoted.
154 */
155 if (*p == '"' || (force_quotes && *p == '\0')) {
156 n += count*2 + 1;
157 }
158
159 if (*p == '\0') {
160 break;
161 }
162 continue;
163 }
164 len++;
165 p++;
166 }
167
168 if (!force_quotes && n == 0) {
169 return (char*)arg;
170 }
171
172 /* insert double quotes and backslashes where necessary */
173 d = q = xmalloc(len+n+3);
174 if (force_quotes) {
175 *d++ = '"';
176 }
177
178 while (*arg) {
179 if (*arg == '"') {
180 *d++ = '\\';
181 }
182 else if (*arg == '\\') {
183 int count = 0;
184 while (*arg == '\\') {
185 count++;
186 *d++ = *arg++;
187 }
188
189 if (*arg == '"' || (force_quotes && *arg == '\0')) {
190 while (count-- > 0) {
191 *d++ = '\\';
192 }
193 if (*arg == '"') {
194 *d++ = '\\';
195 }
196 }
197 }
198 if (*arg != '\0') {
199 *d++ = *arg++;
200 }
201 }
202 if (force_quotes) {
203 *d++ = '"';
204 }
205 *d = '\0';
206
207 return q;
208}
209
210static intptr_t
211spawnveq(int mode, const char *path, const char *const *argv, const char *const *env)
212{
213 char **new_argv;
214 int i, argc = 0;
215 intptr_t ret;
216
217 if (!argv) {
218 const char *empty_argv[] = { path, NULL };
219 return spawnve(mode, path, empty_argv, env);
220 }
221
222
223 while (argv[argc])
224 argc++;
225
226 new_argv = malloc(sizeof(*argv)*(argc+1));
227 for (i = 0;i < argc;i++)
228 new_argv[i] = quote_arg(argv[i]);
229 new_argv[argc] = NULL;
230 ret = spawnve(mode, path, (const char *const *)new_argv, env);
231 for (i = 0;i < argc;i++)
232 if (new_argv[i] != argv[i])
233 free(new_argv[i]);
234 free(new_argv);
235 return ret;
236}
237
238#if ENABLE_FEATURE_PREFER_APPLETS || ENABLE_FEATURE_SH_STANDALONE
239static intptr_t
240mingw_spawn_applet(int mode,
241 const char *const *argv,
242 const char *const *envp)
243{
244 return spawnveq(mode, bb_busybox_exec_path, argv, envp);
245}
246#endif
247
248static intptr_t
249mingw_spawn_interpreter(int mode, const char *prog, const char *const *argv, const char *const *envp)
250{
251 intptr_t ret;
252 char **opts;
253 int nopts;
254 const char *interpr = parse_interpreter(prog, &opts, &nopts);
255 const char **new_argv;
256 int argc = 0;
257
258 if (!interpr)
259 return spawnveq(mode, prog, argv, envp);
260
261
262 while (argv[argc])
263 argc++;
264 new_argv = malloc(sizeof(*argv)*(argc+nopts+2));
265 memcpy(new_argv+1, opts, sizeof(*opts)*nopts);
266 memcpy(new_argv+nopts+2, argv+1, sizeof(*argv)*argc);
267 new_argv[nopts+1] = prog; /* pass absolute path */
268
269#if ENABLE_FEATURE_PREFER_APPLETS || ENABLE_FEATURE_SH_STANDALONE
270 if (find_applet_by_name(interpr) >= 0) {
271 new_argv[0] = interpr;
272 ret = mingw_spawn_applet(mode, new_argv, envp);
273 } else
274#endif
275 {
276 char *path = xstrdup(getenv("PATH"));
277 char *tmp = path;
278 char *iprog = find_executable(interpr, &tmp);
279 free(path);
280 if (!iprog) {
281 free(new_argv);
282 errno = ENOENT;
283 return -1;
284 }
285 new_argv[0] = iprog;
286 ret = spawnveq(mode, iprog, new_argv, envp);
287 free(iprog);
288 }
289
290 free(new_argv);
291 return ret;
292}
293
294static intptr_t
295mingw_spawn_1(int mode, const char *cmd, const char *const *argv, const char *const *envp)
296{
297 intptr_t ret;
298
299#if ENABLE_FEATURE_PREFER_APPLETS || ENABLE_FEATURE_SH_STANDALONE
300 if (find_applet_by_name(cmd) >= 0)
301 return mingw_spawn_applet(mode, argv, envp);
302 else
303#endif
304 if (strchr(cmd, '/') || strchr(cmd, '\\'))
305 return mingw_spawn_interpreter(mode, cmd, argv, envp);
306 else {
307 char *tmp, *path = getenv("PATH");
308 char *prog;
309
310 if (!path) {
311 errno = ENOENT;
312 return -1;
313 }
314
315 /* executable_exists() does not return new file name */
316 tmp = path = xstrdup(path);
317 prog = find_executable(cmd, &tmp);
318 free(path);
319 if (!prog) {
320 errno = ENOENT;
321 return -1;
322 }
323 ret = mingw_spawn_interpreter(mode, prog, argv, envp);
324 free(prog);
325 }
326 return ret;
327}
328
329pid_t FAST_FUNC
330mingw_spawn(char **argv)
331{
332 intptr_t ret;
333
334 ret = mingw_spawn_1(P_NOWAIT, argv[0], (const char *const *)argv,
335 (const char *const *)environ);
336
337 return ret == -1 ? -1 : GetProcessId((HANDLE)ret);
338}
339
340intptr_t FAST_FUNC
341mingw_spawn_proc(char **argv)
342{
343 return mingw_spawn_1(P_NOWAIT, argv[0], (const char *const *)argv,
344 (const char *const *)environ);
345}
346
347int
348mingw_execvp(const char *cmd, const char *const *argv)
349{
350 int ret = (int)mingw_spawn_1(P_WAIT, cmd, argv, (const char *const *)environ);
351 if (ret != -1)
352 exit(ret);
353 return ret;
354}
355
356int
357mingw_execve(const char *cmd, const char *const *argv, const char *const *envp)
358{
359 int ret;
360 int mode = P_WAIT;
361
362 ret = (int)mingw_spawn_interpreter(mode, cmd, argv, envp);
363 if (ret != -1)
364 exit(ret);
365 return ret;
366}
367
368int
369mingw_execv(const char *cmd, const char *const *argv)
370{
371 return mingw_execve(cmd, argv, (const char *const *)environ);
372}
373
374/* POSIX version in libbb/procps.c */
375procps_status_t* FAST_FUNC procps_scan(procps_status_t* sp, int flags UNUSED_PARAM)
376{
377 PROCESSENTRY32 pe;
378
379 pe.dwSize = sizeof(pe);
380 if (!sp) {
381 sp = xzalloc(sizeof(struct procps_status_t));
382 sp->snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
383 if (sp->snapshot == INVALID_HANDLE_VALUE) {
384 free(sp);
385 return NULL;
386 }
387 if (!Process32First(sp->snapshot, &pe)) {
388 CloseHandle(sp->snapshot);
389 free(sp);
390 return NULL;
391 }
392 }
393 else {
394 if (!Process32Next(sp->snapshot, &pe)) {
395 CloseHandle(sp->snapshot);
396 free(sp);
397 return NULL;
398 }
399 }
400
401 sp->pid = pe.th32ProcessID;
402 safe_strncpy(sp->comm, pe.szExeFile, COMM_LEN);
403 return sp;
404}
405
406int kill(pid_t pid, int sig)
407{
408 HANDLE h;
409
410 if (pid > 0 && sig == SIGTERM) {
411 if ((h=OpenProcess(PROCESS_TERMINATE, FALSE, pid)) != NULL &&
412 TerminateProcess(h, 0)) {
413 CloseHandle(h);
414 return 0;
415 }
416
417 errno = err_win_to_posix(GetLastError());
418 if (h != NULL)
419 CloseHandle(h);
420 return -1;
421 }
422
423 errno = EINVAL;
424 return -1;
425}
diff --git a/win32/pwd.h b/win32/pwd.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/pwd.h
diff --git a/win32/regcomp.c b/win32/regcomp.c
new file mode 100644
index 000000000..dca7e6ef3
--- /dev/null
+++ b/win32/regcomp.c
@@ -0,0 +1,3886 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA. */
20
21#define UNUSED_PARAM __attribute__ ((__unused__))
22
23static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
24 size_t length, reg_syntax_t syntax);
25static void re_compile_fastmap_iter (regex_t *bufp,
26 const re_dfastate_t *init_state,
27 char *fastmap);
28static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
29#ifdef RE_ENABLE_I18N
30static void free_charset (re_charset_t *cset);
31#endif /* RE_ENABLE_I18N */
32static void free_workarea_compile (regex_t *preg);
33static reg_errcode_t create_initial_state (re_dfa_t *dfa);
34#ifdef RE_ENABLE_I18N
35static void optimize_utf8 (re_dfa_t *dfa);
36#endif
37static reg_errcode_t analyze (regex_t *preg);
38static reg_errcode_t preorder (bin_tree_t *root,
39 reg_errcode_t (fn (void *, bin_tree_t *)),
40 void *extra);
41static reg_errcode_t postorder (bin_tree_t *root,
42 reg_errcode_t (fn (void *, bin_tree_t *)),
43 void *extra);
44static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
45static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
46static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
47 bin_tree_t *node);
48static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
49static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
50static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
51static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
52static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
53 unsigned int constraint);
54static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
55static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
56 int node, int root);
57static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
58static int fetch_number (re_string_t *input, re_token_t *token,
59 reg_syntax_t syntax);
60static int peek_token (re_token_t *token, re_string_t *input,
61 reg_syntax_t syntax) internal_function;
62static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
63 reg_syntax_t syntax, reg_errcode_t *err);
64static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
65 re_token_t *token, reg_syntax_t syntax,
66 int nest, reg_errcode_t *err);
67static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
68 re_token_t *token, reg_syntax_t syntax,
69 int nest, reg_errcode_t *err);
70static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
71 re_token_t *token, reg_syntax_t syntax,
72 int nest, reg_errcode_t *err);
73static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
74 re_token_t *token, reg_syntax_t syntax,
75 int nest, reg_errcode_t *err);
76static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
77 re_dfa_t *dfa, re_token_t *token,
78 reg_syntax_t syntax, reg_errcode_t *err);
79static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
80 re_token_t *token, reg_syntax_t syntax,
81 reg_errcode_t *err);
82static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
83 re_string_t *regexp,
84 re_token_t *token, int token_len,
85 re_dfa_t *dfa,
86 reg_syntax_t syntax,
87 int accept_hyphen);
88static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
89 re_string_t *regexp,
90 re_token_t *token);
91#ifdef RE_ENABLE_I18N
92static reg_errcode_t build_equiv_class (bitset_t sbcset,
93 re_charset_t *mbcset,
94 int *equiv_class_alloc,
95 const unsigned char *name);
96static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
97 bitset_t sbcset,
98 re_charset_t *mbcset,
99 int *char_class_alloc,
100 const char *class_name,
101 reg_syntax_t syntax);
102#else /* not RE_ENABLE_I18N */
103static reg_errcode_t build_equiv_class (bitset_t sbcset,
104 const unsigned char *name);
105static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
106 bitset_t sbcset,
107 const char *class_name,
108 reg_syntax_t syntax);
109#endif /* not RE_ENABLE_I18N */
110static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
111 RE_TRANSLATE_TYPE trans,
112 const char *class_name,
113 const char *extra,
114 int non_match, reg_errcode_t *err);
115static bin_tree_t *create_tree (re_dfa_t *dfa,
116 bin_tree_t *left, bin_tree_t *right,
117 re_token_type_t type);
118static bin_tree_t *create_token_tree (re_dfa_t *dfa,
119 bin_tree_t *left, bin_tree_t *right,
120 const re_token_t *token);
121static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
122static void free_token (re_token_t *node);
123static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
124static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
125
126/* This table gives an error message for each of the error codes listed
127 in regex.h. Obviously the order here has to be same as there.
128 POSIX doesn't require that we do anything for REG_NOERROR,
129 but why not be nice? */
130
131const char __re_error_msgid[] attribute_hidden =
132 {
133#define REG_NOERROR_IDX 0
134 gettext_noop ("Success") /* REG_NOERROR */
135 "\0"
136#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
137 gettext_noop ("No match") /* REG_NOMATCH */
138 "\0"
139#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
140 gettext_noop ("Invalid regular expression") /* REG_BADPAT */
141 "\0"
142#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
143 gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
144 "\0"
145#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
146 gettext_noop ("Invalid character class name") /* REG_ECTYPE */
147 "\0"
148#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
149 gettext_noop ("Trailing backslash") /* REG_EESCAPE */
150 "\0"
151#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
152 gettext_noop ("Invalid back reference") /* REG_ESUBREG */
153 "\0"
154#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
155 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
156 "\0"
157#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
158 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
159 "\0"
160#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
161 gettext_noop ("Unmatched \\{") /* REG_EBRACE */
162 "\0"
163#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
164 gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
165 "\0"
166#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
167 gettext_noop ("Invalid range end") /* REG_ERANGE */
168 "\0"
169#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
170 gettext_noop ("Memory exhausted") /* REG_ESPACE */
171 "\0"
172#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
173 gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
174 "\0"
175#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
176 gettext_noop ("Premature end of regular expression") /* REG_EEND */
177 "\0"
178#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
179 gettext_noop ("Regular expression too big") /* REG_ESIZE */
180 "\0"
181#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
182 gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
183 };
184
185const size_t __re_error_msgid_idx[] attribute_hidden =
186 {
187 REG_NOERROR_IDX,
188 REG_NOMATCH_IDX,
189 REG_BADPAT_IDX,
190 REG_ECOLLATE_IDX,
191 REG_ECTYPE_IDX,
192 REG_EESCAPE_IDX,
193 REG_ESUBREG_IDX,
194 REG_EBRACK_IDX,
195 REG_EPAREN_IDX,
196 REG_EBRACE_IDX,
197 REG_BADBR_IDX,
198 REG_ERANGE_IDX,
199 REG_ESPACE_IDX,
200 REG_BADRPT_IDX,
201 REG_EEND_IDX,
202 REG_ESIZE_IDX,
203 REG_ERPAREN_IDX
204 };
205
206/* Entry points for GNU code. */
207
208
209#ifdef ZOS_USS
210
211/* For ZOS USS we must define btowc */
212
213wchar_t
214btowc (int c)
215{
216 wchar_t wtmp[2];
217 char tmp[2];
218
219 tmp[0] = c;
220 tmp[1] = 0;
221
222 mbtowc (wtmp, tmp, 1);
223 return wtmp[0];
224}
225#endif
226
227/* re_compile_pattern is the GNU regular expression compiler: it
228 compiles PATTERN (of length LENGTH) and puts the result in BUFP.
229 Returns 0 if the pattern was valid, otherwise an error string.
230
231 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
232 are set in BUFP on entry. */
233
234const char *
235re_compile_pattern (const char *pattern,
236 size_t length,
237 struct re_pattern_buffer *bufp)
238{
239 reg_errcode_t ret;
240
241 /* And GNU code determines whether or not to get register information
242 by passing null for the REGS argument to re_match, etc., not by
243 setting no_sub, unless RE_NO_SUB is set. */
244 bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
245
246 /* Match anchors at newline. */
247 bufp->newline_anchor = 1;
248
249 ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
250
251 if (!ret)
252 return NULL;
253 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
254}
255#ifdef _LIBC
256weak_alias (__re_compile_pattern, re_compile_pattern)
257#endif
258
259/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
260 also be assigned to arbitrarily: each pattern buffer stores its own
261 syntax, so it can be changed between regex compilations. */
262/* This has no initializer because initialized variables in Emacs
263 become read-only after dumping. */
264reg_syntax_t re_syntax_options;
265
266
267/* Specify the precise syntax of regexps for compilation. This provides
268 for compatibility for various utilities which historically have
269 different, incompatible syntaxes.
270
271 The argument SYNTAX is a bit mask comprised of the various bits
272 defined in regex.h. We return the old syntax. */
273
274reg_syntax_t
275re_set_syntax (reg_syntax_t syntax)
276{
277 reg_syntax_t ret = re_syntax_options;
278
279 re_syntax_options = syntax;
280 return ret;
281}
282#ifdef _LIBC
283weak_alias (__re_set_syntax, re_set_syntax)
284#endif
285
286int
287re_compile_fastmap (struct re_pattern_buffer *bufp)
288{
289 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
290 char *fastmap = bufp->fastmap;
291
292 memset (fastmap, '\0', sizeof (char) * SBC_MAX);
293 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
294 if (dfa->init_state != dfa->init_state_word)
295 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
296 if (dfa->init_state != dfa->init_state_nl)
297 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
298 if (dfa->init_state != dfa->init_state_begbuf)
299 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
300 bufp->fastmap_accurate = 1;
301 return 0;
302}
303#ifdef _LIBC
304weak_alias (__re_compile_fastmap, re_compile_fastmap)
305#endif
306
307static inline void
308__attribute ((always_inline))
309re_set_fastmap (char *fastmap, int icase, int ch)
310{
311 fastmap[ch] = 1;
312 if (icase)
313 fastmap[tolower (ch)] = 1;
314}
315
316/* Helper function for re_compile_fastmap.
317 Compile fastmap for the initial_state INIT_STATE. */
318
319static void
320re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
321 char *fastmap)
322{
323 volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
324 int node_cnt;
325 int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
326 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
327 {
328 int node = init_state->nodes.elems[node_cnt];
329 re_token_type_t type = dfa->nodes[node].type;
330
331 if (type == CHARACTER)
332 {
333 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
334#ifdef RE_ENABLE_I18N
335 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
336 {
337 unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p;
338 wchar_t wc;
339 mbstate_t state;
340
341 p = buf;
342 *p++ = dfa->nodes[node].opr.c;
343 while (++node < dfa->nodes_len
344 && dfa->nodes[node].type == CHARACTER
345 && dfa->nodes[node].mb_partial)
346 *p++ = dfa->nodes[node].opr.c;
347 memset (&state, '\0', sizeof (state));
348 if (__mbrtowc (&wc, (const char *) buf, p - buf,
349 &state) == p - buf
350 && (__wcrtomb ((char *) buf, towlower (wc), &state)
351 != (size_t) -1))
352 re_set_fastmap (fastmap, 0, buf[0]);
353 re_free (buf);
354 }
355#endif
356 }
357 else if (type == SIMPLE_BRACKET)
358 {
359 int i, ch;
360 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
361 {
362 int j;
363 bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
364 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
365 if (w & ((bitset_word_t) 1 << j))
366 re_set_fastmap (fastmap, icase, ch);
367 }
368 }
369#ifdef RE_ENABLE_I18N
370 else if (type == COMPLEX_BRACKET)
371 {
372 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
373 int i;
374
375# ifdef _LIBC
376 /* See if we have to try all bytes which start multiple collation
377 elements.
378 e.g. In da_DK, we want to catch 'a' since "aa" is a valid
379 collation element, and don't catch 'b' since 'b' is
380 the only collation element which starts from 'b' (and
381 it is caught by SIMPLE_BRACKET). */
382 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
383 && (cset->ncoll_syms || cset->nranges))
384 {
385 const int32_t *table = (const int32_t *)
386 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
387 for (i = 0; i < SBC_MAX; ++i)
388 if (table[i] < 0)
389 re_set_fastmap (fastmap, icase, i);
390 }
391# endif /* _LIBC */
392
393 /* See if we have to start the match at all multibyte characters,
394 i.e. where we would not find an invalid sequence. This only
395 applies to multibyte character sets; for single byte character
396 sets, the SIMPLE_BRACKET again suffices. */
397 if (dfa->mb_cur_max > 1
398 && (cset->nchar_classes || cset->non_match || cset->nranges
399# ifdef _LIBC
400 || cset->nequiv_classes
401# endif /* _LIBC */
402 ))
403 {
404 unsigned char c = 0;
405 do
406 {
407 mbstate_t mbs;
408 memset (&mbs, 0, sizeof (mbs));
409 if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
410 re_set_fastmap (fastmap, false, (int) c);
411 }
412 while (++c != 0);
413 }
414
415 else
416 {
417 /* ... Else catch all bytes which can start the mbchars. */
418 for (i = 0; i < cset->nmbchars; ++i)
419 {
420 char buf[256];
421 mbstate_t state;
422 memset (&state, '\0', sizeof (state));
423 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
424 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
425 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
426 {
427 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
428 != (size_t) -1)
429 re_set_fastmap (fastmap, false, *(unsigned char *) buf);
430 }
431 }
432 }
433 }
434#endif /* RE_ENABLE_I18N */
435 else if (type == OP_PERIOD
436#ifdef RE_ENABLE_I18N
437 || type == OP_UTF8_PERIOD
438#endif /* RE_ENABLE_I18N */
439 || type == END_OF_RE)
440 {
441 memset (fastmap, '\1', sizeof (char) * SBC_MAX);
442 if (type == END_OF_RE)
443 bufp->can_be_null = 1;
444 return;
445 }
446 }
447}
448
449/* Entry point for POSIX code. */
450/* regcomp takes a regular expression as a string and compiles it.
451
452 PREG is a regex_t *. We do not expect any fields to be initialized,
453 since POSIX says we shouldn't. Thus, we set
454
455 `buffer' to the compiled pattern;
456 `used' to the length of the compiled pattern;
457 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
458 REG_EXTENDED bit in CFLAGS is set; otherwise, to
459 RE_SYNTAX_POSIX_BASIC;
460 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
461 `fastmap' to an allocated space for the fastmap;
462 `fastmap_accurate' to zero;
463 `re_nsub' to the number of subexpressions in PATTERN.
464
465 PATTERN is the address of the pattern string.
466
467 CFLAGS is a series of bits which affect compilation.
468
469 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
470 use POSIX basic syntax.
471
472 If REG_NEWLINE is set, then . and [^...] don't match newline.
473 Also, regexec will try a match beginning after every newline.
474
475 If REG_ICASE is set, then we considers upper- and lowercase
476 versions of letters to be equivalent when matching.
477
478 If REG_NOSUB is set, then when PREG is passed to regexec, that
479 routine will report only success or failure, and nothing about the
480 registers.
481
482 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
483 the return codes and their meanings.) */
484
485int
486regcomp (regex_t *__restrict preg,
487 const char *__restrict pattern,
488 int cflags)
489{
490 reg_errcode_t ret;
491 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
492 : RE_SYNTAX_POSIX_BASIC);
493
494 preg->buffer = NULL;
495 preg->allocated = 0;
496 preg->used = 0;
497
498 /* Try to allocate space for the fastmap. */
499 preg->fastmap = re_malloc (char, SBC_MAX);
500 if (BE (preg->fastmap == NULL, 0))
501 return REG_ESPACE;
502
503 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
504
505 /* If REG_NEWLINE is set, newlines are treated differently. */
506 if (cflags & REG_NEWLINE)
507 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
508 syntax &= ~RE_DOT_NEWLINE;
509 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
510 /* It also changes the matching behavior. */
511 preg->newline_anchor = 1;
512 }
513 else
514 preg->newline_anchor = 0;
515 preg->no_sub = !!(cflags & REG_NOSUB);
516 preg->translate = NULL;
517
518 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
519
520 /* POSIX doesn't distinguish between an unmatched open-group and an
521 unmatched close-group: both are REG_EPAREN. */
522 if (ret == REG_ERPAREN)
523 ret = REG_EPAREN;
524
525 /* We have already checked preg->fastmap != NULL. */
526 if (BE (ret == REG_NOERROR, 1))
527 /* Compute the fastmap now, since regexec cannot modify the pattern
528 buffer. This function never fails in this implementation. */
529 (void) re_compile_fastmap (preg);
530 else
531 {
532 /* Some error occurred while compiling the expression. */
533 re_free (preg->fastmap);
534 preg->fastmap = NULL;
535 }
536
537 return (int) ret;
538}
539#ifdef _LIBC
540weak_alias (__regcomp, regcomp)
541#endif
542
543/* Returns a message corresponding to an error code, ERRCODE, returned
544 from either regcomp or regexec. We don't use PREG here. */
545
546size_t
547regerror(int errcode, UNUSED_PARAM const regex_t *__restrict preg,
548 char *__restrict errbuf, size_t errbuf_size)
549{
550 const char *msg;
551 size_t msg_size;
552
553 if (BE (errcode < 0
554 || errcode >= (int) (sizeof (__re_error_msgid_idx)
555 / sizeof (__re_error_msgid_idx[0])), 0))
556 /* Only error codes returned by the rest of the code should be passed
557 to this routine. If we are given anything else, or if other regex
558 code generates an invalid error code, then the program has a bug.
559 Dump core so we can fix it. */
560 abort ();
561
562 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
563
564 msg_size = strlen (msg) + 1; /* Includes the null. */
565
566 if (BE (errbuf_size != 0, 1))
567 {
568 if (BE (msg_size > errbuf_size, 0))
569 {
570 memcpy (errbuf, msg, errbuf_size - 1);
571 errbuf[errbuf_size - 1] = 0;
572 }
573 else
574 memcpy (errbuf, msg, msg_size);
575 }
576
577 return msg_size;
578}
579#ifdef _LIBC
580weak_alias (__regerror, regerror)
581#endif
582
583
584#ifdef RE_ENABLE_I18N
585/* This static array is used for the map to single-byte characters when
586 UTF-8 is used. Otherwise we would allocate memory just to initialize
587 it the same all the time. UTF-8 is the preferred encoding so this is
588 a worthwhile optimization. */
589#if __GNUC__ >= 3
590static const bitset_t utf8_sb_map = {
591 /* Set the first 128 bits. */
592 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
593};
594#else /* ! (__GNUC__ >= 3) */
595static bitset_t utf8_sb_map;
596#endif /* __GNUC__ >= 3 */
597#endif /* RE_ENABLE_I18N */
598
599
600static void
601free_dfa_content (re_dfa_t *dfa)
602{
603 int i, j;
604
605 if (dfa->nodes)
606 for (i = 0; i < dfa->nodes_len; ++i)
607 free_token (dfa->nodes + i);
608 re_free (dfa->nexts);
609 for (i = 0; i < dfa->nodes_len; ++i)
610 {
611 if (dfa->eclosures != NULL)
612 re_node_set_free (dfa->eclosures + i);
613 if (dfa->inveclosures != NULL)
614 re_node_set_free (dfa->inveclosures + i);
615 if (dfa->edests != NULL)
616 re_node_set_free (dfa->edests + i);
617 }
618 re_free (dfa->edests);
619 re_free (dfa->eclosures);
620 re_free (dfa->inveclosures);
621 re_free (dfa->nodes);
622
623 if (dfa->state_table)
624 for (i = 0; i <= dfa->state_hash_mask; ++i)
625 {
626 struct re_state_table_entry *entry = dfa->state_table + i;
627 for (j = 0; j < entry->num; ++j)
628 {
629 re_dfastate_t *state = entry->array[j];
630 free_state (state);
631 }
632 re_free (entry->array);
633 }
634 re_free (dfa->state_table);
635#ifdef RE_ENABLE_I18N
636 if (dfa->sb_char != utf8_sb_map)
637 re_free (dfa->sb_char);
638#endif
639 re_free (dfa->subexp_map);
640#ifdef DEBUG
641 re_free (dfa->re_str);
642#endif
643
644 re_free (dfa);
645}
646
647
648/* Free dynamically allocated space used by PREG. */
649
650void
651regfree (regex_t *preg)
652{
653 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
654 if (BE (dfa != NULL, 1))
655 free_dfa_content (dfa);
656 preg->buffer = NULL;
657 preg->allocated = 0;
658
659 re_free (preg->fastmap);
660 preg->fastmap = NULL;
661
662 re_free (preg->translate);
663 preg->translate = NULL;
664}
665#ifdef _LIBC
666weak_alias (__regfree, regfree)
667#endif
668
669/* Entry points compatible with 4.2 BSD regex library. We don't define
670 them unless specifically requested. */
671
672#if defined _REGEX_RE_COMP || defined _LIBC
673
674/* BSD has one and only one pattern buffer. */
675static struct re_pattern_buffer re_comp_buf;
676
677char *
678# ifdef _LIBC
679/* Make these definitions weak in libc, so POSIX programs can redefine
680 these names if they don't use our functions, and still use
681 regcomp/regexec above without link errors. */
682weak_function
683# endif
684re_comp (s)
685 const char *s;
686{
687 reg_errcode_t ret;
688 char *fastmap;
689
690 if (!s)
691 {
692 if (!re_comp_buf.buffer)
693 return gettext ("No previous regular expression");
694 return 0;
695 }
696
697 if (re_comp_buf.buffer)
698 {
699 fastmap = re_comp_buf.fastmap;
700 re_comp_buf.fastmap = NULL;
701 __regfree (&re_comp_buf);
702 memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
703 re_comp_buf.fastmap = fastmap;
704 }
705
706 if (re_comp_buf.fastmap == NULL)
707 {
708 re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
709 if (re_comp_buf.fastmap == NULL)
710 return (char *) gettext (__re_error_msgid
711 + __re_error_msgid_idx[(int) REG_ESPACE]);
712 }
713
714 /* Since `re_exec' always passes NULL for the `regs' argument, we
715 don't need to initialize the pattern buffer fields which affect it. */
716
717 /* Match anchors at newlines. */
718 re_comp_buf.newline_anchor = 1;
719
720 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
721
722 if (!ret)
723 return NULL;
724
725 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
726 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
727}
728
729#ifdef _LIBC
730libc_freeres_fn (free_mem)
731{
732 __regfree (&re_comp_buf);
733}
734#endif
735
736#endif /* _REGEX_RE_COMP */
737
738/* Internal entry point.
739 Compile the regular expression PATTERN, whose length is LENGTH.
740 SYNTAX indicate regular expression's syntax. */
741
742static reg_errcode_t
743re_compile_internal (regex_t *preg, const char * pattern, size_t length,
744 reg_syntax_t syntax)
745{
746 reg_errcode_t err = REG_NOERROR;
747 re_dfa_t *dfa;
748 re_string_t regexp;
749
750 /* Initialize the pattern buffer. */
751 preg->fastmap_accurate = 0;
752 preg->syntax = syntax;
753 preg->not_bol = preg->not_eol = 0;
754 preg->used = 0;
755 preg->re_nsub = 0;
756 preg->can_be_null = 0;
757 preg->regs_allocated = REGS_UNALLOCATED;
758
759 /* Initialize the dfa. */
760 dfa = (re_dfa_t *) preg->buffer;
761 if (BE (preg->allocated < sizeof (re_dfa_t), 0))
762 {
763 /* If zero allocated, but buffer is non-null, try to realloc
764 enough space. This loses if buffer's address is bogus, but
765 that is the user's responsibility. If ->buffer is NULL this
766 is a simple allocation. */
767 dfa = re_realloc (preg->buffer, re_dfa_t, 1);
768 if (dfa == NULL)
769 return REG_ESPACE;
770 preg->allocated = sizeof (re_dfa_t);
771 preg->buffer = (unsigned char *) dfa;
772 }
773 preg->used = sizeof (re_dfa_t);
774
775 err = init_dfa (dfa, length);
776 if (BE (err != REG_NOERROR, 0))
777 {
778 free_dfa_content (dfa);
779 preg->buffer = NULL;
780 preg->allocated = 0;
781 return err;
782 }
783#ifdef DEBUG
784 /* Note: length+1 will not overflow since it is checked in init_dfa. */
785 dfa->re_str = re_malloc (char, length + 1);
786 strncpy (dfa->re_str, pattern, length + 1);
787#endif
788
789 __libc_lock_init (dfa->lock);
790
791 err = re_string_construct (&regexp, pattern, length, preg->translate,
792 syntax & RE_ICASE, dfa);
793 if (BE (err != REG_NOERROR, 0))
794 {
795 re_compile_internal_free_return:
796 free_workarea_compile (preg);
797 re_string_destruct (&regexp);
798 free_dfa_content (dfa);
799 preg->buffer = NULL;
800 preg->allocated = 0;
801 return err;
802 }
803
804 /* Parse the regular expression, and build a structure tree. */
805 preg->re_nsub = 0;
806 dfa->str_tree = parse (&regexp, preg, syntax, &err);
807 if (BE (dfa->str_tree == NULL, 0))
808 goto re_compile_internal_free_return;
809
810 /* Analyze the tree and create the nfa. */
811 err = analyze (preg);
812 if (BE (err != REG_NOERROR, 0))
813 goto re_compile_internal_free_return;
814
815#ifdef RE_ENABLE_I18N
816 /* If possible, do searching in single byte encoding to speed things up. */
817 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
818 optimize_utf8 (dfa);
819#endif
820
821 /* Then create the initial state of the dfa. */
822 err = create_initial_state (dfa);
823
824 /* Release work areas. */
825 free_workarea_compile (preg);
826 re_string_destruct (&regexp);
827
828 if (BE (err != REG_NOERROR, 0))
829 {
830 free_dfa_content (dfa);
831 preg->buffer = NULL;
832 preg->allocated = 0;
833 }
834
835 return err;
836}
837
838/* Initialize DFA. We use the length of the regular expression PAT_LEN
839 as the initial length of some arrays. */
840
841static reg_errcode_t
842init_dfa (re_dfa_t *dfa, size_t pat_len)
843{
844 unsigned int table_size;
845#ifndef _LIBC
846 const char *codeset_name;
847#endif
848
849 memset (dfa, '\0', sizeof (re_dfa_t));
850
851 /* Force allocation of str_tree_storage the first time. */
852 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
853
854 /* Avoid overflows. */
855 if (pat_len == SIZE_MAX)
856 return REG_ESPACE;
857
858 dfa->nodes_alloc = pat_len + 1;
859 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
860
861 /* table_size = 2 ^ ceil(log pat_len) */
862 for (table_size = 1; ; table_size <<= 1)
863 if (table_size > pat_len)
864 break;
865
866 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
867 dfa->state_hash_mask = table_size - 1;
868
869 dfa->mb_cur_max = MB_CUR_MAX;
870#ifdef _LIBC
871 if (dfa->mb_cur_max == 6
872 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
873 dfa->is_utf8 = 1;
874 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
875 != 0);
876#else
877# ifdef HAVE_LANGINFO_CODESET
878 codeset_name = nl_langinfo (CODESET);
879# else
880 codeset_name = getenv ("LC_ALL");
881 if (codeset_name == NULL || codeset_name[0] == '\0')
882 codeset_name = getenv ("LC_CTYPE");
883 if (codeset_name == NULL || codeset_name[0] == '\0')
884 codeset_name = getenv ("LANG");
885 if (codeset_name == NULL)
886 codeset_name = "";
887 else if (strchr (codeset_name, '.') != NULL)
888 codeset_name = strchr (codeset_name, '.') + 1;
889# endif
890
891 /* strcasecmp isn't a standard interface. brute force check */
892#if 0
893 if (strcasecmp (codeset_name, "UTF-8") == 0
894 || strcasecmp (codeset_name, "UTF8") == 0)
895 dfa->is_utf8 = 1;
896#else
897 if ( (codeset_name[0] == 'U' || codeset_name[0] == 'u')
898 && (codeset_name[1] == 'T' || codeset_name[1] == 't')
899 && (codeset_name[2] == 'F' || codeset_name[2] == 'f')
900 && (codeset_name[3] == '-'
901 ? codeset_name[4] == '8' && codeset_name[5] == '\0'
902 : codeset_name[3] == '8' && codeset_name[4] == '\0'))
903 dfa->is_utf8 = 1;
904#endif
905
906 /* We check exhaustively in the loop below if this charset is a
907 superset of ASCII. */
908 dfa->map_notascii = 0;
909#endif
910
911#ifdef RE_ENABLE_I18N
912 if (dfa->mb_cur_max > 1)
913 {
914 if (dfa->is_utf8)
915 {
916#if !defined(__GNUC__) || __GNUC__ < 3
917 static short utf8_sb_map_inited = 0;
918
919 if (! utf8_sb_map_inited)
920 {
921 int i;
922
923 utf8_sb_map_inited = 0;
924 for (i = 0; i <= 0x80 / BITSET_WORD_BITS - 1; i++)
925 utf8_sb_map[i] = BITSET_WORD_MAX;
926 }
927#endif
928 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
929 }
930 else
931 {
932 int i, j, ch;
933
934 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
935 if (BE (dfa->sb_char == NULL, 0))
936 return REG_ESPACE;
937
938 /* Set the bits corresponding to single byte chars. */
939 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
940 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
941 {
942 wint_t wch = __btowc (ch);
943 if (wch != WEOF)
944 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
945# ifndef _LIBC
946 if (isascii (ch) && wch != ch)
947 dfa->map_notascii = 1;
948# endif
949 }
950 }
951 }
952#endif
953
954 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
955 return REG_ESPACE;
956 return REG_NOERROR;
957}
958
959/* Initialize WORD_CHAR table, which indicate which character is
960 "word". In this case "word" means that it is the word construction
961 character used by some operators like "\<", "\>", etc. */
962
963static void
964internal_function
965init_word_char (re_dfa_t *dfa)
966{
967 int i, j, ch;
968 dfa->word_ops_used = 1;
969 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
970 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
971 if (isalnum (ch) || ch == '_')
972 dfa->word_char[i] |= (bitset_word_t) 1 << j;
973}
974
975/* Free the work area which are only used while compiling. */
976
977static void
978free_workarea_compile (regex_t *preg)
979{
980 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
981 bin_tree_storage_t *storage, *next;
982 for (storage = dfa->str_tree_storage; storage; storage = next)
983 {
984 next = storage->next;
985 re_free (storage);
986 }
987 dfa->str_tree_storage = NULL;
988 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
989 dfa->str_tree = NULL;
990 re_free (dfa->org_indices);
991 dfa->org_indices = NULL;
992}
993
994/* Create initial states for all contexts. */
995
996static reg_errcode_t
997create_initial_state (re_dfa_t *dfa)
998{
999 int first, i;
1000 reg_errcode_t err;
1001 re_node_set init_nodes;
1002
1003 /* Initial states have the epsilon closure of the node which is
1004 the first node of the regular expression. */
1005 first = dfa->str_tree->first->node_idx;
1006 dfa->init_node = first;
1007 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
1008 if (BE (err != REG_NOERROR, 0))
1009 return err;
1010
1011 /* The back-references which are in initial states can epsilon transit,
1012 since in this case all of the subexpressions can be null.
1013 Then we add epsilon closures of the nodes which are the next nodes of
1014 the back-references. */
1015 if (dfa->nbackref > 0)
1016 for (i = 0; i < init_nodes.nelem; ++i)
1017 {
1018 int node_idx = init_nodes.elems[i];
1019 re_token_type_t type = dfa->nodes[node_idx].type;
1020
1021 int clexp_idx;
1022 if (type != OP_BACK_REF)
1023 continue;
1024 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
1025 {
1026 re_token_t *clexp_node;
1027 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
1028 if (clexp_node->type == OP_CLOSE_SUBEXP
1029 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
1030 break;
1031 }
1032 if (clexp_idx == init_nodes.nelem)
1033 continue;
1034
1035 if (type == OP_BACK_REF)
1036 {
1037 int dest_idx = dfa->edests[node_idx].elems[0];
1038 if (!re_node_set_contains (&init_nodes, dest_idx))
1039 {
1040 err = re_node_set_merge (&init_nodes,
1041 dfa->eclosures + dest_idx);
1042 if (err != REG_NOERROR)
1043 return err;
1044 i = 0;
1045 }
1046 }
1047 }
1048
1049 /* It must be the first time to invoke acquire_state. */
1050 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
1051 /* We don't check ERR here, since the initial state must not be NULL. */
1052 if (BE (dfa->init_state == NULL, 0))
1053 return err;
1054 if (dfa->init_state->has_constraint)
1055 {
1056 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1057 CONTEXT_WORD);
1058 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1059 CONTEXT_NEWLINE);
1060 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1061 &init_nodes,
1062 CONTEXT_NEWLINE
1063 | CONTEXT_BEGBUF);
1064 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
1065 || dfa->init_state_begbuf == NULL, 0))
1066 return err;
1067 }
1068 else
1069 dfa->init_state_word = dfa->init_state_nl
1070 = dfa->init_state_begbuf = dfa->init_state;
1071
1072 re_node_set_free (&init_nodes);
1073 return REG_NOERROR;
1074}
1075
1076#ifdef RE_ENABLE_I18N
1077/* If it is possible to do searching in single byte encoding instead of UTF-8
1078 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1079 DFA nodes where needed. */
1080
1081static void
1082optimize_utf8 (re_dfa_t *dfa)
1083{
1084 int node, i, mb_chars = 0, has_period = 0;
1085
1086 for (node = 0; node < dfa->nodes_len; ++node)
1087 switch (dfa->nodes[node].type)
1088 {
1089 case CHARACTER:
1090 if (dfa->nodes[node].opr.c >= 0x80)
1091 mb_chars = 1;
1092 break;
1093 case ANCHOR:
1094 switch (dfa->nodes[node].opr.ctx_type)
1095 {
1096 case LINE_FIRST:
1097 case LINE_LAST:
1098 case BUF_FIRST:
1099 case BUF_LAST:
1100 break;
1101 default:
1102 /* Word anchors etc. cannot be handled. It's okay to test
1103 opr.ctx_type since constraints (for all DFA nodes) are
1104 created by ORing one or more opr.ctx_type values. */
1105 return;
1106 }
1107 break;
1108 case OP_PERIOD:
1109 has_period = 1;
1110 break;
1111 case OP_BACK_REF:
1112 case OP_ALT:
1113 case END_OF_RE:
1114 case OP_DUP_ASTERISK:
1115 case OP_OPEN_SUBEXP:
1116 case OP_CLOSE_SUBEXP:
1117 break;
1118 case COMPLEX_BRACKET:
1119 return;
1120 case SIMPLE_BRACKET:
1121 /* Just double check. The non-ASCII range starts at 0x80. */
1122 assert (0x80 % BITSET_WORD_BITS == 0);
1123 for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
1124 if (dfa->nodes[node].opr.sbcset[i])
1125 return;
1126 break;
1127 default:
1128 abort ();
1129 }
1130
1131 if (mb_chars || has_period)
1132 for (node = 0; node < dfa->nodes_len; ++node)
1133 {
1134 if (dfa->nodes[node].type == CHARACTER
1135 && dfa->nodes[node].opr.c >= 0x80)
1136 dfa->nodes[node].mb_partial = 0;
1137 else if (dfa->nodes[node].type == OP_PERIOD)
1138 dfa->nodes[node].type = OP_UTF8_PERIOD;
1139 }
1140
1141 /* The search can be in single byte locale. */
1142 dfa->mb_cur_max = 1;
1143 dfa->is_utf8 = 0;
1144 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
1145}
1146#endif
1147
1148/* Analyze the structure tree, and calculate "first", "next", "edest",
1149 "eclosure", and "inveclosure". */
1150
1151static reg_errcode_t
1152analyze (regex_t *preg)
1153{
1154 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1155 reg_errcode_t ret;
1156
1157 /* Allocate arrays. */
1158 dfa->nexts = re_malloc (int, dfa->nodes_alloc);
1159 dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
1160 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1161 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1162 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
1163 || dfa->eclosures == NULL, 0))
1164 return REG_ESPACE;
1165
1166 dfa->subexp_map = re_malloc (int, preg->re_nsub);
1167 if (dfa->subexp_map != NULL)
1168 {
1169 int i;
1170 for (i = 0; i < preg->re_nsub; i++)
1171 dfa->subexp_map[i] = i;
1172 preorder (dfa->str_tree, optimize_subexps, dfa);
1173 for (i = 0; i < preg->re_nsub; i++)
1174 if (dfa->subexp_map[i] != i)
1175 break;
1176 if (i == preg->re_nsub)
1177 {
1178 free (dfa->subexp_map);
1179 dfa->subexp_map = NULL;
1180 }
1181 }
1182
1183 ret = postorder (dfa->str_tree, lower_subexps, preg);
1184 if (BE (ret != REG_NOERROR, 0))
1185 return ret;
1186 ret = postorder (dfa->str_tree, calc_first, dfa);
1187 if (BE (ret != REG_NOERROR, 0))
1188 return ret;
1189 preorder (dfa->str_tree, calc_next, dfa);
1190 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1191 if (BE (ret != REG_NOERROR, 0))
1192 return ret;
1193 ret = calc_eclosure (dfa);
1194 if (BE (ret != REG_NOERROR, 0))
1195 return ret;
1196
1197 /* We only need this during the prune_impossible_nodes pass in regexec.c;
1198 skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
1199 if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
1200 || dfa->nbackref)
1201 {
1202 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1203 if (BE (dfa->inveclosures == NULL, 0))
1204 return REG_ESPACE;
1205 ret = calc_inveclosure (dfa);
1206 }
1207
1208 return ret;
1209}
1210
1211/* Our parse trees are very unbalanced, so we cannot use a stack to
1212 implement parse tree visits. Instead, we use parent pointers and
1213 some hairy code in these two functions. */
1214static reg_errcode_t
1215postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1216 void *extra)
1217{
1218 bin_tree_t *node, *prev;
1219
1220 for (node = root; ; )
1221 {
1222 /* Descend down the tree, preferably to the left (or to the right
1223 if that's the only child). */
1224 while (node->left || node->right)
1225 if (node->left)
1226 node = node->left;
1227 else
1228 node = node->right;
1229
1230 do
1231 {
1232 reg_errcode_t err = fn (extra, node);
1233 if (BE (err != REG_NOERROR, 0))
1234 return err;
1235 if (node->parent == NULL)
1236 return REG_NOERROR;
1237 prev = node;
1238 node = node->parent;
1239 }
1240 /* Go up while we have a node that is reached from the right. */
1241 while (node->right == prev || node->right == NULL);
1242 node = node->right;
1243 }
1244}
1245
1246static reg_errcode_t
1247preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1248 void *extra)
1249{
1250 bin_tree_t *node;
1251
1252 for (node = root; ; )
1253 {
1254 reg_errcode_t err = fn (extra, node);
1255 if (BE (err != REG_NOERROR, 0))
1256 return err;
1257
1258 /* Go to the left node, or up and to the right. */
1259 if (node->left)
1260 node = node->left;
1261 else
1262 {
1263 bin_tree_t *prev = NULL;
1264 while (node->right == prev || node->right == NULL)
1265 {
1266 prev = node;
1267 node = node->parent;
1268 if (!node)
1269 return REG_NOERROR;
1270 }
1271 node = node->right;
1272 }
1273 }
1274}
1275
1276/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
1277 re_search_internal to map the inner one's opr.idx to this one's. Adjust
1278 backreferences as well. Requires a preorder visit. */
1279static reg_errcode_t
1280optimize_subexps (void *extra, bin_tree_t *node)
1281{
1282 re_dfa_t *dfa = (re_dfa_t *) extra;
1283
1284 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1285 {
1286 int idx = node->token.opr.idx;
1287 node->token.opr.idx = dfa->subexp_map[idx];
1288 dfa->used_bkref_map |= 1 << node->token.opr.idx;
1289 }
1290
1291 else if (node->token.type == SUBEXP
1292 && node->left && node->left->token.type == SUBEXP)
1293 {
1294 int other_idx = node->left->token.opr.idx;
1295
1296 node->left = node->left->left;
1297 if (node->left)
1298 node->left->parent = node;
1299
1300 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1301 if (other_idx < BITSET_WORD_BITS)
1302 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
1303 }
1304
1305 return REG_NOERROR;
1306}
1307
1308/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
1309 of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
1310static reg_errcode_t
1311lower_subexps (void *extra, bin_tree_t *node)
1312{
1313 regex_t *preg = (regex_t *) extra;
1314 reg_errcode_t err = REG_NOERROR;
1315
1316 if (node->left && node->left->token.type == SUBEXP)
1317 {
1318 node->left = lower_subexp (&err, preg, node->left);
1319 if (node->left)
1320 node->left->parent = node;
1321 }
1322 if (node->right && node->right->token.type == SUBEXP)
1323 {
1324 node->right = lower_subexp (&err, preg, node->right);
1325 if (node->right)
1326 node->right->parent = node;
1327 }
1328
1329 return err;
1330}
1331
1332static bin_tree_t *
1333lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
1334{
1335 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1336 bin_tree_t *body = node->left;
1337 bin_tree_t *op, *cls, *tree1, *tree;
1338
1339 if (preg->no_sub
1340 /* We do not optimize empty subexpressions, because otherwise we may
1341 have bad CONCAT nodes with NULL children. This is obviously not
1342 very common, so we do not lose much. An example that triggers
1343 this case is the sed "script" /\(\)/x. */
1344 && node->left != NULL
1345 && (node->token.opr.idx >= BITSET_WORD_BITS
1346 || !(dfa->used_bkref_map
1347 & ((bitset_word_t) 1 << node->token.opr.idx))))
1348 return node->left;
1349
1350 /* Convert the SUBEXP node to the concatenation of an
1351 OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
1352 op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1353 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1354 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1355 tree = create_tree (dfa, op, tree1, CONCAT);
1356 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
1357 {
1358 *err = REG_ESPACE;
1359 return NULL;
1360 }
1361
1362 op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
1363 op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
1364 return tree;
1365}
1366
1367/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
1368 nodes. Requires a postorder visit. */
1369static reg_errcode_t
1370calc_first (void *extra, bin_tree_t *node)
1371{
1372 re_dfa_t *dfa = (re_dfa_t *) extra;
1373 if (node->token.type == CONCAT)
1374 {
1375 node->first = node->left->first;
1376 node->node_idx = node->left->node_idx;
1377 }
1378 else
1379 {
1380 node->first = node;
1381 node->node_idx = re_dfa_add_node (dfa, node->token);
1382 if (BE (node->node_idx == -1, 0))
1383 return REG_ESPACE;
1384 if (node->token.type == ANCHOR)
1385 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
1386 }
1387 return REG_NOERROR;
1388}
1389
1390/* Pass 2: compute NEXT on the tree. Preorder visit. */
1391static reg_errcode_t
1392calc_next (UNUSED_PARAM void *extra, bin_tree_t *node)
1393{
1394 switch (node->token.type)
1395 {
1396 case OP_DUP_ASTERISK:
1397 node->left->next = node;
1398 break;
1399 case CONCAT:
1400 node->left->next = node->right->first;
1401 node->right->next = node->next;
1402 break;
1403 default:
1404 if (node->left)
1405 node->left->next = node->next;
1406 if (node->right)
1407 node->right->next = node->next;
1408 break;
1409 }
1410 return REG_NOERROR;
1411}
1412
1413/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
1414static reg_errcode_t
1415link_nfa_nodes (void *extra, bin_tree_t *node)
1416{
1417 re_dfa_t *dfa = (re_dfa_t *) extra;
1418 int idx = node->node_idx;
1419 reg_errcode_t err = REG_NOERROR;
1420
1421 switch (node->token.type)
1422 {
1423 case CONCAT:
1424 break;
1425
1426 case END_OF_RE:
1427 assert (node->next == NULL);
1428 break;
1429
1430 case OP_DUP_ASTERISK:
1431 case OP_ALT:
1432 {
1433 int left, right;
1434 dfa->has_plural_match = 1;
1435 if (node->left != NULL)
1436 left = node->left->first->node_idx;
1437 else
1438 left = node->next->node_idx;
1439 if (node->right != NULL)
1440 right = node->right->first->node_idx;
1441 else
1442 right = node->next->node_idx;
1443 assert (left > -1);
1444 assert (right > -1);
1445 err = re_node_set_init_2 (dfa->edests + idx, left, right);
1446 }
1447 break;
1448
1449 case ANCHOR:
1450 case OP_OPEN_SUBEXP:
1451 case OP_CLOSE_SUBEXP:
1452 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1453 break;
1454
1455 case OP_BACK_REF:
1456 dfa->nexts[idx] = node->next->node_idx;
1457 if (node->token.type == OP_BACK_REF)
1458 err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1459 break;
1460
1461 default:
1462 assert (!IS_EPSILON_NODE (node->token.type));
1463 dfa->nexts[idx] = node->next->node_idx;
1464 break;
1465 }
1466
1467 return err;
1468}
1469
1470/* Duplicate the epsilon closure of the node ROOT_NODE.
1471 Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1472 to their own constraint. */
1473
1474static reg_errcode_t
1475internal_function
1476duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
1477 int root_node, unsigned int init_constraint)
1478{
1479 int org_node, clone_node, ret;
1480 unsigned int constraint = init_constraint;
1481 for (org_node = top_org_node, clone_node = top_clone_node;;)
1482 {
1483 int org_dest, clone_dest;
1484 if (dfa->nodes[org_node].type == OP_BACK_REF)
1485 {
1486 /* If the back reference epsilon-transit, its destination must
1487 also have the constraint. Then duplicate the epsilon closure
1488 of the destination of the back reference, and store it in
1489 edests of the back reference. */
1490 org_dest = dfa->nexts[org_node];
1491 re_node_set_empty (dfa->edests + clone_node);
1492 clone_dest = duplicate_node (dfa, org_dest, constraint);
1493 if (BE (clone_dest == -1, 0))
1494 return REG_ESPACE;
1495 dfa->nexts[clone_node] = dfa->nexts[org_node];
1496 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1497 if (BE (ret < 0, 0))
1498 return REG_ESPACE;
1499 }
1500 else if (dfa->edests[org_node].nelem == 0)
1501 {
1502 /* In case of the node can't epsilon-transit, don't duplicate the
1503 destination and store the original destination as the
1504 destination of the node. */
1505 dfa->nexts[clone_node] = dfa->nexts[org_node];
1506 break;
1507 }
1508 else if (dfa->edests[org_node].nelem == 1)
1509 {
1510 /* In case of the node can epsilon-transit, and it has only one
1511 destination. */
1512 org_dest = dfa->edests[org_node].elems[0];
1513 re_node_set_empty (dfa->edests + clone_node);
1514 /* If the node is root_node itself, it means the epsilon clsoure
1515 has a loop. Then tie it to the destination of the root_node. */
1516 if (org_node == root_node && clone_node != org_node)
1517 {
1518 ret = re_node_set_insert (dfa->edests + clone_node, org_dest);
1519 if (BE (ret < 0, 0))
1520 return REG_ESPACE;
1521 break;
1522 }
1523 /* In case of the node has another constraint, add it. */
1524 constraint |= dfa->nodes[org_node].constraint;
1525 clone_dest = duplicate_node (dfa, org_dest, constraint);
1526 if (BE (clone_dest == -1, 0))
1527 return REG_ESPACE;
1528 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1529 if (BE (ret < 0, 0))
1530 return REG_ESPACE;
1531 }
1532 else /* dfa->edests[org_node].nelem == 2 */
1533 {
1534 /* In case of the node can epsilon-transit, and it has two
1535 destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
1536 org_dest = dfa->edests[org_node].elems[0];
1537 re_node_set_empty (dfa->edests + clone_node);
1538 /* Search for a duplicated node which satisfies the constraint. */
1539 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1540 if (clone_dest == -1)
1541 {
1542 /* There is no such duplicated node, create a new one. */
1543 reg_errcode_t err;
1544 clone_dest = duplicate_node (dfa, org_dest, constraint);
1545 if (BE (clone_dest == -1, 0))
1546 return REG_ESPACE;
1547 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1548 if (BE (ret < 0, 0))
1549 return REG_ESPACE;
1550 err = duplicate_node_closure (dfa, org_dest, clone_dest,
1551 root_node, constraint);
1552 if (BE (err != REG_NOERROR, 0))
1553 return err;
1554 }
1555 else
1556 {
1557 /* There is a duplicated node which satisfies the constraint,
1558 use it to avoid infinite loop. */
1559 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1560 if (BE (ret < 0, 0))
1561 return REG_ESPACE;
1562 }
1563
1564 org_dest = dfa->edests[org_node].elems[1];
1565 clone_dest = duplicate_node (dfa, org_dest, constraint);
1566 if (BE (clone_dest == -1, 0))
1567 return REG_ESPACE;
1568 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1569 if (BE (ret < 0, 0))
1570 return REG_ESPACE;
1571 }
1572 org_node = org_dest;
1573 clone_node = clone_dest;
1574 }
1575 return REG_NOERROR;
1576}
1577
1578/* Search for a node which is duplicated from the node ORG_NODE, and
1579 satisfies the constraint CONSTRAINT. */
1580
1581static int
1582search_duplicated_node (const re_dfa_t *dfa, int org_node,
1583 unsigned int constraint)
1584{
1585 int idx;
1586 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1587 {
1588 if (org_node == dfa->org_indices[idx]
1589 && constraint == dfa->nodes[idx].constraint)
1590 return idx; /* Found. */
1591 }
1592 return -1; /* Not found. */
1593}
1594
1595/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1596 Return the index of the new node, or -1 if insufficient storage is
1597 available. */
1598
1599static int
1600duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
1601{
1602 int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1603 if (BE (dup_idx != -1, 1))
1604 {
1605 dfa->nodes[dup_idx].constraint = constraint;
1606 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
1607 dfa->nodes[dup_idx].duplicated = 1;
1608
1609 /* Store the index of the original node. */
1610 dfa->org_indices[dup_idx] = org_idx;
1611 }
1612 return dup_idx;
1613}
1614
1615static reg_errcode_t
1616calc_inveclosure (re_dfa_t *dfa)
1617{
1618 int src, idx, ret;
1619 for (idx = 0; idx < dfa->nodes_len; ++idx)
1620 re_node_set_init_empty (dfa->inveclosures + idx);
1621
1622 for (src = 0; src < dfa->nodes_len; ++src)
1623 {
1624 int *elems = dfa->eclosures[src].elems;
1625 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1626 {
1627 ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1628 if (BE (ret == -1, 0))
1629 return REG_ESPACE;
1630 }
1631 }
1632
1633 return REG_NOERROR;
1634}
1635
1636/* Calculate "eclosure" for all the node in DFA. */
1637
1638static reg_errcode_t
1639calc_eclosure (re_dfa_t *dfa)
1640{
1641 int node_idx, incomplete;
1642#ifdef DEBUG
1643 assert (dfa->nodes_len > 0);
1644#endif
1645 incomplete = 0;
1646 /* For each nodes, calculate epsilon closure. */
1647 for (node_idx = 0; ; ++node_idx)
1648 {
1649 reg_errcode_t err;
1650 re_node_set eclosure_elem;
1651 if (node_idx == dfa->nodes_len)
1652 {
1653 if (!incomplete)
1654 break;
1655 incomplete = 0;
1656 node_idx = 0;
1657 }
1658
1659#ifdef DEBUG
1660 assert (dfa->eclosures[node_idx].nelem != -1);
1661#endif
1662
1663 /* If we have already calculated, skip it. */
1664 if (dfa->eclosures[node_idx].nelem != 0)
1665 continue;
1666 /* Calculate epsilon closure of `node_idx'. */
1667 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
1668 if (BE (err != REG_NOERROR, 0))
1669 return err;
1670
1671 if (dfa->eclosures[node_idx].nelem == 0)
1672 {
1673 incomplete = 1;
1674 re_node_set_free (&eclosure_elem);
1675 }
1676 }
1677 return REG_NOERROR;
1678}
1679
1680/* Calculate epsilon closure of NODE. */
1681
1682static reg_errcode_t
1683calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
1684{
1685 reg_errcode_t err;
1686 int i;
1687 re_node_set eclosure;
1688 int ret;
1689 int incomplete = 0;
1690 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1691 if (BE (err != REG_NOERROR, 0))
1692 return err;
1693
1694 /* This indicates that we are calculating this node now.
1695 We reference this value to avoid infinite loop. */
1696 dfa->eclosures[node].nelem = -1;
1697
1698 /* If the current node has constraints, duplicate all nodes
1699 since they must inherit the constraints. */
1700 if (dfa->nodes[node].constraint
1701 && dfa->edests[node].nelem
1702 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1703 {
1704 err = duplicate_node_closure (dfa, node, node, node,
1705 dfa->nodes[node].constraint);
1706 if (BE (err != REG_NOERROR, 0))
1707 return err;
1708 }
1709
1710 /* Expand each epsilon destination nodes. */
1711 if (IS_EPSILON_NODE(dfa->nodes[node].type))
1712 for (i = 0; i < dfa->edests[node].nelem; ++i)
1713 {
1714 re_node_set eclosure_elem;
1715 int edest = dfa->edests[node].elems[i];
1716 /* If calculating the epsilon closure of `edest' is in progress,
1717 return intermediate result. */
1718 if (dfa->eclosures[edest].nelem == -1)
1719 {
1720 incomplete = 1;
1721 continue;
1722 }
1723 /* If we haven't calculated the epsilon closure of `edest' yet,
1724 calculate now. Otherwise use calculated epsilon closure. */
1725 if (dfa->eclosures[edest].nelem == 0)
1726 {
1727 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
1728 if (BE (err != REG_NOERROR, 0))
1729 return err;
1730 }
1731 else
1732 eclosure_elem = dfa->eclosures[edest];
1733 /* Merge the epsilon closure of `edest'. */
1734 err = re_node_set_merge (&eclosure, &eclosure_elem);
1735 if (BE (err != REG_NOERROR, 0))
1736 return err;
1737 /* If the epsilon closure of `edest' is incomplete,
1738 the epsilon closure of this node is also incomplete. */
1739 if (dfa->eclosures[edest].nelem == 0)
1740 {
1741 incomplete = 1;
1742 re_node_set_free (&eclosure_elem);
1743 }
1744 }
1745
1746 /* An epsilon closure includes itself. */
1747 ret = re_node_set_insert (&eclosure, node);
1748 if (BE (ret < 0, 0))
1749 return REG_ESPACE;
1750 if (incomplete && !root)
1751 dfa->eclosures[node].nelem = 0;
1752 else
1753 dfa->eclosures[node] = eclosure;
1754 *new_set = eclosure;
1755 return REG_NOERROR;
1756}
1757
1758/* Functions for token which are used in the parser. */
1759
1760/* Fetch a token from INPUT.
1761 We must not use this function inside bracket expressions. */
1762
1763static void
1764internal_function
1765fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
1766{
1767 re_string_skip_bytes (input, peek_token (result, input, syntax));
1768}
1769
1770/* Peek a token from INPUT, and return the length of the token.
1771 We must not use this function inside bracket expressions. */
1772
1773static int
1774internal_function
1775peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1776{
1777 unsigned char c;
1778
1779 if (re_string_eoi (input))
1780 {
1781 token->type = END_OF_RE;
1782 return 0;
1783 }
1784
1785 c = re_string_peek_byte (input, 0);
1786 token->opr.c = c;
1787
1788 token->word_char = 0;
1789#ifdef RE_ENABLE_I18N
1790 token->mb_partial = 0;
1791 if (input->mb_cur_max > 1 &&
1792 !re_string_first_byte (input, re_string_cur_idx (input)))
1793 {
1794 token->type = CHARACTER;
1795 token->mb_partial = 1;
1796 return 1;
1797 }
1798#endif
1799 if (c == '\\')
1800 {
1801 unsigned char c2;
1802 if (re_string_cur_idx (input) + 1 >= re_string_length (input))
1803 {
1804 token->type = BACK_SLASH;
1805 return 1;
1806 }
1807
1808 c2 = re_string_peek_byte_case (input, 1);
1809 token->opr.c = c2;
1810 token->type = CHARACTER;
1811#ifdef RE_ENABLE_I18N
1812 if (input->mb_cur_max > 1)
1813 {
1814 wint_t wc = re_string_wchar_at (input,
1815 re_string_cur_idx (input) + 1);
1816 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1817 }
1818 else
1819#endif
1820 token->word_char = IS_WORD_CHAR (c2) != 0;
1821
1822 switch (c2)
1823 {
1824 case '|':
1825 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
1826 token->type = OP_ALT;
1827 break;
1828 case '1': case '2': case '3': case '4': case '5':
1829 case '6': case '7': case '8': case '9':
1830 if (!(syntax & RE_NO_BK_REFS))
1831 {
1832 token->type = OP_BACK_REF;
1833 token->opr.idx = c2 - '1';
1834 }
1835 break;
1836 case '<':
1837 if (!(syntax & RE_NO_GNU_OPS))
1838 {
1839 token->type = ANCHOR;
1840 token->opr.ctx_type = WORD_FIRST;
1841 }
1842 break;
1843 case '>':
1844 if (!(syntax & RE_NO_GNU_OPS))
1845 {
1846 token->type = ANCHOR;
1847 token->opr.ctx_type = WORD_LAST;
1848 }
1849 break;
1850 case 'b':
1851 if (!(syntax & RE_NO_GNU_OPS))
1852 {
1853 token->type = ANCHOR;
1854 token->opr.ctx_type = WORD_DELIM;
1855 }
1856 break;
1857 case 'B':
1858 if (!(syntax & RE_NO_GNU_OPS))
1859 {
1860 token->type = ANCHOR;
1861 token->opr.ctx_type = NOT_WORD_DELIM;
1862 }
1863 break;
1864 case 'w':
1865 if (!(syntax & RE_NO_GNU_OPS))
1866 token->type = OP_WORD;
1867 break;
1868 case 'W':
1869 if (!(syntax & RE_NO_GNU_OPS))
1870 token->type = OP_NOTWORD;
1871 break;
1872 case 's':
1873 if (!(syntax & RE_NO_GNU_OPS))
1874 token->type = OP_SPACE;
1875 break;
1876 case 'S':
1877 if (!(syntax & RE_NO_GNU_OPS))
1878 token->type = OP_NOTSPACE;
1879 break;
1880 case '`':
1881 if (!(syntax & RE_NO_GNU_OPS))
1882 {
1883 token->type = ANCHOR;
1884 token->opr.ctx_type = BUF_FIRST;
1885 }
1886 break;
1887 case '\'':
1888 if (!(syntax & RE_NO_GNU_OPS))
1889 {
1890 token->type = ANCHOR;
1891 token->opr.ctx_type = BUF_LAST;
1892 }
1893 break;
1894 case '(':
1895 if (!(syntax & RE_NO_BK_PARENS))
1896 token->type = OP_OPEN_SUBEXP;
1897 break;
1898 case ')':
1899 if (!(syntax & RE_NO_BK_PARENS))
1900 token->type = OP_CLOSE_SUBEXP;
1901 break;
1902 case '+':
1903 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1904 token->type = OP_DUP_PLUS;
1905 break;
1906 case '?':
1907 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1908 token->type = OP_DUP_QUESTION;
1909 break;
1910 case '{':
1911 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1912 token->type = OP_OPEN_DUP_NUM;
1913 break;
1914 case '}':
1915 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1916 token->type = OP_CLOSE_DUP_NUM;
1917 break;
1918 default:
1919 break;
1920 }
1921 return 2;
1922 }
1923
1924 token->type = CHARACTER;
1925#ifdef RE_ENABLE_I18N
1926 if (input->mb_cur_max > 1)
1927 {
1928 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1929 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1930 }
1931 else
1932#endif
1933 token->word_char = IS_WORD_CHAR (token->opr.c);
1934
1935 switch (c)
1936 {
1937 case '\n':
1938 if (syntax & RE_NEWLINE_ALT)
1939 token->type = OP_ALT;
1940 break;
1941 case '|':
1942 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
1943 token->type = OP_ALT;
1944 break;
1945 case '*':
1946 token->type = OP_DUP_ASTERISK;
1947 break;
1948 case '+':
1949 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1950 token->type = OP_DUP_PLUS;
1951 break;
1952 case '?':
1953 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1954 token->type = OP_DUP_QUESTION;
1955 break;
1956 case '{':
1957 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1958 token->type = OP_OPEN_DUP_NUM;
1959 break;
1960 case '}':
1961 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1962 token->type = OP_CLOSE_DUP_NUM;
1963 break;
1964 case '(':
1965 if (syntax & RE_NO_BK_PARENS)
1966 token->type = OP_OPEN_SUBEXP;
1967 break;
1968 case ')':
1969 if (syntax & RE_NO_BK_PARENS)
1970 token->type = OP_CLOSE_SUBEXP;
1971 break;
1972 case '[':
1973 token->type = OP_OPEN_BRACKET;
1974 break;
1975 case '.':
1976 token->type = OP_PERIOD;
1977 break;
1978 case '^':
1979 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
1980 re_string_cur_idx (input) != 0)
1981 {
1982 char prev = re_string_peek_byte (input, -1);
1983 if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
1984 break;
1985 }
1986 token->type = ANCHOR;
1987 token->opr.ctx_type = LINE_FIRST;
1988 break;
1989 case '$':
1990 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
1991 re_string_cur_idx (input) + 1 != re_string_length (input))
1992 {
1993 re_token_t next;
1994 re_string_skip_bytes (input, 1);
1995 peek_token (&next, input, syntax);
1996 re_string_skip_bytes (input, -1);
1997 if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
1998 break;
1999 }
2000 token->type = ANCHOR;
2001 token->opr.ctx_type = LINE_LAST;
2002 break;
2003 default:
2004 break;
2005 }
2006 return 1;
2007}
2008
2009/* Peek a token from INPUT, and return the length of the token.
2010 We must not use this function out of bracket expressions. */
2011
2012static int
2013internal_function
2014peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2015{
2016 unsigned char c;
2017 if (re_string_eoi (input))
2018 {
2019 token->type = END_OF_RE;
2020 return 0;
2021 }
2022 c = re_string_peek_byte (input, 0);
2023 token->opr.c = c;
2024
2025#ifdef RE_ENABLE_I18N
2026 if (input->mb_cur_max > 1 &&
2027 !re_string_first_byte (input, re_string_cur_idx (input)))
2028 {
2029 token->type = CHARACTER;
2030 return 1;
2031 }
2032#endif /* RE_ENABLE_I18N */
2033
2034 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
2035 && re_string_cur_idx (input) + 1 < re_string_length (input))
2036 {
2037 /* In this case, '\' escape a character. */
2038 unsigned char c2;
2039 re_string_skip_bytes (input, 1);
2040 c2 = re_string_peek_byte (input, 0);
2041 token->opr.c = c2;
2042 token->type = CHARACTER;
2043 return 1;
2044 }
2045 if (c == '[') /* '[' is a special char in a bracket exps. */
2046 {
2047 unsigned char c2;
2048 int token_len;
2049 if (re_string_cur_idx (input) + 1 < re_string_length (input))
2050 c2 = re_string_peek_byte (input, 1);
2051 else
2052 c2 = 0;
2053 token->opr.c = c2;
2054 token_len = 2;
2055 switch (c2)
2056 {
2057 case '.':
2058 token->type = OP_OPEN_COLL_ELEM;
2059 break;
2060 case '=':
2061 token->type = OP_OPEN_EQUIV_CLASS;
2062 break;
2063 case ':':
2064 if (syntax & RE_CHAR_CLASSES)
2065 {
2066 token->type = OP_OPEN_CHAR_CLASS;
2067 break;
2068 }
2069 /* else fall through. */
2070 default:
2071 token->type = CHARACTER;
2072 token->opr.c = c;
2073 token_len = 1;
2074 break;
2075 }
2076 return token_len;
2077 }
2078 switch (c)
2079 {
2080 case '-':
2081 token->type = OP_CHARSET_RANGE;
2082 break;
2083 case ']':
2084 token->type = OP_CLOSE_BRACKET;
2085 break;
2086 case '^':
2087 token->type = OP_NON_MATCH_LIST;
2088 break;
2089 default:
2090 token->type = CHARACTER;
2091 }
2092 return 1;
2093}
2094
2095/* Functions for parser. */
2096
2097/* Entry point of the parser.
2098 Parse the regular expression REGEXP and return the structure tree.
2099 If an error has occurred, ERR is set by error code, and return NULL.
2100 This function build the following tree, from regular expression <reg_exp>:
2101 CAT
2102 / \
2103 / \
2104 <reg_exp> EOR
2105
2106 CAT means concatenation.
2107 EOR means end of regular expression. */
2108
2109static bin_tree_t *
2110parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
2111 reg_errcode_t *err)
2112{
2113 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2114 bin_tree_t *tree, *eor, *root;
2115 re_token_t current_token;
2116 dfa->syntax = syntax;
2117 fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2118 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
2119 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2120 return NULL;
2121 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2122 if (tree != NULL)
2123 root = create_tree (dfa, tree, eor, CONCAT);
2124 else
2125 root = eor;
2126 if (BE (eor == NULL || root == NULL, 0))
2127 {
2128 *err = REG_ESPACE;
2129 return NULL;
2130 }
2131 return root;
2132}
2133
2134/* This function build the following tree, from regular expression
2135 <branch1>|<branch2>:
2136 ALT
2137 / \
2138 / \
2139 <branch1> <branch2>
2140
2141 ALT means alternative, which represents the operator `|'. */
2142
2143static bin_tree_t *
2144parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2145 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2146{
2147 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2148 bin_tree_t *tree, *branch = NULL;
2149 tree = parse_branch (regexp, preg, token, syntax, nest, err);
2150 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2151 return NULL;
2152
2153 while (token->type == OP_ALT)
2154 {
2155 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2156 if (token->type != OP_ALT && token->type != END_OF_RE
2157 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2158 {
2159 branch = parse_branch (regexp, preg, token, syntax, nest, err);
2160 if (BE (*err != REG_NOERROR && branch == NULL, 0))
2161 return NULL;
2162 }
2163 else
2164 branch = NULL;
2165 tree = create_tree (dfa, tree, branch, OP_ALT);
2166 if (BE (tree == NULL, 0))
2167 {
2168 *err = REG_ESPACE;
2169 return NULL;
2170 }
2171 }
2172 return tree;
2173}
2174
2175/* This function build the following tree, from regular expression
2176 <exp1><exp2>:
2177 CAT
2178 / \
2179 / \
2180 <exp1> <exp2>
2181
2182 CAT means concatenation. */
2183
2184static bin_tree_t *
2185parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
2186 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2187{
2188 bin_tree_t *tree, *exp;
2189 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2190 tree = parse_expression (regexp, preg, token, syntax, nest, err);
2191 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2192 return NULL;
2193
2194 while (token->type != OP_ALT && token->type != END_OF_RE
2195 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2196 {
2197 exp = parse_expression (regexp, preg, token, syntax, nest, err);
2198 if (BE (*err != REG_NOERROR && exp == NULL, 0))
2199 {
2200 return NULL;
2201 }
2202 if (tree != NULL && exp != NULL)
2203 {
2204 tree = create_tree (dfa, tree, exp, CONCAT);
2205 if (tree == NULL)
2206 {
2207 *err = REG_ESPACE;
2208 return NULL;
2209 }
2210 }
2211 else if (tree == NULL)
2212 tree = exp;
2213 /* Otherwise exp == NULL, we don't need to create new tree. */
2214 }
2215 return tree;
2216}
2217
2218/* This function build the following tree, from regular expression a*:
2219 *
2220 |
2221 a
2222*/
2223
2224static bin_tree_t *
2225parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2226 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2227{
2228 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2229 bin_tree_t *tree;
2230 switch (token->type)
2231 {
2232 case CHARACTER:
2233 tree = create_token_tree (dfa, NULL, NULL, token);
2234 if (BE (tree == NULL, 0))
2235 {
2236 *err = REG_ESPACE;
2237 return NULL;
2238 }
2239#ifdef RE_ENABLE_I18N
2240 if (dfa->mb_cur_max > 1)
2241 {
2242 while (!re_string_eoi (regexp)
2243 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
2244 {
2245 bin_tree_t *mbc_remain;
2246 fetch_token (token, regexp, syntax);
2247 mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2248 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2249 if (BE (mbc_remain == NULL || tree == NULL, 0))
2250 {
2251 *err = REG_ESPACE;
2252 return NULL;
2253 }
2254 }
2255 }
2256#endif
2257 break;
2258 case OP_OPEN_SUBEXP:
2259 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
2260 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2261 return NULL;
2262 break;
2263 case OP_OPEN_BRACKET:
2264 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2265 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2266 return NULL;
2267 break;
2268 case OP_BACK_REF:
2269 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
2270 {
2271 *err = REG_ESUBREG;
2272 return NULL;
2273 }
2274 dfa->used_bkref_map |= 1 << token->opr.idx;
2275 tree = create_token_tree (dfa, NULL, NULL, token);
2276 if (BE (tree == NULL, 0))
2277 {
2278 *err = REG_ESPACE;
2279 return NULL;
2280 }
2281 ++dfa->nbackref;
2282 dfa->has_mb_node = 1;
2283 break;
2284 case OP_OPEN_DUP_NUM:
2285 if (syntax & RE_CONTEXT_INVALID_DUP)
2286 {
2287 *err = REG_BADRPT;
2288 return NULL;
2289 }
2290 /* FALLTHROUGH */
2291 case OP_DUP_ASTERISK:
2292 case OP_DUP_PLUS:
2293 case OP_DUP_QUESTION:
2294 if (syntax & RE_CONTEXT_INVALID_OPS)
2295 {
2296 *err = REG_BADRPT;
2297 return NULL;
2298 }
2299 else if (syntax & RE_CONTEXT_INDEP_OPS)
2300 {
2301 fetch_token (token, regexp, syntax);
2302 return parse_expression (regexp, preg, token, syntax, nest, err);
2303 }
2304 /* else fall through */
2305 case OP_CLOSE_SUBEXP:
2306 if ((token->type == OP_CLOSE_SUBEXP) &&
2307 !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2308 {
2309 *err = REG_ERPAREN;
2310 return NULL;
2311 }
2312 /* else fall through */
2313 case OP_CLOSE_DUP_NUM:
2314 /* We treat it as a normal character. */
2315
2316 /* Then we can these characters as normal characters. */
2317 token->type = CHARACTER;
2318 /* mb_partial and word_char bits should be initialized already
2319 by peek_token. */
2320 tree = create_token_tree (dfa, NULL, NULL, token);
2321 if (BE (tree == NULL, 0))
2322 {
2323 *err = REG_ESPACE;
2324 return NULL;
2325 }
2326 break;
2327 case ANCHOR:
2328 if ((token->opr.ctx_type
2329 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
2330 && dfa->word_ops_used == 0)
2331 init_word_char (dfa);
2332 if (token->opr.ctx_type == WORD_DELIM
2333 || token->opr.ctx_type == NOT_WORD_DELIM)
2334 {
2335 bin_tree_t *tree_first, *tree_last;
2336 if (token->opr.ctx_type == WORD_DELIM)
2337 {
2338 token->opr.ctx_type = WORD_FIRST;
2339 tree_first = create_token_tree (dfa, NULL, NULL, token);
2340 token->opr.ctx_type = WORD_LAST;
2341 }
2342 else
2343 {
2344 token->opr.ctx_type = INSIDE_WORD;
2345 tree_first = create_token_tree (dfa, NULL, NULL, token);
2346 token->opr.ctx_type = INSIDE_NOTWORD;
2347 }
2348 tree_last = create_token_tree (dfa, NULL, NULL, token);
2349 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2350 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
2351 {
2352 *err = REG_ESPACE;
2353 return NULL;
2354 }
2355 }
2356 else
2357 {
2358 tree = create_token_tree (dfa, NULL, NULL, token);
2359 if (BE (tree == NULL, 0))
2360 {
2361 *err = REG_ESPACE;
2362 return NULL;
2363 }
2364 }
2365 /* We must return here, since ANCHORs can't be followed
2366 by repetition operators.
2367 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
2368 it must not be "<ANCHOR(^)><REPEAT(*)>". */
2369 fetch_token (token, regexp, syntax);
2370 return tree;
2371 case OP_PERIOD:
2372 tree = create_token_tree (dfa, NULL, NULL, token);
2373 if (BE (tree == NULL, 0))
2374 {
2375 *err = REG_ESPACE;
2376 return NULL;
2377 }
2378 if (dfa->mb_cur_max > 1)
2379 dfa->has_mb_node = 1;
2380 break;
2381 case OP_WORD:
2382 case OP_NOTWORD:
2383 tree = build_charclass_op (dfa, regexp->trans,
2384 "alnum",
2385 "_",
2386 token->type == OP_NOTWORD, err);
2387 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2388 return NULL;
2389 break;
2390 case OP_SPACE:
2391 case OP_NOTSPACE:
2392 tree = build_charclass_op (dfa, regexp->trans,
2393 "space",
2394 "",
2395 token->type == OP_NOTSPACE, err);
2396 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2397 return NULL;
2398 break;
2399 case OP_ALT:
2400 case END_OF_RE:
2401 return NULL;
2402 case BACK_SLASH:
2403 *err = REG_EESCAPE;
2404 return NULL;
2405 default:
2406 /* Must not happen? */
2407#ifdef DEBUG
2408 assert (0);
2409#endif
2410 return NULL;
2411 }
2412 fetch_token (token, regexp, syntax);
2413
2414 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
2415 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
2416 {
2417 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2418 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2419 return NULL;
2420 /* In BRE consecutive duplications are not allowed. */
2421 if ((syntax & RE_CONTEXT_INVALID_DUP)
2422 && (token->type == OP_DUP_ASTERISK
2423 || token->type == OP_OPEN_DUP_NUM))
2424 {
2425 *err = REG_BADRPT;
2426 return NULL;
2427 }
2428 }
2429
2430 return tree;
2431}
2432
2433/* This function build the following tree, from regular expression
2434 (<reg_exp>):
2435 SUBEXP
2436 |
2437 <reg_exp>
2438*/
2439
2440static bin_tree_t *
2441parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2442 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2443{
2444 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2445 bin_tree_t *tree;
2446 size_t cur_nsub;
2447 cur_nsub = preg->re_nsub++;
2448
2449 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2450
2451 /* The subexpression may be a null string. */
2452 if (token->type == OP_CLOSE_SUBEXP)
2453 tree = NULL;
2454 else
2455 {
2456 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2457 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
2458 *err = REG_EPAREN;
2459 if (BE (*err != REG_NOERROR, 0))
2460 return NULL;
2461 }
2462
2463 if (cur_nsub <= '9' - '1')
2464 dfa->completed_bkref_map |= 1 << cur_nsub;
2465
2466 tree = create_tree (dfa, tree, NULL, SUBEXP);
2467 if (BE (tree == NULL, 0))
2468 {
2469 *err = REG_ESPACE;
2470 return NULL;
2471 }
2472 tree->token.opr.idx = cur_nsub;
2473 return tree;
2474}
2475
2476/* This function parse repetition operators like "*", "+", "{1,3}" etc. */
2477
2478static bin_tree_t *
2479parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2480 re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
2481{
2482 bin_tree_t *tree = NULL, *old_tree = NULL;
2483 int i, start, end, start_idx = re_string_cur_idx (regexp);
2484#ifndef RE_TOKEN_INIT_BUG
2485 re_token_t start_token = *token;
2486#else
2487 re_token_t start_token;
2488
2489 memcpy ((void *) &start_token, (void *) token, sizeof start_token);
2490#endif
2491
2492 if (token->type == OP_OPEN_DUP_NUM)
2493 {
2494 end = 0;
2495 start = fetch_number (regexp, token, syntax);
2496 if (start == -1)
2497 {
2498 if (token->type == CHARACTER && token->opr.c == ',')
2499 start = 0; /* We treat "{,m}" as "{0,m}". */
2500 else
2501 {
2502 *err = REG_BADBR; /* <re>{} is invalid. */
2503 return NULL;
2504 }
2505 }
2506 if (BE (start != -2, 1))
2507 {
2508 /* We treat "{n}" as "{n,n}". */
2509 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2510 : ((token->type == CHARACTER && token->opr.c == ',')
2511 ? fetch_number (regexp, token, syntax) : -2));
2512 }
2513 if (BE (start == -2 || end == -2, 0))
2514 {
2515 /* Invalid sequence. */
2516 if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
2517 {
2518 if (token->type == END_OF_RE)
2519 *err = REG_EBRACE;
2520 else
2521 *err = REG_BADBR;
2522
2523 return NULL;
2524 }
2525
2526 /* If the syntax bit is set, rollback. */
2527 re_string_set_index (regexp, start_idx);
2528 *token = start_token;
2529 token->type = CHARACTER;
2530 /* mb_partial and word_char bits should be already initialized by
2531 peek_token. */
2532 return elem;
2533 }
2534
2535 if (BE ((end != -1 && start > end) || token->type != OP_CLOSE_DUP_NUM, 0))
2536 {
2537 /* First number greater than second. */
2538 *err = REG_BADBR;
2539 return NULL;
2540 }
2541 }
2542 else
2543 {
2544 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
2545 end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
2546 }
2547
2548 fetch_token (token, regexp, syntax);
2549
2550 if (BE (elem == NULL, 0))
2551 return NULL;
2552 if (BE (start == 0 && end == 0, 0))
2553 {
2554 postorder (elem, free_tree, NULL);
2555 return NULL;
2556 }
2557
2558 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2559 if (BE (start > 0, 0))
2560 {
2561 tree = elem;
2562 for (i = 2; i <= start; ++i)
2563 {
2564 elem = duplicate_tree (elem, dfa);
2565 tree = create_tree (dfa, tree, elem, CONCAT);
2566 if (BE (elem == NULL || tree == NULL, 0))
2567 goto parse_dup_op_espace;
2568 }
2569
2570 if (start == end)
2571 return tree;
2572
2573 /* Duplicate ELEM before it is marked optional. */
2574 elem = duplicate_tree (elem, dfa);
2575 old_tree = tree;
2576 }
2577 else
2578 old_tree = NULL;
2579
2580 if (elem->token.type == SUBEXP)
2581 postorder (elem, mark_opt_subexp, (void *) (intptr_t) elem->token.opr.idx);
2582
2583 tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
2584 if (BE (tree == NULL, 0))
2585 goto parse_dup_op_espace;
2586
2587 /* This loop is actually executed only when end != -1,
2588 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2589 already created the start+1-th copy. */
2590 for (i = start + 2; i <= end; ++i)
2591 {
2592 elem = duplicate_tree (elem, dfa);
2593 tree = create_tree (dfa, tree, elem, CONCAT);
2594 if (BE (elem == NULL || tree == NULL, 0))
2595 goto parse_dup_op_espace;
2596
2597 tree = create_tree (dfa, tree, NULL, OP_ALT);
2598 if (BE (tree == NULL, 0))
2599 goto parse_dup_op_espace;
2600 }
2601
2602 if (old_tree)
2603 tree = create_tree (dfa, old_tree, tree, CONCAT);
2604
2605 return tree;
2606
2607 parse_dup_op_espace:
2608 *err = REG_ESPACE;
2609 return NULL;
2610}
2611
2612/* Size of the names for collating symbol/equivalence_class/character_class.
2613 I'm not sure, but maybe enough. */
2614#define BRACKET_NAME_BUF_SIZE 32
2615
2616#ifndef _LIBC
2617 /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2618 Build the range expression which starts from START_ELEM, and ends
2619 at END_ELEM. The result are written to MBCSET and SBCSET.
2620 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2621 mbcset->range_ends, is a pointer argument since we may
2622 update it. */
2623
2624static reg_errcode_t
2625internal_function
2626# ifdef RE_ENABLE_I18N
2627build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
2628 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
2629# else /* not RE_ENABLE_I18N */
2630build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2631 bracket_elem_t *end_elem)
2632# endif /* not RE_ENABLE_I18N */
2633{
2634 unsigned int start_ch, end_ch;
2635 /* Equivalence Classes and Character Classes can't be a range start/end. */
2636 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
2637 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
2638 0))
2639 return REG_ERANGE;
2640
2641 /* We can handle no multi character collating elements without libc
2642 support. */
2643 if (BE ((start_elem->type == COLL_SYM
2644 && strlen ((char *) start_elem->opr.name) > 1)
2645 || (end_elem->type == COLL_SYM
2646 && strlen ((char *) end_elem->opr.name) > 1), 0))
2647 return REG_ECOLLATE;
2648
2649# ifdef RE_ENABLE_I18N
2650 {
2651 wchar_t wc;
2652 wint_t start_wc;
2653 wint_t end_wc;
2654 wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2655
2656 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2657 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2658 : 0));
2659 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2660 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2661 : 0));
2662#ifdef GAWK
2663 /*
2664 * Fedora Core 2, maybe others, have broken `btowc' that returns -1
2665 * for any value > 127. Sigh. Note that `start_ch' and `end_ch' are
2666 * unsigned, so we don't have sign extension problems.
2667 */
2668 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2669 ? start_ch : start_elem->opr.wch);
2670 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2671 ? end_ch : end_elem->opr.wch);
2672#else
2673 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2674 ? __btowc (start_ch) : start_elem->opr.wch);
2675 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2676 ? __btowc (end_ch) : end_elem->opr.wch);
2677#endif
2678 if (start_wc == WEOF || end_wc == WEOF)
2679 return REG_ECOLLATE;
2680 cmp_buf[0] = start_wc;
2681 cmp_buf[4] = end_wc;
2682 if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
2683 return REG_ERANGE;
2684
2685 /* Got valid collation sequence values, add them as a new entry.
2686 However, for !_LIBC we have no collation elements: if the
2687 character set is single byte, the single byte character set
2688 that we build below suffices. parse_bracket_exp passes
2689 no MBCSET if dfa->mb_cur_max == 1. */
2690 if (mbcset)
2691 {
2692 /* Check the space of the arrays. */
2693 if (BE (*range_alloc == mbcset->nranges, 0))
2694 {
2695 /* There is not enough space, need realloc. */
2696 wchar_t *new_array_start, *new_array_end;
2697 int new_nranges;
2698
2699 /* +1 in case of mbcset->nranges is 0. */
2700 new_nranges = 2 * mbcset->nranges + 1;
2701 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2702 are NULL if *range_alloc == 0. */
2703 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2704 new_nranges);
2705 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2706 new_nranges);
2707
2708 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2709 return REG_ESPACE;
2710
2711 mbcset->range_starts = new_array_start;
2712 mbcset->range_ends = new_array_end;
2713 *range_alloc = new_nranges;
2714 }
2715
2716 mbcset->range_starts[mbcset->nranges] = start_wc;
2717 mbcset->range_ends[mbcset->nranges++] = end_wc;
2718 }
2719
2720 /* Build the table for single byte characters. */
2721 for (wc = 0; wc < SBC_MAX; ++wc)
2722 {
2723 cmp_buf[2] = wc;
2724 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2725 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2726 bitset_set (sbcset, wc);
2727 }
2728 }
2729# else /* not RE_ENABLE_I18N */
2730 {
2731 unsigned int ch;
2732 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2733 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2734 : 0));
2735 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2736 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2737 : 0));
2738 if (start_ch > end_ch)
2739 return REG_ERANGE;
2740 /* Build the table for single byte characters. */
2741 for (ch = 0; ch < SBC_MAX; ++ch)
2742 if (start_ch <= ch && ch <= end_ch)
2743 bitset_set (sbcset, ch);
2744 }
2745# endif /* not RE_ENABLE_I18N */
2746 return REG_NOERROR;
2747}
2748#endif /* not _LIBC */
2749
2750#ifndef _LIBC
2751/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
2752 Build the collating element which is represented by NAME.
2753 The result are written to MBCSET and SBCSET.
2754 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2755 pointer argument since we may update it. */
2756
2757static reg_errcode_t
2758internal_function
2759# ifdef RE_ENABLE_I18N
2760build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2761 int *coll_sym_alloc, const unsigned char *name)
2762# else /* not RE_ENABLE_I18N */
2763build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2764# endif /* not RE_ENABLE_I18N */
2765{
2766 size_t name_len = strlen ((const char *) name);
2767 if (BE (name_len != 1, 0))
2768 return REG_ECOLLATE;
2769 else
2770 {
2771 bitset_set (sbcset, name[0]);
2772 return REG_NOERROR;
2773 }
2774}
2775#endif /* not _LIBC */
2776
2777/* This function parse bracket expression like "[abc]", "[a-c]",
2778 "[[.a-a.]]" etc. */
2779
2780static bin_tree_t *
2781parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2782 reg_syntax_t syntax, reg_errcode_t *err)
2783{
2784#ifdef _LIBC
2785 const unsigned char *collseqmb;
2786 const char *collseqwc;
2787 uint32_t nrules;
2788 int32_t table_size;
2789 const int32_t *symb_table;
2790 const unsigned char *extra;
2791
2792 /* Local function for parse_bracket_exp used in _LIBC environment.
2793 Seek the collating symbol entry correspondings to NAME.
2794 Return the index of the symbol in the SYMB_TABLE. */
2795
2796 auto inline int32_t
2797 __attribute ((always_inline))
2798 seek_collating_symbol_entry (name, name_len)
2799 const unsigned char *name;
2800 size_t name_len;
2801 {
2802 int32_t hash = elem_hash ((const char *) name, name_len);
2803 int32_t elem = hash % table_size;
2804 if (symb_table[2 * elem] != 0)
2805 {
2806 int32_t second = hash % (table_size - 2) + 1;
2807
2808 do
2809 {
2810 /* First compare the hashing value. */
2811 if (symb_table[2 * elem] == hash
2812 /* Compare the length of the name. */
2813 && name_len == extra[symb_table[2 * elem + 1]]
2814 /* Compare the name. */
2815 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
2816 name_len) == 0)
2817 {
2818 /* Yep, this is the entry. */
2819 break;
2820 }
2821
2822 /* Next entry. */
2823 elem += second;
2824 }
2825 while (symb_table[2 * elem] != 0);
2826 }
2827 return elem;
2828 }
2829
2830 /* Local function for parse_bracket_exp used in _LIBC environment.
2831 Look up the collation sequence value of BR_ELEM.
2832 Return the value if succeeded, UINT_MAX otherwise. */
2833
2834 auto inline unsigned int
2835 __attribute ((always_inline))
2836 lookup_collation_sequence_value (br_elem)
2837 bracket_elem_t *br_elem;
2838 {
2839 if (br_elem->type == SB_CHAR)
2840 {
2841 /*
2842 if (MB_CUR_MAX == 1)
2843 */
2844 if (nrules == 0)
2845 return collseqmb[br_elem->opr.ch];
2846 else
2847 {
2848 wint_t wc = __btowc (br_elem->opr.ch);
2849 return __collseq_table_lookup (collseqwc, wc);
2850 }
2851 }
2852 else if (br_elem->type == MB_CHAR)
2853 {
2854 if (nrules != 0)
2855 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2856 }
2857 else if (br_elem->type == COLL_SYM)
2858 {
2859 size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2860 if (nrules != 0)
2861 {
2862 int32_t elem, idx;
2863 elem = seek_collating_symbol_entry (br_elem->opr.name,
2864 sym_name_len);
2865 if (symb_table[2 * elem] != 0)
2866 {
2867 /* We found the entry. */
2868 idx = symb_table[2 * elem + 1];
2869 /* Skip the name of collating element name. */
2870 idx += 1 + extra[idx];
2871 /* Skip the byte sequence of the collating element. */
2872 idx += 1 + extra[idx];
2873 /* Adjust for the alignment. */
2874 idx = (idx + 3) & ~3;
2875 /* Skip the multibyte collation sequence value. */
2876 idx += sizeof (unsigned int);
2877 /* Skip the wide char sequence of the collating element. */
2878 idx += sizeof (unsigned int) *
2879 (1 + *(unsigned int *) (extra + idx));
2880 /* Return the collation sequence value. */
2881 return *(unsigned int *) (extra + idx);
2882 }
2883 else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
2884 {
2885 /* No valid character. Match it as a single byte
2886 character. */
2887 return collseqmb[br_elem->opr.name[0]];
2888 }
2889 }
2890 else if (sym_name_len == 1)
2891 return collseqmb[br_elem->opr.name[0]];
2892 }
2893 return UINT_MAX;
2894 }
2895
2896 /* Local function for parse_bracket_exp used in _LIBC environment.
2897 Build the range expression which starts from START_ELEM, and ends
2898 at END_ELEM. The result are written to MBCSET and SBCSET.
2899 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2900 mbcset->range_ends, is a pointer argument since we may
2901 update it. */
2902
2903 auto inline reg_errcode_t
2904 __attribute ((always_inline))
2905 build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
2906 re_charset_t *mbcset;
2907 int *range_alloc;
2908 bitset_t sbcset;
2909 bracket_elem_t *start_elem, *end_elem;
2910 {
2911 unsigned int ch;
2912 uint32_t start_collseq;
2913 uint32_t end_collseq;
2914
2915 /* Equivalence Classes and Character Classes can't be a range
2916 start/end. */
2917 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
2918 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
2919 0))
2920 return REG_ERANGE;
2921
2922 start_collseq = lookup_collation_sequence_value (start_elem);
2923 end_collseq = lookup_collation_sequence_value (end_elem);
2924 /* Check start/end collation sequence values. */
2925 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
2926 return REG_ECOLLATE;
2927 if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
2928 return REG_ERANGE;
2929
2930 /* Got valid collation sequence values, add them as a new entry.
2931 However, if we have no collation elements, and the character set
2932 is single byte, the single byte character set that we
2933 build below suffices. */
2934 if (nrules > 0 || dfa->mb_cur_max > 1)
2935 {
2936 /* Check the space of the arrays. */
2937 if (BE (*range_alloc == mbcset->nranges, 0))
2938 {
2939 /* There is not enough space, need realloc. */
2940 uint32_t *new_array_start;
2941 uint32_t *new_array_end;
2942 int new_nranges;
2943
2944 /* +1 in case of mbcset->nranges is 0. */
2945 new_nranges = 2 * mbcset->nranges + 1;
2946 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2947 new_nranges);
2948 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2949 new_nranges);
2950
2951 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2952 return REG_ESPACE;
2953
2954 mbcset->range_starts = new_array_start;
2955 mbcset->range_ends = new_array_end;
2956 *range_alloc = new_nranges;
2957 }
2958
2959 mbcset->range_starts[mbcset->nranges] = start_collseq;
2960 mbcset->range_ends[mbcset->nranges++] = end_collseq;
2961 }
2962
2963 /* Build the table for single byte characters. */
2964 for (ch = 0; ch < SBC_MAX; ch++)
2965 {
2966 uint32_t ch_collseq;
2967 /*
2968 if (MB_CUR_MAX == 1)
2969 */
2970 if (nrules == 0)
2971 ch_collseq = collseqmb[ch];
2972 else
2973 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
2974 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2975 bitset_set (sbcset, ch);
2976 }
2977 return REG_NOERROR;
2978 }
2979
2980 /* Local function for parse_bracket_exp used in _LIBC environment.
2981 Build the collating element which is represented by NAME.
2982 The result are written to MBCSET and SBCSET.
2983 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2984 pointer argument since we may update it. */
2985
2986 auto inline reg_errcode_t
2987 __attribute ((always_inline))
2988 build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
2989 re_charset_t *mbcset;
2990 int *coll_sym_alloc;
2991 bitset_t sbcset;
2992 const unsigned char *name;
2993 {
2994 int32_t elem, idx;
2995 size_t name_len = strlen ((const char *) name);
2996 if (nrules != 0)
2997 {
2998 elem = seek_collating_symbol_entry (name, name_len);
2999 if (symb_table[2 * elem] != 0)
3000 {
3001 /* We found the entry. */
3002 idx = symb_table[2 * elem + 1];
3003 /* Skip the name of collating element name. */
3004 idx += 1 + extra[idx];
3005 }
3006 else if (symb_table[2 * elem] == 0 && name_len == 1)
3007 {
3008 /* No valid character, treat it as a normal
3009 character. */
3010 bitset_set (sbcset, name[0]);
3011 return REG_NOERROR;
3012 }
3013 else
3014 return REG_ECOLLATE;
3015
3016 /* Got valid collation sequence, add it as a new entry. */
3017 /* Check the space of the arrays. */
3018 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
3019 {
3020 /* Not enough, realloc it. */
3021 /* +1 in case of mbcset->ncoll_syms is 0. */
3022 int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
3023 /* Use realloc since mbcset->coll_syms is NULL
3024 if *alloc == 0. */
3025 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
3026 new_coll_sym_alloc);
3027 if (BE (new_coll_syms == NULL, 0))
3028 return REG_ESPACE;
3029 mbcset->coll_syms = new_coll_syms;
3030 *coll_sym_alloc = new_coll_sym_alloc;
3031 }
3032 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
3033 return REG_NOERROR;
3034 }
3035 else
3036 {
3037 if (BE (name_len != 1, 0))
3038 return REG_ECOLLATE;
3039 else
3040 {
3041 bitset_set (sbcset, name[0]);
3042 return REG_NOERROR;
3043 }
3044 }
3045 }
3046#endif
3047
3048 re_token_t br_token;
3049 re_bitset_ptr_t sbcset;
3050#ifdef RE_ENABLE_I18N
3051 re_charset_t *mbcset;
3052 int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
3053 int equiv_class_alloc = 0, char_class_alloc = 0;
3054#endif /* not RE_ENABLE_I18N */
3055 int non_match = 0;
3056 bin_tree_t *work_tree;
3057 int token_len;
3058 int first_round = 1;
3059#ifdef _LIBC
3060 collseqmb = (const unsigned char *)
3061 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3062 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3063 if (nrules)
3064 {
3065 /*
3066 if (MB_CUR_MAX > 1)
3067 */
3068 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3069 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3070 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3071 _NL_COLLATE_SYMB_TABLEMB);
3072 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3073 _NL_COLLATE_SYMB_EXTRAMB);
3074 }
3075#endif
3076 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3077#ifdef RE_ENABLE_I18N
3078 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3079#endif /* RE_ENABLE_I18N */
3080#ifdef RE_ENABLE_I18N
3081 if (BE (sbcset == NULL || mbcset == NULL, 0))
3082#else
3083 if (BE (sbcset == NULL, 0))
3084#endif /* RE_ENABLE_I18N */
3085 {
3086 *err = REG_ESPACE;
3087 return NULL;
3088 }
3089
3090 token_len = peek_token_bracket (token, regexp, syntax);
3091 if (BE (token->type == END_OF_RE, 0))
3092 {
3093 *err = REG_BADPAT;
3094 goto parse_bracket_exp_free_return;
3095 }
3096 if (token->type == OP_NON_MATCH_LIST)
3097 {
3098#ifdef RE_ENABLE_I18N
3099 mbcset->non_match = 1;
3100#endif /* not RE_ENABLE_I18N */
3101 non_match = 1;
3102 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3103 bitset_set (sbcset, '\n');
3104 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3105 token_len = peek_token_bracket (token, regexp, syntax);
3106 if (BE (token->type == END_OF_RE, 0))
3107 {
3108 *err = REG_BADPAT;
3109 goto parse_bracket_exp_free_return;
3110 }
3111 }
3112
3113 /* We treat the first ']' as a normal character. */
3114 if (token->type == OP_CLOSE_BRACKET)
3115 token->type = CHARACTER;
3116
3117 while (1)
3118 {
3119 bracket_elem_t start_elem, end_elem;
3120 unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
3121 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
3122 reg_errcode_t ret;
3123 int token_len2 = 0, is_range_exp = 0;
3124 re_token_t token2;
3125
3126 start_elem.opr.name = start_name_buf;
3127 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3128 syntax, first_round);
3129 if (BE (ret != REG_NOERROR, 0))
3130 {
3131 *err = ret;
3132 goto parse_bracket_exp_free_return;
3133 }
3134 first_round = 0;
3135
3136 /* Get information about the next token. We need it in any case. */
3137 token_len = peek_token_bracket (token, regexp, syntax);
3138
3139 /* Do not check for ranges if we know they are not allowed. */
3140 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3141 {
3142 if (BE (token->type == END_OF_RE, 0))
3143 {
3144 *err = REG_EBRACK;
3145 goto parse_bracket_exp_free_return;
3146 }
3147 if (token->type == OP_CHARSET_RANGE)
3148 {
3149 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
3150 token_len2 = peek_token_bracket (&token2, regexp, syntax);
3151 if (BE (token2.type == END_OF_RE, 0))
3152 {
3153 *err = REG_EBRACK;
3154 goto parse_bracket_exp_free_return;
3155 }
3156 if (token2.type == OP_CLOSE_BRACKET)
3157 {
3158 /* We treat the last '-' as a normal character. */
3159 re_string_skip_bytes (regexp, -token_len);
3160 token->type = CHARACTER;
3161 }
3162 else
3163 is_range_exp = 1;
3164 }
3165 }
3166
3167 if (is_range_exp == 1)
3168 {
3169 end_elem.opr.name = end_name_buf;
3170 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3171 dfa, syntax, 1);
3172 if (BE (ret != REG_NOERROR, 0))
3173 {
3174 *err = ret;
3175 goto parse_bracket_exp_free_return;
3176 }
3177
3178 token_len = peek_token_bracket (token, regexp, syntax);
3179
3180#ifdef _LIBC
3181 *err = build_range_exp (sbcset, mbcset, &range_alloc,
3182 &start_elem, &end_elem);
3183#else
3184# ifdef RE_ENABLE_I18N
3185 *err = build_range_exp (sbcset,
3186 dfa->mb_cur_max > 1 ? mbcset : NULL,
3187 &range_alloc, &start_elem, &end_elem);
3188# else
3189 *err = build_range_exp (sbcset, &start_elem, &end_elem);
3190# endif
3191#endif /* RE_ENABLE_I18N */
3192 if (BE (*err != REG_NOERROR, 0))
3193 goto parse_bracket_exp_free_return;
3194 }
3195 else
3196 {
3197 switch (start_elem.type)
3198 {
3199 case SB_CHAR:
3200 bitset_set (sbcset, start_elem.opr.ch);
3201 break;
3202#ifdef RE_ENABLE_I18N
3203 case MB_CHAR:
3204 /* Check whether the array has enough space. */
3205 if (BE (mbchar_alloc == mbcset->nmbchars, 0))
3206 {
3207 wchar_t *new_mbchars;
3208 /* Not enough, realloc it. */
3209 /* +1 in case of mbcset->nmbchars is 0. */
3210 mbchar_alloc = 2 * mbcset->nmbchars + 1;
3211 /* Use realloc since array is NULL if *alloc == 0. */
3212 new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
3213 mbchar_alloc);
3214 if (BE (new_mbchars == NULL, 0))
3215 goto parse_bracket_exp_espace;
3216 mbcset->mbchars = new_mbchars;
3217 }
3218 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3219 break;
3220#endif /* RE_ENABLE_I18N */
3221 case EQUIV_CLASS:
3222 *err = build_equiv_class (sbcset,
3223#ifdef RE_ENABLE_I18N
3224 mbcset, &equiv_class_alloc,
3225#endif /* RE_ENABLE_I18N */
3226 start_elem.opr.name);
3227 if (BE (*err != REG_NOERROR, 0))
3228 goto parse_bracket_exp_free_return;
3229 break;
3230 case COLL_SYM:
3231 *err = build_collating_symbol (sbcset,
3232#ifdef RE_ENABLE_I18N
3233 mbcset, &coll_sym_alloc,
3234#endif /* RE_ENABLE_I18N */
3235 start_elem.opr.name);
3236 if (BE (*err != REG_NOERROR, 0))
3237 goto parse_bracket_exp_free_return;
3238 break;
3239 case CHAR_CLASS:
3240 *err = build_charclass (regexp->trans, sbcset,
3241#ifdef RE_ENABLE_I18N
3242 mbcset, &char_class_alloc,
3243#endif /* RE_ENABLE_I18N */
3244 (const char *) start_elem.opr.name, syntax);
3245 if (BE (*err != REG_NOERROR, 0))
3246 goto parse_bracket_exp_free_return;
3247 break;
3248 default:
3249 assert (0);
3250 break;
3251 }
3252 }
3253 if (BE (token->type == END_OF_RE, 0))
3254 {
3255 *err = REG_EBRACK;
3256 goto parse_bracket_exp_free_return;
3257 }
3258 if (token->type == OP_CLOSE_BRACKET)
3259 break;
3260 }
3261
3262 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3263
3264 /* If it is non-matching list. */
3265 if (non_match)
3266 bitset_not (sbcset);
3267
3268#ifdef RE_ENABLE_I18N
3269 /* Ensure only single byte characters are set. */
3270 if (dfa->mb_cur_max > 1)
3271 bitset_mask (sbcset, dfa->sb_char);
3272
3273 if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
3274 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
3275 || mbcset->non_match)))
3276 {
3277 bin_tree_t *mbc_tree;
3278 int sbc_idx;
3279 /* Build a tree for complex bracket. */
3280 dfa->has_mb_node = 1;
3281 br_token.type = COMPLEX_BRACKET;
3282 br_token.opr.mbcset = mbcset;
3283 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3284 if (BE (mbc_tree == NULL, 0))
3285 goto parse_bracket_exp_espace;
3286 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
3287 if (sbcset[sbc_idx])
3288 break;
3289 /* If there are no bits set in sbcset, there is no point
3290 of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
3291 if (sbc_idx < BITSET_WORDS)
3292 {
3293 /* Build a tree for simple bracket. */
3294 br_token.type = SIMPLE_BRACKET;
3295 br_token.opr.sbcset = sbcset;
3296 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3297 if (BE (work_tree == NULL, 0))
3298 goto parse_bracket_exp_espace;
3299
3300 /* Then join them by ALT node. */
3301 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3302 if (BE (work_tree == NULL, 0))
3303 goto parse_bracket_exp_espace;
3304 }
3305 else
3306 {
3307 re_free (sbcset);
3308 work_tree = mbc_tree;
3309 }
3310 }
3311 else
3312#endif /* not RE_ENABLE_I18N */
3313 {
3314#ifdef RE_ENABLE_I18N
3315 free_charset (mbcset);
3316#endif
3317 /* Build a tree for simple bracket. */
3318 br_token.type = SIMPLE_BRACKET;
3319 br_token.opr.sbcset = sbcset;
3320 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3321 if (BE (work_tree == NULL, 0))
3322 goto parse_bracket_exp_espace;
3323 }
3324 return work_tree;
3325
3326 parse_bracket_exp_espace:
3327 *err = REG_ESPACE;
3328 parse_bracket_exp_free_return:
3329 re_free (sbcset);
3330#ifdef RE_ENABLE_I18N
3331 free_charset (mbcset);
3332#endif /* RE_ENABLE_I18N */
3333 return NULL;
3334}
3335
3336/* Parse an element in the bracket expression. */
3337
3338static reg_errcode_t
3339parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
3340 re_token_t *token, int token_len,
3341 UNUSED_PARAM re_dfa_t *dfa, reg_syntax_t syntax,
3342 int accept_hyphen)
3343{
3344#ifdef RE_ENABLE_I18N
3345 int cur_char_size;
3346 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3347 if (cur_char_size > 1)
3348 {
3349 elem->type = MB_CHAR;
3350 elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
3351 re_string_skip_bytes (regexp, cur_char_size);
3352 return REG_NOERROR;
3353 }
3354#endif /* RE_ENABLE_I18N */
3355 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3356 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
3357 || token->type == OP_OPEN_EQUIV_CLASS)
3358 return parse_bracket_symbol (elem, regexp, token);
3359 if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
3360 {
3361 /* A '-' must only appear as anything but a range indicator before
3362 the closing bracket. Everything else is an error. */
3363 re_token_t token2;
3364 (void) peek_token_bracket (&token2, regexp, syntax);
3365 if (token2.type != OP_CLOSE_BRACKET)
3366 /* The actual error value is not standardized since this whole
3367 case is undefined. But ERANGE makes good sense. */
3368 return REG_ERANGE;
3369 }
3370 elem->type = SB_CHAR;
3371 elem->opr.ch = token->opr.c;
3372 return REG_NOERROR;
3373}
3374
3375/* Parse a bracket symbol in the bracket expression. Bracket symbols are
3376 such as [:<character_class>:], [.<collating_element>.], and
3377 [=<equivalent_class>=]. */
3378
3379static reg_errcode_t
3380parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
3381 re_token_t *token)
3382{
3383 unsigned char ch, delim = token->opr.c;
3384 int i = 0;
3385 if (re_string_eoi(regexp))
3386 return REG_EBRACK;
3387 for (;; ++i)
3388 {
3389 if (i >= BRACKET_NAME_BUF_SIZE)
3390 return REG_EBRACK;
3391 if (token->type == OP_OPEN_CHAR_CLASS)
3392 ch = re_string_fetch_byte_case (regexp);
3393 else
3394 ch = re_string_fetch_byte (regexp);
3395 if (re_string_eoi(regexp))
3396 return REG_EBRACK;
3397 if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
3398 break;
3399 elem->opr.name[i] = ch;
3400 }
3401 re_string_skip_bytes (regexp, 1);
3402 elem->opr.name[i] = '\0';
3403 switch (token->type)
3404 {
3405 case OP_OPEN_COLL_ELEM:
3406 elem->type = COLL_SYM;
3407 break;
3408 case OP_OPEN_EQUIV_CLASS:
3409 elem->type = EQUIV_CLASS;
3410 break;
3411 case OP_OPEN_CHAR_CLASS:
3412 elem->type = CHAR_CLASS;
3413 break;
3414 default:
3415 break;
3416 }
3417 return REG_NOERROR;
3418}
3419
3420 /* Helper function for parse_bracket_exp.
3421 Build the equivalence class which is represented by NAME.
3422 The result are written to MBCSET and SBCSET.
3423 EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3424 is a pointer argument since we may update it. */
3425
3426static reg_errcode_t
3427#ifdef RE_ENABLE_I18N
3428build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3429 int *equiv_class_alloc, const unsigned char *name)
3430#else /* not RE_ENABLE_I18N */
3431build_equiv_class (bitset_t sbcset, const unsigned char *name)
3432#endif /* not RE_ENABLE_I18N */
3433{
3434#ifdef _LIBC
3435 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3436 if (nrules != 0)
3437 {
3438 const int32_t *table, *indirect;
3439 const unsigned char *weights, *extra, *cp;
3440 unsigned char char_buf[2];
3441 int32_t idx1, idx2;
3442 unsigned int ch;
3443 size_t len;
3444 /* This #include defines a local function! */
3445# include <locale/weight.h>
3446 /* Calculate the index for equivalence class. */
3447 cp = name;
3448 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3449 weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3450 _NL_COLLATE_WEIGHTMB);
3451 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3452 _NL_COLLATE_EXTRAMB);
3453 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3454 _NL_COLLATE_INDIRECTMB);
3455 idx1 = findidx (&cp);
3456 if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
3457 /* This isn't a valid character. */
3458 return REG_ECOLLATE;
3459
3460 /* Build single byte matcing table for this equivalence class. */
3461 char_buf[1] = (unsigned char) '\0';
3462 len = weights[idx1 & 0xffffff];
3463 for (ch = 0; ch < SBC_MAX; ++ch)
3464 {
3465 char_buf[0] = ch;
3466 cp = char_buf;
3467 idx2 = findidx (&cp);
3468/*
3469 idx2 = table[ch];
3470*/
3471 if (idx2 == 0)
3472 /* This isn't a valid character. */
3473 continue;
3474 /* Compare only if the length matches and the collation rule
3475 index is the same. */
3476 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
3477 {
3478 int cnt = 0;
3479
3480 while (cnt <= len &&
3481 weights[(idx1 & 0xffffff) + 1 + cnt]
3482 == weights[(idx2 & 0xffffff) + 1 + cnt])
3483 ++cnt;
3484
3485 if (cnt > len)
3486 bitset_set (sbcset, ch);
3487 }
3488 }
3489 /* Check whether the array has enough space. */
3490 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
3491 {
3492 /* Not enough, realloc it. */
3493 /* +1 in case of mbcset->nequiv_classes is 0. */
3494 int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
3495 /* Use realloc since the array is NULL if *alloc == 0. */
3496 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
3497 int32_t,
3498 new_equiv_class_alloc);
3499 if (BE (new_equiv_classes == NULL, 0))
3500 return REG_ESPACE;
3501 mbcset->equiv_classes = new_equiv_classes;
3502 *equiv_class_alloc = new_equiv_class_alloc;
3503 }
3504 mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3505 }
3506 else
3507#endif /* _LIBC */
3508 {
3509 if (BE (strlen ((const char *) name) != 1, 0))
3510 return REG_ECOLLATE;
3511 bitset_set (sbcset, *name);
3512 }
3513 return REG_NOERROR;
3514}
3515
3516 /* Helper function for parse_bracket_exp.
3517 Build the character class which is represented by NAME.
3518 The result are written to MBCSET and SBCSET.
3519 CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3520 is a pointer argument since we may update it. */
3521
3522static reg_errcode_t
3523#ifdef RE_ENABLE_I18N
3524build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3525 re_charset_t *mbcset, int *char_class_alloc,
3526 const char *class_name, reg_syntax_t syntax)
3527#else /* not RE_ENABLE_I18N */
3528build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3529 const char *class_name, reg_syntax_t syntax)
3530#endif /* not RE_ENABLE_I18N */
3531{
3532 int i;
3533
3534 /* In case of REG_ICASE "upper" and "lower" match the both of
3535 upper and lower cases. */
3536 if ((syntax & RE_ICASE)
3537 && (strcmp (class_name, "upper") == 0 || strcmp (class_name, "lower") == 0))
3538 class_name = "alpha";
3539
3540#ifdef RE_ENABLE_I18N
3541 /* Check the space of the arrays. */
3542 if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
3543 {
3544 /* Not enough, realloc it. */
3545 /* +1 in case of mbcset->nchar_classes is 0. */
3546 int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
3547 /* Use realloc since array is NULL if *alloc == 0. */
3548 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
3549 new_char_class_alloc);
3550 if (BE (new_char_classes == NULL, 0))
3551 return REG_ESPACE;
3552 mbcset->char_classes = new_char_classes;
3553 *char_class_alloc = new_char_class_alloc;
3554 }
3555 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (class_name);
3556#endif /* RE_ENABLE_I18N */
3557
3558#define BUILD_CHARCLASS_LOOP(ctype_func) \
3559 do { \
3560 if (BE (trans != NULL, 0)) \
3561 { \
3562 for (i = 0; i < SBC_MAX; ++i) \
3563 if (ctype_func (i)) \
3564 bitset_set (sbcset, trans[i]); \
3565 } \
3566 else \
3567 { \
3568 for (i = 0; i < SBC_MAX; ++i) \
3569 if (ctype_func (i)) \
3570 bitset_set (sbcset, i); \
3571 } \
3572 } while (0)
3573
3574 if (strcmp (class_name, "alnum") == 0)
3575 BUILD_CHARCLASS_LOOP (isalnum);
3576 else if (strcmp (class_name, "cntrl") == 0)
3577 BUILD_CHARCLASS_LOOP (iscntrl);
3578 else if (strcmp (class_name, "lower") == 0)
3579 BUILD_CHARCLASS_LOOP (islower);
3580 else if (strcmp (class_name, "space") == 0)
3581 BUILD_CHARCLASS_LOOP (isspace);
3582 else if (strcmp (class_name, "alpha") == 0)
3583 BUILD_CHARCLASS_LOOP (isalpha);
3584 else if (strcmp (class_name, "digit") == 0)
3585 BUILD_CHARCLASS_LOOP (isdigit);
3586 else if (strcmp (class_name, "print") == 0)
3587 BUILD_CHARCLASS_LOOP (isprint);
3588 else if (strcmp (class_name, "upper") == 0)
3589 BUILD_CHARCLASS_LOOP (isupper);
3590 else if (strcmp (class_name, "blank") == 0)
3591#ifndef GAWK
3592 BUILD_CHARCLASS_LOOP (isblank);
3593#else
3594 /* see comments above */
3595 BUILD_CHARCLASS_LOOP (is_blank);
3596#endif
3597 else if (strcmp (class_name, "graph") == 0)
3598 BUILD_CHARCLASS_LOOP (isgraph);
3599 else if (strcmp (class_name, "punct") == 0)
3600 BUILD_CHARCLASS_LOOP (ispunct);
3601 else if (strcmp (class_name, "xdigit") == 0)
3602 BUILD_CHARCLASS_LOOP (isxdigit);
3603 else
3604 return REG_ECTYPE;
3605
3606 return REG_NOERROR;
3607}
3608
3609static bin_tree_t *
3610build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3611 const char *class_name,
3612 const char *extra, int non_match,
3613 reg_errcode_t *err)
3614{
3615 re_bitset_ptr_t sbcset;
3616#ifdef RE_ENABLE_I18N
3617 re_charset_t *mbcset;
3618 int alloc = 0;
3619#endif /* not RE_ENABLE_I18N */
3620 reg_errcode_t ret;
3621 re_token_t br_token;
3622 bin_tree_t *tree;
3623
3624 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3625#ifdef RE_ENABLE_I18N
3626 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3627#endif /* RE_ENABLE_I18N */
3628
3629#ifdef RE_ENABLE_I18N
3630 if (BE (sbcset == NULL || mbcset == NULL, 0))
3631#else /* not RE_ENABLE_I18N */
3632 if (BE (sbcset == NULL, 0))
3633#endif /* not RE_ENABLE_I18N */
3634 {
3635 *err = REG_ESPACE;
3636 return NULL;
3637 }
3638
3639 if (non_match)
3640 {
3641#ifdef RE_ENABLE_I18N
3642 mbcset->non_match = 1;
3643#endif /* not RE_ENABLE_I18N */
3644 }
3645
3646 /* We don't care the syntax in this case. */
3647 ret = build_charclass (trans, sbcset,
3648#ifdef RE_ENABLE_I18N
3649 mbcset, &alloc,
3650#endif /* RE_ENABLE_I18N */
3651 class_name, 0);
3652
3653 if (BE (ret != REG_NOERROR, 0))
3654 {
3655 re_free (sbcset);
3656#ifdef RE_ENABLE_I18N
3657 free_charset (mbcset);
3658#endif /* RE_ENABLE_I18N */
3659 *err = ret;
3660 return NULL;
3661 }
3662 /* \w match '_' also. */
3663 for (; *extra; extra++)
3664 bitset_set (sbcset, *extra);
3665
3666 /* If it is non-matching list. */
3667 if (non_match)
3668 bitset_not (sbcset);
3669
3670#ifdef RE_ENABLE_I18N
3671 /* Ensure only single byte characters are set. */
3672 if (dfa->mb_cur_max > 1)
3673 bitset_mask (sbcset, dfa->sb_char);
3674#endif
3675
3676 /* Build a tree for simple bracket. */
3677 br_token.type = SIMPLE_BRACKET;
3678 br_token.opr.sbcset = sbcset;
3679 tree = create_token_tree (dfa, NULL, NULL, &br_token);
3680 if (BE (tree == NULL, 0))
3681 goto build_word_op_espace;
3682
3683#ifdef RE_ENABLE_I18N
3684 if (dfa->mb_cur_max > 1)
3685 {
3686 bin_tree_t *mbc_tree;
3687 /* Build a tree for complex bracket. */
3688 br_token.type = COMPLEX_BRACKET;
3689 br_token.opr.mbcset = mbcset;
3690 dfa->has_mb_node = 1;
3691 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3692 if (BE (mbc_tree == NULL, 0))
3693 goto build_word_op_espace;
3694 /* Then join them by ALT node. */
3695 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3696 if (BE (mbc_tree != NULL, 1))
3697 return tree;
3698 }
3699 else
3700 {
3701 free_charset (mbcset);
3702 return tree;
3703 }
3704#else /* not RE_ENABLE_I18N */
3705 return tree;
3706#endif /* not RE_ENABLE_I18N */
3707
3708 build_word_op_espace:
3709 re_free (sbcset);
3710#ifdef RE_ENABLE_I18N
3711 free_charset (mbcset);
3712#endif /* RE_ENABLE_I18N */
3713 *err = REG_ESPACE;
3714 return NULL;
3715}
3716
3717/* This is intended for the expressions like "a{1,3}".
3718 Fetch a number from `input', and return the number.
3719 Return -1, if the number field is empty like "{,1}".
3720 Return -2, if an error has occurred. */
3721
3722static int
3723fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
3724{
3725 int num = -1;
3726 unsigned char c;
3727 while (1)
3728 {
3729 fetch_token (token, input, syntax);
3730 c = token->opr.c;
3731 if (BE (token->type == END_OF_RE, 0))
3732 return -2;
3733 if (token->type == OP_CLOSE_DUP_NUM || c == ',')
3734 break;
3735 num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
3736 ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
3737 num = (num > RE_DUP_MAX) ? -2 : num;
3738 }
3739 return num;
3740}
3741
3742#ifdef RE_ENABLE_I18N
3743static void
3744free_charset (re_charset_t *cset)
3745{
3746 re_free (cset->mbchars);
3747# ifdef _LIBC
3748 re_free (cset->coll_syms);
3749 re_free (cset->equiv_classes);
3750 re_free (cset->range_starts);
3751 re_free (cset->range_ends);
3752# endif
3753 re_free (cset->char_classes);
3754 re_free (cset);
3755}
3756#endif /* RE_ENABLE_I18N */
3757
3758/* Functions for binary tree operation. */
3759
3760/* Create a tree node. */
3761
3762static bin_tree_t *
3763create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3764 re_token_type_t type)
3765{
3766 re_token_t t;
3767 t.type = type;
3768 return create_token_tree (dfa, left, right, &t);
3769}
3770
3771static bin_tree_t *
3772create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3773 const re_token_t *token)
3774{
3775 bin_tree_t *tree;
3776 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
3777 {
3778 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
3779
3780 if (storage == NULL)
3781 return NULL;
3782 storage->next = dfa->str_tree_storage;
3783 dfa->str_tree_storage = storage;
3784 dfa->str_tree_storage_idx = 0;
3785 }
3786 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3787
3788 tree->parent = NULL;
3789 tree->left = left;
3790 tree->right = right;
3791 tree->token = *token;
3792 tree->token.duplicated = 0;
3793 tree->token.opt_subexp = 0;
3794 tree->first = NULL;
3795 tree->next = NULL;
3796 tree->node_idx = -1;
3797
3798 if (left != NULL)
3799 left->parent = tree;
3800 if (right != NULL)
3801 right->parent = tree;
3802 return tree;
3803}
3804
3805/* Mark the tree SRC as an optional subexpression.
3806 To be called from preorder or postorder. */
3807
3808static reg_errcode_t
3809mark_opt_subexp (void *extra, bin_tree_t *node)
3810{
3811 int idx = (int) (intptr_t) extra;
3812 if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3813 node->token.opt_subexp = 1;
3814
3815 return REG_NOERROR;
3816}
3817
3818/* Free the allocated memory inside NODE. */
3819
3820static void
3821free_token (re_token_t *node)
3822{
3823#ifdef RE_ENABLE_I18N
3824 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
3825 free_charset (node->opr.mbcset);
3826 else
3827#endif /* RE_ENABLE_I18N */
3828 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3829 re_free (node->opr.sbcset);
3830}
3831
3832/* Worker function for tree walking. Free the allocated memory inside NODE
3833 and its children. */
3834
3835static reg_errcode_t
3836free_tree (UNUSED_PARAM void *extra, bin_tree_t *node)
3837{
3838 free_token (&node->token);
3839 return REG_NOERROR;
3840}
3841
3842
3843/* Duplicate the node SRC, and return new node. This is a preorder
3844 visit similar to the one implemented by the generic visitor, but
3845 we need more infrastructure to maintain two parallel trees --- so,
3846 it's easier to duplicate. */
3847
3848static bin_tree_t *
3849duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
3850{
3851 const bin_tree_t *node;
3852 bin_tree_t *dup_root;
3853 bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
3854
3855 for (node = root; ; )
3856 {
3857 /* Create a new tree and link it back to the current parent. */
3858 *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
3859 if (*p_new == NULL)
3860 return NULL;
3861 (*p_new)->parent = dup_node;
3862 (*p_new)->token.duplicated = 1;
3863 dup_node = *p_new;
3864
3865 /* Go to the left node, or up and to the right. */
3866 if (node->left)
3867 {
3868 node = node->left;
3869 p_new = &dup_node->left;
3870 }
3871 else
3872 {
3873 const bin_tree_t *prev = NULL;
3874 while (node->right == prev || node->right == NULL)
3875 {
3876 prev = node;
3877 node = node->parent;
3878 dup_node = dup_node->parent;
3879 if (!node)
3880 return dup_root;
3881 }
3882 node = node->right;
3883 p_new = &dup_node->right;
3884 }
3885 }
3886}
diff --git a/win32/regex.c b/win32/regex.c
new file mode 100644
index 000000000..e40a2ea01
--- /dev/null
+++ b/win32/regex.c
@@ -0,0 +1,90 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA. */
20
21#define HAVE_LIBINTL_H 0
22#define ENABLE_NLS 0
23#define HAVE_ALLOCA 0
24#define NO_MBSUPPORT 1
25#define GAWK 1
26
27/* Make sure no one compiles this code with a C++ compiler. */
28#ifdef __cplusplus
29# error "This is C code, use a C compiler"
30#endif
31
32#ifdef _LIBC
33/* We have to keep the namespace clean. */
34# define regfree(preg) __regfree (preg)
35# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
36# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
37# define regerror(errcode, preg, errbuf, errbuf_size) \
38 __regerror(errcode, preg, errbuf, errbuf_size)
39# define re_set_registers(bu, re, nu, st, en) \
40 __re_set_registers (bu, re, nu, st, en)
41# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
42 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
43# define re_match(bufp, string, size, pos, regs) \
44 __re_match (bufp, string, size, pos, regs)
45# define re_search(bufp, string, size, startpos, range, regs) \
46 __re_search (bufp, string, size, startpos, range, regs)
47# define re_compile_pattern(pattern, length, bufp) \
48 __re_compile_pattern (pattern, length, bufp)
49# define re_set_syntax(syntax) __re_set_syntax (syntax)
50# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
51 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
52# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
53
54# include "../locale/localeinfo.h"
55#endif
56
57#if defined (_MSC_VER)
58#include <stdio.h> /* for size_t */
59#endif
60
61/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
62 GNU regex allows. Include it before <regex.h>, which correctly
63 #undefs RE_DUP_MAX and sets it to the right value. */
64#include <limits.h>
65#include <stdint.h>
66
67#ifdef GAWK
68#undef alloca
69#define alloca alloca_is_bad_you_should_never_use_it
70#endif
71#include <regex.h>
72#include "regex_internal.h"
73
74#include "regex_internal.c"
75#ifdef GAWK
76#define bool int
77#define true (1)
78#define false (0)
79#endif
80#include "regcomp.c"
81#include "regexec.c"
82
83/* Binary backward compatibility. */
84#ifdef _LIBC
85# include <shlib-compat.h>
86# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
87link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
88int re_max_failures = 2000;
89# endif
90#endif
diff --git a/win32/regex.h b/win32/regex.h
new file mode 100644
index 000000000..61c968387
--- /dev/null
+++ b/win32/regex.h
@@ -0,0 +1,582 @@
1#include <stdio.h>
2#include <stddef.h>
3
4/* Definitions for data structures and routines for the regular
5 expression library.
6 Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008
7 Free Software Foundation, Inc.
8 This file is part of the GNU C Library.
9
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
14
15 The GNU C Library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public
21 License along with the GNU C Library; if not, write to the Free
22 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 02110-1301 USA. */
24
25#ifndef _REGEX_H
26#define _REGEX_H 1
27
28#ifdef HAVE_STDDEF_H
29#include <stddef.h>
30#endif
31
32#ifdef HAVE_SYS_TYPES_H
33#include <sys/types.h>
34#endif
35
36#ifndef _LIBC
37#define __USE_GNU 1
38#endif
39
40/* Allow the use in C++ code. */
41#ifdef __cplusplus
42extern "C" {
43#endif
44
45/* The following two types have to be signed and unsigned integer type
46 wide enough to hold a value of a pointer. For most ANSI compilers
47 ptrdiff_t and size_t should be likely OK. Still size of these two
48 types is 2 for Microsoft C. Ugh... */
49typedef long int s_reg_t;
50typedef unsigned long int active_reg_t;
51
52/* The following bits are used to determine the regexp syntax we
53 recognize. The set/not-set meanings are chosen so that Emacs syntax
54 remains the value 0. The bits are given in alphabetical order, and
55 the definitions shifted by one from the previous bit; thus, when we
56 add or remove a bit, only one other definition need change. */
57typedef unsigned long int reg_syntax_t;
58
59#ifdef __USE_GNU
60/* If this bit is not set, then \ inside a bracket expression is literal.
61 If set, then such a \ quotes the following character. */
62# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
63
64/* If this bit is not set, then + and ? are operators, and \+ and \? are
65 literals.
66 If set, then \+ and \? are operators and + and ? are literals. */
67# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
68
69/* If this bit is set, then character classes are supported. They are:
70 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
71 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
72 If not set, then character classes are not supported. */
73# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
74
75/* If this bit is set, then ^ and $ are always anchors (outside bracket
76 expressions, of course).
77 If this bit is not set, then it depends:
78 ^ is an anchor if it is at the beginning of a regular
79 expression or after an open-group or an alternation operator;
80 $ is an anchor if it is at the end of a regular expression, or
81 before a close-group or an alternation operator.
82
83 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
84 POSIX draft 11.2 says that * etc. in leading positions is undefined.
85 We already implemented a previous draft which made those constructs
86 invalid, though, so we haven't changed the code back. */
87# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
88
89/* If this bit is set, then special characters are always special
90 regardless of where they are in the pattern.
91 If this bit is not set, then special characters are special only in
92 some contexts; otherwise they are ordinary. Specifically,
93 * + ? and intervals are only special when not after the beginning,
94 open-group, or alternation operator. */
95# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
96
97/* If this bit is set, then *, +, ?, and { cannot be first in an re or
98 immediately after an alternation or begin-group operator. */
99# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
100
101/* If this bit is set, then . matches newline.
102 If not set, then it doesn't. */
103# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
104
105/* If this bit is set, then . doesn't match NUL.
106 If not set, then it does. */
107# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
108
109/* If this bit is set, nonmatching lists [^...] do not match newline.
110 If not set, they do. */
111# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
112
113/* If this bit is set, either \{...\} or {...} defines an
114 interval, depending on RE_NO_BK_BRACES.
115 If not set, \{, \}, {, and } are literals. */
116# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
117
118/* If this bit is set, +, ? and | aren't recognized as operators.
119 If not set, they are. */
120# define RE_LIMITED_OPS (RE_INTERVALS << 1)
121
122/* If this bit is set, newline is an alternation operator.
123 If not set, newline is literal. */
124# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
125
126/* If this bit is set, then `{...}' defines an interval, and \{ and \}
127 are literals.
128 If not set, then `\{...\}' defines an interval. */
129# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
130
131/* If this bit is set, (...) defines a group, and \( and \) are literals.
132 If not set, \(...\) defines a group, and ( and ) are literals. */
133# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
134
135/* If this bit is set, then \<digit> matches <digit>.
136 If not set, then \<digit> is a back-reference. */
137# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
138
139/* If this bit is set, then | is an alternation operator, and \| is literal.
140 If not set, then \| is an alternation operator, and | is literal. */
141# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
142
143/* If this bit is set, then an ending range point collating higher
144 than the starting range point, as in [z-a], is invalid.
145 If not set, then when ending range point collates higher than the
146 starting range point, the range is ignored. */
147# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
148
149/* If this bit is set, then an unmatched ) is ordinary.
150 If not set, then an unmatched ) is invalid. */
151# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
152
153/* If this bit is set, succeed as soon as we match the whole pattern,
154 without further backtracking. */
155# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
156
157/* If this bit is set, do not process the GNU regex operators.
158 If not set, then the GNU regex operators are recognized. */
159# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
160
161/* If this bit is set, a syntactically invalid interval is treated as
162 a string of ordinary characters. For example, the ERE 'a{1' is
163 treated as 'a\{1'. */
164# define RE_INVALID_INTERVAL_ORD (RE_NO_GNU_OPS << 1)
165
166/* If this bit is set, then ignore case when matching.
167 If not set, then case is significant. */
168# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
169
170/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
171 for ^, because it is difficult to scan the regex backwards to find
172 whether ^ should be special. */
173# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
174
175/* If this bit is set, then \{ cannot be first in an bre or
176 immediately after an alternation or begin-group operator. */
177# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
178
179/* If this bit is set, then no_sub will be set to 1 during
180 re_compile_pattern. */
181#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
182#endif
183
184/* This global variable defines the particular regexp syntax to use (for
185 some interfaces). When a regexp is compiled, the syntax used is
186 stored in the pattern buffer, so changing this does not affect
187 already-compiled regexps. */
188extern reg_syntax_t re_syntax_options;
189
190#ifdef __USE_GNU
191/* Define combinations of the above bits for the standard possibilities.
192 (The [[[ comments delimit what gets put into the Texinfo file, so
193 don't delete them!) */
194/* [[[begin syntaxes]]] */
195#define RE_SYNTAX_EMACS 0
196
197#define RE_SYNTAX_AWK \
198 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
199 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
200 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
201 | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
202 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
203
204#define RE_SYNTAX_GNU_AWK \
205 ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
206 | RE_INVALID_INTERVAL_ORD) \
207 & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \
208 | RE_CONTEXT_INVALID_OPS ))
209
210#define RE_SYNTAX_POSIX_AWK \
211 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
212 | RE_INTERVALS | RE_NO_GNU_OPS \
213 | RE_INVALID_INTERVAL_ORD)
214
215#define RE_SYNTAX_GREP \
216 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
217 | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
218 | RE_NEWLINE_ALT)
219
220#define RE_SYNTAX_EGREP \
221 (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
222 | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
223 | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
224 | RE_NO_BK_VBAR)
225
226#define RE_SYNTAX_POSIX_EGREP \
227 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
228 | RE_INVALID_INTERVAL_ORD)
229
230/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
231#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
232
233#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
234
235/* Syntax bits common to both basic and extended POSIX regex syntax. */
236#define _RE_SYNTAX_POSIX_COMMON \
237 (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
238 | RE_INTERVALS | RE_NO_EMPTY_RANGES)
239
240#define RE_SYNTAX_POSIX_BASIC \
241 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
242
243/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
244 RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
245 isn't minimal, since other operators, such as \`, aren't disabled. */
246#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
247 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
248
249#define RE_SYNTAX_POSIX_EXTENDED \
250 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
251 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
252 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
253 | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
254
255/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
256 removed and RE_NO_BK_REFS is added. */
257#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
258 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
259 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
260 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
261 | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
262/* [[[end syntaxes]]] */
263
264/* Maximum number of duplicates an interval can allow. Some systems
265 (erroneously) define this in other header files, but we want our
266 value, so remove any previous define. */
267# ifdef RE_DUP_MAX
268# undef RE_DUP_MAX
269# endif
270/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
271# define RE_DUP_MAX (0x7fff)
272#endif
273
274
275/* POSIX `cflags' bits (i.e., information for `regcomp'). */
276
277/* If this bit is set, then use extended regular expression syntax.
278 If not set, then use basic regular expression syntax. */
279#define REG_EXTENDED 1
280
281/* If this bit is set, then ignore case when matching.
282 If not set, then case is significant. */
283#define REG_ICASE (REG_EXTENDED << 1)
284
285/* If this bit is set, then anchors do not match at newline
286 characters in the string.
287 If not set, then anchors do match at newlines. */
288#define REG_NEWLINE (REG_ICASE << 1)
289
290/* If this bit is set, then report only success or fail in regexec.
291 If not set, then returns differ between not matching and errors. */
292#define REG_NOSUB (REG_NEWLINE << 1)
293
294
295/* POSIX `eflags' bits (i.e., information for regexec). */
296
297/* If this bit is set, then the beginning-of-line operator doesn't match
298 the beginning of the string (presumably because it's not the
299 beginning of a line).
300 If not set, then the beginning-of-line operator does match the
301 beginning of the string. */
302#define REG_NOTBOL 1
303
304/* Like REG_NOTBOL, except for the end-of-line. */
305#define REG_NOTEOL (1 << 1)
306
307/* Use PMATCH[0] to delimit the start and end of the search in the
308 buffer. */
309#define REG_STARTEND (1 << 2)
310
311
312/* If any error codes are removed, changed, or added, update the
313 `re_error_msg' table in regex.c. */
314typedef enum
315{
316#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K
317 REG_ENOSYS = -1, /* This will never happen for this implementation. */
318#endif
319
320 REG_NOERROR = 0, /* Success. */
321 REG_NOMATCH, /* Didn't find a match (for regexec). */
322
323 /* POSIX regcomp return error codes. (In the order listed in the
324 standard.) */
325 REG_BADPAT, /* Invalid pattern. */
326 REG_ECOLLATE, /* Inalid collating element. */
327 REG_ECTYPE, /* Invalid character class name. */
328 REG_EESCAPE, /* Trailing backslash. */
329 REG_ESUBREG, /* Invalid back reference. */
330 REG_EBRACK, /* Unmatched left bracket. */
331 REG_EPAREN, /* Parenthesis imbalance. */
332 REG_EBRACE, /* Unmatched \{. */
333 REG_BADBR, /* Invalid contents of \{\}. */
334 REG_ERANGE, /* Invalid range end. */
335 REG_ESPACE, /* Ran out of memory. */
336 REG_BADRPT, /* No preceding re for repetition op. */
337
338 /* Error codes we've added. */
339 REG_EEND, /* Premature end. */
340 REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
341 REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
342} reg_errcode_t;
343
344/* This data structure represents a compiled pattern. Before calling
345 the pattern compiler, the fields `buffer', `allocated', `fastmap',
346 `translate', and `no_sub' can be set. After the pattern has been
347 compiled, the `re_nsub' field is available. All other fields are
348 private to the regex routines. */
349
350#ifndef RE_TRANSLATE_TYPE
351# define __RE_TRANSLATE_TYPE unsigned char *
352# ifdef __USE_GNU
353# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
354# endif
355#endif
356
357#ifdef __USE_GNU
358# define __REPB_PREFIX(name) name
359#else
360# define __REPB_PREFIX(name) __##name
361#endif
362
363struct re_pattern_buffer
364{
365 /* Space that holds the compiled pattern. It is declared as
366 `unsigned char *' because its elements are sometimes used as
367 array indexes. */
368 unsigned char *__REPB_PREFIX(buffer);
369
370 /* Number of bytes to which `buffer' points. */
371 unsigned long int __REPB_PREFIX(allocated);
372
373 /* Number of bytes actually used in `buffer'. */
374 unsigned long int __REPB_PREFIX(used);
375
376 /* Syntax setting with which the pattern was compiled. */
377 reg_syntax_t __REPB_PREFIX(syntax);
378
379 /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
380 fastmap, if there is one, to skip over impossible starting points
381 for matches. */
382 char *__REPB_PREFIX(fastmap);
383
384 /* Either a translate table to apply to all characters before
385 comparing them, or zero for no translation. The translation is
386 applied to a pattern when it is compiled and to a string when it
387 is matched. */
388 __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
389
390 /* Number of subexpressions found by the compiler. */
391 size_t re_nsub;
392
393 /* Zero if this pattern cannot match the empty string, one else.
394 Well, in truth it's used only in `re_search_2', to see whether or
395 not we should use the fastmap, so we don't set this absolutely
396 perfectly; see `re_compile_fastmap' (the `duplicate' case). */
397 unsigned __REPB_PREFIX(can_be_null) : 1;
398
399 /* If REGS_UNALLOCATED, allocate space in the `regs' structure
400 for `max (RE_NREGS, re_nsub + 1)' groups.
401 If REGS_REALLOCATE, reallocate space if necessary.
402 If REGS_FIXED, use what's there. */
403#ifdef __USE_GNU
404# define REGS_UNALLOCATED 0
405# define REGS_REALLOCATE 1
406# define REGS_FIXED 2
407#endif
408 unsigned __REPB_PREFIX(regs_allocated) : 2;
409
410 /* Set to zero when `regex_compile' compiles a pattern; set to one
411 by `re_compile_fastmap' if it updates the fastmap. */
412 unsigned __REPB_PREFIX(fastmap_accurate) : 1;
413
414 /* If set, `re_match_2' does not return information about
415 subexpressions. */
416 unsigned __REPB_PREFIX(no_sub) : 1;
417
418 /* If set, a beginning-of-line anchor doesn't match at the beginning
419 of the string. */
420 unsigned __REPB_PREFIX(not_bol) : 1;
421
422 /* Similarly for an end-of-line anchor. */
423 unsigned __REPB_PREFIX(not_eol) : 1;
424
425 /* If true, an anchor at a newline matches. */
426 unsigned __REPB_PREFIX(newline_anchor) : 1;
427};
428
429typedef struct re_pattern_buffer regex_t;
430
431/* Type for byte offsets within the string. POSIX mandates this. */
432typedef int regoff_t;
433
434
435#ifdef __USE_GNU
436/* This is the structure we store register match data in. See
437 regex.texinfo for a full description of what registers match. */
438struct re_registers
439{
440 unsigned num_regs;
441 regoff_t *start;
442 regoff_t *end;
443};
444
445
446/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
447 `re_match_2' returns information about at least this many registers
448 the first time a `regs' structure is passed. */
449# ifndef RE_NREGS
450# define RE_NREGS 30
451# endif
452#endif
453
454
455/* POSIX specification for registers. Aside from the different names than
456 `re_registers', POSIX uses an array of structures, instead of a
457 structure of arrays. */
458typedef struct
459{
460 regoff_t rm_so; /* Byte offset from string's start to substring's start. */
461 regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
462} regmatch_t;
463
464/* Declarations for routines. */
465
466#ifdef __USE_GNU
467/* Sets the current default syntax to SYNTAX, and return the old syntax.
468 You can also simply assign to the `re_syntax_options' variable. */
469extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
470
471/* Compile the regular expression PATTERN, with length LENGTH
472 and syntax given by the global `re_syntax_options', into the buffer
473 BUFFER. Return NULL if successful, and an error string if not. */
474extern const char *re_compile_pattern (const char *__pattern, size_t __length,
475 struct re_pattern_buffer *__buffer);
476
477
478/* Compile a fastmap for the compiled pattern in BUFFER; used to
479 accelerate searches. Return 0 if successful and -2 if was an
480 internal error. */
481extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
482
483
484/* Search in the string STRING (with length LENGTH) for the pattern
485 compiled into BUFFER. Start searching at position START, for RANGE
486 characters. Return the starting position of the match, -1 for no
487 match, or -2 for an internal error. Also return register
488 information in REGS (if REGS and BUFFER->no_sub are nonzero). */
489extern int re_search (struct re_pattern_buffer *__buffer, const char *__cstring,
490 int __length, int __start, int __range,
491 struct re_registers *__regs);
492
493
494/* Like `re_search', but search in the concatenation of STRING1 and
495 STRING2. Also, stop searching at index START + STOP. */
496extern int re_search_2 (struct re_pattern_buffer *__buffer,
497 const char *__string1, int __length1,
498 const char *__string2, int __length2, int __start,
499 int __range, struct re_registers *__regs, int __stop);
500
501
502/* Like `re_search', but return how many characters in STRING the regexp
503 in BUFFER matched, starting at position START. */
504extern int re_match (struct re_pattern_buffer *__buffer, const char *__cstring,
505 int __length, int __start, struct re_registers *__regs);
506
507
508/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
509extern int re_match_2 (struct re_pattern_buffer *__buffer,
510 const char *__string1, int __length1,
511 const char *__string2, int __length2, int __start,
512 struct re_registers *__regs, int __stop);
513
514
515/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
516 ENDS. Subsequent matches using BUFFER and REGS will use this memory
517 for recording register information. STARTS and ENDS must be
518 allocated with malloc, and must each be at least `NUM_REGS * sizeof
519 (regoff_t)' bytes long.
520
521 If NUM_REGS == 0, then subsequent matches should allocate their own
522 register data.
523
524 Unless this function is called, the first search or match using
525 PATTERN_BUFFER will allocate its own register data, without
526 freeing the old data. */
527extern void re_set_registers (struct re_pattern_buffer *__buffer,
528 struct re_registers *__regs,
529 unsigned int __num_regs,
530 regoff_t *__starts, regoff_t *__ends);
531#endif /* Use GNU */
532
533#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD)
534# ifndef _CRAY
535/* 4.2 bsd compatibility. */
536extern char *re_comp (const char *);
537extern int re_exec (const char *);
538# endif
539#endif
540
541/* GCC 2.95 and later have "__restrict"; C99 compilers have
542 "restrict", and "configure" may have defined "restrict". */
543#ifndef __restrict
544# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
545# if defined restrict || 199901L <= __STDC_VERSION__
546# define __restrict restrict
547# else
548# define __restrict
549# endif
550# endif
551#endif
552/* gcc 3.1 and up support the [restrict] syntax. */
553#ifndef __restrict_arr
554# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
555 && !defined __GNUG__
556# define __restrict_arr __restrict
557# else
558# define __restrict_arr
559# endif
560#endif
561
562/* POSIX compatibility. */
563extern int regcomp (regex_t *__restrict __preg,
564 const char *__restrict __pattern,
565 int __cflags);
566
567extern int regexec (const regex_t *__restrict __preg,
568 const char *__restrict __cstring, size_t __nmatch,
569 regmatch_t __pmatch[__restrict_arr],
570 int __eflags);
571
572extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
573 char *__restrict __errbuf, size_t __errbuf_size);
574
575extern void regfree (regex_t *__preg);
576
577
578#ifdef __cplusplus
579}
580#endif /* C++ */
581
582#endif /* regex.h */
diff --git a/win32/regex_internal.c b/win32/regex_internal.c
new file mode 100644
index 000000000..c33561743
--- /dev/null
+++ b/win32/regex_internal.c
@@ -0,0 +1,1744 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA. */
20
21static void re_string_construct_common (const char *str, int len,
22 re_string_t *pstr,
23 RE_TRANSLATE_TYPE trans, int icase,
24 const re_dfa_t *dfa) internal_function;
25static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
26 const re_node_set *nodes,
27 unsigned int hash) internal_function;
28static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
29 const re_node_set *nodes,
30 unsigned int context,
31 unsigned int hash) internal_function;
32
33#ifdef GAWK
34#undef MAX /* safety */
35static int
36MAX(size_t a, size_t b)
37{
38 return (a > b ? a : b);
39}
40#endif
41
42/* Functions for string operation. */
43
44/* This function allocate the buffers. It is necessary to call
45 re_string_reconstruct before using the object. */
46
47static reg_errcode_t
48internal_function
49re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
50 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
51{
52 reg_errcode_t ret;
53 int init_buf_len;
54
55 /* Ensure at least one character fits into the buffers. */
56 if (init_len < dfa->mb_cur_max)
57 init_len = dfa->mb_cur_max;
58 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
59 re_string_construct_common (str, len, pstr, trans, icase, dfa);
60
61 ret = re_string_realloc_buffers (pstr, init_buf_len);
62 if (BE (ret != REG_NOERROR, 0))
63 return ret;
64
65 pstr->word_char = dfa->word_char;
66 pstr->word_ops_used = dfa->word_ops_used;
67 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
68 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
69 pstr->valid_raw_len = pstr->valid_len;
70 return REG_NOERROR;
71}
72
73/* This function allocate the buffers, and initialize them. */
74
75static reg_errcode_t
76internal_function
77re_string_construct (re_string_t *pstr, const char *str, int len,
78 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
79{
80 reg_errcode_t ret;
81 memset (pstr, '\0', sizeof (re_string_t));
82 re_string_construct_common (str, len, pstr, trans, icase, dfa);
83
84 if (len > 0)
85 {
86 ret = re_string_realloc_buffers (pstr, len + 1);
87 if (BE (ret != REG_NOERROR, 0))
88 return ret;
89 }
90 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
91
92 if (icase)
93 {
94#ifdef RE_ENABLE_I18N
95 if (dfa->mb_cur_max > 1)
96 {
97 while (1)
98 {
99 ret = build_wcs_upper_buffer (pstr);
100 if (BE (ret != REG_NOERROR, 0))
101 return ret;
102 if (pstr->valid_raw_len >= len)
103 break;
104 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
105 break;
106 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
107 if (BE (ret != REG_NOERROR, 0))
108 return ret;
109 }
110 }
111 else
112#endif /* RE_ENABLE_I18N */
113 build_upper_buffer (pstr);
114 }
115 else
116 {
117#ifdef RE_ENABLE_I18N
118 if (dfa->mb_cur_max > 1)
119 build_wcs_buffer (pstr);
120 else
121#endif /* RE_ENABLE_I18N */
122 {
123 if (trans != NULL)
124 re_string_translate_buffer (pstr);
125 else
126 {
127 pstr->valid_len = pstr->bufs_len;
128 pstr->valid_raw_len = pstr->bufs_len;
129 }
130 }
131 }
132
133 return REG_NOERROR;
134}
135
136/* Helper functions for re_string_allocate, and re_string_construct. */
137
138static reg_errcode_t
139internal_function
140re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
141{
142#ifdef RE_ENABLE_I18N
143 if (pstr->mb_cur_max > 1)
144 {
145 wint_t *new_wcs;
146
147 /* Avoid overflow in realloc. */
148 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int));
149 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
150 return REG_ESPACE;
151
152 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
153 if (BE (new_wcs == NULL, 0))
154 return REG_ESPACE;
155 pstr->wcs = new_wcs;
156 if (pstr->offsets != NULL)
157 {
158 int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
159 if (BE (new_offsets == NULL, 0))
160 return REG_ESPACE;
161 pstr->offsets = new_offsets;
162 }
163 }
164#endif /* RE_ENABLE_I18N */
165 if (pstr->mbs_allocated)
166 {
167 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
168 new_buf_len);
169 if (BE (new_mbs == NULL, 0))
170 return REG_ESPACE;
171 pstr->mbs = new_mbs;
172 }
173 pstr->bufs_len = new_buf_len;
174 return REG_NOERROR;
175}
176
177
178static void
179internal_function
180re_string_construct_common (const char *str, int len, re_string_t *pstr,
181 RE_TRANSLATE_TYPE trans, int icase,
182 const re_dfa_t *dfa)
183{
184 pstr->raw_mbs = (const unsigned char *) str;
185 pstr->len = len;
186 pstr->raw_len = len;
187 pstr->trans = trans;
188 pstr->icase = icase ? 1 : 0;
189 pstr->mbs_allocated = (trans != NULL || icase);
190 pstr->mb_cur_max = dfa->mb_cur_max;
191 pstr->is_utf8 = dfa->is_utf8;
192 pstr->map_notascii = dfa->map_notascii;
193 pstr->stop = pstr->len;
194 pstr->raw_stop = pstr->stop;
195}
196
197#ifdef RE_ENABLE_I18N
198
199/* Build wide character buffer PSTR->WCS.
200 If the byte sequence of the string are:
201 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
202 Then wide character buffer will be:
203 <wc1> , WEOF , <wc2> , WEOF , <wc3>
204 We use WEOF for padding, they indicate that the position isn't
205 a first byte of a multibyte character.
206
207 Note that this function assumes PSTR->VALID_LEN elements are already
208 built and starts from PSTR->VALID_LEN. */
209
210static void
211internal_function
212build_wcs_buffer (re_string_t *pstr)
213{
214#ifdef _LIBC
215 unsigned char buf[MB_LEN_MAX];
216 assert (MB_LEN_MAX >= pstr->mb_cur_max);
217#else
218 unsigned char buf[64];
219#endif
220 mbstate_t prev_st;
221 int byte_idx, end_idx, remain_len;
222 size_t mbclen;
223
224 /* Build the buffers from pstr->valid_len to either pstr->len or
225 pstr->bufs_len. */
226 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
227 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
228 {
229 wchar_t wc;
230 const char *p;
231
232 remain_len = end_idx - byte_idx;
233 prev_st = pstr->cur_state;
234 /* Apply the translation if we need. */
235 if (BE (pstr->trans != NULL, 0))
236 {
237 int i, ch;
238
239 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
240 {
241 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
242 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
243 }
244 p = (const char *) buf;
245 }
246 else
247 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
248 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
249 if (BE (mbclen == (size_t) -2, 0))
250 {
251 /* The buffer doesn't have enough space, finish to build. */
252 pstr->cur_state = prev_st;
253 break;
254 }
255 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
256 {
257 /* We treat these cases as a singlebyte character. */
258 mbclen = 1;
259 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
260 if (BE (pstr->trans != NULL, 0))
261 wc = pstr->trans[wc];
262 pstr->cur_state = prev_st;
263 }
264
265 /* Write wide character and padding. */
266 pstr->wcs[byte_idx++] = wc;
267 /* Write paddings. */
268 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
269 pstr->wcs[byte_idx++] = WEOF;
270 }
271 pstr->valid_len = byte_idx;
272 pstr->valid_raw_len = byte_idx;
273}
274
275/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
276 but for REG_ICASE. */
277
278static reg_errcode_t
279internal_function
280build_wcs_upper_buffer (re_string_t *pstr)
281{
282 mbstate_t prev_st;
283 int src_idx, byte_idx, end_idx, remain_len;
284 size_t mbclen;
285#ifdef _LIBC
286 char buf[MB_LEN_MAX];
287 assert (MB_LEN_MAX >= pstr->mb_cur_max);
288#else
289 char buf[64];
290#endif
291
292 byte_idx = pstr->valid_len;
293 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
294
295 /* The following optimization assumes that ASCII characters can be
296 mapped to wide characters with a simple cast. */
297 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
298 {
299 while (byte_idx < end_idx)
300 {
301 wchar_t wc;
302
303 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
304 && mbsinit (&pstr->cur_state))
305 {
306 /* In case of a singlebyte character. */
307 pstr->mbs[byte_idx]
308 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
309 /* The next step uses the assumption that wchar_t is encoded
310 ASCII-safe: all ASCII values can be converted like this. */
311 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
312 ++byte_idx;
313 continue;
314 }
315
316 remain_len = end_idx - byte_idx;
317 prev_st = pstr->cur_state;
318 mbclen = __mbrtowc (&wc,
319 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
320 + byte_idx), remain_len, &pstr->cur_state);
321 if (BE (mbclen + 2 > 2, 1))
322 {
323 wchar_t wcu = wc;
324 if (iswlower (wc))
325 {
326 size_t mbcdlen;
327
328 wcu = towupper (wc);
329 mbcdlen = wcrtomb (buf, wcu, &prev_st);
330 if (BE (mbclen == mbcdlen, 1))
331 memcpy (pstr->mbs + byte_idx, buf, mbclen);
332 else
333 {
334 src_idx = byte_idx;
335 goto offsets_needed;
336 }
337 }
338 else
339 memcpy (pstr->mbs + byte_idx,
340 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
341 pstr->wcs[byte_idx++] = wcu;
342 /* Write paddings. */
343 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
344 pstr->wcs[byte_idx++] = WEOF;
345 }
346 else if (mbclen == (size_t) -1 || mbclen == 0)
347 {
348 /* It is an invalid character or '\0'. Just use the byte. */
349 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
350 pstr->mbs[byte_idx] = ch;
351 /* And also cast it to wide char. */
352 pstr->wcs[byte_idx++] = (wchar_t) ch;
353 if (BE (mbclen == (size_t) -1, 0))
354 pstr->cur_state = prev_st;
355 }
356 else
357 {
358 /* The buffer doesn't have enough space, finish to build. */
359 pstr->cur_state = prev_st;
360 break;
361 }
362 }
363 pstr->valid_len = byte_idx;
364 pstr->valid_raw_len = byte_idx;
365 return REG_NOERROR;
366 }
367 else
368 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
369 {
370 wchar_t wc;
371 const char *p;
372 offsets_needed:
373 remain_len = end_idx - byte_idx;
374 prev_st = pstr->cur_state;
375 if (BE (pstr->trans != NULL, 0))
376 {
377 int i, ch;
378
379 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
380 {
381 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
382 buf[i] = pstr->trans[ch];
383 }
384 p = (const char *) buf;
385 }
386 else
387 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
388 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
389 if (BE (mbclen + 2 > 2, 1))
390 {
391 wchar_t wcu = wc;
392 if (iswlower (wc))
393 {
394 size_t mbcdlen;
395
396 wcu = towupper (wc);
397 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
398 if (BE (mbclen == mbcdlen, 1))
399 memcpy (pstr->mbs + byte_idx, buf, mbclen);
400 else if (mbcdlen != (size_t) -1)
401 {
402 size_t i;
403
404 if (byte_idx + mbcdlen > pstr->bufs_len)
405 {
406 pstr->cur_state = prev_st;
407 break;
408 }
409
410 if (pstr->offsets == NULL)
411 {
412 pstr->offsets = re_malloc (int, pstr->bufs_len);
413
414 if (pstr->offsets == NULL)
415 return REG_ESPACE;
416 }
417 if (!pstr->offsets_needed)
418 {
419 for (i = 0; i < (size_t) byte_idx; ++i)
420 pstr->offsets[i] = i;
421 pstr->offsets_needed = 1;
422 }
423
424 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
425 pstr->wcs[byte_idx] = wcu;
426 pstr->offsets[byte_idx] = src_idx;
427 for (i = 1; i < mbcdlen; ++i)
428 {
429 pstr->offsets[byte_idx + i]
430 = src_idx + (i < mbclen ? i : mbclen - 1);
431 pstr->wcs[byte_idx + i] = WEOF;
432 }
433 pstr->len += mbcdlen - mbclen;
434 if (pstr->raw_stop > src_idx)
435 pstr->stop += mbcdlen - mbclen;
436 end_idx = (pstr->bufs_len > pstr->len)
437 ? pstr->len : pstr->bufs_len;
438 byte_idx += mbcdlen;
439 src_idx += mbclen;
440 continue;
441 }
442 else
443 memcpy (pstr->mbs + byte_idx, p, mbclen);
444 }
445 else
446 memcpy (pstr->mbs + byte_idx, p, mbclen);
447
448 if (BE (pstr->offsets_needed != 0, 0))
449 {
450 size_t i;
451 for (i = 0; i < mbclen; ++i)
452 pstr->offsets[byte_idx + i] = src_idx + i;
453 }
454 src_idx += mbclen;
455
456 pstr->wcs[byte_idx++] = wcu;
457 /* Write paddings. */
458 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
459 pstr->wcs[byte_idx++] = WEOF;
460 }
461 else if (mbclen == (size_t) -1 || mbclen == 0)
462 {
463 /* It is an invalid character or '\0'. Just use the byte. */
464 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
465
466 if (BE (pstr->trans != NULL, 0))
467 ch = pstr->trans [ch];
468 pstr->mbs[byte_idx] = ch;
469
470 if (BE (pstr->offsets_needed != 0, 0))
471 pstr->offsets[byte_idx] = src_idx;
472 ++src_idx;
473
474 /* And also cast it to wide char. */
475 pstr->wcs[byte_idx++] = (wchar_t) ch;
476 if (BE (mbclen == (size_t) -1, 0))
477 pstr->cur_state = prev_st;
478 }
479 else
480 {
481 /* The buffer doesn't have enough space, finish to build. */
482 pstr->cur_state = prev_st;
483 break;
484 }
485 }
486 pstr->valid_len = byte_idx;
487 pstr->valid_raw_len = src_idx;
488 return REG_NOERROR;
489}
490
491/* Skip characters until the index becomes greater than NEW_RAW_IDX.
492 Return the index. */
493
494static int
495internal_function
496re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
497{
498 mbstate_t prev_st;
499 int rawbuf_idx;
500 size_t mbclen;
501 wint_t wc = WEOF;
502
503 /* Skip the characters which are not necessary to check. */
504 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
505 rawbuf_idx < new_raw_idx;)
506 {
507 wchar_t wc2;
508 int remain_len = pstr->len - rawbuf_idx;
509 prev_st = pstr->cur_state;
510 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
511 remain_len, &pstr->cur_state);
512 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
513 {
514 /* We treat these cases as a single byte character. */
515 if (mbclen == 0 || remain_len == 0)
516 wc = L'\0';
517 else
518 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
519 mbclen = 1;
520 pstr->cur_state = prev_st;
521 }
522 else
523 wc = (wint_t) wc2;
524 /* Then proceed the next character. */
525 rawbuf_idx += mbclen;
526 }
527 *last_wc = (wint_t) wc;
528 return rawbuf_idx;
529}
530#endif /* RE_ENABLE_I18N */
531
532/* Build the buffer PSTR->MBS, and apply the translation if we need.
533 This function is used in case of REG_ICASE. */
534
535static void
536internal_function
537build_upper_buffer (re_string_t *pstr)
538{
539 int char_idx, end_idx;
540 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541
542 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
543 {
544 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
545 if (BE (pstr->trans != NULL, 0))
546 ch = pstr->trans[ch];
547 if (islower (ch))
548 pstr->mbs[char_idx] = toupper (ch);
549 else
550 pstr->mbs[char_idx] = ch;
551 }
552 pstr->valid_len = char_idx;
553 pstr->valid_raw_len = char_idx;
554}
555
556/* Apply TRANS to the buffer in PSTR. */
557
558static void
559internal_function
560re_string_translate_buffer (re_string_t *pstr)
561{
562 int buf_idx, end_idx;
563 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
564
565 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
566 {
567 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
568 pstr->mbs[buf_idx] = pstr->trans[ch];
569 }
570
571 pstr->valid_len = buf_idx;
572 pstr->valid_raw_len = buf_idx;
573}
574
575/* This function re-construct the buffers.
576 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
577 convert to upper case in case of REG_ICASE, apply translation. */
578
579static reg_errcode_t
580internal_function
581re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
582{
583 int offset = idx - pstr->raw_mbs_idx;
584 if (BE (offset < 0, 0))
585 {
586 /* Reset buffer. */
587#ifdef RE_ENABLE_I18N
588 if (pstr->mb_cur_max > 1)
589 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
590#endif /* RE_ENABLE_I18N */
591 pstr->len = pstr->raw_len;
592 pstr->stop = pstr->raw_stop;
593 pstr->valid_len = 0;
594 pstr->raw_mbs_idx = 0;
595 pstr->valid_raw_len = 0;
596 pstr->offsets_needed = 0;
597 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
598 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
599 if (!pstr->mbs_allocated)
600 pstr->mbs = (unsigned char *) pstr->raw_mbs;
601 offset = idx;
602 }
603
604 if (BE (offset != 0, 1))
605 {
606 /* Should the already checked characters be kept? */
607 if (BE (offset < pstr->valid_raw_len, 1))
608 {
609 /* Yes, move them to the front of the buffer. */
610#ifdef RE_ENABLE_I18N
611 if (BE (pstr->offsets_needed, 0))
612 {
613 int low = 0, high = pstr->valid_len, mid;
614 do
615 {
616 mid = (high + low) / 2;
617 if (pstr->offsets[mid] > offset)
618 high = mid;
619 else if (pstr->offsets[mid] < offset)
620 low = mid + 1;
621 else
622 break;
623 }
624 while (low < high);
625 if (pstr->offsets[mid] < offset)
626 ++mid;
627 pstr->tip_context = re_string_context_at (pstr, mid - 1,
628 eflags);
629 /* This can be quite complicated, so handle specially
630 only the common and easy case where the character with
631 different length representation of lower and upper
632 case is present at or after offset. */
633 if (pstr->valid_len > offset
634 && mid == offset && pstr->offsets[mid] == offset)
635 {
636 memmove (pstr->wcs, pstr->wcs + offset,
637 (pstr->valid_len - offset) * sizeof (wint_t));
638 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
639 pstr->valid_len -= offset;
640 pstr->valid_raw_len -= offset;
641 for (low = 0; low < pstr->valid_len; low++)
642 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
643 }
644 else
645 {
646 /* Otherwise, just find out how long the partial multibyte
647 character at offset is and fill it with WEOF/255. */
648 pstr->len = pstr->raw_len - idx + offset;
649 pstr->stop = pstr->raw_stop - idx + offset;
650 pstr->offsets_needed = 0;
651 while (mid > 0 && pstr->offsets[mid - 1] == offset)
652 --mid;
653 while (mid < pstr->valid_len)
654 if (pstr->wcs[mid] != WEOF)
655 break;
656 else
657 ++mid;
658 if (mid == pstr->valid_len)
659 pstr->valid_len = 0;
660 else
661 {
662 pstr->valid_len = pstr->offsets[mid] - offset;
663 if (pstr->valid_len)
664 {
665 for (low = 0; low < pstr->valid_len; ++low)
666 pstr->wcs[low] = WEOF;
667 memset (pstr->mbs, 255, pstr->valid_len);
668 }
669 }
670 pstr->valid_raw_len = pstr->valid_len;
671 }
672 }
673 else
674#endif
675 {
676 pstr->tip_context = re_string_context_at (pstr, offset - 1,
677 eflags);
678#ifdef RE_ENABLE_I18N
679 if (pstr->mb_cur_max > 1)
680 memmove (pstr->wcs, pstr->wcs + offset,
681 (pstr->valid_len - offset) * sizeof (wint_t));
682#endif /* RE_ENABLE_I18N */
683 if (BE (pstr->mbs_allocated, 0))
684 memmove (pstr->mbs, pstr->mbs + offset,
685 pstr->valid_len - offset);
686 pstr->valid_len -= offset;
687 pstr->valid_raw_len -= offset;
688#ifdef DEBUG
689 assert (pstr->valid_len > 0);
690#endif
691 }
692 }
693 else
694 {
695#ifdef RE_ENABLE_I18N
696 /* No, skip all characters until IDX. */
697 int prev_valid_len = pstr->valid_len;
698
699 if (BE (pstr->offsets_needed, 0))
700 {
701 pstr->len = pstr->raw_len - idx + offset;
702 pstr->stop = pstr->raw_stop - idx + offset;
703 pstr->offsets_needed = 0;
704 }
705#endif
706 pstr->valid_len = 0;
707#ifdef RE_ENABLE_I18N
708 if (pstr->mb_cur_max > 1)
709 {
710 int wcs_idx;
711 wint_t wc = WEOF;
712
713 if (pstr->is_utf8)
714 {
715 const unsigned char *raw, *p, *end;
716
717 /* Special case UTF-8. Multi-byte chars start with any
718 byte other than 0x80 - 0xbf. */
719 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
720 end = raw + (offset - pstr->mb_cur_max);
721 if (end < pstr->raw_mbs)
722 end = pstr->raw_mbs;
723 p = raw + offset - 1;
724#ifdef _LIBC
725 /* We know the wchar_t encoding is UCS4, so for the simple
726 case, ASCII characters, skip the conversion step. */
727 if (isascii (*p) && BE (pstr->trans == NULL, 1))
728 {
729 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
730 /* pstr->valid_len = 0; */
731 wc = (wchar_t) *p;
732 }
733 else
734#endif
735 for (; p >= end; --p)
736 if ((*p & 0xc0) != 0x80)
737 {
738 mbstate_t cur_state;
739 wchar_t wc2;
740 int mlen = raw + pstr->len - p;
741 unsigned char buf[6];
742 size_t mbclen;
743
744 if (BE (pstr->trans != NULL, 0))
745 {
746 int i = mlen < 6 ? mlen : 6;
747 while (--i >= 0)
748 buf[i] = pstr->trans[p[i]];
749 }
750 /* XXX Don't use mbrtowc, we know which conversion
751 to use (UTF-8 -> UCS4). */
752 memset (&cur_state, 0, sizeof (cur_state));
753 mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
754 &cur_state);
755 if (raw + offset - p <= mbclen
756 && mbclen < (size_t) -2)
757 {
758 memset (&pstr->cur_state, '\0',
759 sizeof (mbstate_t));
760 pstr->valid_len = mbclen - (raw + offset - p);
761 wc = wc2;
762 }
763 break;
764 }
765 }
766
767 if (wc == WEOF)
768 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
769 if (wc == WEOF)
770 pstr->tip_context
771 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
772 else
773 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
774 && IS_WIDE_WORD_CHAR (wc))
775 ? CONTEXT_WORD
776 : ((IS_WIDE_NEWLINE (wc)
777 && pstr->newline_anchor)
778 ? CONTEXT_NEWLINE : 0));
779 if (BE (pstr->valid_len, 0))
780 {
781 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
782 pstr->wcs[wcs_idx] = WEOF;
783 if (pstr->mbs_allocated)
784 memset (pstr->mbs, 255, pstr->valid_len);
785 }
786 pstr->valid_raw_len = pstr->valid_len;
787 }
788 else
789#endif /* RE_ENABLE_I18N */
790 {
791 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
792 pstr->valid_raw_len = 0;
793 if (pstr->trans)
794 c = pstr->trans[c];
795 pstr->tip_context = (bitset_contain (pstr->word_char, c)
796 ? CONTEXT_WORD
797 : ((IS_NEWLINE (c) && pstr->newline_anchor)
798 ? CONTEXT_NEWLINE : 0));
799 }
800 }
801 if (!BE (pstr->mbs_allocated, 0))
802 pstr->mbs += offset;
803 }
804 pstr->raw_mbs_idx = idx;
805 pstr->len -= offset;
806 pstr->stop -= offset;
807
808 /* Then build the buffers. */
809#ifdef RE_ENABLE_I18N
810 if (pstr->mb_cur_max > 1)
811 {
812 if (pstr->icase)
813 {
814 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
815 if (BE (ret != REG_NOERROR, 0))
816 return ret;
817 }
818 else
819 build_wcs_buffer (pstr);
820 }
821 else
822#endif /* RE_ENABLE_I18N */
823 if (BE (pstr->mbs_allocated, 0))
824 {
825 if (pstr->icase)
826 build_upper_buffer (pstr);
827 else if (pstr->trans != NULL)
828 re_string_translate_buffer (pstr);
829 }
830 else
831 pstr->valid_len = pstr->len;
832
833 pstr->cur_idx = 0;
834 return REG_NOERROR;
835}
836
837static unsigned char
838internal_function __attribute ((pure))
839re_string_peek_byte_case (const re_string_t *pstr, int idx)
840{
841 int ch, off;
842
843 /* Handle the common (easiest) cases first. */
844 if (BE (!pstr->mbs_allocated, 1))
845 return re_string_peek_byte (pstr, idx);
846
847#ifdef RE_ENABLE_I18N
848 if (pstr->mb_cur_max > 1
849 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
850 return re_string_peek_byte (pstr, idx);
851#endif
852
853 off = pstr->cur_idx + idx;
854#ifdef RE_ENABLE_I18N
855 if (pstr->offsets_needed)
856 off = pstr->offsets[off];
857#endif
858
859 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
860
861#ifdef RE_ENABLE_I18N
862 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
863 this function returns CAPITAL LETTER I instead of first byte of
864 DOTLESS SMALL LETTER I. The latter would confuse the parser,
865 since peek_byte_case doesn't advance cur_idx in any way. */
866 if (pstr->offsets_needed && !isascii (ch))
867 return re_string_peek_byte (pstr, idx);
868#endif
869
870 return ch;
871}
872
873static unsigned char
874internal_function __attribute ((pure))
875re_string_fetch_byte_case (re_string_t *pstr)
876{
877 if (BE (!pstr->mbs_allocated, 1))
878 return re_string_fetch_byte (pstr);
879
880#ifdef RE_ENABLE_I18N
881 if (pstr->offsets_needed)
882 {
883 int off, ch;
884
885 /* For tr_TR.UTF-8 [[:islower:]] there is
886 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
887 in that case the whole multi-byte character and return
888 the original letter. On the other side, with
889 [[: DOTLESS SMALL LETTER I return [[:I, as doing
890 anything else would complicate things too much. */
891
892 if (!re_string_first_byte (pstr, pstr->cur_idx))
893 return re_string_fetch_byte (pstr);
894
895 off = pstr->offsets[pstr->cur_idx];
896 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
897
898 if (! isascii (ch))
899 return re_string_fetch_byte (pstr);
900
901 re_string_skip_bytes (pstr,
902 re_string_char_size_at (pstr, pstr->cur_idx));
903 return ch;
904 }
905#endif
906
907 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
908}
909
910static void
911internal_function
912re_string_destruct (re_string_t *pstr)
913{
914#ifdef RE_ENABLE_I18N
915 re_free (pstr->wcs);
916 re_free (pstr->offsets);
917#endif /* RE_ENABLE_I18N */
918 if (pstr->mbs_allocated)
919 re_free (pstr->mbs);
920}
921
922/* Return the context at IDX in INPUT. */
923
924static unsigned int
925internal_function
926re_string_context_at (const re_string_t *input, int idx, int eflags)
927{
928 int c;
929 if (BE (idx < 0, 0))
930 /* In this case, we use the value stored in input->tip_context,
931 since we can't know the character in input->mbs[-1] here. */
932 return input->tip_context;
933 if (BE (idx == input->len, 0))
934 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
935 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
936#ifdef RE_ENABLE_I18N
937 if (input->mb_cur_max > 1)
938 {
939 wint_t wc;
940 int wc_idx = idx;
941 while(input->wcs[wc_idx] == WEOF)
942 {
943#ifdef DEBUG
944 /* It must not happen. */
945 assert (wc_idx >= 0);
946#endif
947 --wc_idx;
948 if (wc_idx < 0)
949 return input->tip_context;
950 }
951 wc = input->wcs[wc_idx];
952 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
953 return CONTEXT_WORD;
954 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
955 ? CONTEXT_NEWLINE : 0);
956 }
957 else
958#endif
959 {
960 c = re_string_byte_at (input, idx);
961 if (bitset_contain (input->word_char, c))
962 return CONTEXT_WORD;
963 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
964 }
965}
966
967/* Functions for set operation. */
968
969static reg_errcode_t
970internal_function
971re_node_set_alloc (re_node_set *set, int size)
972{
973 /*
974 * ADR: valgrind says size can be 0, which then doesn't
975 * free the block of size 0. Harumph. This seems
976 * to work ok, though.
977 */
978 if (size == 0)
979 {
980 memset(set, 0, sizeof(*set));
981 return REG_NOERROR;
982 }
983 set->alloc = size;
984 set->nelem = 0;
985 set->elems = re_malloc (int, size);
986 if (BE (set->elems == NULL, 0))
987 return REG_ESPACE;
988 return REG_NOERROR;
989}
990
991static reg_errcode_t
992internal_function
993re_node_set_init_1 (re_node_set *set, int elem)
994{
995 set->alloc = 1;
996 set->nelem = 1;
997 set->elems = re_malloc (int, 1);
998 if (BE (set->elems == NULL, 0))
999 {
1000 set->alloc = set->nelem = 0;
1001 return REG_ESPACE;
1002 }
1003 set->elems[0] = elem;
1004 return REG_NOERROR;
1005}
1006
1007static reg_errcode_t
1008internal_function
1009re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
1010{
1011 set->alloc = 2;
1012 set->elems = re_malloc (int, 2);
1013 if (BE (set->elems == NULL, 0))
1014 return REG_ESPACE;
1015 if (elem1 == elem2)
1016 {
1017 set->nelem = 1;
1018 set->elems[0] = elem1;
1019 }
1020 else
1021 {
1022 set->nelem = 2;
1023 if (elem1 < elem2)
1024 {
1025 set->elems[0] = elem1;
1026 set->elems[1] = elem2;
1027 }
1028 else
1029 {
1030 set->elems[0] = elem2;
1031 set->elems[1] = elem1;
1032 }
1033 }
1034 return REG_NOERROR;
1035}
1036
1037static reg_errcode_t
1038internal_function
1039re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1040{
1041 dest->nelem = src->nelem;
1042 if (src->nelem > 0)
1043 {
1044 dest->alloc = dest->nelem;
1045 dest->elems = re_malloc (int, dest->alloc);
1046 if (BE (dest->elems == NULL, 0))
1047 {
1048 dest->alloc = dest->nelem = 0;
1049 return REG_ESPACE;
1050 }
1051 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1052 }
1053 else
1054 re_node_set_init_empty (dest);
1055 return REG_NOERROR;
1056}
1057
1058/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1059 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1060 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1061
1062static reg_errcode_t
1063internal_function
1064re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1065 const re_node_set *src2)
1066{
1067 int i1, i2, is, id, delta, sbase;
1068 if (src1->nelem == 0 || src2->nelem == 0)
1069 return REG_NOERROR;
1070
1071 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1072 conservative estimate. */
1073 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1074 {
1075 int new_alloc = src1->nelem + src2->nelem + dest->alloc;
1076 int *new_elems = re_realloc (dest->elems, int, new_alloc);
1077 if (BE (new_elems == NULL, 0))
1078 return REG_ESPACE;
1079 dest->elems = new_elems;
1080 dest->alloc = new_alloc;
1081 }
1082
1083 /* Find the items in the intersection of SRC1 and SRC2, and copy
1084 into the top of DEST those that are not already in DEST itself. */
1085 sbase = dest->nelem + src1->nelem + src2->nelem;
1086 i1 = src1->nelem - 1;
1087 i2 = src2->nelem - 1;
1088 id = dest->nelem - 1;
1089 for (;;)
1090 {
1091 if (src1->elems[i1] == src2->elems[i2])
1092 {
1093 /* Try to find the item in DEST. Maybe we could binary search? */
1094 while (id >= 0 && dest->elems[id] > src1->elems[i1])
1095 --id;
1096
1097 if (id < 0 || dest->elems[id] != src1->elems[i1])
1098 dest->elems[--sbase] = src1->elems[i1];
1099
1100 if (--i1 < 0 || --i2 < 0)
1101 break;
1102 }
1103
1104 /* Lower the highest of the two items. */
1105 else if (src1->elems[i1] < src2->elems[i2])
1106 {
1107 if (--i2 < 0)
1108 break;
1109 }
1110 else
1111 {
1112 if (--i1 < 0)
1113 break;
1114 }
1115 }
1116
1117 id = dest->nelem - 1;
1118 is = dest->nelem + src1->nelem + src2->nelem - 1;
1119 delta = is - sbase + 1;
1120
1121 /* Now copy. When DELTA becomes zero, the remaining
1122 DEST elements are already in place; this is more or
1123 less the same loop that is in re_node_set_merge. */
1124 dest->nelem += delta;
1125 if (delta > 0 && id >= 0)
1126 for (;;)
1127 {
1128 if (dest->elems[is] > dest->elems[id])
1129 {
1130 /* Copy from the top. */
1131 dest->elems[id + delta--] = dest->elems[is--];
1132 if (delta == 0)
1133 break;
1134 }
1135 else
1136 {
1137 /* Slide from the bottom. */
1138 dest->elems[id + delta] = dest->elems[id];
1139 if (--id < 0)
1140 break;
1141 }
1142 }
1143
1144 /* Copy remaining SRC elements. */
1145 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
1146
1147 return REG_NOERROR;
1148}
1149
1150/* Calculate the union set of the sets SRC1 and SRC2. And store it to
1151 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1152
1153static reg_errcode_t
1154internal_function
1155re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1156 const re_node_set *src2)
1157{
1158 int i1, i2, id;
1159 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1160 {
1161 dest->alloc = src1->nelem + src2->nelem;
1162 dest->elems = re_malloc (int, dest->alloc);
1163 if (BE (dest->elems == NULL, 0))
1164 return REG_ESPACE;
1165 }
1166 else
1167 {
1168 if (src1 != NULL && src1->nelem > 0)
1169 return re_node_set_init_copy (dest, src1);
1170 else if (src2 != NULL && src2->nelem > 0)
1171 return re_node_set_init_copy (dest, src2);
1172 else
1173 re_node_set_init_empty (dest);
1174 return REG_NOERROR;
1175 }
1176 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1177 {
1178 if (src1->elems[i1] > src2->elems[i2])
1179 {
1180 dest->elems[id++] = src2->elems[i2++];
1181 continue;
1182 }
1183 if (src1->elems[i1] == src2->elems[i2])
1184 ++i2;
1185 dest->elems[id++] = src1->elems[i1++];
1186 }
1187 if (i1 < src1->nelem)
1188 {
1189 memcpy (dest->elems + id, src1->elems + i1,
1190 (src1->nelem - i1) * sizeof (int));
1191 id += src1->nelem - i1;
1192 }
1193 else if (i2 < src2->nelem)
1194 {
1195 memcpy (dest->elems + id, src2->elems + i2,
1196 (src2->nelem - i2) * sizeof (int));
1197 id += src2->nelem - i2;
1198 }
1199 dest->nelem = id;
1200 return REG_NOERROR;
1201}
1202
1203/* Calculate the union set of the sets DEST and SRC. And store it to
1204 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1205
1206static reg_errcode_t
1207internal_function
1208re_node_set_merge (re_node_set *dest, const re_node_set *src)
1209{
1210 int is, id, sbase, delta;
1211 if (src == NULL || src->nelem == 0)
1212 return REG_NOERROR;
1213 if (dest->alloc < 2 * src->nelem + dest->nelem)
1214 {
1215 int new_alloc = 2 * (src->nelem + dest->alloc);
1216 int *new_buffer = re_realloc (dest->elems, int, new_alloc);
1217 if (BE (new_buffer == NULL, 0))
1218 return REG_ESPACE;
1219 dest->elems = new_buffer;
1220 dest->alloc = new_alloc;
1221 }
1222
1223 if (BE (dest->nelem == 0, 0))
1224 {
1225 dest->nelem = src->nelem;
1226 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1227 return REG_NOERROR;
1228 }
1229
1230 /* Copy into the top of DEST the items of SRC that are not
1231 found in DEST. Maybe we could binary search in DEST? */
1232 for (sbase = dest->nelem + 2 * src->nelem,
1233 is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1234 {
1235 if (dest->elems[id] == src->elems[is])
1236 is--, id--;
1237 else if (dest->elems[id] < src->elems[is])
1238 dest->elems[--sbase] = src->elems[is--];
1239 else /* if (dest->elems[id] > src->elems[is]) */
1240 --id;
1241 }
1242
1243 if (is >= 0)
1244 {
1245 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1246 sbase -= is + 1;
1247 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
1248 }
1249
1250 id = dest->nelem - 1;
1251 is = dest->nelem + 2 * src->nelem - 1;
1252 delta = is - sbase + 1;
1253 if (delta == 0)
1254 return REG_NOERROR;
1255
1256 /* Now copy. When DELTA becomes zero, the remaining
1257 DEST elements are already in place. */
1258 dest->nelem += delta;
1259 for (;;)
1260 {
1261 if (dest->elems[is] > dest->elems[id])
1262 {
1263 /* Copy from the top. */
1264 dest->elems[id + delta--] = dest->elems[is--];
1265 if (delta == 0)
1266 break;
1267 }
1268 else
1269 {
1270 /* Slide from the bottom. */
1271 dest->elems[id + delta] = dest->elems[id];
1272 if (--id < 0)
1273 {
1274 /* Copy remaining SRC elements. */
1275 memcpy (dest->elems, dest->elems + sbase,
1276 delta * sizeof (int));
1277 break;
1278 }
1279 }
1280 }
1281
1282 return REG_NOERROR;
1283}
1284
1285/* Insert the new element ELEM to the re_node_set* SET.
1286 SET should not already have ELEM.
1287 return -1 if an error has occurred, return 1 otherwise. */
1288
1289static int
1290internal_function
1291re_node_set_insert (re_node_set *set, int elem)
1292{
1293 int idx;
1294 /* In case the set is empty. */
1295 if (set->alloc == 0)
1296 {
1297 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1298 return 1;
1299 else
1300 return -1;
1301 }
1302
1303 if (BE (set->nelem, 0) == 0)
1304 {
1305 /* We already guaranteed above that set->alloc != 0. */
1306 set->elems[0] = elem;
1307 ++set->nelem;
1308 return 1;
1309 }
1310
1311 /* Realloc if we need. */
1312 if (set->alloc == set->nelem)
1313 {
1314 int *new_elems;
1315 set->alloc = set->alloc * 2;
1316 new_elems = re_realloc (set->elems, int, set->alloc);
1317 if (BE (new_elems == NULL, 0))
1318 return -1;
1319 set->elems = new_elems;
1320 }
1321
1322 /* Move the elements which follows the new element. Test the
1323 first element separately to skip a check in the inner loop. */
1324 if (elem < set->elems[0])
1325 {
1326 idx = 0;
1327 for (idx = set->nelem; idx > 0; idx--)
1328 set->elems[idx] = set->elems[idx - 1];
1329 }
1330 else
1331 {
1332 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1333 set->elems[idx] = set->elems[idx - 1];
1334 }
1335
1336 /* Insert the new element. */
1337 set->elems[idx] = elem;
1338 ++set->nelem;
1339 return 1;
1340}
1341
1342/* Insert the new element ELEM to the re_node_set* SET.
1343 SET should not already have any element greater than or equal to ELEM.
1344 Return -1 if an error has occurred, return 1 otherwise. */
1345
1346static int
1347internal_function
1348re_node_set_insert_last (re_node_set *set, int elem)
1349{
1350 /* Realloc if we need. */
1351 if (set->alloc == set->nelem)
1352 {
1353 int *new_elems;
1354 set->alloc = (set->alloc + 1) * 2;
1355 new_elems = re_realloc (set->elems, int, set->alloc);
1356 if (BE (new_elems == NULL, 0))
1357 return -1;
1358 set->elems = new_elems;
1359 }
1360
1361 /* Insert the new element. */
1362 set->elems[set->nelem++] = elem;
1363 return 1;
1364}
1365
1366/* Compare two node sets SET1 and SET2.
1367 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1368
1369static int
1370internal_function __attribute ((pure))
1371re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1372{
1373 int i;
1374 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1375 return 0;
1376 for (i = set1->nelem ; --i >= 0 ; )
1377 if (set1->elems[i] != set2->elems[i])
1378 return 0;
1379 return 1;
1380}
1381
1382/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1383
1384static int
1385internal_function __attribute ((pure))
1386re_node_set_contains (const re_node_set *set, int elem)
1387{
1388 unsigned int idx, right, mid;
1389 if (set->nelem <= 0)
1390 return 0;
1391
1392 /* Binary search the element. */
1393 idx = 0;
1394 right = set->nelem - 1;
1395 while (idx < right)
1396 {
1397 mid = (idx + right) / 2;
1398 if (set->elems[mid] < elem)
1399 idx = mid + 1;
1400 else
1401 right = mid;
1402 }
1403 return set->elems[idx] == elem ? idx + 1 : 0;
1404}
1405
1406static void
1407internal_function
1408re_node_set_remove_at (re_node_set *set, int idx)
1409{
1410 if (idx < 0 || idx >= set->nelem)
1411 return;
1412 --set->nelem;
1413 for (; idx < set->nelem; idx++)
1414 set->elems[idx] = set->elems[idx + 1];
1415}
1416
1417
1418/* Add the token TOKEN to dfa->nodes, and return the index of the token.
1419 Or return -1, if an error has occurred. */
1420
1421static int
1422internal_function
1423re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1424{
1425 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1426 {
1427 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1428 int *new_nexts, *new_indices;
1429 re_node_set *new_edests, *new_eclosures;
1430 re_token_t *new_nodes;
1431
1432 /* Avoid overflows in realloc. */
1433 const size_t max_object_size = MAX (sizeof (re_token_t),
1434 MAX (sizeof (re_node_set),
1435 sizeof (int)));
1436 if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0))
1437 return -1;
1438
1439 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1440 if (BE (new_nodes == NULL, 0))
1441 return -1;
1442 dfa->nodes = new_nodes;
1443 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1444 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1445 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1446 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1447 if (BE (new_nexts == NULL || new_indices == NULL
1448 || new_edests == NULL || new_eclosures == NULL, 0))
1449 return -1;
1450 dfa->nexts = new_nexts;
1451 dfa->org_indices = new_indices;
1452 dfa->edests = new_edests;
1453 dfa->eclosures = new_eclosures;
1454 dfa->nodes_alloc = new_nodes_alloc;
1455 }
1456 dfa->nodes[dfa->nodes_len] = token;
1457 dfa->nodes[dfa->nodes_len].constraint = 0;
1458#ifdef RE_ENABLE_I18N
1459 dfa->nodes[dfa->nodes_len].accept_mb =
1460 (token.type == OP_PERIOD && dfa->mb_cur_max > 1) || token.type == COMPLEX_BRACKET;
1461#endif
1462 dfa->nexts[dfa->nodes_len] = -1;
1463 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1464 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1465 return dfa->nodes_len++;
1466}
1467
1468static inline unsigned int
1469internal_function
1470calc_state_hash (const re_node_set *nodes, unsigned int context)
1471{
1472 unsigned int hash = nodes->nelem + context;
1473 int i;
1474 for (i = 0 ; i < nodes->nelem ; i++)
1475 hash += nodes->elems[i];
1476 return hash;
1477}
1478
1479/* Search for the state whose node_set is equivalent to NODES.
1480 Return the pointer to the state, if we found it in the DFA.
1481 Otherwise create the new one and return it. In case of an error
1482 return NULL and set the error code in ERR.
1483 Note: - We assume NULL as the invalid state, then it is possible that
1484 return value is NULL and ERR is REG_NOERROR.
1485 - We never return non-NULL value in case of any errors, it is for
1486 optimization. */
1487
1488static re_dfastate_t *
1489internal_function
1490re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1491 const re_node_set *nodes)
1492{
1493 unsigned int hash;
1494 re_dfastate_t *new_state;
1495 struct re_state_table_entry *spot;
1496 int i;
1497 if (BE (nodes->nelem == 0, 0))
1498 {
1499 *err = REG_NOERROR;
1500 return NULL;
1501 }
1502 hash = calc_state_hash (nodes, 0);
1503 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1504
1505 for (i = 0 ; i < spot->num ; i++)
1506 {
1507 re_dfastate_t *state = spot->array[i];
1508 if (hash != state->hash)
1509 continue;
1510 if (re_node_set_compare (&state->nodes, nodes))
1511 return state;
1512 }
1513
1514 /* There are no appropriate state in the dfa, create the new one. */
1515 new_state = create_ci_newstate (dfa, nodes, hash);
1516 if (BE (new_state == NULL, 0))
1517 *err = REG_ESPACE;
1518
1519 return new_state;
1520}
1521
1522/* Search for the state whose node_set is equivalent to NODES and
1523 whose context is equivalent to CONTEXT.
1524 Return the pointer to the state, if we found it in the DFA.
1525 Otherwise create the new one and return it. In case of an error
1526 return NULL and set the error code in ERR.
1527 Note: - We assume NULL as the invalid state, then it is possible that
1528 return value is NULL and ERR is REG_NOERROR.
1529 - We never return non-NULL value in case of any errors, it is for
1530 optimization. */
1531
1532static re_dfastate_t *
1533internal_function
1534re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1535 const re_node_set *nodes, unsigned int context)
1536{
1537 unsigned int hash;
1538 re_dfastate_t *new_state;
1539 struct re_state_table_entry *spot;
1540 int i;
1541 if (nodes->nelem == 0)
1542 {
1543 *err = REG_NOERROR;
1544 return NULL;
1545 }
1546 hash = calc_state_hash (nodes, context);
1547 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1548
1549 for (i = 0 ; i < spot->num ; i++)
1550 {
1551 re_dfastate_t *state = spot->array[i];
1552 if (state->hash == hash
1553 && state->context == context
1554 && re_node_set_compare (state->entrance_nodes, nodes))
1555 return state;
1556 }
1557 /* There are no appropriate state in `dfa', create the new one. */
1558 new_state = create_cd_newstate (dfa, nodes, context, hash);
1559 if (BE (new_state == NULL, 0))
1560 *err = REG_ESPACE;
1561
1562 return new_state;
1563}
1564
1565/* Finish initialization of the new state NEWSTATE, and using its hash value
1566 HASH put in the appropriate bucket of DFA's state table. Return value
1567 indicates the error code if failed. */
1568
1569static reg_errcode_t
1570register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1571 unsigned int hash)
1572{
1573 struct re_state_table_entry *spot;
1574 reg_errcode_t err;
1575 int i;
1576
1577 newstate->hash = hash;
1578 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1579 if (BE (err != REG_NOERROR, 0))
1580 return REG_ESPACE;
1581 for (i = 0; i < newstate->nodes.nelem; i++)
1582 {
1583 int elem = newstate->nodes.elems[i];
1584 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1585 if (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0)
1586 return REG_ESPACE;
1587 }
1588
1589 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1590 if (BE (spot->alloc <= spot->num, 0))
1591 {
1592 int new_alloc = 2 * spot->num + 2;
1593 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1594 new_alloc);
1595 if (BE (new_array == NULL, 0))
1596 return REG_ESPACE;
1597 spot->array = new_array;
1598 spot->alloc = new_alloc;
1599 }
1600 spot->array[spot->num++] = newstate;
1601 return REG_NOERROR;
1602}
1603
1604static void
1605free_state (re_dfastate_t *state)
1606{
1607 re_node_set_free (&state->non_eps_nodes);
1608 re_node_set_free (&state->inveclosure);
1609 if (state->entrance_nodes != &state->nodes)
1610 {
1611 re_node_set_free (state->entrance_nodes);
1612 re_free (state->entrance_nodes);
1613 }
1614 re_node_set_free (&state->nodes);
1615 re_free (state->word_trtable);
1616 re_free (state->trtable);
1617 re_free (state);
1618}
1619
1620/* Create the new state which is independ of contexts.
1621 Return the new state if succeeded, otherwise return NULL. */
1622
1623static re_dfastate_t *
1624internal_function
1625create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1626 unsigned int hash)
1627{
1628 int i;
1629 reg_errcode_t err;
1630 re_dfastate_t *newstate;
1631
1632 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1633 if (BE (newstate == NULL, 0))
1634 return NULL;
1635 err = re_node_set_init_copy (&newstate->nodes, nodes);
1636 if (BE (err != REG_NOERROR, 0))
1637 {
1638 re_free (newstate);
1639 return NULL;
1640 }
1641
1642 newstate->entrance_nodes = &newstate->nodes;
1643 for (i = 0 ; i < nodes->nelem ; i++)
1644 {
1645 re_token_t *node = dfa->nodes + nodes->elems[i];
1646 re_token_type_t type = node->type;
1647 if (type == CHARACTER && !node->constraint)
1648 continue;
1649#ifdef RE_ENABLE_I18N
1650 newstate->accept_mb |= node->accept_mb;
1651#endif /* RE_ENABLE_I18N */
1652
1653 /* If the state has the halt node, the state is a halt state. */
1654 if (type == END_OF_RE)
1655 newstate->halt = 1;
1656 else if (type == OP_BACK_REF)
1657 newstate->has_backref = 1;
1658 else if (type == ANCHOR || node->constraint)
1659 newstate->has_constraint = 1;
1660 }
1661 err = register_state (dfa, newstate, hash);
1662 if (BE (err != REG_NOERROR, 0))
1663 {
1664 free_state (newstate);
1665 newstate = NULL;
1666 }
1667 return newstate;
1668}
1669
1670/* Create the new state which is depend on the context CONTEXT.
1671 Return the new state if succeeded, otherwise return NULL. */
1672
1673static re_dfastate_t *
1674internal_function
1675create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1676 unsigned int context, unsigned int hash)
1677{
1678 int i, nctx_nodes = 0;
1679 reg_errcode_t err;
1680 re_dfastate_t *newstate;
1681
1682 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1683 if (BE (newstate == NULL, 0))
1684 return NULL;
1685 err = re_node_set_init_copy (&newstate->nodes, nodes);
1686 if (BE (err != REG_NOERROR, 0))
1687 {
1688 re_free (newstate);
1689 return NULL;
1690 }
1691
1692 newstate->context = context;
1693 newstate->entrance_nodes = &newstate->nodes;
1694
1695 for (i = 0 ; i < nodes->nelem ; i++)
1696 {
1697 re_token_t *node = dfa->nodes + nodes->elems[i];
1698 re_token_type_t type = node->type;
1699 unsigned int constraint = node->constraint;
1700
1701 if (type == CHARACTER && !constraint)
1702 continue;
1703#ifdef RE_ENABLE_I18N
1704 newstate->accept_mb |= node->accept_mb;
1705#endif /* RE_ENABLE_I18N */
1706
1707 /* If the state has the halt node, the state is a halt state. */
1708 if (type == END_OF_RE)
1709 newstate->halt = 1;
1710 else if (type == OP_BACK_REF)
1711 newstate->has_backref = 1;
1712
1713 if (constraint)
1714 {
1715 if (newstate->entrance_nodes == &newstate->nodes)
1716 {
1717 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1718 if (BE (newstate->entrance_nodes == NULL, 0))
1719 {
1720 free_state (newstate);
1721 return NULL;
1722 }
1723 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1724 != REG_NOERROR)
1725 return NULL;
1726 nctx_nodes = 0;
1727 newstate->has_constraint = 1;
1728 }
1729
1730 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1731 {
1732 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1733 ++nctx_nodes;
1734 }
1735 }
1736 }
1737 err = register_state (dfa, newstate, hash);
1738 if (BE (err != REG_NOERROR, 0))
1739 {
1740 free_state (newstate);
1741 newstate = NULL;
1742 }
1743 return newstate;
1744}
diff --git a/win32/regex_internal.h b/win32/regex_internal.h
new file mode 100644
index 000000000..1495059ab
--- /dev/null
+++ b/win32/regex_internal.h
@@ -0,0 +1,810 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2005, 2007, 2008, 2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21#ifndef _REGEX_INTERNAL_H
22#define _REGEX_INTERNAL_H 1
23
24#include <assert.h>
25#include <ctype.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
31# include <langinfo.h>
32#endif
33#if defined HAVE_LOCALE_H || defined _LIBC
34# include <locale.h>
35#endif
36#if defined HAVE_WCHAR_H || defined _LIBC
37# include <wchar.h>
38#endif /* HAVE_WCHAR_H || _LIBC */
39#if defined HAVE_WCTYPE_H || defined _LIBC
40# include <wctype.h>
41#endif /* HAVE_WCTYPE_H || _LIBC */
42#if defined HAVE_STDBOOL_H || defined _LIBC
43# include <stdbool.h>
44#endif /* HAVE_STDBOOL_H || _LIBC */
45#if !defined(ZOS_USS)
46#if defined HAVE_STDINT_H || defined _LIBC
47# include <stdint.h>
48#endif /* HAVE_STDINT_H || _LIBC */
49#endif /* !ZOS_USS */
50#if defined _LIBC
51# include <bits/libc-lock.h>
52#else
53# define __libc_lock_define(CLASS,NAME)
54# define __libc_lock_init(NAME) do { } while (0)
55# define __libc_lock_lock(NAME) do { } while (0)
56# define __libc_lock_unlock(NAME) do { } while (0)
57#endif
58
59#ifndef GAWK
60/* In case that the system doesn't have isblank(). */
61#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
62# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
63#endif
64#else /* GAWK */
65/*
66 * This is a freaking mess. On glibc systems you have to define
67 * a magic constant to get isblank() out of <ctype.h>, since it's
68 * a C99 function. To heck with all that and borrow a page from
69 * dfa.c's book.
70 */
71
72static int
73is_blank (int c)
74{
75 return (c == ' ' || c == '\t');
76}
77#endif /* GAWK */
78
79#ifdef _LIBC
80# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
81# define _RE_DEFINE_LOCALE_FUNCTIONS 1
82# include <locale/localeinfo.h>
83# include <locale/elem-hash.h>
84# include <locale/coll-lookup.h>
85# endif
86#endif
87
88/* This is for other GNU distributions with internationalized messages. */
89#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
90# include <libintl.h>
91# ifdef _LIBC
92# undef gettext
93# define gettext(msgid) \
94 INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
95# endif
96#else
97# define gettext(msgid) (msgid)
98#endif
99
100#ifndef gettext_noop
101/* This define is so xgettext can find the internationalizable
102 strings. */
103# define gettext_noop(String) String
104#endif
105
106/* For loser systems without the definition. */
107#ifndef SIZE_MAX
108# define SIZE_MAX ((size_t) -1)
109#endif
110
111#ifndef NO_MBSUPPORT
112#include "mbsupport.h" /* gawk */
113#endif
114#ifndef MB_CUR_MAX
115#define MB_CUR_MAX 1
116#endif
117
118#if (defined MBS_SUPPORT) || defined _LIBC
119# define RE_ENABLE_I18N
120#endif
121
122#if __GNUC__ >= 3
123# define BE(expr, val) __builtin_expect (expr, val)
124#else
125# define BE(expr, val) (expr)
126# ifdef inline
127# undef inline
128# endif
129# define inline
130#endif
131
132/* Number of single byte character. */
133#define SBC_MAX 256
134
135#define COLL_ELEM_LEN_MAX 8
136
137/* The character which represents newline. */
138#define NEWLINE_CHAR '\n'
139#define WIDE_NEWLINE_CHAR L'\n'
140
141/* Rename to standard API for using out of glibc. */
142#ifndef _LIBC
143# ifdef __wctype
144# undef __wctype
145# endif
146# define __wctype wctype
147# ifdef __iswctype
148# undef __iswctype
149# endif
150# define __iswctype iswctype
151# define __btowc btowc
152# define __mbrtowc mbrtowc
153#undef __mempcpy /* GAWK */
154# define __mempcpy mempcpy
155# define __wcrtomb wcrtomb
156# define __regfree regfree
157# define attribute_hidden
158#endif /* not _LIBC */
159
160#ifdef __GNUC__
161# define __attribute(arg) __attribute__ (arg)
162#else
163# define __attribute(arg)
164#endif
165
166extern const char __re_error_msgid[] attribute_hidden;
167extern const size_t __re_error_msgid_idx[] attribute_hidden;
168
169/* An integer used to represent a set of bits. It must be unsigned,
170 and must be at least as wide as unsigned int. */
171typedef unsigned long int bitset_word_t;
172/* All bits set in a bitset_word_t. */
173#define BITSET_WORD_MAX ULONG_MAX
174/* Number of bits in a bitset_word_t. */
175#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
176/* Number of bitset_word_t in a bit_set. */
177#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
178typedef bitset_word_t bitset_t[BITSET_WORDS];
179typedef bitset_word_t *re_bitset_ptr_t;
180typedef const bitset_word_t *re_const_bitset_ptr_t;
181
182#define bitset_set(set,i) \
183 (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS)
184#define bitset_clear(set,i) \
185 (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS))
186#define bitset_contain(set,i) \
187 (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS))
188#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
189#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
190#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
191
192#define PREV_WORD_CONSTRAINT 0x0001
193#define PREV_NOTWORD_CONSTRAINT 0x0002
194#define NEXT_WORD_CONSTRAINT 0x0004
195#define NEXT_NOTWORD_CONSTRAINT 0x0008
196#define PREV_NEWLINE_CONSTRAINT 0x0010
197#define NEXT_NEWLINE_CONSTRAINT 0x0020
198#define PREV_BEGBUF_CONSTRAINT 0x0040
199#define NEXT_ENDBUF_CONSTRAINT 0x0080
200#define WORD_DELIM_CONSTRAINT 0x0100
201#define NOT_WORD_DELIM_CONSTRAINT 0x0200
202
203typedef enum
204{
205 INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
206 WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
207 WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
208 INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
209 LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
210 LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
211 BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
212 BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
213 WORD_DELIM = WORD_DELIM_CONSTRAINT,
214 NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
215} re_context_type;
216
217typedef struct
218{
219 int alloc;
220 int nelem;
221 int *elems;
222} re_node_set;
223
224typedef enum
225{
226 NON_TYPE = 0,
227
228 /* Node type, These are used by token, node, tree. */
229 CHARACTER = 1,
230 END_OF_RE = 2,
231 SIMPLE_BRACKET = 3,
232 OP_BACK_REF = 4,
233 OP_PERIOD = 5,
234#ifdef RE_ENABLE_I18N
235 COMPLEX_BRACKET = 6,
236 OP_UTF8_PERIOD = 7,
237#endif /* RE_ENABLE_I18N */
238
239 /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
240 when the debugger shows values of this enum type. */
241#define EPSILON_BIT 8
242 OP_OPEN_SUBEXP = EPSILON_BIT | 0,
243 OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
244 OP_ALT = EPSILON_BIT | 2,
245 OP_DUP_ASTERISK = EPSILON_BIT | 3,
246 ANCHOR = EPSILON_BIT | 4,
247
248 /* Tree type, these are used only by tree. */
249 CONCAT = 16,
250 SUBEXP = 17,
251
252 /* Token type, these are used only by token. */
253 OP_DUP_PLUS = 18,
254 OP_DUP_QUESTION,
255 OP_OPEN_BRACKET,
256 OP_CLOSE_BRACKET,
257 OP_CHARSET_RANGE,
258 OP_OPEN_DUP_NUM,
259 OP_CLOSE_DUP_NUM,
260 OP_NON_MATCH_LIST,
261 OP_OPEN_COLL_ELEM,
262 OP_CLOSE_COLL_ELEM,
263 OP_OPEN_EQUIV_CLASS,
264 OP_CLOSE_EQUIV_CLASS,
265 OP_OPEN_CHAR_CLASS,
266 OP_CLOSE_CHAR_CLASS,
267 OP_WORD,
268 OP_NOTWORD,
269 OP_SPACE,
270 OP_NOTSPACE,
271 BACK_SLASH
272
273} re_token_type_t;
274
275#ifdef RE_ENABLE_I18N
276typedef struct
277{
278 /* Multibyte characters. */
279 wchar_t *mbchars;
280
281 /* Collating symbols. */
282# ifdef _LIBC
283 int32_t *coll_syms;
284# endif
285
286 /* Equivalence classes. */
287# ifdef _LIBC
288 int32_t *equiv_classes;
289# endif
290
291 /* Range expressions. */
292# ifdef _LIBC
293 uint32_t *range_starts;
294 uint32_t *range_ends;
295# else /* not _LIBC */
296 wchar_t *range_starts;
297 wchar_t *range_ends;
298# endif /* not _LIBC */
299
300 /* Character classes. */
301 wctype_t *char_classes;
302
303 /* If this character set is the non-matching list. */
304 unsigned int non_match : 1;
305
306 /* # of multibyte characters. */
307 int nmbchars;
308
309 /* # of collating symbols. */
310 int ncoll_syms;
311
312 /* # of equivalence classes. */
313 int nequiv_classes;
314
315 /* # of range expressions. */
316 int nranges;
317
318 /* # of character classes. */
319 int nchar_classes;
320} re_charset_t;
321#endif /* RE_ENABLE_I18N */
322
323typedef struct
324{
325 union
326 {
327 unsigned char c; /* for CHARACTER */
328 re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
329#ifdef RE_ENABLE_I18N
330 re_charset_t *mbcset; /* for COMPLEX_BRACKET */
331#endif /* RE_ENABLE_I18N */
332 int idx; /* for BACK_REF */
333 re_context_type ctx_type; /* for ANCHOR */
334 } opr;
335#if __GNUC__ >= 2
336 re_token_type_t type : 8;
337#else
338 re_token_type_t type;
339#endif
340 unsigned int constraint : 10; /* context constraint */
341 unsigned int duplicated : 1;
342 unsigned int opt_subexp : 1;
343#ifdef RE_ENABLE_I18N
344 unsigned int accept_mb : 1;
345 /* These 2 bits can be moved into the union if needed (e.g. if running out
346 of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
347 unsigned int mb_partial : 1;
348#endif
349 unsigned int word_char : 1;
350} re_token_t;
351
352#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
353
354struct re_string_t
355{
356 /* Indicate the raw buffer which is the original string passed as an
357 argument of regexec(), re_search(), etc.. */
358 const unsigned char *raw_mbs;
359 /* Store the multibyte string. In case of "case insensitive mode" like
360 REG_ICASE, upper cases of the string are stored, otherwise MBS points
361 the same address that RAW_MBS points. */
362 unsigned char *mbs;
363#ifdef RE_ENABLE_I18N
364 /* Store the wide character string which is corresponding to MBS. */
365 wint_t *wcs;
366 int *offsets;
367 mbstate_t cur_state;
368#endif
369 /* Index in RAW_MBS. Each character mbs[i] corresponds to
370 raw_mbs[raw_mbs_idx + i]. */
371 int raw_mbs_idx;
372 /* The length of the valid characters in the buffers. */
373 int valid_len;
374 /* The corresponding number of bytes in raw_mbs array. */
375 int valid_raw_len;
376 /* The length of the buffers MBS and WCS. */
377 int bufs_len;
378 /* The index in MBS, which is updated by re_string_fetch_byte. */
379 int cur_idx;
380 /* length of RAW_MBS array. */
381 int raw_len;
382 /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */
383 int len;
384 /* End of the buffer may be shorter than its length in the cases such
385 as re_match_2, re_search_2. Then, we use STOP for end of the buffer
386 instead of LEN. */
387 int raw_stop;
388 /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */
389 int stop;
390
391 /* The context of mbs[0]. We store the context independently, since
392 the context of mbs[0] may be different from raw_mbs[0], which is
393 the beginning of the input string. */
394 unsigned int tip_context;
395 /* The translation passed as a part of an argument of re_compile_pattern. */
396 RE_TRANSLATE_TYPE trans;
397 /* Copy of re_dfa_t's word_char. */
398 re_const_bitset_ptr_t word_char;
399 /* 1 if REG_ICASE. */
400 unsigned char icase;
401 unsigned char is_utf8;
402 unsigned char map_notascii;
403 unsigned char mbs_allocated;
404 unsigned char offsets_needed;
405 unsigned char newline_anchor;
406 unsigned char word_ops_used;
407 int mb_cur_max;
408};
409typedef struct re_string_t re_string_t;
410
411
412struct re_dfa_t;
413typedef struct re_dfa_t re_dfa_t;
414
415#ifndef _LIBC
416# ifdef __i386__
417# define internal_function __attribute ((regparm (3), stdcall))
418# else
419# define internal_function
420# endif
421#endif
422
423#ifndef NOT_IN_libc
424static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
425 int new_buf_len)
426 internal_function;
427# ifdef RE_ENABLE_I18N
428static void build_wcs_buffer (re_string_t *pstr) internal_function;
429static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
430 internal_function;
431# endif /* RE_ENABLE_I18N */
432static void build_upper_buffer (re_string_t *pstr) internal_function;
433static void re_string_translate_buffer (re_string_t *pstr) internal_function;
434static unsigned int re_string_context_at (const re_string_t *input, int idx,
435 int eflags)
436 internal_function __attribute ((pure));
437#endif
438#define re_string_peek_byte(pstr, offset) \
439 ((pstr)->mbs[(pstr)->cur_idx + offset])
440#define re_string_fetch_byte(pstr) \
441 ((pstr)->mbs[(pstr)->cur_idx++])
442#define re_string_first_byte(pstr, idx) \
443 ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
444#define re_string_is_single_byte_char(pstr, idx) \
445 ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
446 || (pstr)->wcs[(idx) + 1] != WEOF))
447#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
448#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
449#define re_string_get_buffer(pstr) ((pstr)->mbs)
450#define re_string_length(pstr) ((pstr)->len)
451#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
452#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
453#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
454
455#ifndef _LIBC
456# if HAVE_ALLOCA
457# ifdef (_MSC_VER)
458# include <malloc.h>
459# define __libc_use_alloca(n) 0
460# else
461# include <alloca.h>
462/* The OS usually guarantees only one guard page at the bottom of the stack,
463 and a page size can be as small as 4096 bytes. So we cannot safely
464 allocate anything larger than 4096 bytes. Also care for the possibility
465 of a few compiler-allocated temporary stack slots. */
466# define __libc_use_alloca(n) ((n) < 4032)
467# endif
468# else
469/* alloca is implemented with malloc, so just use malloc. */
470# define __libc_use_alloca(n) 0
471# endif
472#endif
473
474#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
475/* SunOS 4.1.x realloc doesn't accept null pointers: pre-Standard C. Sigh. */
476#define re_realloc(p,t,n) ((p != NULL) ? (t *) realloc (p,(n)*sizeof(t)) : (t *) calloc(n,sizeof(t)))
477#define re_free(p) free (p)
478
479struct bin_tree_t
480{
481 struct bin_tree_t *parent;
482 struct bin_tree_t *left;
483 struct bin_tree_t *right;
484 struct bin_tree_t *first;
485 struct bin_tree_t *next;
486
487 re_token_t token;
488
489 /* `node_idx' is the index in dfa->nodes, if `type' == 0.
490 Otherwise `type' indicate the type of this node. */
491 int node_idx;
492};
493typedef struct bin_tree_t bin_tree_t;
494
495#define BIN_TREE_STORAGE_SIZE \
496 ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
497
498struct bin_tree_storage_t
499{
500 struct bin_tree_storage_t *next;
501 bin_tree_t data[BIN_TREE_STORAGE_SIZE];
502};
503typedef struct bin_tree_storage_t bin_tree_storage_t;
504
505#define CONTEXT_WORD 1
506#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
507#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
508#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
509
510#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
511#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
512#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
513#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
514#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
515
516#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
517#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
518#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
519#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
520
521#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
522 ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
523 || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
524 || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
525 || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
526
527#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
528 ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
529 || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
530 || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
531 || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
532
533struct re_dfastate_t
534{
535 unsigned int hash;
536 re_node_set nodes;
537 re_node_set non_eps_nodes;
538 re_node_set inveclosure;
539 re_node_set *entrance_nodes;
540 struct re_dfastate_t **trtable, **word_trtable;
541 unsigned int context : 4;
542 unsigned int halt : 1;
543 /* If this state can accept `multi byte'.
544 Note that we refer to multibyte characters, and multi character
545 collating elements as `multi byte'. */
546 unsigned int accept_mb : 1;
547 /* If this state has backreference node(s). */
548 unsigned int has_backref : 1;
549 unsigned int has_constraint : 1;
550};
551typedef struct re_dfastate_t re_dfastate_t;
552
553struct re_state_table_entry
554{
555 int num;
556 int alloc;
557 re_dfastate_t **array;
558};
559
560/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
561
562typedef struct
563{
564 int next_idx;
565 int alloc;
566 re_dfastate_t **array;
567} state_array_t;
568
569/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
570
571typedef struct
572{
573 int node;
574 int str_idx; /* The position NODE match at. */
575 state_array_t path;
576} re_sub_match_last_t;
577
578/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
579 And information about the node, whose type is OP_CLOSE_SUBEXP,
580 corresponding to NODE is stored in LASTS. */
581
582typedef struct
583{
584 int str_idx;
585 int node;
586 state_array_t *path;
587 int alasts; /* Allocation size of LASTS. */
588 int nlasts; /* The number of LASTS. */
589 re_sub_match_last_t **lasts;
590} re_sub_match_top_t;
591
592struct re_backref_cache_entry
593{
594 int node;
595 int str_idx;
596 int subexp_from;
597 int subexp_to;
598 char more;
599 char unused;
600 unsigned short int eps_reachable_subexps_map;
601};
602
603typedef struct
604{
605 /* The string object corresponding to the input string. */
606 re_string_t input;
607#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
608 const re_dfa_t *const dfa;
609#else
610 const re_dfa_t *dfa;
611#endif
612 /* EFLAGS of the argument of regexec. */
613 int eflags;
614 /* Where the matching ends. */
615 int match_last;
616 int last_node;
617 /* The state log used by the matcher. */
618 re_dfastate_t **state_log;
619 int state_log_top;
620 /* Back reference cache. */
621 int nbkref_ents;
622 int abkref_ents;
623 struct re_backref_cache_entry *bkref_ents;
624 int max_mb_elem_len;
625 int nsub_tops;
626 int asub_tops;
627 re_sub_match_top_t **sub_tops;
628} re_match_context_t;
629
630typedef struct
631{
632 re_dfastate_t **sifted_states;
633 re_dfastate_t **limited_states;
634 int last_node;
635 int last_str_idx;
636 re_node_set limits;
637} re_sift_context_t;
638
639struct re_fail_stack_ent_t
640{
641 int idx;
642 int node;
643 regmatch_t *regs;
644 re_node_set eps_via_nodes;
645};
646
647struct re_fail_stack_t
648{
649 int num;
650 int alloc;
651 struct re_fail_stack_ent_t *stack;
652};
653
654struct re_dfa_t
655{
656 re_token_t *nodes;
657 size_t nodes_alloc;
658 size_t nodes_len;
659 int *nexts;
660 int *org_indices;
661 re_node_set *edests;
662 re_node_set *eclosures;
663 re_node_set *inveclosures;
664 struct re_state_table_entry *state_table;
665 re_dfastate_t *init_state;
666 re_dfastate_t *init_state_word;
667 re_dfastate_t *init_state_nl;
668 re_dfastate_t *init_state_begbuf;
669 bin_tree_t *str_tree;
670 bin_tree_storage_t *str_tree_storage;
671 re_bitset_ptr_t sb_char;
672 int str_tree_storage_idx;
673
674 /* number of subexpressions `re_nsub' is in regex_t. */
675 unsigned int state_hash_mask;
676 int init_node;
677 int nbackref; /* The number of backreference in this dfa. */
678
679 /* Bitmap expressing which backreference is used. */
680 bitset_word_t used_bkref_map;
681 bitset_word_t completed_bkref_map;
682
683 unsigned int has_plural_match : 1;
684 /* If this dfa has "multibyte node", which is a backreference or
685 a node which can accept multibyte character or multi character
686 collating element. */
687 unsigned int has_mb_node : 1;
688 unsigned int is_utf8 : 1;
689 unsigned int map_notascii : 1;
690 unsigned int word_ops_used : 1;
691 int mb_cur_max;
692 bitset_t word_char;
693 reg_syntax_t syntax;
694 int *subexp_map;
695#ifdef DEBUG
696 char* re_str;
697#endif
698#if defined _LIBC
699 __libc_lock_define (, lock)
700#endif
701};
702
703#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
704#define re_node_set_remove(set,id) \
705 (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
706#define re_node_set_empty(p) ((p)->nelem = 0)
707#define re_node_set_free(set) re_free ((set)->elems)
708
709
710typedef enum
711{
712 SB_CHAR,
713 MB_CHAR,
714 EQUIV_CLASS,
715 COLL_SYM,
716 CHAR_CLASS
717} bracket_elem_type;
718
719typedef struct
720{
721 bracket_elem_type type;
722 union
723 {
724 unsigned char ch;
725 unsigned char *name;
726 wchar_t wch;
727 } opr;
728} bracket_elem_t;
729
730
731/* Inline functions for bitset operation. */
732static inline void
733bitset_not (bitset_t set)
734{
735 int bitset_i;
736 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
737 set[bitset_i] = ~set[bitset_i];
738}
739
740static inline void
741bitset_merge (bitset_t dest, const bitset_t src)
742{
743 int bitset_i;
744 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
745 dest[bitset_i] |= src[bitset_i];
746}
747
748static inline void
749bitset_mask (bitset_t dest, const bitset_t src)
750{
751 int bitset_i;
752 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
753 dest[bitset_i] &= src[bitset_i];
754}
755
756#ifdef RE_ENABLE_I18N
757/* Inline functions for re_string. */
758static inline int
759internal_function __attribute ((pure))
760re_string_char_size_at (const re_string_t *pstr, int idx)
761{
762 int byte_idx;
763 if (pstr->mb_cur_max == 1)
764 return 1;
765 for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
766 if (pstr->wcs[idx + byte_idx] != WEOF)
767 break;
768 return byte_idx;
769}
770
771static inline wint_t
772internal_function __attribute ((pure))
773re_string_wchar_at (const re_string_t *pstr, int idx)
774{
775 if (pstr->mb_cur_max == 1)
776 return (wint_t) pstr->mbs[idx];
777 return (wint_t) pstr->wcs[idx];
778}
779
780# ifndef NOT_IN_libc
781static int
782internal_function __attribute ((pure))
783re_string_elem_size_at (const re_string_t *pstr, int idx)
784{
785# ifdef _LIBC
786 const unsigned char *p, *extra;
787 const int32_t *table, *indirect;
788 int32_t tmp;
789# include <locale/weight.h>
790 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
791
792 if (nrules != 0)
793 {
794 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
795 extra = (const unsigned char *)
796 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
797 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
798 _NL_COLLATE_INDIRECTMB);
799 p = pstr->mbs + idx;
800 tmp = findidx (&p);
801 return p - pstr->mbs - idx;
802 }
803 else
804# endif /* _LIBC */
805 return 1;
806}
807# endif
808#endif /* RE_ENABLE_I18N */
809
810#endif /* _REGEX_INTERNAL_H */
diff --git a/win32/regexec.c b/win32/regexec.c
new file mode 100644
index 000000000..eb5e1d443
--- /dev/null
+++ b/win32/regexec.c
@@ -0,0 +1,4369 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2005, 2007, 2009, 2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA. */
20
21static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22 int n) internal_function;
23static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24static void match_ctx_free (re_match_context_t *cache) internal_function;
25static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
26 int str_idx, int from, int to)
27 internal_function;
28static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
29 internal_function;
30static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
31 int str_idx) internal_function;
32static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33 int node, int str_idx)
34 internal_function;
35static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
36 re_dfastate_t **limited_sts, int last_node,
37 int last_str_idx)
38 internal_function;
39static reg_errcode_t re_search_internal (const regex_t *preg,
40 const char *string, int length,
41 int start, int range, int stop,
42 size_t nmatch, regmatch_t pmatch[],
43 int eflags);
44static int re_search_2_stub (struct re_pattern_buffer *bufp,
45 const char *string1, int length1,
46 const char *string2, int length2,
47 int start, int range, struct re_registers *regs,
48 int stop, int ret_len);
49static int re_search_stub (struct re_pattern_buffer *bufp,
50 const char *string, int length, int start,
51 int range, int stop, struct re_registers *regs,
52 int ret_len);
53static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
54 int nregs, int regs_allocated);
55static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
56static int check_matching (re_match_context_t *mctx, int fl_longest_match,
57 int *p_match_first) internal_function;
58static int check_halt_state_context (const re_match_context_t *mctx,
59 const re_dfastate_t *state, int idx)
60 internal_function;
61static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
62 regmatch_t *prev_idx_match, int cur_node,
63 int cur_idx, int nmatch) internal_function;
64static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
65 int str_idx, int dest_node, int nregs,
66 regmatch_t *regs,
67 re_node_set *eps_via_nodes)
68 internal_function;
69static reg_errcode_t set_regs (const regex_t *preg,
70 const re_match_context_t *mctx,
71 size_t nmatch, regmatch_t *pmatch,
72 int fl_backtrack) internal_function;
73static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
74 internal_function;
75
76#ifdef RE_ENABLE_I18N
77static int sift_states_iter_mb (const re_match_context_t *mctx,
78 re_sift_context_t *sctx,
79 int node_idx, int str_idx, int max_str_idx)
80 internal_function;
81#endif /* RE_ENABLE_I18N */
82static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
83 re_sift_context_t *sctx)
84 internal_function;
85static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
86 re_sift_context_t *sctx, int str_idx,
87 re_node_set *cur_dest)
88 internal_function;
89static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
90 re_sift_context_t *sctx,
91 int str_idx,
92 re_node_set *dest_nodes)
93 internal_function;
94static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
95 re_node_set *dest_nodes,
96 const re_node_set *candidates)
97 internal_function;
98static int check_dst_limits (const re_match_context_t *mctx,
99 re_node_set *limits,
100 int dst_node, int dst_idx, int src_node,
101 int src_idx) internal_function;
102static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
103 int boundaries, int subexp_idx,
104 int from_node, int bkref_idx)
105 internal_function;
106static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
107 int limit, int subexp_idx,
108 int node, int str_idx,
109 int bkref_idx) internal_function;
110static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
111 re_node_set *dest_nodes,
112 const re_node_set *candidates,
113 re_node_set *limits,
114 struct re_backref_cache_entry *bkref_ents,
115 int str_idx) internal_function;
116static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
117 re_sift_context_t *sctx,
118 int str_idx, const re_node_set *candidates)
119 internal_function;
120static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
121 re_dfastate_t **dst,
122 re_dfastate_t **src, int num)
123 internal_function;
124static re_dfastate_t *find_recover_state (reg_errcode_t *err,
125 re_match_context_t *mctx) internal_function;
126static re_dfastate_t *transit_state (reg_errcode_t *err,
127 re_match_context_t *mctx,
128 re_dfastate_t *state) internal_function;
129static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
130 re_match_context_t *mctx,
131 re_dfastate_t *next_state)
132 internal_function;
133static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
134 re_node_set *cur_nodes,
135 int str_idx) internal_function;
136#if 0
137static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
138 re_match_context_t *mctx,
139 re_dfastate_t *pstate)
140 internal_function;
141#endif
142#ifdef RE_ENABLE_I18N
143static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
144 re_dfastate_t *pstate)
145 internal_function;
146#endif /* RE_ENABLE_I18N */
147static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
148 const re_node_set *nodes)
149 internal_function;
150static reg_errcode_t get_subexp (re_match_context_t *mctx,
151 int bkref_node, int bkref_str_idx)
152 internal_function;
153static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
154 const re_sub_match_top_t *sub_top,
155 re_sub_match_last_t *sub_last,
156 int bkref_node, int bkref_str)
157 internal_function;
158static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
159 int subexp_idx, int type) internal_function;
160static reg_errcode_t check_arrival (re_match_context_t *mctx,
161 state_array_t *path, int top_node,
162 int top_str, int last_node, int last_str,
163 int type) internal_function;
164static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
165 int str_idx,
166 re_node_set *cur_nodes,
167 re_node_set *next_nodes)
168 internal_function;
169static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
170 re_node_set *cur_nodes,
171 int ex_subexp, int type)
172 internal_function;
173static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
174 re_node_set *dst_nodes,
175 int target, int ex_subexp,
176 int type) internal_function;
177static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
178 re_node_set *cur_nodes, int cur_str,
179 int subexp_num, int type)
180 internal_function;
181static int build_trtable (const re_dfa_t *dfa,
182 re_dfastate_t *state) internal_function;
183#ifdef RE_ENABLE_I18N
184static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
185 const re_string_t *input, int idx)
186 internal_function;
187# ifdef _LIBC
188static unsigned int find_collation_sequence_value (const unsigned char *mbs,
189 size_t name_len)
190 internal_function;
191# endif /* _LIBC */
192#endif /* RE_ENABLE_I18N */
193static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
194 const re_dfastate_t *state,
195 re_node_set *states_node,
196 bitset_t *states_ch) internal_function;
197static int check_node_accept (const re_match_context_t *mctx,
198 const re_token_t *node, int idx)
199 internal_function;
200static reg_errcode_t extend_buffers (re_match_context_t *mctx)
201 internal_function;
202
203/* Entry point for POSIX code. */
204
205/* regexec searches for a given pattern, specified by PREG, in the
206 string STRING.
207
208 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
209 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
210 least NMATCH elements, and we set them to the offsets of the
211 corresponding matched substrings.
212
213 EFLAGS specifies `execution flags' which affect matching: if
214 REG_NOTBOL is set, then ^ does not match at the beginning of the
215 string; if REG_NOTEOL is set, then $ does not match at the end.
216
217 We return 0 if we find a match and REG_NOMATCH if not. */
218
219int
220regexec (
221 const regex_t *__restrict preg,
222 const char *__restrict string,
223 size_t nmatch,
224 regmatch_t pmatch[],
225 int eflags)
226{
227 reg_errcode_t err;
228 int start, length;
229
230 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
231 return REG_BADPAT;
232
233 if (eflags & REG_STARTEND)
234 {
235 start = pmatch[0].rm_so;
236 length = pmatch[0].rm_eo;
237 }
238 else
239 {
240 start = 0;
241 length = strlen (string);
242 }
243
244 __libc_lock_lock (dfa->lock);
245 if (preg->no_sub)
246 err = re_search_internal (preg, string, length, start, length - start,
247 length, 0, NULL, eflags);
248 else
249 err = re_search_internal (preg, string, length, start, length - start,
250 length, nmatch, pmatch, eflags);
251 __libc_lock_unlock (dfa->lock);
252 return err != REG_NOERROR;
253}
254
255#ifdef _LIBC
256# include <shlib-compat.h>
257versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
258
259# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
260__typeof__ (__regexec) __compat_regexec;
261
262int
263attribute_compat_text_section
264__compat_regexec (const regex_t *__restrict preg,
265 const char *__restrict string, size_t nmatch,
266 regmatch_t pmatch[], int eflags)
267{
268 return regexec (preg, string, nmatch, pmatch,
269 eflags & (REG_NOTBOL | REG_NOTEOL));
270}
271compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
272# endif
273#endif
274
275/* Entry points for GNU code. */
276
277/* re_match, re_search, re_match_2, re_search_2
278
279 The former two functions operate on STRING with length LENGTH,
280 while the later two operate on concatenation of STRING1 and STRING2
281 with lengths LENGTH1 and LENGTH2, respectively.
282
283 re_match() matches the compiled pattern in BUFP against the string,
284 starting at index START.
285
286 re_search() first tries matching at index START, then it tries to match
287 starting from index START + 1, and so on. The last start position tried
288 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
289 way as re_match().)
290
291 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
292 the first STOP characters of the concatenation of the strings should be
293 concerned.
294
295 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
296 and all groups is stroed in REGS. (For the "_2" variants, the offsets are
297 computed relative to the concatenation, not relative to the individual
298 strings.)
299
300 On success, re_match* functions return the length of the match, re_search*
301 return the position of the start of the match. Return value -1 means no
302 match was found and -2 indicates an internal error. */
303
304int
305re_match (struct re_pattern_buffer *bufp,
306 const char *string,
307 int length,
308 int start,
309 struct re_registers *regs)
310{
311 return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
312}
313#ifdef _LIBC
314weak_alias (__re_match, re_match)
315#endif
316
317int
318re_search (struct re_pattern_buffer *bufp,
319 const char *string,
320 int length, int start, int range,
321 struct re_registers *regs)
322{
323 return re_search_stub (bufp, string, length, start, range, length, regs, 0);
324}
325#ifdef _LIBC
326weak_alias (__re_search, re_search)
327#endif
328
329int
330re_match_2 (struct re_pattern_buffer *bufp,
331 const char *string1, int length1,
332 const char *string2, int length2, int start,
333 struct re_registers *regs, int stop)
334{
335 return re_search_2_stub (bufp, string1, length1, string2, length2,
336 start, 0, regs, stop, 1);
337}
338#ifdef _LIBC
339weak_alias (__re_match_2, re_match_2)
340#endif
341
342int
343re_search_2 (struct re_pattern_buffer *bufp,
344 const char *string1, int length1,
345 const char *string2, int length2, int start,
346 int range, struct re_registers *regs, int stop)
347{
348 return re_search_2_stub (bufp, string1, length1, string2, length2,
349 start, range, regs, stop, 0);
350}
351#ifdef _LIBC
352weak_alias (__re_search_2, re_search_2)
353#endif
354
355static int
356re_search_2_stub (struct re_pattern_buffer *bufp,
357 const char *string1, int length1,
358 const char *string2, int length2, int start,
359 int range, struct re_registers *regs,
360 int stop, int ret_len)
361{
362 const char *str;
363 int rval;
364 int len = length1 + length2;
365 int free_str = 0;
366
367 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
368 return -2;
369
370 /* Concatenate the strings. */
371 if (length2 > 0)
372 if (length1 > 0)
373 {
374 char *s = re_malloc (char, len);
375
376 if (BE (s == NULL, 0))
377 return -2;
378 memcpy (s, string1, length1);
379 memcpy (s + length1, string2, length2);
380 str = s;
381 free_str = 1;
382 }
383 else
384 str = string2;
385 else
386 str = string1;
387
388 rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
389 if (free_str)
390 re_free ((char *) str);
391 return rval;
392}
393
394/* The parameters have the same meaning as those of re_search.
395 Additional parameters:
396 If RET_LEN is nonzero the length of the match is returned (re_match style);
397 otherwise the position of the match is returned. */
398
399static int
400re_search_stub (struct re_pattern_buffer *bufp,
401 const char *string, int length, int start,
402 int range, int stop,
403 struct re_registers *regs, int ret_len)
404{
405 reg_errcode_t result;
406 regmatch_t *pmatch;
407 int nregs, rval;
408 int eflags = 0;
409
410 /* Check for out-of-range. */
411 if (BE (start < 0 || start > length, 0))
412 return -1;
413 if (BE (start + range > length, 0))
414 range = length - start;
415 else if (BE (start + range < 0, 0))
416 range = -start;
417
418 __libc_lock_lock (dfa->lock);
419
420 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
421 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
422
423 /* Compile fastmap if we haven't yet. */
424 if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
425 re_compile_fastmap (bufp);
426
427 if (BE (bufp->no_sub, 0))
428 regs = NULL;
429
430 /* We need at least 1 register. */
431 if (regs == NULL)
432 nregs = 1;
433 else if (BE (bufp->regs_allocated == REGS_FIXED &&
434 regs->num_regs < bufp->re_nsub + 1, 0))
435 {
436 nregs = regs->num_regs;
437 if (BE (nregs < 1, 0))
438 {
439 /* Nothing can be copied to regs. */
440 regs = NULL;
441 nregs = 1;
442 }
443 }
444 else
445 nregs = bufp->re_nsub + 1;
446 pmatch = re_malloc (regmatch_t, nregs);
447 if (BE (pmatch == NULL, 0))
448 {
449 rval = -2;
450 goto out;
451 }
452
453 result = re_search_internal (bufp, string, length, start, range, stop,
454 nregs, pmatch, eflags);
455
456 rval = 0;
457
458 /* I hope we needn't fill their regs with -1's when no match was found. */
459 if (result != REG_NOERROR)
460 rval = -1;
461 else if (regs != NULL)
462 {
463 /* If caller wants register contents data back, copy them. */
464 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
465 bufp->regs_allocated);
466 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
467 rval = -2;
468 }
469
470 if (BE (rval == 0, 1))
471 {
472 if (ret_len)
473 {
474 assert (pmatch[0].rm_so == start);
475 rval = pmatch[0].rm_eo - start;
476 }
477 else
478 rval = pmatch[0].rm_so;
479 }
480 re_free (pmatch);
481 out:
482 __libc_lock_unlock (dfa->lock);
483 return rval;
484}
485
486static unsigned
487re_copy_regs (struct re_registers *regs,
488 regmatch_t *pmatch,
489 int nregs, int regs_allocated)
490{
491 int rval = REGS_REALLOCATE;
492 int i;
493 int need_regs = nregs + 1;
494 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
495 uses. */
496
497 /* Have the register data arrays been allocated? */
498 if (regs_allocated == REGS_UNALLOCATED)
499 { /* No. So allocate them with malloc. */
500 regs->start = re_malloc (regoff_t, need_regs);
501 if (BE (regs->start == NULL, 0))
502 return REGS_UNALLOCATED;
503 regs->end = re_malloc (regoff_t, need_regs);
504 if (BE (regs->end == NULL, 0))
505 {
506 re_free (regs->start);
507 return REGS_UNALLOCATED;
508 }
509 regs->num_regs = need_regs;
510 }
511 else if (regs_allocated == REGS_REALLOCATE)
512 { /* Yes. If we need more elements than were already
513 allocated, reallocate them. If we need fewer, just
514 leave it alone. */
515 if (BE (need_regs > regs->num_regs, 0))
516 {
517 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
518 regoff_t *new_end;
519 if (BE (new_start == NULL, 0))
520 return REGS_UNALLOCATED;
521 new_end = re_realloc (regs->end, regoff_t, need_regs);
522 if (BE (new_end == NULL, 0))
523 {
524 re_free (new_start);
525 return REGS_UNALLOCATED;
526 }
527 regs->start = new_start;
528 regs->end = new_end;
529 regs->num_regs = need_regs;
530 }
531 }
532 else
533 {
534 assert (regs_allocated == REGS_FIXED);
535 /* This function may not be called with REGS_FIXED and nregs too big. */
536 assert (regs->num_regs >= nregs);
537 rval = REGS_FIXED;
538 }
539
540 /* Copy the regs. */
541 for (i = 0; i < nregs; ++i)
542 {
543 regs->start[i] = pmatch[i].rm_so;
544 regs->end[i] = pmatch[i].rm_eo;
545 }
546 for ( ; i < regs->num_regs; ++i)
547 regs->start[i] = regs->end[i] = -1;
548
549 return rval;
550}
551
552/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
553 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
554 this memory for recording register information. STARTS and ENDS
555 must be allocated using the malloc library routine, and must each
556 be at least NUM_REGS * sizeof (regoff_t) bytes long.
557
558 If NUM_REGS == 0, then subsequent matches should allocate their own
559 register data.
560
561 Unless this function is called, the first search or match using
562 PATTERN_BUFFER will allocate its own register data, without
563 freeing the old data. */
564
565void
566re_set_registers (struct re_pattern_buffer *bufp,
567 struct re_registers *regs,
568 unsigned num_regs,
569 regoff_t *starts,
570 regoff_t *ends)
571{
572 if (num_regs)
573 {
574 bufp->regs_allocated = REGS_REALLOCATE;
575 regs->num_regs = num_regs;
576 regs->start = starts;
577 regs->end = ends;
578 }
579 else
580 {
581 bufp->regs_allocated = REGS_UNALLOCATED;
582 regs->num_regs = 0;
583 regs->start = regs->end = (regoff_t *) 0;
584 }
585}
586#ifdef _LIBC
587weak_alias (__re_set_registers, re_set_registers)
588#endif
589
590/* Entry points compatible with 4.2 BSD regex library. We don't define
591 them unless specifically requested. */
592
593#if defined _REGEX_RE_COMP || defined _LIBC
594int
595# ifdef _LIBC
596weak_function
597# endif
598re_exec (s)
599 const char *s;
600{
601 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
602}
603#endif /* _REGEX_RE_COMP */
604
605/* Internal entry point. */
606
607/* Searches for a compiled pattern PREG in the string STRING, whose
608 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
609 mingings with regexec. START, and RANGE have the same meanings
610 with re_search.
611 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
612 otherwise return the error code.
613 Note: We assume front end functions already check ranges.
614 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
615
616static reg_errcode_t
617re_search_internal (const regex_t *preg,
618 const char *string,
619 int length, int start, int range, int stop,
620 size_t nmatch, regmatch_t pmatch[],
621 int eflags)
622{
623 reg_errcode_t err;
624 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
625 int left_lim, right_lim, incr;
626 int fl_longest_match, match_first, match_kind, match_last = -1;
627 int extra_nmatch;
628 int sb, ch;
629#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
630 re_match_context_t mctx = { .dfa = dfa };
631#else
632 re_match_context_t mctx;
633#endif
634 char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
635 && range && !preg->can_be_null) ? preg->fastmap : NULL;
636 RE_TRANSLATE_TYPE t = preg->translate;
637
638#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
639 memset (&mctx, '\0', sizeof (re_match_context_t));
640 mctx.dfa = dfa;
641#endif
642
643 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
644 nmatch -= extra_nmatch;
645
646 /* Check if the DFA haven't been compiled. */
647 if (BE (preg->used == 0 || dfa->init_state == NULL
648 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
649 || dfa->init_state_begbuf == NULL, 0))
650 return REG_NOMATCH;
651
652#ifdef DEBUG
653 /* We assume front-end functions already check them. */
654 assert (start + range >= 0 && start + range <= length);
655#endif
656
657 /* If initial states with non-begbuf contexts have no elements,
658 the regex must be anchored. If preg->newline_anchor is set,
659 we'll never use init_state_nl, so do not check it. */
660 if (dfa->init_state->nodes.nelem == 0
661 && dfa->init_state_word->nodes.nelem == 0
662 && (dfa->init_state_nl->nodes.nelem == 0
663 || !preg->newline_anchor))
664 {
665 if (start != 0 && start + range != 0)
666 return REG_NOMATCH;
667 start = range = 0;
668 }
669
670 /* We must check the longest matching, if nmatch > 0. */
671 fl_longest_match = (nmatch != 0 || dfa->nbackref);
672
673 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
674 preg->translate, preg->syntax & RE_ICASE, dfa);
675 if (BE (err != REG_NOERROR, 0))
676 goto free_return;
677 mctx.input.stop = stop;
678 mctx.input.raw_stop = stop;
679 mctx.input.newline_anchor = preg->newline_anchor;
680
681 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
682 if (BE (err != REG_NOERROR, 0))
683 goto free_return;
684
685 /* We will log all the DFA states through which the dfa pass,
686 if nmatch > 1, or this dfa has "multibyte node", which is a
687 back-reference or a node which can accept multibyte character or
688 multi character collating element. */
689 if (nmatch > 1 || dfa->has_mb_node)
690 {
691 /* Avoid overflow. */
692 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
693 {
694 err = REG_ESPACE;
695 goto free_return;
696 }
697
698 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
699 if (BE (mctx.state_log == NULL, 0))
700 {
701 err = REG_ESPACE;
702 goto free_return;
703 }
704 }
705 else
706 mctx.state_log = NULL;
707
708 match_first = start;
709 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
710 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
711
712 /* Check incrementally whether of not the input string match. */
713 incr = (range < 0) ? -1 : 1;
714 left_lim = (range < 0) ? start + range : start;
715 right_lim = (range < 0) ? start : start + range;
716 sb = dfa->mb_cur_max == 1;
717 match_kind =
718 (fastmap
719 ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
720 | (range >= 0 ? 2 : 0)
721 | (t != NULL ? 1 : 0))
722 : 8);
723
724 for (;; match_first += incr)
725 {
726 err = REG_NOMATCH;
727 if (match_first < left_lim || right_lim < match_first)
728 goto free_return;
729
730 /* Advance as rapidly as possible through the string, until we
731 find a plausible place to start matching. This may be done
732 with varying efficiency, so there are various possibilities:
733 only the most common of them are specialized, in order to
734 save on code size. We use a switch statement for speed. */
735 switch (match_kind)
736 {
737 case 8:
738 /* No fastmap. */
739 break;
740
741 case 7:
742 /* Fastmap with single-byte translation, match forward. */
743 while (BE (match_first < right_lim, 1)
744 && !fastmap[t[(unsigned char) string[match_first]]])
745 ++match_first;
746 goto forward_match_found_start_or_reached_end;
747
748 case 6:
749 /* Fastmap without translation, match forward. */
750 while (BE (match_first < right_lim, 1)
751 && !fastmap[(unsigned char) string[match_first]])
752 ++match_first;
753
754 forward_match_found_start_or_reached_end:
755 if (BE (match_first == right_lim, 0))
756 {
757 ch = match_first >= length
758 ? 0 : (unsigned char) string[match_first];
759 if (!fastmap[t ? t[ch] : ch])
760 goto free_return;
761 }
762 break;
763
764 case 4:
765 case 5:
766 /* Fastmap without multi-byte translation, match backwards. */
767 while (match_first >= left_lim)
768 {
769 ch = match_first >= length
770 ? 0 : (unsigned char) string[match_first];
771 if (fastmap[t ? t[ch] : ch])
772 break;
773 --match_first;
774 }
775 if (match_first < left_lim)
776 goto free_return;
777 break;
778
779 default:
780 /* In this case, we can't determine easily the current byte,
781 since it might be a component byte of a multibyte
782 character. Then we use the constructed buffer instead. */
783 for (;;)
784 {
785 /* If MATCH_FIRST is out of the valid range, reconstruct the
786 buffers. */
787 unsigned int offset = match_first - mctx.input.raw_mbs_idx;
788 if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
789 {
790 err = re_string_reconstruct (&mctx.input, match_first,
791 eflags);
792 if (BE (err != REG_NOERROR, 0))
793 goto free_return;
794
795 offset = match_first - mctx.input.raw_mbs_idx;
796 }
797 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
798 Note that MATCH_FIRST must not be smaller than 0. */
799 ch = (match_first >= length
800 ? 0 : re_string_byte_at (&mctx.input, offset));
801 if (fastmap[ch])
802 break;
803 match_first += incr;
804 if (match_first < left_lim || match_first > right_lim)
805 {
806 err = REG_NOMATCH;
807 goto free_return;
808 }
809 }
810 break;
811 }
812
813 /* Reconstruct the buffers so that the matcher can assume that
814 the matching starts from the beginning of the buffer. */
815 err = re_string_reconstruct (&mctx.input, match_first, eflags);
816 if (BE (err != REG_NOERROR, 0))
817 goto free_return;
818
819#ifdef RE_ENABLE_I18N
820 /* Don't consider this char as a possible match start if it part,
821 yet isn't the head, of a multibyte character. */
822 if (!sb && !re_string_first_byte (&mctx.input, 0))
823 continue;
824#endif
825
826 /* It seems to be appropriate one, then use the matcher. */
827 /* We assume that the matching starts from 0. */
828 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
829 match_last = check_matching (&mctx, fl_longest_match,
830 range >= 0 ? &match_first : NULL);
831 if (match_last != -1)
832 {
833 if (BE (match_last == -2, 0))
834 {
835 err = REG_ESPACE;
836 goto free_return;
837 }
838 else
839 {
840 mctx.match_last = match_last;
841 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
842 {
843 re_dfastate_t *pstate = mctx.state_log[match_last];
844 mctx.last_node = check_halt_state_context (&mctx, pstate,
845 match_last);
846 }
847 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
848 || dfa->nbackref)
849 {
850 err = prune_impossible_nodes (&mctx);
851 if (err == REG_NOERROR)
852 break;
853 if (BE (err != REG_NOMATCH, 0))
854 goto free_return;
855 match_last = -1;
856 }
857 else
858 break; /* We found a match. */
859 }
860 }
861
862 match_ctx_clean (&mctx);
863 }
864
865#ifdef DEBUG
866 assert (match_last != -1);
867 assert (err == REG_NOERROR);
868#endif
869
870 /* Set pmatch[] if we need. */
871 if (nmatch > 0)
872 {
873 int reg_idx;
874
875 /* Initialize registers. */
876 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
877 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
878
879 /* Set the points where matching start/end. */
880 pmatch[0].rm_so = 0;
881 pmatch[0].rm_eo = mctx.match_last;
882
883 if (!preg->no_sub && nmatch > 1)
884 {
885 err = set_regs (preg, &mctx, nmatch, pmatch,
886 dfa->has_plural_match && dfa->nbackref > 0);
887 if (BE (err != REG_NOERROR, 0))
888 goto free_return;
889 }
890
891 /* At last, add the offset to the each registers, since we slided
892 the buffers so that we could assume that the matching starts
893 from 0. */
894 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
895 if (pmatch[reg_idx].rm_so != -1)
896 {
897#ifdef RE_ENABLE_I18N
898 if (BE (mctx.input.offsets_needed != 0, 0))
899 {
900 pmatch[reg_idx].rm_so =
901 (pmatch[reg_idx].rm_so == mctx.input.valid_len
902 ? mctx.input.valid_raw_len
903 : mctx.input.offsets[pmatch[reg_idx].rm_so]);
904 pmatch[reg_idx].rm_eo =
905 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
906 ? mctx.input.valid_raw_len
907 : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
908 }
909#else
910 assert (mctx.input.offsets_needed == 0);
911#endif
912 pmatch[reg_idx].rm_so += match_first;
913 pmatch[reg_idx].rm_eo += match_first;
914 }
915 for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
916 {
917 pmatch[nmatch + reg_idx].rm_so = -1;
918 pmatch[nmatch + reg_idx].rm_eo = -1;
919 }
920
921 if (dfa->subexp_map)
922 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
923 if (dfa->subexp_map[reg_idx] != reg_idx)
924 {
925 pmatch[reg_idx + 1].rm_so
926 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
927 pmatch[reg_idx + 1].rm_eo
928 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
929 }
930 }
931
932 free_return:
933 re_free (mctx.state_log);
934 if (dfa->nbackref)
935 match_ctx_free (&mctx);
936 re_string_destruct (&mctx.input);
937 return err;
938}
939
940static reg_errcode_t
941prune_impossible_nodes (re_match_context_t *mctx)
942{
943 const re_dfa_t *const dfa = mctx->dfa;
944 int halt_node, match_last;
945 reg_errcode_t ret;
946 re_dfastate_t **sifted_states;
947 re_dfastate_t **lim_states = NULL;
948 re_sift_context_t sctx;
949#ifdef DEBUG
950 assert (mctx->state_log != NULL);
951#endif
952 match_last = mctx->match_last;
953 halt_node = mctx->last_node;
954
955 /* Avoid overflow. */
956 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
957 return REG_ESPACE;
958
959 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
960 if (BE (sifted_states == NULL, 0))
961 {
962 ret = REG_ESPACE;
963 goto free_return;
964 }
965 if (dfa->nbackref)
966 {
967 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
968 if (BE (lim_states == NULL, 0))
969 {
970 ret = REG_ESPACE;
971 goto free_return;
972 }
973 while (1)
974 {
975 memset (lim_states, '\0',
976 sizeof (re_dfastate_t *) * (match_last + 1));
977 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
978 match_last);
979 ret = sift_states_backward (mctx, &sctx);
980 re_node_set_free (&sctx.limits);
981 if (BE (ret != REG_NOERROR, 0))
982 goto free_return;
983 if (sifted_states[0] != NULL || lim_states[0] != NULL)
984 break;
985 do
986 {
987 --match_last;
988 if (match_last < 0)
989 {
990 ret = REG_NOMATCH;
991 goto free_return;
992 }
993 } while (mctx->state_log[match_last] == NULL
994 || !mctx->state_log[match_last]->halt);
995 halt_node = check_halt_state_context (mctx,
996 mctx->state_log[match_last],
997 match_last);
998 }
999 ret = merge_state_array (dfa, sifted_states, lim_states,
1000 match_last + 1);
1001 re_free (lim_states);
1002 lim_states = NULL;
1003 if (BE (ret != REG_NOERROR, 0))
1004 goto free_return;
1005 }
1006 else
1007 {
1008 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1009 ret = sift_states_backward (mctx, &sctx);
1010 re_node_set_free (&sctx.limits);
1011 if (BE (ret != REG_NOERROR, 0))
1012 goto free_return;
1013 if (sifted_states[0] == NULL)
1014 {
1015 ret = REG_NOMATCH;
1016 goto free_return;
1017 }
1018 }
1019 re_free (mctx->state_log);
1020 mctx->state_log = sifted_states;
1021 sifted_states = NULL;
1022 mctx->last_node = halt_node;
1023 mctx->match_last = match_last;
1024 ret = REG_NOERROR;
1025 free_return:
1026 re_free (sifted_states);
1027 re_free (lim_states);
1028 return ret;
1029}
1030
1031/* Acquire an initial state and return it.
1032 We must select appropriate initial state depending on the context,
1033 since initial states may have constraints like "\<", "^", etc.. */
1034
1035static inline re_dfastate_t *
1036__attribute ((always_inline)) internal_function
1037acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1038 int idx)
1039{
1040 const re_dfa_t *const dfa = mctx->dfa;
1041 if (dfa->init_state->has_constraint)
1042 {
1043 unsigned int context;
1044 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1045 if (IS_WORD_CONTEXT (context))
1046 return dfa->init_state_word;
1047 else if (IS_ORDINARY_CONTEXT (context))
1048 return dfa->init_state;
1049 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1050 return dfa->init_state_begbuf;
1051 else if (IS_NEWLINE_CONTEXT (context))
1052 return dfa->init_state_nl;
1053 else if (IS_BEGBUF_CONTEXT (context))
1054 {
1055 /* It is relatively rare case, then calculate on demand. */
1056 return re_acquire_state_context (err, dfa,
1057 dfa->init_state->entrance_nodes,
1058 context);
1059 }
1060 else
1061 /* Must not happen? */
1062 return dfa->init_state;
1063 }
1064 else
1065 return dfa->init_state;
1066}
1067
1068/* Check whether the regular expression match input string INPUT or not,
1069 and return the index where the matching end, return -1 if not match,
1070 or return -2 in case of an error.
1071 FL_LONGEST_MATCH means we want the POSIX longest matching.
1072 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1073 next place where we may want to try matching.
1074 Note that the matcher assume that the matching starts from the current
1075 index of the buffer. */
1076
1077static int
1078internal_function
1079check_matching (re_match_context_t *mctx, int fl_longest_match,
1080 int *p_match_first)
1081{
1082 const re_dfa_t *const dfa = mctx->dfa;
1083 reg_errcode_t err;
1084 int match = 0;
1085 int match_last = -1;
1086 int cur_str_idx = re_string_cur_idx (&mctx->input);
1087 re_dfastate_t *cur_state;
1088 int at_init_state = p_match_first != NULL;
1089 int next_start_idx = cur_str_idx;
1090
1091 err = REG_NOERROR;
1092 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1093 /* An initial state must not be NULL (invalid). */
1094 if (BE (cur_state == NULL, 0))
1095 {
1096 assert (err == REG_ESPACE);
1097 return -2;
1098 }
1099
1100 if (mctx->state_log != NULL)
1101 {
1102 mctx->state_log[cur_str_idx] = cur_state;
1103
1104 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1105 later. E.g. Processing back references. */
1106 if (BE (dfa->nbackref, 0))
1107 {
1108 at_init_state = 0;
1109 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1110 if (BE (err != REG_NOERROR, 0))
1111 return err;
1112
1113 if (cur_state->has_backref)
1114 {
1115 err = transit_state_bkref (mctx, &cur_state->nodes);
1116 if (BE (err != REG_NOERROR, 0))
1117 return err;
1118 }
1119 }
1120 }
1121
1122 /* If the RE accepts NULL string. */
1123 if (BE (cur_state->halt, 0))
1124 {
1125 if (!cur_state->has_constraint
1126 || check_halt_state_context (mctx, cur_state, cur_str_idx))
1127 {
1128 if (!fl_longest_match)
1129 return cur_str_idx;
1130 else
1131 {
1132 match_last = cur_str_idx;
1133 match = 1;
1134 }
1135 }
1136 }
1137
1138 while (!re_string_eoi (&mctx->input))
1139 {
1140 re_dfastate_t *old_state = cur_state;
1141 int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1142
1143 if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1144 || (BE (next_char_idx >= mctx->input.valid_len, 0)
1145 && mctx->input.valid_len < mctx->input.len))
1146 {
1147 err = extend_buffers (mctx);
1148 if (BE (err != REG_NOERROR, 0))
1149 {
1150 assert (err == REG_ESPACE);
1151 return -2;
1152 }
1153 }
1154
1155 cur_state = transit_state (&err, mctx, cur_state);
1156 if (mctx->state_log != NULL)
1157 cur_state = merge_state_with_log (&err, mctx, cur_state);
1158
1159 if (cur_state == NULL)
1160 {
1161 /* Reached the invalid state or an error. Try to recover a valid
1162 state using the state log, if available and if we have not
1163 already found a valid (even if not the longest) match. */
1164 if (BE (err != REG_NOERROR, 0))
1165 return -2;
1166
1167 if (mctx->state_log == NULL
1168 || (match && !fl_longest_match)
1169 || (cur_state = find_recover_state (&err, mctx)) == NULL)
1170 break;
1171 }
1172
1173 if (BE (at_init_state, 0))
1174 {
1175 if (old_state == cur_state)
1176 next_start_idx = next_char_idx;
1177 else
1178 at_init_state = 0;
1179 }
1180
1181 if (cur_state->halt)
1182 {
1183 /* Reached a halt state.
1184 Check the halt state can satisfy the current context. */
1185 if (!cur_state->has_constraint
1186 || check_halt_state_context (mctx, cur_state,
1187 re_string_cur_idx (&mctx->input)))
1188 {
1189 /* We found an appropriate halt state. */
1190 match_last = re_string_cur_idx (&mctx->input);
1191 match = 1;
1192
1193 /* We found a match, do not modify match_first below. */
1194 p_match_first = NULL;
1195 if (!fl_longest_match)
1196 break;
1197 }
1198 }
1199 }
1200
1201 if (p_match_first)
1202 *p_match_first += next_start_idx;
1203
1204 return match_last;
1205}
1206
1207/* Check NODE match the current context. */
1208
1209static int
1210internal_function
1211check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
1212{
1213 re_token_type_t type = dfa->nodes[node].type;
1214 unsigned int constraint = dfa->nodes[node].constraint;
1215 if (type != END_OF_RE)
1216 return 0;
1217 if (!constraint)
1218 return 1;
1219 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1220 return 0;
1221 return 1;
1222}
1223
1224/* Check the halt state STATE match the current context.
1225 Return 0 if not match, if the node, STATE has, is a halt node and
1226 match the context, return the node. */
1227
1228static int
1229internal_function
1230check_halt_state_context (const re_match_context_t *mctx,
1231 const re_dfastate_t *state, int idx)
1232{
1233 int i;
1234 unsigned int context;
1235#ifdef DEBUG
1236 assert (state->halt);
1237#endif
1238 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1239 for (i = 0; i < state->nodes.nelem; ++i)
1240 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1241 return state->nodes.elems[i];
1242 return 0;
1243}
1244
1245/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1246 corresponding to the DFA).
1247 Return the destination node, and update EPS_VIA_NODES, return -1 in case
1248 of errors. */
1249
1250static int
1251internal_function
1252proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
1253 int *pidx, int node, re_node_set *eps_via_nodes,
1254 struct re_fail_stack_t *fs)
1255{
1256 const re_dfa_t *const dfa = mctx->dfa;
1257 int i, err;
1258 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1259 {
1260 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1261 re_node_set *edests = &dfa->edests[node];
1262 int dest_node;
1263 err = re_node_set_insert (eps_via_nodes, node);
1264 if (BE (err < 0, 0))
1265 return -2;
1266 /* Pick up a valid destination, or return -1 if none is found. */
1267 for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1268 {
1269 int candidate = edests->elems[i];
1270 if (!re_node_set_contains (cur_nodes, candidate))
1271 continue;
1272 if (dest_node == -1)
1273 dest_node = candidate;
1274
1275 else
1276 {
1277 /* In order to avoid infinite loop like "(a*)*", return the second
1278 epsilon-transition if the first was already considered. */
1279 if (re_node_set_contains (eps_via_nodes, dest_node))
1280 return candidate;
1281
1282 /* Otherwise, push the second epsilon-transition on the fail stack. */
1283 else if (fs != NULL
1284 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1285 eps_via_nodes))
1286 return -2;
1287
1288 /* We know we are going to exit. */
1289 break;
1290 }
1291 }
1292 return dest_node;
1293 }
1294 else
1295 {
1296 int naccepted = 0;
1297 re_token_type_t type = dfa->nodes[node].type;
1298
1299#ifdef RE_ENABLE_I18N
1300 if (dfa->nodes[node].accept_mb)
1301 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1302 else
1303#endif /* RE_ENABLE_I18N */
1304 if (type == OP_BACK_REF)
1305 {
1306 int subexp_idx = dfa->nodes[node].opr.idx + 1;
1307 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1308 if (fs != NULL)
1309 {
1310 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1311 return -1;
1312 else if (naccepted)
1313 {
1314 char *buf = (char *) re_string_get_buffer (&mctx->input);
1315 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1316 naccepted) != 0)
1317 return -1;
1318 }
1319 }
1320
1321 if (naccepted == 0)
1322 {
1323 int dest_node;
1324 err = re_node_set_insert (eps_via_nodes, node);
1325 if (BE (err < 0, 0))
1326 return -2;
1327 dest_node = dfa->edests[node].elems[0];
1328 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1329 dest_node))
1330 return dest_node;
1331 }
1332 }
1333
1334 if (naccepted != 0
1335 || check_node_accept (mctx, dfa->nodes + node, *pidx))
1336 {
1337 int dest_node = dfa->nexts[node];
1338 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1339 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1340 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1341 dest_node)))
1342 return -1;
1343 re_node_set_empty (eps_via_nodes);
1344 return dest_node;
1345 }
1346 }
1347 return -1;
1348}
1349
1350static reg_errcode_t
1351internal_function
1352push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1353 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1354{
1355 reg_errcode_t err;
1356 int num = fs->num++;
1357 if (fs->num == fs->alloc)
1358 {
1359 struct re_fail_stack_ent_t *new_array;
1360 new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1361 * fs->alloc * 2));
1362 if (new_array == NULL)
1363 return REG_ESPACE;
1364 fs->alloc *= 2;
1365 fs->stack = new_array;
1366 }
1367 fs->stack[num].idx = str_idx;
1368 fs->stack[num].node = dest_node;
1369 fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1370 if (fs->stack[num].regs == NULL)
1371 return REG_ESPACE;
1372 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1373 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1374 return err;
1375}
1376
1377static int
1378internal_function
1379pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
1380 regmatch_t *regs, re_node_set *eps_via_nodes)
1381{
1382 int num = --fs->num;
1383 assert (num >= 0);
1384 *pidx = fs->stack[num].idx;
1385 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1386 re_node_set_free (eps_via_nodes);
1387 re_free (fs->stack[num].regs);
1388 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1389 return fs->stack[num].node;
1390}
1391
1392/* Set the positions where the subexpressions are starts/ends to registers
1393 PMATCH.
1394 Note: We assume that pmatch[0] is already set, and
1395 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1396
1397static reg_errcode_t
1398internal_function
1399set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1400 regmatch_t *pmatch, int fl_backtrack)
1401{
1402 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1403 int idx, cur_node;
1404 re_node_set eps_via_nodes;
1405 struct re_fail_stack_t *fs;
1406 struct re_fail_stack_t fs_body = { 0, 2, NULL };
1407 regmatch_t *prev_idx_match;
1408 int prev_idx_match_malloced = 0;
1409
1410#ifdef DEBUG
1411 assert (nmatch > 1);
1412 assert (mctx->state_log != NULL);
1413#endif
1414 if (fl_backtrack)
1415 {
1416 fs = &fs_body;
1417 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1418 if (fs->stack == NULL)
1419 return REG_ESPACE;
1420 }
1421 else
1422 fs = NULL;
1423
1424 cur_node = dfa->init_node;
1425 re_node_set_init_empty (&eps_via_nodes);
1426
1427#ifdef HAVE_ALLOCA
1428 if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1429 prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1430 else
1431#endif
1432 {
1433 prev_idx_match = re_malloc (regmatch_t, nmatch);
1434 if (prev_idx_match == NULL)
1435 {
1436 free_fail_stack_return (fs);
1437 return REG_ESPACE;
1438 }
1439 prev_idx_match_malloced = 1;
1440 }
1441 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1442
1443 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1444 {
1445 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1446
1447 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1448 {
1449 int reg_idx;
1450 if (fs)
1451 {
1452 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1453 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1454 break;
1455 if (reg_idx == nmatch)
1456 {
1457 re_node_set_free (&eps_via_nodes);
1458 if (prev_idx_match_malloced)
1459 re_free (prev_idx_match);
1460 return free_fail_stack_return (fs);
1461 }
1462 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1463 &eps_via_nodes);
1464 }
1465 else
1466 {
1467 re_node_set_free (&eps_via_nodes);
1468 if (prev_idx_match_malloced)
1469 re_free (prev_idx_match);
1470 return REG_NOERROR;
1471 }
1472 }
1473
1474 /* Proceed to next node. */
1475 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1476 &eps_via_nodes, fs);
1477
1478 if (BE (cur_node < 0, 0))
1479 {
1480 if (BE (cur_node == -2, 0))
1481 {
1482 re_node_set_free (&eps_via_nodes);
1483 if (prev_idx_match_malloced)
1484 re_free (prev_idx_match);
1485 free_fail_stack_return (fs);
1486 return REG_ESPACE;
1487 }
1488 if (fs)
1489 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1490 &eps_via_nodes);
1491 else
1492 {
1493 re_node_set_free (&eps_via_nodes);
1494 if (prev_idx_match_malloced)
1495 re_free (prev_idx_match);
1496 return REG_NOMATCH;
1497 }
1498 }
1499 }
1500 re_node_set_free (&eps_via_nodes);
1501 if (prev_idx_match_malloced)
1502 re_free (prev_idx_match);
1503 return free_fail_stack_return (fs);
1504}
1505
1506static reg_errcode_t
1507internal_function
1508free_fail_stack_return (struct re_fail_stack_t *fs)
1509{
1510 if (fs)
1511 {
1512 int fs_idx;
1513 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1514 {
1515 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1516 re_free (fs->stack[fs_idx].regs);
1517 }
1518 re_free (fs->stack);
1519 }
1520 return REG_NOERROR;
1521}
1522
1523static void
1524internal_function
1525update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1526 regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
1527{
1528 int type = dfa->nodes[cur_node].type;
1529 if (type == OP_OPEN_SUBEXP)
1530 {
1531 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1532
1533 /* We are at the first node of this sub expression. */
1534 if (reg_num < nmatch)
1535 {
1536 pmatch[reg_num].rm_so = cur_idx;
1537 pmatch[reg_num].rm_eo = -1;
1538 }
1539 }
1540 else if (type == OP_CLOSE_SUBEXP)
1541 {
1542 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1543 if (reg_num < nmatch)
1544 {
1545 /* We are at the last node of this sub expression. */
1546 if (pmatch[reg_num].rm_so < cur_idx)
1547 {
1548 pmatch[reg_num].rm_eo = cur_idx;
1549 /* This is a non-empty match or we are not inside an optional
1550 subexpression. Accept this right away. */
1551 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1552 }
1553 else
1554 {
1555 if (dfa->nodes[cur_node].opt_subexp
1556 && prev_idx_match[reg_num].rm_so != -1)
1557 /* We transited through an empty match for an optional
1558 subexpression, like (a?)*, and this is not the subexp's
1559 first match. Copy back the old content of the registers
1560 so that matches of an inner subexpression are undone as
1561 well, like in ((a?))*. */
1562 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1563 else
1564 /* We completed a subexpression, but it may be part of
1565 an optional one, so do not update PREV_IDX_MATCH. */
1566 pmatch[reg_num].rm_eo = cur_idx;
1567 }
1568 }
1569 }
1570}
1571
1572/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1573 and sift the nodes in each states according to the following rules.
1574 Updated state_log will be wrote to STATE_LOG.
1575
1576 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1577 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1578 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1579 the LAST_NODE, we throw away the node `a'.
1580 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1581 string `s' and transit to `b':
1582 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1583 away the node `a'.
1584 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1585 thrown away, we throw away the node `a'.
1586 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1587 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1588 node `a'.
1589 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1590 we throw away the node `a'. */
1591
1592#define STATE_NODE_CONTAINS(state,node) \
1593 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1594
1595static reg_errcode_t
1596internal_function
1597sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1598{
1599 reg_errcode_t err;
1600 int null_cnt = 0;
1601 int str_idx = sctx->last_str_idx;
1602 re_node_set cur_dest;
1603
1604#ifdef DEBUG
1605 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1606#endif
1607
1608 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1609 transit to the last_node and the last_node itself. */
1610 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1611 if (BE (err != REG_NOERROR, 0))
1612 return err;
1613 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1614 if (BE (err != REG_NOERROR, 0))
1615 goto free_return;
1616
1617 /* Then check each states in the state_log. */
1618 while (str_idx > 0)
1619 {
1620 /* Update counters. */
1621 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1622 if (null_cnt > mctx->max_mb_elem_len)
1623 {
1624 memset (sctx->sifted_states, '\0',
1625 sizeof (re_dfastate_t *) * str_idx);
1626 re_node_set_free (&cur_dest);
1627 return REG_NOERROR;
1628 }
1629 re_node_set_empty (&cur_dest);
1630 --str_idx;
1631
1632 if (mctx->state_log[str_idx])
1633 {
1634 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1635 if (BE (err != REG_NOERROR, 0))
1636 goto free_return;
1637 }
1638
1639 /* Add all the nodes which satisfy the following conditions:
1640 - It can epsilon transit to a node in CUR_DEST.
1641 - It is in CUR_SRC.
1642 And update state_log. */
1643 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1644 if (BE (err != REG_NOERROR, 0))
1645 goto free_return;
1646 }
1647 err = REG_NOERROR;
1648 free_return:
1649 re_node_set_free (&cur_dest);
1650 return err;
1651}
1652
1653static reg_errcode_t
1654internal_function
1655build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1656 int str_idx, re_node_set *cur_dest)
1657{
1658 const re_dfa_t *const dfa = mctx->dfa;
1659 const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1660 int i;
1661
1662 /* Then build the next sifted state.
1663 We build the next sifted state on `cur_dest', and update
1664 `sifted_states[str_idx]' with `cur_dest'.
1665 Note:
1666 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1667 `cur_src' points the node_set of the old `state_log[str_idx]'
1668 (with the epsilon nodes pre-filtered out). */
1669 for (i = 0; i < cur_src->nelem; i++)
1670 {
1671 int prev_node = cur_src->elems[i];
1672 int naccepted = 0;
1673 int ret;
1674
1675#ifdef DEBUG
1676 re_token_type_t type = dfa->nodes[prev_node].type;
1677 assert (!IS_EPSILON_NODE (type));
1678#endif
1679#ifdef RE_ENABLE_I18N
1680 /* If the node may accept `multi byte'. */
1681 if (dfa->nodes[prev_node].accept_mb)
1682 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1683 str_idx, sctx->last_str_idx);
1684#endif /* RE_ENABLE_I18N */
1685
1686 /* We don't check backreferences here.
1687 See update_cur_sifted_state(). */
1688 if (!naccepted
1689 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1690 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1691 dfa->nexts[prev_node]))
1692 naccepted = 1;
1693
1694 if (naccepted == 0)
1695 continue;
1696
1697 if (sctx->limits.nelem)
1698 {
1699 int to_idx = str_idx + naccepted;
1700 if (check_dst_limits (mctx, &sctx->limits,
1701 dfa->nexts[prev_node], to_idx,
1702 prev_node, str_idx))
1703 continue;
1704 }
1705 ret = re_node_set_insert (cur_dest, prev_node);
1706 if (BE (ret == -1, 0))
1707 return REG_ESPACE;
1708 }
1709
1710 return REG_NOERROR;
1711}
1712
1713/* Helper functions. */
1714
1715static reg_errcode_t
1716internal_function
1717clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
1718{
1719 int top = mctx->state_log_top;
1720
1721 if (next_state_log_idx >= mctx->input.bufs_len
1722 || (next_state_log_idx >= mctx->input.valid_len
1723 && mctx->input.valid_len < mctx->input.len))
1724 {
1725 reg_errcode_t err;
1726 err = extend_buffers (mctx);
1727 if (BE (err != REG_NOERROR, 0))
1728 return err;
1729 }
1730
1731 if (top < next_state_log_idx)
1732 {
1733 memset (mctx->state_log + top + 1, '\0',
1734 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1735 mctx->state_log_top = next_state_log_idx;
1736 }
1737 return REG_NOERROR;
1738}
1739
1740static reg_errcode_t
1741internal_function
1742merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1743 re_dfastate_t **src, int num)
1744{
1745 int st_idx;
1746 reg_errcode_t err;
1747 for (st_idx = 0; st_idx < num; ++st_idx)
1748 {
1749 if (dst[st_idx] == NULL)
1750 dst[st_idx] = src[st_idx];
1751 else if (src[st_idx] != NULL)
1752 {
1753 re_node_set merged_set;
1754 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1755 &src[st_idx]->nodes);
1756 if (BE (err != REG_NOERROR, 0))
1757 return err;
1758 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1759 re_node_set_free (&merged_set);
1760 if (BE (err != REG_NOERROR, 0))
1761 return err;
1762 }
1763 }
1764 return REG_NOERROR;
1765}
1766
1767static reg_errcode_t
1768internal_function
1769update_cur_sifted_state (const re_match_context_t *mctx,
1770 re_sift_context_t *sctx, int str_idx,
1771 re_node_set *dest_nodes)
1772{
1773 const re_dfa_t *const dfa = mctx->dfa;
1774 reg_errcode_t err = REG_NOERROR;
1775 const re_node_set *candidates;
1776 candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1777 : &mctx->state_log[str_idx]->nodes);
1778
1779 if (dest_nodes->nelem == 0)
1780 sctx->sifted_states[str_idx] = NULL;
1781 else
1782 {
1783 if (candidates)
1784 {
1785 /* At first, add the nodes which can epsilon transit to a node in
1786 DEST_NODE. */
1787 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1788 if (BE (err != REG_NOERROR, 0))
1789 return err;
1790
1791 /* Then, check the limitations in the current sift_context. */
1792 if (sctx->limits.nelem)
1793 {
1794 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1795 mctx->bkref_ents, str_idx);
1796 if (BE (err != REG_NOERROR, 0))
1797 return err;
1798 }
1799 }
1800
1801 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1802 if (BE (err != REG_NOERROR, 0))
1803 return err;
1804 }
1805
1806 if (candidates && mctx->state_log[str_idx]->has_backref)
1807 {
1808 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1809 if (BE (err != REG_NOERROR, 0))
1810 return err;
1811 }
1812 return REG_NOERROR;
1813}
1814
1815static reg_errcode_t
1816internal_function
1817add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1818 const re_node_set *candidates)
1819{
1820 reg_errcode_t err = REG_NOERROR;
1821 int i;
1822
1823 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1824 if (BE (err != REG_NOERROR, 0))
1825 return err;
1826
1827 if (!state->inveclosure.alloc)
1828 {
1829 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1830 if (BE (err != REG_NOERROR, 0))
1831 return REG_ESPACE;
1832 for (i = 0; i < dest_nodes->nelem; i++)
1833 {
1834 err = re_node_set_merge (&state->inveclosure,
1835 dfa->inveclosures + dest_nodes->elems[i]);
1836 if (BE (err != REG_NOERROR, 0))
1837 return REG_ESPACE;
1838 }
1839 }
1840 return re_node_set_add_intersect (dest_nodes, candidates,
1841 &state->inveclosure);
1842}
1843
1844static reg_errcode_t
1845internal_function
1846sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1847 const re_node_set *candidates)
1848{
1849 int ecl_idx;
1850 reg_errcode_t err;
1851 re_node_set *inv_eclosure = dfa->inveclosures + node;
1852 re_node_set except_nodes;
1853 re_node_set_init_empty (&except_nodes);
1854 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1855 {
1856 int cur_node = inv_eclosure->elems[ecl_idx];
1857 if (cur_node == node)
1858 continue;
1859 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1860 {
1861 int edst1 = dfa->edests[cur_node].elems[0];
1862 int edst2 = ((dfa->edests[cur_node].nelem > 1)
1863 ? dfa->edests[cur_node].elems[1] : -1);
1864 if ((!re_node_set_contains (inv_eclosure, edst1)
1865 && re_node_set_contains (dest_nodes, edst1))
1866 || (edst2 > 0
1867 && !re_node_set_contains (inv_eclosure, edst2)
1868 && re_node_set_contains (dest_nodes, edst2)))
1869 {
1870 err = re_node_set_add_intersect (&except_nodes, candidates,
1871 dfa->inveclosures + cur_node);
1872 if (BE (err != REG_NOERROR, 0))
1873 {
1874 re_node_set_free (&except_nodes);
1875 return err;
1876 }
1877 }
1878 }
1879 }
1880 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1881 {
1882 int cur_node = inv_eclosure->elems[ecl_idx];
1883 if (!re_node_set_contains (&except_nodes, cur_node))
1884 {
1885 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1886 re_node_set_remove_at (dest_nodes, idx);
1887 }
1888 }
1889 re_node_set_free (&except_nodes);
1890 return REG_NOERROR;
1891}
1892
1893static int
1894internal_function
1895check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
1896 int dst_node, int dst_idx, int src_node, int src_idx)
1897{
1898 const re_dfa_t *const dfa = mctx->dfa;
1899 int lim_idx, src_pos, dst_pos;
1900
1901 int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1902 int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1903 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1904 {
1905 int subexp_idx;
1906 struct re_backref_cache_entry *ent;
1907 ent = mctx->bkref_ents + limits->elems[lim_idx];
1908 subexp_idx = dfa->nodes[ent->node].opr.idx;
1909
1910 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1911 subexp_idx, dst_node, dst_idx,
1912 dst_bkref_idx);
1913 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1914 subexp_idx, src_node, src_idx,
1915 src_bkref_idx);
1916
1917 /* In case of:
1918 <src> <dst> ( <subexp> )
1919 ( <subexp> ) <src> <dst>
1920 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1921 if (src_pos == dst_pos)
1922 continue; /* This is unrelated limitation. */
1923 else
1924 return 1;
1925 }
1926 return 0;
1927}
1928
1929static int
1930internal_function
1931check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1932 int subexp_idx, int from_node, int bkref_idx)
1933{
1934 const re_dfa_t *const dfa = mctx->dfa;
1935 const re_node_set *eclosures = dfa->eclosures + from_node;
1936 int node_idx;
1937
1938 /* Else, we are on the boundary: examine the nodes on the epsilon
1939 closure. */
1940 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1941 {
1942 int node = eclosures->elems[node_idx];
1943 switch (dfa->nodes[node].type)
1944 {
1945 case OP_BACK_REF:
1946 if (bkref_idx != -1)
1947 {
1948 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1949 do
1950 {
1951 int dst, cpos;
1952
1953 if (ent->node != node)
1954 continue;
1955
1956 if (subexp_idx < BITSET_WORD_BITS
1957 && !(ent->eps_reachable_subexps_map
1958 & ((bitset_word_t) 1 << subexp_idx)))
1959 continue;
1960
1961 /* Recurse trying to reach the OP_OPEN_SUBEXP and
1962 OP_CLOSE_SUBEXP cases below. But, if the
1963 destination node is the same node as the source
1964 node, don't recurse because it would cause an
1965 infinite loop: a regex that exhibits this behavior
1966 is ()\1*\1* */
1967 dst = dfa->edests[node].elems[0];
1968 if (dst == from_node)
1969 {
1970 if (boundaries & 1)
1971 return -1;
1972 else /* if (boundaries & 2) */
1973 return 0;
1974 }
1975
1976 cpos =
1977 check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1978 dst, bkref_idx);
1979 if (cpos == -1 /* && (boundaries & 1) */)
1980 return -1;
1981 if (cpos == 0 && (boundaries & 2))
1982 return 0;
1983
1984 if (subexp_idx < BITSET_WORD_BITS)
1985 ent->eps_reachable_subexps_map
1986 &= ~((bitset_word_t) 1 << subexp_idx);
1987 }
1988 while (ent++->more);
1989 }
1990 break;
1991
1992 case OP_OPEN_SUBEXP:
1993 if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1994 return -1;
1995 break;
1996
1997 case OP_CLOSE_SUBEXP:
1998 if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1999 return 0;
2000 break;
2001
2002 default:
2003 break;
2004 }
2005 }
2006
2007 return (boundaries & 2) ? 1 : 0;
2008}
2009
2010static int
2011internal_function
2012check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
2013 int subexp_idx, int from_node, int str_idx,
2014 int bkref_idx)
2015{
2016 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2017 int boundaries;
2018
2019 /* If we are outside the range of the subexpression, return -1 or 1. */
2020 if (str_idx < lim->subexp_from)
2021 return -1;
2022
2023 if (lim->subexp_to < str_idx)
2024 return 1;
2025
2026 /* If we are within the subexpression, return 0. */
2027 boundaries = (str_idx == lim->subexp_from);
2028 boundaries |= (str_idx == lim->subexp_to) << 1;
2029 if (boundaries == 0)
2030 return 0;
2031
2032 /* Else, examine epsilon closure. */
2033 return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2034 from_node, bkref_idx);
2035}
2036
2037/* Check the limitations of sub expressions LIMITS, and remove the nodes
2038 which are against limitations from DEST_NODES. */
2039
2040static reg_errcode_t
2041internal_function
2042check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2043 const re_node_set *candidates, re_node_set *limits,
2044 struct re_backref_cache_entry *bkref_ents, int str_idx)
2045{
2046 reg_errcode_t err;
2047 int node_idx, lim_idx;
2048
2049 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2050 {
2051 int subexp_idx;
2052 struct re_backref_cache_entry *ent;
2053 ent = bkref_ents + limits->elems[lim_idx];
2054
2055 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2056 continue; /* This is unrelated limitation. */
2057
2058 subexp_idx = dfa->nodes[ent->node].opr.idx;
2059 if (ent->subexp_to == str_idx)
2060 {
2061 int ops_node = -1;
2062 int cls_node = -1;
2063 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2064 {
2065 int node = dest_nodes->elems[node_idx];
2066 re_token_type_t type = dfa->nodes[node].type;
2067 if (type == OP_OPEN_SUBEXP
2068 && subexp_idx == dfa->nodes[node].opr.idx)
2069 ops_node = node;
2070 else if (type == OP_CLOSE_SUBEXP
2071 && subexp_idx == dfa->nodes[node].opr.idx)
2072 cls_node = node;
2073 }
2074
2075 /* Check the limitation of the open subexpression. */
2076 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2077 if (ops_node >= 0)
2078 {
2079 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2080 candidates);
2081 if (BE (err != REG_NOERROR, 0))
2082 return err;
2083 }
2084
2085 /* Check the limitation of the close subexpression. */
2086 if (cls_node >= 0)
2087 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2088 {
2089 int node = dest_nodes->elems[node_idx];
2090 if (!re_node_set_contains (dfa->inveclosures + node,
2091 cls_node)
2092 && !re_node_set_contains (dfa->eclosures + node,
2093 cls_node))
2094 {
2095 /* It is against this limitation.
2096 Remove it form the current sifted state. */
2097 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2098 candidates);
2099 if (BE (err != REG_NOERROR, 0))
2100 return err;
2101 --node_idx;
2102 }
2103 }
2104 }
2105 else /* (ent->subexp_to != str_idx) */
2106 {
2107 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2108 {
2109 int node = dest_nodes->elems[node_idx];
2110 re_token_type_t type = dfa->nodes[node].type;
2111 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2112 {
2113 if (subexp_idx != dfa->nodes[node].opr.idx)
2114 continue;
2115 /* It is against this limitation.
2116 Remove it form the current sifted state. */
2117 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2118 candidates);
2119 if (BE (err != REG_NOERROR, 0))
2120 return err;
2121 }
2122 }
2123 }
2124 }
2125 return REG_NOERROR;
2126}
2127
2128static reg_errcode_t
2129internal_function
2130sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2131 int str_idx, const re_node_set *candidates)
2132{
2133 const re_dfa_t *const dfa = mctx->dfa;
2134 reg_errcode_t err;
2135 int node_idx, node;
2136 re_sift_context_t local_sctx;
2137 int first_idx = search_cur_bkref_entry (mctx, str_idx);
2138
2139 if (first_idx == -1)
2140 return REG_NOERROR;
2141
2142 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
2143
2144 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2145 {
2146 int enabled_idx;
2147 re_token_type_t type;
2148 struct re_backref_cache_entry *entry;
2149 node = candidates->elems[node_idx];
2150 type = dfa->nodes[node].type;
2151 /* Avoid infinite loop for the REs like "()\1+". */
2152 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2153 continue;
2154 if (type != OP_BACK_REF)
2155 continue;
2156
2157 entry = mctx->bkref_ents + first_idx;
2158 enabled_idx = first_idx;
2159 do
2160 {
2161 int subexp_len;
2162 int to_idx;
2163 int dst_node;
2164 int ret;
2165 re_dfastate_t *cur_state;
2166
2167 if (entry->node != node)
2168 continue;
2169 subexp_len = entry->subexp_to - entry->subexp_from;
2170 to_idx = str_idx + subexp_len;
2171 dst_node = (subexp_len ? dfa->nexts[node]
2172 : dfa->edests[node].elems[0]);
2173
2174 if (to_idx > sctx->last_str_idx
2175 || sctx->sifted_states[to_idx] == NULL
2176 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2177 || check_dst_limits (mctx, &sctx->limits, node,
2178 str_idx, dst_node, to_idx))
2179 continue;
2180
2181 if (local_sctx.sifted_states == NULL)
2182 {
2183 local_sctx = *sctx;
2184 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2185 if (BE (err != REG_NOERROR, 0))
2186 goto free_return;
2187 }
2188 local_sctx.last_node = node;
2189 local_sctx.last_str_idx = str_idx;
2190 ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2191 if (BE (ret < 0, 0))
2192 {
2193 err = REG_ESPACE;
2194 goto free_return;
2195 }
2196 cur_state = local_sctx.sifted_states[str_idx];
2197 err = sift_states_backward (mctx, &local_sctx);
2198 if (BE (err != REG_NOERROR, 0))
2199 goto free_return;
2200 if (sctx->limited_states != NULL)
2201 {
2202 err = merge_state_array (dfa, sctx->limited_states,
2203 local_sctx.sifted_states,
2204 str_idx + 1);
2205 if (BE (err != REG_NOERROR, 0))
2206 goto free_return;
2207 }
2208 local_sctx.sifted_states[str_idx] = cur_state;
2209 re_node_set_remove (&local_sctx.limits, enabled_idx);
2210
2211 /* mctx->bkref_ents may have changed, reload the pointer. */
2212 entry = mctx->bkref_ents + enabled_idx;
2213 }
2214 while (enabled_idx++, entry++->more);
2215 }
2216 err = REG_NOERROR;
2217 free_return:
2218 if (local_sctx.sifted_states != NULL)
2219 {
2220 re_node_set_free (&local_sctx.limits);
2221 }
2222
2223 return err;
2224}
2225
2226
2227#ifdef RE_ENABLE_I18N
2228static int
2229internal_function
2230sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2231 int node_idx, int str_idx, int max_str_idx)
2232{
2233 const re_dfa_t *const dfa = mctx->dfa;
2234 int naccepted;
2235 /* Check the node can accept `multi byte'. */
2236 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2237 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2238 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2239 dfa->nexts[node_idx]))
2240 /* The node can't accept the `multi byte', or the
2241 destination was already thrown away, then the node
2242 couldn't accept the current input `multi byte'. */
2243 naccepted = 0;
2244 /* Otherwise, it is sure that the node could accept
2245 `naccepted' bytes input. */
2246 return naccepted;
2247}
2248#endif /* RE_ENABLE_I18N */
2249
2250
2251/* Functions for state transition. */
2252
2253/* Return the next state to which the current state STATE will transit by
2254 accepting the current input byte, and update STATE_LOG if necessary.
2255 If STATE can accept a multibyte char/collating element/back reference
2256 update the destination of STATE_LOG. */
2257
2258static re_dfastate_t *
2259internal_function
2260transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2261 re_dfastate_t *state)
2262{
2263 re_dfastate_t **trtable;
2264 unsigned char ch;
2265
2266#ifdef RE_ENABLE_I18N
2267 /* If the current state can accept multibyte. */
2268 if (BE (state->accept_mb, 0))
2269 {
2270 *err = transit_state_mb (mctx, state);
2271 if (BE (*err != REG_NOERROR, 0))
2272 return NULL;
2273 }
2274#endif /* RE_ENABLE_I18N */
2275
2276 /* Then decide the next state with the single byte. */
2277#if 0
2278 if (0)
2279 /* don't use transition table */
2280 return transit_state_sb (err, mctx, state);
2281#endif
2282
2283 /* Use transition table */
2284 ch = re_string_fetch_byte (&mctx->input);
2285 for (;;)
2286 {
2287 trtable = state->trtable;
2288 if (BE (trtable != NULL, 1))
2289 return trtable[ch];
2290
2291 trtable = state->word_trtable;
2292 if (BE (trtable != NULL, 1))
2293 {
2294 unsigned int context;
2295 context
2296 = re_string_context_at (&mctx->input,
2297 re_string_cur_idx (&mctx->input) - 1,
2298 mctx->eflags);
2299 if (IS_WORD_CONTEXT (context))
2300 return trtable[ch + SBC_MAX];
2301 else
2302 return trtable[ch];
2303 }
2304
2305 if (!build_trtable (mctx->dfa, state))
2306 {
2307 *err = REG_ESPACE;
2308 return NULL;
2309 }
2310
2311 /* Retry, we now have a transition table. */
2312 }
2313}
2314
2315/* Update the state_log if we need */
2316static re_dfastate_t *
2317internal_function
2318merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2319 re_dfastate_t *next_state)
2320{
2321 const re_dfa_t *const dfa = mctx->dfa;
2322 int cur_idx = re_string_cur_idx (&mctx->input);
2323
2324 if (cur_idx > mctx->state_log_top)
2325 {
2326 mctx->state_log[cur_idx] = next_state;
2327 mctx->state_log_top = cur_idx;
2328 }
2329 else if (mctx->state_log[cur_idx] == NULL)
2330 {
2331 mctx->state_log[cur_idx] = next_state;
2332 }
2333 else
2334 {
2335 re_dfastate_t *pstate;
2336 unsigned int context;
2337 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2338 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2339 the destination of a multibyte char/collating element/
2340 back reference. Then the next state is the union set of
2341 these destinations and the results of the transition table. */
2342 pstate = mctx->state_log[cur_idx];
2343 log_nodes = pstate->entrance_nodes;
2344 if (next_state != NULL)
2345 {
2346 table_nodes = next_state->entrance_nodes;
2347 *err = re_node_set_init_union (&next_nodes, table_nodes,
2348 log_nodes);
2349 if (BE (*err != REG_NOERROR, 0))
2350 return NULL;
2351 }
2352 else
2353 next_nodes = *log_nodes;
2354 /* Note: We already add the nodes of the initial state,
2355 then we don't need to add them here. */
2356
2357 context = re_string_context_at (&mctx->input,
2358 re_string_cur_idx (&mctx->input) - 1,
2359 mctx->eflags);
2360 next_state = mctx->state_log[cur_idx]
2361 = re_acquire_state_context (err, dfa, &next_nodes, context);
2362 /* We don't need to check errors here, since the return value of
2363 this function is next_state and ERR is already set. */
2364
2365 if (table_nodes != NULL)
2366 re_node_set_free (&next_nodes);
2367 }
2368
2369 if (BE (dfa->nbackref, 0) && next_state != NULL)
2370 {
2371 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2372 later. We must check them here, since the back references in the
2373 next state might use them. */
2374 *err = check_subexp_matching_top (mctx, &next_state->nodes,
2375 cur_idx);
2376 if (BE (*err != REG_NOERROR, 0))
2377 return NULL;
2378
2379 /* If the next state has back references. */
2380 if (next_state->has_backref)
2381 {
2382 *err = transit_state_bkref (mctx, &next_state->nodes);
2383 if (BE (*err != REG_NOERROR, 0))
2384 return NULL;
2385 next_state = mctx->state_log[cur_idx];
2386 }
2387 }
2388
2389 return next_state;
2390}
2391
2392/* Skip bytes in the input that correspond to part of a
2393 multi-byte match, then look in the log for a state
2394 from which to restart matching. */
2395static re_dfastate_t *
2396internal_function
2397find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2398{
2399 re_dfastate_t *cur_state;
2400 do
2401 {
2402 int max = mctx->state_log_top;
2403 int cur_str_idx = re_string_cur_idx (&mctx->input);
2404
2405 do
2406 {
2407 if (++cur_str_idx > max)
2408 return NULL;
2409 re_string_skip_bytes (&mctx->input, 1);
2410 }
2411 while (mctx->state_log[cur_str_idx] == NULL);
2412
2413 cur_state = merge_state_with_log (err, mctx, NULL);
2414 }
2415 while (*err == REG_NOERROR && cur_state == NULL);
2416 return cur_state;
2417}
2418
2419/* Helper functions for transit_state. */
2420
2421/* From the node set CUR_NODES, pick up the nodes whose types are
2422 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2423 expression. And register them to use them later for evaluating the
2424 correspoding back references. */
2425
2426static reg_errcode_t
2427internal_function
2428check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2429 int str_idx)
2430{
2431 const re_dfa_t *const dfa = mctx->dfa;
2432 int node_idx;
2433 reg_errcode_t err;
2434
2435 /* TODO: This isn't efficient.
2436 Because there might be more than one nodes whose types are
2437 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2438 nodes.
2439 E.g. RE: (a){2} */
2440 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2441 {
2442 int node = cur_nodes->elems[node_idx];
2443 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2444 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2445 && (dfa->used_bkref_map
2446 & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2447 {
2448 err = match_ctx_add_subtop (mctx, node, str_idx);
2449 if (BE (err != REG_NOERROR, 0))
2450 return err;
2451 }
2452 }
2453 return REG_NOERROR;
2454}
2455
2456#if 0
2457/* Return the next state to which the current state STATE will transit by
2458 accepting the current input byte. */
2459
2460static re_dfastate_t *
2461transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2462 re_dfastate_t *state)
2463{
2464 const re_dfa_t *const dfa = mctx->dfa;
2465 re_node_set next_nodes;
2466 re_dfastate_t *next_state;
2467 int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2468 unsigned int context;
2469
2470 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2471 if (BE (*err != REG_NOERROR, 0))
2472 return NULL;
2473 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2474 {
2475 int cur_node = state->nodes.elems[node_cnt];
2476 if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2477 {
2478 *err = re_node_set_merge (&next_nodes,
2479 dfa->eclosures + dfa->nexts[cur_node]);
2480 if (BE (*err != REG_NOERROR, 0))
2481 {
2482 re_node_set_free (&next_nodes);
2483 return NULL;
2484 }
2485 }
2486 }
2487 context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2488 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2489 /* We don't need to check errors here, since the return value of
2490 this function is next_state and ERR is already set. */
2491
2492 re_node_set_free (&next_nodes);
2493 re_string_skip_bytes (&mctx->input, 1);
2494 return next_state;
2495}
2496#endif
2497
2498#ifdef RE_ENABLE_I18N
2499static reg_errcode_t
2500internal_function
2501transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2502{
2503 const re_dfa_t *const dfa = mctx->dfa;
2504 reg_errcode_t err;
2505 int i;
2506
2507 for (i = 0; i < pstate->nodes.nelem; ++i)
2508 {
2509 re_node_set dest_nodes, *new_nodes;
2510 int cur_node_idx = pstate->nodes.elems[i];
2511 int naccepted, dest_idx;
2512 unsigned int context;
2513 re_dfastate_t *dest_state;
2514
2515 if (!dfa->nodes[cur_node_idx].accept_mb)
2516 continue;
2517
2518 if (dfa->nodes[cur_node_idx].constraint)
2519 {
2520 context = re_string_context_at (&mctx->input,
2521 re_string_cur_idx (&mctx->input),
2522 mctx->eflags);
2523 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2524 context))
2525 continue;
2526 }
2527
2528 /* How many bytes the node can accept? */
2529 naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2530 re_string_cur_idx (&mctx->input));
2531 if (naccepted == 0)
2532 continue;
2533
2534 /* The node can accepts `naccepted' bytes. */
2535 dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2536 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2537 : mctx->max_mb_elem_len);
2538 err = clean_state_log_if_needed (mctx, dest_idx);
2539 if (BE (err != REG_NOERROR, 0))
2540 return err;
2541#ifdef DEBUG
2542 assert (dfa->nexts[cur_node_idx] != -1);
2543#endif
2544 new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2545
2546 dest_state = mctx->state_log[dest_idx];
2547 if (dest_state == NULL)
2548 dest_nodes = *new_nodes;
2549 else
2550 {
2551 err = re_node_set_init_union (&dest_nodes,
2552 dest_state->entrance_nodes, new_nodes);
2553 if (BE (err != REG_NOERROR, 0))
2554 return err;
2555 }
2556 context = re_string_context_at (&mctx->input, dest_idx - 1,
2557 mctx->eflags);
2558 mctx->state_log[dest_idx]
2559 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2560 if (dest_state != NULL)
2561 re_node_set_free (&dest_nodes);
2562 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2563 return err;
2564 }
2565 return REG_NOERROR;
2566}
2567#endif /* RE_ENABLE_I18N */
2568
2569static reg_errcode_t
2570internal_function
2571transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2572{
2573 const re_dfa_t *const dfa = mctx->dfa;
2574 reg_errcode_t err;
2575 int i;
2576 int cur_str_idx = re_string_cur_idx (&mctx->input);
2577
2578 for (i = 0; i < nodes->nelem; ++i)
2579 {
2580 int dest_str_idx, prev_nelem, bkc_idx;
2581 int node_idx = nodes->elems[i];
2582 unsigned int context;
2583 const re_token_t *node = dfa->nodes + node_idx;
2584 re_node_set *new_dest_nodes;
2585
2586 /* Check whether `node' is a backreference or not. */
2587 if (node->type != OP_BACK_REF)
2588 continue;
2589
2590 if (node->constraint)
2591 {
2592 context = re_string_context_at (&mctx->input, cur_str_idx,
2593 mctx->eflags);
2594 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2595 continue;
2596 }
2597
2598 /* `node' is a backreference.
2599 Check the substring which the substring matched. */
2600 bkc_idx = mctx->nbkref_ents;
2601 err = get_subexp (mctx, node_idx, cur_str_idx);
2602 if (BE (err != REG_NOERROR, 0))
2603 goto free_return;
2604
2605 /* And add the epsilon closures (which is `new_dest_nodes') of
2606 the backreference to appropriate state_log. */
2607#ifdef DEBUG
2608 assert (dfa->nexts[node_idx] != -1);
2609#endif
2610 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2611 {
2612 int subexp_len;
2613 re_dfastate_t *dest_state;
2614 struct re_backref_cache_entry *bkref_ent;
2615 bkref_ent = mctx->bkref_ents + bkc_idx;
2616 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2617 continue;
2618 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2619 new_dest_nodes = (subexp_len == 0
2620 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2621 : dfa->eclosures + dfa->nexts[node_idx]);
2622 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2623 - bkref_ent->subexp_from);
2624 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2625 mctx->eflags);
2626 dest_state = mctx->state_log[dest_str_idx];
2627 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2628 : mctx->state_log[cur_str_idx]->nodes.nelem);
2629 /* Add `new_dest_node' to state_log. */
2630 if (dest_state == NULL)
2631 {
2632 mctx->state_log[dest_str_idx]
2633 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2634 context);
2635 if (BE (mctx->state_log[dest_str_idx] == NULL
2636 && err != REG_NOERROR, 0))
2637 goto free_return;
2638 }
2639 else
2640 {
2641 re_node_set dest_nodes;
2642 err = re_node_set_init_union (&dest_nodes,
2643 dest_state->entrance_nodes,
2644 new_dest_nodes);
2645 if (BE (err != REG_NOERROR, 0))
2646 {
2647 re_node_set_free (&dest_nodes);
2648 goto free_return;
2649 }
2650 mctx->state_log[dest_str_idx]
2651 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2652 re_node_set_free (&dest_nodes);
2653 if (BE (mctx->state_log[dest_str_idx] == NULL
2654 && err != REG_NOERROR, 0))
2655 goto free_return;
2656 }
2657 /* We need to check recursively if the backreference can epsilon
2658 transit. */
2659 if (subexp_len == 0
2660 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2661 {
2662 err = check_subexp_matching_top (mctx, new_dest_nodes,
2663 cur_str_idx);
2664 if (BE (err != REG_NOERROR, 0))
2665 goto free_return;
2666 err = transit_state_bkref (mctx, new_dest_nodes);
2667 if (BE (err != REG_NOERROR, 0))
2668 goto free_return;
2669 }
2670 }
2671 }
2672 err = REG_NOERROR;
2673 free_return:
2674 return err;
2675}
2676
2677/* Enumerate all the candidates which the backreference BKREF_NODE can match
2678 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2679 Note that we might collect inappropriate candidates here.
2680 However, the cost of checking them strictly here is too high, then we
2681 delay these checking for prune_impossible_nodes(). */
2682
2683static reg_errcode_t
2684internal_function
2685get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
2686{
2687 const re_dfa_t *const dfa = mctx->dfa;
2688 int subexp_num, sub_top_idx;
2689 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2690 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2691 int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2692 if (cache_idx != -1)
2693 {
2694 const struct re_backref_cache_entry *entry
2695 = mctx->bkref_ents + cache_idx;
2696 do
2697 if (entry->node == bkref_node)
2698 return REG_NOERROR; /* We already checked it. */
2699 while (entry++->more);
2700 }
2701
2702 subexp_num = dfa->nodes[bkref_node].opr.idx;
2703
2704 /* For each sub expression */
2705 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2706 {
2707 reg_errcode_t err;
2708 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2709 re_sub_match_last_t *sub_last;
2710 int sub_last_idx, sl_str, bkref_str_off;
2711
2712 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2713 continue; /* It isn't related. */
2714
2715 sl_str = sub_top->str_idx;
2716 bkref_str_off = bkref_str_idx;
2717 /* At first, check the last node of sub expressions we already
2718 evaluated. */
2719 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2720 {
2721 int sl_str_diff;
2722 sub_last = sub_top->lasts[sub_last_idx];
2723 sl_str_diff = sub_last->str_idx - sl_str;
2724 /* The matched string by the sub expression match with the substring
2725 at the back reference? */
2726 if (sl_str_diff > 0)
2727 {
2728 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2729 {
2730 /* Not enough chars for a successful match. */
2731 if (bkref_str_off + sl_str_diff > mctx->input.len)
2732 break;
2733
2734 err = clean_state_log_if_needed (mctx,
2735 bkref_str_off
2736 + sl_str_diff);
2737 if (BE (err != REG_NOERROR, 0))
2738 return err;
2739 buf = (const char *) re_string_get_buffer (&mctx->input);
2740 }
2741 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2742 /* We don't need to search this sub expression any more. */
2743 break;
2744 }
2745 bkref_str_off += sl_str_diff;
2746 sl_str += sl_str_diff;
2747 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2748 bkref_str_idx);
2749
2750 /* Reload buf, since the preceding call might have reallocated
2751 the buffer. */
2752 buf = (const char *) re_string_get_buffer (&mctx->input);
2753
2754 if (err == REG_NOMATCH)
2755 continue;
2756 if (BE (err != REG_NOERROR, 0))
2757 return err;
2758 }
2759
2760 if (sub_last_idx < sub_top->nlasts)
2761 continue;
2762 if (sub_last_idx > 0)
2763 ++sl_str;
2764 /* Then, search for the other last nodes of the sub expression. */
2765 for (; sl_str <= bkref_str_idx; ++sl_str)
2766 {
2767 int cls_node, sl_str_off;
2768 const re_node_set *nodes;
2769 sl_str_off = sl_str - sub_top->str_idx;
2770 /* The matched string by the sub expression match with the substring
2771 at the back reference? */
2772 if (sl_str_off > 0)
2773 {
2774 if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2775 {
2776 /* If we are at the end of the input, we cannot match. */
2777 if (bkref_str_off >= mctx->input.len)
2778 break;
2779
2780 err = extend_buffers (mctx);
2781 if (BE (err != REG_NOERROR, 0))
2782 return err;
2783
2784 buf = (const char *) re_string_get_buffer (&mctx->input);
2785 }
2786 if (buf [bkref_str_off++] != buf[sl_str - 1])
2787 break; /* We don't need to search this sub expression
2788 any more. */
2789 }
2790 if (mctx->state_log[sl_str] == NULL)
2791 continue;
2792 /* Does this state have a ')' of the sub expression? */
2793 nodes = &mctx->state_log[sl_str]->nodes;
2794 cls_node = find_subexp_node (dfa, nodes, subexp_num,
2795 OP_CLOSE_SUBEXP);
2796 if (cls_node == -1)
2797 continue; /* No. */
2798 if (sub_top->path == NULL)
2799 {
2800 sub_top->path = calloc (sizeof (state_array_t),
2801 sl_str - sub_top->str_idx + 1);
2802 if (sub_top->path == NULL)
2803 return REG_ESPACE;
2804 }
2805 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2806 in the current context? */
2807 err = check_arrival (mctx, sub_top->path, sub_top->node,
2808 sub_top->str_idx, cls_node, sl_str,
2809 OP_CLOSE_SUBEXP);
2810 if (err == REG_NOMATCH)
2811 continue;
2812 if (BE (err != REG_NOERROR, 0))
2813 return err;
2814 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2815 if (BE (sub_last == NULL, 0))
2816 return REG_ESPACE;
2817 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2818 bkref_str_idx);
2819 if (err == REG_NOMATCH)
2820 continue;
2821 }
2822 }
2823 return REG_NOERROR;
2824}
2825
2826/* Helper functions for get_subexp(). */
2827
2828/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2829 If it can arrive, register the sub expression expressed with SUB_TOP
2830 and SUB_LAST. */
2831
2832static reg_errcode_t
2833internal_function
2834get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2835 re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
2836{
2837 reg_errcode_t err;
2838 int to_idx;
2839 /* Can the subexpression arrive the back reference? */
2840 err = check_arrival (mctx, &sub_last->path, sub_last->node,
2841 sub_last->str_idx, bkref_node, bkref_str,
2842 OP_OPEN_SUBEXP);
2843 if (err != REG_NOERROR)
2844 return err;
2845 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2846 sub_last->str_idx);
2847 if (BE (err != REG_NOERROR, 0))
2848 return err;
2849 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2850 return clean_state_log_if_needed (mctx, to_idx);
2851}
2852
2853/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2854 Search '(' if FL_OPEN, or search ')' otherwise.
2855 TODO: This function isn't efficient...
2856 Because there might be more than one nodes whose types are
2857 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2858 nodes.
2859 E.g. RE: (a){2} */
2860
2861static int
2862internal_function
2863find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2864 int subexp_idx, int type)
2865{
2866 int cls_idx;
2867 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2868 {
2869 int cls_node = nodes->elems[cls_idx];
2870 const re_token_t *node = dfa->nodes + cls_node;
2871 if (node->type == type
2872 && node->opr.idx == subexp_idx)
2873 return cls_node;
2874 }
2875 return -1;
2876}
2877
2878/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2879 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2880 heavily reused.
2881 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2882
2883static reg_errcode_t
2884internal_function
2885check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
2886 int top_str, int last_node, int last_str, int type)
2887{
2888 const re_dfa_t *const dfa = mctx->dfa;
2889 reg_errcode_t err = REG_NOERROR;
2890 int subexp_num, backup_cur_idx, str_idx, null_cnt;
2891 re_dfastate_t *cur_state = NULL;
2892 re_node_set *cur_nodes, next_nodes;
2893 re_dfastate_t **backup_state_log;
2894 unsigned int context;
2895
2896 subexp_num = dfa->nodes[top_node].opr.idx;
2897 /* Extend the buffer if we need. */
2898 if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2899 {
2900 re_dfastate_t **new_array;
2901 int old_alloc = path->alloc;
2902 path->alloc += last_str + mctx->max_mb_elem_len + 1;
2903 new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2904 if (BE (new_array == NULL, 0))
2905 {
2906 path->alloc = old_alloc;
2907 return REG_ESPACE;
2908 }
2909 path->array = new_array;
2910 memset (new_array + old_alloc, '\0',
2911 sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2912 }
2913
2914 str_idx = path->next_idx ? path->next_idx : top_str;
2915
2916 /* Temporary modify MCTX. */
2917 backup_state_log = mctx->state_log;
2918 backup_cur_idx = mctx->input.cur_idx;
2919 mctx->state_log = path->array;
2920 mctx->input.cur_idx = str_idx;
2921
2922 /* Setup initial node set. */
2923 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2924 if (str_idx == top_str)
2925 {
2926 err = re_node_set_init_1 (&next_nodes, top_node);
2927 if (BE (err != REG_NOERROR, 0))
2928 return err;
2929 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2930 if (BE (err != REG_NOERROR, 0))
2931 {
2932 re_node_set_free (&next_nodes);
2933 return err;
2934 }
2935 }
2936 else
2937 {
2938 cur_state = mctx->state_log[str_idx];
2939 if (cur_state && cur_state->has_backref)
2940 {
2941 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2942 if (BE (err != REG_NOERROR, 0))
2943 return err;
2944 }
2945 else
2946 re_node_set_init_empty (&next_nodes);
2947 }
2948 if (str_idx == top_str || (cur_state && cur_state->has_backref))
2949 {
2950 if (next_nodes.nelem)
2951 {
2952 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2953 subexp_num, type);
2954 if (BE (err != REG_NOERROR, 0))
2955 {
2956 re_node_set_free (&next_nodes);
2957 return err;
2958 }
2959 }
2960 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2961 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2962 {
2963 re_node_set_free (&next_nodes);
2964 return err;
2965 }
2966 mctx->state_log[str_idx] = cur_state;
2967 }
2968
2969 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2970 {
2971 re_node_set_empty (&next_nodes);
2972 if (mctx->state_log[str_idx + 1])
2973 {
2974 err = re_node_set_merge (&next_nodes,
2975 &mctx->state_log[str_idx + 1]->nodes);
2976 if (BE (err != REG_NOERROR, 0))
2977 {
2978 re_node_set_free (&next_nodes);
2979 return err;
2980 }
2981 }
2982 if (cur_state)
2983 {
2984 err = check_arrival_add_next_nodes (mctx, str_idx,
2985 &cur_state->non_eps_nodes,
2986 &next_nodes);
2987 if (BE (err != REG_NOERROR, 0))
2988 {
2989 re_node_set_free (&next_nodes);
2990 return err;
2991 }
2992 }
2993 ++str_idx;
2994 if (next_nodes.nelem)
2995 {
2996 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2997 if (BE (err != REG_NOERROR, 0))
2998 {
2999 re_node_set_free (&next_nodes);
3000 return err;
3001 }
3002 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3003 subexp_num, type);
3004 if (BE (err != REG_NOERROR, 0))
3005 {
3006 re_node_set_free (&next_nodes);
3007 return err;
3008 }
3009 }
3010 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3011 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3012 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3013 {
3014 re_node_set_free (&next_nodes);
3015 return err;
3016 }
3017 mctx->state_log[str_idx] = cur_state;
3018 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3019 }
3020 re_node_set_free (&next_nodes);
3021 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3022 : &mctx->state_log[last_str]->nodes);
3023 path->next_idx = str_idx;
3024
3025 /* Fix MCTX. */
3026 mctx->state_log = backup_state_log;
3027 mctx->input.cur_idx = backup_cur_idx;
3028
3029 /* Then check the current node set has the node LAST_NODE. */
3030 if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3031 return REG_NOERROR;
3032
3033 return REG_NOMATCH;
3034}
3035
3036/* Helper functions for check_arrival. */
3037
3038/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3039 to NEXT_NODES.
3040 TODO: This function is similar to the functions transit_state*(),
3041 however this function has many additional works.
3042 Can't we unify them? */
3043
3044static reg_errcode_t
3045internal_function
3046check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
3047 re_node_set *cur_nodes, re_node_set *next_nodes)
3048{
3049 const re_dfa_t *const dfa = mctx->dfa;
3050 int result;
3051 int cur_idx;
3052#ifdef RE_ENABLE_I18N
3053 reg_errcode_t err = REG_NOERROR;
3054#endif
3055 re_node_set union_set;
3056 re_node_set_init_empty (&union_set);
3057 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3058 {
3059 int naccepted = 0;
3060 int cur_node = cur_nodes->elems[cur_idx];
3061#ifdef DEBUG
3062 re_token_type_t type = dfa->nodes[cur_node].type;
3063 assert (!IS_EPSILON_NODE (type));
3064#endif
3065#ifdef RE_ENABLE_I18N
3066 /* If the node may accept `multi byte'. */
3067 if (dfa->nodes[cur_node].accept_mb)
3068 {
3069 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3070 str_idx);
3071 if (naccepted > 1)
3072 {
3073 re_dfastate_t *dest_state;
3074 int next_node = dfa->nexts[cur_node];
3075 int next_idx = str_idx + naccepted;
3076 dest_state = mctx->state_log[next_idx];
3077 re_node_set_empty (&union_set);
3078 if (dest_state)
3079 {
3080 err = re_node_set_merge (&union_set, &dest_state->nodes);
3081 if (BE (err != REG_NOERROR, 0))
3082 {
3083 re_node_set_free (&union_set);
3084 return err;
3085 }
3086 }
3087 result = re_node_set_insert (&union_set, next_node);
3088 if (BE (result < 0, 0))
3089 {
3090 re_node_set_free (&union_set);
3091 return REG_ESPACE;
3092 }
3093 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3094 &union_set);
3095 if (BE (mctx->state_log[next_idx] == NULL
3096 && err != REG_NOERROR, 0))
3097 {
3098 re_node_set_free (&union_set);
3099 return err;
3100 }
3101 }
3102 }
3103#endif /* RE_ENABLE_I18N */
3104 if (naccepted
3105 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3106 {
3107 result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3108 if (BE (result < 0, 0))
3109 {
3110 re_node_set_free (&union_set);
3111 return REG_ESPACE;
3112 }
3113 }
3114 }
3115 re_node_set_free (&union_set);
3116 return REG_NOERROR;
3117}
3118
3119/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3120 CUR_NODES, however exclude the nodes which are:
3121 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3122 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3123*/
3124
3125static reg_errcode_t
3126internal_function
3127check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3128 int ex_subexp, int type)
3129{
3130 reg_errcode_t err;
3131 int idx, outside_node;
3132 re_node_set new_nodes;
3133#ifdef DEBUG
3134 assert (cur_nodes->nelem);
3135#endif
3136 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3137 if (BE (err != REG_NOERROR, 0))
3138 return err;
3139 /* Create a new node set NEW_NODES with the nodes which are epsilon
3140 closures of the node in CUR_NODES. */
3141
3142 for (idx = 0; idx < cur_nodes->nelem; ++idx)
3143 {
3144 int cur_node = cur_nodes->elems[idx];
3145 const re_node_set *eclosure = dfa->eclosures + cur_node;
3146 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3147 if (outside_node == -1)
3148 {
3149 /* There are no problematic nodes, just merge them. */
3150 err = re_node_set_merge (&new_nodes, eclosure);
3151 if (BE (err != REG_NOERROR, 0))
3152 {
3153 re_node_set_free (&new_nodes);
3154 return err;
3155 }
3156 }
3157 else
3158 {
3159 /* There are problematic nodes, re-calculate incrementally. */
3160 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3161 ex_subexp, type);
3162 if (BE (err != REG_NOERROR, 0))
3163 {
3164 re_node_set_free (&new_nodes);
3165 return err;
3166 }
3167 }
3168 }
3169 re_node_set_free (cur_nodes);
3170 *cur_nodes = new_nodes;
3171 return REG_NOERROR;
3172}
3173
3174/* Helper function for check_arrival_expand_ecl.
3175 Check incrementally the epsilon closure of TARGET, and if it isn't
3176 problematic append it to DST_NODES. */
3177
3178static reg_errcode_t
3179internal_function
3180check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3181 int target, int ex_subexp, int type)
3182{
3183 int cur_node;
3184 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3185 {
3186 int err;
3187
3188 if (dfa->nodes[cur_node].type == type
3189 && dfa->nodes[cur_node].opr.idx == ex_subexp)
3190 {
3191 if (type == OP_CLOSE_SUBEXP)
3192 {
3193 err = re_node_set_insert (dst_nodes, cur_node);
3194 if (BE (err == -1, 0))
3195 return REG_ESPACE;
3196 }
3197 break;
3198 }
3199 err = re_node_set_insert (dst_nodes, cur_node);
3200 if (BE (err == -1, 0))
3201 return REG_ESPACE;
3202 if (dfa->edests[cur_node].nelem == 0)
3203 break;
3204 if (dfa->edests[cur_node].nelem == 2)
3205 {
3206 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3207 dfa->edests[cur_node].elems[1],
3208 ex_subexp, type);
3209 if (BE (err != REG_NOERROR, 0))
3210 return err;
3211 }
3212 cur_node = dfa->edests[cur_node].elems[0];
3213 }
3214 return REG_NOERROR;
3215}
3216
3217
3218/* For all the back references in the current state, calculate the
3219 destination of the back references by the appropriate entry
3220 in MCTX->BKREF_ENTS. */
3221
3222static reg_errcode_t
3223internal_function
3224expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3225 int cur_str, int subexp_num, int type)
3226{
3227 const re_dfa_t *const dfa = mctx->dfa;
3228 reg_errcode_t err;
3229 int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3230 struct re_backref_cache_entry *ent;
3231
3232 if (cache_idx_start == -1)
3233 return REG_NOERROR;
3234
3235 restart:
3236 ent = mctx->bkref_ents + cache_idx_start;
3237 do
3238 {
3239 int to_idx, next_node;
3240
3241 /* Is this entry ENT is appropriate? */
3242 if (!re_node_set_contains (cur_nodes, ent->node))
3243 continue; /* No. */
3244
3245 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3246 /* Calculate the destination of the back reference, and append it
3247 to MCTX->STATE_LOG. */
3248 if (to_idx == cur_str)
3249 {
3250 /* The backreference did epsilon transit, we must re-check all the
3251 node in the current state. */
3252 re_node_set new_dests;
3253 reg_errcode_t err2, err3;
3254 next_node = dfa->edests[ent->node].elems[0];
3255 if (re_node_set_contains (cur_nodes, next_node))
3256 continue;
3257 err = re_node_set_init_1 (&new_dests, next_node);
3258 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3259 err3 = re_node_set_merge (cur_nodes, &new_dests);
3260 re_node_set_free (&new_dests);
3261 if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3262 || err3 != REG_NOERROR, 0))
3263 {
3264 err = (err != REG_NOERROR ? err
3265 : (err2 != REG_NOERROR ? err2 : err3));
3266 return err;
3267 }
3268 /* TODO: It is still inefficient... */
3269 goto restart;
3270 }
3271 else
3272 {
3273 re_node_set union_set;
3274 next_node = dfa->nexts[ent->node];
3275 if (mctx->state_log[to_idx])
3276 {
3277 int ret;
3278 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3279 next_node))
3280 continue;
3281 err = re_node_set_init_copy (&union_set,
3282 &mctx->state_log[to_idx]->nodes);
3283 ret = re_node_set_insert (&union_set, next_node);
3284 if (BE (err != REG_NOERROR || ret < 0, 0))
3285 {
3286 re_node_set_free (&union_set);
3287 err = err != REG_NOERROR ? err : REG_ESPACE;
3288 return err;
3289 }
3290 }
3291 else
3292 {
3293 err = re_node_set_init_1 (&union_set, next_node);
3294 if (BE (err != REG_NOERROR, 0))
3295 return err;
3296 }
3297 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3298 re_node_set_free (&union_set);
3299 if (BE (mctx->state_log[to_idx] == NULL
3300 && err != REG_NOERROR, 0))
3301 return err;
3302 }
3303 }
3304 while (ent++->more);
3305 return REG_NOERROR;
3306}
3307
3308/* Build transition table for the state.
3309 Return 1 if succeeded, otherwise return NULL. */
3310
3311static int
3312internal_function
3313build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3314{
3315 reg_errcode_t err;
3316 int i, j, ch, need_word_trtable = 0;
3317 bitset_word_t elem, mask;
3318 bool dests_node_malloced = false;
3319 bool dest_states_malloced = false;
3320 int ndests; /* Number of the destination states from `state'. */
3321 re_dfastate_t **trtable;
3322 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3323 re_node_set follows, *dests_node;
3324 bitset_t *dests_ch;
3325 bitset_t acceptable;
3326
3327 struct dests_alloc
3328 {
3329 re_node_set dests_node[SBC_MAX];
3330 bitset_t dests_ch[SBC_MAX];
3331 } *dests_alloc;
3332
3333 /* We build DFA states which corresponds to the destination nodes
3334 from `state'. `dests_node[i]' represents the nodes which i-th
3335 destination state contains, and `dests_ch[i]' represents the
3336 characters which i-th destination state accepts. */
3337#ifdef HAVE_ALLOCA
3338 if (__libc_use_alloca (sizeof (struct dests_alloc)))
3339 dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3340 else
3341#endif
3342 {
3343 dests_alloc = re_malloc (struct dests_alloc, 1);
3344 if (BE (dests_alloc == NULL, 0))
3345 return 0;
3346 dests_node_malloced = true;
3347 }
3348 dests_node = dests_alloc->dests_node;
3349 dests_ch = dests_alloc->dests_ch;
3350
3351 /* Initialize transiton table. */
3352 state->word_trtable = state->trtable = NULL;
3353
3354 /* At first, group all nodes belonging to `state' into several
3355 destinations. */
3356 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3357 if (BE (ndests <= 0, 0))
3358 {
3359 if (dests_node_malloced)
3360 free (dests_alloc);
3361 /* Return 0 in case of an error, 1 otherwise. */
3362 if (ndests == 0)
3363 {
3364 state->trtable = (re_dfastate_t **)
3365 calloc (sizeof (re_dfastate_t *), SBC_MAX);
3366 return 1;
3367 }
3368 return 0;
3369 }
3370
3371 err = re_node_set_alloc (&follows, ndests + 1);
3372 if (BE (err != REG_NOERROR, 0))
3373 goto out_free;
3374
3375 /* Avoid arithmetic overflow in size calculation. */
3376 if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3377 / (3 * sizeof (re_dfastate_t *)))
3378 < ndests),
3379 0))
3380 goto out_free;
3381
3382#ifdef HAVE_ALLOCA
3383 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3384 + ndests * 3 * sizeof (re_dfastate_t *)))
3385 dest_states = (re_dfastate_t **)
3386 alloca (ndests * 3 * sizeof (re_dfastate_t *));
3387 else
3388#endif
3389 {
3390 dest_states = (re_dfastate_t **)
3391 malloc (ndests * 3 * sizeof (re_dfastate_t *));
3392 if (BE (dest_states == NULL, 0))
3393 {
3394out_free:
3395 if (dest_states_malloced)
3396 free (dest_states);
3397 re_node_set_free (&follows);
3398 for (i = 0; i < ndests; ++i)
3399 re_node_set_free (dests_node + i);
3400 if (dests_node_malloced)
3401 free (dests_alloc);
3402 return 0;
3403 }
3404 dest_states_malloced = true;
3405 }
3406 dest_states_word = dest_states + ndests;
3407 dest_states_nl = dest_states_word + ndests;
3408 bitset_empty (acceptable);
3409
3410 /* Then build the states for all destinations. */
3411 for (i = 0; i < ndests; ++i)
3412 {
3413 int next_node;
3414 re_node_set_empty (&follows);
3415 /* Merge the follows of this destination states. */
3416 for (j = 0; j < dests_node[i].nelem; ++j)
3417 {
3418 next_node = dfa->nexts[dests_node[i].elems[j]];
3419 if (next_node != -1)
3420 {
3421 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3422 if (BE (err != REG_NOERROR, 0))
3423 goto out_free;
3424 }
3425 }
3426 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3427 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3428 goto out_free;
3429 /* If the new state has context constraint,
3430 build appropriate states for these contexts. */
3431 if (dest_states[i]->has_constraint)
3432 {
3433 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3434 CONTEXT_WORD);
3435 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3436 goto out_free;
3437
3438 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3439 need_word_trtable = 1;
3440
3441 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3442 CONTEXT_NEWLINE);
3443 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3444 goto out_free;
3445 }
3446 else
3447 {
3448 dest_states_word[i] = dest_states[i];
3449 dest_states_nl[i] = dest_states[i];
3450 }
3451 bitset_merge (acceptable, dests_ch[i]);
3452 }
3453
3454 if (!BE (need_word_trtable, 0))
3455 {
3456 /* We don't care about whether the following character is a word
3457 character, or we are in a single-byte character set so we can
3458 discern by looking at the character code: allocate a
3459 256-entry transition table. */
3460 trtable = state->trtable =
3461 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3462 if (BE (trtable == NULL, 0))
3463 goto out_free;
3464
3465 /* For all characters ch...: */
3466 for (i = 0; i < BITSET_WORDS; ++i)
3467 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3468 elem;
3469 mask <<= 1, elem >>= 1, ++ch)
3470 if (BE (elem & 1, 0))
3471 {
3472 /* There must be exactly one destination which accepts
3473 character ch. See group_nodes_into_DFAstates. */
3474 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3475 ;
3476
3477 /* j-th destination accepts the word character ch. */
3478 if (dfa->word_char[i] & mask)
3479 trtable[ch] = dest_states_word[j];
3480 else
3481 trtable[ch] = dest_states[j];
3482 }
3483 }
3484 else
3485 {
3486 /* We care about whether the following character is a word
3487 character, and we are in a multi-byte character set: discern
3488 by looking at the character code: build two 256-entry
3489 transition tables, one starting at trtable[0] and one
3490 starting at trtable[SBC_MAX]. */
3491 trtable = state->word_trtable =
3492 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3493 if (BE (trtable == NULL, 0))
3494 goto out_free;
3495
3496 /* For all characters ch...: */
3497 for (i = 0; i < BITSET_WORDS; ++i)
3498 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3499 elem;
3500 mask <<= 1, elem >>= 1, ++ch)
3501 if (BE (elem & 1, 0))
3502 {
3503 /* There must be exactly one destination which accepts
3504 character ch. See group_nodes_into_DFAstates. */
3505 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3506 ;
3507
3508 /* j-th destination accepts the word character ch. */
3509 trtable[ch] = dest_states[j];
3510 trtable[ch + SBC_MAX] = dest_states_word[j];
3511 }
3512 }
3513
3514 /* new line */
3515 if (bitset_contain (acceptable, NEWLINE_CHAR))
3516 {
3517 /* The current state accepts newline character. */
3518 for (j = 0; j < ndests; ++j)
3519 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3520 {
3521 /* k-th destination accepts newline character. */
3522 trtable[NEWLINE_CHAR] = dest_states_nl[j];
3523 if (need_word_trtable)
3524 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3525 /* There must be only one destination which accepts
3526 newline. See group_nodes_into_DFAstates. */
3527 break;
3528 }
3529 }
3530
3531 if (dest_states_malloced)
3532 free (dest_states);
3533
3534 re_node_set_free (&follows);
3535 for (i = 0; i < ndests; ++i)
3536 re_node_set_free (dests_node + i);
3537
3538 if (dests_node_malloced)
3539 free (dests_alloc);
3540
3541 return 1;
3542}
3543
3544/* Group all nodes belonging to STATE into several destinations.
3545 Then for all destinations, set the nodes belonging to the destination
3546 to DESTS_NODE[i] and set the characters accepted by the destination
3547 to DEST_CH[i]. This function return the number of destinations. */
3548
3549static int
3550internal_function
3551group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3552 re_node_set *dests_node, bitset_t *dests_ch)
3553{
3554 reg_errcode_t err;
3555 int result;
3556 int i, j, k;
3557 int ndests; /* Number of the destinations from `state'. */
3558 bitset_t accepts; /* Characters a node can accept. */
3559 const re_node_set *cur_nodes = &state->nodes;
3560 bitset_empty (accepts);
3561 ndests = 0;
3562
3563 /* For all the nodes belonging to `state', */
3564 for (i = 0; i < cur_nodes->nelem; ++i)
3565 {
3566 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3567 re_token_type_t type = node->type;
3568 unsigned int constraint = node->constraint;
3569
3570 /* Enumerate all single byte character this node can accept. */
3571 if (type == CHARACTER)
3572 bitset_set (accepts, node->opr.c);
3573 else if (type == SIMPLE_BRACKET)
3574 {
3575 bitset_merge (accepts, node->opr.sbcset);
3576 }
3577 else if (type == OP_PERIOD)
3578 {
3579#ifdef RE_ENABLE_I18N
3580 if (dfa->mb_cur_max > 1)
3581 bitset_merge (accepts, dfa->sb_char);
3582 else
3583#endif
3584 bitset_set_all (accepts);
3585 if (!(dfa->syntax & RE_DOT_NEWLINE))
3586 bitset_clear (accepts, '\n');
3587 if (dfa->syntax & RE_DOT_NOT_NULL)
3588 bitset_clear (accepts, '\0');
3589 }
3590#ifdef RE_ENABLE_I18N
3591 else if (type == OP_UTF8_PERIOD)
3592 {
3593 memset (accepts, '\xff', sizeof (bitset_t) / 2);
3594 if (!(dfa->syntax & RE_DOT_NEWLINE))
3595 bitset_clear (accepts, '\n');
3596 if (dfa->syntax & RE_DOT_NOT_NULL)
3597 bitset_clear (accepts, '\0');
3598 }
3599#endif
3600 else
3601 continue;
3602
3603 /* Check the `accepts' and sift the characters which are not
3604 match it the context. */
3605 if (constraint)
3606 {
3607 if (constraint & NEXT_NEWLINE_CONSTRAINT)
3608 {
3609 bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3610 bitset_empty (accepts);
3611 if (accepts_newline)
3612 bitset_set (accepts, NEWLINE_CHAR);
3613 else
3614 continue;
3615 }
3616 if (constraint & NEXT_ENDBUF_CONSTRAINT)
3617 {
3618 bitset_empty (accepts);
3619 continue;
3620 }
3621
3622 if (constraint & NEXT_WORD_CONSTRAINT)
3623 {
3624 bitset_word_t any_set = 0;
3625 if (type == CHARACTER && !node->word_char)
3626 {
3627 bitset_empty (accepts);
3628 continue;
3629 }
3630#ifdef RE_ENABLE_I18N
3631 if (dfa->mb_cur_max > 1)
3632 for (j = 0; j < BITSET_WORDS; ++j)
3633 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3634 else
3635#endif
3636 for (j = 0; j < BITSET_WORDS; ++j)
3637 any_set |= (accepts[j] &= dfa->word_char[j]);
3638 if (!any_set)
3639 continue;
3640 }
3641 if (constraint & NEXT_NOTWORD_CONSTRAINT)
3642 {
3643 bitset_word_t any_set = 0;
3644 if (type == CHARACTER && node->word_char)
3645 {
3646 bitset_empty (accepts);
3647 continue;
3648 }
3649#ifdef RE_ENABLE_I18N
3650 if (dfa->mb_cur_max > 1)
3651 for (j = 0; j < BITSET_WORDS; ++j)
3652 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3653 else
3654#endif
3655 for (j = 0; j < BITSET_WORDS; ++j)
3656 any_set |= (accepts[j] &= ~dfa->word_char[j]);
3657 if (!any_set)
3658 continue;
3659 }
3660 }
3661
3662 /* Then divide `accepts' into DFA states, or create a new
3663 state. Above, we make sure that accepts is not empty. */
3664 for (j = 0; j < ndests; ++j)
3665 {
3666 bitset_t intersec; /* Intersection sets, see below. */
3667 bitset_t remains;
3668 /* Flags, see below. */
3669 bitset_word_t has_intersec, not_subset, not_consumed;
3670
3671 /* Optimization, skip if this state doesn't accept the character. */
3672 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3673 continue;
3674
3675 /* Enumerate the intersection set of this state and `accepts'. */
3676 has_intersec = 0;
3677 for (k = 0; k < BITSET_WORDS; ++k)
3678 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3679 /* And skip if the intersection set is empty. */
3680 if (!has_intersec)
3681 continue;
3682
3683 /* Then check if this state is a subset of `accepts'. */
3684 not_subset = not_consumed = 0;
3685 for (k = 0; k < BITSET_WORDS; ++k)
3686 {
3687 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3688 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3689 }
3690
3691 /* If this state isn't a subset of `accepts', create a
3692 new group state, which has the `remains'. */
3693 if (not_subset)
3694 {
3695 bitset_copy (dests_ch[ndests], remains);
3696 bitset_copy (dests_ch[j], intersec);
3697 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3698 if (BE (err != REG_NOERROR, 0))
3699 goto error_return;
3700 ++ndests;
3701 }
3702
3703 /* Put the position in the current group. */
3704 result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3705 if (BE (result < 0, 0))
3706 goto error_return;
3707
3708 /* If all characters are consumed, go to next node. */
3709 if (!not_consumed)
3710 break;
3711 }
3712 /* Some characters remain, create a new group. */
3713 if (j == ndests)
3714 {
3715 bitset_copy (dests_ch[ndests], accepts);
3716 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3717 if (BE (err != REG_NOERROR, 0))
3718 goto error_return;
3719 ++ndests;
3720 bitset_empty (accepts);
3721 }
3722 }
3723 return ndests;
3724 error_return:
3725 for (j = 0; j < ndests; ++j)
3726 re_node_set_free (dests_node + j);
3727 return -1;
3728}
3729
3730#ifdef RE_ENABLE_I18N
3731/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3732 Return the number of the bytes the node accepts.
3733 STR_IDX is the current index of the input string.
3734
3735 This function handles the nodes which can accept one character, or
3736 one collating element like '.', '[a-z]', opposite to the other nodes
3737 can only accept one byte. */
3738
3739static int
3740internal_function
3741check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3742 const re_string_t *input, int str_idx)
3743{
3744 const re_token_t *node = dfa->nodes + node_idx;
3745 int char_len, elem_len;
3746 int i;
3747 wint_t wc;
3748
3749 if (BE (node->type == OP_UTF8_PERIOD, 0))
3750 {
3751 unsigned char c = re_string_byte_at (input, str_idx), d;
3752 if (BE (c < 0xc2, 1))
3753 return 0;
3754
3755 if (str_idx + 2 > input->len)
3756 return 0;
3757
3758 d = re_string_byte_at (input, str_idx + 1);
3759 if (c < 0xe0)
3760 return (d < 0x80 || d > 0xbf) ? 0 : 2;
3761 else if (c < 0xf0)
3762 {
3763 char_len = 3;
3764 if (c == 0xe0 && d < 0xa0)
3765 return 0;
3766 }
3767 else if (c < 0xf8)
3768 {
3769 char_len = 4;
3770 if (c == 0xf0 && d < 0x90)
3771 return 0;
3772 }
3773 else if (c < 0xfc)
3774 {
3775 char_len = 5;
3776 if (c == 0xf8 && d < 0x88)
3777 return 0;
3778 }
3779 else if (c < 0xfe)
3780 {
3781 char_len = 6;
3782 if (c == 0xfc && d < 0x84)
3783 return 0;
3784 }
3785 else
3786 return 0;
3787
3788 if (str_idx + char_len > input->len)
3789 return 0;
3790
3791 for (i = 1; i < char_len; ++i)
3792 {
3793 d = re_string_byte_at (input, str_idx + i);
3794 if (d < 0x80 || d > 0xbf)
3795 return 0;
3796 }
3797 return char_len;
3798 }
3799
3800 char_len = re_string_char_size_at (input, str_idx);
3801 if (node->type == OP_PERIOD)
3802 {
3803 if (char_len <= 1)
3804 return 0;
3805 /* FIXME: I don't think this if is needed, as both '\n'
3806 and '\0' are char_len == 1. */
3807 /* '.' accepts any one character except the following two cases. */
3808 if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3809 re_string_byte_at (input, str_idx) == '\n') ||
3810 ((dfa->syntax & RE_DOT_NOT_NULL) &&
3811 re_string_byte_at (input, str_idx) == '\0'))
3812 return 0;
3813 return char_len;
3814 }
3815
3816 elem_len = re_string_elem_size_at (input, str_idx);
3817 wc = __btowc(*(input->mbs+str_idx));
3818 if (((elem_len <= 1 && char_len <= 1) || char_len == 0) && (wc != WEOF && wc < SBC_MAX))
3819 return 0;
3820
3821 if (node->type == COMPLEX_BRACKET)
3822 {
3823 const re_charset_t *cset = node->opr.mbcset;
3824# ifdef _LIBC
3825 const unsigned char *pin
3826 = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3827 int j;
3828 uint32_t nrules;
3829# endif /* _LIBC */
3830 int match_len = 0;
3831 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3832 ? re_string_wchar_at (input, str_idx) : 0);
3833
3834 /* match with multibyte character? */
3835 for (i = 0; i < cset->nmbchars; ++i)
3836 if (wc == cset->mbchars[i])
3837 {
3838 match_len = char_len;
3839 goto check_node_accept_bytes_match;
3840 }
3841 /* match with character_class? */
3842 for (i = 0; i < cset->nchar_classes; ++i)
3843 {
3844 wctype_t wt = cset->char_classes[i];
3845 if (__iswctype (wc, wt))
3846 {
3847 match_len = char_len;
3848 goto check_node_accept_bytes_match;
3849 }
3850 }
3851
3852# ifdef _LIBC
3853 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3854 if (nrules != 0)
3855 {
3856 unsigned int in_collseq = 0;
3857 const int32_t *table, *indirect;
3858 const unsigned char *weights, *extra;
3859 const char *collseqwc;
3860 /* This #include defines a local function! */
3861# include <locale/weight.h>
3862
3863 /* match with collating_symbol? */
3864 if (cset->ncoll_syms)
3865 extra = (const unsigned char *)
3866 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3867 for (i = 0; i < cset->ncoll_syms; ++i)
3868 {
3869 const unsigned char *coll_sym = extra + cset->coll_syms[i];
3870 /* Compare the length of input collating element and
3871 the length of current collating element. */
3872 if (*coll_sym != elem_len)
3873 continue;
3874 /* Compare each bytes. */
3875 for (j = 0; j < *coll_sym; j++)
3876 if (pin[j] != coll_sym[1 + j])
3877 break;
3878 if (j == *coll_sym)
3879 {
3880 /* Match if every bytes is equal. */
3881 match_len = j;
3882 goto check_node_accept_bytes_match;
3883 }
3884 }
3885
3886 if (cset->nranges)
3887 {
3888 if (elem_len <= char_len)
3889 {
3890 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3891 in_collseq = __collseq_table_lookup (collseqwc, wc);
3892 }
3893 else
3894 in_collseq = find_collation_sequence_value (pin, elem_len);
3895 }
3896 /* match with range expression? */
3897 for (i = 0; i < cset->nranges; ++i)
3898 if (cset->range_starts[i] <= in_collseq
3899 && in_collseq <= cset->range_ends[i])
3900 {
3901 match_len = elem_len;
3902 goto check_node_accept_bytes_match;
3903 }
3904
3905 /* match with equivalence_class? */
3906 if (cset->nequiv_classes)
3907 {
3908 const unsigned char *cp = pin;
3909 table = (const int32_t *)
3910 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3911 weights = (const unsigned char *)
3912 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3913 extra = (const unsigned char *)
3914 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3915 indirect = (const int32_t *)
3916 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3917 int32_t idx = findidx (&cp);
3918 if (idx > 0)
3919 for (i = 0; i < cset->nequiv_classes; ++i)
3920 {
3921 int32_t equiv_class_idx = cset->equiv_classes[i];
3922 size_t weight_len = weights[idx & 0xffffff];
3923 if (weight_len == weights[equiv_class_idx & 0xffffff]
3924 && (idx >> 24) == (equiv_class_idx >> 24))
3925 {
3926 int cnt = 0;
3927
3928 idx &= 0xffffff;
3929 equiv_class_idx &= 0xffffff;
3930
3931 while (cnt <= weight_len
3932 && (weights[equiv_class_idx + 1 + cnt]
3933 == weights[idx + 1 + cnt]))
3934 ++cnt;
3935 if (cnt > weight_len)
3936 {
3937 match_len = elem_len;
3938 goto check_node_accept_bytes_match;
3939 }
3940 }
3941 }
3942 }
3943 }
3944 else
3945# endif /* _LIBC */
3946 {
3947 /* match with range expression? */
3948#if __GNUC__ >= 2
3949 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3950#else
3951 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3952 cmp_buf[2] = wc;
3953#endif
3954 for (i = 0; i < cset->nranges; ++i)
3955 {
3956 cmp_buf[0] = cset->range_starts[i];
3957 cmp_buf[4] = cset->range_ends[i];
3958 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3959 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3960 {
3961 match_len = char_len;
3962 goto check_node_accept_bytes_match;
3963 }
3964 }
3965 }
3966 check_node_accept_bytes_match:
3967 if (!cset->non_match)
3968 return match_len;
3969 else
3970 {
3971 if (match_len > 0)
3972 return 0;
3973 else
3974 return (elem_len > char_len) ? elem_len : char_len;
3975 }
3976 }
3977 return 0;
3978}
3979
3980# ifdef _LIBC
3981static unsigned int
3982internal_function
3983find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3984{
3985 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3986 if (nrules == 0)
3987 {
3988 if (mbs_len == 1)
3989 {
3990 /* No valid character. Match it as a single byte character. */
3991 const unsigned char *collseq = (const unsigned char *)
3992 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3993 return collseq[mbs[0]];
3994 }
3995 return UINT_MAX;
3996 }
3997 else
3998 {
3999 int32_t idx;
4000 const unsigned char *extra = (const unsigned char *)
4001 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4002 int32_t extrasize = (const unsigned char *)
4003 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4004
4005 for (idx = 0; idx < extrasize;)
4006 {
4007 int mbs_cnt, found = 0;
4008 int32_t elem_mbs_len;
4009 /* Skip the name of collating element name. */
4010 idx = idx + extra[idx] + 1;
4011 elem_mbs_len = extra[idx++];
4012 if (mbs_len == elem_mbs_len)
4013 {
4014 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4015 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4016 break;
4017 if (mbs_cnt == elem_mbs_len)
4018 /* Found the entry. */
4019 found = 1;
4020 }
4021 /* Skip the byte sequence of the collating element. */
4022 idx += elem_mbs_len;
4023 /* Adjust for the alignment. */
4024 idx = (idx + 3) & ~3;
4025 /* Skip the collation sequence value. */
4026 idx += sizeof (uint32_t);
4027 /* Skip the wide char sequence of the collating element. */
4028 idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4029 /* If we found the entry, return the sequence value. */
4030 if (found)
4031 return *(uint32_t *) (extra + idx);
4032 /* Skip the collation sequence value. */
4033 idx += sizeof (uint32_t);
4034 }
4035 return UINT_MAX;
4036 }
4037}
4038# endif /* _LIBC */
4039#endif /* RE_ENABLE_I18N */
4040
4041/* Check whether the node accepts the byte which is IDX-th
4042 byte of the INPUT. */
4043
4044static int
4045internal_function
4046check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4047 int idx)
4048{
4049 unsigned char ch;
4050 ch = re_string_byte_at (&mctx->input, idx);
4051 switch (node->type)
4052 {
4053 case CHARACTER:
4054 if (node->opr.c != ch)
4055 return 0;
4056 break;
4057
4058 case SIMPLE_BRACKET:
4059 if (!bitset_contain (node->opr.sbcset, ch))
4060 return 0;
4061 break;
4062
4063#ifdef RE_ENABLE_I18N
4064 case OP_UTF8_PERIOD:
4065 if (ch >= 0x80)
4066 return 0;
4067 /* FALLTHROUGH */
4068#endif
4069 case OP_PERIOD:
4070 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4071 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4072 return 0;
4073 break;
4074
4075 default:
4076 return 0;
4077 }
4078
4079 if (node->constraint)
4080 {
4081 /* The node has constraints. Check whether the current context
4082 satisfies the constraints. */
4083 unsigned int context = re_string_context_at (&mctx->input, idx,
4084 mctx->eflags);
4085 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4086 return 0;
4087 }
4088
4089 return 1;
4090}
4091
4092/* Extend the buffers, if the buffers have run out. */
4093
4094static reg_errcode_t
4095internal_function
4096extend_buffers (re_match_context_t *mctx)
4097{
4098 reg_errcode_t ret;
4099 re_string_t *pstr = &mctx->input;
4100
4101 /* Avoid overflow. */
4102 if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4103 return REG_ESPACE;
4104
4105 /* Double the lengthes of the buffers. */
4106 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4107 if (BE (ret != REG_NOERROR, 0))
4108 return ret;
4109
4110 if (mctx->state_log != NULL)
4111 {
4112 /* And double the length of state_log. */
4113 /* XXX We have no indication of the size of this buffer. If this
4114 allocation fail we have no indication that the state_log array
4115 does not have the right size. */
4116 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4117 pstr->bufs_len + 1);
4118 if (BE (new_array == NULL, 0))
4119 return REG_ESPACE;
4120 mctx->state_log = new_array;
4121 }
4122
4123 /* Then reconstruct the buffers. */
4124 if (pstr->icase)
4125 {
4126#ifdef RE_ENABLE_I18N
4127 if (pstr->mb_cur_max > 1)
4128 {
4129 ret = build_wcs_upper_buffer (pstr);
4130 if (BE (ret != REG_NOERROR, 0))
4131 return ret;
4132 }
4133 else
4134#endif /* RE_ENABLE_I18N */
4135 build_upper_buffer (pstr);
4136 }
4137 else
4138 {
4139#ifdef RE_ENABLE_I18N
4140 if (pstr->mb_cur_max > 1)
4141 build_wcs_buffer (pstr);
4142 else
4143#endif /* RE_ENABLE_I18N */
4144 {
4145 if (pstr->trans != NULL)
4146 re_string_translate_buffer (pstr);
4147 }
4148 }
4149 return REG_NOERROR;
4150}
4151
4152
4153/* Functions for matching context. */
4154
4155/* Initialize MCTX. */
4156
4157static reg_errcode_t
4158internal_function
4159match_ctx_init (re_match_context_t *mctx, int eflags, int n)
4160{
4161 mctx->eflags = eflags;
4162 mctx->match_last = -1;
4163 if (n > 0)
4164 {
4165 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4166 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4167 if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4168 return REG_ESPACE;
4169 }
4170 /* Already zero-ed by the caller.
4171 else
4172 mctx->bkref_ents = NULL;
4173 mctx->nbkref_ents = 0;
4174 mctx->nsub_tops = 0; */
4175 mctx->abkref_ents = n;
4176 mctx->max_mb_elem_len = 1;
4177 mctx->asub_tops = n;
4178 return REG_NOERROR;
4179}
4180
4181/* Clean the entries which depend on the current input in MCTX.
4182 This function must be invoked when the matcher changes the start index
4183 of the input, or changes the input string. */
4184
4185static void
4186internal_function
4187match_ctx_clean (re_match_context_t *mctx)
4188{
4189 int st_idx;
4190 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4191 {
4192 int sl_idx;
4193 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4194 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4195 {
4196 re_sub_match_last_t *last = top->lasts[sl_idx];
4197 re_free (last->path.array);
4198 re_free (last);
4199 }
4200 re_free (top->lasts);
4201 if (top->path)
4202 {
4203 re_free (top->path->array);
4204 re_free (top->path);
4205 }
4206 free (top);
4207 }
4208
4209 mctx->nsub_tops = 0;
4210 mctx->nbkref_ents = 0;
4211}
4212
4213/* Free all the memory associated with MCTX. */
4214
4215static void
4216internal_function
4217match_ctx_free (re_match_context_t *mctx)
4218{
4219 /* First, free all the memory associated with MCTX->SUB_TOPS. */
4220 match_ctx_clean (mctx);
4221 re_free (mctx->sub_tops);
4222 re_free (mctx->bkref_ents);
4223}
4224
4225/* Add a new backreference entry to MCTX.
4226 Note that we assume that caller never call this function with duplicate
4227 entry, and call with STR_IDX which isn't smaller than any existing entry.
4228*/
4229
4230static reg_errcode_t
4231internal_function
4232match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4233 int to)
4234{
4235 if (mctx->nbkref_ents >= mctx->abkref_ents)
4236 {
4237 struct re_backref_cache_entry* new_entry;
4238 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4239 mctx->abkref_ents * 2);
4240 if (BE (new_entry == NULL, 0))
4241 {
4242 re_free (mctx->bkref_ents);
4243 return REG_ESPACE;
4244 }
4245 mctx->bkref_ents = new_entry;
4246 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4247 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4248 mctx->abkref_ents *= 2;
4249 }
4250 if (mctx->nbkref_ents > 0
4251 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4252 mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4253
4254 mctx->bkref_ents[mctx->nbkref_ents].node = node;
4255 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4256 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4257 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4258
4259 /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4260 If bit N is clear, means that this entry won't epsilon-transition to
4261 an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4262 it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4263 such node.
4264
4265 A backreference does not epsilon-transition unless it is empty, so set
4266 to all zeros if FROM != TO. */
4267 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4268 = (from == to ? ~0 : 0);
4269
4270 mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4271 if (mctx->max_mb_elem_len < to - from)
4272 mctx->max_mb_elem_len = to - from;
4273 return REG_NOERROR;
4274}
4275
4276/* Search for the first entry which has the same str_idx, or -1 if none is
4277 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4278
4279static int
4280internal_function
4281search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
4282{
4283 int left, right, mid, last;
4284 last = right = mctx->nbkref_ents;
4285 for (left = 0; left < right;)
4286 {
4287 mid = (left + right) / 2;
4288 if (mctx->bkref_ents[mid].str_idx < str_idx)
4289 left = mid + 1;
4290 else
4291 right = mid;
4292 }
4293 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4294 return left;
4295 else
4296 return -1;
4297}
4298
4299/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4300 at STR_IDX. */
4301
4302static reg_errcode_t
4303internal_function
4304match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
4305{
4306#ifdef DEBUG
4307 assert (mctx->sub_tops != NULL);
4308 assert (mctx->asub_tops > 0);
4309#endif
4310 if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4311 {
4312 int new_asub_tops = mctx->asub_tops * 2;
4313 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4314 re_sub_match_top_t *,
4315 new_asub_tops);
4316 if (BE (new_array == NULL, 0))
4317 return REG_ESPACE;
4318 mctx->sub_tops = new_array;
4319 mctx->asub_tops = new_asub_tops;
4320 }
4321 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4322 if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4323 return REG_ESPACE;
4324 mctx->sub_tops[mctx->nsub_tops]->node = node;
4325 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4326 return REG_NOERROR;
4327}
4328
4329/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4330 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4331
4332static re_sub_match_last_t *
4333internal_function
4334match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
4335{
4336 re_sub_match_last_t *new_entry;
4337 if (BE (subtop->nlasts == subtop->alasts, 0))
4338 {
4339 int new_alasts = 2 * subtop->alasts + 1;
4340 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4341 re_sub_match_last_t *,
4342 new_alasts);
4343 if (BE (new_array == NULL, 0))
4344 return NULL;
4345 subtop->lasts = new_array;
4346 subtop->alasts = new_alasts;
4347 }
4348 new_entry = calloc (1, sizeof (re_sub_match_last_t));
4349 if (BE (new_entry != NULL, 1))
4350 {
4351 subtop->lasts[subtop->nlasts] = new_entry;
4352 new_entry->node = node;
4353 new_entry->str_idx = str_idx;
4354 ++subtop->nlasts;
4355 }
4356 return new_entry;
4357}
4358
4359static void
4360internal_function
4361sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4362 re_dfastate_t **limited_sts, int last_node, int last_str_idx)
4363{
4364 sctx->sifted_states = sifted_sts;
4365 sctx->limited_states = limited_sts;
4366 sctx->last_node = last_node;
4367 sctx->last_str_idx = last_str_idx;
4368 re_node_set_init_empty (&sctx->limits);
4369}
diff --git a/win32/sched.h b/win32/sched.h
new file mode 100644
index 000000000..128bfe698
--- /dev/null
+++ b/win32/sched.h
@@ -0,0 +1 @@
static inline void sched_yield(void) {}
diff --git a/win32/select.c b/win32/select.c
new file mode 100644
index 000000000..416174b3e
--- /dev/null
+++ b/win32/select.c
@@ -0,0 +1,573 @@
1/* Emulation for select(2)
2 Contributed by Paolo Bonzini.
3
4 Copyright 2008-2015 Free Software Foundation, Inc.
5
6 This file is part of gnulib.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, see <http://www.gnu.org/licenses/>. */
20
21#include <malloc.h>
22#include <assert.h>
23
24#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
25/* Native Windows. */
26
27#include <sys/types.h>
28#include <errno.h>
29#include <limits.h>
30
31#include <winsock2.h>
32#include <windows.h>
33#include <io.h>
34#include <stdio.h>
35#include <conio.h>
36#include <time.h>
37
38/* Get the overridden 'struct timeval'. */
39#include <sys/time.h>
40
41#undef select
42
43struct bitset {
44 unsigned char in[FD_SETSIZE / CHAR_BIT];
45 unsigned char out[FD_SETSIZE / CHAR_BIT];
46};
47
48/* Declare data structures for ntdll functions. */
49typedef struct _FILE_PIPE_LOCAL_INFORMATION {
50 ULONG NamedPipeType;
51 ULONG NamedPipeConfiguration;
52 ULONG MaximumInstances;
53 ULONG CurrentInstances;
54 ULONG InboundQuota;
55 ULONG ReadDataAvailable;
56 ULONG OutboundQuota;
57 ULONG WriteQuotaAvailable;
58 ULONG NamedPipeState;
59 ULONG NamedPipeEnd;
60} FILE_PIPE_LOCAL_INFORMATION, *PFILE_PIPE_LOCAL_INFORMATION;
61
62typedef struct _IO_STATUS_BLOCK
63{
64 union {
65 DWORD Status;
66 PVOID Pointer;
67 } u;
68 ULONG_PTR Information;
69} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;
70
71typedef enum _FILE_INFORMATION_CLASS {
72 FilePipeLocalInformation = 24
73} FILE_INFORMATION_CLASS, *PFILE_INFORMATION_CLASS;
74
75typedef DWORD (WINAPI *PNtQueryInformationFile)
76 (HANDLE, IO_STATUS_BLOCK *, VOID *, ULONG, FILE_INFORMATION_CLASS);
77
78#ifndef PIPE_BUF
79#define PIPE_BUF 512
80#endif
81
82static BOOL IsConsoleHandle (HANDLE h)
83{
84 DWORD mode;
85 return GetConsoleMode (h, &mode) != 0;
86}
87
88static BOOL
89IsSocketHandle (HANDLE h)
90{
91 WSANETWORKEVENTS ev;
92
93 if (IsConsoleHandle (h))
94 return FALSE;
95
96 /* Under Wine, it seems that getsockopt returns 0 for pipes too.
97 WSAEnumNetworkEvents instead distinguishes the two correctly. */
98 ev.lNetworkEvents = 0xDEADBEEF;
99 WSAEnumNetworkEvents ((SOCKET) h, NULL, &ev);
100 return ev.lNetworkEvents != 0xDEADBEEF;
101}
102
103/* Compute output fd_sets for libc descriptor FD (whose Windows handle is
104 H). */
105
106static int
107windows_poll_handle (HANDLE h, int fd,
108 struct bitset *rbits,
109 struct bitset *wbits,
110 struct bitset *xbits)
111{
112 BOOL read, write, except;
113 int i, ret;
114 INPUT_RECORD *irbuffer;
115 DWORD avail, nbuffer;
116 BOOL bRet;
117 IO_STATUS_BLOCK iosb;
118 FILE_PIPE_LOCAL_INFORMATION fpli;
119 static PNtQueryInformationFile NtQueryInformationFile;
120 static BOOL once_only;
121
122 read = write = except = FALSE;
123 switch (GetFileType (h))
124 {
125 case FILE_TYPE_DISK:
126 read = TRUE;
127 write = TRUE;
128 break;
129
130 case FILE_TYPE_PIPE:
131 if (!once_only)
132 {
133 NtQueryInformationFile = (PNtQueryInformationFile)
134 GetProcAddress (GetModuleHandle ("ntdll.dll"),
135 "NtQueryInformationFile");
136 once_only = TRUE;
137 }
138
139 if (PeekNamedPipe (h, NULL, 0, NULL, &avail, NULL) != 0)
140 {
141 if (avail)
142 read = TRUE;
143 }
144 else if (GetLastError () == ERROR_BROKEN_PIPE)
145 read = TRUE;
146
147 else
148 {
149 /* It was the write-end of the pipe. Check if it is writable.
150 If NtQueryInformationFile fails, optimistically assume the pipe is
151 writable. This could happen on Windows 9x, where
152 NtQueryInformationFile is not available, or if we inherit a pipe
153 that doesn't permit FILE_READ_ATTRIBUTES access on the write end
154 (I think this should not happen since Windows XP SP2; WINE seems
155 fine too). Otherwise, ensure that enough space is available for
156 atomic writes. */
157 memset (&iosb, 0, sizeof (iosb));
158 memset (&fpli, 0, sizeof (fpli));
159
160 if (!NtQueryInformationFile
161 || NtQueryInformationFile (h, &iosb, &fpli, sizeof (fpli),
162 FilePipeLocalInformation)
163 || fpli.WriteQuotaAvailable >= PIPE_BUF
164 || (fpli.OutboundQuota < PIPE_BUF &&
165 fpli.WriteQuotaAvailable == fpli.OutboundQuota))
166 write = TRUE;
167 }
168 break;
169
170 case FILE_TYPE_CHAR:
171 write = TRUE;
172 if (!(rbits->in[fd / CHAR_BIT] & (1 << (fd & (CHAR_BIT - 1)))))
173 break;
174
175 ret = WaitForSingleObject (h, 0);
176 if (ret == WAIT_OBJECT_0)
177 {
178 if (!IsConsoleHandle (h))
179 {
180 read = TRUE;
181 break;
182 }
183
184 nbuffer = avail = 0;
185 bRet = GetNumberOfConsoleInputEvents (h, &nbuffer);
186
187 /* Screen buffers handles are filtered earlier. */
188 assert (bRet);
189 if (nbuffer == 0)
190 {
191 except = TRUE;
192 break;
193 }
194
195 irbuffer = (INPUT_RECORD *) alloca (nbuffer * sizeof (INPUT_RECORD));
196 bRet = PeekConsoleInput (h, irbuffer, nbuffer, &avail);
197 if (!bRet || avail == 0)
198 {
199 except = TRUE;
200 break;
201 }
202
203 for (i = 0; i < avail; i++)
204 if (irbuffer[i].EventType == KEY_EVENT &&
205 irbuffer[i].Event.KeyEvent.bKeyDown)
206 read = TRUE;
207 }
208 break;
209
210 default:
211 ret = WaitForSingleObject (h, 0);
212 write = TRUE;
213 if (ret == WAIT_OBJECT_0)
214 read = TRUE;
215
216 break;
217 }
218
219 ret = 0;
220 if (read && (rbits->in[fd / CHAR_BIT] & (1 << (fd & (CHAR_BIT - 1)))))
221 {
222 rbits->out[fd / CHAR_BIT] |= (1 << (fd & (CHAR_BIT - 1)));
223 ret++;
224 }
225
226 if (write && (wbits->in[fd / CHAR_BIT] & (1 << (fd & (CHAR_BIT - 1)))))
227 {
228 wbits->out[fd / CHAR_BIT] |= (1 << (fd & (CHAR_BIT - 1)));
229 ret++;
230 }
231
232 if (except && (xbits->in[fd / CHAR_BIT] & (1 << (fd & (CHAR_BIT - 1)))))
233 {
234 xbits->out[fd / CHAR_BIT] |= (1 << (fd & (CHAR_BIT - 1)));
235 ret++;
236 }
237
238 return ret;
239}
240
241int
242mingw_select (int nfds, fd_set *rfds, fd_set *wfds, fd_set *xfds,
243 struct timeval *timeout)
244#undef timeval
245{
246 static struct timeval tv0;
247 static HANDLE hEvent;
248 HANDLE h, handle_array[FD_SETSIZE + 2];
249 fd_set handle_rfds, handle_wfds, handle_xfds;
250 struct bitset rbits, wbits, xbits;
251 unsigned char anyfds_in[FD_SETSIZE / CHAR_BIT];
252 DWORD ret, wait_timeout, nhandles, nsock, nbuffer;
253 MSG msg;
254 int i, fd, rc;
255 clock_t tend;
256
257 if (nfds > FD_SETSIZE)
258 nfds = FD_SETSIZE;
259
260 if (!timeout)
261 wait_timeout = INFINITE;
262 else
263 {
264 wait_timeout = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
265
266 /* select is also used as a portable usleep. */
267 if (!rfds && !wfds && !xfds)
268 {
269 Sleep (wait_timeout);
270 return 0;
271 }
272 }
273
274 if (!hEvent)
275 hEvent = CreateEvent (NULL, FALSE, FALSE, NULL);
276
277 handle_array[0] = hEvent;
278 nhandles = 1;
279 nsock = 0;
280
281 /* Copy descriptors to bitsets. At the same time, eliminate
282 bits in the "wrong" direction for console input buffers
283 and screen buffers, because screen buffers are waitable
284 and they will block until a character is available. */
285 memset (&rbits, 0, sizeof (rbits));
286 memset (&wbits, 0, sizeof (wbits));
287 memset (&xbits, 0, sizeof (xbits));
288 memset (anyfds_in, 0, sizeof (anyfds_in));
289 if (rfds)
290 for (i = 0; i < rfds->fd_count; i++)
291 {
292 fd = rfds->fd_array[i];
293 h = (HANDLE) _get_osfhandle (fd);
294 if (IsConsoleHandle (h)
295 && !GetNumberOfConsoleInputEvents (h, &nbuffer))
296 continue;
297
298 rbits.in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
299 anyfds_in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
300 }
301 else
302 rfds = (fd_set *) alloca (sizeof (fd_set));
303
304 if (wfds)
305 for (i = 0; i < wfds->fd_count; i++)
306 {
307 fd = wfds->fd_array[i];
308 h = (HANDLE) _get_osfhandle (fd);
309 if (IsConsoleHandle (h)
310 && GetNumberOfConsoleInputEvents (h, &nbuffer))
311 continue;
312
313 wbits.in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
314 anyfds_in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
315 }
316 else
317 wfds = (fd_set *) alloca (sizeof (fd_set));
318
319 if (xfds)
320 for (i = 0; i < xfds->fd_count; i++)
321 {
322 fd = xfds->fd_array[i];
323 xbits.in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
324 anyfds_in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
325 }
326 else
327 xfds = (fd_set *) alloca (sizeof (fd_set));
328
329 /* Zero all the fd_sets, including the application's. */
330 FD_ZERO (rfds);
331 FD_ZERO (wfds);
332 FD_ZERO (xfds);
333 FD_ZERO (&handle_rfds);
334 FD_ZERO (&handle_wfds);
335 FD_ZERO (&handle_xfds);
336
337 /* Classify handles. Create fd sets for sockets, poll the others. */
338 for (i = 0; i < nfds; i++)
339 {
340 if ((anyfds_in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1)))) == 0)
341 continue;
342
343 h = (HANDLE) _get_osfhandle (i);
344 if (!h)
345 {
346 errno = EBADF;
347 return -1;
348 }
349
350 if (IsSocketHandle (h))
351 {
352 int requested = FD_CLOSE;
353
354 /* See above; socket handles are mapped onto select, but we
355 need to map descriptors to handles. */
356 if (rbits.in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
357 {
358 requested |= FD_READ | FD_ACCEPT;
359 FD_SET ((SOCKET) h, rfds);
360 FD_SET ((SOCKET) h, &handle_rfds);
361 }
362 if (wbits.in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
363 {
364 requested |= FD_WRITE | FD_CONNECT;
365 FD_SET ((SOCKET) h, wfds);
366 FD_SET ((SOCKET) h, &handle_wfds);
367 }
368 if (xbits.in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
369 {
370 requested |= FD_OOB;
371 FD_SET ((SOCKET) h, xfds);
372 FD_SET ((SOCKET) h, &handle_xfds);
373 }
374
375 WSAEventSelect ((SOCKET) h, hEvent, requested);
376 nsock++;
377 }
378 else
379 {
380 handle_array[nhandles++] = h;
381
382 /* Poll now. If we get an event, do not wait below. */
383 if (wait_timeout != 0
384 && windows_poll_handle (h, i, &rbits, &wbits, &xbits))
385 wait_timeout = 0;
386 }
387 }
388
389 /* Place a sentinel at the end of the array. */
390 handle_array[nhandles] = NULL;
391
392 /* When will the waiting period expire? */
393 if (wait_timeout != INFINITE)
394 tend = clock () + wait_timeout;
395
396restart:
397 if (wait_timeout == 0 || nsock == 0)
398 rc = 0;
399 else
400 {
401 /* See if we need to wait in the loop below. If any select is ready,
402 do MsgWaitForMultipleObjects anyway to dispatch messages, but
403 no need to call select again. */
404 rc = select (0, &handle_rfds, &handle_wfds, &handle_xfds, &tv0);
405 if (rc == 0)
406 {
407 /* Restore the fd_sets for the other select we do below. */
408 memcpy (&handle_rfds, rfds, sizeof (fd_set));
409 memcpy (&handle_wfds, wfds, sizeof (fd_set));
410 memcpy (&handle_xfds, xfds, sizeof (fd_set));
411 }
412 else
413 wait_timeout = 0;
414 }
415
416 /* How much is left to wait? */
417 if (wait_timeout != INFINITE)
418 {
419 clock_t tnow = clock ();
420 if (tend >= tnow)
421 wait_timeout = tend - tnow;
422 else
423 wait_timeout = 0;
424 }
425
426 for (;;)
427 {
428 ret = MsgWaitForMultipleObjects (nhandles, handle_array, FALSE,
429 wait_timeout, QS_ALLINPUT);
430
431 if (ret == WAIT_OBJECT_0 + nhandles)
432 {
433 /* new input of some other kind */
434 BOOL bRet;
435 while ((bRet = PeekMessage (&msg, NULL, 0, 0, PM_REMOVE)) != 0)
436 {
437 TranslateMessage (&msg);
438 DispatchMessage (&msg);
439 }
440 }
441 else
442 break;
443 }
444
445 /* If we haven't done it yet, check the status of the sockets. */
446 if (rc == 0 && nsock > 0)
447 rc = select (0, &handle_rfds, &handle_wfds, &handle_xfds, &tv0);
448
449 if (nhandles > 1)
450 {
451 /* Count results that are not counted in the return value of select. */
452 nhandles = 1;
453 for (i = 0; i < nfds; i++)
454 {
455 if ((anyfds_in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1)))) == 0)
456 continue;
457
458 h = (HANDLE) _get_osfhandle (i);
459 if (h == handle_array[nhandles])
460 {
461 /* Not a socket. */
462 nhandles++;
463 windows_poll_handle (h, i, &rbits, &wbits, &xbits);
464 if (rbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1)))
465 || wbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1)))
466 || xbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
467 rc++;
468 }
469 }
470
471 if (rc == 0
472 && (wait_timeout == INFINITE
473 /* If NHANDLES > 1, but no bits are set, it means we've
474 been told incorrectly that some handle was signaled.
475 This happens with anonymous pipes, which always cause
476 MsgWaitForMultipleObjects to exit immediately, but no
477 data is found ready to be read by windows_poll_handle.
478 To avoid a total failure (whereby we return zero and
479 don't wait at all), let's poll in a more busy loop. */
480 || (wait_timeout != 0 && nhandles > 1)))
481 {
482 /* Sleep 1 millisecond to avoid busy wait and retry with the
483 original fd_sets. */
484 memcpy (&handle_rfds, rfds, sizeof (fd_set));
485 memcpy (&handle_wfds, wfds, sizeof (fd_set));
486 memcpy (&handle_xfds, xfds, sizeof (fd_set));
487 SleepEx (1, TRUE);
488 goto restart;
489 }
490 if (timeout && wait_timeout == 0 && rc == 0)
491 timeout->tv_sec = timeout->tv_usec = 0;
492 }
493
494 /* Now fill in the results. */
495 FD_ZERO (rfds);
496 FD_ZERO (wfds);
497 FD_ZERO (xfds);
498 nhandles = 1;
499 for (i = 0; i < nfds; i++)
500 {
501 if ((anyfds_in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1)))) == 0)
502 continue;
503
504 h = (HANDLE) _get_osfhandle (i);
505 if (h != handle_array[nhandles])
506 {
507 /* Perform handle->descriptor mapping. */
508 WSAEventSelect ((SOCKET) h, NULL, 0);
509 if (FD_ISSET (h, &handle_rfds))
510 FD_SET (i, rfds);
511 if (FD_ISSET (h, &handle_wfds))
512 FD_SET (i, wfds);
513 if (FD_ISSET (h, &handle_xfds))
514 FD_SET (i, xfds);
515 }
516 else
517 {
518 /* Not a socket. */
519 nhandles++;
520 if (rbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
521 FD_SET (i, rfds);
522 if (wbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
523 FD_SET (i, wfds);
524 if (xbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
525 FD_SET (i, xfds);
526 }
527 }
528
529 return rc;
530}
531
532#else /* ! Native Windows. */
533
534#include <sys/select.h>
535#include <stddef.h> /* NULL */
536#include <errno.h>
537#include <unistd.h>
538
539#undef select
540
541int
542rpl_select (int nfds, fd_set *rfds, fd_set *wfds, fd_set *xfds,
543 struct timeval *timeout)
544{
545 int i;
546
547 /* FreeBSD 8.2 has a bug: it does not always detect invalid fds. */
548 if (nfds < 0 || nfds > FD_SETSIZE)
549 {
550 errno = EINVAL;
551 return -1;
552 }
553 for (i = 0; i < nfds; i++)
554 {
555 if (((rfds && FD_ISSET (i, rfds))
556 || (wfds && FD_ISSET (i, wfds))
557 || (xfds && FD_ISSET (i, xfds)))
558 && dup2 (i, i) != i)
559 return -1;
560 }
561
562 /* Interix 3.5 has a bug: it does not support nfds == 0. */
563 if (nfds == 0)
564 {
565 nfds = 1;
566 rfds = NULL;
567 wfds = NULL;
568 xfds = NULL;
569 }
570 return select (nfds, rfds, wfds, xfds, timeout);
571}
572
573#endif
diff --git a/win32/statfs.c b/win32/statfs.c
new file mode 100644
index 000000000..a35c9adea
--- /dev/null
+++ b/win32/statfs.c
@@ -0,0 +1,80 @@
1#include <sys/vfs.h>
2#include "libbb.h"
3
4/*
5 * Code from libguestfs (with addition of GetVolumeInformation call)
6 */
7int statfs(const char *file, struct statfs *buf)
8{
9 ULONGLONG free_bytes_available; /* for user - similar to bavail */
10 ULONGLONG total_number_of_bytes;
11 ULONGLONG total_number_of_free_bytes; /* for everyone - bfree */
12 DWORD serial, namelen, flags;
13 char fsname[100];
14 struct mntent *mnt;
15
16 if ( (mnt=find_mount_point(file, 0)) == NULL ) {
17 return -1;
18 }
19
20 file = mnt->mnt_dir;
21 if ( !GetDiskFreeSpaceEx(file, (PULARGE_INTEGER) &free_bytes_available,
22 (PULARGE_INTEGER) &total_number_of_bytes,
23 (PULARGE_INTEGER) &total_number_of_free_bytes) ) {
24 errno = err_win_to_posix(GetLastError());
25 return -1;
26 }
27
28 if ( !GetVolumeInformation(file, NULL, 0, &serial, &namelen, &flags,
29 fsname, 100) ) {
30 errno = err_win_to_posix(GetLastError());
31 return -1;
32 }
33
34 /* XXX I couldn't determine how to get block size. MSDN has a
35 * unhelpful hard-coded list here:
36 * http://support.microsoft.com/kb/140365
37 * but this depends on the filesystem type, the size of the disk and
38 * the version of Windows. So this code assumes the disk is NTFS
39 * and the version of Windows is >= Win2K.
40 */
41 if (total_number_of_bytes < UINT64_C(16) * 1024 * 1024 * 1024 * 1024)
42 buf->f_bsize = 4096;
43 else if (total_number_of_bytes < UINT64_C(32) * 1024 * 1024 * 1024 * 1024)
44 buf->f_bsize = 8192;
45 else if (total_number_of_bytes < UINT64_C(64) * 1024 * 1024 * 1024 * 1024)
46 buf->f_bsize = 16384;
47 else if (total_number_of_bytes < UINT64_C(128) * 1024 * 1024 * 1024 * 1024)
48 buf->f_bsize = 32768;
49 else
50 buf->f_bsize = 65536;
51
52 /*
53 * Valid filesystem names don't seem to be documented. The following
54 * are present in Wine.
55 */
56 if ( strcmp(fsname, "NTFS") == 0 ) {
57 buf->f_type = 0x5346544e;
58 }
59 else if ( strcmp(fsname, "FAT") == 0 || strcmp(fsname, "FAT32") == 0 ) {
60 buf->f_type = 0x4006;
61 }
62 else if ( strcmp(fsname, "CDFS") == 0 ) {
63 buf->f_type = 0x9660;
64 }
65 else {
66 buf->f_type = 0;
67 }
68
69 buf->f_frsize = buf->f_bsize;
70 buf->f_blocks = total_number_of_bytes / buf->f_bsize;
71 buf->f_bfree = total_number_of_free_bytes / buf->f_bsize;
72 buf->f_bavail = free_bytes_available / buf->f_bsize;
73 buf->f_files = UINT32_MAX;
74 buf->f_ffree = UINT32_MAX;
75 buf->f_fsid = serial;
76 buf->f_flag = UINT64_MAX;
77 buf->f_namelen = namelen;
78
79 return 0;
80}
diff --git a/win32/strptime.c b/win32/strptime.c
new file mode 100644
index 000000000..89fb8b736
--- /dev/null
+++ b/win32/strptime.c
@@ -0,0 +1,646 @@
1/* Copyright (C) 2002, 2004-2005, 2007, 2009-2014 Free Software Foundation,
2 Inc.
3 This file is part of the GNU C Library.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, see <http://www.gnu.org/licenses/>. */
17
18/*
19 * File from gnulib (http://www.gnu.org/software/gnulib/), processed with
20 * coan source -U_LIBC -U_NL_CURRENT -UHAVE_TM_GMTOFF strptime.c
21 * and lightly edited.
22 */
23
24#include <time.h>
25
26#include <assert.h>
27#include <ctype.h>
28#include <limits.h>
29#include <string.h>
30#include <stdbool.h>
31
32
33enum ptime_locale_status { not, loc, raw };
34
35
36
37#define match_char(ch1, ch2) if (ch1 != ch2) return NULL
38/* Oh come on. Get a reasonable compiler. */
39# define match_string(cs1, s2) \
40 (strncasecmp ((cs1), (s2), strlen (cs1)) ? 0 : ((s2) += strlen (cs1), 1))
41/* We intentionally do not use isdigit() for testing because this will
42 lead to problems with the wide character version. */
43#define get_number(from, to, n) \
44 do { \
45 int __n = n; \
46 val = 0; \
47 while (*rp == ' ') \
48 ++rp; \
49 if (*rp < '0' || *rp > '9') \
50 return NULL; \
51 do { \
52 val *= 10; \
53 val += *rp++ - '0'; \
54 } while (--__n > 0 && val * 10 <= to && *rp >= '0' && *rp <= '9'); \
55 if (val < from || val > to) \
56 return NULL; \
57 } while (0)
58# define get_alt_number(from, to, n) \
59 /* We don't have the alternate representation. */ \
60 get_number(from, to, n)
61#define recursive(new_fmt) \
62 (*(new_fmt) != '\0' \
63 && (rp = __strptime_internal (rp, (new_fmt), tm, \
64 decided, era_cnt LOCALE_ARG)) != NULL)
65
66
67static char const weekday_name[][10] =
68 {
69 "Sunday", "Monday", "Tuesday", "Wednesday",
70 "Thursday", "Friday", "Saturday"
71 };
72static char const ab_weekday_name[][4] =
73 {
74 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
75 };
76static char const month_name[][10] =
77 {
78 "January", "February", "March", "April", "May", "June",
79 "July", "August", "September", "October", "November", "December"
80 };
81static char const ab_month_name[][4] =
82 {
83 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
84 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
85 };
86# define HERE_D_T_FMT "%a %b %e %H:%M:%S %Y"
87# define HERE_D_FMT "%m/%d/%y"
88# define HERE_AM_STR "AM"
89# define HERE_PM_STR "PM"
90# define HERE_T_FMT_AMPM "%I:%M:%S %p"
91# define HERE_T_FMT "%H:%M:%S"
92
93static const unsigned short int __mon_yday[2][13] =
94 {
95 /* Normal years. */
96 { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
97 /* Leap years. */
98 { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
99 };
100
101# define LOCALE_PARAM
102# define LOCALE_ARG
103# define LOCALE_PARAM_DECL
104# define LOCALE_PARAM_PROTO
105# define HELPER_LOCALE_ARG
106# define ISSPACE(Ch) isspace (Ch)
107
108
109
110
111#ifndef __isleap
112/* Nonzero if YEAR is a leap year (every 4 years,
113 except every 100th isn't, and every 400th is). */
114# define __isleap(year) \
115 ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
116#endif
117
118/* Compute the day of the week. */
119static void
120day_of_the_week (struct tm *tm)
121{
122 /* We know that January 1st 1970 was a Thursday (= 4). Compute the
123 difference between this data in the one on TM and so determine
124 the weekday. */
125 int corr_year = 1900 + tm->tm_year - (tm->tm_mon < 2);
126 int wday = (-473
127 + (365 * (tm->tm_year - 70))
128 + (corr_year / 4)
129 - ((corr_year / 4) / 25) + ((corr_year / 4) % 25 < 0)
130 + (((corr_year / 4) / 25) / 4)
131 + __mon_yday[0][tm->tm_mon]
132 + tm->tm_mday - 1);
133 tm->tm_wday = ((wday % 7) + 7) % 7;
134}
135
136/* Compute the day of the year. */
137static void
138day_of_the_year (struct tm *tm)
139{
140 tm->tm_yday = (__mon_yday[__isleap (1900 + tm->tm_year)][tm->tm_mon]
141 + (tm->tm_mday - 1));
142}
143
144
145static char *
146__strptime_internal (rp, fmt, tm, decided, era_cnt LOCALE_PARAM)
147 const char *rp;
148 const char *fmt;
149 struct tm *tm;
150 enum ptime_locale_status *decided;
151 int era_cnt;
152 LOCALE_PARAM_DECL
153{
154
155 int cnt;
156 size_t val;
157 int have_I, is_pm;
158 int century, want_century;
159 int want_era;
160 int have_wday, want_xday;
161 int have_yday;
162 int have_mon, have_mday;
163 int have_uweek, have_wweek;
164 int week_no;
165
166 have_I = is_pm = 0;
167 century = -1;
168 want_century = 0;
169 want_era = 0;
170 week_no = 0;
171
172 have_wday = want_xday = have_yday = have_mon = have_mday = have_uweek = 0;
173 have_wweek = 0;
174
175 while (*fmt != '\0')
176 {
177 /* A white space in the format string matches 0 more or white
178 space in the input string. */
179 if (ISSPACE (*fmt))
180 {
181 while (ISSPACE (*rp))
182 ++rp;
183 ++fmt;
184 continue;
185 }
186
187 /* Any character but '%' must be matched by the same character
188 in the iput string. */
189 if (*fmt != '%')
190 {
191 match_char (*fmt++, *rp++);
192 continue;
193 }
194
195 ++fmt;
196 /* We need this for handling the 'E' modifier. */
197 start_over:
198
199 switch (*fmt++)
200 {
201 case '%':
202 /* Match the '%' character itself. */
203 match_char ('%', *rp++);
204 break;
205 case 'a':
206 case 'A':
207 /* Match day of week. */
208 for (cnt = 0; cnt < 7; ++cnt)
209 {
210 if (*decided != loc
211 && (match_string (weekday_name[cnt], rp)
212 || match_string (ab_weekday_name[cnt], rp)))
213 {
214 *decided = raw;
215 break;
216 }
217 }
218 if (cnt == 7)
219 /* Does not match a weekday name. */
220 return NULL;
221 tm->tm_wday = cnt;
222 have_wday = 1;
223 break;
224 case 'b':
225 case 'B':
226 case 'h':
227 /* Match month name. */
228 for (cnt = 0; cnt < 12; ++cnt)
229 {
230 if (match_string (month_name[cnt], rp)
231 || match_string (ab_month_name[cnt], rp))
232 {
233 *decided = raw;
234 break;
235 }
236 }
237 if (cnt == 12)
238 /* Does not match a month name. */
239 return NULL;
240 tm->tm_mon = cnt;
241 want_xday = 1;
242 break;
243 case 'c':
244 /* Match locale's date and time format. */
245 if (!recursive (HERE_D_T_FMT))
246 return NULL;
247 want_xday = 1;
248 break;
249 case 'C':
250 /* Match century number. */
251 get_number (0, 99, 2);
252 century = val;
253 want_xday = 1;
254 break;
255 case 'd':
256 case 'e':
257 /* Match day of month. */
258 get_number (1, 31, 2);
259 tm->tm_mday = val;
260 have_mday = 1;
261 want_xday = 1;
262 break;
263 case 'F':
264 if (!recursive ("%Y-%m-%d"))
265 return NULL;
266 want_xday = 1;
267 break;
268 case 'x':
269 /* Fall through. */
270 case 'D':
271 /* Match standard day format. */
272 if (!recursive (HERE_D_FMT))
273 return NULL;
274 want_xday = 1;
275 break;
276 case 'k':
277 case 'H':
278 /* Match hour in 24-hour clock. */
279 get_number (0, 23, 2);
280 tm->tm_hour = val;
281 have_I = 0;
282 break;
283 case 'l':
284 /* Match hour in 12-hour clock. GNU extension. */
285 case 'I':
286 /* Match hour in 12-hour clock. */
287 get_number (1, 12, 2);
288 tm->tm_hour = val % 12;
289 have_I = 1;
290 break;
291 case 'j':
292 /* Match day number of year. */
293 get_number (1, 366, 3);
294 tm->tm_yday = val - 1;
295 have_yday = 1;
296 break;
297 case 'm':
298 /* Match number of month. */
299 get_number (1, 12, 2);
300 tm->tm_mon = val - 1;
301 have_mon = 1;
302 want_xday = 1;
303 break;
304 case 'M':
305 /* Match minute. */
306 get_number (0, 59, 2);
307 tm->tm_min = val;
308 break;
309 case 'n':
310 case 't':
311 /* Match any white space. */
312 while (ISSPACE (*rp))
313 ++rp;
314 break;
315 case 'p':
316 /* Match locale's equivalent of AM/PM. */
317 if (!match_string (HERE_AM_STR, rp))
318 {
319 if (match_string (HERE_PM_STR, rp))
320 is_pm = 1;
321 else
322 return NULL;
323 }
324 break;
325 case 'r':
326 if (!recursive (HERE_T_FMT_AMPM))
327 return NULL;
328 break;
329 case 'R':
330 if (!recursive ("%H:%M"))
331 return NULL;
332 break;
333 case 's':
334 {
335 /* The number of seconds may be very high so we cannot use
336 the 'get_number' macro. Instead read the number
337 character for character and construct the result while
338 doing this. */
339 time_t secs = 0;
340 if (*rp < '0' || *rp > '9')
341 /* We need at least one digit. */
342 return NULL;
343
344 do
345 {
346 secs *= 10;
347 secs += *rp++ - '0';
348 }
349 while (*rp >= '0' && *rp <= '9');
350
351 if (localtime_r (&secs, tm) == NULL)
352 /* Error in function. */
353 return NULL;
354 }
355 break;
356 case 'S':
357 get_number (0, 61, 2);
358 tm->tm_sec = val;
359 break;
360 case 'X':
361 /* Fall through. */
362 case 'T':
363 if (!recursive (HERE_T_FMT))
364 return NULL;
365 break;
366 case 'u':
367 get_number (1, 7, 1);
368 tm->tm_wday = val % 7;
369 have_wday = 1;
370 break;
371 case 'g':
372 get_number (0, 99, 2);
373 /* XXX This cannot determine any field in TM. */
374 break;
375 case 'G':
376 if (*rp < '0' || *rp > '9')
377 return NULL;
378 /* XXX Ignore the number since we would need some more
379 information to compute a real date. */
380 do
381 ++rp;
382 while (*rp >= '0' && *rp <= '9');
383 break;
384 case 'U':
385 get_number (0, 53, 2);
386 week_no = val;
387 have_uweek = 1;
388 break;
389 case 'W':
390 get_number (0, 53, 2);
391 week_no = val;
392 have_wweek = 1;
393 break;
394 case 'V':
395 get_number (0, 53, 2);
396 /* XXX This cannot determine any field in TM without some
397 information. */
398 break;
399 case 'w':
400 /* Match number of weekday. */
401 get_number (0, 6, 1);
402 tm->tm_wday = val;
403 have_wday = 1;
404 break;
405 case 'y':
406 /* Match year within century. */
407 get_number (0, 99, 2);
408 /* The "Year 2000: The Millennium Rollover" paper suggests that
409 values in the range 69-99 refer to the twentieth century. */
410 tm->tm_year = val >= 69 ? val : val + 100;
411 /* Indicate that we want to use the century, if specified. */
412 want_century = 1;
413 want_xday = 1;
414 break;
415 case 'Y':
416 /* Match year including century number. */
417 get_number (0, 9999, 4);
418 tm->tm_year = val - 1900;
419 want_century = 0;
420 want_xday = 1;
421 break;
422 case 'Z':
423 /* XXX How to handle this? */
424 break;
425 case 'z':
426 /* We recognize two formats: if two digits are given, these
427 specify hours. If fours digits are used, minutes are
428 also specified. */
429 {
430 bool neg;
431 int n;
432
433 val = 0;
434 while (*rp == ' ')
435 ++rp;
436 if (*rp != '+' && *rp != '-')
437 return NULL;
438 neg = *rp++ == '-';
439 n = 0;
440 while (n < 4 && *rp >= '0' && *rp <= '9')
441 {
442 val = val * 10 + *rp++ - '0';
443 ++n;
444 }
445 if (n == 2)
446 val *= 100;
447 else if (n != 4)
448 /* Only two or four digits recognized. */
449 return NULL;
450 else
451 {
452 /* We have to convert the minutes into decimal. */
453 if (val % 100 >= 60)
454 return NULL;
455 val = (val / 100) * 100 + ((val % 100) * 50) / 30;
456 }
457 if (val > 1200)
458 return NULL;
459 }
460 break;
461 case 'E':
462 /* We have no information about the era format. Just use
463 the normal format. */
464 if (*fmt != 'c' && *fmt != 'C' && *fmt != 'y' && *fmt != 'Y'
465 && *fmt != 'x' && *fmt != 'X')
466 /* This is an illegal format. */
467 return NULL;
468
469 goto start_over;
470 case 'O':
471 switch (*fmt++)
472 {
473 case 'd':
474 case 'e':
475 /* Match day of month using alternate numeric symbols. */
476 get_alt_number (1, 31, 2);
477 tm->tm_mday = val;
478 have_mday = 1;
479 want_xday = 1;
480 break;
481 case 'H':
482 /* Match hour in 24-hour clock using alternate numeric
483 symbols. */
484 get_alt_number (0, 23, 2);
485 tm->tm_hour = val;
486 have_I = 0;
487 break;
488 case 'I':
489 /* Match hour in 12-hour clock using alternate numeric
490 symbols. */
491 get_alt_number (1, 12, 2);
492 tm->tm_hour = val % 12;
493 have_I = 1;
494 break;
495 case 'm':
496 /* Match month using alternate numeric symbols. */
497 get_alt_number (1, 12, 2);
498 tm->tm_mon = val - 1;
499 have_mon = 1;
500 want_xday = 1;
501 break;
502 case 'M':
503 /* Match minutes using alternate numeric symbols. */
504 get_alt_number (0, 59, 2);
505 tm->tm_min = val;
506 break;
507 case 'S':
508 /* Match seconds using alternate numeric symbols. */
509 get_alt_number (0, 61, 2);
510 tm->tm_sec = val;
511 break;
512 case 'U':
513 get_alt_number (0, 53, 2);
514 week_no = val;
515 have_uweek = 1;
516 break;
517 case 'W':
518 get_alt_number (0, 53, 2);
519 week_no = val;
520 have_wweek = 1;
521 break;
522 case 'V':
523 get_alt_number (0, 53, 2);
524 /* XXX This cannot determine any field in TM without
525 further information. */
526 break;
527 case 'w':
528 /* Match number of weekday using alternate numeric symbols. */
529 get_alt_number (0, 6, 1);
530 tm->tm_wday = val;
531 have_wday = 1;
532 break;
533 case 'y':
534 /* Match year within century using alternate numeric symbols. */
535 get_alt_number (0, 99, 2);
536 tm->tm_year = val >= 69 ? val : val + 100;
537 want_xday = 1;
538 break;
539 default:
540 return NULL;
541 }
542 break;
543 default:
544 return NULL;
545 }
546 }
547
548 if (have_I && is_pm)
549 tm->tm_hour += 12;
550
551 if (century != -1)
552 {
553 if (want_century)
554 tm->tm_year = tm->tm_year % 100 + (century - 19) * 100;
555 else
556 /* Only the century, but not the year. Strange, but so be it. */
557 tm->tm_year = (century - 19) * 100;
558 }
559
560 if (era_cnt != -1)
561 {
562 }
563 else
564 if (want_era)
565 {
566 /* No era found but we have seen an E modifier. Rectify some
567 values. */
568 if (want_century && century == -1 && tm->tm_year < 69)
569 tm->tm_year += 100;
570 }
571
572 if (want_xday && !have_wday)
573 {
574 if ( !(have_mon && have_mday) && have_yday)
575 {
576 /* We don't have tm_mon and/or tm_mday, compute them. */
577 int t_mon = 0;
578 while (__mon_yday[__isleap(1900 + tm->tm_year)][t_mon] <= tm->tm_yday)
579 t_mon++;
580 if (!have_mon)
581 tm->tm_mon = t_mon - 1;
582 if (!have_mday)
583 tm->tm_mday =
584 (tm->tm_yday
585 - __mon_yday[__isleap(1900 + tm->tm_year)][t_mon - 1] + 1);
586 }
587 day_of_the_week (tm);
588 }
589
590 if (want_xday && !have_yday)
591 day_of_the_year (tm);
592
593 if ((have_uweek || have_wweek) && have_wday)
594 {
595 int save_wday = tm->tm_wday;
596 int save_mday = tm->tm_mday;
597 int save_mon = tm->tm_mon;
598 int w_offset = have_uweek ? 0 : 1;
599
600 tm->tm_mday = 1;
601 tm->tm_mon = 0;
602 day_of_the_week (tm);
603 if (have_mday)
604 tm->tm_mday = save_mday;
605 if (have_mon)
606 tm->tm_mon = save_mon;
607
608 if (!have_yday)
609 tm->tm_yday = ((7 - (tm->tm_wday - w_offset)) % 7
610 + (week_no - 1) *7
611 + save_wday - w_offset);
612
613 if (!have_mday || !have_mon)
614 {
615 int t_mon = 0;
616 while (__mon_yday[__isleap(1900 + tm->tm_year)][t_mon]
617 <= tm->tm_yday)
618 t_mon++;
619 if (!have_mon)
620 tm->tm_mon = t_mon - 1;
621 if (!have_mday)
622 tm->tm_mday =
623 (tm->tm_yday
624 - __mon_yday[__isleap(1900 + tm->tm_year)][t_mon - 1] + 1);
625 }
626
627 tm->tm_wday = save_wday;
628 }
629
630 return (char *) rp;
631}
632
633
634char *
635strptime (buf, format, tm LOCALE_PARAM)
636 const char *buf;
637 const char *format;
638 struct tm *tm;
639 LOCALE_PARAM_DECL
640{
641 enum ptime_locale_status decided;
642
643 decided = raw;
644 return __strptime_internal (buf, format, tm, &decided, -1 LOCALE_ARG);
645}
646
diff --git a/win32/sys/ioctl.h b/win32/sys/ioctl.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/ioctl.h
diff --git a/win32/sys/mman.h b/win32/sys/mman.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/mman.h
diff --git a/win32/sys/prctl.h b/win32/sys/prctl.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/prctl.h
diff --git a/win32/sys/resource.h b/win32/sys/resource.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/resource.h
diff --git a/win32/sys/socket.h b/win32/sys/socket.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/socket.h
diff --git a/win32/sys/statfs.h b/win32/sys/statfs.h
new file mode 100644
index 000000000..498f41e50
--- /dev/null
+++ b/win32/sys/statfs.h
@@ -0,0 +1,22 @@
1#ifndef _SYS_STATFS_H
2#define _SYS_STATFS_H 1
3
4#include <stdint.h>
5
6struct statfs {
7 int f_type;
8 uint64_t f_bsize;
9 uint64_t f_frsize;
10 uint64_t f_blocks;
11 uint64_t f_bfree;
12 uint64_t f_bavail;
13 uint64_t f_files;
14 uint64_t f_ffree;
15 uint64_t f_fsid;
16 uint64_t f_flag;
17 uint64_t f_namelen;
18};
19
20extern int statfs(const char *file, struct statfs *buf);
21
22#endif
diff --git a/win32/sys/syscall.h b/win32/sys/syscall.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/syscall.h
diff --git a/win32/sys/sysmacros.h b/win32/sys/sysmacros.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/sysmacros.h
diff --git a/win32/sys/times.h b/win32/sys/times.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/times.h
diff --git a/win32/sys/un.h b/win32/sys/un.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/un.h
diff --git a/win32/sys/utsname.h b/win32/sys/utsname.h
new file mode 100644
index 000000000..6f12efd58
--- /dev/null
+++ b/win32/sys/utsname.h
@@ -0,0 +1,66 @@
1/* Copyright (C) 1991,92,94,96,97,99,2002 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
18
19/*
20 * POSIX Standard: 4.4 System Identification <sys/utsname.h>
21 */
22
23#ifndef _SYS_UTSNAME_H
24#define _SYS_UTSNAME_H 1
25
26#define _UTSNAME_LENGTH 65
27
28#ifndef _UTSNAME_SYSNAME_LENGTH
29# define _UTSNAME_SYSNAME_LENGTH _UTSNAME_LENGTH
30#endif
31#ifndef _UTSNAME_NODENAME_LENGTH
32# define _UTSNAME_NODENAME_LENGTH _UTSNAME_LENGTH
33#endif
34#ifndef _UTSNAME_RELEASE_LENGTH
35# define _UTSNAME_RELEASE_LENGTH _UTSNAME_LENGTH
36#endif
37#ifndef _UTSNAME_VERSION_LENGTH
38# define _UTSNAME_VERSION_LENGTH _UTSNAME_LENGTH
39#endif
40#ifndef _UTSNAME_MACHINE_LENGTH
41# define _UTSNAME_MACHINE_LENGTH _UTSNAME_LENGTH
42#endif
43
44/* Structure describing the system and machine. */
45struct utsname
46 {
47 /* Name of the implementation of the operating system. */
48 char sysname[_UTSNAME_SYSNAME_LENGTH];
49
50 /* Name of this node on the network. */
51 char nodename[_UTSNAME_NODENAME_LENGTH];
52
53 /* Current release level of this implementation. */
54 char release[_UTSNAME_RELEASE_LENGTH];
55 /* Current version level of this release. */
56 char version[_UTSNAME_VERSION_LENGTH];
57
58 /* Name of the hardware type the system is running on. */
59 char machine[_UTSNAME_MACHINE_LENGTH];
60 };
61
62/* Put information about the system in NAME. */
63extern int uname (struct utsname *__name);
64
65
66#endif /* sys/utsname.h */
diff --git a/win32/sys/vfs.h b/win32/sys/vfs.h
new file mode 100644
index 000000000..a899db276
--- /dev/null
+++ b/win32/sys/vfs.h
@@ -0,0 +1 @@
#include <sys/statfs.h>
diff --git a/win32/sys/wait.h b/win32/sys/wait.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/wait.h
diff --git a/win32/system.c b/win32/system.c
new file mode 100644
index 000000000..02aaaa0a1
--- /dev/null
+++ b/win32/system.c
@@ -0,0 +1,22 @@
1#include "libbb.h"
2
3int mingw_system(const char *cmd)
4{
5 const char *argv[4] = { "sh", "-c", cmd, NULL };
6 intptr_t proc;
7 HANDLE h;
8 DWORD ret = 0;
9
10 if (cmd == NULL)
11 return 1;
12
13 if ((proc=mingw_spawn_proc((char **)argv)) == -1)
14 return -1;
15
16 h = (HANDLE)proc;
17 WaitForSingleObject(h, INFINITE);
18 GetExitCodeProcess(h, &ret);
19 CloseHandle(h);
20
21 return ret << 8;
22}
diff --git a/win32/termios.c b/win32/termios.c
new file mode 100644
index 000000000..658af4a26
--- /dev/null
+++ b/win32/termios.c
@@ -0,0 +1,83 @@
1#include "libbb.h"
2
3int tcsetattr(int fd UNUSED_PARAM, int mode UNUSED_PARAM, const struct termios *t UNUSED_PARAM)
4{
5 return -1;
6}
7
8int tcgetattr(int fd UNUSED_PARAM, struct termios *t UNUSED_PARAM)
9{
10 return -1;
11}
12
13int64_t FAST_FUNC read_key(int fd, char *buf UNUSED_PARAM, int timeout)
14{
15 HANDLE cin = GetStdHandle(STD_INPUT_HANDLE);
16 INPUT_RECORD record;
17 DWORD nevent_out, mode;
18 int ret = -1;
19 char *s;
20
21 if (fd != 0)
22 bb_error_msg_and_die("read_key only works on stdin");
23 if (cin == INVALID_HANDLE_VALUE)
24 return -1;
25 GetConsoleMode(cin, &mode);
26 SetConsoleMode(cin, 0);
27
28 if (timeout > 0) {
29 if (WaitForSingleObject(cin, timeout) != WAIT_OBJECT_0)
30 goto done;
31 }
32 while (1) {
33 if (!ReadConsoleInput(cin, &record, 1, &nevent_out))
34 goto done;
35 if (record.EventType != KEY_EVENT || !record.Event.KeyEvent.bKeyDown)
36 continue;
37 if (!record.Event.KeyEvent.uChar.AsciiChar) {
38 DWORD state = record.Event.KeyEvent.dwControlKeyState;
39
40 if (state & (RIGHT_CTRL_PRESSED|LEFT_CTRL_PRESSED) &&
41 (record.Event.KeyEvent.wVirtualKeyCode >= 'A' &&
42 record.Event.KeyEvent.wVirtualKeyCode <= 'Z')) {
43 ret = record.Event.KeyEvent.wVirtualKeyCode & ~0x40;
44 break;
45 }
46
47 switch (record.Event.KeyEvent.wVirtualKeyCode) {
48 case VK_DELETE: ret = KEYCODE_DELETE; goto done;
49 case VK_INSERT: ret = KEYCODE_INSERT; goto done;
50 case VK_UP: ret = KEYCODE_UP; goto done;
51 case VK_DOWN: ret = KEYCODE_DOWN; goto done;
52 case VK_RIGHT:
53 if (state & (RIGHT_CTRL_PRESSED|LEFT_CTRL_PRESSED)) {
54 ret = KEYCODE_CTRL_RIGHT;
55 goto done;
56 }
57 ret = KEYCODE_RIGHT;
58 goto done;
59 case VK_LEFT:
60 if (state & (RIGHT_CTRL_PRESSED|LEFT_CTRL_PRESSED)) {
61 ret = KEYCODE_CTRL_LEFT;
62 goto done;
63 }
64 ret = KEYCODE_LEFT;
65 goto done;
66 case VK_HOME: ret = KEYCODE_HOME; goto done;
67 case VK_END: ret = KEYCODE_END; goto done;
68 case VK_PRIOR: ret = KEYCODE_PAGEUP; goto done;
69 case VK_NEXT: ret = KEYCODE_PAGEDOWN; goto done;
70 }
71 continue;
72 }
73 if ( (record.Event.KeyEvent.uChar.AsciiChar & 0x80) == 0x80 ) {
74 s = &record.Event.KeyEvent.uChar.AsciiChar;
75 OemToCharBuff(s, s, 1);
76 }
77 ret = record.Event.KeyEvent.uChar.AsciiChar;
78 break;
79 }
80 done:
81 SetConsoleMode(cin, mode);
82 return ret;
83}
diff --git a/win32/termios.h b/win32/termios.h
new file mode 100644
index 000000000..011a37eb9
--- /dev/null
+++ b/win32/termios.h
@@ -0,0 +1,129 @@
1/* iflag bits */
2#define IGNBRK 0x00001
3#define BRKINT 0x00002
4#define IGNPAR 0x00004
5#define IMAXBEL 0x00008
6#define INPCK 0x00010
7#define ISTRIP 0x00020
8#define INLCR 0x00040
9#define IGNCR 0x00080
10#define ICRNL 0x00100
11#define IXON 0x00400
12#define IXOFF 0x01000
13#define IUCLC 0x04000
14#define IXANY 0x08000
15#define PARMRK 0x10000
16
17/* oflag bits */
18
19#define OPOST 0x00001
20#define OLCUC 0x00002
21#define OCRNL 0x00004
22#define ONLCR 0x00008
23#define ONOCR 0x00010
24#define ONLRET 0x00020
25#define OFILL 0x00040
26#define CRDLY 0x00180
27#define CR0 0x00000
28#define CR1 0x00080
29#define CR2 0x00100
30#define CR3 0x00180
31#define NLDLY 0x00200
32#define NL0 0x00000
33#define NL1 0x00200
34#define BSDLY 0x00400
35#define BS0 0x00000
36#define BS1 0x00400
37#define TABDLY 0x01800
38#define TAB0 0x00000
39#define TAB1 0x00800
40#define TAB2 0x01000
41#define TAB3 0x01800
42#define XTABS 0x01800
43#define VTDLY 0x02000
44#define VT0 0x00000
45#define VT1 0x02000
46#define FFDLY 0x04000
47#define FF0 0x00000
48#define FF1 0x04000
49#define OFDEL 0x08000
50
51/* lflag bits */
52#define ISIG 0x0001
53#define ICANON 0x0002
54#define ECHO 0x0004
55#define ECHOE 0x0008
56#define ECHOK 0x0010
57#define ECHONL 0x0020
58#define NOFLSH 0x0040
59#define TOSTOP 0x0080
60#define IEXTEN 0x0100
61#define FLUSHO 0x0200
62#define ECHOKE 0x0400
63#define ECHOCTL 0x0800
64
65#define VDISCARD 1
66#define VEOL 2
67#define VEOL2 3
68#define VEOF 4
69#define VERASE 5
70#define VINTR 6
71#define VKILL 7
72#define VLNEXT 8
73#define VMIN 9
74#define VQUIT 10
75#define VREPRINT 11
76#define VSTART 12
77#define VSTOP 13
78#define VSUSP 14
79#define VSWTC 15
80#define VTIME 16
81#define VWERASE 17
82
83#define TCIFLUSH 0
84#define TCSAFLUSH 1
85#define TCSANOW 2
86#define TCSADRAIN 3
87#define TCSADFLUSH 4
88
89#define B0 0000000 /* hang up */
90#define B50 0000001
91#define B75 0000002
92#define B110 0000003
93#define B134 0000004
94#define B150 0000005
95#define B200 0000006
96#define B300 0000007
97#define B600 0000010
98#define B1200 0000011
99#define B1800 0000012
100#define B2400 0000013
101#define B4800 0000014
102#define B9600 0000015
103
104typedef unsigned char cc_t;
105typedef unsigned int tcflag_t;
106typedef unsigned int speed_t;
107typedef unsigned short otcflag_t;
108typedef unsigned char ospeed_t;
109
110#define NCCS 18
111struct termios {
112 tcflag_t c_iflag;
113 tcflag_t c_oflag;
114 tcflag_t c_cflag;
115 tcflag_t c_lflag;
116 char c_line;
117 cc_t c_cc[NCCS];
118 speed_t c_ispeed;
119 speed_t c_ospeed;
120};
121
122struct winsize {
123 unsigned short ws_row, ws_col;
124 unsigned short ws_xpixel, ws_ypixel;
125};
126
127int tcflush(int fd, int queue_selector);
128int tcgetattr(int fd, struct termios *t);
129int tcsetattr(int fd, int mode, const struct termios *t);
diff --git a/win32/uname.c b/win32/uname.c
new file mode 100644
index 000000000..3b3e21f8d
--- /dev/null
+++ b/win32/uname.c
@@ -0,0 +1,48 @@
1#include "libbb.h"
2/* After libbb.h, since it needs sys/types.h on some systems */
3#include <sys/utsname.h>
4
5int uname(struct utsname *name)
6{
7 const char *unk = "unknown";
8 OSVERSIONINFO os_info;
9 SYSTEM_INFO sys_info;
10
11 strcpy(name->sysname, "Windows_NT");
12
13 if ( gethostname(name->nodename, sizeof(name->nodename)) != 0 ) {
14 strcpy(name->nodename, unk);
15 }
16
17 memset(&os_info, 0, sizeof(OSVERSIONINFO));
18 os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
19
20 strcpy(name->release, unk);
21 strcpy(name->version, unk);
22 if (GetVersionEx(&os_info)) {
23 sprintf(name->release, "%u.%u", (unsigned int)os_info.dwMajorVersion,
24 (unsigned int)os_info.dwMinorVersion);
25 sprintf(name->version, "%u", (unsigned int)os_info.dwBuildNumber);
26 }
27
28 strcpy(name->machine, unk);
29 GetSystemInfo(&sys_info);
30 switch (sys_info.wProcessorArchitecture) {
31 case PROCESSOR_ARCHITECTURE_AMD64:
32 strcpy(name->machine, "x86_64");
33 break;
34 case PROCESSOR_ARCHITECTURE_IA64:
35 strcpy(name->machine, "ia64");
36 break;
37 case PROCESSOR_ARCHITECTURE_INTEL:
38 if (sys_info.wProcessorLevel < 6) {
39 strcpy(name->machine, "i386");
40 }
41 else {
42 strcpy(name->machine, "i686");
43 }
44 break;
45 }
46
47 return 0;
48}
diff --git a/win32/winansi.c b/win32/winansi.c
new file mode 100644
index 000000000..7c7e1a626
--- /dev/null
+++ b/win32/winansi.c
@@ -0,0 +1,735 @@
1/*
2 * Copyright 2008 Peter Harris <git@peter.is-a-geek.org>
3 */
4
5#include "libbb.h"
6#include <windows.h>
7#undef PACKED
8
9/*
10 Functions to be wrapped:
11*/
12#undef vfprintf
13#undef vprintf
14#undef printf
15#undef fprintf
16#undef fputs
17#undef putchar
18#undef fwrite
19#undef puts
20#undef write
21#undef read
22#undef getc
23
24/*
25 ANSI codes used by git: m, K
26
27 This file is git-specific. Therefore, this file does not attempt
28 to implement any codes that are not used by git.
29*/
30
31static HANDLE console;
32static HANDLE console_in;
33static WORD plain_attr;
34static WORD attr;
35static int negative;
36
37static void init(void)
38{
39 CONSOLE_SCREEN_BUFFER_INFO sbi;
40
41 static int initialized = 0;
42 if (initialized)
43 return;
44
45 console_in = GetStdHandle(STD_INPUT_HANDLE);
46 if (console_in == INVALID_HANDLE_VALUE)
47 console_in = NULL;
48
49 console = GetStdHandle(STD_OUTPUT_HANDLE);
50 if (console == INVALID_HANDLE_VALUE)
51 console = NULL;
52
53 if (!console)
54 return;
55
56 GetConsoleScreenBufferInfo(console, &sbi);
57 attr = plain_attr = sbi.wAttributes;
58 negative = 0;
59
60 initialized = 1;
61}
62
63static int skip_ansi_emulation(void)
64{
65 static char *var = NULL;
66 static int got_var = FALSE;
67
68 if (!got_var) {
69 var = getenv("BB_SKIP_ANSI_EMULATION");
70 got_var = TRUE;
71 }
72
73 return var != NULL;
74}
75
76
77#define FOREGROUND_ALL (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE)
78#define BACKGROUND_ALL (BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE)
79
80static void set_console_attr(void)
81{
82 WORD attributes = attr;
83 if (negative) {
84 attributes &= ~FOREGROUND_ALL;
85 attributes &= ~BACKGROUND_ALL;
86
87 /* This could probably use a bitmask
88 instead of a series of ifs */
89 if (attr & FOREGROUND_RED)
90 attributes |= BACKGROUND_RED;
91 if (attr & FOREGROUND_GREEN)
92 attributes |= BACKGROUND_GREEN;
93 if (attr & FOREGROUND_BLUE)
94 attributes |= BACKGROUND_BLUE;
95
96 if (attr & BACKGROUND_RED)
97 attributes |= FOREGROUND_RED;
98 if (attr & BACKGROUND_GREEN)
99 attributes |= FOREGROUND_GREEN;
100 if (attr & BACKGROUND_BLUE)
101 attributes |= FOREGROUND_BLUE;
102 }
103 SetConsoleTextAttribute(console, attributes);
104}
105
106static void erase_in_line(void)
107{
108 CONSOLE_SCREEN_BUFFER_INFO sbi;
109 DWORD dummy; /* Needed for Windows 7 (or Vista) regression */
110
111 if (!console)
112 return;
113
114 GetConsoleScreenBufferInfo(console, &sbi);
115 FillConsoleOutputCharacterA(console, ' ',
116 sbi.dwSize.X - sbi.dwCursorPosition.X, sbi.dwCursorPosition,
117 &dummy);
118 FillConsoleOutputAttribute(console, plain_attr,
119 sbi.dwSize.X - sbi.dwCursorPosition.X, sbi.dwCursorPosition,
120 &dummy);
121}
122
123static void erase_till_end_of_screen(void)
124{
125 CONSOLE_SCREEN_BUFFER_INFO sbi;
126 DWORD dummy, len;
127
128 if (!console)
129 return;
130
131 GetConsoleScreenBufferInfo(console, &sbi);
132 len = sbi.dwSize.X - sbi.dwCursorPosition.X +
133 sbi.dwSize.X * (sbi.srWindow.Bottom - sbi.dwCursorPosition.Y);
134
135 FillConsoleOutputCharacterA(console, ' ', len, sbi.dwCursorPosition,
136 &dummy);
137 FillConsoleOutputAttribute(console, plain_attr, len, sbi.dwCursorPosition,
138 &dummy);
139}
140
141void reset_screen(void)
142{
143 CONSOLE_SCREEN_BUFFER_INFO sbi;
144 COORD pos;
145 DWORD dummy, len;
146
147 if (!console)
148 return;
149
150 /* move to start of screen buffer and clear it all */
151 GetConsoleScreenBufferInfo(console, &sbi);
152 pos.X = 0;
153 pos.Y = 0;
154 SetConsoleCursorPosition(console, pos);
155 len = sbi.dwSize.X * sbi.dwSize.Y;
156 FillConsoleOutputCharacterA(console, ' ', len, pos, &dummy);
157 FillConsoleOutputAttribute(console, plain_attr, len, pos, &dummy);
158}
159
160void move_cursor_row(int n)
161{
162 CONSOLE_SCREEN_BUFFER_INFO sbi;
163
164 if (!console)
165 return;
166
167 GetConsoleScreenBufferInfo(console, &sbi);
168 sbi.dwCursorPosition.Y += n;
169 SetConsoleCursorPosition(console, sbi.dwCursorPosition);
170}
171
172static void move_cursor_column(int n)
173{
174 CONSOLE_SCREEN_BUFFER_INFO sbi;
175
176 if (!console)
177 return;
178
179 GetConsoleScreenBufferInfo(console, &sbi);
180 sbi.dwCursorPosition.X += n;
181 SetConsoleCursorPosition(console, sbi.dwCursorPosition);
182}
183
184static void move_cursor(int x, int y)
185{
186 COORD pos;
187 CONSOLE_SCREEN_BUFFER_INFO sbi;
188
189 if (!console)
190 return;
191
192 GetConsoleScreenBufferInfo(console, &sbi);
193 pos.X = sbi.srWindow.Left + x;
194 pos.Y = sbi.srWindow.Top + y;
195 SetConsoleCursorPosition(console, pos);
196}
197
198static const char *set_attr(const char *str)
199{
200 const char *func;
201 size_t len = strspn(str, "0123456789;");
202 func = str + len;
203
204 switch (*func) {
205 case 'm':
206 do {
207 long val = strtol(str, (char **)&str, 10);
208 switch (val) {
209 case 0: /* reset */
210 attr = plain_attr;
211 negative = 0;
212 break;
213 case 1: /* bold */
214 attr |= FOREGROUND_INTENSITY;
215 break;
216 case 2: /* faint */
217 case 22: /* normal */
218 attr &= ~FOREGROUND_INTENSITY;
219 break;
220 case 3: /* italic */
221 /* Unsupported */
222 break;
223 case 4: /* underline */
224 case 21: /* double underline */
225 /* Wikipedia says this flag does nothing */
226 /* Furthermore, mingw doesn't define this flag
227 attr |= COMMON_LVB_UNDERSCORE; */
228 break;
229 case 24: /* no underline */
230 /* attr &= ~COMMON_LVB_UNDERSCORE; */
231 break;
232 case 5: /* slow blink */
233 case 6: /* fast blink */
234 /* We don't have blink, but we do have
235 background intensity */
236 attr |= BACKGROUND_INTENSITY;
237 break;
238 case 25: /* no blink */
239 attr &= ~BACKGROUND_INTENSITY;
240 break;
241 case 7: /* negative */
242 negative = 1;
243 break;
244 case 27: /* positive */
245 negative = 0;
246 break;
247 case 8: /* conceal */
248 case 28: /* reveal */
249 /* Unsupported */
250 break;
251 case 30: /* Black */
252 attr &= ~FOREGROUND_ALL;
253 break;
254 case 31: /* Red */
255 attr &= ~FOREGROUND_ALL;
256 attr |= FOREGROUND_RED;
257 break;
258 case 32: /* Green */
259 attr &= ~FOREGROUND_ALL;
260 attr |= FOREGROUND_GREEN;
261 break;
262 case 33: /* Yellow */
263 attr &= ~FOREGROUND_ALL;
264 attr |= FOREGROUND_RED | FOREGROUND_GREEN;
265 break;
266 case 34: /* Blue */
267 attr &= ~FOREGROUND_ALL;
268 attr |= FOREGROUND_BLUE;
269 break;
270 case 35: /* Magenta */
271 attr &= ~FOREGROUND_ALL;
272 attr |= FOREGROUND_RED | FOREGROUND_BLUE;
273 break;
274 case 36: /* Cyan */
275 attr &= ~FOREGROUND_ALL;
276 attr |= FOREGROUND_GREEN | FOREGROUND_BLUE;
277 break;
278 case 37: /* White */
279 attr |= FOREGROUND_RED |
280 FOREGROUND_GREEN |
281 FOREGROUND_BLUE;
282 break;
283 case 38: /* Unknown */
284 break;
285 case 39: /* reset */
286 attr &= ~FOREGROUND_ALL;
287 attr |= (plain_attr & FOREGROUND_ALL);
288 break;
289 case 40: /* Black */
290 attr &= ~BACKGROUND_ALL;
291 break;
292 case 41: /* Red */
293 attr &= ~BACKGROUND_ALL;
294 attr |= BACKGROUND_RED;
295 break;
296 case 42: /* Green */
297 attr &= ~BACKGROUND_ALL;
298 attr |= BACKGROUND_GREEN;
299 break;
300 case 43: /* Yellow */
301 attr &= ~BACKGROUND_ALL;
302 attr |= BACKGROUND_RED | BACKGROUND_GREEN;
303 break;
304 case 44: /* Blue */
305 attr &= ~BACKGROUND_ALL;
306 attr |= BACKGROUND_BLUE;
307 break;
308 case 45: /* Magenta */
309 attr &= ~BACKGROUND_ALL;
310 attr |= BACKGROUND_RED | BACKGROUND_BLUE;
311 break;
312 case 46: /* Cyan */
313 attr &= ~BACKGROUND_ALL;
314 attr |= BACKGROUND_GREEN | BACKGROUND_BLUE;
315 break;
316 case 47: /* White */
317 attr |= BACKGROUND_RED |
318 BACKGROUND_GREEN |
319 BACKGROUND_BLUE;
320 break;
321 case 48: /* Unknown */
322 break;
323 case 49: /* reset */
324 attr &= ~BACKGROUND_ALL;
325 attr |= (plain_attr & BACKGROUND_ALL);
326 break;
327 default:
328 /* Unsupported code */
329 break;
330 }
331 str++;
332 } while (*(str-1) == ';');
333
334 set_console_attr();
335 break;
336 case 'A': /* up */
337 move_cursor_row(-strtol(str, (char **)&str, 10));
338 break;
339 case 'B': /* down */
340 move_cursor_row(strtol(str, (char **)&str, 10));
341 break;
342 case 'C': /* forward */
343 move_cursor_column(strtol(str, (char **)&str, 10));
344 break;
345 case 'D': /* back */
346 move_cursor_column(-strtol(str, (char **)&str, 10));
347 break;
348 case 'H':
349 if (!len)
350 move_cursor(0, 0);
351 else {
352 int row, col = 1;
353
354 row = strtol(str, (char **)&str, 10);
355 if (*str == ';') {
356 col = strtol(str+1, (char **)&str, 10);
357 }
358 move_cursor(col > 0 ? col-1 : 0, row > 0 ? row-1 : 0);
359 }
360 break;
361 case 'J':
362 erase_till_end_of_screen();
363 break;
364 case 'K':
365 erase_in_line();
366 break;
367 case '?':
368 /* skip this to avoid ugliness when vi is shut down */
369 ++str;
370 while (isdigit(*str))
371 ++str;
372 func = str;
373 break;
374 default:
375 /* Unsupported code */
376 break;
377 }
378
379 return func + 1;
380}
381
382static int ansi_emulate(const char *s, FILE *stream)
383{
384 int rv = 0;
385 const char *t;
386 char *pos, *str;
387 size_t out_len, cur_len;
388 static size_t max_len = 0;
389 static char *mem = NULL;
390
391 /* if no special treatment is required output the string as-is */
392 for ( t=s; *t; ++t ) {
393 if ( *t == '\033' || *t > 0x7f ) {
394 break;
395 }
396 }
397
398 if ( *t == '\0' ) {
399 return fputs(s, stream) == EOF ? EOF : strlen(s);
400 }
401
402 /* make a writable copy of the string and retain it for reuse */
403 cur_len = strlen(s);
404 if ( cur_len == 0 || cur_len > max_len ) {
405 free(mem);
406 mem = strdup(s);
407 max_len = cur_len;
408 }
409 else {
410 strcpy(mem, s);
411 }
412 pos = str = mem;
413
414 while (*pos) {
415 pos = strstr(str, "\033[");
416 if (pos && !skip_ansi_emulation()) {
417 size_t len = pos - str;
418
419 if (len) {
420 CharToOemBuff(str, str, len);
421 out_len = fwrite(str, 1, len, stream);
422 rv += out_len;
423 if (out_len < len)
424 return rv;
425 }
426
427 str = pos + 2;
428 rv += 2;
429
430 fflush(stream);
431
432 pos = (char *)set_attr(str);
433 rv += pos - str;
434 str = pos;
435 } else {
436 rv += strlen(str);
437 CharToOem(str, str);
438 fputs(str, stream);
439 return rv;
440 }
441 }
442 return rv;
443}
444
445int winansi_putchar(int c)
446{
447 char t = c;
448 char *s = &t;
449
450 if (!isatty(STDOUT_FILENO))
451 return putchar(c);
452
453 init();
454
455 if (!console)
456 return putchar(c);
457
458 CharToOemBuff(s, s, 1);
459 return putchar(t) == EOF ? EOF : c;
460}
461
462int winansi_puts(const char *s)
463{
464 int rv;
465
466 if (!isatty(STDOUT_FILENO))
467 return puts(s);
468
469 init();
470
471 if (!console)
472 return puts(s);
473
474 rv = ansi_emulate(s, stdout);
475 putchar('\n');
476
477 return rv;
478}
479
480size_t winansi_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
481{
482 size_t lsize, lmemb;
483 char *str;
484 int rv;
485
486 lsize = MIN(size, nmemb);
487 lmemb = MAX(size, nmemb);
488 if (lsize != 1)
489 return fwrite(ptr, size, nmemb, stream);
490
491 if (!isatty(fileno(stream)))
492 return fwrite(ptr, size, nmemb, stream);
493
494 init();
495
496 if (!console)
497 return fwrite(ptr, size, nmemb, stream);
498
499 str = xmalloc(lmemb+1);
500 memcpy(str, ptr, lmemb);
501 str[lmemb] = '\0';
502
503 rv = ansi_emulate(str, stream);
504 free(str);
505
506 return rv;
507}
508
509int winansi_fputs(const char *str, FILE *stream)
510{
511 int rv;
512
513 if (!isatty(fileno(stream)))
514 return fputs(str, stream);
515
516 init();
517
518 if (!console)
519 return fputs(str, stream);
520
521 rv = ansi_emulate(str, stream);
522
523 if (rv >= 0)
524 return 0;
525 else
526 return EOF;
527}
528
529int winansi_vfprintf(FILE *stream, const char *format, va_list list)
530{
531 int len, rv;
532 char small_buf[256];
533 char *buf = small_buf;
534 va_list cp;
535
536 if (!isatty(fileno(stream)))
537 goto abort;
538
539 init();
540
541 if (!console)
542 goto abort;
543
544 va_copy(cp, list);
545 len = vsnprintf(small_buf, sizeof(small_buf), format, cp);
546 va_end(cp);
547
548 if (len > sizeof(small_buf) - 1) {
549 buf = malloc(len + 1);
550 if (!buf)
551 goto abort;
552
553 va_copy(cp, list);
554 len = vsnprintf(buf, len + 1, format, cp);
555 va_end(cp);
556 }
557
558 if (len == -1)
559 goto abort;
560
561 rv = ansi_emulate(buf, stream);
562
563 if (buf != small_buf)
564 free(buf);
565 return rv;
566
567abort:
568 rv = vfprintf(stream, format, list);
569 return rv;
570}
571
572int winansi_fprintf(FILE *stream, const char *format, ...)
573{
574 va_list list;
575 int rv;
576
577 va_start(list, format);
578 rv = winansi_vfprintf(stream, format, list);
579 va_end(list);
580
581 return rv;
582}
583
584int winansi_printf(const char *format, ...)
585{
586 va_list list;
587 int rv;
588
589 va_start(list, format);
590 rv = winansi_vfprintf(stdout, format, list);
591 va_end(list);
592
593 return rv;
594}
595
596int winansi_get_terminal_width_height(struct winsize *win)
597{
598 BOOL ret;
599 CONSOLE_SCREEN_BUFFER_INFO sbi;
600
601 init();
602
603 win->ws_row = 0;
604 win->ws_col = 0;
605 if ((ret=GetConsoleScreenBufferInfo(console, &sbi)) != 0) {
606 win->ws_row = sbi.srWindow.Bottom - sbi.srWindow.Top + 1;
607 win->ws_col = sbi.srWindow.Right - sbi.srWindow.Left + 1;
608 }
609
610 return ret ? 0 : -1;
611}
612
613static int ansi_emulate_write(int fd, const void *buf, size_t count)
614{
615 int rv = 0, i;
616 int special = FALSE, has_null = FALSE;
617 const char *s = (const char *)buf;
618 char *pos, *str;
619 size_t len, out_len;
620 static size_t max_len = 0;
621 static char *mem = NULL;
622
623 for ( i=0; i<count; ++i ) {
624 if ( s[i] == '\033' || s[i] > 0x7f ) {
625 special = TRUE;
626 }
627 else if ( !s[i] ) {
628 has_null = TRUE;
629 }
630 }
631
632 /*
633 * If no special treatment is required or the data contains NUL
634 * characters output the string as-is.
635 */
636 if ( !special || has_null ) {
637 return write(fd, buf, count);
638 }
639
640 /* make a writable copy of the data and retain it for reuse */
641 if ( count > max_len ) {
642 free(mem);
643 mem = malloc(count+1);
644 max_len = count;
645 }
646 memcpy(mem, buf, count);
647 mem[count] = '\0';
648 pos = str = mem;
649
650 /* we've checked the data doesn't contain any NULs */
651 while (*pos) {
652 pos = strstr(str, "\033[");
653 if (pos && !skip_ansi_emulation()) {
654 len = pos - str;
655
656 if (len) {
657 CharToOemBuff(str, str, len);
658 out_len = write(fd, str, len);
659 rv += out_len;
660 if (out_len < len)
661 return rv;
662 }
663
664 str = pos + 2;
665 rv += 2;
666
667 pos = (char *)set_attr(str);
668 rv += pos - str;
669 str = pos;
670 } else {
671 len = strlen(str);
672 rv += len;
673 CharToOem(str, str);
674 write(fd, str, len);
675 return rv;
676 }
677 }
678 return rv;
679}
680
681int winansi_write(int fd, const void *buf, size_t count)
682{
683 if (!isatty(fd))
684 return write(fd, buf, count);
685
686 init();
687
688 if (!console)
689 return write(fd, buf, count);
690
691 return ansi_emulate_write(fd, buf, count);
692}
693
694int winansi_read(int fd, void *buf, size_t count)
695{
696 int rv;
697
698 rv = read(fd, buf, count);
699 if (!isatty(fd))
700 return rv;
701
702 init();
703
704 if (!console_in)
705 return rv;
706
707 if ( rv > 0 ) {
708 OemToCharBuff(buf, buf, rv);
709 }
710
711 return rv;
712}
713
714int winansi_getc(FILE *stream)
715{
716 int rv;
717
718 rv = getc(stream);
719 if (!isatty(fileno(stream)))
720 return rv;
721
722 init();
723
724 if (!console_in)
725 return rv;
726
727 if ( rv != EOF ) {
728 unsigned char c = (unsigned char)rv;
729 char *s = (char *)&c;
730 OemToCharBuff(s, s, 1);
731 rv = (int)c;
732 }
733
734 return rv;
735}