aboutsummaryrefslogtreecommitdiff
path: root/win32
diff options
context:
space:
mode:
Diffstat (limited to 'win32')
-rw-r--r--win32/Kbuild34
-rw-r--r--win32/arpa/inet.h0
-rw-r--r--win32/dirent.c106
-rw-r--r--win32/dirent.h20
-rw-r--r--win32/dirname.c287
-rw-r--r--win32/env.c117
-rw-r--r--win32/fnmatch.c525
-rw-r--r--win32/fnmatch.h84
-rw-r--r--win32/fsync.c75
-rw-r--r--win32/glob.c343
-rw-r--r--win32/glob.h89
-rw-r--r--win32/grp.h0
-rw-r--r--win32/inet_pton.c95
-rw-r--r--win32/ioctl.c46
-rw-r--r--win32/isaac.c192
-rw-r--r--win32/lazyload.h27
-rw-r--r--win32/match_class.c7
-rw-r--r--win32/match_class.h11
-rw-r--r--win32/mingw.c2540
-rw-r--r--win32/mntent.c94
-rw-r--r--win32/mntent.h33
-rw-r--r--win32/net.c146
-rw-r--r--win32/net/if.h0
-rw-r--r--win32/netdb.h0
-rw-r--r--win32/netinet/in.h0
-rw-r--r--win32/paths.h0
-rw-r--r--win32/poll.c656
-rw-r--r--win32/poll.h53
-rw-r--r--win32/popen.c316
-rw-r--r--win32/process.c955
-rw-r--r--win32/pwd.h0
-rw-r--r--win32/regcomp.c3936
-rw-r--r--win32/regex.c90
-rw-r--r--win32/regex.h582
-rw-r--r--win32/regex_internal.c1744
-rw-r--r--win32/regex_internal.h810
-rw-r--r--win32/regexec.c4369
-rw-r--r--win32/resources/COPYING_CCBYSA37
-rw-r--r--win32/resources/Kbuild.src29
-rw-r--r--win32/resources/README9
-rw-r--r--win32/resources/app.manifest24
-rw-r--r--win32/resources/aterm.icobin0 -> 15086 bytes
-rw-r--r--win32/resources/dummy.c0
-rw-r--r--win32/resources/resources.rc45
-rw-r--r--win32/resources/sterm.icobin0 -> 15086 bytes
-rw-r--r--win32/resources/utf8.manifest30
-rw-r--r--win32/sched.h1
-rw-r--r--win32/select.c592
-rw-r--r--win32/sh_random.c59
-rw-r--r--win32/statfs.c70
-rw-r--r--win32/strndup.c36
-rw-r--r--win32/strptime.c603
-rw-r--r--win32/strverscmp.c62
-rw-r--r--win32/sys/inotify.h0
-rw-r--r--win32/sys/ioctl.h0
-rw-r--r--win32/sys/mman.h0
-rw-r--r--win32/sys/resource.h11
-rw-r--r--win32/sys/select.h0
-rw-r--r--win32/sys/socket.h0
-rw-r--r--win32/sys/statfs.h22
-rw-r--r--win32/sys/statvfs.h3
-rw-r--r--win32/sys/syscall.h0
-rw-r--r--win32/sys/sysmacros.h0
-rw-r--r--win32/sys/times.h0
-rw-r--r--win32/sys/un.h0
-rw-r--r--win32/sys/utsname.h66
-rw-r--r--win32/sys/wait.h0
-rw-r--r--win32/system.c22
-rw-r--r--win32/termios.c128
-rw-r--r--win32/termios.h31
-rw-r--r--win32/timegm.c133
-rw-r--r--win32/uname.c47
-rw-r--r--win32/winansi.c1608
73 files changed, 22020 insertions, 0 deletions
diff --git a/win32/Kbuild b/win32/Kbuild
new file mode 100644
index 000000000..1bb79bfd3
--- /dev/null
+++ b/win32/Kbuild
@@ -0,0 +1,34 @@
1# Makefile for busybox
2#
3# Licensed under the GPL v2, see the file LICENSE in this tarball.
4
lib-y :=

# Objects are appended in the same order as before; only the grouping of
# consecutive entries that share a config symbol has changed.

# Built whenever CONFIG_PLATFORM_MINGW32 is enabled.
lib-$(CONFIG_PLATFORM_MINGW32) += dirent.o dirname.o env.o fnmatch.o \
	fsync.o glob.o inet_pton.o ioctl.o

# Built only when CONFIG_FEATURE_PRNG_ISAAC is enabled.
lib-$(CONFIG_FEATURE_PRNG_ISAAC) += isaac.o

lib-$(CONFIG_PLATFORM_MINGW32) += mingw.o process.o match_class.o mntent.o \
	net.o poll.o popen.o regex.o select.o

# Built only when CONFIG_FEATURE_PRNG_SHELL is enabled.
lib-$(CONFIG_FEATURE_PRNG_SHELL) += sh_random.o

lib-$(CONFIG_PLATFORM_MINGW32) += statfs.o strndup.o strptime.o \
	strverscmp.o system.o termios.o timegm.o uname.o winansi.o
diff --git a/win32/arpa/inet.h b/win32/arpa/inet.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/arpa/inet.h
diff --git a/win32/dirent.c b/win32/dirent.c
new file mode 100644
index 000000000..795fc779c
--- /dev/null
+++ b/win32/dirent.c
@@ -0,0 +1,106 @@
1#include "libbb.h"
2
3struct DIR {
4 struct dirent dd_dir;
5 HANDLE dd_handle; /* FindFirstFile handle */
6 int dd_stat; /* 0-based index */
7};
8
9static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAA *fdata)
10{
11 /* copy file name from WIN32_FIND_DATA to dirent */
12 strcpy(ent->d_name, fdata->cFileName);
13
14 if ((fdata->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
15 (fdata->dwReserved0 == IO_REPARSE_TAG_SYMLINK ||
16 fdata->dwReserved0 == IO_REPARSE_TAG_MOUNT_POINT ||
17 fdata->dwReserved0 == IO_REPARSE_TAG_APPEXECLINK))
18 ent->d_type = DT_LNK;
19 else if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
20 ent->d_type = DT_DIR;
21 else
22 ent->d_type = DT_REG;
23}
24
25DIR *opendir(const char *name)
26{
27 char pattern[MAX_PATH];
28 WIN32_FIND_DATAA fdata;
29 HANDLE h;
30 int len;
31 DIR *dir;
32
33 /* check that name is not NULL */
34 if (!name) {
35 errno = EINVAL;
36 return NULL;
37 }
38 /* check that the pattern won't be too long for FindFirstFileA */
39 len = strlen(name);
40 if (len + 2 >= MAX_PATH) {
41 errno = ENAMETOOLONG;
42 return NULL;
43 }
44 /* copy name to temp buffer */
45 strcpy(pattern, name);
46
47 /* append optional '/' and wildcard '*' */
48 if (len && !is_dir_sep(pattern[len - 1]))
49 pattern[len++] = '/';
50 pattern[len++] = '*';
51 pattern[len] = 0;
52
53 /* open find handle */
54 h = FindFirstFileA(pattern, &fdata);
55 if (h == INVALID_HANDLE_VALUE) {
56 DWORD err = GetLastError();
57 errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix();
58 return NULL;
59 }
60
61 /* initialize DIR structure and copy first dir entry */
62 dir = xmalloc(sizeof(DIR));
63 dir->dd_handle = h;
64 dir->dd_stat = 0;
65 finddata2dirent(&dir->dd_dir, &fdata);
66 return dir;
67}
68
69struct dirent *readdir(DIR *dir)
70{
71 if (!dir) {
72 errno = EBADF; /* No set_errno for mingw */
73 return NULL;
74 }
75
76 /* if first entry, dirent has already been set up by opendir */
77 if (dir->dd_stat) {
78 /* get next entry and convert from WIN32_FIND_DATA to dirent */
79 WIN32_FIND_DATAA fdata;
80 if (FindNextFileA(dir->dd_handle, &fdata)) {
81 finddata2dirent(&dir->dd_dir, &fdata);
82 } else {
83 DWORD lasterr = GetLastError();
84 /* POSIX says you shouldn't set errno when readdir can't
85 find any more files; so, if another error we leave it set. */
86 if (lasterr != ERROR_NO_MORE_FILES)
87 errno = err_win_to_posix();
88 return NULL;
89 }
90 }
91
92 ++dir->dd_stat;
93 return &dir->dd_dir;
94}
95
96int closedir(DIR *dir)
97{
98 if (!dir) {
99 errno = EBADF;
100 return -1;
101 }
102
103 FindClose(dir->dd_handle);
104 free(dir);
105 return 0;
106}
diff --git a/win32/dirent.h b/win32/dirent.h
new file mode 100644
index 000000000..4e7971ef6
--- /dev/null
+++ b/win32/dirent.h
@@ -0,0 +1,20 @@
1#ifndef DIRENT_H
2#define DIRENT_H
3
4typedef struct DIR DIR;
5
6#define DT_UNKNOWN 0
7#define DT_DIR 1
8#define DT_REG 2
9#define DT_LNK 3
10
11struct dirent {
12 unsigned char d_type;
13 char d_name[PATH_MAX]; // file name
14};
15
16DIR *opendir(const char *dirname);
17struct dirent *readdir(DIR *dir);
18int closedir(DIR *dir);
19
20#endif /* DIRENT_H */
diff --git a/win32/dirname.c b/win32/dirname.c
new file mode 100644
index 000000000..dd62b8b08
--- /dev/null
+++ b/win32/dirname.c
@@ -0,0 +1,287 @@
1/**
2 * This file has no copyright assigned and is placed in the Public Domain.
3 * This file is part of the mingw-w64 runtime package.
4 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
5 */
6#ifndef WIN32_LEAN_AND_MEAN
7#define WIN32_LEAN_AND_MEAN
8#endif
9#include <stdlib.h>
10#include <libgen.h>
11#include <windows.h>
12
13#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR > 11
14
15/* A 'directory separator' is a byte that equals 0x2F ('solidus' or more
16 * commonly 'forward slash') or 0x5C ('reverse solidus' or more commonly
17 * 'backward slash'). The byte 0x5C may look different from a backward slash
18 * in some locales; for example, it looks the same as a Yen sign in Japanese
19 * locales and a Won sign in Korean locales. Despite its appearance, it still
20 * functions as a directory separator.
21 *
22 * A 'path' comprises an optional DOS drive letter with a colon, and then an
 * arbitrary number of possibly empty components, separated by non-empty
24 * sequences of directory separators (in other words, consecutive directory
25 * separators are treated as a single one). A path that comprises an empty
26 * component denotes the current working directory.
27 *
28 * An 'absolute path' comprises at least two components, the first of which
29 * is empty.
30 *
31 * A 'relative path' is a path that is not an absolute path. In other words,
32 * it either comprises an empty component, or begins with a non-empty
33 * component.
34 *
35 * POSIX doesn't have a concept about DOS drives. A path that does not have a
36 * drive letter starts from the same drive as the current working directory.
37 *
38 * For example:
39 * (Examples without drive letters match POSIX.)
40 *
41 * Argument dirname() returns basename() returns
42 * -------- ----------------- ------------------
43 * `` or NULL `.` `.`
44 * `usr` `.` `usr`
45 * `usr\` `.` `usr`
46 * `\` `\` `\`
47 * `\usr` `\` `usr`
48 * `\usr\lib` `\usr` `lib`
49 * `\home\\dwc\\test` `\home\\dwc` `test`
50 * `\\host\usr` `\\host\.` `usr`
51 * `\\host\usr\lib` `\\host\usr` `lib`
52 * `\\host\\usr` `\\host\\` `usr`
53 * `\\host\\usr\lib` `\\host\\usr` `lib`
54 * `C:` `C:.` `.`
55 * `C:usr` `C:.` `usr`
56 * `C:usr\` `C:.` `usr`
57 * `C:\` `C:\` `\`
58 * `C:\\` `C:\` `\`
59 * `C:\\\` `C:\` `\`
60 * `C:\usr` `C:\` `usr`
61 * `C:\usr\lib` `C:\usr` `lib`
62 * `C:\\usr\\lib\\` `C:\\usr` `lib`
63 * `C:\home\\dwc\\test` `C:\home\\dwc` `test`
64 */
65
/* Landmarks located in a path string by do_get_path_info().  Every member
 * points into the scanned string; members other than path_end are NULL
 * when the corresponding feature is absent. */
struct path_info
  {
    /* End of the UNC prefix or drive letter, if the path has one. */
    char *prefix_end;

    /* Start and end of the separator run that precedes the last
     * non-empty component. */
    char *base_sep_begin;
    char *base_sep_end;

    /* Start of the final separator run, provided nothing but separators
     * follows it. */
    char *term_sep_begin;

    /* The string terminator. */
    char *path_end;
  };
83
/* Nonzero when C is a backslash or a forward slash.  C may be evaluated
 * more than once, so it must not have side effects. */
#define IS_DIR_SEP(c) ((c) == '\\' || (c) == '/')
85
86static
87void
88do_get_path_info(struct path_info* info, char* path)
89 {
90 char* pos = path;
91 int unc_ncoms = 0;
92 DWORD cp;
93 int dbcs_tb, prev_dir_sep, dir_sep;
94
95 /* Get the code page for paths in the same way as `fopen()`. */
96 cp = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
97
98 /* Set the structure to 'no data'. */
99 info->prefix_end = NULL;
100 info->base_sep_begin = NULL;
101 info->base_sep_end = NULL;
102 info->term_sep_begin = NULL;
103
104 if(IS_DIR_SEP(pos[0]) && IS_DIR_SEP(pos[1])) {
105 /* The path is UNC. */
106 pos += 2;
107
108 /* Seek to the end of the share/device name. */
109 dbcs_tb = 0;
110 prev_dir_sep = 0;
111
112 while(*pos != 0) {
113 dir_sep = 0;
114
115 if(dbcs_tb)
116 dbcs_tb = 0;
117 else if(IsDBCSLeadByteEx(cp, *pos))
118 dbcs_tb = 1;
119 else
120 dir_sep = IS_DIR_SEP(*pos);
121
122 /* If a separator has been encountered and the previous character
123 * was not, mark this as the end of the current component. */
124 if(dir_sep && !prev_dir_sep) {
125 unc_ncoms ++;
126
127 /* The first component is the host name, and the second is the
128 * share name. So we stop at the end of the second component. */
129 if(unc_ncoms == 2)
130 break;
131 }
132
133 prev_dir_sep = dir_sep;
134 pos ++;
135 }
136
137 /* The UNC prefix terminates here. The terminating directory separator
138 * is not part of the prefix, and initiates a new absolute path. */
139 info->prefix_end = pos;
140 }
141 else if((pos[0] >= 'A' && pos[0] <= 'Z' && pos[1] == ':')
142 || (pos[0] >= 'a' && pos[0] <= 'z' && pos[1] == ':')) {
143 /* The path contains a DOS drive letter in the beginning. */
144 pos += 2;
145
146 /* The DOS drive prefix terminates here. Unlike UNC paths, the remaing
147 * part can be relative. For example, `C:foo` denotes `foo` in the
148 * working directory of drive `C:`. */
149 info->prefix_end = pos;
150 }
151
152 /* The remaining part of the path is almost the same as POSIX. */
153 dbcs_tb = 0;
154 prev_dir_sep = 0;
155
156 while(*pos != 0) {
157 dir_sep = 0;
158
159 if(dbcs_tb)
160 dbcs_tb = 0;
161 else if(IsDBCSLeadByteEx(cp, *pos))
162 dbcs_tb = 1;
163 else
164 dir_sep = IS_DIR_SEP(*pos);
165
166 /* If a separator has been encountered and the previous character
167 * was not, mark this as the beginning of the terminating separator
168 * sequence. */
169 if(dir_sep && !prev_dir_sep)
170 info->term_sep_begin = pos;
171
172 /* If a non-separator character has been encountered and a previous
173 * terminating separator sequence exists, start a new component. */
174 if(!dir_sep && prev_dir_sep) {
175 info->base_sep_begin = info->term_sep_begin;
176 info->base_sep_end = pos;
177 info->term_sep_begin = NULL;
178 }
179
180 prev_dir_sep = dir_sep;
181 pos ++;
182 }
183
184 /* Store the end of the path for convenience. */
185 info->path_end = pos;
186 }
187
188char*
189dirname(char* path)
190 {
191 struct path_info info;
192 char* upath;
193 const char* top;
194 static char* static_path_copy;
195
196 if(path == NULL || path[0] == 0)
197 return (char*) ".";
198
199 do_get_path_info(&info, path);
200 upath = info.prefix_end ? info.prefix_end : path;
201 /* Preserve type of top-level separator */
202 if (IS_DIR_SEP(path[0]))
203 top = path[0] == '/' ? "/" : "\\";
204 else if (IS_DIR_SEP(upath[0]))
205 top = upath[0] == '/' ? "/" : "\\";
206 else
207 top = ".";
208
209 /* If a non-terminating directory separator exists, it terminates the
210 * dirname. Truncate the path there. */
211 if(info.base_sep_begin) {
212 info.base_sep_begin[0] = 0;
213
214 /* If the unprefixed path has not been truncated to empty, it is now
215 * the dirname, so return it. */
216 if(upath[0])
217 return path;
218 }
219
220 /* The dirname is empty. In principle we return `<prefix>.` if the
221 * path is relative and `<prefix>\` if it is absolute. This can be
222 * optimized if there is no prefix. */
223 if(upath == path)
224 return (char*) top;
225
226 /* When there is a prefix, we must append a character to the prefix.
227 * If there is enough room in the original path, we just reuse its
228 * storage. */
229 if(upath != info.path_end) {
230 upath[0] = *top;
231 upath[1] = 0;
232 return path;
233 }
234
235 /* This is only the last resort. If there is no room, we have to copy
236 * the prefix elsewhere. */
237 upath = realloc(static_path_copy, info.prefix_end - path + 2);
238 if(!upath)
239 return (char*) top;
240
241 static_path_copy = upath;
242 memcpy(upath, path, info.prefix_end - path);
243 upath += info.prefix_end - path;
244 upath[0] = *top;
245 upath[1] = 0;
246 return static_path_copy;
247 }
248
249char*
250basename(char* path)
251 {
252 struct path_info info;
253 char* upath;
254
255 if(path == NULL || path[0] == 0)
256 return (char*) ".";
257
258 do_get_path_info(&info, path);
259 upath = info.prefix_end ? info.prefix_end : path;
260
261 /* If the path is non-UNC and empty, then it's relative. POSIX says '.'
262 * shall be returned. */
263 if(IS_DIR_SEP(path[0]) == 0 && upath[0] == 0)
264 return (char*) ".";
265
266 /* If a terminating separator sequence exists, it is not part of the
267 * name and shall be truncated. */
268 if(info.term_sep_begin)
269 info.term_sep_begin[0] = 0;
270
271 /* If some other separator sequence has been found, the basename
272 * immediately follows it. */
273 if(info.base_sep_end)
274 return info.base_sep_end;
275
276 /* If removal of the terminating separator sequence has caused the
277 * unprefixed path to become empty, it must have comprised only
278 * separators. POSIX says `/` shall be returned, but on Windows, we
279 * return `\` instead. */
280 if(upath[0] == 0)
281 return (char*) "\\";
282
283 /* Return the unprefixed path. */
284 return upath;
285 }
286
287#endif /* __MINGW64_VERSION_MAJOR */
diff --git a/win32/env.c b/win32/env.c
new file mode 100644
index 000000000..f30ee62f6
--- /dev/null
+++ b/win32/env.c
@@ -0,0 +1,117 @@
1#include "libbb.h"
2
3#undef getenv
4#undef putenv
5
/*
 * getenv() with fallbacks for two variables that are often unset on
 * Windows:
 *   TMPDIR  falls back to TMP, then TEMP
 *   HOME    falls back to the pw_dir of getpwuid(getuid())
 * Any other unset variable yields NULL.
 */
char *mingw_getenv(const char *name)
{
	char *value = getenv(name);
	struct passwd *pw;

	if (value != NULL)
		return value;

	if (strcmp(name, "TMPDIR") == 0) {
		/* on Windows it is TMP and TEMP */
		value = getenv("TMP");
		return value ? value : getenv("TEMP");
	}

	if (strcmp(name, "HOME") == 0) {
		pw = getpwuid(getuid());
		if (pw != NULL)
			return pw->pw_dir;
	}

	return NULL;
}
23
/*
 * Set environment variable NAME to VALUE.
 *
 * When REPLACE is zero an existing variable is left alone and 0 is
 * returned.  Otherwise "NAME=VALUE" is handed to mingw_putenv(), whose
 * result is returned.
 *
 * Returns -1 with errno set to EINVAL when NAME is NULL, empty or
 * contains '=', or when VALUE is NULL.
 */
int setenv(const char *name, const char *value, int replace)
{
	int out;
	char *envstr;

	if (!name || !*name || strchr(name, '=') || !value) {
		/* POSIX requires EINVAL for an invalid name */
		errno = EINVAL;
		return -1;
	}
	if (!replace) {
		if (getenv(name)) return 0;
	}

	envstr = xasprintf("%s=%s", name, value);
	out = mingw_putenv(envstr);
	free(envstr);

	return out;
}
40
41/*
42 * Removing an environment variable with WIN32 _putenv requires an argument
43 * like "NAME="; glibc omits the '='. The implementations of unsetenv and
44 * clearenv allow for this.
45 *
46 * It isn't possible to create an environment variable with an empty value
47 * using WIN32 _putenv.
48 */
/*
 * Remove NAME from the environment by passing "NAME=" to _putenv().
 *
 * Returns the result of _putenv(), or -1 with errno set to EINVAL when
 * NAME is NULL, empty or contains '='.
 */
int unsetenv(const char *name)
{
	char *envstr;
	int ret;

	if (!name || !*name || strchr(name, '=') ) {
		/* POSIX requires EINVAL for an invalid name */
		errno = EINVAL;
		return -1;
	}

	envstr = xasprintf("%s=", name);
	ret = _putenv(envstr);
	free(envstr);

	return ret;
}
64
65int clearenv(void)
66{
67 char *envp, *name, *s;
68
69 while ( environ && (envp=*environ) ) {
70 if ( (s=strchr(envp, '=')) != NULL ) {
71 name = xstrndup(envp, s-envp+1);
72 if (_putenv(name) == -1) {
73 free(name);
74 return -1;
75 }
76 free(name);
77 }
78 else {
79 return -1;
80 }
81 }
82 return 0;
83}
84
85int mingw_putenv(const char *env)
86{
87 char *s, **envp;
88 int ret = 0;
89
90 if ( (s=strchr(env, '=')) == NULL ) {
91 return unsetenv(env);
92 }
93
94 if (s[1] != '\0') {
95 /* setting non-empty value is fine */
96 return _putenv(env);
97 }
98 else {
99 /* set empty value by setting a non-empty one then truncating */
100 char *envstr = xasprintf("%s0", env);
101 ret = _putenv(envstr);
102
103 for (envp = environ; *envp; ++envp) {
104 if (strcmp(*envp, envstr) == 0) {
105 (*envp)[s - env + 1] = '\0';
106 break;
107 }
108 }
109
110 /* tell the OS environment about the change */
111 envstr[s - env] = '\0';
112 SetEnvironmentVariable(envstr, "");
113 free(envstr);
114 }
115
116 return ret;
117}
diff --git a/win32/fnmatch.c b/win32/fnmatch.c
new file mode 100644
index 000000000..77b54c5f5
--- /dev/null
+++ b/win32/fnmatch.c
@@ -0,0 +1,525 @@
1/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public
15 License along with this library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
18
19#include <platform.h>
20#include "match_class.h"
21
22#if HAVE_CONFIG_H
23# include <config.h>
24#endif
25
26/* Enable GNU extensions in fnmatch.h. */
27#ifndef _GNU_SOURCE
28# define _GNU_SOURCE 1
29#endif
30
31#include <errno.h>
32#include <fnmatch.h>
33#include <ctype.h>
34
35#if HAVE_STRING_H || defined _LIBC
36# include <string.h>
37#else
38# include <strings.h>
39#endif
40
41#if defined STDC_HEADERS || defined _LIBC
42# include <stdlib.h>
43#endif
44
/* For platforms which support the ISO C amendment 1 functionality we
   support user-defined character classes.  */
47#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
48/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
49# include <wchar.h>
50# include <wctype.h>
51#endif
52
53/* Comment out all this code if we are using the GNU C Library, and are not
54 actually compiling the library itself. This code is part of the GNU C
55 Library, but also included in many other GNU distributions. Compiling
56 and linking in this code is a waste when using the GNU C library
57 (especially if it is a shared library). Rather than having every GNU
58 program understand `configure --with-gnu-libc' and omit the object files,
59 it is simpler to just do this in the source for each such file. */
60
61#if defined _LIBC || !defined __GNU_LIBRARY__
62
63
64# if defined STDC_HEADERS || !defined isascii
65# define ISASCII(c) 1
66# else
67# define ISASCII(c) isascii(c)
68# endif
69
70# ifdef isblank
71# define ISBLANK(c) (ISASCII (c) && isblank (c))
72# else
73# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
74# endif
75# ifdef isgraph
76# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
77# else
78# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
79# endif
80
81# define ISPRINT(c) (ISASCII (c) && isprint (c))
82# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
83# define ISALNUM(c) (ISASCII (c) && isalnum (c))
84# define ISALPHA(c) (ISASCII (c) && isalpha (c))
85# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
86# define ISLOWER(c) (ISASCII (c) && islower (c))
87# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
88# define ISSPACE(c) (ISASCII (c) && isspace (c))
89# define ISUPPER(c) (ISASCII (c) && isupper (c))
90# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
91
92# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
93
94# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
95/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C amendment 1.  */
97# ifdef CHARCLASS_NAME_MAX
98# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
99# else
100/* This shouldn't happen but some implementation might still have this
101 problem. Use a reasonable default value. */
102# define CHAR_CLASS_MAX_LENGTH 256
103# endif
104
105# ifdef _LIBC
106# define IS_CHAR_CLASS(string) __wctype (string)
107# else
108# define IS_CHAR_CLASS(string) wctype (string)
109# endif
110# else
111# define CHAR_CLASS_MAX_LENGTH 7 /* Namely, `xdigit'. */
112
113# define IS_CHAR_CLASS(string) \
114 (STREQ (string, "alpha") || STREQ (string, "upper") \
115 || STREQ (string, "lower") || STREQ (string, "digit") \
116 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
117 || STREQ (string, "space") || STREQ (string, "print") \
118 || STREQ (string, "punct") || STREQ (string, "graph") \
119 || STREQ (string, "cntrl") || STREQ (string, "blank"))
120# endif
121
122/* Avoid depending on library functions or files
123 whose names are inconsistent. */
124
125# if !defined _LIBC && !defined getenv
126extern char *getenv (const char *);
127# endif
128
129# ifndef errno
130extern int errno;
131# endif
132
133/* This function doesn't exist on most systems. */
134
135# if !defined HAVE___STRCHRNUL && !defined _LIBC && 0
/* Like strchr, except that a pointer to the terminating null byte of S is
   returned when C does not occur in S.  */
static char *
__strchrnul (const char *s, int c)
{
  char *hit = strchr (s, c);

  return hit != NULL ? hit : (char *) s + strlen (s);
}
144# else
145# define __strchrnul strchrnul
146# endif
147
148# ifndef internal_function
149/* Inside GNU libc we mark some function in a special way. In other
150 environments simply ignore the marking. */
151# define internal_function
152# endif
153
154/* Match STRING against the filename pattern PATTERN, returning zero if
155 it matches, nonzero if not. */
156static int internal_fnmatch __P ((const char *pattern, const char *string,
157 int no_leading_period, int flags))
158 internal_function;
159static int
160internal_function
161internal_fnmatch (const char *pattern, const char *string,
162 int no_leading_period, int flags)
163{
164 register const char *p = pattern, *n = string;
165 register unsigned char c;
166
167/* Note that this evaluates C many times. */
168# ifdef _LIBC
169# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
170# else
171# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
172# endif
173
174 while ((c = *p++) != '\0')
175 {
176 c = FOLD (c);
177
178 switch (c)
179 {
180 case '?':
181 if (*n == '\0')
182 return FNM_NOMATCH;
183 else if (*n == '/' && (flags & FNM_FILE_NAME))
184 return FNM_NOMATCH;
185 else if (*n == '.' && no_leading_period
186 && (n == string
187 || (n[-1] == '/' && (flags & FNM_FILE_NAME))))
188 return FNM_NOMATCH;
189 break;
190
191 case '\\':
192 if (!(flags & FNM_NOESCAPE))
193 {
194 c = *p++;
195 if (c == '\0')
196 /* Trailing \ loses. */
197 return FNM_NOMATCH;
198 c = FOLD (c);
199 }
200 if (FOLD ((unsigned char) *n) != c)
201 return FNM_NOMATCH;
202 break;
203
204 case '*':
205 if (*n == '.' && no_leading_period
206 && (n == string
207 || (n[-1] == '/' && (flags & FNM_FILE_NAME))))
208 return FNM_NOMATCH;
209
210 for (c = *p++; c == '?' || c == '*'; c = *p++)
211 {
212 if (*n == '/' && (flags & FNM_FILE_NAME))
213 /* A slash does not match a wildcard under FNM_FILE_NAME. */
214 return FNM_NOMATCH;
215 else if (c == '?')
216 {
217 /* A ? needs to match one character. */
218 if (*n == '\0')
219 /* There isn't another character; no match. */
220 return FNM_NOMATCH;
221 else
222 /* One character of the string is consumed in matching
223 this ? wildcard, so *??? won't match if there are
224 less than three characters. */
225 ++n;
226 }
227 }
228
229 if (c == '\0')
230 /* The wildcard(s) is/are the last element of the pattern.
231 If the name is a file name and contains another slash
232 this does mean it cannot match. */
233 return ((flags & FNM_FILE_NAME) && strchr (n, '/') != NULL
234 ? FNM_NOMATCH : 0);
235 else
236 {
237 const char *endp;
238
239 endp = __strchrnul (n, (flags & FNM_FILE_NAME) ? '/' : '\0');
240
241 if (c == '[')
242 {
243 int flags2 = ((flags & FNM_FILE_NAME)
244 ? flags : (flags & ~FNM_PERIOD));
245
246 for (--p; n < endp; ++n)
247 if (internal_fnmatch (p, n,
248 (no_leading_period
249 && (n == string
250 || (n[-1] == '/'
251 && (flags
252 & FNM_FILE_NAME)))),
253 flags2)
254 == 0)
255 return 0;
256 }
257 else if (c == '/' && (flags & FNM_FILE_NAME))
258 {
259 while (*n != '\0' && *n != '/')
260 ++n;
261 if (*n == '/'
262 && (internal_fnmatch (p, n + 1, flags & FNM_PERIOD,
263 flags) == 0))
264 return 0;
265 }
266 else
267 {
268 int flags2 = ((flags & FNM_FILE_NAME)
269 ? flags : (flags & ~FNM_PERIOD));
270
271 if (c == '\\' && !(flags & FNM_NOESCAPE))
272 c = *p;
273 c = FOLD (c);
274 for (--p; n < endp; ++n)
275 if (FOLD ((unsigned char) *n) == c
276 && (internal_fnmatch (p, n,
277 (no_leading_period
278 && (n == string
279 || (n[-1] == '/'
280 && (flags
281 & FNM_FILE_NAME)))),
282 flags2) == 0))
283 return 0;
284 }
285 }
286
287 /* If we come here no match is possible with the wildcard. */
288 return FNM_NOMATCH;
289
290 case '[':
291 {
292 /* Nonzero if the sense of the character class is inverted. */
293 static int posixly_correct;
294 register int not;
295 char cold;
296
297 if (posixly_correct == 0)
298 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
299
300 if (*n == '\0')
301 return FNM_NOMATCH;
302
303 if (*n == '.' && no_leading_period && (n == string
304 || (n[-1] == '/'
305 && (flags
306 & FNM_FILE_NAME))))
307 return FNM_NOMATCH;
308
309 if (*n == '/' && (flags & FNM_FILE_NAME))
310 /* `/' cannot be matched. */
311 return FNM_NOMATCH;
312
313 not = (*p == '!' || (posixly_correct < 0 && *p == '^'));
314 if (not)
315 ++p;
316
317 c = *p++;
318 for (;;)
319 {
320 unsigned char fn = FOLD ((unsigned char) *n);
321
322 if (!(flags & FNM_NOESCAPE) && c == '\\')
323 {
324 if (*p == '\0')
325 return FNM_NOMATCH;
326 c = FOLD ((unsigned char) *p);
327 ++p;
328
329 if (c == fn)
330 goto matched;
331 }
332 else if (c == '[' && *p == ':')
333 {
334 /* Leave room for the null. */
335 char str[CHAR_CLASS_MAX_LENGTH + 1];
336 size_t c1 = 0;
337# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
338 wctype_t wt;
339# endif
340 const char *startp = p;
341
342 for (;;)
343 {
344 if (c1 == CHAR_CLASS_MAX_LENGTH)
345 /* The name is too long and therefore the pattern
346 is ill-formed. */
347 return FNM_NOMATCH;
348
349 c = *++p;
350 if (c == ':' && p[1] == ']')
351 {
352 p += 2;
353 break;
354 }
355 if (c < 'a' || c >= 'z')
356 {
357 /* This cannot possibly be a character class name.
358 Match it as a normal range. */
359 p = startp;
360 c = '[';
361 goto normal_bracket;
362 }
363 str[c1++] = c;
364 }
365 str[c1] = '\0';
366
367# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
368 wt = IS_CHAR_CLASS (str);
369 if (wt == 0)
370 /* Invalid character class name. */
371 return FNM_NOMATCH;
372
373 if (__iswctype (__btowc ((unsigned char) *n), wt))
374 goto matched;
375# else
376 switch (match_class(str)) {
377 case CCLASS_ALNUM:
378 if (ISALNUM ((unsigned char) *n))
379 goto matched;
380 break;
381 case CCLASS_ALPHA:
382 if (ISALPHA ((unsigned char) *n))
383 goto matched;
384 break;
385 case CCLASS_BLANK:
386 if (ISBLANK ((unsigned char) *n))
387 goto matched;
388 break;
389 case CCLASS_CNTRL:
390 if (ISCNTRL ((unsigned char) *n))
391 goto matched;
392 break;
393 case CCLASS_DIGIT:
394 if (ISDIGIT ((unsigned char) *n))
395 goto matched;
396 break;
397 case CCLASS_GRAPH:
398 if (ISGRAPH ((unsigned char) *n))
399 goto matched;
400 break;
401 case CCLASS_LOWER:
402 if (ISLOWER ((unsigned char) *n))
403 goto matched;
404 break;
405 case CCLASS_PRINT:
406 if (ISPRINT ((unsigned char) *n))
407 goto matched;
408 break;
409 case CCLASS_PUNCT:
410 if (ISPUNCT ((unsigned char) *n))
411 goto matched;
412 break;
413 case CCLASS_SPACE:
414 if (ISSPACE ((unsigned char) *n))
415 goto matched;
416 break;
417 case CCLASS_UPPER:
418 if (ISUPPER ((unsigned char) *n))
419 goto matched;
420 break;
421 case CCLASS_XDIGIT:
422 if (ISXDIGIT ((unsigned char) *n))
423 goto matched;
424 break;
425 }
426 c = *p++;
427# endif
428 }
429 else if (c == '\0')
430 /* [ (unterminated) loses. */
431 return FNM_NOMATCH;
432 else
433 {
434 normal_bracket:
435 if (FOLD (c) == fn)
436 goto matched;
437
438 cold = c;
439 c = *p++;
440
441 if (c == '-' && *p != ']')
442 {
443 /* It is a range. */
444 unsigned char cend = *p++;
445 if (!(flags & FNM_NOESCAPE) && cend == '\\')
446 cend = *p++;
447 if (cend == '\0')
448 return FNM_NOMATCH;
449
450 if (cold <= fn && fn <= FOLD (cend))
451 goto matched;
452
453 c = *p++;
454 }
455 }
456
457 if (c == ']')
458 break;
459 }
460
461 if (!not)
462 return FNM_NOMATCH;
463 break;
464
465 matched:
466 /* Skip the rest of the [...] that already matched. */
467 do
468 {
469 c = *p++;
470
471 if (c == '\0')
472 /* [... (unterminated) loses. */
473 return FNM_NOMATCH;
474
475 if (!(flags & FNM_NOESCAPE) && c == '\\')
476 {
477 if (*p == '\0')
478 return FNM_NOMATCH;
479 /* XXX 1003.2d11 is unclear if this is right. */
480 ++p;
481 }
482 else if (c == '[' && *p == ':')
483 {
484 do
485 if (*++p == '\0')
486 return FNM_NOMATCH;
487 while (*p != ':' || p[1] == ']');
488 p += 2;
489 c = *p;
490 }
491 }
492 while (c != ']');
493 if (not)
494 return FNM_NOMATCH;
495 }
496 break;
497
498 default:
499 if (c != FOLD ((unsigned char) *n))
500 return FNM_NOMATCH;
501 }
502
503 ++n;
504 }
505
506 if (*n == '\0')
507 return 0;
508
509 if ((flags & FNM_LEADING_DIR) && *n == '/')
510 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
511 return 0;
512
513 return FNM_NOMATCH;
514
515# undef FOLD
516}
517
518
519int
520fnmatch (const char *pattern, const char *string, int flags)
521{
522 return internal_fnmatch (pattern, string, flags & FNM_PERIOD, flags);
523}
524
525#endif /* _LIBC or not __GNU_LIBRARY__. */
diff --git a/win32/fnmatch.h b/win32/fnmatch.h
new file mode 100644
index 000000000..cc3ec3794
--- /dev/null
+++ b/win32/fnmatch.h
@@ -0,0 +1,84 @@
1/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
18
19#ifndef _FNMATCH_H
20#define _FNMATCH_H 1
21
22#ifdef __cplusplus
23extern "C" {
24#endif
25
/* __P(protos) wraps prototype parameter lists: for C++, ANSI C or WINDOWS32
   builds it expands to the list itself, otherwise to an empty `()'.  An
   existing definition is kept only when both __GLIBC__ and __P are
   already defined.  */
#if defined __cplusplus || (defined __STDC__ && __STDC__) || defined WINDOWS32
# if !defined __GLIBC__ || !defined __P
# undef __P
# define __P(protos) protos
# endif
#else /* Not C++ or ANSI C. */
# undef __P
# define __P(protos) ()
/* We can get away without defining `const' here only because in this file
 it is used only inside the prototype for `fnmatch', which is elided in
 non-ANSI C where `const' is problematical. */
#endif /* C++ or ANSI C. */

/* __const expands to `const' under ANSI C or C++ and to nothing otherwise.
   Nothing is defined here when `const' is itself a macro.  */
#ifndef const
# if (defined __STDC__ && __STDC__) || defined __cplusplus
# define __const const
# else
# define __const
# endif
#endif
46
47/* We #undef these before defining them because some losing systems
48 (HP-UX A.08.07 for example) define these in <unistd.h>. */
49#undef FNM_PATHNAME
50#undef FNM_NOESCAPE
51#undef FNM_PERIOD
52
53/* Bits set in the FLAGS argument to `fnmatch'. */
54#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */
55#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */
56#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */
57
58#if !defined _POSIX_C_SOURCE || _POSIX_C_SOURCE < 2 || defined _GNU_SOURCE
59# define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */
60# define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */
61# define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */
62#endif
63
64/* Value returned by `fnmatch' if STRING does not match PATTERN. */
65#define FNM_NOMATCH 1
66
67/* This value is returned if the implementation does not support
68 `fnmatch'. Since this is not the case here it will never be
69 returned but the conformance test suites still require the symbol
70 to be defined. */
71#ifdef _XOPEN_SOURCE
72# define FNM_NOSYS (-1)
73#endif
74
75/* Match NAME against the filename pattern PATTERN,
76 returning zero if it matches, FNM_NOMATCH if not. */
77extern int fnmatch __P ((__const char *__pattern, __const char *__name,
78 int __flags));
79
80#ifdef __cplusplus
81}
82#endif
83
84#endif /* fnmatch.h */
diff --git a/win32/fsync.c b/win32/fsync.c
new file mode 100644
index 000000000..6ab44d434
--- /dev/null
+++ b/win32/fsync.c
@@ -0,0 +1,75 @@
1/* Emulate fsync on platforms that lack it, primarily Windows and
2 cross-compilers like MinGW.
3
4 This is derived from sqlite3 sources.
5 https://www.sqlite.org/src/finfo?name=src/os_win.c
6 https://www.sqlite.org/copyright.html
7
8 Written by Richard W.M. Jones <rjones.at.redhat.com>
9
10 Copyright (C) 2008-2018 Free Software Foundation, Inc.
11
12 This library is free software; you can redistribute it and/or
13 modify it under the terms of the GNU Lesser General Public
14 License as published by the Free Software Foundation; either
15 version 2.1 of the License, or (at your option) any later version.
16
17 This library is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 Lesser General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <https://www.gnu.org/licenses/>. */
24
25#include "libbb.h"
26#include <unistd.h>
27
28/* FlushFileBuffers */
29# define WIN32_LEAN_AND_MEAN
30# include <windows.h>
31
32# include <errno.h>
33
34/* Get _get_osfhandle. */
35# include <io.h>
36
37int
38fsync (int fd)
39{
40 HANDLE h = (HANDLE) _get_osfhandle (fd);
41 DWORD err;
42
43 if (h == INVALID_HANDLE_VALUE)
44 {
45 errno = EBADF;
46 return -1;
47 }
48
49 if (!FlushFileBuffers (h))
50 {
51 /* Translate some Windows errors into rough approximations of Unix
52 * errors. MSDN is useless as usual - in this case it doesn't
53 * document the full range of errors.
54 */
55 err = GetLastError ();
56 switch (err)
57 {
58 case ERROR_ACCESS_DENIED:
59 /* For a read-only handle, fsync should succeed, even though we have
60 no way to sync the access-time changes. */
61 return 0;
62
63 /* eg. Trying to fsync a tty. */
64 case ERROR_INVALID_HANDLE:
65 errno = EINVAL;
66 break;
67
68 default:
69 errno = EIO;
70 }
71 return -1;
72 }
73
74 return 0;
75}
diff --git a/win32/glob.c b/win32/glob.c
new file mode 100644
index 000000000..1cc6483e7
--- /dev/null
+++ b/win32/glob.c
@@ -0,0 +1,343 @@
1/*
2 glob from musl (https://www.musl-libc.org/).
3
4 MIT licensed:
5
6----------------------------------------------------------------------
7Copyright © 2005-2020 Rich Felker, et al.
8
9Permission is hereby granted, free of charge, to any person obtaining
10a copy of this software and associated documentation files (the
11"Software"), to deal in the Software without restriction, including
12without limitation the rights to use, copy, modify, merge, publish,
13distribute, sublicense, and/or sell copies of the Software, and to
14permit persons to whom the Software is furnished to do so, subject to
15the following conditions:
16
17The above copyright notice and this permission notice shall be
18included in all copies or substantial portions of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27----------------------------------------------------------------------
28*/
29#include "libbb.h"
30#include <glob.h>
31#include <fnmatch.h>
32
/* Singly linked list node holding one matched pathname.  The name lives in
 * the flexible array member, so each match is a single allocation. */
struct match
{
	struct match *next;	/* following match, or NULL at the end of the list */
	char name[];		/* NUL-terminated pathname */
};
38
39static int append(struct match **tail, const char *name, size_t len, int mark)
40{
41 struct match *new = malloc(sizeof(struct match) + len + 2);
42 if (!new) return -1;
43 (*tail)->next = new;
44 new->next = NULL;
45 memcpy(new->name, name, len+1);
46 if (mark && len && name[len-1]!='/') {
47 new->name[len] = '/';
48 new->name[len+1] = 0;
49 }
50 *tail = new;
51 return 0;
52}
53
54static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*errfunc)(const char *path, int err), struct match **tail)
55{
56 ptrdiff_t i, j;
57 int in_bracket, overflow;
58 char *p2, saved_sep;
59 int readerr, old_errno;
60 DIR *dir;
61 struct dirent *de;
62
63 /* If GLOB_MARK is unused, we don't care about type. */
64 if (!type && !(flags & GLOB_MARK)) type = DT_REG;
65
66 /* Special-case the remaining pattern being all slashes, in
67 * which case we can use caller-passed type if it's a dir. */
68 if (*pat && type!=DT_DIR) type = 0;
69 while (pos+1 < PATH_MAX && *pat=='/') buf[pos++] = *pat++;
70
71 /* Consume maximal [escaped-]literal prefix of pattern, copying
72 * and un-escaping it to the running buffer as we go. */
73 i=0; j=0;
74 in_bracket = 0; overflow = 0;
75 for (; pat[i]!='*' && pat[i]!='?' && (!in_bracket || pat[i]!=']'); i++) {
76 if (!pat[i]) {
77 if (overflow) return 0;
78 pat += i;
79 pos += j;
80 i = j = 0;
81 break;
82 } else if (pat[i] == '[') {
83 in_bracket = 1;
84 } else if (pat[i] == '\\' && !(flags & GLOB_NOESCAPE)) {
85 /* Backslashes inside a bracket are (at least by
86 * our interpretation) non-special, so if next
87 * char is ']' we have a complete expression. */
88 if (in_bracket && pat[i+1]==']') break;
89 /* Unpaired final backslash never matches. */
90 if (!pat[i+1]) return 0;
91 i++;
92 }
93 if (pat[i] == '/') {
94 if (overflow) return 0;
95 in_bracket = 0;
96 pat += i+1;
97 i = -1;
98 pos += j+1;
99 j = -1;
100 }
101 /* Only store a character if it fits in the buffer, but if
102 * a potential bracket expression is open, the overflow
103 * must be remembered and handled later only if the bracket
104 * is unterminated (and thereby a literal), so as not to
105 * disallow long bracket expressions with short matches. */
106 if (pos+(j+1) < PATH_MAX) {
107 buf[pos+j++] = pat[i];
108 } else if (in_bracket) {
109 overflow = 1;
110 } else {
111 return 0;
112 }
113 /* If we consume any new components, the caller-passed type
114 * or dummy type from above is no longer valid. */
115 type = 0;
116 }
117 buf[pos] = 0;
118 if (!*pat) {
119 /* If we consumed any components above, or if GLOB_MARK is
120 * requested and we don't yet know if the match is a dir,
121 * we must confirm the file exists and/or determine its type.
122 *
123 * If marking dirs, symlink type is inconclusive; we need the
124 * type for the symlink target, and therefore must try stat
125 * first unless type is known not to be a symlink. Otherwise,
126 * or if that fails, use lstat for determining existence to
127 * avoid false negatives in the case of broken symlinks. */
128 struct stat st;
129 if ((flags & GLOB_MARK) && (!type||type==DT_LNK) && !stat(buf, &st)) {
130 if (S_ISDIR(st.st_mode)) type = DT_DIR;
131 else type = DT_REG;
132 }
133 if (!type && lstat(buf, &st)) {
134 if (errno!=ENOENT && (errfunc(buf, errno) || (flags & GLOB_ERR)))
135 return GLOB_ABORTED;
136 return 0;
137 }
138 if (append(tail, buf, pos, (flags & GLOB_MARK) && type==DT_DIR))
139 return GLOB_NOSPACE;
140 return 0;
141 }
142 p2 = strchr(pat, '/');
143 saved_sep = '/';
144 /* Check if the '/' was escaped and, if so, remove the escape char
145 * so that it will not be unpaired when passed to fnmatch. */
146 if (p2 && !(flags & GLOB_NOESCAPE)) {
147 char *p;
148 for (p=p2; p>pat && p[-1]=='\\'; p--);
149 if ((p2-p)%2) {
150 p2--;
151 saved_sep = '\\';
152 }
153 }
154 dir = opendir(pos ? buf : ".");
155 if (!dir) {
156 if (errfunc(buf, errno) || (flags & GLOB_ERR))
157 return GLOB_ABORTED;
158 return 0;
159 }
160 old_errno = errno;
161 while (errno=0, de=readdir(dir)) {
162 size_t l;
163 int fnm_flags, r;
164
165 /* Quickly skip non-directories when there's pattern left. */
166 if (p2 && de->d_type && de->d_type!=DT_DIR && de->d_type!=DT_LNK)
167 continue;
168
169 l = strlen(de->d_name);
170 if (l >= PATH_MAX-pos) continue;
171
172 if (p2) *p2 = 0;
173
174 fnm_flags= ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0)
175 | ((!(flags & GLOB_PERIOD)) ? FNM_PERIOD : 0);
176
177 if (fnmatch(pat, de->d_name, fnm_flags))
178 continue;
179
180 /* With GLOB_PERIOD, don't allow matching . or .. unless
181 * fnmatch would match them with FNM_PERIOD rules in effect. */
182 if (p2 && (flags & GLOB_PERIOD) && de->d_name[0]=='.'
183 && (!de->d_name[1] || (de->d_name[1]=='.' && !de->d_name[2]))
184 && fnmatch(pat, de->d_name, fnm_flags | FNM_PERIOD))
185 continue;
186
187 memcpy(buf+pos, de->d_name, l+1);
188 if (p2) *p2 = saved_sep;
189 r = do_glob(buf, pos+l, de->d_type, p2 ? p2 : (char *)"", flags, errfunc, tail);
190 if (r) {
191 closedir(dir);
192 return r;
193 }
194 }
195 readerr = errno;
196 if (p2) *p2 = saved_sep;
197 closedir(dir);
198 if (readerr && (errfunc(buf, errno) || (flags & GLOB_ERR)))
199 return GLOB_ABORTED;
200 errno = old_errno;
201 return 0;
202}
203
/* Default error callback installed by glob() when the caller passes none.
 * It always returns 0, so by itself it never makes do_glob() abort. */
static int ignore_err(const char *path UNUSED_PARAM, int err UNUSED_PARAM)
{
	return 0;
}
208
209static void freelist(struct match *head)
210{
211 struct match *match, *next;
212 for (match=head->next; match; match=next) {
213 next = match->next;
214 free(match);
215 }
216}
217
218#if !ENABLE_PLATFORM_MINGW32
/* qsort() comparator: A and B point at `char *' elements, which are
 * ordered with strcmp(). */
static int sort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
223
224static int expand_tilde(char **pat, char *buf, size_t *pos)
225{
226 char *p = *pat + 1;
227 size_t i = 0;
228
229 char delim, *name_end = __strchrnul(p, '/');
230 if ((delim = *name_end)) *name_end++ = 0;
231 *pat = name_end;
232
233 char *home = *p ? NULL : getenv("HOME");
234 if (!home) {
235 struct passwd pw, *res;
236 switch (*p ? getpwnam_r(p, &pw, buf, PATH_MAX, &res)
237 : getpwuid_r(getuid(), &pw, buf, PATH_MAX, &res)) {
238 case ENOMEM:
239 return GLOB_NOSPACE;
240 case 0:
241 if (!res)
242 default:
243 return GLOB_NOMATCH;
244 }
245 home = pw.pw_dir;
246 }
247 while (i < PATH_MAX - 2 && *home)
248 buf[i++] = *home++;
249 if (*home)
250 return GLOB_NOMATCH;
251 if ((buf[i] = delim))
252 buf[++i] = 0;
253 *pos = i;
254 return 0;
255}
256#endif
257
/*
 * Expand the pattern PAT into the list of existing paths that match it.
 *
 *   pat     - pattern to expand
 *   flags   - GLOB_* flags
 *   errfunc - optional error callback; NULL selects ignore_err()
 *   g       - result set; reset first unless GLOB_APPEND is given
 *
 * Returns 0 on success, GLOB_NOSPACE on allocation failure, GLOB_NOMATCH
 * when nothing matched and GLOB_NOCHECK is not set, or the error reported
 * by do_glob()/expand_tilde() when some results were still collected.
 * The strings stored in g->gl_pathv are the name[] members of the match
 * nodes, which is why globfree() must be used to release them.
 */
int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), glob_t *restrict g)
{
	/* head is a stack sentinel; real matches start at head.next. */
	struct match head = { .next = NULL }, *tail = &head;
	size_t cnt, i;
	size_t offs = (flags & GLOB_DOOFFS) ? g->gl_offs : 0;
	int error = 0;
	char buf[PATH_MAX];

	if (!errfunc) errfunc = ignore_err;

	if (!(flags & GLOB_APPEND)) {
		g->gl_offs = offs;
		g->gl_pathc = 0;
		g->gl_pathv = NULL;
	}

	if (*pat) {
		/* do_glob() edits the pattern in place, so work on a copy. */
		char *p = strdup(pat);
		size_t pos = 0;
		char *s = p;
		if (!p) return GLOB_NOSPACE;
		buf[0] = 0;
#if !ENABLE_PLATFORM_MINGW32
		if ((flags & (GLOB_TILDE | GLOB_TILDE_CHECK)) && *p == '~')
			error = expand_tilde(&s, buf, &pos);
		/* This `if' guards the do_glob() call below; on MINGW32 the
		 * call is unconditional. */
		if (!error)
#endif
			error = do_glob(buf, pos, 0, s, flags, errfunc, &tail);
		free(p);
	}

	if (error == GLOB_NOSPACE) {
		freelist(&head);
		return error;
	}

	/* Count the matches collected so far. */
	for (cnt=0, tail=head.next; tail; tail=tail->next, cnt++);
	if (!cnt) {
		if (flags & GLOB_NOCHECK) {
			/* No match: hand back the pattern itself. */
			tail = &head;
			if (append(&tail, pat, strlen(pat), 0))
				return GLOB_NOSPACE;
			cnt++;
		} else
			return GLOB_NOMATCH;
	}

	if (flags & GLOB_APPEND) {
		/* Grow the existing vector; new entries go after the old ones. */
		char **pathv = realloc(g->gl_pathv, (offs + g->gl_pathc + cnt + 1) * sizeof(char *));
		if (!pathv) {
			freelist(&head);
			return GLOB_NOSPACE;
		}
		g->gl_pathv = pathv;
		offs += g->gl_pathc;
	} else {
		g->gl_pathv = malloc((offs + cnt + 1) * sizeof(char *));
		if (!g->gl_pathv) {
			freelist(&head);
			return GLOB_NOSPACE;
		}
		/* The reserved leading slots are handed back as NULL. */
		for (i=0; i<offs; i++)
			g->gl_pathv[i] = NULL;
	}
	for (i=0, tail=head.next; i<cnt; tail=tail->next, i++)
		g->gl_pathv[offs + i] = tail->name;
	g->gl_pathv[offs + i] = NULL;
	g->gl_pathc += cnt;

	/* Sorting is compiled out on MINGW32: results keep readdir() order. */
#if !ENABLE_PLATFORM_MINGW32
	if (!(flags & GLOB_NOSORT))
		qsort(g->gl_pathv+offs, cnt, sizeof(char *), sort);
#endif

	return error;
}
334
335void globfree(glob_t *g)
336{
337 size_t i;
338 for (i=0; i<g->gl_pathc; i++)
339 free(g->gl_pathv[g->gl_offs + i] - offsetof(struct match, name));
340 free(g->gl_pathv);
341 g->gl_pathc = 0;
342 g->gl_pathv = NULL;
343}
diff --git a/win32/glob.h b/win32/glob.h
new file mode 100644
index 000000000..a8141b8bf
--- /dev/null
+++ b/win32/glob.h
@@ -0,0 +1,89 @@
1/*
2 glob from musl (https://www.musl-libc.org/).
3
4 MIT licensed:
5
6----------------------------------------------------------------------
7Copyright © 2005-2020 Rich Felker, et al.
8
9Permission is hereby granted, free of charge, to any person obtaining
10a copy of this software and associated documentation files (the
11"Software"), to deal in the Software without restriction, including
12without limitation the rights to use, copy, modify, merge, publish,
13distribute, sublicense, and/or sell copies of the Software, and to
14permit persons to whom the Software is furnished to do so, subject to
15the following conditions:
16
17The above copyright notice and this permission notice shall be
18included in all copies or substantial portions of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27----------------------------------------------------------------------
28*/
29#ifndef _GLOB_H
30#define _GLOB_H
31
32#ifdef __cplusplus
33extern "C" {
34#endif
35
/* Result set filled in by glob() and released with globfree(). */
typedef struct {
	size_t gl_pathc;	/* number of paths stored by glob() */
	char **gl_pathv;	/* NULL-terminated vector of matched paths */
	size_t gl_offs;		/* leading gl_pathv slots reserved (set to NULL) with GLOB_DOOFFS */
	int __dummy1;		/* not referenced by glob.c; presumably layout padding - TODO confirm */
	void *__dummy2[5];	/* not referenced by glob.c; presumably layout padding - TODO confirm */
} glob_t;

/* Expand a pattern: (pattern, GLOB_* flags, optional error callback,
 * result set).  Returns 0 or one of the GLOB_NOSPACE / GLOB_ABORTED /
 * GLOB_NOMATCH codes. */
int glob(const char *__restrict, int, int (*)(const char *, int), glob_t *__restrict);
/* Free the paths and the vector stored in a glob_t by glob(). */
void globfree(glob_t *);
46
/* Flag bits accepted by glob().  Their meaning, as implemented in glob.c,
 * is noted on the generic definitions in the #else branch. */
#if ENABLE_PLATFORM_MINGW32
// Set some flags to zero so the compiler can exclude unused code.
#define GLOB_ERR 0
#define GLOB_MARK 0
#define GLOB_NOSORT 0x04
#define GLOB_DOOFFS 0
#define GLOB_NOCHECK 0x10
#define GLOB_APPEND 0
#define GLOB_NOESCAPE 0x40
#define GLOB_PERIOD 0

#define GLOB_TILDE 0
#define GLOB_TILDE_CHECK 0
#else
#define GLOB_ERR 0x01 /* abort on stat/opendir/readdir errors */
#define GLOB_MARK 0x02 /* append '/' to matches that are directories */
#define GLOB_NOSORT 0x04 /* skip the final qsort() of the results */
#define GLOB_DOOFFS 0x08 /* reserve gl_offs NULL slots at the front of gl_pathv */
#define GLOB_NOCHECK 0x10 /* return the pattern itself when nothing matches */
#define GLOB_APPEND 0x20 /* add to the results of an earlier glob() call */
#define GLOB_NOESCAPE 0x40 /* backslash is an ordinary character */
#define GLOB_PERIOD 0x80 /* wildcards may match a leading '.' */

#define GLOB_TILDE 0x1000 /* expand a leading ~ or ~user */
#define GLOB_TILDE_CHECK 0x4000 /* handled exactly like GLOB_TILDE in glob.c */
#endif

/* Nonzero return values of glob(). */
#define GLOB_NOSPACE 1 /* memory allocation failed */
#define GLOB_ABORTED 2 /* stopped because of an error (errfunc or GLOB_ERR) */
#define GLOB_NOMATCH 3 /* nothing matched and GLOB_NOCHECK was not set */
#define GLOB_NOSYS 4 /* not returned by glob.c */
78
79#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
80#define glob64 glob
81#define globfree64 globfree
82#define glob64_t glob_t
83#endif
84
85#ifdef __cplusplus
86}
87#endif
88
89#endif
diff --git a/win32/grp.h b/win32/grp.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/grp.h
diff --git a/win32/inet_pton.c b/win32/inet_pton.c
new file mode 100644
index 000000000..f229a9355
--- /dev/null
+++ b/win32/inet_pton.c
@@ -0,0 +1,95 @@
1/*
2 inet_pton from musl (https://www.musl-libc.org/).
3
4 MIT licensed:
5
6----------------------------------------------------------------------
7Copyright © 2005-2020 Rich Felker, et al.
8
9Permission is hereby granted, free of charge, to any person obtaining
10a copy of this software and associated documentation files (the
11"Software"), to deal in the Software without restriction, including
12without limitation the rights to use, copy, modify, merge, publish,
13distribute, sublicense, and/or sell copies of the Software, and to
14permit persons to whom the Software is furnished to do so, subject to
15the following conditions:
16
17The above copyright notice and this permission notice shall be
18included in all copies or substantial portions of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27----------------------------------------------------------------------
28*/
29#include "libbb.h"
30
/* Value of the hexadecimal digit C (either case), or -1 if C is not one. */
static int hexval(unsigned c)
{
	unsigned lower;

	if (c >= '0' && c <= '9')
		return (int)(c - '0');

	/* Setting bit 5 folds 'A'..'F' onto 'a'..'f'. */
	lower = c | 32;
	if (lower >= 'a' && lower <= 'f')
		return (int)(lower - 'a') + 10;

	return -1;
}
38
/*
 * Convert the textual address S of family AF to binary form in A0.
 *
 *   af - AF_INET (4 bytes written) or AF_INET6 (16 bytes written)
 *   s  - NUL-terminated text form of the address
 *   a0 - output buffer, filled in network byte order
 *
 * Returns 1 on success, 0 if S is not a valid address for AF, and -1 with
 * errno set to EAFNOSUPPORT for any other family.  For AF_INET the output
 * may already be partly written when 0 is returned.
 */
int inet_pton(int af, const char *restrict s, void *restrict a0)
{
	uint16_t ip[8];
	unsigned char *a = a0;
	int i, j, v, d, brk=-1, need_v4=0;

	if (af==AF_INET) {
		/* Four decimal fields of 1-3 digits, each <= 255 and without a
		 * leading zero, separated by '.'; nothing may follow the fourth. */
		for (i=0; i<4; i++) {
			for (v=j=0; j<3 && isdigit(s[j]); j++)
				v = 10*v + s[j]-'0';
			if (j==0 || (j>1 && s[0]=='0') || v>255) return 0;
			a[i] = v;
			if (s[j]==0 && i==3) return 1;
			if (s[j]!='.') return 0;
			s += j+1;
		}
		return 0;
	} else if (af!=AF_INET6) {
		errno = EAFNOSUPPORT;
		return -1;
	}

	/* A leading ':' is only valid as the start of "::". */
	if (*s==':' && *++s!=':') return 0;

	/* i counts 16-bit groups; brk records the group index of "::". */
	for (i=0; ; i++) {
		if (s[0]==':' && brk<0) {
			/* First "::" seen: remember where the zero run goes. */
			brk=i;
			ip[i&7]=0;
			if (!*++s) break;
			if (i==7) return 0;
			continue;
		}
		/* Up to four hex digits form one group. */
		for (v=j=0; j<4 && (d=hexval(s[j]))>=0; j++)
			v=16*v+d;
		if (j==0) return 0;
		ip[i&7] = v;
		/* End of input is fine after 8 groups, or earlier if "::" was seen. */
		if (!s[j] && (brk>=0 || i==7)) break;
		if (i==7) return 0;
		if (s[j]!=':') {
			/* A '.' means the current group starts an embedded dotted
			 * quad, allowed only in the last 32 bits or after "::". */
			if (s[j]!='.' || (i<6 && brk<0)) return 0;
			need_v4=1;
			i++;
			break;
		}
		s += j+1;
	}
	if (brk>=0) {
		/* Slide the groups that followed "::" to the end and zero the gap. */
		memmove(ip+brk+7-i, ip+brk, 2*(i+1-brk));
		for (j=0; j<7-i; j++) ip[brk+j] = 0;
	}
	/* Emit the eight groups big-endian. */
	for (j=0; j<8; j++) {
		*a++ = ip[j]>>8;
		*a++ = ip[j];
	}
	/* s still points at the start of the dotted quad; it overwrites the
	 * last four output bytes. */
	if (need_v4 && inet_pton(AF_INET, (void *)s, a-4) <= 0) return 0;
	return 1;
}
diff --git a/win32/ioctl.c b/win32/ioctl.c
new file mode 100644
index 000000000..93f9f504d
--- /dev/null
+++ b/win32/ioctl.c
@@ -0,0 +1,46 @@
1#include "libbb.h"
2
3static int mingw_get_terminal_width_height(struct winsize *win)
4{
5 int fd;
6 HANDLE handle;
7 CONSOLE_SCREEN_BUFFER_INFO sbi;
8
9 win->ws_row = 0;
10 win->ws_col = 0;
11
12 for (fd=STDOUT_FILENO; fd<=STDERR_FILENO; ++fd) {
13 handle = (HANDLE)_get_osfhandle(fd);
14 if (handle != INVALID_HANDLE_VALUE &&
15 GetConsoleScreenBufferInfo(handle, &sbi) != 0) {
16 win->ws_row = sbi.srWindow.Bottom - sbi.srWindow.Top + 1;
17 win->ws_col = sbi.srWindow.Right - sbi.srWindow.Left + 1;
18 return 0;
19 }
20 }
21
22 return -1;
23}
24
25int ioctl(int fd UNUSED_PARAM, int code, ...)
26{
27 va_list ap;
28 void *arg;
29 int ret = -1;
30
31 va_start(ap, code);
32
33 switch (code) {
34 case TIOCGWINSZ:
35 arg = va_arg(ap, void *);
36 ret = mingw_get_terminal_width_height((struct winsize *)arg);
37 break;
38 default:
39 ret = -1;
40 errno = EINVAL;
41 break;
42 }
43
44 va_end(ap);
45 return ret;
46}
diff --git a/win32/isaac.c b/win32/isaac.c
new file mode 100644
index 000000000..19b96de94
--- /dev/null
+++ b/win32/isaac.c
@@ -0,0 +1,192 @@
1/*
2------------------------------------------------------------------------------
3readable.c: My random number generator, ISAAC.
4(c) Bob Jenkins, March 1996, Public Domain
5You may use this code in any way you wish, and it is free. No warrantee.
6* May 2008 -- made it not depend on standard.h
7------------------------------------------------------------------------------
8
9 The original version of this file was downloaded from Bob Jenkins website:
10
11 http://burtleburtle.net/bob/rand/isaacafa.html
12
13 The isaac and randinit functions have been slightly modified to silence
14 warnings in modern compilers; the get_entropy and get_random_bytes have
15 been added.
16
17 These changes were made by R M Yorston and are also dedicated to the
18 public domain.
19*/
20#include "libbb.h"
21#include <ntsecapi.h>
22
/* Complete state of one ISAAC generator. */
typedef struct {
  /* external results: the 256 words written by each isaac() call; also
     read as the seed by randinit() when its flag is set */
  uint32_t randrsl[256];

  /* internal state */
  uint32_t mm[256];
  uint32_t aa, bb, cc; /* accumulator, last result word, call counter */
} isaac_t;
31
32
33static void isaac(isaac_t *t)
34{
35 register uint32_t i,x,y;
36
37 t->cc = t->cc + 1; /* cc just gets incremented once per 256 results */
38 t->bb = t->bb + t->cc; /* then combined with bb */
39
40 for (i=0; i<256; ++i)
41 {
42 x = t->mm[i];
43 switch (i%4)
44 {
45 case 0: t->aa = t->aa^(t->aa<<13); break;
46 case 1: t->aa = t->aa^(t->aa>>6); break;
47 case 2: t->aa = t->aa^(t->aa<<2); break;
48 case 3: t->aa = t->aa^(t->aa>>16); break;
49 }
50 t->aa = t->mm[(i+128)%256] + t->aa;
51 t->mm[i] = y = t->mm[(x>>2)%256] + t->aa + t->bb;
52 t->randrsl[i] = t->bb = t->mm[(y>>10)%256] + x;
53
54 /* Note that bits 2..9 are chosen from x but 10..17 are chosen
55 from y. The only important thing here is that 2..9 and 10..17
56 don't overlap. 2..9 and 10..17 were then chosen for speed in
57 the optimized version (rand.c) */
58 /* See http://burtleburtle.net/bob/rand/isaac.html
59 for further explanations and analysis. */
60 }
61}
62
63
/* mix(a..h): scramble eight 32-bit words in place.  Every argument must be
   a modifiable lvalue and is evaluated several times.  The macro expands
   to a braced block, so `mix(...);' is followed by an empty statement.

   Note kept from the original, describing the flag argument of randinit():
   if (flag!=0), then use the contents of randrsl[] to initialize mm[]. */
#define mix(a,b,c,d,e,f,g,h) \
{ \
 a^=b<<11; d+=a; b+=c; \
 b^=c>>2; e+=b; c+=d; \
 c^=d<<8; f+=c; d+=e; \
 d^=e>>16; g+=d; e+=f; \
 e^=f<<10; h+=e; f+=g; \
 f^=g>>4; a+=f; g+=h; \
 g^=h<<8; b+=g; h+=a; \
 h^=a>>9; c+=h; a+=b; \
}
76
77static void randinit(isaac_t *t, int flag)
78{
79 int i;
80 uint32_t a,b,c,d,e,f,g,h;
81 t->aa = t->bb = t->cc = 0;
82 a=b=c=d=e=f=g=h=0x9e3779b9; /* the golden ratio */
83
84 for (i=0; i<4; ++i) /* scramble it */
85 {
86 mix(a,b,c,d,e,f,g,h);
87 }
88
89 for (i=0; i<256; i+=8) /* fill in mm[] with messy stuff */
90 {
91 if (flag) /* use all the information in the seed */
92 {
93 a+=t->randrsl[i ]; b+=t->randrsl[i+1]; c+=t->randrsl[i+2];
94 d+=t->randrsl[i+3]; e+=t->randrsl[i+4]; f+=t->randrsl[i+5];
95 g+=t->randrsl[i+6]; h+=t->randrsl[i+7];
96 }
97 mix(a,b,c,d,e,f,g,h);
98 t->mm[i ]=a; t->mm[i+1]=b; t->mm[i+2]=c; t->mm[i+3]=d;
99 t->mm[i+4]=e; t->mm[i+5]=f; t->mm[i+6]=g; t->mm[i+7]=h;
100 }
101
102 if (flag)
103 { /* do a second pass to make all of the seed affect all of mm */
104 for (i=0; i<256; i+=8)
105 {
106 a+=t->mm[i ]; b+=t->mm[i+1]; c+=t->mm[i+2]; d+=t->mm[i+3];
107 e+=t->mm[i+4]; f+=t->mm[i+5]; g+=t->mm[i+6]; h+=t->mm[i+7];
108 mix(a,b,c,d,e,f,g,h);
109 t->mm[i ]=a; t->mm[i+1]=b; t->mm[i+2]=c; t->mm[i+3]=d;
110 t->mm[i+4]=e; t->mm[i+5]=f; t->mm[i+6]=g; t->mm[i+7]=h;
111 }
112 }
113
114 isaac(t); /* fill in the first set of results */
115}
116
/*
 * Stuff a few bytes of random-ish data into the generator state.
 * This is unlikely to be very robust: don't rely on it for
 * anything that needs to be secure.
 *
 * The whole of t->randrsl is requested from RtlGenRandom().  If that call
 * fails, only the system time is stored at the start of the array and the
 * remaining words keep whatever they held before the call.
 */
static void get_entropy(isaac_t *t)
{
  if (!RtlGenRandom(t->randrsl, sizeof(uint32_t)*256))
    GetSystemTimeAsFileTime((FILETIME *)t->randrsl);

  /* Disabled debugging aid: hex dump of the first 256 seed bytes. */
#if 0
  {
    unsigned char *p = (unsigned char *)t->randrsl;
    int j;

    for (j=0; j<256; ++j) {
      fprintf(stderr, "%02x", p[j]);
      if ((j&31) == 31) {
        fprintf(stderr, "\n");
      }
      else if ((j&3) == 3) {
        fprintf(stderr, " ");
      }
    }
    fprintf(stderr, "\n");
  }
#endif
}
145
146#define RAND_BYTES sizeof(t->randrsl)
147#define RAND_WORDS (sizeof(t->randrsl)/sizeof(t->randrsl[0]))
148
149/*
150 * Place 'count' random bytes in the buffer 'buf'. You're responsible
151 * for ensuring the buffer is big enough.
152 */
153ssize_t get_random_bytes(void *buf, ssize_t count)
154{
155 static isaac_t *t = NULL;
156 static int rand_index = 0;
157 ssize_t save_count = count;
158 unsigned char *ptr;
159
160 if (buf == NULL || count < 0) {
161 errno = EINVAL;
162 return -1;
163 }
164
165 if (!t) {
166 t = xzalloc(sizeof(isaac_t));
167
168 get_entropy(t);
169 randinit(t, 1);
170 isaac(t);
171 rand_index = 0;
172 }
173
174 ptr = (unsigned char *)t->randrsl;
175 while (count > 0) {
176 int bytes_left = RAND_BYTES - rand_index;
177 ssize_t delta = MIN(bytes_left, count);
178
179 memcpy(buf, ptr+rand_index, delta);
180 buf += delta;
181 count -= delta;
182 rand_index += delta;
183
184 if (rand_index >= RAND_BYTES) {
185 /* generate more */
186 isaac(t);
187 rand_index = 0;
188 }
189 }
190
191 return save_count;
192}
diff --git a/win32/lazyload.h b/win32/lazyload.h
new file mode 100644
index 000000000..034bc7e45
--- /dev/null
+++ b/win32/lazyload.h
@@ -0,0 +1,27 @@
1#ifndef LAZYLOAD_H
2#define LAZYLOAD_H
3
4/* simplify loading of DLL functions */
5
/* Per-function record passed to get_proc_addr(); one is defined, zeroed,
 * by each DECLARE_PROC_ADDR(). */
struct proc_addr {
	FARPROC pfunction;	/* presumably the resolved address, NULL until loaded - confirm in get_proc_addr() */
	unsigned initialized;	/* presumably nonzero once a lookup was attempted - confirm in get_proc_addr() */
};
10
11/* Declares a function to be loaded dynamically from a DLL. */
12#define DECLARE_PROC_ADDR(rettype, function, ...) \
13 static struct proc_addr proc_addr_##function = { NULL, 0 }; \
14 rettype (WINAPI *function)(__VA_ARGS__)
15
16/*
17 * Loads a function from a DLL (once-only).
18 * Returns non-NULL function pointer on success.
19 * Returns NULL and sets errno == ENOSYS on failure.
20 */
21#define INIT_PROC_ADDR(dll, function) \
22 (function = get_proc_addr(#dll, #function, &proc_addr_##function))
23
24void *get_proc_addr(const char *dll, const char *function,
25 struct proc_addr *proc);
26
27#endif
diff --git a/win32/match_class.c b/win32/match_class.c
new file mode 100644
index 000000000..789e0df02
--- /dev/null
+++ b/win32/match_class.c
@@ -0,0 +1,7 @@
1#include "libbb.h"
2#include "match_class.h"
3
/* Look NAME up in the CHAR_CLASSES string list via index_in_strings().
 * NOTE(review): presumably yields the matching CCLASS_* value (the enum in
 * match_class.h lists the classes in the same order) and a negative value
 * for an unknown name - confirm against index_in_strings(). */
int match_class(const char *name)
{
	return index_in_strings(CHAR_CLASSES, name);
}
diff --git a/win32/match_class.h b/win32/match_class.h
new file mode 100644
index 000000000..92fd1323f
--- /dev/null
+++ b/win32/match_class.h
@@ -0,0 +1,11 @@
/* Character class names as a sequence of NUL-terminated strings, in the
 * same order as the CCLASS_* enumerators below.  Keep the two in step. */
#define CHAR_CLASSES \
 "alnum\0alpha\0blank\0cntrl\0digit\0graph\0" \
 "lower\0print\0punct\0space\0upper\0xdigit\0"

/* One enumerator per name in CHAR_CLASSES, starting at 0. */
enum {
	CCLASS_ALNUM, CCLASS_ALPHA, CCLASS_BLANK, CCLASS_CNTRL,
	CCLASS_DIGIT, CCLASS_GRAPH, CCLASS_LOWER, CCLASS_PRINT,
	CCLASS_PUNCT, CCLASS_SPACE, CCLASS_UPPER, CCLASS_XDIGIT
};

/* Look a class name up in CHAR_CLASSES (defined in match_class.c). */
extern int match_class(const char *name);
diff --git a/win32/mingw.c b/win32/mingw.c
new file mode 100644
index 000000000..7a5198ccf
--- /dev/null
+++ b/win32/mingw.c
@@ -0,0 +1,2540 @@
1#include "libbb.h"
2#include <userenv.h>
3#include "lazyload.h"
4#if ENABLE_FEATURE_EXTRA_FILE_DATA
5#include <aclapi.h>
6#endif
7#include <ntdef.h>
8#include <psapi.h>
9
10#if defined(__MINGW64_VERSION_MAJOR)
11#if ENABLE_GLOBBING
12extern int _setargv(void);
13int _setargv(void)
14{
15 extern int _dowildcard;
16 char *glob;
17
18 _dowildcard = -1;
19 glob = getenv("BB_GLOBBING");
20 if (glob) {
21 if (strcmp(glob, "0") == 0)
22 _dowildcard = 0;
23 }
24 else {
25 setenv("BB_GLOBBING", "0", TRUE);
26 }
27 return 0;
28}
29#else
30int _dowildcard = 0;
31#endif
32
33#undef _fmode
34int _fmode = _O_BINARY;
35#endif
36
37#if !defined(__MINGW64_VERSION_MAJOR)
38#if ENABLE_GLOBBING
39int _CRT_glob = 1;
40#else
41int _CRT_glob = 0;
42#endif
43
44unsigned int _CRT_fmode = _O_BINARY;
45#endif
46
47smallint bb_got_signal;
48static mode_t current_umask = DEFAULT_UMASK;
49
50#pragma GCC optimize ("no-if-conversion")
51int err_win_to_posix(void)
52{
53 int error = ENOSYS;
54 switch(GetLastError()) {
55 case ERROR_ACCESS_DENIED: error = EACCES; break;
56 case ERROR_ACCOUNT_DISABLED: error = EACCES; break;
57 case ERROR_ACCOUNT_RESTRICTION: error = EACCES; break;
58 case ERROR_ALREADY_ASSIGNED: error = EBUSY; break;
59 case ERROR_ALREADY_EXISTS: error = EEXIST; break;
60 case ERROR_ARITHMETIC_OVERFLOW: error = ERANGE; break;
61 case ERROR_BAD_COMMAND: error = EIO; break;
62 case ERROR_BAD_DEVICE: error = ENODEV; break;
63 case ERROR_BAD_DRIVER_LEVEL: error = ENXIO; break;
64 case ERROR_BAD_EXE_FORMAT: error = ENOEXEC; break;
65 case ERROR_BAD_FORMAT: error = ENOEXEC; break;
66 case ERROR_BAD_LENGTH: error = EINVAL; break;
67 case ERROR_BAD_PATHNAME: error = ENOENT; break;
68 case ERROR_BAD_NET_NAME: error = ENOENT; break;
69 case ERROR_BAD_NETPATH: error = ENOENT; break;
70 case ERROR_BAD_PIPE: error = EPIPE; break;
71 case ERROR_BAD_UNIT: error = ENODEV; break;
72 case ERROR_BAD_USERNAME: error = EINVAL; break;
73 case ERROR_BROKEN_PIPE: error = EPIPE; break;
74 case ERROR_BUFFER_OVERFLOW: error = ENAMETOOLONG; break;
75 case ERROR_BUSY: error = EBUSY; break;
76 case ERROR_BUSY_DRIVE: error = EBUSY; break;
77 case ERROR_CALL_NOT_IMPLEMENTED: error = ENOSYS; break;
78 case ERROR_CANNOT_MAKE: error = EACCES; break;
79 case ERROR_CANTOPEN: error = EIO; break;
80 case ERROR_CANTREAD: error = EIO; break;
81 case ERROR_CANTWRITE: error = EIO; break;
82 case ERROR_CRC: error = EIO; break;
83 case ERROR_CURRENT_DIRECTORY: error = EACCES; break;
84 case ERROR_DEVICE_IN_USE: error = EBUSY; break;
85 case ERROR_DEV_NOT_EXIST: error = ENODEV; break;
86 case ERROR_DIRECTORY: error = EINVAL; break;
87 case ERROR_DIR_NOT_EMPTY: error = ENOTEMPTY; break;
88 case ERROR_DISK_CHANGE: error = EIO; break;
89 case ERROR_DISK_FULL: error = ENOSPC; break;
90 case ERROR_DRIVE_LOCKED: error = EBUSY; break;
91 case ERROR_ENVVAR_NOT_FOUND: error = EINVAL; break;
92 case ERROR_EXE_MARKED_INVALID: error = ENOEXEC; break;
93 case ERROR_FILENAME_EXCED_RANGE: error = ENAMETOOLONG; break;
94 case ERROR_FILE_EXISTS: error = EEXIST; break;
95 case ERROR_FILE_INVALID: error = ENODEV; break;
96 case ERROR_FILE_NOT_FOUND: error = ENOENT; break;
97 case ERROR_GEN_FAILURE: error = EIO; break;
98 case ERROR_HANDLE_DISK_FULL: error = ENOSPC; break;
99 case ERROR_INSUFFICIENT_BUFFER: error = ENOMEM; break;
100 case ERROR_INVALID_ACCESS: error = EACCES; break;
101 case ERROR_INVALID_ADDRESS: error = EFAULT; break;
102 case ERROR_INVALID_BLOCK: error = EFAULT; break;
103 case ERROR_INVALID_DATA: error = EINVAL; break;
104 case ERROR_INVALID_DRIVE: error = ENODEV; break;
105 case ERROR_INVALID_EXE_SIGNATURE: error = ENOEXEC; break;
106 case ERROR_INVALID_FLAGS: error = EINVAL; break;
107 case ERROR_INVALID_FUNCTION: error = ENOSYS; break;
108 case ERROR_INVALID_HANDLE: error = EBADF; break;
109 case ERROR_INVALID_LOGON_HOURS: error = EACCES; break;
110 case ERROR_INVALID_NAME: error = EINVAL; break;
111 case ERROR_INVALID_OWNER: error = EINVAL; break;
112 case ERROR_INVALID_PARAMETER: error = EINVAL; break;
113 case ERROR_INVALID_PASSWORD: error = EPERM; break;
114 case ERROR_INVALID_PRIMARY_GROUP: error = EINVAL; break;
115 case ERROR_INVALID_SIGNAL_NUMBER: error = EINVAL; break;
116 case ERROR_INVALID_TARGET_HANDLE: error = EIO; break;
117 case ERROR_INVALID_WORKSTATION: error = EACCES; break;
118 case ERROR_IO_DEVICE: error = EIO; break;
119 case ERROR_IO_INCOMPLETE: error = EINTR; break;
120 case ERROR_LOCKED: error = EBUSY; break;
121 case ERROR_LOCK_VIOLATION: error = EACCES; break;
122 case ERROR_LOGON_FAILURE: error = EACCES; break;
123 case ERROR_MAPPED_ALIGNMENT: error = EINVAL; break;
124 case ERROR_META_EXPANSION_TOO_LONG: error = E2BIG; break;
125 case ERROR_MORE_DATA: error = EPIPE; break;
126 case ERROR_NEGATIVE_SEEK: error = ESPIPE; break;
127 case ERROR_NOACCESS: error = EFAULT; break;
128 case ERROR_NONE_MAPPED: error = EINVAL; break;
129 case ERROR_NOT_ENOUGH_MEMORY: error = ENOMEM; break;
130 case ERROR_NOT_READY: error = EAGAIN; break;
131 case ERROR_NOT_SAME_DEVICE: error = EXDEV; break;
132 case ERROR_NO_DATA: error = EPIPE; break;
133 case ERROR_NO_MORE_SEARCH_HANDLES: error = EIO; break;
134 case ERROR_NO_PROC_SLOTS: error = EAGAIN; break;
135 case ERROR_NO_SUCH_PRIVILEGE: error = EACCES; break;
136 case ERROR_OPEN_FAILED: error = EIO; break;
137 case ERROR_OPEN_FILES: error = EBUSY; break;
138 case ERROR_OPERATION_ABORTED: error = EINTR; break;
139 case ERROR_OUTOFMEMORY: error = ENOMEM; break;
140 case ERROR_PASSWORD_EXPIRED: error = EACCES; break;
141 case ERROR_PATH_BUSY: error = EBUSY; break;
142 case ERROR_PATH_NOT_FOUND: error = ENOENT; break;
143 case ERROR_PIPE_BUSY: error = EBUSY; break;
144 case ERROR_PIPE_CONNECTED: error = EPIPE; break;
145 case ERROR_PIPE_LISTENING: error = EPIPE; break;
146 case ERROR_PIPE_NOT_CONNECTED: error = EPIPE; break;
147 case ERROR_PRIVILEGE_NOT_HELD: error = EACCES; break;
148 case ERROR_READ_FAULT: error = EIO; break;
149 case ERROR_SEEK: error = EIO; break;
150 case ERROR_SEEK_ON_DEVICE: error = ESPIPE; break;
151 case ERROR_SHARING_BUFFER_EXCEEDED: error = ENFILE; break;
152 case ERROR_SHARING_VIOLATION: error = EACCES; break;
153 case ERROR_STACK_OVERFLOW: error = ENOMEM; break;
154 case ERROR_SWAPERROR: error = ENOENT; break;
155 case ERROR_TOO_MANY_LINKS: error = EMLINK; break;
156 case ERROR_TOO_MANY_MODULES: error = EMFILE; break;
157 case ERROR_TOO_MANY_OPEN_FILES: error = EMFILE; break;
158 case ERROR_UNRECOGNIZED_MEDIA: error = ENXIO; break;
159 case ERROR_UNRECOGNIZED_VOLUME: error = ENODEV; break;
160 case ERROR_WAIT_NO_CHILDREN: error = ECHILD; break;
161 case ERROR_WRITE_FAULT: error = EIO; break;
162 case ERROR_WRITE_PROTECT: error = EROFS; break;
163 case ERROR_CANT_RESOLVE_FILENAME: error = ELOOP; break;
164 }
165 return error;
166}
167#pragma GCC reset_options
168
169#undef strerror
/*
 * strerror() replacement which supplies a message for ELOOP and defers
 * to the C library for every other error number.
 */
char *mingw_strerror(int errnum)
{
	return (errnum == ELOOP)
		? (char *)"Too many levels of symbolic links"
		: strerror(errnum);
}
176
/*
 * Return a description of signal 'sig': fixed text for SIGTERM and
 * SIGKILL, otherwise whatever get_signame() returns.
 */
char *strsignal(int sig)
{
	switch (sig) {
	case SIGTERM:
		return (char *)"Terminated";
	case SIGKILL:
		return (char *)"Killed";
	default:
		return (char *)get_signame(sig);
	}
}
185
186static int zero_fd = -1;
187static int rand_fd = -1;
188
189/*
190 * Determine if 'filename' corresponds to one of the supported
191 * device files. Constants for these are defined as an enum
192 * in mingw.h.
193 */
194int get_dev_type(const char *filename)
195{
196 if (filename && is_prefixed_with(filename, "/dev/"))
197 return index_in_strings("null\0zero\0urandom\0", filename+5);
198
199 return NOT_DEVICE;
200}
201
202void update_special_fd(int dev, int fd)
203{
204 if (dev == DEV_ZERO)
205 zero_fd = fd;
206 else if (dev == DEV_URANDOM)
207 rand_fd = fd;
208}
209
210#define PREFIX_LEN (sizeof(DEV_FD_PREFIX)-1)
211static int get_dev_fd(const char *filename)
212{
213 int fd;
214
215 if (filename && is_prefixed_with(filename, DEV_FD_PREFIX)) {
216 fd = bb_strtou(filename+PREFIX_LEN, NULL, 10);
217 if (errno == 0 && (HANDLE)_get_osfhandle(fd) != INVALID_HANDLE_VALUE)
218 return fd;
219 }
220 return -1;
221}
222
223static int mingw_is_directory(const char *path);
224#undef open
225int mingw_open (const char *filename, int oflags, ...)
226{
227 va_list args;
228 int pmode, mode = 0666;
229 int fd;
230 int special = (oflags & O_SPECIAL);
231 int dev = get_dev_type(filename);
232
233 /* /dev/null is always allowed, others only if O_SPECIAL is set */
234 if (dev == DEV_NULL || (special && dev != NOT_DEVICE)) {
235 filename = "nul";
236 oflags = O_RDWR;
237 }
238 else if ((fd=get_dev_fd(filename)) >= 0) {
239 return fd;
240 }
241
242 if ((oflags & O_CREAT)) {
243 va_start(args, oflags);
244 mode = va_arg(args, int);
245 va_end(args);
246 }
247
248 pmode = ((mode & S_IWUSR) ? _S_IWRITE : 0) |
249 ((mode & S_IRUSR) ? _S_IREAD : 0);
250
251 fd = open(filename, oflags&~O_SPECIAL, pmode);
252 if (fd >= 0) {
253 update_special_fd(dev, fd);
254 }
255 else if ((oflags & O_ACCMODE) != O_RDONLY && errno == EACCES) {
256 if (mingw_is_directory(filename))
257 errno = EISDIR;
258 }
259 return fd;
260}
261
262int mingw_xopen(const char *pathname, int flags)
263{
264 int ret;
265
266 /* allow use of special devices */
267 ret = mingw_open(pathname, flags|O_SPECIAL);
268 if (ret < 0) {
269 bb_perror_msg_and_die("can't open '%s'", pathname);
270 }
271 return ret;
272}
273
274ssize_t FAST_FUNC mingw_open_read_close(const char *fn, void *buf, size_t size)
275{
276 /* allow use of special devices */
277 int fd = mingw_open(fn, O_RDONLY|O_SPECIAL);
278 if (fd < 0)
279 return fd;
280 return read_close(fd, buf, size);
281}
282
283#undef fopen
284FILE *mingw_fopen (const char *filename, const char *otype)
285{
286 int fd;
287
288 if (get_dev_type(filename) == DEV_NULL)
289 filename = "nul";
290 else if ((fd=get_dev_fd(filename)) >= 0)
291 return fdopen(fd, otype);
292 return fopen(filename, otype);
293}
294
295#undef read
296ssize_t mingw_read(int fd, void *buf, size_t count)
297{
298 if (fd == zero_fd) {
299 memset(buf, 0, count);
300 return count;
301 }
302 else if (fd == rand_fd) {
303 return get_random_bytes(buf, count);
304 }
305 return read(fd, buf, count);
306}
307
308#undef close
309int mingw_close(int fd)
310{
311 if (fd == zero_fd) {
312 zero_fd = -1;
313 }
314 if (fd == rand_fd) {
315 rand_fd = -1;
316 }
317 return close(fd);
318}
319
320#undef dup2
/*
 * dup2() wrapper which returns 'fdto' whenever the underlying call
 * returns anything other than -1, and -1 otherwise.
 */
int mingw_dup2 (int fd, int fdto)
{
	if (dup2(fd, fdto) == -1)
		return -1;
	return fdto;
}
326
327/*
328 * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC.
329 * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch.
330 */
331static inline long long filetime_to_hnsec(const FILETIME *ft)
332{
333 long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime;
334 /* Windows to Unix Epoch conversion */
335 return winTime - 116444736000000000LL;
336}
337
338static inline struct timespec filetime_to_timespec(const FILETIME *ft)
339{
340 struct timespec ts;
341 long long winTime = filetime_to_hnsec(ft);
342
343 ts.tv_sec = (time_t)(winTime / 10000000);
344 ts.tv_nsec = (long)(winTime % 10000000) * 100;
345
346 return ts;
347}
348
349static inline mode_t file_attr_to_st_mode(DWORD attr)
350{
351 mode_t fMode = S_IRUSR|S_IRGRP|S_IROTH;
352 if (attr & FILE_ATTRIBUTE_DIRECTORY)
353 fMode |= (S_IFDIR|S_IRWXU|S_IRWXG|S_IRWXO) & ~(current_umask & 0022);
354 else if (attr & FILE_ATTRIBUTE_DEVICE)
355 fMode |= S_IFCHR|S_IWUSR|S_IWGRP|S_IWOTH;
356 else
357 fMode |= S_IFREG;
358 if (!(attr & (FILE_ATTRIBUTE_READONLY|FILE_ATTRIBUTE_DEVICE)))
359 fMode |= (S_IWUSR|S_IWGRP|S_IWOTH) & ~(current_umask & 0022);
360 return fMode;
361}
362
363static int get_file_attr(const char *fname, WIN32_FILE_ATTRIBUTE_DATA *fdata)
364{
365 char *want_dir;
366
367 if (get_dev_type(fname) == DEV_NULL || get_dev_fd(fname) >= 0) {
368 /* Fake attributes for special devices */
369 /* Though not /dev/zero or /dev/urandom */
370 FILETIME epoch = {0xd53e8000, 0x019db1de}; // Unix epoch as FILETIME
371 fdata->dwFileAttributes = FILE_ATTRIBUTE_DEVICE;
372 fdata->ftCreationTime = fdata->ftLastAccessTime =
373 fdata->ftLastWriteTime = epoch;
374 fdata->nFileSizeHigh = fdata->nFileSizeLow = 0;
375 return 0;
376 }
377
378 want_dir = last_char_is_dir_sep(fname);
379 if (GetFileAttributesExA(fname, GetFileExInfoStandard, fdata)) {
380 if (!(fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) && want_dir)
381 return ENOTDIR;
382 fdata->dwFileAttributes &= ~FILE_ATTRIBUTE_DEVICE;
383 return 0;
384 }
385
386 if (GetLastError() == ERROR_SHARING_VIOLATION) {
387 HANDLE hnd;
388 WIN32_FIND_DATA fd;
389
390 if ((hnd=FindFirstFile(fname, &fd)) != INVALID_HANDLE_VALUE) {
391 fdata->dwFileAttributes =
392 fd.dwFileAttributes & ~FILE_ATTRIBUTE_DEVICE;
393 fdata->ftCreationTime = fd.ftCreationTime;
394 fdata->ftLastAccessTime = fd.ftLastAccessTime;
395 fdata->ftLastWriteTime = fd.ftLastWriteTime;
396 fdata->nFileSizeHigh = fd.nFileSizeHigh;
397 fdata->nFileSizeLow = fd.nFileSizeLow;
398 FindClose(hnd);
399 return 0;
400 }
401 }
402
403 switch (GetLastError()) {
404 case ERROR_ACCESS_DENIED:
405 case ERROR_SHARING_VIOLATION:
406 case ERROR_LOCK_VIOLATION:
407 case ERROR_SHARING_BUFFER_EXCEEDED:
408 return EACCES;
409 case ERROR_BUFFER_OVERFLOW:
410 return ENAMETOOLONG;
411 case ERROR_NOT_ENOUGH_MEMORY:
412 return ENOMEM;
413 case ERROR_INVALID_NAME:
414 if (want_dir)
415 return ENOTDIR;
416 default:
417 return ENOENT;
418 }
419}
420
421#undef umask
422mode_t mingw_umask(mode_t new_mode)
423{
424 mode_t tmp_mode;
425
426 tmp_mode = current_umask;
427 current_umask = new_mode & 0777;
428
429 umask((new_mode & S_IWUSR) ? _S_IWRITE : 0);
430
431 return tmp_mode;
432}
433
434/*
435 * Examine a file's contents to determine if it can be executed. This
436 * should be a last resort: in most cases it's much more efficient to
437 * check the file extension.
438 *
439 * We look for two types of file: shell scripts and binary executables.
440 */
441static int has_exec_format(const char *name)
442{
443 HANDLE fh;
444 int fd = -1;
445 ssize_t n;
446 int sig;
447 unsigned int offset;
448 unsigned char buf[1024];
449
450 /* special case: skip DLLs, there are thousands of them! */
451 if (is_suffixed_with_case(name, ".dll"))
452 return 0;
453
454 /* Open file and try to avoid updating access time */
455 fh = CreateFileA(name, GENERIC_READ | FILE_WRITE_ATTRIBUTES,
456 FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
457 if (fh != INVALID_HANDLE_VALUE) {
458 FILETIME last_access = { 0xffffffff, 0xffffffff };
459
460 SetFileTime(fh, NULL, &last_access, NULL);
461 fd = _open_osfhandle((intptr_t)fh, O_RDONLY);
462 }
463
464 if (fd < 0)
465 n = open_read_close(name, buf, sizeof(buf));
466 else
467 n = read_close(fd, buf, sizeof(buf));
468
469 if (n < 4) /* Need at least a few bytes and no error */
470 return 0;
471
472 /* shell script */
473 if (buf[0] == '#' && buf[1] == '!') {
474 return 1;
475 }
476
477 /*
478 * Poke about in file to see if it's a PE binary. I've just copied
479 * the magic from the file command.
480 */
481 if (buf[0] == 'M' && buf[1] == 'Z') {
482/* Convert four unsigned bytes to an unsigned int (little-endian) */
483#define LE4(b, o) (((unsigned)b[o+3] << 24) + (b[o+2] << 16) + \
484 (b[o+1] << 8) + b[o])
485
486 /* Actually Portable Executable */
487 /* See ape/ape.S at https://github.com/jart/cosmopolitan */
488 const unsigned char *qFpD = (unsigned char *)"qFpD";
489 if (n > 6 && LE4(buf, 2) == LE4(qFpD, 0))
490 return 1;
491
492 if (n > 0x3f) {
493 offset = (buf[0x19] << 8) + buf[0x18];
494 if (offset > 0x3f) {
495 offset = LE4(buf, 0x3c);
496 if (offset < sizeof(buf)-100) {
497 if (memcmp(buf+offset, "PE\0\0", 4) == 0) {
498 sig = (buf[offset+25] << 8) + buf[offset+24];
499 if (sig == 0x10b || sig == 0x20b) {
500 sig = (buf[offset+23] << 8) + buf[offset+22];
501 if ((sig & 0x2000) != 0) {
502 /* DLL */
503 return 0;
504 }
505 sig = buf[offset+92];
506 return (sig == 1 || sig == 2 || sig == 3
507 || sig == 7);
508 }
509 }
510 }
511 }
512 }
513 }
514
515 return 0;
516}
517
518#if ENABLE_FEATURE_EXTRA_FILE_DATA
519static uid_t file_owner(HANDLE fh, struct mingw_stat *buf)
520{
521 PSID pSidOwner;
522 PACL pDACL;
523 PSECURITY_DESCRIPTOR pSD;
524 static PTOKEN_USER user = NULL;
525 static HANDLE impersonate = INVALID_HANDLE_VALUE;
526 static int initialised = 0;
527 uid_t uid = 0;
528 DWORD *ptr;
529 unsigned char prefix[] = {
530 0x01, 0x05, 0x00, 0x00,
531 0x00, 0x00, 0x00, 0x05,
532 0x15, 0x00, 0x00, 0x00
533 };
534 unsigned char nullsid[] = {
535 0x01, 0x01, 0x00, 0x00,
536 0x00, 0x00, 0x00, 0x01,
537 0x00, 0x00, 0x00, 0x00
538 };
539
540 /* get SID of current user */
541 if (!initialised) {
542 HANDLE token;
543 DWORD ret = 0;
544
545 initialised = 1;
546 if (OpenProcessToken(GetCurrentProcess(),
547 TOKEN_IMPERSONATE | TOKEN_QUERY | TOKEN_DUPLICATE |
548 STANDARD_RIGHTS_READ, &token)) {
549 GetTokenInformation(token, TokenUser, NULL, 0, &ret);
550 if (ret <= 0 || (user=malloc(ret)) == NULL ||
551 !GetTokenInformation(token, TokenUser, user, ret, &ret)) {
552 free(user);
553 user = NULL;
554 }
555 DuplicateToken(token, SecurityImpersonation, &impersonate);
556 CloseHandle(token);
557 }
558 }
559
560 if (user == NULL)
561 return DEFAULT_UID;
562
563 /* get SID of file's owner */
564 if (GetSecurityInfo(fh, SE_FILE_OBJECT,
565 OWNER_SECURITY_INFORMATION | GROUP_SECURITY_INFORMATION |
566 DACL_SECURITY_INFORMATION,
567 &pSidOwner, NULL, &pDACL, NULL, &pSD) != ERROR_SUCCESS)
568 return 0;
569
570 if (EqualSid(pSidOwner, user->User.Sid)) {
571 uid = DEFAULT_UID;
572 } else if (memcmp(pSidOwner, nullsid, sizeof(nullsid)) == 0) {
573 uid = DEFAULT_UID;
574 } else if (memcmp(pSidOwner, prefix, sizeof(prefix)) == 0) {
575 /* for local or domain users use the RID as uid */
576 ptr = (DWORD *)pSidOwner;
577 if (ptr[6] >= 500 && ptr[6] < DEFAULT_UID)
578 uid = (uid_t)ptr[6];
579 }
580
581 if (uid != DEFAULT_UID && impersonate != INVALID_HANDLE_VALUE &&
582 getuid() != 0) {
583 static GENERIC_MAPPING mapping = {
584 FILE_GENERIC_READ, FILE_GENERIC_WRITE,
585 FILE_GENERIC_EXECUTE, FILE_ALL_ACCESS
586 };
587 PRIVILEGE_SET privileges;
588 DWORD grantedAccess;
589 DWORD privilegesLength = sizeof(privileges);
590 DWORD genericAccessRights = MAXIMUM_ALLOWED;
591 BOOL result;
592
593 if (AccessCheck(pSD, impersonate, genericAccessRights,
594 &mapping, &privileges, &privilegesLength,
595 &grantedAccess, &result)) {
596 if (result && (grantedAccess & 0x1200af) == 0x1200af) {
597 buf->st_mode |= (buf->st_mode & S_IRWXU) >> 6;
598 }
599 }
600 }
601 LocalFree(pSD);
602 return uid;
603}
604#endif
605
606static DWORD get_symlink_data(DWORD attr, const char *pathname,
607 WIN32_FIND_DATAA *fbuf)
608{
609 if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
610 HANDLE handle = FindFirstFileA(pathname, fbuf);
611 if (handle != INVALID_HANDLE_VALUE) {
612 FindClose(handle);
613 if ((fbuf->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)) {
614 switch (fbuf->dwReserved0) {
615 case IO_REPARSE_TAG_SYMLINK:
616 case IO_REPARSE_TAG_MOUNT_POINT:
617 case IO_REPARSE_TAG_APPEXECLINK:
618 return fbuf->dwReserved0;
619 }
620 }
621 }
622 }
623 return 0;
624}
625
626static DWORD is_symlink(const char *pathname)
627{
628 WIN32_FILE_ATTRIBUTE_DATA fdata;
629 WIN32_FIND_DATAA fbuf;
630
631 if (!get_file_attr(pathname, &fdata))
632 return get_symlink_data(fdata.dwFileAttributes, pathname, &fbuf);
633 return 0;
634}
635
636static int mingw_is_directory(const char *path)
637{
638 WIN32_FILE_ATTRIBUTE_DATA fdata;
639
640 return get_file_attr(path, &fdata) == 0 &&
641 (fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
642}
643
644#if ENABLE_FEATURE_EXTRA_FILE_DATA
645/*
646 * By default we don't count subdirectories. Counting can be enabled
647 * in specific cases by calling 'count_subdirs(NULL)' before making
648 * any calls to stat(2) or lstat(2) that require accurate values of
649 * st_nlink for directories.
650 */
651int count_subdirs(const char *pathname)
652{
653 int count = 0;
654 DIR *dirp;
655 struct dirent *dp;
656 static int do_count = FALSE;
657
658 if (pathname == NULL) {
659 do_count = TRUE;
660 return 0;
661 }
662
663 if (do_count && (dirp = opendir(pathname))) {
664 while ((dp = readdir(dirp)) != NULL) {
665 if (dp->d_type == DT_DIR)
666 count++;
667 }
668 closedir(dirp);
669 } else {
670 count = 2;
671 }
672 return count;
673}
674#endif
675
676#ifndef FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS
677# define FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS 0x00400000
678#endif
679
680/* If follow is true then act like stat() and report on the link
681 * target. Otherwise report on the link itself.
682 */
683static int do_lstat(int follow, const char *file_name, struct mingw_stat *buf)
684{
685 int err;
686 WIN32_FILE_ATTRIBUTE_DATA fdata;
687 WIN32_FIND_DATAA findbuf;
688 DWORD low, high;
689 off64_t size;
690 char *lname = NULL;
691
692 while (!(err=get_file_attr(file_name, &fdata))) {
693 buf->st_ino = 0;
694 buf->st_uid = DEFAULT_UID;
695 buf->st_gid = DEFAULT_GID;
696 buf->st_dev = buf->st_rdev = 0;
697 buf->st_attr = fdata.dwFileAttributes;
698 buf->st_tag = get_symlink_data(buf->st_attr, file_name, &findbuf);
699
700 if (buf->st_tag) {
701 char *content;
702
703 if (follow) {
704 /* The file size and times are wrong when Windows follows
705 * a symlink. Use the symlink target instead. */
706 file_name = lname = xmalloc_follow_symlinks(file_name);
707 if (!lname)
708 return -1;
709 continue;
710 }
711
712 /* Get the contents of a symlink, not its target. */
713 buf->st_mode = S_IFLNK|S_IRWXU|S_IRWXG|S_IRWXO;
714 content = xmalloc_readlink(file_name);
715 buf->st_size = content ? strlen(content) : 0;
716 free(content);
717 buf->st_atim = filetime_to_timespec(&(findbuf.ftLastAccessTime));
718 buf->st_mtim = filetime_to_timespec(&(findbuf.ftLastWriteTime));
719 buf->st_ctim = filetime_to_timespec(&(findbuf.ftCreationTime));
720 }
721 else {
722 /* The file is not a symlink. */
723 buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes);
724 if (S_ISREG(buf->st_mode) &&
725 (has_exe_suffix(file_name) ||
726 (!(buf->st_attr & FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS) &&
727 has_exec_format(file_name))))
728 buf->st_mode |= S_IXUSR|S_IXGRP|S_IXOTH;
729 buf->st_size = fdata.nFileSizeLow |
730 (((off64_t)fdata.nFileSizeHigh)<<32);
731 buf->st_atim = filetime_to_timespec(&(fdata.ftLastAccessTime));
732 buf->st_mtim = filetime_to_timespec(&(fdata.ftLastWriteTime));
733 buf->st_ctim = filetime_to_timespec(&(fdata.ftCreationTime));
734 }
735 buf->st_nlink = (buf->st_attr & FILE_ATTRIBUTE_DIRECTORY) ? 2 : 1;
736
737#if ENABLE_FEATURE_EXTRA_FILE_DATA
738 if (!(buf->st_attr &
739 (FILE_ATTRIBUTE_DEVICE | FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS))) {
740 DWORD flags;
741 HANDLE fh;
742 BY_HANDLE_FILE_INFORMATION hdata;
743
744 flags = FILE_FLAG_BACKUP_SEMANTICS;
745 if (S_ISLNK(buf->st_mode))
746 flags |= FILE_FLAG_OPEN_REPARSE_POINT;
747 fh = CreateFile(file_name, READ_CONTROL, 0, NULL,
748 OPEN_EXISTING, flags, NULL);
749 if (fh != INVALID_HANDLE_VALUE) {
750 if (GetFileInformationByHandle(fh, &hdata)) {
751 buf->st_dev = hdata.dwVolumeSerialNumber;
752 buf->st_ino = hdata.nFileIndexLow |
753 (((ino_t)hdata.nFileIndexHigh)<<32);
754 buf->st_nlink = (buf->st_attr & FILE_ATTRIBUTE_DIRECTORY) ?
755 count_subdirs(file_name) :
756 hdata.nNumberOfLinks;
757 }
758 buf->st_uid = buf->st_gid = file_owner(fh, buf);
759 CloseHandle(fh);
760 } else {
761 buf->st_uid = buf->st_gid = 0;
762 buf->st_mode &= ~S_IRWXO;
763 }
764 }
765#endif
766
767 /* Get actual size of compressed/sparse files. Only regular
768 * files need to be considered. */
769 size = buf->st_size;
770 if (S_ISREG(buf->st_mode)) {
771 low = GetCompressedFileSize(file_name, &high);
772 if (low != INVALID_FILE_SIZE || GetLastError() == NO_ERROR) {
773 size = low | (((off64_t)high)<<32);
774 }
775 }
776
777 /*
778 * Assume a block is 4096 bytes and calculate number of 512 byte
779 * sectors.
780 */
781 buf->st_blksize = 4096;
782 buf->st_blocks = ((size+4095)>>12)<<3;
783 return 0;
784 }
785 free(lname);
786 errno = err;
787 return -1;
788}
789
/* lstat() replacement: report on a link itself, not its target. */
int mingw_lstat(const char *file_name, struct mingw_stat *buf)
{
	const int follow = 0;

	return do_lstat(follow, file_name, buf);
}
794
/* stat() replacement: follow links and report on the target. */
int mingw_stat(const char *file_name, struct mingw_stat *buf)
{
	const int follow = 1;

	return do_lstat(follow, file_name, buf);
}
799
800#undef st_atime
801#undef st_mtime
802#undef st_ctime
803int mingw_fstat(int fd, struct mingw_stat *buf)
804{
805 HANDLE fh = (HANDLE)_get_osfhandle(fd);
806 BY_HANDLE_FILE_INFORMATION fdata;
807
808 if (fh == INVALID_HANDLE_VALUE)
809 goto fail;
810
811 /* direct non-file handles to MS's fstat() */
812 if (GetFileType(fh) != FILE_TYPE_DISK) {
813 struct _stati64 buf64;
814
815 if (_fstati64(fd, &buf64) != 0)
816 return -1;
817
818 buf->st_mode = (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
819 & ~(current_umask & 0022);
820 buf->st_attr = FILE_ATTRIBUTE_NORMAL;
821 buf->st_size = buf64.st_size;
822 buf->st_atim.tv_sec = buf64.st_atime;
823 buf->st_atim.tv_nsec = 0;
824 buf->st_mtim.tv_sec = buf64.st_mtime;
825 buf->st_mtim.tv_nsec = 0;
826 buf->st_ctim.tv_sec = buf64.st_ctime;
827 buf->st_ctim.tv_nsec = 0;
828#if ENABLE_FEATURE_EXTRA_FILE_DATA
829 buf->st_dev = 0;
830 buf->st_ino = 0;
831 buf->st_nlink = 1;
832#endif
833 goto success;
834 }
835
836 if (GetFileInformationByHandle(fh, &fdata)) {
837 buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes);
838 buf->st_attr = fdata.dwFileAttributes;
839 buf->st_size = fdata.nFileSizeLow |
840 (((off64_t)fdata.nFileSizeHigh)<<32);
841 buf->st_atim = filetime_to_timespec(&(fdata.ftLastAccessTime));
842 buf->st_mtim = filetime_to_timespec(&(fdata.ftLastWriteTime));
843 buf->st_ctim = filetime_to_timespec(&(fdata.ftCreationTime));
844#if ENABLE_FEATURE_EXTRA_FILE_DATA
845 buf->st_dev = fdata.dwVolumeSerialNumber;
846 buf->st_ino = fdata.nFileIndexLow |
847 (((uint64_t)fdata.nFileIndexHigh)<<32);
848 buf->st_nlink = (buf->st_attr & FILE_ATTRIBUTE_DIRECTORY) ?
849 2 : fdata.nNumberOfLinks;
850#endif
851 success:
852#if !ENABLE_FEATURE_EXTRA_FILE_DATA
853 buf->st_dev = 0;
854 buf->st_ino = 0;
855 buf->st_nlink = (buf->st_attr & FILE_ATTRIBUTE_DIRECTORY) ? 2 : 1;
856#endif
857 buf->st_tag = 0;
858 buf->st_rdev = 0;
859 buf->st_uid = DEFAULT_UID;
860 buf->st_gid = DEFAULT_GID;
861 buf->st_blksize = 4096;
862 buf->st_blocks = ((buf->st_size+4095)>>12)<<3;
863 return 0;
864 }
865 fail:
866 errno = EBADF;
867 return -1;
868}
869
870static inline void timespec_to_filetime(const struct timespec tv, FILETIME *ft)
871{
872 long long winTime = tv.tv_sec * 10000000LL + tv.tv_nsec / 100LL +
873 116444736000000000LL;
874 ft->dwLowDateTime = winTime;
875 ft->dwHighDateTime = winTime >> 32;
876}
877
878static int hutimens(HANDLE fh, const struct timespec times[2])
879{
880 FILETIME now, aft, mft;
881 FILETIME *pft[2] = {&aft, &mft};
882 int i;
883
884 GetSystemTimeAsFileTime(&now);
885
886 if (times) {
887 for (i = 0; i < 2; ++i) {
888 if (times[i].tv_nsec == UTIME_NOW)
889 *pft[i] = now;
890 else if (times[i].tv_nsec == UTIME_OMIT)
891 pft[i] = NULL;
892 else if (times[i].tv_nsec >= 0 && times[i].tv_nsec < 1000000000L)
893 timespec_to_filetime(times[i], pft[i]);
894 else {
895 errno = EINVAL;
896 return -1;
897 }
898 }
899 } else {
900 aft = mft = now;
901 }
902
903 if (!SetFileTime(fh, NULL, pft[0], pft[1])) {
904 errno = err_win_to_posix();
905 return -1;
906 }
907 return 0;
908}
909
910int futimens(int fd, const struct timespec times[2])
911{
912 HANDLE fh;
913
914 fh = (HANDLE)_get_osfhandle(fd);
915 if (fh == INVALID_HANDLE_VALUE) {
916 errno = EBADF;
917 return -1;
918 }
919
920 return hutimens(fh, times);
921}
922
923int utimensat(int fd, const char *path, const struct timespec times[2],
924 int flags)
925{
926 int rc = -1;
927 HANDLE fh;
928 DWORD cflag = FILE_FLAG_BACKUP_SEMANTICS;
929
930 if (is_relative_path(path) && fd != AT_FDCWD) {
931 errno = ENOSYS; // partial implementation
932 return rc;
933 }
934
935 if (flags & AT_SYMLINK_NOFOLLOW)
936 cflag |= FILE_FLAG_OPEN_REPARSE_POINT;
937
938 fh = CreateFile(path, FILE_WRITE_ATTRIBUTES, 0, NULL, OPEN_EXISTING,
939 cflag, NULL);
940 if (fh == INVALID_HANDLE_VALUE) {
941 errno = err_win_to_posix();
942 return rc;
943 }
944
945 rc = hutimens(fh, times);
946 CloseHandle(fh);
947 return rc;
948}
949
950int utimes(const char *file_name, const struct timeval tv[2])
951{
952 struct timespec ts[2];
953
954 if (tv) {
955 if (tv[0].tv_usec < 0 || tv[0].tv_usec >= 1000000 ||
956 tv[1].tv_usec < 0 || tv[1].tv_usec >= 1000000) {
957 errno = EINVAL;
958 return -1;
959 }
960 ts[0].tv_sec = tv[0].tv_sec;
961 ts[0].tv_nsec = tv[0].tv_usec * 1000;
962 ts[1].tv_sec = tv[1].tv_sec;
963 ts[1].tv_nsec = tv[1].tv_usec * 1000;
964 }
965 return utimensat(AT_FDCWD, file_name, tv ? ts : NULL, 0);
966}
967
/*
 * sleep() implemented with the millisecond-based Sleep().
 * Always returns 0.
 */
unsigned int sleep (unsigned int seconds)
{
	const unsigned int millis = seconds*1000;

	Sleep(millis);
	return 0;
}
973
/*
 * nanosleep() implemented with Sleep(); the request is converted to
 * whole milliseconds.  Returns -1 with errno set to EINVAL if tv_nsec
 * is outside 0..999999999, otherwise 0.
 */
int nanosleep(const struct timespec *req, struct timespec *rem)
{
	if (!(req->tv_nsec >= 0 && req->tv_nsec < 1000000000)) {
		errno = EINVAL;
		return -1;
	}

	Sleep(req->tv_sec*1000 + req->tv_nsec/1000000);

	/* Sleep is not interruptible.  So there is no remaining delay.  */
	if (rem) {
		rem->tv_nsec = 0;
		rem->tv_sec = 0;
	}

	return 0;
}
991
/*
 * Windows' mktemp returns NULL on error whereas POSIX always returns the
 * template and signals an error by making it an empty string.
 * This wrapper provides the POSIX behaviour.
 */
#undef mktemp
char *mingw_mktemp(char *template)
{
	const char *result = mktemp(template);

	if (result == NULL)
		*template = '\0';

	return template;
}
1005
1006int mkstemp(char *template)
1007{
1008 char *filename = mktemp(template);
1009 if (filename == NULL)
1010 return -1;
1011 return open(filename, O_RDWR | O_CREAT, 0600);
1012}
1013
1014int gettimeofday(struct timeval *tv, void *tz UNUSED_PARAM)
1015{
1016 FILETIME ft;
1017 long long hnsec;
1018
1019 GetSystemTimeAsFileTime(&ft);
1020 hnsec = filetime_to_hnsec(&ft);
1021 tv->tv_sec = hnsec / 10000000;
1022 tv->tv_usec = (hnsec % 10000000) / 10;
1023 return 0;
1024}
1025
1026int clock_gettime(clockid_t clockid, struct timespec *tp)
1027{
1028 FILETIME ft;
1029
1030 if (clockid != CLOCK_REALTIME) {
1031 errno = ENOSYS;
1032 return -1;
1033 }
1034 GetSystemTimeAsFileTime(&ft);
1035 *tp = filetime_to_timespec(&ft);
1036 return 0;
1037}
1038
1039int clock_settime(clockid_t clockid, const struct timespec *tp)
1040{
1041 SYSTEMTIME st;
1042 FILETIME ft;
1043
1044 if (clockid != CLOCK_REALTIME) {
1045 errno = ENOSYS;
1046 return -1;
1047 }
1048
1049 timespec_to_filetime(*tp, &ft);
1050 if (FileTimeToSystemTime(&ft, &st) == 0) {
1051 errno = EINVAL;
1052 return -1;
1053 }
1054
1055 if (SetSystemTime(&st) == 0) {
1056 errno = EPERM;
1057 return -1;
1058 }
1059 return 0;
1060}
1061
1062int pipe(int filedes[2])
1063{
1064 if (_pipe(filedes, PIPE_BUF, 0) < 0)
1065 return -1;
1066 return 0;
1067}
1068
/*
 * gmtime_r() built on gmtime(): the broken-down time is copied into
 * 'result'.
 *
 * Returns 'result', or NULL (leaving 'result' untouched) if gmtime()
 * is unable to convert the time.
 */
struct tm *gmtime_r(const time_t *timep, struct tm *result)
{
	/* gmtime() in MSVCRT.DLL is thread-safe, but not reentrant */
	const struct tm *converted = gmtime(timep);

	/* gmtime() returns NULL for times it can't represent */
	if (converted == NULL)
		return NULL;

	*result = *converted;
	return result;
}
1075
/*
 * localtime_r() built on localtime(): the broken-down time is copied
 * into 'result'.
 *
 * Returns 'result', or NULL (leaving 'result' untouched) if
 * localtime() is unable to convert the time.
 */
struct tm *localtime_r(const time_t *timep, struct tm *result)
{
	/* localtime() in MSVCRT.DLL is thread-safe, but not reentrant */
	const struct tm *converted = localtime(timep);

	/* localtime() returns NULL for times it can't represent */
	if (converted == NULL)
		return NULL;

	*result = *converted;
	return result;
}
1082
1083#undef getcwd
/*
 * getcwd() replacement which passes a successful result through
 * bs_to_slash().  Returns NULL if getcwd() fails.
 */
char *mingw_getcwd(char *pointer, int len)
{
	char *cwd = getcwd(pointer, len);

	return cwd ? bs_to_slash(cwd) : NULL;
}
1091
1092#undef rename
1093int mingw_rename(const char *pold, const char *pnew)
1094{
1095 DWORD attrs;
1096
1097 /*
1098 * For non-symlinks, try native rename() first to get errno right.
1099 * It is based on MoveFile(), which cannot overwrite existing files.
1100 */
1101 if (!is_symlink(pold)) {
1102 if (!rename(pold, pnew))
1103 return 0;
1104 if (errno != EEXIST)
1105 return -1;
1106 }
1107 if (MoveFileEx(pold, pnew,
1108 MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED))
1109 return 0;
1110 /* TODO: translate more errors */
1111 if (GetLastError() == ERROR_ACCESS_DENIED &&
1112 (attrs = GetFileAttributes(pnew)) != INVALID_FILE_ATTRIBUTES) {
1113 if (attrs & FILE_ATTRIBUTE_DIRECTORY) {
1114 errno = EISDIR;
1115 return -1;
1116 }
1117 if ((attrs & FILE_ATTRIBUTE_READONLY) &&
1118 SetFileAttributes(pnew, attrs & ~FILE_ATTRIBUTE_READONLY)) {
1119 if (MoveFileEx(pold, pnew, MOVEFILE_REPLACE_EXISTING))
1120 return 0;
1121 /* revert file attributes on failure */
1122 SetFileAttributes(pnew, attrs);
1123 }
1124 }
1125 errno = EACCES;
1126 return -1;
1127}
1128
1129static char *gethomedir(void)
1130{
1131 static char *buf = NULL;
1132 DECLARE_PROC_ADDR(BOOL, GetUserProfileDirectoryA, HANDLE, LPSTR, LPDWORD);
1133
1134 if (!buf) {
1135 DWORD len = PATH_MAX;
1136 HANDLE h;
1137
1138 buf = xzalloc(len);
1139 if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &h)) {
1140 if (INIT_PROC_ADDR(userenv.dll, GetUserProfileDirectoryA)) {
1141 GetUserProfileDirectoryA(h, buf, &len);
1142 bs_to_slash(buf);
1143 }
1144 CloseHandle(h);
1145 }
1146 }
1147 return buf;
1148}
1149
1150#define NAME_LEN 100
1151char *get_user_name(void)
1152{
1153 static char *user_name = NULL;
1154 char *s;
1155 DWORD len = NAME_LEN;
1156
1157 if ( user_name == NULL ) {
1158 user_name = xzalloc(NAME_LEN);
1159 }
1160
1161 if ( user_name[0] != '\0' ) {
1162 return user_name;
1163 }
1164
1165 if ( !GetUserName(user_name, &len) ) {
1166 return NULL;
1167 }
1168
1169 for ( s=user_name; *s; ++s ) {
1170 if ( *s == ' ' ) {
1171 *s = '_';
1172 }
1173 }
1174
1175 return user_name;
1176}
1177
1178/*
1179 * When 'drop' drops privileges TokenIsElevated is still TRUE.
1180 * Find out if we're really privileged by checking if the group
1181 * BUILTIN\Administrators is enabled.
1182 */
1183int
1184elevation_state(void)
1185{
1186 int elevated = FALSE;
1187 int enabled = TRUE;
1188 HANDLE h;
1189#if ENABLE_DROP || ENABLE_CDROP || ENABLE_PDROP
1190 BOOL admin_enabled = TRUE;
1191 unsigned char admin[16] = {
1192 0x01, 0x02, 0x00, 0x00,
1193 0x00, 0x00, 0x00, 0x05,
1194 0x20, 0x00, 0x00, 0x00,
1195 0x20, 0x02, 0x00, 0x00
1196 };
1197#endif
1198
1199 if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &h)) {
1200 TOKEN_ELEVATION elevation = { 0 };
1201 DWORD size;
1202
1203 if (GetTokenInformation(h, TokenElevation, &elevation,
1204 sizeof(elevation), &size))
1205 elevated = elevation.TokenIsElevated != 0;
1206 CloseHandle(h);
1207 }
1208
1209#if ENABLE_DROP || ENABLE_CDROP || ENABLE_PDROP
1210 if (CheckTokenMembership(NULL, (PSID)admin, &admin_enabled))
1211 enabled = admin_enabled != 0;
1212#endif
1213
1214 return elevated | (enabled << 1);
1215}
1216
1217int getuid(void)
1218{
1219 return elevation_state() == (ELEVATED_PRIVILEGE | ADMIN_ENABLED) ?
1220 0 : DEFAULT_UID;
1221}
1222
1223struct passwd *getpwnam(const char *name)
1224{
1225 const char *myname;
1226
1227 if ( (myname=get_user_name()) != NULL &&
1228 strcmp(myname, name) == 0 ) {
1229 return getpwuid(DEFAULT_UID);
1230 }
1231 else if (strcmp(name, "root") == 0) {
1232 return getpwuid(0);
1233 }
1234
1235 return NULL;
1236}
1237
1238struct passwd *getpwuid(uid_t uid)
1239{
1240 static struct passwd p;
1241
1242 if (uid == 0)
1243 p.pw_name = (char *)"root";
1244 else if (uid != DEFAULT_UID || (p.pw_name=get_user_name()) == NULL)
1245 return NULL;
1246
1247 p.pw_dir = gethomedir();
1248 p.pw_passwd = (char *)"";
1249 p.pw_gecos = p.pw_name;
1250 p.pw_shell = NULL;
1251 p.pw_uid = uid;
1252 p.pw_gid = uid;
1253
1254 return &p;
1255}
1256
1257struct group *getgrgid(gid_t gid)
1258{
1259 static char *members[2] = { NULL, NULL };
1260 static struct group g;
1261
1262 if (gid == 0) {
1263 g.gr_name = (char *)"root";
1264 }
1265 else if (gid != DEFAULT_GID || (g.gr_name=get_user_name()) == NULL) {
1266 return NULL;
1267 }
1268 g.gr_passwd = (char *)"";
1269 g.gr_gid = gid;
1270 members[0] = g.gr_name;
1271 g.gr_mem = members;
1272
1273 return &g;
1274}
1275
1276#if 0
1277int getgrouplist(const char *user UNUSED_PARAM, gid_t group,
1278 gid_t *groups, int *ngroups)
1279{
1280 if ( *ngroups == 0 ) {
1281 *ngroups = 1;
1282 return -1;
1283 }
1284
1285 *ngroups = 1;
1286 groups[0] = group;
1287 return 1;
1288}
1289
/*
 * Fake getgroups(): the process belongs to exactly one group, the one
 * returned by getgid().  With n == 0 only the count (1) is returned.
 */
int getgroups(int n, gid_t *groups)
{
	if (n != 0)
		groups[0] = getgid();

	return 1;
}
1299#endif
1300
/*
 * Copy the current user's name into buf, which has room for len bytes.
 *
 * Returns 0 on success and -1 on failure; errno is set to ERANGE if
 * the name (including its terminator) doesn't fit.
 */
int getlogin_r(char *buf, size_t len)
{
	char *name = get_user_name();

	if (name == NULL)
		return -1;

	if (strlen(name) >= len) {
		errno = ERANGE;
		return -1;
	}

	strcpy(buf, name);
	return 0;
}
1317
1318long sysconf(int name)
1319{
1320 if ( name == _SC_CLK_TCK ) {
1321 return TICKS_PER_SECOND;
1322 }
1323 errno = EINVAL;
1324 return -1;
1325}
1326
/*
 * Dummy times(): reports zero for every field and returns 0.
 */
clock_t times(struct tms *buf)
{
	static const struct tms no_time;

	*buf = no_time;
	return 0;
}
1332
1333int link(const char *oldpath, const char *newpath)
1334{
1335 DECLARE_PROC_ADDR(BOOL, CreateHardLinkA, LPCSTR, LPCSTR,
1336 LPSECURITY_ATTRIBUTES);
1337
1338 if (!INIT_PROC_ADDR(kernel32.dll, CreateHardLinkA)) {
1339 errno = ENOSYS;
1340 return -1;
1341 }
1342 if (!CreateHardLinkA(newpath, oldpath, NULL)) {
1343 errno = err_win_to_posix();
1344 return -1;
1345 }
1346 return 0;
1347}
1348
1349#ifndef SYMBOLIC_LINK_FLAG_DIRECTORY
1350# define SYMBOLIC_LINK_FLAG_DIRECTORY (0x1)
1351#endif
1352#ifndef SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE
1353# define SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE (0x2)
1354#endif
1355
1356int symlink(const char *target, const char *linkpath)
1357{
1358 DWORD flag = SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE;
1359 DECLARE_PROC_ADDR(BOOLEAN, CreateSymbolicLinkA, LPCSTR, LPCSTR, DWORD);
1360 char *targ, *relative = NULL;
1361
1362 if (!INIT_PROC_ADDR(kernel32.dll, CreateSymbolicLinkA)) {
1363 errno = ENOSYS;
1364 return -1;
1365 }
1366
1367 if (is_relative_path(target) && has_path(linkpath)) {
1368 /* make target's path relative to current directory */
1369 const char *name = bb_get_last_path_component_nostrip(linkpath);
1370 relative = xasprintf("%.*s%s",
1371 (int)(name - linkpath), linkpath, target);
1372 }
1373
1374 if (mingw_is_directory(relative ?: target))
1375 flag |= SYMBOLIC_LINK_FLAG_DIRECTORY;
1376 free(relative);
1377
1378 targ = auto_string(strdup(target));
1379 slash_to_bs(targ);
1380
1381 retry:
1382 if (!CreateSymbolicLinkA(linkpath, targ, flag)) {
1383 /* Old Windows versions see 'UNPRIVILEGED_CREATE' as an invalid
1384 * parameter. Retry without it. */
1385 if ((flag & SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE) &&
1386 GetLastError() == ERROR_INVALID_PARAMETER) {
1387 flag &= ~SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE;
1388 goto retry;
1389 }
1390 errno = err_win_to_posix();
1391 return -1;
1392 }
1393 return 0;
1394}
1395
1396/* Create a directory junction */
1397#define MRPB rptr->MountPointReparseBuffer
1398#if 0
1399static void print_junction(REPARSE_DATA_BUFFER *rptr)
1400{
1401 int i;
1402#define MRPB_HEADER_SIZE \
1403 (FIELD_OFFSET(REPARSE_DATA_BUFFER, MountPointReparseBuffer.PathBuffer) - \
1404 FIELD_OFFSET(REPARSE_DATA_BUFFER, MountPointReparseBuffer))
1405
1406 fprintf(stderr, "---\n");
1407 fprintf(stderr, "Tag: %lx\n", rptr->ReparseTag);
1408 fprintf(stderr, "ReparseDataLength: %d (%d + %d + %d + %d + %d = %d)\n",
1409 rptr->ReparseDataLength, MRPB_HEADER_SIZE,
1410 MRPB.SubstituteNameLength, sizeof(WCHAR),
1411 MRPB.PrintNameLength, sizeof(WCHAR),
1412 MRPB_HEADER_SIZE + MRPB.SubstituteNameLength + sizeof(WCHAR) +
1413 MRPB.PrintNameLength + sizeof(WCHAR));
1414 fprintf(stderr, "Reserved: %d\n", rptr->Reserved);
1415 fprintf(stderr, "---\n");
1416 fprintf(stderr, "SubstituteNameOffset: %d\n", MRPB.SubstituteNameOffset);
1417 fprintf(stderr, "SubstituteNameLength: %d\n", MRPB.SubstituteNameLength);
1418 fprintf(stderr, "PrintNameOffset: %d\n", MRPB.PrintNameOffset);
1419 fprintf(stderr, "PrintNameLength: %d\n", MRPB.PrintNameLength);
1420 fprintf(stderr, "SubstituteName: ");
1421 for (i = 0; i < MRPB.SubstituteNameLength/sizeof(WCHAR); i++)
1422 fprintf(stderr, "%c",
1423 MRPB.PathBuffer[MRPB.SubstituteNameOffset/sizeof(WCHAR) + i]);
1424 fprintf(stderr, " (%x)",
1425 MRPB.PathBuffer[MRPB.SubstituteNameOffset/sizeof(WCHAR) + i]);
1426 fprintf(stderr, "\n");
1427 fprintf(stderr, "PrintName: ");
1428 for (i = 0; i < MRPB.PrintNameLength/sizeof(WCHAR); i++)
1429 fprintf(stderr, "%c",
1430 MRPB.PathBuffer[MRPB.PrintNameOffset/sizeof(WCHAR) + i]);
1431 fprintf(stderr, " (%x)",
1432 MRPB.PathBuffer[MRPB.PrintNameOffset/sizeof(WCHAR) + i]);
1433 fprintf(stderr, "\n");
1434 fprintf(stderr, "---\n");
1435}
1436#endif
1437
1438static REPARSE_DATA_BUFFER *make_junction_data_buffer(char *rpath)
1439{
1440 WCHAR pbuf[PATH_MAX];
1441 int plen, slen, rbufsize;
1442 REPARSE_DATA_BUFFER *rptr;
1443
1444 /* We need two strings for the reparse data. The PrintName is the
1445 * target path in Win32 format, the SubstituteName is the same in
1446 * NT format.
1447 *
1448 * The return value from MultiByteToWideChar includes the trailing
1449 * L'\0' character.
1450 */
1451 slash_to_bs(rpath);
1452 plen = MultiByteToWideChar(CP_ACP, 0, rpath, -1, pbuf, PATH_MAX);
1453 if (plen == 0) {
1454 errno = err_win_to_posix();
1455 return NULL;
1456 }
1457 slen = plen + 4;
1458
1459 rbufsize = (slen + plen) * sizeof(WCHAR) +
1460 FIELD_OFFSET(REPARSE_DATA_BUFFER, MountPointReparseBuffer.PathBuffer);
1461 rptr = xzalloc(rbufsize);
1462
1463 rptr->ReparseTag = IO_REPARSE_TAG_MOUNT_POINT;
1464 rptr->ReparseDataLength = rbufsize -
1465 FIELD_OFFSET(REPARSE_DATA_BUFFER, MountPointReparseBuffer);
1466 /* rptr->Reserved = 0; */
1467 /* MRPB.SubstituteNameOffset = 0; */
1468 MRPB.SubstituteNameLength = (slen - 1) * sizeof(WCHAR);
1469 MRPB.PrintNameOffset = MRPB.SubstituteNameLength + sizeof(WCHAR);
1470 MRPB.PrintNameLength = (plen - 1) * sizeof(WCHAR);
1471
1472 wcscpy(MRPB.PathBuffer, L"\\??\\");
1473 wcscpy(MRPB.PathBuffer + 4, pbuf);
1474 wcscpy(MRPB.PathBuffer + slen, pbuf);
1475 return rptr;
1476}
1477
1478int create_junction(const char *oldpath, const char *newpath)
1479{
1480 char rpath[PATH_MAX];
1481 struct stat statbuf;
1482 REPARSE_DATA_BUFFER *rptr = NULL;
1483 HANDLE h;
1484 int error = 0;
1485 DWORD bytes;
1486
1487 if (realpath(oldpath, rpath) == NULL || stat(rpath, &statbuf) < 0)
1488 return -1;
1489
1490 if (!has_dos_drive_prefix(rpath)) {
1491 errno = EINVAL;
1492 return -1;
1493 }
1494
1495 if (!S_ISDIR(statbuf.st_mode)) {
1496 errno = ENOTDIR;
1497 return -1;
1498 }
1499
1500 if (!(rptr = make_junction_data_buffer(rpath))) {
1501 return -1;
1502 }
1503
1504 if (mkdir(newpath, 0777) < 0) {
1505 free(rptr);
1506 return -1;
1507 }
1508
1509 h = CreateFileA(newpath, GENERIC_READ | GENERIC_WRITE, 0, NULL,
1510 OPEN_EXISTING,
1511 FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS, NULL);
1512 if (h != INVALID_HANDLE_VALUE) {
1513 if (DeviceIoControl(h, FSCTL_SET_REPARSE_POINT, rptr,
1514 rptr->ReparseDataLength + REPARSE_DATA_BUFFER_HEADER_SIZE,
1515 NULL, 0, &bytes, NULL) != 0) {
1516 CloseHandle(h);
1517 free(rptr);
1518 return 0;
1519 }
1520 error = err_win_to_posix();
1521 CloseHandle(h);
1522 } else {
1523 error = err_win_to_posix();
1524 }
1525
1526 rmdir(newpath);
1527 free(rptr);
1528 errno = error;
1529 return -1;
1530}
1531
/*
 * Strip NT namespace prefixes from an absolute path.  The buffer is
 * modified in place and a pointer into it is returned.  Paths which
 * don't start with a backslash are returned unchanged.
 */
static char *normalize_ntpathA(char *buf)
{
	if (buf[0] != '\\')
		return buf;

	/* strip NT namespace prefixes */
	if (is_prefixed_with(buf, "\\??\\") ||
			is_prefixed_with(buf, "\\\\?\\"))
		buf += 4;
	else if (is_prefixed_with_case(buf, "\\DosDevices\\"))
		buf += 12;

	/* replace remaining '...UNC\' with '\\' */
	if (is_prefixed_with_case(buf, "UNC\\")) {
		buf[2] = '\\';
		buf += 2;
	}
	return buf;
}
1550
1551static char *resolve_symlinks(char *path)
1552{
1553 HANDLE h;
1554 DWORD status;
1555 char *ptr = NULL;
1556 DECLARE_PROC_ADDR(DWORD, GetFinalPathNameByHandleA, HANDLE,
1557 LPSTR, DWORD, DWORD);
1558 char *resolve = NULL;
1559
1560 if (GetFileAttributesA(path) & FILE_ATTRIBUTE_REPARSE_POINT) {
1561 resolve = xmalloc_follow_symlinks(path);
1562 if (!resolve)
1563 return NULL;
1564 }
1565
1566 /* need a file handle to resolve symlinks */
1567 h = CreateFileA(resolve ?: path, 0, 0, NULL, OPEN_EXISTING,
1568 FILE_FLAG_BACKUP_SEMANTICS, NULL);
1569 if (h != INVALID_HANDLE_VALUE) {
1570 if (!INIT_PROC_ADDR(kernel32.dll, GetFinalPathNameByHandleA)) {
1571 if (resolve)
1572 strcpy(path, resolve);
1573 ptr = path;
1574 goto end;
1575 }
1576
1577 /* normalize the path and return it on success */
1578 status = GetFinalPathNameByHandleA(h, path, MAX_PATH,
1579 FILE_NAME_NORMALIZED|VOLUME_NAME_DOS);
1580 if (status != 0 && status < MAX_PATH) {
1581 ptr = normalize_ntpathA(path);
1582 goto end;
1583 } else if (err_win_to_posix() == ENOSYS) {
1584 if (resolve)
1585 strcpy(path, resolve);
1586 ptr = path;
1587 goto end;
1588 }
1589 }
1590
1591 errno = err_win_to_posix();
1592 end:
1593 CloseHandle(h);
1594 free(resolve);
1595 return ptr;
1596}
1597
1598/*
1599 * Emulate realpath in two stages:
1600 *
1601 * - _fullpath handles './', '../' and extra '/' characters. The
1602 * resulting path may not refer to an actual file.
1603 *
1604 * - resolve_symlinks checks that the file exists (by opening it) and
1605 * resolves symlinks by calling GetFinalPathNameByHandleA.
1606 */
1607char *realpath(const char *path, char *resolved_path)
1608{
1609 char buffer[MAX_PATH];
1610 char *real_path, *p;
1611
1612 /* enforce glibc pre-2.3 behaviour */
1613 if (path == NULL || resolved_path == NULL) {
1614 errno = EINVAL;
1615 return NULL;
1616 }
1617
1618 if (_fullpath(buffer, path, MAX_PATH) &&
1619 (real_path=resolve_symlinks(buffer))) {
1620 bs_to_slash(strcpy(resolved_path, real_path));
1621 p = last_char_is(resolved_path, '/');
1622 if (p && p > resolved_path && p[-1] != ':')
1623 *p = '\0';
1624 return resolved_path;
1625 }
1626 return NULL;
1627}
1628
/*
 * Wide character path normalization: strip NT namespace prefixes from
 * an absolute path and convert all backslashes to slashes.  The buffer
 * is modified in place and a pointer into it is returned.
 */
static wchar_t *normalize_ntpath(wchar_t *wbuf)
{
	wchar_t *w;

	/* fix absolute path prefixes */
	if (wbuf[0] == '\\') {
		/* strip NT namespace prefixes */
		if (wcsncmp(wbuf, L"\\??\\", 4) == 0 ||
				wcsncmp(wbuf, L"\\\\?\\", 4) == 0)
			wbuf += 4;
		else if (wcsnicmp(wbuf, L"\\DosDevices\\", 12) == 0)
			wbuf += 12;

		/* replace remaining '...UNC\' with '\\' */
		if (wcsnicmp(wbuf, L"UNC\\", 4) == 0) {
			wbuf[2] = '\\';
			wbuf += 2;
		}
	}

	/* convert backslashes to slashes */
	for (w = wbuf; *w; w++) {
		if (*w == '\\')
			*w = '/';
	}
	return wbuf;
}
1652
/*
 * This is the structure required for reparse points with the tag
 * IO_REPARSE_TAG_APPEXECLINK. The Buffer member contains four
 * NUL-terminated, concatenated strings:
 *
 * package id, entry point, executable path and application type.
 *
 * https://www.tiraniddo.dev/2019/09/overview-of-windows-execution-aliases.html
 */
1662typedef struct {
1663 DWORD ReparseTag;
1664 USHORT ReparseDataLength;
1665 USHORT Reserved;
1666 ULONG Version;
1667 WCHAR Buffer[1];
1668} APPEXECLINK_BUFFER;
1669
1670#define SRPB rptr->SymbolicLinkReparseBuffer
1671char * FAST_FUNC xmalloc_readlink(const char *pathname)
1672{
1673 HANDLE h;
1674 char *buf;
1675 int bufsiz;
1676
1677 h = CreateFile(pathname, 0, 0, NULL, OPEN_EXISTING,
1678 FILE_FLAG_OPEN_REPARSE_POINT|FILE_FLAG_BACKUP_SEMANTICS, NULL);
1679 if (h != INVALID_HANDLE_VALUE) {
1680 DWORD nbytes;
1681 BYTE rbuf[MAXIMUM_REPARSE_DATA_BUFFER_SIZE];
1682 PREPARSE_DATA_BUFFER rptr = (PREPARSE_DATA_BUFFER)rbuf;
1683 APPEXECLINK_BUFFER *aptr = (APPEXECLINK_BUFFER *)rptr;
1684 BOOL status;
1685 size_t len;
1686 WCHAR *name = NULL, *str[4], *s;
1687 int i;
1688
1689 status = DeviceIoControl(h, FSCTL_GET_REPARSE_POINT, NULL, 0,
1690 rptr, sizeof(rbuf), &nbytes, NULL);
1691 CloseHandle(h);
1692
1693 if (status && rptr->ReparseTag == IO_REPARSE_TAG_SYMLINK) {
1694 len = SRPB.SubstituteNameLength/sizeof(WCHAR);
1695 name = SRPB.PathBuffer + SRPB.SubstituteNameOffset/sizeof(WCHAR);
1696 } else if (status && rptr->ReparseTag == IO_REPARSE_TAG_MOUNT_POINT) {
1697 len = MRPB.SubstituteNameLength/sizeof(WCHAR);
1698 name = MRPB.PathBuffer + MRPB.SubstituteNameOffset/sizeof(WCHAR);
1699 } else if (status && rptr->ReparseTag == IO_REPARSE_TAG_APPEXECLINK) {
1700 // We only need the executable path but we determine all of
1701 // the strings as a sanity check.
1702 i = 0;
1703 s = aptr->Buffer;
1704 do {
1705 str[i] = s;
1706 while (*s++)
1707 ;
1708 } while (++i < 4);
1709
1710 if (s - aptr->Buffer < MAXIMUM_REPARSE_DATA_BUFFER_SIZE) {
1711 len = wcslen(str[2]);
1712 name = str[2];
1713 }
1714 }
1715
1716 if (name) {
1717 name[len] = 0;
1718 name = normalize_ntpath(name);
1719 bufsiz = WideCharToMultiByte(CP_ACP, 0, name, -1, NULL, 0, 0, 0);
1720 if (bufsiz) {
1721 buf = xmalloc(bufsiz);
1722 if (WideCharToMultiByte(CP_ACP, 0, name, -1, buf, bufsiz, 0, 0))
1723 return buf;
1724 free(buf);
1725 }
1726 }
1727 }
1728 errno = err_win_to_posix();
1729 return NULL;
1730}
1731
1732const char *get_busybox_exec_path(void)
1733{
1734 static char *path = NULL;
1735
1736 if (!path) {
1737 path = xzalloc(PATH_MAX);
1738 }
1739
1740 if (!*path) {
1741 GetModuleFileName(NULL, path, PATH_MAX);
1742 bs_to_slash(path);
1743 }
1744 return path;
1745}
1746
1747#undef mkdir
1748int mingw_mkdir(const char *path, int mode UNUSED_PARAM)
1749{
1750 int ret;
1751 struct stat st;
1752 int lerrno = 0;
1753
1754 if ( (ret=mkdir(path)) < 0 ) {
1755 lerrno = errno;
1756 if ( lerrno == EACCES && stat(path, &st) == 0 ) {
1757 ret = 0;
1758 lerrno = 0;
1759 }
1760 }
1761
1762 errno = lerrno;
1763 return ret;
1764}
1765
1766#undef chdir
/*
 * chdir() wrapper.  A symlink is resolved to its real path first and
 * the case of the path is fixed up before changing directory.
 *
 * Returns the result of chdir(), or -1 if no path could be obtained.
 */
int mingw_chdir(const char *dirname)
{
	char *realdir = is_symlink(dirname) ?
				xmalloc_realpath(dirname) : xstrdup(dirname);
	int ret = -1;

	if (realdir) {
		fix_path_case(realdir);
		ret = chdir(realdir);
	}
	free(realdir);

	return ret;
}
1785
1786#undef chmod
/*
 * chmod() wrapper which always keeps the write bits (0222) set on
 * directories.
 */
int mingw_chmod(const char *path, int mode)
{
	int effective = mingw_is_directory(path) ? (mode | 0222) : mode;

	return chmod(path, effective);
}
1794
1795int fcntl(int fd, int cmd, ...)
1796{
1797 va_list arg;
1798 int result = -1;
1799 char *fds;
1800 int target, i, newfd;
1801
1802 va_start(arg, cmd);
1803
1804 switch (cmd) {
1805 case F_GETFD:
1806 case F_SETFD:
1807 case F_GETFL:
1808 /*
1809 * Our fake F_GETFL won't matter if the return value is used as
1810 * fcntl(fd, F_SETFL, ret|something);
1811 * because F_SETFL isn't supported either.
1812 */
1813 result = 0;
1814 break;
1815 case F_DUPFD:
1816 target = va_arg(arg, int);
1817 fds = xzalloc(target);
1818 while ((newfd = dup(fd)) < target && newfd >= 0) {
1819 fds[newfd] = 1;
1820 }
1821 for (i = 0; i < target; ++i) {
1822 if (fds[i]) {
1823 close(i);
1824 }
1825 }
1826 free(fds);
1827 result = newfd;
1828 break;
1829 default:
1830 errno = ENOSYS;
1831 break;
1832 }
1833
1834 va_end(arg);
1835 return result;
1836}
1837
1838#undef unlink
1839#undef rmdir
1840int mingw_unlink(const char *pathname)
1841{
1842 int ret;
1843
1844 /* read-only files cannot be removed */
1845 chmod(pathname, 0666);
1846
1847 ret = unlink(pathname);
1848 if (ret == -1 && errno == EACCES) {
1849 /* a symlink to a directory needs to be removed by calling rmdir */
1850 /* (the *real* Windows rmdir, not mingw_rmdir) */
1851 if (is_symlink(pathname)) {
1852 return rmdir(pathname);
1853 }
1854 }
1855 return ret;
1856}
1857
1858struct pagefile_info {
1859 SIZE_T total;
1860 SIZE_T in_use;
1861};
1862
1863static BOOL CALLBACK
1864pagefile_cb(LPVOID context, PENUM_PAGE_FILE_INFORMATION info,
1865 LPCSTR name UNUSED_PARAM)
1866{
1867 struct pagefile_info *pfinfo = (struct pagefile_info *)context;
1868
1869 pfinfo->total += info->TotalSize;
1870 pfinfo->in_use += info->TotalInUse;
1871 return TRUE;
1872}
1873
1874int sysinfo(struct sysinfo *info)
1875{
1876 PERFORMANCE_INFORMATION perf;
1877 struct pagefile_info pfinfo;
1878 DECLARE_PROC_ADDR(BOOL, GetPerformanceInfo, PPERFORMANCE_INFORMATION,
1879 DWORD);
1880 DECLARE_PROC_ADDR(BOOL, EnumPageFilesA, PENUM_PAGE_FILE_CALLBACKA, LPVOID);
1881
1882 memset((void *)info, 0, sizeof(struct sysinfo));
1883 memset((void *)&perf, 0, sizeof(PERFORMANCE_INFORMATION));
1884 memset((void *)&pfinfo, 0, sizeof(struct pagefile_info));
1885 info->mem_unit = 4096;
1886
1887 if (INIT_PROC_ADDR(psapi.dll, GetPerformanceInfo)) {
1888 perf.cb = sizeof(PERFORMANCE_INFORMATION);
1889 GetPerformanceInfo(&perf, perf.cb);
1890 }
1891
1892 if (INIT_PROC_ADDR(psapi.dll, EnumPageFilesA)) {
1893 EnumPageFilesA((PENUM_PAGE_FILE_CALLBACK)pagefile_cb, (LPVOID)&pfinfo);
1894 }
1895
1896 info->totalram = perf.PhysicalTotal * perf.PageSize / 4096;
1897 info->bufferram = perf.SystemCache * perf.PageSize / 4096;
1898 if (perf.PhysicalAvailable > perf.SystemCache)
1899 info->freeram = perf.PhysicalAvailable * perf.PageSize / 4096 -
1900 info->bufferram;
1901 info->totalswap = pfinfo.total * perf.PageSize / 4096;
1902 info->freeswap = (pfinfo.total - pfinfo.in_use) * perf.PageSize / 4096;
1903
1904 info->uptime = GetTickCount64() / 1000;
1905 info->procs = perf.ProcessCount;
1906
1907 return 0;
1908}
1909
1910#undef strftime
1911size_t mingw_strftime(char *buf, size_t max, const char *format, const struct tm *tm)
1912{
1913 size_t ret;
1914 char buffer[64];
1915 const char *replace;
1916 char *t;
1917 char *fmt;
1918 struct tm tm2;
1919
1920 /*
1921 * Emulate some formats that Windows' strftime lacks.
1922 * - '%e' day of the month with space padding
1923 * - '%s' number of seconds since the Unix epoch
1924 * - '%T' same as %H:%M:%S
1925 * - '%z' timezone offset
1926 * Also, permit the '-' modifier to omit padding. Windows uses '#'.
1927 */
1928 fmt = xstrdup(format);
1929 for ( t=fmt; *t; ++t ) {
1930 if ( *t == '%' ) {
1931 replace = NULL;
1932 if ( t[1] == 'e' ) {
1933 if ( tm->tm_mday >= 0 && tm->tm_mday <= 99 ) {
1934 sprintf(buffer, "%2d", tm->tm_mday);
1935 }
1936 else {
1937 strcpy(buffer, " ");
1938 }
1939 replace = buffer;
1940 }
1941 else if ( t[1] == 's' ) {
1942 tm2 = *tm;
1943 sprintf(buffer, "%"LL_FMT"d", (long long)mktime(&tm2));
1944 replace = buffer;
1945 }
1946 else if ( t[1] == 'T' ) {
1947 replace = "%H:%M:%S";
1948 }
1949 else if ( t[1] == 'z' ) {
1950 _tzset();
1951 if ( tm->tm_isdst >= 0 ) {
1952 int offset = (int)_timezone - (tm->tm_isdst > 0 ? 3600 : 0);
1953 int hr, min;
1954
1955 if ( offset > 0 ) {
1956 buffer[0] = '-';
1957 }
1958 else {
1959 buffer[0] = '+';
1960 offset = -offset;
1961 }
1962
1963 hr = offset / 3600;
1964 min = (offset % 3600) / 60;
1965 sprintf(buffer+1, "%04d", hr*100 + min);
1966 }
1967 else {
1968 buffer[0] = '\0';
1969 }
1970 replace = buffer;
1971 }
1972 else if ( t[1] == '-' && t[2] != '\0' &&
1973 strchr("dHIjmMSUwWyY", t[2]) ) {
1974 /* Microsoft uses '#' rather than '-' to remove padding */
1975 t[1] = '#';
1976 }
1977 else if ( t[1] != '\0' ) {
1978 ++t;
1979 }
1980
1981 if (replace) {
1982 int m;
1983 char *newfmt;
1984
1985 *t = '\0';
1986 m = t - fmt;
1987 newfmt = xasprintf("%s%s%s", fmt, replace, t+2);
1988 free(fmt);
1989 t = newfmt + m + strlen(replace) - 1;
1990 fmt = newfmt;
1991 }
1992 }
1993 }
1994
1995 ret = strftime(buf, max, fmt, tm);
1996 free(fmt);
1997
1998 return ret;
1999}
2000
2001#undef access
2002int mingw_access(const char *name, int mode)
2003{
2004 int ret;
2005 struct stat s;
2006
2007 /* Windows can only handle test for existence, read or write */
2008 if (mode == F_OK || (mode & ~X_OK)) {
2009 ret = _access(name, mode & ~X_OK);
2010 if (ret < 0 || !(mode & X_OK)) {
2011 return ret;
2012 }
2013 }
2014
2015 if (!mingw_stat(name, &s)) {
2016 if ((s.st_mode&S_IXUSR)) {
2017 return 0;
2018 }
2019 errno = EACCES;
2020 }
2021
2022 return -1;
2023}
2024
2025int mingw_rmdir(const char *path)
2026{
2027 /* On Linux rmdir(2) doesn't remove symlinks */
2028 if (is_symlink(path)) {
2029 errno = ENOTDIR;
2030 return -1;
2031 }
2032
2033 /* read-only directories cannot be removed */
2034 chmod(path, 0666);
2035 return rmdir(path);
2036}
2037
2038void mingw_sync(void)
2039{
2040 HANDLE h;
2041 FILE *mnt;
2042 struct mntent *entry;
2043 char name[] = "\\\\.\\C:";
2044
2045 mnt = setmntent(bb_path_mtab_file, "r");
2046 if (mnt) {
2047 while ((entry=getmntent(mnt)) != NULL) {
2048 name[4] = entry->mnt_dir[0];
2049 h = CreateFile(name, GENERIC_READ | GENERIC_WRITE,
2050 FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
2051 OPEN_EXISTING, 0, NULL);
2052 if (h != INVALID_HANDLE_VALUE) {
2053 FlushFileBuffers(h);
2054 CloseHandle(h);
2055 }
2056 }
2057 endmntent(mnt);
2058 }
2059}
2060
2061#define NUMEXT 5
2062static const char win_suffix[NUMEXT][4] = { "com", "exe", "sh", "bat", "cmd" };
2063
2064static int has_win_suffix(const char *name, int start)
2065{
2066 const char *dot = strrchr(bb_basename(name), '.');
2067 int i;
2068
2069 if (dot != NULL && strlen(dot) < 5) {
2070 for (i=start; i<NUMEXT; ++i) {
2071 if (!strcasecmp(dot+1, win_suffix[i])) {
2072 return 1;
2073 }
2074 }
2075 }
2076 return 0;
2077}
2078
/* Return 1 if name has a suffix from win_suffix[3] onwards, else 0 */
int has_bat_suffix(const char *name)
{
	const int first_suffix = 3;

	return has_win_suffix(name, first_suffix);
}
2083
/* Return 1 if name has any of the suffixes in win_suffix, else 0 */
int has_exe_suffix(const char *name)
{
	const int first_suffix = 0;

	return has_win_suffix(name, first_suffix);
}
2088
/*
 * Return 1 if name ends with a dot or has any of the suffixes in
 * win_suffix, else 0.
 */
int has_exe_suffix_or_dot(const char *name)
{
	if (last_char_is(name, '.'))
		return 1;
	return has_win_suffix(name, 0);
}
2093
/* Copy path to an allocated string long enough to allow a file extension
 * to be added:  room is left for five more bytes than the length of
 * path.  The caller must free the result. */
char *alloc_ext_space(const char *path)
{
	return strcpy(xmalloc(strlen(path) + 5), path);
}
2102
2103/* Check if path is an executable or can be made into one by adding
2104 * a suffix. The suffix is added to the end of the argument which
2105 * must be long enough to allow this.
2106 *
2107 * If the return value is TRUE the argument contains the new path,
2108 * if FALSE the argument is unchanged.
2109 */
2110int
2111add_win32_extension(char *p)
2112{
2113 if (file_is_executable(p))
2114 return TRUE;
2115
2116 if (!has_exe_suffix_or_dot(p)) {
2117 int i, len = strlen(p);
2118
2119 p[len] = '.';
2120 for (i = 0; i < NUMEXT; ++i) {
2121 strcpy(p + len + 1, win_suffix[i]);
2122 if (file_is_executable(p))
2123 return TRUE;
2124 }
2125 p[len] = '\0';
2126 }
2127 return FALSE;
2128}
2129
/*
 * Determine if a path represents a WIN32 executable, adding a suffix
 * if necessary.  Returns an allocated string if it does, NULL if not.
 */
char *
file_is_win32_exe(const char *name)
{
	char *path = alloc_ext_space(name);

	if (!add_win32_extension(path)) {
		free(path);
		path = NULL;
	}
	return path;
}
2145
2146char * FAST_FUNC bs_to_slash(char *str)
2147{
2148 char *p;
2149
2150 for (p=str; *p; ++p) {
2151 if ( *p == '\\' ) {
2152 *p = '/';
2153 }
2154 }
2155 return str;
2156}
2157
2158#if ENABLE_UNICODE_SUPPORT
2159MINGW_BB_WCHAR_T * FAST_FUNC bs_to_slash_u(MINGW_BB_WCHAR_T *str)
2160{
2161 MINGW_BB_WCHAR_T *p;
2162
2163 for (p=str; *p; ++p) {
2164 if ( *p == '\\' ) {
2165 *p = '/';
2166 }
2167 }
2168 return str;
2169}
2170#endif
2171
2172void FAST_FUNC slash_to_bs(char *p)
2173{
2174 for (; *p; ++p) {
2175 if ( *p == '/' ) {
2176 *p = '\\';
2177 }
2178 }
2179}
2180
2181/* Windows strips trailing dots and spaces from the last component of
2182 * a file path. This routine emulates that behaviour so we can preempt
2183 * Windows if necessary. */
2184void FAST_FUNC strip_dot_space(char *p)
2185{
2186 char *start = (char *)bb_basename(p);
2187 char *end = start + strlen(start);
2188
2189 while (end > start && (end[-1] == '.' || end[-1] == ' ')) {
2190 *--end = '\0';
2191 }
2192
2193 // Strip trailing slash, but not from a drive root (C:/)
2194 if (--end != start && (*end == '/' || *end == '\\') &&
2195 !(end == p + 2 && root_len(p) == 2))
2196 *end = '\0';
2197}
2198
2199size_t FAST_FUNC remove_cr(char *p, size_t len)
2200{
2201 ssize_t i, j;
2202
2203 for (i=j=0; i<len; ++i) {
2204 if (p[i] == '\r' && i < len - 1 && p[i+1] == '\n')
2205 continue;
2206 p[j++] = p[i];
2207 }
2208 return j;
2209}
2210
2211off_t mingw_lseek(int fd, off_t offset, int whence)
2212{
2213 DWORD ftype;
2214 HANDLE h = (HANDLE)_get_osfhandle(fd);
2215 if (h == INVALID_HANDLE_VALUE) {
2216 errno = EBADF;
2217 return -1;
2218 }
2219 ftype = GetFileType(h);
2220 if (ftype != FILE_TYPE_DISK && ftype != FILE_TYPE_CHAR) {
2221 errno = ESPIPE;
2222 return -1;
2223 }
2224 return _lseeki64(fd, offset, whence);
2225}
2226
2227#undef GetTickCount64
2228ULONGLONG CompatGetTickCount64(void)
2229{
2230 DECLARE_PROC_ADDR(ULONGLONG, GetTickCount64, void);
2231
2232 if (!INIT_PROC_ADDR(kernel32.dll, GetTickCount64)) {
2233 return (ULONGLONG)GetTickCount();
2234 }
2235
2236 return GetTickCount64();
2237}
2238
2239#if ENABLE_FEATURE_INSTALLER
2240/*
2241 * Enumerate the names of all hard links to a file. The first call
2242 * provides the file name as the first argument; subsequent calls must
2243 * set the first argument to NULL. Returns 0 on error or when there are
2244 * no more links.
2245 */
2246int enumerate_links(const char *file, char *name)
2247{
2248 static HANDLE h = INVALID_HANDLE_VALUE;
2249 char aname[PATH_MAX];
2250 wchar_t wname[PATH_MAX];
2251 DWORD length = PATH_MAX;
2252 DECLARE_PROC_ADDR(HANDLE, FindFirstFileNameW, LPCWSTR, DWORD, LPDWORD,
2253 PWSTR);
2254 DECLARE_PROC_ADDR(BOOL, FindNextFileNameW, HANDLE, LPDWORD, PWSTR);
2255
2256 if (!INIT_PROC_ADDR(kernel32.dll, FindFirstFileNameW) ||
2257 !INIT_PROC_ADDR(kernel32.dll, FindNextFileNameW))
2258 return 0;
2259
2260 if (file != NULL) {
2261 wchar_t wfile[PATH_MAX];
2262 MultiByteToWideChar(CP_ACP, 0, file, -1, wfile, PATH_MAX);
2263 h = FindFirstFileNameW(wfile, 0, &length, wname);
2264 if (h == INVALID_HANDLE_VALUE)
2265 return 0;
2266 }
2267 else if (!FindNextFileNameW(h, &length, wname)) {
2268 FindClose(h);
2269 h = INVALID_HANDLE_VALUE;
2270 return 0;
2271 }
2272 WideCharToMultiByte(CP_ACP, 0, wname, -1, aname, PATH_MAX, NULL, NULL);
2273 realpath(aname, name);
2274 return 1;
2275}
2276#endif
2277
2278/* Return the length of the root of a UNC path, i.e. the '//host/share'
2279 * component, or 0 if the path doesn't look like that. */
2280int FAST_FUNC unc_root_len(const char *dir)
2281{
2282 const char *s = dir + 2;
2283 int len;
2284
2285 if (!is_unc_path(dir))
2286 return 0;
2287 len = strcspn(s, "/\\");
2288 if (len == 0)
2289 return 0;
2290 s += len + 1;
2291 len = strcspn(s, "/\\");
2292 if (len == 0)
2293 return 0;
2294 s += len;
2295
2296 return s - dir;
2297}
2298
2299/* Return the length of the root of a path, i.e. either the drive or
2300 * UNC '//host/share', or 0 if the path doesn't look like that. */
2301int FAST_FUNC root_len(const char *path)
2302{
2303 if (path == NULL)
2304 return 0;
2305 if (isalpha(*path) && path[1] == ':')
2306 return 2;
2307 return unc_root_len(path);
2308}
2309
2310const char * FAST_FUNC get_system_drive(void)
2311{
2312 static const char *drive = NULL;
2313 char sysdir[PATH_MAX];
2314 int len;
2315
2316 if (drive == NULL) {
2317 UINT ret = GetSystemDirectory(sysdir, PATH_MAX);
2318 if ((ret != 0 && ret < PATH_MAX) && (len=root_len(sysdir)))
2319 drive = xstrndup(sysdir, len);
2320 else
2321 drive = "";
2322 }
2323
2324 return getenv(BB_SYSTEMROOT) ?: drive;
2325}
2326
/*
 * Change directory to the top level of the system drive.  Returns the
 * result of chdir(), or -1 if the system drive is unknown.
 */
int chdir_system_drive(void)
{
	const char *sd = get_system_drive();

	if (*sd == '\0')
		return -1;

	return chdir(auto_string(concat_path_file(sd, "")));
}
2336
2337/*
2338 * This function is used to make relative paths absolute before a call
2339 * to chdir_system_drive(). It's unlikely to be useful in other cases.
2340 *
2341 * If the argument is an absolute path return 'path', otherwise return
2342 * an allocated string containing the resolved path. Die on failure,
2343 * which is most likely because the file doesn't exist.
2344 */
2345char * FAST_FUNC xabsolute_path(char *path)
2346{
2347 char *rpath;
2348
2349 if (root_len(path) != 0)
2350 return path; // absolute path
2351 rpath = xmalloc_realpath(path);
2352 if (rpath)
2353 return rpath;
2354 bb_perror_msg_and_die("can't open '%s'", path);
2355}
2356
2357char * FAST_FUNC get_drive_cwd(const char *path, char *buffer, int size)
2358{
2359 char drive[3] = { *path, ':', '\0' };
2360 DWORD ret;
2361
2362 ret = GetFullPathName(drive, size, buffer, NULL);
2363 if (ret == 0 || ret > size)
2364 return NULL;
2365 return bs_to_slash(buffer);
2366}
2367
2368void FAST_FUNC fix_path_case(char *path)
2369{
2370 char resolved[PATH_MAX];
2371 int len;
2372
2373 // Canonicalise path: for physical drives this makes case match
2374 // what's stored on disk. For mapped drives, not so much.
2375 if (realpath(path, resolved) && strcasecmp(path, resolved) == 0)
2376 strcpy(path, resolved);
2377
2378 // make drive letter or UNC hostname uppercase
2379 len = root_len(path);
2380 if (len == 2) {
2381 *path = toupper(*path);
2382 }
2383 else if (len != 0) {
2384 for (path+=2; !is_dir_sep(*path); ++path) {
2385 *path = toupper(*path);
2386 }
2387 }
2388}
2389
2390void FAST_FUNC make_sparse(int fd, off_t start, off_t end)
2391{
2392 DWORD dwTemp;
2393 HANDLE fh;
2394 FILE_ZERO_DATA_INFORMATION fzdi;
2395
2396 if ((fh=(HANDLE)_get_osfhandle(fd)) == INVALID_HANDLE_VALUE)
2397 return;
2398
2399 DeviceIoControl(fh, FSCTL_SET_SPARSE, NULL, 0, NULL, 0, &dwTemp, NULL);
2400
2401 fzdi.FileOffset.QuadPart = start;
2402 fzdi.BeyondFinalZero.QuadPart = end;
2403 DeviceIoControl(fh, FSCTL_SET_ZERO_DATA, &fzdi, sizeof(fzdi),
2404 NULL, 0, &dwTemp, NULL);
2405}
2406
2407void *get_proc_addr(const char *dll, const char *function,
2408 struct proc_addr *proc)
2409{
2410 /* only do this once */
2411 if (!proc->initialized) {
2412 HANDLE hnd = LoadLibraryExA(dll, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
2413
2414 /* The documentation for LoadLibraryEx says the above may fail
2415 * on Windows 7. If it does, retry using LoadLibrary with an
2416 * explicit, backslash-separated path. */
2417 if (!hnd) {
2418 char buf[PATH_MAX];
2419 UINT ret = GetSystemDirectory(buf, PATH_MAX);
2420 if (ret != 0 && ret < PATH_MAX) {
2421 char *path = concat_path_file(buf, dll);
2422 slash_to_bs(path);
2423 hnd = LoadLibrary(path);
2424 free(path);
2425 }
2426 }
2427
2428 if (hnd)
2429 proc->pfunction = GetProcAddress(hnd, function);
2430 proc->initialized = 1;
2431 }
2432 return proc->pfunction;
2433}
2434
2435int FAST_FUNC unix_path(const char *path)
2436{
2437 int i;
2438 char *p = xstrdup(path);
2439
2440#define UNIX_PATHS "/bin\0/usr/bin\0/sbin\0/usr/sbin\0"
2441 i = index_in_strings(UNIX_PATHS, dirname(p));
2442 free(p);
2443 return i >= 0;
2444}
2445
2446/* Return true if file is referenced using a path. This means a path
2447 * look-up isn't required. */
2448int FAST_FUNC has_path(const char *file)
2449{
2450 return strchr(file, '/') || strchr(file, '\\') ||
2451 has_dos_drive_prefix(file);
2452}
2453
2454/*
2455 * Test whether a path is relative to a known location (usually the
2456 * current working directory or a symlink). On Unix this is a path
2457 * that doesn't start with a slash but on Windows it also includes
2458 * paths that don't start with a backslash or a drive letter.
2459 *
2460 * Paths of the form /dir/file or c:dir/file aren't relative by this
2461 * definition.
2462 */
2463int FAST_FUNC is_relative_path(const char *path)
2464{
2465 return !is_dir_sep(path[0]) && !has_dos_drive_prefix(path);
2466}
2467
2468#if ENABLE_FEATURE_SH_STANDALONE
2469/*
2470 * In standalone shell mode it's possible there's no binary file
2471 * corresponding to an applet name. There's one case where it's
2472 * easy to determine the corresponding binary: if the applet name
2473 * matches the file name from bb_busybox_exec_path (with appropriate
2474 * allowance for 'busybox*.exe').
2475 */
2476const char * FAST_FUNC applet_to_exe(const char *name)
2477{
2478 const char *exefile = bb_basename(bb_busybox_exec_path);
2479 const char *exesuff = is_prefixed_with_case(exefile, name);
2480
2481 if (exesuff && (strcmp(name, "busybox") == 0 ||
2482 strcasecmp(exesuff, ".exe") == 0)) {
2483 return bb_busybox_exec_path;
2484 }
2485 return name;
2486}
2487#endif
2488
2489/*
2490 * Append a word to a space-separated string of words. The first
2491 * call should use a NULL pointer for str, subsequent calls should
2492 * pass an allocated string which will be freed.
2493 */
2494char * FAST_FUNC xappendword(const char *str, const char *word)
2495{
2496 char *newstr = str ? xasprintf("%s %s", str, word) : xstrdup(word);
2497 free((void *)str);
2498 return newstr;
2499}
2500
2501/*
2502 * Detect if the environment contains certain mixed-case names:
2503 *
2504 * Path is present in a standard Windows environment
2505 * ComSpec is present in WINE
2506 * ProgramData is present in Cygwin/MSYS2
2507 */
2508int
2509windows_env(void)
2510{
2511 const char *names = "PATH=\0""COMSPEC=\0""PROGRAMDATA=\0";
2512 const char *n;
2513
2514 for (char **envp = environ; envp && *envp; envp++) {
2515 for (n = names; *n; ) {
2516 if (is_prefixed_with_case(*envp, n) &&
2517 !is_prefixed_with(*envp, n)) {
2518 return TRUE;
2519 }
2520 while (*n++)
2521 ;
2522 }
2523 }
2524 return FALSE;
2525}
2526
2527void FAST_FUNC
2528change_critical_error_dialogs(const char *newval)
2529{
2530 SetErrorMode(newval && newval[0] == '1' && newval[1] == '\0' ?
2531 0 : SEM_FAILCRITICALERRORS);
2532}
2533
2534char * FAST_FUNC exe_relative_path(const char *tail)
2535{
2536 char *exepath = xstrdup(bb_busybox_exec_path);
2537 char *relpath = concat_path_file(dirname(exepath), tail);
2538 free(exepath);
2539 return relpath;
2540}
diff --git a/win32/mntent.c b/win32/mntent.c
new file mode 100644
index 000000000..7f142b485
--- /dev/null
+++ b/win32/mntent.c
@@ -0,0 +1,94 @@
1/*
2 * A simple WIN32 implementation of mntent routines. It only handles
3 * logical drives.
4 */
5#define MNTENT_PRIVATE
6#include "libbb.h"
7
8struct mntstate {
9 DWORD drives;
10 int index;
11};
12
13int fill_mntdata(struct mntdata *data, int index)
14{
15 UINT drive_type;
16 char buf[PATH_MAX];
17
18 // initialise pointers and scalar data
19 data->me.mnt_fsname = data->mnt_fsname;
20 data->me.mnt_dir = data->mnt_dir;
21 data->me.mnt_type = data->mnt_type;
22 data->me.mnt_opts = data->mnt_opts;
23 data->me.mnt_freq = 0;
24 data->me.mnt_passno = 0;
25
26 // initialise strings
27 data->mnt_fsname[0] = 'A' + index;
28 data->mnt_fsname[1] = ':';
29 data->mnt_fsname[2] = '\0';
30 data->mnt_dir[0] = 'A' + index;
31 data->mnt_dir[1] = ':';
32 data->mnt_dir[2] = '/';
33 data->mnt_dir[3] = '\0';
34 data->mnt_type[0] = '\0';
35 data->mnt_opts[0] = '\0';
36
37 drive_type = GetDriveType(data->mnt_dir);
38 if (drive_type == DRIVE_FIXED || drive_type == DRIVE_CDROM ||
39 drive_type == DRIVE_REMOVABLE || drive_type == DRIVE_REMOTE) {
40 if (!GetVolumeInformation(data->mnt_dir, NULL, 0, NULL, NULL,
41 NULL, data->mnt_type, 100)) {
42 return FALSE;
43 }
44
45 if (realpath(data->mnt_dir, buf) != NULL) {
46 if (isalpha(buf[0]) && strcmp(buf+1, ":/") == 0)
47 buf[2] = '\0';
48 strcpy(data->mnt_fsname, buf);
49 }
50 return TRUE;
51 }
52 return FALSE;
53}
54
55FILE *mingw_setmntent(void)
56{
57 struct mntstate *state;
58
59 if ( (state=malloc(sizeof(struct mntstate))) == NULL ) {
60 return NULL;
61 }
62
63 state->drives = GetLogicalDrives();
64 state->index = -1;
65
66 return (FILE *)state;
67}
68
69struct mntent *getmntent(FILE *stream)
70{
71 struct mntstate *state = (struct mntstate *)stream;
72 static struct mntdata *data = NULL;
73 struct mntent *entry = NULL;
74
75 while (++state->index < 26) {
76 if ((state->drives & 1 << state->index) != 0) {
77 if (data == NULL)
78 data = xmalloc(sizeof(*data));
79
80 if (fill_mntdata(data, state->index)) {
81 entry = &data->me;
82 break;
83 }
84 }
85 }
86
87 return entry;
88}
89
/* Finish an enumeration: release the state and return 0. */
int endmntent(FILE *stream)
{
	void *state = stream;

	free(state);
	return 0;
}
diff --git a/win32/mntent.h b/win32/mntent.h
new file mode 100644
index 000000000..029f18b96
--- /dev/null
+++ b/win32/mntent.h
@@ -0,0 +1,33 @@
1#ifndef MNTENT_H
2#define MNTENT_H
3
4#include <stdio.h>
5
/* One mounted-filesystem entry, as returned by getmntent(). */
struct mntent {
	char *mnt_fsname;	/* device or server holding the filesystem */
	char *mnt_dir;		/* directory it is mounted on */
	char *mnt_type;		/* filesystem type: ufs, nfs, etc. */
	char *mnt_opts;		/* comma-separated mount options */
	int mnt_freq;		/* dump frequency (in days) */
	int mnt_passno;		/* pass number for `fsck' */
};
14
15extern FILE *mingw_setmntent(void);
16extern struct mntent *getmntent(FILE *stream);
17extern int endmntent(FILE *stream);
18
19# if defined(MNTENT_PRIVATE)
20struct mntdata {
21 struct mntent me;
22 char mnt_fsname[PATH_MAX];
23 char mnt_dir[4];
24 char mnt_type[100];
25 char mnt_opts[4];
26};
27
28extern int fill_mntdata(struct mntdata *data, int index);
29# endif
30
31#define setmntent(f, m) mingw_setmntent()
32
33#endif
diff --git a/win32/net.c b/win32/net.c
new file mode 100644
index 000000000..33dc837fa
--- /dev/null
+++ b/win32/net.c
@@ -0,0 +1,146 @@
1#include "libbb.h"
2
3int inet_aton(const char *cp, struct in_addr *inp)
4{
5 unsigned long val = inet_addr(cp);
6
7 if (val == INADDR_NONE)
8 return 0;
9 inp->S_un.S_addr = val;
10 return 1;
11}
12
13void init_winsock(void)
14{
15 WSADATA wsa;
16 static int initialized = 0;
17
18 if (initialized)
19 return;
20
21 if (WSAStartup(MAKEWORD(2,2), &wsa))
22 bb_error_msg_and_die("WSAStartup failed, error %d", WSAGetLastError());
23
24 atexit((void(*)(void)) WSACleanup);
25 initialized = 1;
26}
27
28#undef gethostname
/* gethostname() that makes sure Winsock has been initialised first. */
int mingw_gethostname(char *name, int namelen)
{
	int rc;

	init_winsock();
	rc = gethostname(name, namelen);
	return rc;
}
34
35#undef gethostbyaddr
36struct hostent *mingw_gethostbyaddr(const void *addr, socklen_t len, int type)
37{
38 init_winsock();
39 return gethostbyaddr(addr, len, type);
40}
41
42#undef getaddrinfo
/* getaddrinfo() that makes sure Winsock has been initialised first. */
int mingw_getaddrinfo(const char *node, const char *service,
				const struct addrinfo *hints, struct addrinfo **res)
{
	int rc;

	init_winsock();
	rc = getaddrinfo(node, service, hints, res);
	return rc;
}
49
50int mingw_socket(int domain, int type, int protocol)
51{
52 int sockfd;
53 SOCKET s;
54
55 init_winsock();
56 s = WSASocket(domain, type, protocol, NULL, 0, 0);
57 if (s == INVALID_SOCKET) {
58 /*
59 * WSAGetLastError() values are regular BSD error codes
60 * biased by WSABASEERR.
61 * However, strerror() does not know about networking
62 * specific errors, which are values beginning at 38 or so.
63 * Therefore, we choose to leave the biased error code
64 * in errno so that _if_ someone looks up the code somewhere,
65 * then it is at least the number that are usually listed.
66 */
67 errno = WSAGetLastError();
68 return -1;
69 }
70 /* convert into a file descriptor */
71 if ((sockfd = _open_osfhandle((intptr_t)s, O_RDWR|O_BINARY)) < 0) {
72 closesocket(s);
73 bb_error_msg("unable to make a socket file descriptor: %s",
74 strerror(errno));
75 return -1;
76 }
77 return sockfd;
78}
79
80#undef connect
81int mingw_connect(int sockfd, const struct sockaddr *sa, size_t sz)
82{
83 SOCKET s = (SOCKET)_get_osfhandle(sockfd);
84 return connect(s, sa, sz);
85}
86
87#undef bind
88int mingw_bind(int sockfd, struct sockaddr *sa, size_t sz)
89{
90 SOCKET s = (SOCKET)_get_osfhandle(sockfd);
91 return bind(s, sa, sz);
92}
93
94#undef setsockopt
95int mingw_setsockopt(int sockfd, int lvl, int optname, void *optval, int optlen)
96{
97 SOCKET s = (SOCKET)_get_osfhandle(sockfd);
98 return setsockopt(s, lvl, optname, (const char*)optval, optlen);
99}
100
101#undef shutdown
102int mingw_shutdown(int sockfd, int how)
103{
104 SOCKET s = (SOCKET)_get_osfhandle(sockfd);
105 return shutdown(s, how);
106}
107
108#undef listen
109int mingw_listen(int sockfd, int backlog)
110{
111 SOCKET s = (SOCKET)_get_osfhandle(sockfd);
112 return listen(s, backlog);
113}
114
115#undef accept
116int mingw_accept(int sockfd1, struct sockaddr *sa, socklen_t *sz)
117{
118 int sockfd2;
119
120 SOCKET s1 = (SOCKET)_get_osfhandle(sockfd1);
121 SOCKET s2 = accept(s1, sa, sz);
122
123 /* convert into a file descriptor */
124 if ((sockfd2 = _open_osfhandle((intptr_t)s2, O_RDWR|O_BINARY)) < 0) {
125 int err = errno;
126 closesocket(s2);
127 bb_error_msg("unable to make a socket file descriptor: %s",
128 strerror(err));
129 return -1;
130 }
131 return sockfd2;
132}
133
134#undef getpeername
135int mingw_getpeername(int fd, struct sockaddr *sa, socklen_t *sz)
136{
137 SOCKET sock;
138
139 init_winsock();
140 sock = (SOCKET)_get_osfhandle(fd);
141 if (sock == INVALID_SOCKET) {
142 errno = EBADF;
143 return -1;
144 }
145 return getpeername(sock, sa, sz);
146}
diff --git a/win32/net/if.h b/win32/net/if.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/net/if.h
diff --git a/win32/netdb.h b/win32/netdb.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/netdb.h
diff --git a/win32/netinet/in.h b/win32/netinet/in.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/netinet/in.h
diff --git a/win32/paths.h b/win32/paths.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/paths.h
diff --git a/win32/poll.c b/win32/poll.c
new file mode 100644
index 000000000..8ab6bbf29
--- /dev/null
+++ b/win32/poll.c
@@ -0,0 +1,656 @@
1/* Emulation for poll(2)
2 Contributed by Paolo Bonzini.
3
4 Copyright 2001-2003, 2006-2024 Free Software Foundation, Inc.
5
6 This file is part of gnulib.
7
8 This file is free software: you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as
10 published by the Free Software Foundation; either version 2.1 of the
11 License, or (at your option) any later version.
12
13 This file is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
20
21/* Tell gcc not to warn about the (nfd < 0) tests, below. */
22#if (__GNUC__ == 4 && 3 <= __GNUC_MINOR__) || 4 < __GNUC__
23# pragma GCC diagnostic ignored "-Wtype-limits"
24#endif
25
26#include "libbb.h"
27#include <malloc.h>
28
29#include <sys/types.h>
30
31/* Specification. */
32#include <poll.h>
33
34#include <errno.h>
35#include <limits.h>
36#include <assert.h>
37
38#if defined _WIN32 && ! defined __CYGWIN__
39# define WINDOWS_NATIVE
40# include <winsock2.h>
41# include <windows.h>
42# include <io.h>
43# include <stdio.h>
44# include <conio.h>
45#else
46# include <sys/time.h>
47# include <unistd.h>
48#endif
49
50#include <sys/select.h>
51#include <sys/socket.h>
52
53#ifdef HAVE_SYS_IOCTL_H
54# include <sys/ioctl.h>
55#endif
56#ifdef HAVE_SYS_FILIO_H
57# include <sys/filio.h>
58#endif
59
60#include <time.h>
61
62#ifndef INFTIM
63# define INFTIM (-1)
64#endif
65
66/* BeOS does not have MSG_PEEK. */
67#ifndef MSG_PEEK
68# define MSG_PEEK 0
69#endif
70
71#ifdef WINDOWS_NATIVE
72
73/* Don't assume that UNICODE is not defined. */
74# undef GetModuleHandle
75# define GetModuleHandle GetModuleHandleA
76# undef PeekConsoleInput
77# define PeekConsoleInput PeekConsoleInputA
78# undef CreateEvent
79# define CreateEvent CreateEventA
80# undef PeekMessage
81# define PeekMessage PeekMessageA
82# undef DispatchMessage
83# define DispatchMessage DispatchMessageA
84
85/* Do *not* use the function WSAPoll
86 <https://docs.microsoft.com/en-us/windows/desktop/api/winsock2/nf-winsock2-wsapoll>
87 because there is a bug named “Windows 8 Bugs 309411 - WSAPoll does not
88 report failed connections” that Microsoft won't fix.
89 See Daniel Stenberg: "WASPoll is broken"
90 <https://daniel.haxx.se/blog/2012/10/10/wsapoll-is-broken/>. */
91
92/* Here we need the recv() function from Windows, that takes a SOCKET as
93 first argument, not any possible gnulib override. */
94# undef recv
95
96/* Here we need the select() function from Windows, because we pass bit masks
97 of SOCKETs, not bit masks of FDs. */
98# undef select
99
100/* Here we need timeval from Windows since this is what the select() function
101 from Windows requires. */
102# undef timeval
103
104/* Avoid warnings from gcc -Wcast-function-type. */
105# define GetProcAddress \
106 (void *) GetProcAddress
107
108static BOOL IsConsoleHandle (HANDLE h)
109{
110 DWORD mode;
111 return GetConsoleMode (h, &mode) != 0;
112}
113
114static BOOL
115IsSocketHandle (HANDLE h)
116{
117 WSANETWORKEVENTS ev;
118
119 if (IsConsoleHandle (h))
120 return FALSE;
121
122 /* Under Wine, it seems that getsockopt returns 0 for pipes too.
123 WSAEnumNetworkEvents instead distinguishes the two correctly. */
124 ev.lNetworkEvents = 0xDEADBEEF;
125 WSAEnumNetworkEvents ((SOCKET) h, NULL, &ev);
126 return ev.lNetworkEvents != 0xDEADBEEF;
127}
128
129/* Declare data structures for ntdll functions. */
130typedef struct _FILE_PIPE_LOCAL_INFORMATION {
131 ULONG NamedPipeType;
132 ULONG NamedPipeConfiguration;
133 ULONG MaximumInstances;
134 ULONG CurrentInstances;
135 ULONG InboundQuota;
136 ULONG ReadDataAvailable;
137 ULONG OutboundQuota;
138 ULONG WriteQuotaAvailable;
139 ULONG NamedPipeState;
140 ULONG NamedPipeEnd;
141} FILE_PIPE_LOCAL_INFORMATION, *PFILE_PIPE_LOCAL_INFORMATION;
142
143typedef struct _IO_STATUS_BLOCK
144{
145 union {
146 DWORD Status;
147 PVOID Pointer;
148 } u;
149 ULONG_PTR Information;
150} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;
151
152typedef enum _FILE_INFORMATION_CLASS {
153 FilePipeLocalInformation = 24
154} FILE_INFORMATION_CLASS, *PFILE_INFORMATION_CLASS;
155
156typedef DWORD (WINAPI *PNtQueryInformationFile)
157 (HANDLE, IO_STATUS_BLOCK *, VOID *, ULONG, FILE_INFORMATION_CLASS);
158
159# ifndef PIPE_BUF
160# define PIPE_BUF 512
161# endif
162
163/* Compute revents values for file handle H. If some events cannot happen
164 for the handle, eliminate them from *P_SOUGHT. */
165
166static int
167windows_compute_revents (HANDLE h, int *p_sought)
168{
169 int i, ret, happened;
170 INPUT_RECORD *irbuffer;
171 DWORD avail, nbuffer;
172 BOOL bRet;
173#if 0
174 IO_STATUS_BLOCK iosb;
175 FILE_PIPE_LOCAL_INFORMATION fpli;
176 static PNtQueryInformationFile NtQueryInformationFile;
177 static BOOL once_only;
178#endif
179
180 switch (GetFileType (h))
181 {
182 case FILE_TYPE_PIPE:
183#if 0
184 if (!once_only)
185 {
186 NtQueryInformationFile = (PNtQueryInformationFile)
187 GetProcAddress (GetModuleHandle ("ntdll.dll"),
188 "NtQueryInformationFile");
189 once_only = TRUE;
190 }
191#endif
192
193 happened = 0;
194 if (PeekNamedPipe (h, NULL, 0, NULL, &avail, NULL) != 0)
195 {
196 if (avail)
197 happened |= *p_sought & (POLLIN | POLLRDNORM);
198 }
199 else if (GetLastError () == ERROR_BROKEN_PIPE)
200 happened |= POLLHUP;
201
202 else
203 {
204 /* The writability of a pipe can't be detected reliably on Windows.
205 * Just say it's OK.
206 *
207 * Details:
208 *
209 * https://github.com/git-for-windows/git/commit/94f4d01932279c419844aa708bec31a26056bc6b
210 */
211#if 0
212 /* It was the write-end of the pipe. Check if it is writable.
213 If NtQueryInformationFile fails, optimistically assume the pipe is
214 writable. This could happen on Windows 9x, where
215 NtQueryInformationFile is not available, or if we inherit a pipe
216 that doesn't permit FILE_READ_ATTRIBUTES access on the write end
217 (I think this should not happen since Windows XP SP2; WINE seems
218 fine too). Otherwise, ensure that enough space is available for
219 atomic writes. */
220 memset (&iosb, 0, sizeof (iosb));
221 memset (&fpli, 0, sizeof (fpli));
222
223 if (!NtQueryInformationFile
224 || NtQueryInformationFile (h, &iosb, &fpli, sizeof (fpli),
225 FilePipeLocalInformation)
226 || fpli.WriteQuotaAvailable >= PIPE_BUF
227 || (fpli.OutboundQuota < PIPE_BUF &&
228 fpli.WriteQuotaAvailable == fpli.OutboundQuota))
229#endif
230 happened |= *p_sought & (POLLOUT | POLLWRNORM | POLLWRBAND);
231 }
232 return happened;
233
234 case FILE_TYPE_CHAR:
235 // Fall through to default case for non-console, e.g. /dev/null.
236 if (IsConsoleHandle (h)) {
237 nbuffer = avail = 0;
238 bRet = GetNumberOfConsoleInputEvents (h, &nbuffer);
239 if (bRet)
240 {
241 /* Input buffer. */
242 *p_sought &= POLLIN | POLLRDNORM;
243 if (nbuffer == 0)
244 // Having no unread events isn't an error condition.
245 return 0 /* was POLLHUP */;
246 if (!*p_sought)
247 return 0;
248
249 irbuffer = (INPUT_RECORD *) alloca (nbuffer * sizeof (INPUT_RECORD));
250 bRet = PeekConsoleInputW (h, irbuffer, nbuffer, &avail);
251 if (!bRet || avail == 0)
252 return POLLHUP;
253
254 for (i = 0; i < avail; i++)
255 // Ignore key release.
256 if (irbuffer[i].EventType == KEY_EVENT &&
257 irbuffer[i].Event.KeyEvent.bKeyDown)
258 return *p_sought;
259 return 0;
260 }
261 else
262 {
263 /* Screen buffer. */
264 *p_sought &= POLLOUT | POLLWRNORM | POLLWRBAND;
265 return *p_sought;
266 }
267 }
268 /* fall through */
269
270 default:
271 ret = WaitForSingleObject (h, 0);
272 if (ret == WAIT_OBJECT_0)
273 return *p_sought & ~(POLLPRI | POLLRDBAND);
274
275 // Add (POLLIN | POLLRDNORM). Why only support write?
276 return *p_sought & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM | POLLWRBAND);
277 }
278}
279
280/* Convert fd_sets returned by select into revents values. */
281
282static int
283windows_compute_revents_socket (SOCKET h, int sought, long lNetworkEvents)
284{
285 int happened = 0;
286
287 if ((lNetworkEvents & (FD_READ | FD_ACCEPT | FD_CLOSE)) == FD_ACCEPT)
288 happened |= (POLLIN | POLLRDNORM) & sought;
289
290 else if (lNetworkEvents & (FD_READ | FD_ACCEPT | FD_CLOSE))
291 {
292 int r, error;
293
294 char data[64];
295 WSASetLastError (0);
296 r = recv (h, data, sizeof (data), MSG_PEEK);
297 error = WSAGetLastError ();
298 WSASetLastError (0);
299
300 if (r > 0 || error == WSAENOTCONN)
301 happened |= (POLLIN | POLLRDNORM) & sought;
302
303 /* Distinguish hung-up sockets from other errors. */
304 else if (r == 0 || error == WSAESHUTDOWN || error == WSAECONNRESET
305 || error == WSAECONNABORTED || error == WSAENETRESET)
306 happened |= POLLHUP;
307
308 else
309 happened |= POLLERR;
310 }
311
312 if (lNetworkEvents & (FD_WRITE | FD_CONNECT))
313 happened |= (POLLOUT | POLLWRNORM | POLLWRBAND) & sought;
314
315 if (lNetworkEvents & FD_OOB)
316 happened |= (POLLPRI | POLLRDBAND) & sought;
317
318 return happened;
319}
320
321#else /* !MinGW */
322
323/* Convert select(2) returned fd_sets into poll(2) revents values. */
324static int
325compute_revents (int fd, int sought, fd_set *rfds, fd_set *wfds, fd_set *efds)
326{
327 int happened = 0;
328 if (FD_ISSET (fd, rfds))
329 {
330 int r;
331 int socket_errno;
332
333# if defined __MACH__ && defined __APPLE__
334 /* There is a bug in Mac OS X that causes it to ignore MSG_PEEK
335 for some kinds of descriptors. Detect if this descriptor is a
336 connected socket, a server socket, or something else using a
337 0-byte recv, and use ioctl(2) to detect POLLHUP. */
338 r = recv (fd, NULL, 0, MSG_PEEK);
339 socket_errno = (r < 0) ? errno : 0;
340 if (r == 0 || socket_errno == ENOTSOCK)
341 ioctl (fd, FIONREAD, &r);
342# else
343 char data[64];
344 r = recv (fd, data, sizeof (data), MSG_PEEK);
345 socket_errno = (r < 0) ? errno : 0;
346# endif
347 if (r == 0)
348 happened |= POLLHUP;
349
350 /* If the event happened on an unconnected server socket,
351 that's fine. */
352 else if (r > 0 || ( /* (r == -1) && */ socket_errno == ENOTCONN))
353 happened |= (POLLIN | POLLRDNORM) & sought;
354
355 /* Distinguish hung-up sockets from other errors. */
356 else if (socket_errno == ESHUTDOWN || socket_errno == ECONNRESET
357 || socket_errno == ECONNABORTED || socket_errno == ENETRESET)
358 happened |= POLLHUP;
359
360 /* some systems can't use recv() on non-socket, including HP NonStop */
361 else if (socket_errno == ENOTSOCK)
362 happened |= (POLLIN | POLLRDNORM) & sought;
363
364 else
365 happened |= POLLERR;
366 }
367
368 if (FD_ISSET (fd, wfds))
369 happened |= (POLLOUT | POLLWRNORM | POLLWRBAND) & sought;
370
371 if (FD_ISSET (fd, efds))
372 happened |= (POLLPRI | POLLRDBAND) & sought;
373
374 return happened;
375}
376#endif /* !MinGW */
377
378int
379poll (struct pollfd *pfd, nfds_t nfd, int timeout)
380{
381#ifndef WINDOWS_NATIVE
382 fd_set rfds, wfds, efds;
383 struct timeval tv;
384 struct timeval *ptv;
385 int maxfd, rc;
386 nfds_t i;
387
388 if (nfd > INT_MAX)
389 {
390 errno = EINVAL;
391 return -1;
392 }
393 /* Don't check directly for NFD greater than OPEN_MAX. Any practical use
394 of a too-large NFD is caught by one of the other checks below, and
395 checking directly for getdtablesize is too much of a portability
396 and/or performance and/or correctness hassle. */
397
398 /* EFAULT is not necessary to implement, but let's do it in the
399 simplest case. */
400 if (!pfd && nfd)
401 {
402 errno = EFAULT;
403 return -1;
404 }
405
406 /* convert timeout number into a timeval structure */
407 if (timeout == 0)
408 {
409 ptv = &tv;
410 tv = (struct timeval) {0};
411 }
412 else if (timeout > 0)
413 {
414 ptv = &tv;
415 tv = (struct timeval) {
416 .tv_sec = timeout / 1000,
417 .tv_usec = (timeout % 1000) * 1000
418 };
419 }
420 else if (timeout == INFTIM)
421 /* wait forever */
422 ptv = NULL;
423 else
424 {
425 errno = EINVAL;
426 return -1;
427 }
428
429 /* create fd sets and determine max fd */
430 maxfd = -1;
431 FD_ZERO (&rfds);
432 FD_ZERO (&wfds);
433 FD_ZERO (&efds);
434 for (i = 0; i < nfd; i++)
435 {
436 if (pfd[i].fd < 0)
437 continue;
438 if (maxfd < pfd[i].fd)
439 {
440 maxfd = pfd[i].fd;
441 if (FD_SETSIZE <= maxfd)
442 {
443 errno = EINVAL;
444 return -1;
445 }
446 }
447 if (pfd[i].events & (POLLIN | POLLRDNORM))
448 FD_SET (pfd[i].fd, &rfds);
449 /* see select(2): "the only exceptional condition detectable
450 is out-of-band data received on a socket", hence we push
451 POLLWRBAND events onto wfds instead of efds. */
452 if (pfd[i].events & (POLLOUT | POLLWRNORM | POLLWRBAND))
453 FD_SET (pfd[i].fd, &wfds);
454 if (pfd[i].events & (POLLPRI | POLLRDBAND))
455 FD_SET (pfd[i].fd, &efds);
456 }
457
458 /* examine fd sets */
459 rc = select (maxfd + 1, &rfds, &wfds, &efds, ptv);
460 if (rc < 0)
461 return rc;
462
463 /* establish results */
464 rc = 0;
465 for (i = 0; i < nfd; i++)
466 {
467 pfd[i].revents = (pfd[i].fd < 0
468 ? 0
469 : compute_revents (pfd[i].fd, pfd[i].events,
470 &rfds, &wfds, &efds));
471 rc += pfd[i].revents != 0;
472 }
473
474 return rc;
475#else
476 static struct timeval tv0;
477 static HANDLE hEvent;
478 WSANETWORKEVENTS ev;
479 HANDLE h, handle_array[FD_SETSIZE + 2];
480 DWORD ret, wait_timeout, nhandles;
481 fd_set rfds, wfds, xfds;
482 BOOL poll_again;
483 MSG msg;
484 int rc = 0;
485 nfds_t i;
486 DWORD real_timeout = 0;
487 int save_timeout = timeout;
488 clock_t tend = clock () + timeout;
489
490 if (nfd > INT_MAX || timeout < -1)
491 {
492 errno = EINVAL;
493 return -1;
494 }
495
496 if (!hEvent)
497 hEvent = CreateEvent (NULL, FALSE, FALSE, NULL);
498
499restart:
500 /* How much is left to wait? */
501 timeout = save_timeout;
502 if (timeout != INFTIM)
503 {
504 clock_t now = clock ();
505 real_timeout = tend > now ? tend - now : 0;
506 }
507
508 handle_array[0] = hEvent;
509 nhandles = 1;
510 FD_ZERO (&rfds);
511 FD_ZERO (&wfds);
512 FD_ZERO (&xfds);
513
514 /* Classify socket handles and create fd sets. */
515 for (i = 0; i < nfd; i++)
516 {
517 int sought = pfd[i].events;
518 pfd[i].revents = 0;
519 if (pfd[i].fd < 0)
520 continue;
521 if (!(sought & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM | POLLWRBAND
522 | POLLPRI | POLLRDBAND)))
523 continue;
524
525 h = (HANDLE) _get_osfhandle (pfd[i].fd);
526 assert (h != NULL);
527 if (IsSocketHandle (h))
528 {
529 int requested = FD_CLOSE;
530
531 /* see above; socket handles are mapped onto select. */
532 if (sought & (POLLIN | POLLRDNORM))
533 {
534 requested |= FD_READ | FD_ACCEPT;
535 FD_SET ((SOCKET) h, &rfds);
536 }
537 if (sought & (POLLOUT | POLLWRNORM | POLLWRBAND))
538 {
539 requested |= FD_WRITE | FD_CONNECT;
540 FD_SET ((SOCKET) h, &wfds);
541 }
542 if (sought & (POLLPRI | POLLRDBAND))
543 {
544 requested |= FD_OOB;
545 FD_SET ((SOCKET) h, &xfds);
546 }
547
548 if (requested)
549 WSAEventSelect ((SOCKET) h, hEvent, requested);
550 }
551 else
552 {
553 /* Poll now. If we get an event, do not poll again. Also,
554 screen buffer handles are waitable, and they'll block until
555 a character is available. windows_compute_revents eliminates
556 bits for the "wrong" direction. */
557 pfd[i].revents = windows_compute_revents (h, &sought);
558 if (sought)
559 handle_array[nhandles++] = h;
560 if (pfd[i].revents)
561 timeout = 0;
562 }
563 }
564
565 if (select (0, &rfds, &wfds, &xfds, &tv0) > 0)
566 {
567 /* Do MsgWaitForMultipleObjects anyway to dispatch messages, but
568 no need to call select again. */
569 poll_again = FALSE;
570 wait_timeout = 0;
571 }
572 else
573 {
574 poll_again = TRUE;
575 if (timeout == INFTIM)
576 wait_timeout = INFINITE;
577 else
578 wait_timeout = timeout;
579 }
580
581 for (;;)
582 {
583 ret = MsgWaitForMultipleObjects (nhandles, handle_array, FALSE,
584 wait_timeout, QS_ALLINPUT);
585
586 if (ret == WAIT_OBJECT_0 + nhandles)
587 {
588 /* new input of some other kind */
589 BOOL bRet;
590 while ((bRet = PeekMessage (&msg, NULL, 0, 0, PM_REMOVE)) != 0)
591 {
592 TranslateMessage (&msg);
593 DispatchMessage (&msg);
594 }
595 }
596 else
597 break;
598 }
599
600 if (poll_again)
601 select (0, &rfds, &wfds, &xfds, &tv0);
602
603 /* Place a sentinel at the end of the array. */
604 handle_array[nhandles] = NULL;
605 nhandles = 1;
606 for (i = 0; i < nfd; i++)
607 {
608 int happened;
609
610 if (pfd[i].fd < 0)
611 continue;
612 if (!(pfd[i].events & (POLLIN | POLLRDNORM |
613 POLLOUT | POLLWRNORM | POLLWRBAND)))
614 continue;
615
616 h = (HANDLE) _get_osfhandle (pfd[i].fd);
617 if (h != handle_array[nhandles])
618 {
619 /* It's a socket. */
620 WSAEnumNetworkEvents ((SOCKET) h, NULL, &ev);
621 WSAEventSelect ((SOCKET) h, 0, 0);
622
623 /* If we're lucky, WSAEnumNetworkEvents already provided a way
624 to distinguish FD_READ and FD_ACCEPT; this saves a recv later. */
625 if (FD_ISSET ((SOCKET) h, &rfds)
626 && !(ev.lNetworkEvents & (FD_READ | FD_ACCEPT)))
627 ev.lNetworkEvents |= FD_READ | FD_ACCEPT;
628 if (FD_ISSET ((SOCKET) h, &wfds))
629 ev.lNetworkEvents |= FD_WRITE | FD_CONNECT;
630 if (FD_ISSET ((SOCKET) h, &xfds))
631 ev.lNetworkEvents |= FD_OOB;
632
633 happened = windows_compute_revents_socket ((SOCKET) h, pfd[i].events,
634 ev.lNetworkEvents);
635 }
636 else
637 {
638 /* Not a socket. */
639 int sought = pfd[i].events;
640 happened = windows_compute_revents (h, &sought);
641 nhandles++;
642 }
643
644 if ((pfd[i].revents |= happened) != 0)
645 rc++;
646 }
647
648 if (!rc && (save_timeout == INFTIM || (real_timeout != 0 && nhandles > 1)))
649 {
650 SleepEx (1, TRUE);
651 goto restart;
652 }
653
654 return rc;
655#endif
656}
diff --git a/win32/poll.h b/win32/poll.h
new file mode 100644
index 000000000..b7aa59d97
--- /dev/null
+++ b/win32/poll.h
@@ -0,0 +1,53 @@
1/* Header for poll(2) emulation
2 Contributed by Paolo Bonzini.
3
4 Copyright 2001, 2002, 2003, 2007, 2009, 2010 Free Software Foundation, Inc.
5
6 This file is part of gnulib.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation,
20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22#ifndef _GL_POLL_H
23#define _GL_POLL_H
24
25/* fake a poll(2) environment */
26#define POLLIN 0x0001 /* any readable data available */
27#define POLLPRI 0x0002 /* OOB/Urgent readable data */
28#define POLLOUT 0x0004 /* file descriptor is writeable */
29#define POLLERR 0x0008 /* some poll error occurred */
30#define POLLHUP 0x0010 /* file descriptor was "hung up" */
31#define POLLNVAL 0x0020 /* requested events "invalid" */
32#define POLLRDNORM 0x0040
33#define POLLRDBAND 0x0080
34#define POLLWRNORM 0x0100
35#define POLLWRBAND 0x0200
36
/* One entry of the array passed to poll(): the descriptor to watch,
   the events requested for it and the events reported back. */
struct pollfd
{
  int fd;                       /* which file descriptor to poll */
  short events;                 /* events we are interested in */
  short revents;                /* events found on return */
};
43
44typedef unsigned long nfds_t;
45
46extern int poll (struct pollfd *pfd, nfds_t nfd, int timeout);
47
48/* Define INFTIM only if doing so conforms to POSIX. */
49#if !defined (_POSIX_C_SOURCE) && !defined (_XOPEN_SOURCE)
50#define INFTIM (-1)
51#endif
52
53#endif /* _GL_POLL_H */
diff --git a/win32/popen.c b/win32/popen.c
new file mode 100644
index 000000000..7cf2b1893
--- /dev/null
+++ b/win32/popen.c
@@ -0,0 +1,316 @@
1#include <fcntl.h>
2#include "libbb.h"
3#include "NUM_APPLETS.h"
4
/* Book-keeping for one pipe opened to a child process. */
typedef struct {
	/* filled in by CreateProcess() when the child is started */
	PROCESS_INFORMATION piProcInfo;
	/* the two pipe handles; INVALID_HANDLE_VALUE when not open */
	HANDLE pipe[2];
	/* C runtime descriptor wrapping the parent's end; -1 marks a free slot */
	int fd;
} pipe_data;
10
11static pipe_data *pipes = NULL;
12static int num_pipes = 0;
13
14static int mingw_popen_internal(pipe_data *p, const char *exe,
15 const char *cmd, const char *mode, int fd0, pid_t *pid);
16
17static int mingw_pipe(pipe_data *p, int bidi)
18{
19 SECURITY_ATTRIBUTES sa;
20
21 sa.nLength = sizeof(sa); /* Length in bytes */
22 sa.bInheritHandle = 1; /* the child must inherit these handles */
23 sa.lpSecurityDescriptor = NULL;
24
25 if (!bidi) {
26 /* pipe[0] is the read handle, pipe[i] the write handle */
27 if ( !CreatePipe (&p->pipe[0], &p->pipe[1], &sa, 1 << 13) ) {
28 return -1;
29 }
30 }
31 else {
32 char *name;
33 const int ip = 1; /* index of parent end of pipe */
34 const int ic = 0; /* index of child end of pipe */
35 static int count = 0;
36
37 name = xasprintf("\\\\.\\pipe\\bb_pipe.%d.%d", getpid(), ++count);
38
39 p->pipe[ip] = CreateNamedPipe(name,
40 PIPE_ACCESS_DUPLEX|FILE_FLAG_OVERLAPPED,
41 PIPE_TYPE_BYTE|PIPE_WAIT,
42 1, 4096, 4096, 0, &sa);
43
44 p->pipe[ic] = CreateFile(name, GENERIC_READ|GENERIC_WRITE, 0, &sa,
45 OPEN_EXISTING,
46 FILE_ATTRIBUTE_NORMAL|FILE_FLAG_OVERLAPPED,
47 NULL);
48 free(name);
49 }
50
51 return (p->pipe[0] == INVALID_HANDLE_VALUE ||
52 p->pipe[1] == INVALID_HANDLE_VALUE) ? -1 : 0;
53}
54
55static void clear_pipe_data(pipe_data *p)
56{
57 memset(p, 0, sizeof(pipe_data));
58 p->pipe[0] = INVALID_HANDLE_VALUE;
59 p->pipe[1] = INVALID_HANDLE_VALUE;
60 p->fd = -1;
61}
62
63static void close_pipe_data(pipe_data *p)
64{
65 if (p->pipe[0] != INVALID_HANDLE_VALUE)
66 CloseHandle(p->pipe[0]);
67 if (p->pipe[1] != INVALID_HANDLE_VALUE)
68 CloseHandle(p->pipe[1]);
69 clear_pipe_data(p);
70}
71
72/*
73 * Search for a pipe_data structure with file descriptor fd. If fd is
74 * -1 and no empty slots are available the array is extended. Return
75 * NULL if the file descriptor can't be found or the array can't be
76 * extended.
77 */
78static pipe_data *find_pipe(int fd)
79{
80 int i;
81 pipe_data *p = NULL;
82
83 /* find a matching pipe structure */
84 for ( i=0; i<num_pipes; ++i ) {
85 if (pipes[i].fd == fd) {
86 p = pipes+i;
87 break;
88 }
89 }
90
91 /* if looking for valid file descriptor return now */
92 if (fd != -1)
93 return p;
94
95 if ( p == NULL ) {
96 /* need to extend array */
97 if ( (p=realloc(pipes, sizeof(pipe_data)*(num_pipes+10))) == NULL ) {
98 return NULL;
99 }
100
101 pipes = p;
102 p = pipes + num_pipes;
103 for ( i=0; i<10; ++i ) {
104 clear_pipe_data(p+i);
105 }
106 num_pipes += 10;
107 }
108 clear_pipe_data(p);
109
110 return p;
111}
112
113FILE *mingw_popen(const char *cmd, const char *mode)
114{
115 pipe_data *p;
116 FILE *fptr = NULL;
117 int fd;
118 char *arg, *cmd_buff;
119
120 if ( cmd == NULL || *cmd == '\0' || mode == NULL ||
121 (*mode != 'r' && *mode != 'w') ) {
122 return NULL;
123 }
124
125 /* find an unused pipe structure */
126 if ((p=find_pipe(-1)) == NULL) {
127 return NULL;
128 }
129
130 arg = quote_arg(cmd);
131 cmd_buff = xasprintf("sh -c %s", arg);
132
133 /* Create the pipe */
134 if ((fd=mingw_popen_internal(p, "sh", cmd_buff, mode, -1, NULL)) != -1) {
135 fptr = _fdopen(fd, *mode == 'r' ? "rb" : "wb");
136 }
137
138 free(cmd_buff);
139 free(arg);
140
141 return fptr;
142}
143
144/*
145 * Open a pipe to a command.
146 *
147 * - mode may be "r", "w" or "b" for read-only, write-only or
148 * bidirectional (from the perspective of the parent).
149 * - if fd0 is a valid file descriptor it's used as input to the
150 * command ("r") or as the destination of the output from the
151 * command ("w"). Otherwise (and if not "b") use stdin or stdout.
152 * - the pid of the command is returned in the variable pid, which
153 * can be NULL if the pid is not required.
154 * - mode "w+" forces the use of an external program. This is required
155 * for xz and lzma compression.
156 */
157static int mingw_popen_internal(pipe_data *p, const char *exe,
158 const char *cmd, const char *mode, int fd0, pid_t *pid)
159{
160 pipe_data pd;
161 STARTUPINFO siStartInfo;
162 int success;
163 int fd = -1;
164 int ip, ic, flags;
165 char *freeme = NULL;
166
167 switch (*mode) {
168 case 'r':
169 ip = 0;
170 flags = _O_RDONLY|_O_BINARY;
171 break;
172 case 'w':
173 ip = 1;
174 flags = _O_WRONLY|_O_BINARY;
175 break;
176 case 'b':
177 ip = 1;
178 flags = _O_RDWR|_O_BINARY;
179 break;
180 default:
181 return -1;
182 }
183 ic = !ip;
184
185 if (!p) {
186 /* no struct provided, use a local one */
187 p = &pd;
188 }
189
190 /* Create the pipe */
191 if ( mingw_pipe(p, *mode == 'b') == -1 ) {
192 goto finito;
193 }
194
195#if ENABLE_FEATURE_PREFER_APPLETS && NUM_APPLETS > 1
196 // "w+" mode forces a path lookup
197 if (mode[1] != '+' && find_applet_by_name(exe) >= 0) {
198 exe = bb_busybox_exec_path;
199 } else
200#endif
201 {
202 // Look up executable on PATH
203 freeme = find_first_executable(exe);
204 if (freeme == NULL)
205 bb_perror_msg_and_die("can't execute '%s'", exe);
206 exe = freeme;
207 }
208
209 /* Make the parent end of the pipe non-inheritable */
210 SetHandleInformation(p->pipe[ip], HANDLE_FLAG_INHERIT, 0);
211
212 /* Now create the child process */
213 ZeroMemory(&siStartInfo, sizeof(STARTUPINFO));
214 siStartInfo.cb = sizeof(STARTUPINFO);
215 /* default settings for a bidirectional pipe */
216 siStartInfo.hStdInput = p->pipe[ic];
217 siStartInfo.hStdOutput = p->pipe[ic];
218 /* override for read-only or write-only */
219 if ( *mode == 'r' ) {
220 siStartInfo.hStdInput = fd0 >= 0 ? (HANDLE)_get_osfhandle(fd0) :
221 GetStdHandle(STD_INPUT_HANDLE);
222 }
223 else if ( *mode == 'w' ) {
224 siStartInfo.hStdOutput = fd0 >= 0 ? (HANDLE)_get_osfhandle(fd0) :
225 GetStdHandle(STD_OUTPUT_HANDLE);
226 }
227 siStartInfo.hStdError = GetStdHandle(STD_ERROR_HANDLE);
228 siStartInfo.wShowWindow = SW_HIDE;
229 siStartInfo.dwFlags = STARTF_USESTDHANDLES|STARTF_USESHOWWINDOW;
230
231 success = CreateProcess((LPCSTR)exe,
232 (LPSTR)cmd, /* command line */
233 NULL, /* process security attributes */
234 NULL, /* primary thread security attributes */
235 TRUE, /* handles are inherited */
236 0, /* creation flags */
237 NULL, /* use parent's environment */
238 NULL, /* use parent's current directory */
239 &siStartInfo, /* STARTUPINFO pointer */
240 &p->piProcInfo); /* receives PROCESS_INFORMATION */
241
242 if ( !success ) {
243 goto finito;
244 }
245
246 /* close child end of pipe */
247 CloseHandle(p->pipe[ic]);
248 p->pipe[ic] = INVALID_HANDLE_VALUE;
249
250 fd = _open_osfhandle((intptr_t)p->pipe[ip], flags);
251
252finito:
253 free(freeme);
254 if ( fd == -1 ) {
255 close_pipe_data(p);
256 }
257 else {
258 p->fd = fd;
259 if ( pid ) {
260 *pid = (pid_t)p->piProcInfo.dwProcessId;
261 }
262 }
263
264 return fd;
265}
266
267int mingw_popen_fd(const char *exe, const char *cmd, const char *mode,
268 int fd0, pid_t *pid)
269{
270 return mingw_popen_internal(NULL, exe, cmd, mode, fd0, pid);
271}
272
273int mingw_pclose(FILE *fp)
274{
275 int fd;
276 pipe_data *p;
277 DWORD ret;
278
279 /* find struct containing fd */
280 if (fp == NULL || (fd=fileno(fp)) == -1 || (p=find_pipe(fd)) == NULL)
281 return -1;
282
283 fclose(fp);
284
285 ret = WaitForSingleObject(p->piProcInfo.hProcess, INFINITE);
286
287 CloseHandle(p->piProcInfo.hProcess);
288 CloseHandle(p->piProcInfo.hThread);
289 close_pipe_data(p);
290
291 return (ret == WAIT_OBJECT_0) ? 0 : -1;
292}
293
294/* Used with mode "w" and a compressor when creating a compressed tar
295 * file; with mode "r" and a decompressor in open_transformer. */
296pid_t mingw_fork_compressor(int fd, const char *compressor, const char *mode)
297{
298 char *cmd;
299 int fd1;
300 pid_t pid;
301
302 cmd = xasprintf("%s -cf -", compressor);
303#if ENABLE_FEATURE_SEAMLESS_XZ || ENABLE_FEATURE_SEAMLESS_LZMA
304 // xz and lzma applets don't support compression, we must use
305 // an external command.
306 if (mode[0] == 'w' && index_in_strings("lzma\0xz\0", compressor) >= 0)
307 mode = "w+";
308#endif
309
310 if ((fd1 = mingw_popen_fd(compressor, cmd, mode, fd, &pid)) == -1)
311 bb_perror_msg_and_die("can't execute '%s'", compressor);
312
313 free(cmd);
314 xmove_fd(fd1, fd);
315 return pid;
316}
diff --git a/win32/process.c b/win32/process.c
new file mode 100644
index 000000000..e7c9ca187
--- /dev/null
+++ b/win32/process.c
@@ -0,0 +1,955 @@
1#include "libbb.h"
2#include <tlhelp32.h>
3#include <psapi.h>
4#include "lazyload.h"
5#include "NUM_APPLETS.h"
6
7pid_t waitpid(pid_t pid, int *status, int options)
8#if ENABLE_TIME
9{
10 return mingw_wait3(pid, status, options, NULL);
11}
12#endif
13
14#if ENABLE_TIME
15pid_t mingw_wait3(pid_t pid, int *status, int options, struct rusage *rusage)
16#endif
17{
18 HANDLE proc;
19 DWORD code;
20
21 /* Windows does not understand parent-child */
22 if (pid > 0 && options == 0) {
23 if ( (proc=OpenProcess(SYNCHRONIZE|PROCESS_QUERY_INFORMATION,
24 FALSE, pid)) != NULL ) {
25 WaitForSingleObject(proc, INFINITE);
26 GetExitCodeProcess(proc, &code);
27#if ENABLE_TIME
28 if (rusage != NULL) {
29 FILETIME crTime, exTime, keTime, usTime;
30
31 memset(rusage, 0, sizeof(*rusage));
32 if (GetProcessTimes(proc, &crTime, &exTime, &keTime, &usTime)) {
33 uint64_t kernel_usec =
34 (((uint64_t)keTime.dwHighDateTime << 32)
35 | (uint64_t)keTime.dwLowDateTime)/10;
36 uint64_t user_usec =
37 (((uint64_t)usTime.dwHighDateTime << 32)
38 | (uint64_t)usTime.dwLowDateTime)/10;
39
40 rusage->ru_utime.tv_sec = user_usec / 1000000U;
41 rusage->ru_utime.tv_usec = user_usec % 1000000U;
42 rusage->ru_stime.tv_sec = kernel_usec / 1000000U;
43 rusage->ru_stime.tv_usec = kernel_usec % 1000000U;
44 }
45 }
46#endif
47 CloseHandle(proc);
48 *status = exit_code_to_wait_status(code);
49 return pid;
50 }
51 }
52 errno = pid < 0 ? ENOSYS : EINVAL;
53 return -1;
54}
55
56int FAST_FUNC
57parse_interpreter(const char *cmd, interp_t *interp)
58{
59 char *path, *t;
60 int n;
61
62 while (TRUE) {
63 n = open_read_close(cmd, interp->buf, sizeof(interp->buf)-1);
64 if (n < 4) /* at least '#!/x' and not error */
65 break;
66
67 /*
68 * See http://www.in-ulm.de/~mascheck/various/shebang/ for trivia
69 * relating to '#!'. See also https://lwn.net/Articles/630727/
70 * for Linux-specific details.
71 */
72 if (interp->buf[0] != '#' || interp->buf[1] != '!')
73 break;
74 interp->buf[n] = '\0';
75 if ((t=strchr(interp->buf, '\n')) == NULL)
76 break;
77 t[1] = '\0';
78
79 if ((path=strtok(interp->buf+2, " \t\r\n")) == NULL)
80 break;
81
82 t = (char *)bb_basename(path);
83 if (*t == '\0')
84 break;
85
86 interp->path = path;
87 interp->name = t;
88 interp->opts = strtok(NULL, "\r\n");
89 /* Trim leading and trailing whitespace from the options.
90 * If the resulting string is empty return a NULL pointer. */
91 if (interp->opts && trim(interp->opts) == interp->opts)
92 interp->opts = NULL;
93 return 1;
94 }
95
96 if (n >= 0 && is_suffixed_with_case(cmd, ".sh")) {
97 interp->path = (char *)DEFAULT_SHELL;
98 interp->name = (char *)DEFAULT_SHELL_SHORT_NAME;
99 interp->opts = NULL;
100 return 1;
101 }
102 return 0;
103}
104
105/*
106 * See https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=vs-2019#parsing-c-command-line-arguments
107 * (Parsing C++ Command-Line Arguments)
108 */
109char * FAST_FUNC
110quote_arg(const char *arg)
111{
112 char *d, *r = xmalloc(2 * strlen(arg) + 3); // max-esc, quotes, \0
113 size_t nbs = 0; // consecutive backslashes before current char
114 int quoted = !*arg;
115
116 for (d = r; *arg; *d++ = *arg++) {
117 if (*arg == ' ' || *arg == '\t')
118 quoted = 1;
119
120 if (*arg == '\\' || *arg == '"')
121 *d++ = '\\';
122 else
123 d -= nbs; // undo nbs escapes, if any (not followed by DQ)
124
125 if (*arg == '\\')
126 ++nbs;
127 else
128 nbs = 0;
129 }
130
131 if (quoted) {
132 memmove(r + 1, r, d++ - r);
133 *r = *d++ = '"';
134 } else {
135 d -= nbs;
136 }
137
138 *d = 0;
139 return r;
140}
141
142char * FAST_FUNC
143find_first_executable(const char *name)
144{
145 const char *path = getenv("PATH");
146 return find_executable(name, &path);
147}
148
149static intptr_t
150spawnveq(int mode, const char *path, char *const *argv, char *const *env)
151{
152 char **new_argv;
153 char *new_path = NULL;
154 int i, argc;
155 intptr_t ret;
156 struct stat st;
157 size_t len = 0;
158
159 /*
160 * Require that the file exists, is a regular file and is executable.
161 * It may still contain garbage but we let spawnve deal with that.
162 */
163 if (stat(path, &st) == 0) {
164 if (!S_ISREG(st.st_mode) || !(st.st_mode&S_IXUSR)) {
165 errno = EACCES;
166 return -1;
167 }
168 }
169 else {
170 return -1;
171 }
172
173 argc = string_array_len((char **)argv);
174 new_argv = xzalloc(sizeof(*argv)*(argc+1));
175 for (i = 0; i < argc; i++) {
176 new_argv[i] = quote_arg(argv[i]);
177 len += strlen(new_argv[i]) + 1;
178 }
179
180 /* Special case: spawnve won't execute a batch file if the first
181 * argument is a relative path containing forward slashes. Absolute
182 * paths are fine but there's no harm in converting them too. */
183 if (has_bat_suffix(path)) {
184 slash_to_bs(new_argv[0]);
185
186 /* Another special case: spawnve returns ENOEXEC when passed an
187 * empty batch file. Pretend it worked. */
188 if (st.st_size == 0) {
189 ret = 0;
190 goto done;
191 }
192 }
193
194 /*
195 * Another special case: if a file doesn't have an extension add
196 * a '.' at the end. This forces spawnve to use precisely the
197 * file specified without trying to add an extension.
198 */
199 if (!strchr(bb_basename(path), '.')) {
200 new_path = xasprintf("%s.", path);
201 }
202
203 errno = 0;
204 ret = spawnve(mode, new_path ? new_path : path, new_argv, env);
205 if (errno == EINVAL && len > bb_arg_max())
206 errno = E2BIG;
207
208 done:
209 for (i = 0;i < argc;i++)
210 free(new_argv[i]);
211 free(new_argv);
212 free(new_path);
213
214 return ret;
215}
216
#if ENABLE_FEATURE_PREFER_APPLETS && NUM_APPLETS > 1
/* Run an applet by spawning the busybox executable itself with argv. */
static intptr_t
mingw_spawn_applet(int mode, char *const *argv, char *const *envp)
{
	const char *self = bb_busybox_exec_path;

	return spawnveq(mode, self, argv, envp);
}
#endif
226
227/* Make a copy of an argv array with n extra slots at the start */
228char ** FAST_FUNC
229grow_argv(char **argv, int n)
230{
231 char **new_argv;
232 int argc;
233
234 argc = string_array_len(argv) + 1;
235 new_argv = xmalloc(sizeof(*argv) * (argc + n));
236 memcpy(new_argv + n, argv, sizeof(*argv) * argc);
237 return new_argv;
238}
239
#if ENABLE_FEATURE_HTTPD_CGI
/*
 * Start prog with a command line built from the quoted elements of
 * argv, without a console window and with the current stdin and stdout
 * as its standard handles.
 *
 * On success the current process exits with status 0, so the function
 * only ever returns on failure, with -1.
 */
static int
create_detached_process(const char *prog, char *const *argv)
{
	int argc, i;
	char *command = NULL;
	STARTUPINFO siStartInfo;
	PROCESS_INFORMATION piProcInfo;
	int success;

	/* join the quoted arguments into a single command line */
	argc = string_array_len((char **)argv);
	for (i = 0; i < argc; i++) {
		char *qarg = quote_arg(argv[i]);
		command = xappendword(command, qarg);
		if (ENABLE_FEATURE_CLEAN_UP)
			free(qarg);
	}

	ZeroMemory(&siStartInfo, sizeof(STARTUPINFO));
	siStartInfo.cb = sizeof(STARTUPINFO);
	siStartInfo.hStdInput = (HANDLE)_get_osfhandle(STDIN_FILENO);
	siStartInfo.hStdOutput = (HANDLE)_get_osfhandle(STDOUT_FILENO);
	siStartInfo.dwFlags = STARTF_USESTDHANDLES;

	success = CreateProcess((LPCSTR)prog,
				(LPSTR)command,    /* command line */
				NULL,              /* process security attributes */
				NULL,              /* primary thread security attributes */
				TRUE,              /* handles are inherited */
				CREATE_NO_WINDOW,  /* creation flags */
				NULL,              /* use parent's environment */
				NULL,              /* use parent's current directory */
				&siStartInfo,      /* STARTUPINFO pointer */
				&piProcInfo);      /* receives PROCESS_INFORMATION */

	if (ENABLE_FEATURE_CLEAN_UP)
		free(command);

	if (!success)
		return -1;
	exit(0);
}

/* Spawn via spawnveq() unless the mode asks for a detached process.
 * Parameters are parenthesised so any expression is a safe argument. */
# define SPAWNVEQ(m, p, a, e) \
	(((m) != HTTPD_DETACH) ? spawnveq((m), (p), (a), (e)) : \
		create_detached_process((p), (a)))
#else
# define SPAWNVEQ(m, p, a, e) spawnveq((m), (p), (a), (e))
#endif
289
290static intptr_t
291mingw_spawn_interpreter(int mode, const char *prog, char *const *argv,
292 char *const *envp, int level)
293{
294 intptr_t ret = -1;
295 int nopts;
296 interp_t interp;
297 char **new_argv;
298 char *path = NULL;
299 int is_unix_path;
300
301 if (!parse_interpreter(prog, &interp))
302 return SPAWNVEQ(mode, prog, argv, envp);
303
304 if (++level > 4) {
305 errno = ELOOP;
306 return -1;
307 }
308
309 nopts = interp.opts != NULL;
310 new_argv = grow_argv((char **)(argv + 1), nopts + 2);
311 new_argv[1] = interp.opts;
312 new_argv[nopts+1] = (char *)prog; /* pass absolute path */
313
314 is_unix_path = unix_path(interp.path);
315#if ENABLE_FEATURE_PREFER_APPLETS && NUM_APPLETS > 1
316 if (is_unix_path && find_applet_by_name(interp.name) >= 0) {
317 /* the fake path indicates the index of the script */
318 new_argv[0] = path = xasprintf("%d:/%s", nopts+1, interp.name);
319 ret = SPAWNVEQ(mode, bb_busybox_exec_path, new_argv, envp);
320 goto done;
321 }
322#endif
323
324 path = file_is_win32_exe(interp.path);
325 if (!path && is_unix_path)
326 path = find_first_executable(interp.name);
327
328 if (path) {
329 new_argv[0] = path;
330 ret = mingw_spawn_interpreter(mode, path, new_argv, envp, level);
331 } else {
332 errno = ENOENT;
333 }
334 done:
335 free(path);
336 free(new_argv);
337 return ret;
338}
339
/*
 * Spawn cmd with arguments argv.
 *
 * An applet is preferred (when enabled) if cmd has no path or a
 * Unix-style one.  Otherwise a cmd with a path is tried as given; if
 * that fails and the path is Unix-style, or if cmd has no path, it is
 * looked up on PATH by its base name.  Returns the result of the spawn
 * or -1 with errno set to ENOENT if nothing to run was found.
 */
static intptr_t
mingw_spawnvp(int mode, const char *cmd, char *const *argv)
{
	char *path = NULL;
	intptr_t ret;

#if ENABLE_FEATURE_PREFER_APPLETS && NUM_APPLETS > 1
	if ((!has_path(cmd) || unix_path(cmd)) &&
			find_applet_by_name(bb_basename(cmd)) >= 0)
		return mingw_spawn_applet(mode, argv, NULL);
#endif

	if (has_path(cmd)) {
		path = file_is_win32_exe(cmd);
		/* not there: a Unix-style path falls back to a PATH search */
		if (path == NULL && unix_path(cmd))
			cmd = bb_basename(cmd);
	}

	if (path == NULL && !has_path(cmd))
		path = find_first_executable(cmd);

	if (path == NULL) {
		errno = ENOENT;
		return -1;
	}

	ret = mingw_spawn_interpreter(mode, path, argv, NULL, 0);
	free(path);
	return ret;
}
371
372pid_t FAST_FUNC
373mingw_spawn(char **argv)
374{
375 intptr_t ret;
376
377 ret = mingw_spawnvp(P_NOWAIT, argv[0], (char *const *)argv);
378
379 return ret == -1 ? (pid_t)-1 : (pid_t)GetProcessId((HANDLE)ret);
380}
381
382intptr_t FAST_FUNC
383mingw_spawn_detach(char **argv)
384{
385 return mingw_spawnvp(P_DETACH, argv[0], argv);
386}
387
388intptr_t FAST_FUNC
389mingw_spawn_proc(const char **argv)
390{
391 return mingw_spawnvp(P_NOWAIT, argv[0], (char *const *)argv);
392}
393
394BOOL WINAPI kill_child_ctrl_handler(DWORD dwCtrlType)
395{
396 static pid_t child_pid = 0;
397 DWORD dummy, *procs, count, rcount, i;
398 DECLARE_PROC_ADDR(DWORD, GetConsoleProcessList, LPDWORD, DWORD);
399
400 if (child_pid == 0) {
401 // First call sets child pid
402 child_pid = dwCtrlType;
403 return FALSE;
404 }
405
406 if (dwCtrlType == CTRL_C_EVENT || dwCtrlType == CTRL_BREAK_EVENT) {
407 if (!INIT_PROC_ADDR(kernel32.dll, GetConsoleProcessList))
408 return TRUE;
409
410 count = GetConsoleProcessList(&dummy, 1) + 16;
411 procs = malloc(sizeof(DWORD) * count);
412 rcount = GetConsoleProcessList(procs, count);
413 if (rcount != 0 && rcount <= count) {
414 for (i = 0; i < rcount; i++) {
415 if (procs[i] == child_pid) {
416 // Child is attached to our console
417 break;
418 }
419 }
420 if (i == rcount) {
421 // Kill non-console child; console children can
422 // handle Ctrl-C as they see fit.
423 kill(-child_pid, SIGINT);
424 }
425 }
426 free(procs);
427 return TRUE;
428 }
429 return FALSE;
430}
431
432static int exit_code_to_wait_status_cmd(DWORD exit_code, const char *cmd)
433{
434 int sig, status;
435 DECLARE_PROC_ADDR(ULONG, RtlNtStatusToDosError, NTSTATUS);
436 DWORD flags, code;
437 char *msg = NULL;
438 const char *sep = ": ";
439
440 if (exit_code == 0xc0000005)
441 return SIGSEGV;
442 else if (exit_code == 0xc000013a)
443 return SIGINT;
444
445 // When a process is terminated as if by a signal the Windows
446 // exit code is zero apart from the signal in its topmost byte.
447 // This is a busybox-w32 convention.
448 sig = exit_code >> 24;
449 if (sig != 0 && exit_code == sig << 24 && is_valid_signal(sig))
450 return sig;
451
452 // The exit code may be an NTSTATUS code. Try to obtain a
453 // descriptive message for it.
454 if (exit_code > 0xff) {
455 flags = FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM;
456 if (INIT_PROC_ADDR(ntdll.dll, RtlNtStatusToDosError)) {
457 code = RtlNtStatusToDosError(exit_code);
458 if (FormatMessage(flags, NULL, code, 0, (char *)&msg, 0, NULL)) {
459 char *cr = strrchr(msg, '\r');
460 if (cr) { // Replace CRLF with a space
461 cr[0] = ' ';
462 cr[1] = '\0';
463 }
464 }
465 }
466
467 if (!cmd)
468 cmd = sep = "";
469 bb_error_msg("%s%s%sError 0x%lx", cmd, sep, msg ?: "", exit_code);
470 LocalFree(msg);
471 }
472
473 // Use least significant byte as exit code, but not if it's zero
474 // and the Windows exit code as a whole is non-zero.
475 status = exit_code & 0xff;
476 if (exit_code != 0 && status == 0)
477 status = 255;
478 return status << 8;
479}
480
481static NORETURN void wait_for_child(HANDLE child, const char *cmd)
482{
483 DWORD code;
484 int status;
485
486 if (getppid() == 1)
487 exit(0);
488
489 kill_child_ctrl_handler(GetProcessId(child));
490 SetConsoleCtrlHandler(kill_child_ctrl_handler, TRUE);
491 WaitForSingleObject(child, INFINITE);
492 GetExitCodeProcess(child, &code);
493 // We don't need the wait status, but get it anyway so the error
494 // message can include the command. In such cases we pass the
495 // exit status to exit() so our caller won't repeat the message.
496 status = exit_code_to_wait_status_cmd(code, cmd);
497 if (!WIFSIGNALED(status) && code > 0xff)
498 code = WEXITSTATUS(status);
499 exit((int)code);
500}
501
502int
503mingw_execvp(const char *cmd, char *const *argv)
504{
505 intptr_t ret = mingw_spawnvp(P_NOWAIT, cmd, argv);
506 if (ret != -1)
507 wait_for_child((HANDLE)ret, cmd);
508 return ret;
509}
510
511int
512mingw_execve(const char *cmd, char *const *argv, char *const *envp)
513{
514 intptr_t ret = mingw_spawn_interpreter(P_NOWAIT, cmd, argv, envp, 0);
515 if (ret != -1)
516 wait_for_child((HANDLE)ret, cmd);
517 return ret;
518}
519
/* execv() replacement: mingw_execve() with a NULL environment argument. */
int
mingw_execv(const char *cmd, char *const *argv)
{
	char *const *no_env = NULL;

	return mingw_execve(cmd, argv, no_env);
}
525
#if ENABLE_FEATURE_HTTPD_CGI
/*
 * Start script (via its interpreter if it has one) in HTTPD_DETACH
 * mode.  The current process exits with status 0 once the script has
 * been started; -1 is returned if that failed.
 */
int httpd_execv_detach(const char *script, char *const *argv)
{
	intptr_t started = mingw_spawn_interpreter(HTTPD_DETACH, script,
							(char *const *)argv, NULL, 0);

	if (started == -1)
		return -1;
	exit(0);
}
#endif
536
537static inline long long filetime_to_ticks(const FILETIME *ft)
538{
539 return (((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime)/
540 HNSEC_PER_TICK;
541}
542
543/*
544 * Attempt to get a string from another instance of busybox.exe.
545 * This will only work if the other process is using the same binary
546 * as the current process. If anything goes wrong just give up.
547 */
548static char *get_bb_string(DWORD pid, const char *exe, char *string)
549{
550 HANDLE proc;
551 HMODULE mlist[32];
552 DWORD needed;
553 void *address;
554 char *my_base;
555 char buffer[128];
556 char exepath[PATH_MAX];
557 char *name = NULL;
558 int i;
559 DECLARE_PROC_ADDR(DWORD, GetProcessImageFileNameA, HANDLE,
560 LPSTR, DWORD);
561 DECLARE_PROC_ADDR(BOOL, EnumProcessModules, HANDLE, HMODULE *,
562 DWORD, LPDWORD);
563 DECLARE_PROC_ADDR(DWORD, GetModuleFileNameExA, HANDLE, HMODULE,
564 LPSTR, DWORD);
565
566 if (!INIT_PROC_ADDR(psapi.dll, GetProcessImageFileNameA) ||
567 !INIT_PROC_ADDR(psapi.dll, EnumProcessModules) ||
568 !INIT_PROC_ADDR(psapi.dll, GetModuleFileNameExA))
569 return NULL;
570
571 if (!(proc=OpenProcess(PROCESS_QUERY_INFORMATION|PROCESS_VM_READ,
572 FALSE, pid))) {
573 return NULL;
574 }
575
576 if (exe == NULL) {
577 if (GetProcessImageFileNameA(proc, exepath, PATH_MAX) != 0) {
578 exe = bb_basename(exepath);
579 }
580 }
581
582 /*
583 * Search for the module that matches the name of the executable.
584 * The values returned in mlist are actually the base address of
585 * the module in the other process (as noted in the documentation
586 * for the MODULEINFO structure).
587 */
588 if (!EnumProcessModules(proc, mlist, sizeof(mlist), &needed)) {
589 goto finish;
590 }
591
592 for (i=0; exe != NULL && i<needed/sizeof(HMODULE); ++i) {
593 char modname[MAX_PATH];
594 if (GetModuleFileNameExA(proc, mlist[i], modname, sizeof(modname))) {
595 if (strcasecmp(bb_basename(modname), exe) == 0) {
596 break;
597 }
598 }
599 }
600
601 if (i == needed/sizeof(HMODULE)) {
602 goto finish;
603 }
604
605 /* attempt to read the BusyBox version string */
606 my_base = (char *)GetModuleHandle(NULL);
607 address = (char *)mlist[i] + ((char *)bb_banner - my_base);
608 if (!ReadProcessMemory(proc, address, buffer, 128, NULL)) {
609 goto finish;
610 }
611
612 if (memcmp(buffer, bb_banner, strlen(bb_banner)) != 0) {
613 /* version mismatch (or not BusyBox at all) */
614 goto finish;
615 }
616
617 /* attempt to read the required string */
618 address = (char *)mlist[i] + ((char *)string - my_base);
619 if (!ReadProcessMemory(proc, address, buffer, 128, NULL)) {
620 goto finish;
621 }
622
623 buffer[127] = '\0';
624 name = auto_string(xstrdup(buffer));
625
626 finish:
627 CloseHandle(proc);
628 return name;
629}
630
631pid_t getppid(void)
632{
633 procps_status_t *sp = NULL;
634 int my_pid = getpid();
635
636 while ((sp = procps_scan(sp, 0)) != NULL) {
637 if (sp->pid == my_pid) {
638 return sp->ppid;
639 }
640 }
641 return 1;
642}
643
/* The list of pids is grown by this many entries at a time */
#define NPIDS 128

/* POSIX version in libbb/procps.c */
/*
 * Step through the processes in a snapshot of the system.
 *
 * Call with sp == NULL to start a scan and pass the returned pointer
 * back in to obtain each further process.  The fields filled in depend
 * on flags (PSSCAN_STIME, PSSCAN_UTIME, PSSCAN_START_TIME,
 * PSSCAN_UIDGID, PSSCAN_COMM); pid and ppid are always set.
 *
 * Returns NULL when there are no more processes, at which point the
 * scan state has been released, or if the snapshot can't be created.
 */
procps_status_t* FAST_FUNC procps_scan(procps_status_t* sp, int flags
#if !ENABLE_FEATURE_PS_TIME && !ENABLE_FEATURE_PS_LONG
UNUSED_PARAM
#endif
)
{
	PROCESSENTRY32 pe;
	HANDLE proc;
	const char *comm, *name;
	BOOL ret;

	pe.dwSize = sizeof(pe);
	if (!sp) {
		/* first call: take the snapshot */
		sp = xzalloc(sizeof(struct procps_status_t));
		sp->snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
		if (sp->snapshot == INVALID_HANDLE_VALUE) {
			free(sp);
			return NULL;
		}
		/* First pass: record every pid in the snapshot.  The list is
		 * used below to check whether a parent pid has been seen. */
		if (Process32First(sp->snapshot, &pe)) {
			int maxpids = 0;
			do {
				if (sp->npids == maxpids) {
					maxpids += NPIDS;
					sp->pids = xrealloc(sp->pids, sizeof(DWORD) * maxpids);
				}
				sp->pids[sp->npids++] = pe.th32ProcessID;
			} while (Process32Next(sp->snapshot, &pe));
		}
		/* start again from the first process for the real scan */
		ret = Process32First(sp->snapshot, &pe);
	}
	else {
		ret = Process32Next(sp->snapshot, &pe);
	}

	if (!ret) {
		/* end of the scan: release everything */
		CloseHandle(sp->snapshot);
		free(sp->pids);
		free(sp);
		return NULL;
	}

	/* Clear the per-process fields from vsz to the end of the struct.
	 * Presumably the scan state (snapshot, pids, npids) is declared
	 * before vsz -- confirm against the structure definition. */
	memset(&sp->vsz, 0, sizeof(*sp) - offsetof(procps_status_t, vsz));
#if !ENABLE_DESKTOP
	strcpy(sp->state, " ");
#endif

#if ENABLE_FEATURE_PS_TIME || ENABLE_FEATURE_PS_LONG
	if (flags & (PSSCAN_STIME|PSSCAN_UTIME|PSSCAN_START_TIME)) {
		FILETIME crTime, exTime, keTime, usTime;

		if ((proc=OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION,
					FALSE, pe.th32ProcessID))) {
			if (GetProcessTimes(proc, &crTime, &exTime, &keTime, &usTime)) {
				long long ticks_since_boot, boot_time, create_time;
				FILETIME now;

				/* boot time is the current time less the uptime */
				ticks_since_boot = GetTickCount64()/MS_PER_TICK;
				GetSystemTimeAsFileTime(&now);
				boot_time = filetime_to_ticks(&now) - ticks_since_boot;
				create_time = filetime_to_ticks(&crTime);

				/* start time is expressed in ticks since boot */
				sp->start_time = (unsigned long)(create_time - boot_time);
				sp->stime = (unsigned long)filetime_to_ticks(&keTime);
				sp->utime = (unsigned long)filetime_to_ticks(&usTime);
			}
			CloseHandle(proc);
		}
	}
#endif

	if (flags & PSSCAN_UIDGID) {
		/* if we can open the process it belongs to us */
		if ((proc=OpenProcess(PROCESS_ALL_ACCESS, FALSE, pe.th32ProcessID))) {
			sp->uid = DEFAULT_UID;
			sp->gid = DEFAULT_GID;
			CloseHandle(proc);
		}
	}

	/* The parent of PID 0 is 0.  If the parent is a PID we haven't
	 * seen set PPID to 1. */
	sp->ppid = pe.th32ProcessID != 0;
	for (int i = 0; i < sp->npids; ++i) {
		if (sp->pids[i] == pe.th32ParentProcessID) {
			sp->ppid = pe.th32ParentProcessID;
			break;
		}
	}
	sp->pid = pe.th32ProcessID;

	if (flags & PSSCAN_COMM) {
		/* prefer the name the process itself reports: our own applet
		 * name, or the one read from another busybox process */
		if (sp->pid == getpid()) {
			comm = applet_name;
		}
		else if ((name=get_bb_string(sp->pid, pe.szExeFile, bb_comm)) != NULL) {
			comm = name;
		}
		else {
			comm = pe.szExeFile;
		}
		safe_strncpy(sp->comm, comm, COMM_LEN);
	}

	return sp;
}
753
754void FAST_FUNC read_cmdline(char *buf, int col, unsigned pid, const char *comm)
755{
756 const char *str, *cmdline;
757
758 *buf = '\0';
759 if (pid == getpid())
760 cmdline = bb_command_line;
761 else if ((str=get_bb_string(pid, NULL, bb_command_line)) != NULL)
762 cmdline = str;
763 else
764 cmdline = comm;
765 safe_strncpy(buf, cmdline, col);
766}
767
768/**
769 * Determine whether a process runs in the same architecture as the current
770 * one. That test is required before we assume that GetProcAddress() returns
771 * a valid address *for the target process*.
772 */
773static inline int process_architecture_matches_current(HANDLE process)
774{
775 static BOOL current_is_wow = -1;
776 BOOL is_wow;
777
778 if (current_is_wow == -1 &&
779 !IsWow64Process (GetCurrentProcess(), &current_is_wow))
780 current_is_wow = -2;
781 if (current_is_wow == -2)
782 return 0; /* could not determine current process' WoW-ness */
783 if (!IsWow64Process (process, &is_wow))
784 return 0; /* cannot determine */
785 return is_wow == current_is_wow;
786}
787
788/**
789 * This function tries to terminate a Win32 process, as gently as possible,
790 * by injecting a thread that calls ExitProcess().
791 *
792 * Note: as kernel32.dll is loaded before any process, the other process and
793 * this process will have ExitProcess() at the same address.
794 *
795 * The idea comes from the Dr Dobb's article "A Safer Alternative to
796 * TerminateProcess()" by Andrew Tucker (July 1, 1999),
797 * http://www.drdobbs.com/a-safer-alternative-to-terminateprocess/184416547
798 *
799 */
800static int kill_signal_by_handle(HANDLE process, int sig)
801{
802 DECLARE_PROC_ADDR(DWORD, ExitProcess, LPVOID);
803 PVOID arg = (PVOID)(intptr_t)(sig << 24);
804 DWORD thread_id;
805 HANDLE thread;
806
807 if (!INIT_PROC_ADDR(kernel32, ExitProcess) ||
808 !process_architecture_matches_current(process)) {
809 SetLastError(ERROR_ACCESS_DENIED);
810 return -1;
811 }
812
813 if (sig != 0 && (thread = CreateRemoteThread(process, NULL, 0,
814 ExitProcess, arg, 0, &thread_id))) {
815 CloseHandle(thread);
816 }
817 return 0;
818}
819
820static int kill_signal(pid_t pid, int sig)
821{
822 HANDLE process;
823 int ret = 0;
824 DWORD code, flags;
825
826 if (sig == SIGKILL)
827 flags = PROCESS_TERMINATE | PROCESS_QUERY_INFORMATION;
828 else
829 flags = SYNCHRONIZE | PROCESS_CREATE_THREAD |
830 PROCESS_QUERY_INFORMATION |
831 PROCESS_VM_OPERATION | PROCESS_VM_WRITE |
832 PROCESS_VM_READ;
833 process = OpenProcess(flags, FALSE, pid);
834
835 if (!process)
836 return -1;
837
838 if (!GetExitCodeProcess(process, &code) || code != STILL_ACTIVE) {
839 SetLastError(ERROR_INVALID_PARAMETER);
840 ret = -1;
841 } else if (sig == SIGKILL) {
842 /* This way of terminating processes is not gentle: they get no
843 * chance to clean up after themselves (closing file handles,
844 * removing .lock files, terminating spawned processes (if any),
845 * etc). */
846 ret = !TerminateProcess(process, SIGKILL << 24);
847 } else {
848 ret = kill_signal_by_handle(process, sig);
849 }
850 CloseHandle(process);
851
852 return ret;
853}
854
855/**
856 * If the process ID is positive signal that process only. If negative
857 * or zero signal all descendants of the indicated process. Zero
858 * indicates the current process; negative indicates the process with
859 * process ID -pid.
860 */
861int kill(pid_t pid, int sig)
862{
863 DWORD *pids;
864 int max_len, i, len, ret = 0;
865
866 if (!is_valid_signal(sig)) {
867 errno = EINVAL;
868 return -1;
869 }
870
871 max_len = NPIDS;
872 pids = xmalloc(sizeof(*pids) * max_len);
873
874 if(pid > 0)
875 pids[0] = (DWORD)pid;
876 else if (pid == 0)
877 pids[0] = (DWORD)getpid();
878 else
879 pids[0] = (DWORD)-pid;
880 len = 1;
881
882 /*
883 * Even if Process32First()/Process32Next() seem to traverse the
884 * processes in topological order (i.e. parent processes before
885 * child processes), there is nothing in the Win32 API documentation
886 * suggesting that this is guaranteed.
887 *
888 * Therefore, run through them at least twice and stop when no more
889 * process IDs were added to the list.
890 */
891 if (pid <= 0) {
892 HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
893 PROCESSENTRY32 entry;
894 int pid_added;
895
896 if (snapshot == INVALID_HANDLE_VALUE) {
897 errno = err_win_to_posix();
898 free(pids);
899 return -1;
900 }
901
902 entry.dwSize = sizeof(entry);
903 pid_added = TRUE;
904 while (pid_added && Process32First(snapshot, &entry)) {
905 pid_added = FALSE;
906
907 do {
908 for (i = len - 1; i >= 0; i--) {
909 if (pids[i] == entry.th32ProcessID)
910 break;
911 if (pids[i] == entry.th32ParentProcessID) {
912 if (len == max_len) {
913 max_len += NPIDS;
914 pids = xrealloc(pids, sizeof(*pids) * max_len);
915 }
916 pids[len++] = entry.th32ProcessID;
917 pid_added = TRUE;
918 }
919 }
920 } while (Process32Next(snapshot, &entry));
921 }
922
923 CloseHandle(snapshot);
924 }
925
926 for (i = len - 1; i >= 0; i--) {
927 SetLastError(0);
928 if (kill_signal(pids[i], sig)) {
929 errno = err_win_to_posix();
930 ret = -1;
931 }
932 }
933 free(pids);
934
935 return ret;
936}
937
938int FAST_FUNC is_valid_signal(int number)
939{
940 return isalpha(*get_signame(number));
941}
942
943int exit_code_to_wait_status(DWORD exit_code)
944{
945 return exit_code_to_wait_status_cmd(exit_code, NULL);
946}
947
948int exit_code_to_posix(DWORD exit_code)
949{
950 int status = exit_code_to_wait_status(exit_code);
951
952 if (WIFSIGNALED(status))
953 return 128 + WTERMSIG(status);
954 return WEXITSTATUS(status);
955}
diff --git a/win32/pwd.h b/win32/pwd.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/pwd.h
diff --git a/win32/regcomp.c b/win32/regcomp.c
new file mode 100644
index 000000000..e1692d341
--- /dev/null
+++ b/win32/regcomp.c
@@ -0,0 +1,3936 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA. */
20
21#include "match_class.h"
22
23#define UNUSED_PARAM __attribute__ ((__unused__))
24
25static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
26 size_t length, reg_syntax_t syntax);
27static void re_compile_fastmap_iter (regex_t *bufp,
28 const re_dfastate_t *init_state,
29 char *fastmap);
30static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
31#ifdef RE_ENABLE_I18N
32static void free_charset (re_charset_t *cset);
33#endif /* RE_ENABLE_I18N */
34static void free_workarea_compile (regex_t *preg);
35static reg_errcode_t create_initial_state (re_dfa_t *dfa);
36#ifdef RE_ENABLE_I18N
37static void optimize_utf8 (re_dfa_t *dfa);
38#endif
39static reg_errcode_t analyze (regex_t *preg);
40static reg_errcode_t preorder (bin_tree_t *root,
41 reg_errcode_t (fn (void *, bin_tree_t *)),
42 void *extra);
43static reg_errcode_t postorder (bin_tree_t *root,
44 reg_errcode_t (fn (void *, bin_tree_t *)),
45 void *extra);
46static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
47static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
48static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
49 bin_tree_t *node);
50static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
51static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
52static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
53static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
54static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
55 unsigned int constraint);
56static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
57static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
58 int node, int root);
59static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
60static int fetch_number (re_string_t *input, re_token_t *token,
61 reg_syntax_t syntax);
62static int peek_token (re_token_t *token, re_string_t *input,
63 reg_syntax_t syntax) internal_function;
64static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
65 reg_syntax_t syntax, reg_errcode_t *err);
66static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
67 re_token_t *token, reg_syntax_t syntax,
68 int nest, reg_errcode_t *err);
69static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
70 re_token_t *token, reg_syntax_t syntax,
71 int nest, reg_errcode_t *err);
72static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
73 re_token_t *token, reg_syntax_t syntax,
74 int nest, reg_errcode_t *err);
75static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
76 re_token_t *token, reg_syntax_t syntax,
77 int nest, reg_errcode_t *err);
78static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
79 re_dfa_t *dfa, re_token_t *token,
80 reg_syntax_t syntax, reg_errcode_t *err);
81static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
82 re_token_t *token, reg_syntax_t syntax,
83 reg_errcode_t *err);
84static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
85 re_string_t *regexp,
86 re_token_t *token, int token_len,
87 re_dfa_t *dfa,
88 reg_syntax_t syntax,
89 int accept_hyphen);
90static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
91 re_string_t *regexp,
92 re_token_t *token);
93#ifdef RE_ENABLE_I18N
94static reg_errcode_t build_equiv_class (bitset_t sbcset,
95 re_charset_t *mbcset,
96 int *equiv_class_alloc,
97 const unsigned char *name);
98static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
99 bitset_t sbcset,
100 re_charset_t *mbcset,
101 int *char_class_alloc,
102 const char *class_name,
103 reg_syntax_t syntax);
104#else /* not RE_ENABLE_I18N */
105static reg_errcode_t build_equiv_class (bitset_t sbcset,
106 const unsigned char *name);
107static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
108 bitset_t sbcset,
109 const char *class_name,
110 reg_syntax_t syntax);
111#endif /* not RE_ENABLE_I18N */
112static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
113 RE_TRANSLATE_TYPE trans,
114 const char *class_name,
115 const char *extra,
116 int non_match, reg_errcode_t *err);
117static bin_tree_t *create_tree (re_dfa_t *dfa,
118 bin_tree_t *left, bin_tree_t *right,
119 re_token_type_t type);
120static bin_tree_t *create_token_tree (re_dfa_t *dfa,
121 bin_tree_t *left, bin_tree_t *right,
122 const re_token_t *token);
123static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
124static void free_token (re_token_t *node);
125static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
126static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
127
128/* This table gives an error message for each of the error codes listed
129 in regex.h. Obviously the order here has to be the same as there.
130 POSIX doesn't require that we do anything for REG_NOERROR,
131 but why not be nice? */
132
/* All error messages packed into one character array.  Every message but
   the last is followed by an explicit "\0"; the last one relies on the
   terminator of the string literal itself.
   Each REG_*_IDX macro is the byte offset of its message inside the array:
   the previous offset plus `sizeof' of the previous message literal, which
   counts that message's terminating NUL.  A message and the sizeof operand
   in the following #define must therefore stay textually identical.  */
133const char __re_error_msgid[] attribute_hidden =
134 {
135#define REG_NOERROR_IDX 0
136 gettext_noop ("Success") /* REG_NOERROR */
137 "\0"
138#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
139 gettext_noop ("No match") /* REG_NOMATCH */
140 "\0"
141#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
142 gettext_noop ("Invalid regular expression") /* REG_BADPAT */
143 "\0"
144#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
145 gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
146 "\0"
147#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
148 gettext_noop ("Invalid character class name") /* REG_ECTYPE */
149 "\0"
150#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
151 gettext_noop ("Trailing backslash") /* REG_EESCAPE */
152 "\0"
153#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
154 gettext_noop ("Invalid back reference") /* REG_ESUBREG */
155 "\0"
156#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
157 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
158 "\0"
159#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
160 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
161 "\0"
162#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
163 gettext_noop ("Unmatched \\{") /* REG_EBRACE */
164 "\0"
165#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
166 gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
167 "\0"
168#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
169 gettext_noop ("Invalid range end") /* REG_ERANGE */
170 "\0"
171#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
172 gettext_noop ("Memory exhausted") /* REG_ESPACE */
173 "\0"
174#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
175 gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
176 "\0"
177#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
178 gettext_noop ("Premature end of regular expression") /* REG_EEND */
179 "\0"
180#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
181 gettext_noop ("Regular expression too big") /* REG_ESIZE */
182 "\0"
183#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
184 gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
185 };
186
/* Offset table into __re_error_msgid, indexed by error code: entry N is
   the byte offset of the message for error code N.  The number of entries
   here is what regerror() uses as the upper bound for a valid code.  */
187const size_t __re_error_msgid_idx[] attribute_hidden =
188 {
189 REG_NOERROR_IDX,
190 REG_NOMATCH_IDX,
191 REG_BADPAT_IDX,
192 REG_ECOLLATE_IDX,
193 REG_ECTYPE_IDX,
194 REG_EESCAPE_IDX,
195 REG_ESUBREG_IDX,
196 REG_EBRACK_IDX,
197 REG_EPAREN_IDX,
198 REG_EBRACE_IDX,
199 REG_BADBR_IDX,
200 REG_ERANGE_IDX,
201 REG_ESPACE_IDX,
202 REG_BADRPT_IDX,
203 REG_EEND_IDX,
204 REG_ESIZE_IDX,
205 REG_ERPAREN_IDX
206 };
207
208/* Entry points for GNU code. */
209
210
211#ifdef ZOS_USS
212
213/* For ZOS USS we must define btowc */
214
/* Convert the single byte C to a wide character with mbtowc().
   Returns the converted character, or the wide end-of-file value (WEOF
   when the headers in scope define it, otherwise (wchar_t) -1) when the
   byte is not a complete, valid character on its own.  */
wchar_t
btowc (int c)
{
  wchar_t wide = 0;
  char byte[2];

  byte[0] = c;
  byte[1] = 0;

  /* mbtowc() leaves its output untouched on failure, so its result must
     be checked before WIDE can be trusted.  */
  if (mbtowc (&wide, byte, 1) < 0)
    {
#ifdef WEOF
      return (wchar_t) WEOF;
#else
      return (wchar_t) -1;
#endif
    }
  return wide;
}
227#endif
228
229/* re_compile_pattern is the GNU regular expression compiler: it
230 compiles PATTERN (of length LENGTH) and puts the result in BUFP.
231 Returns 0 if the pattern was valid, otherwise an error string.
232
233 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
234 are set in BUFP on entry. */
235
236const char *
237re_compile_pattern (const char *pattern,
238 size_t length,
239 struct re_pattern_buffer *bufp)
240{
241 reg_errcode_t ret;
242
243 /* And GNU code determines whether or not to get register information
244 by passing null for the REGS argument to re_match, etc., not by
245 setting no_sub, unless RE_NO_SUB is set. */
246 bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
247
248 /* Match anchors at newline. */
249 bufp->newline_anchor = 1;
250
251 ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
252
253 if (!ret)
254 return NULL;
255 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
256}
257#ifdef _LIBC
258weak_alias (__re_compile_pattern, re_compile_pattern)
259#endif
260
261/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
262 also be assigned to arbitrarily: each pattern buffer stores its own
263 syntax, so it can be changed between regex compilations. */
264/* This has no initializer because initialized variables in Emacs
265 become read-only after dumping. */
266reg_syntax_t re_syntax_options;
267
268
269/* Specify the precise syntax of regexps for compilation. This provides
270 for compatibility for various utilities which historically have
271 different, incompatible syntaxes.
272
273 The argument SYNTAX is a bit mask comprised of the various bits
274 defined in regex.h. We return the old syntax. */
275
276reg_syntax_t
277re_set_syntax (reg_syntax_t syntax)
278{
279 reg_syntax_t ret = re_syntax_options;
280
281 re_syntax_options = syntax;
282 return ret;
283}
284#ifdef _LIBC
285weak_alias (__re_set_syntax, re_set_syntax)
286#endif
287
288int
289re_compile_fastmap (struct re_pattern_buffer *bufp)
290{
291 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
292 char *fastmap = bufp->fastmap;
293
294 memset (fastmap, '\0', sizeof (char) * SBC_MAX);
295 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
296 if (dfa->init_state != dfa->init_state_word)
297 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
298 if (dfa->init_state != dfa->init_state_nl)
299 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
300 if (dfa->init_state != dfa->init_state_begbuf)
301 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
302 bufp->fastmap_accurate = 1;
303 return 0;
304}
305#ifdef _LIBC
306weak_alias (__re_compile_fastmap, re_compile_fastmap)
307#endif
308
/* Mark byte CH in FASTMAP; when ICASE is nonzero also mark tolower (CH).  */
static inline void
__attribute ((always_inline))
re_set_fastmap (char *fastmap, int icase, int ch)
{
  /* Without case folding the second store simply repeats the first.  */
  const int folded = icase ? tolower (ch) : ch;

  fastmap[ch] = 1;
  fastmap[folded] = 1;
}
317
318/* Helper function for re_compile_fastmap.
319 Compile fastmap for the initial_state INIT_STATE. */
320
/* For every NFA node listed in INIT_STATE, mark in FASTMAP the bytes that
   can begin a match through that node.  BUFP supplies the compiled DFA
   (BUFP->buffer) and the syntax bits; single-byte case folding (ICASE) is
   applied only when the DFA's mb_cur_max is 1 and RE_ICASE is set.
   A period or END_OF_RE node sets every byte and ends the scan early;
   END_OF_RE additionally sets BUFP->can_be_null.  */
321static void
322re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
323 char *fastmap)
324{
325 volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
326 int node_cnt;
327 int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
328 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
329 {
330 int node = init_state->nodes.elems[node_cnt];
331 re_token_type_t type = dfa->nodes[node].type;
332
333 if (type == CHARACTER)
334 {
335 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
336#ifdef RE_ENABLE_I18N
337 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
338 {
/* Multibyte case folding: collect this byte and the following
   mb_partial CHARACTER nodes into BUF, decode them as one wide
   character, lowercase it, re-encode it and mark its first byte.
   NOTE(review): the re_malloc result is used without a NULL check.
   NOTE(review): the `++node' below advances the local NODE, which
   is re-read from INIT_STATE on the next outer iteration.  */
339 unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p;
340 wchar_t wc;
341 mbstate_t state;
342
343 p = buf;
344 *p++ = dfa->nodes[node].opr.c;
345 while (++node < dfa->nodes_len
346 && dfa->nodes[node].type == CHARACTER
347 && dfa->nodes[node].mb_partial)
348 *p++ = dfa->nodes[node].opr.c;
349 memset (&state, '\0', sizeof (state));
350 if (__mbrtowc (&wc, (const char *) buf, p - buf,
351 &state) == p - buf
352 && (__wcrtomb ((char *) buf, towlower (wc), &state)
353 != (size_t) -1))
354 re_set_fastmap (fastmap, 0, buf[0]);
355 re_free (buf);
356 }
357#endif
358 }
359 else if (type == SIMPLE_BRACKET)
360 {
/* Walk the bitset word by word; CH counts the byte value that
   corresponds to bit J of word I.  */
361 int i, ch;
362 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
363 {
364 int j;
365 bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
366 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
367 if (w & ((bitset_word_t) 1 << j))
368 re_set_fastmap (fastmap, icase, ch);
369 }
370 }
371#ifdef RE_ENABLE_I18N
372 else if (type == COMPLEX_BRACKET)
373 {
374 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
375 int i;
376
377# ifdef _LIBC
378 /* See if we have to try all bytes which start multiple collation
379 elements.
380 e.g. In da_DK, we want to catch 'a' since "aa" is a valid
381 collation element, and don't catch 'b' since 'b' is
382 the only collation element which starts from 'b' (and
383 it is caught by SIMPLE_BRACKET). */
384 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
385 && (cset->ncoll_syms || cset->nranges))
386 {
387 const int32_t *table = (const int32_t *)
388 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
389 for (i = 0; i < SBC_MAX; ++i)
390 if (table[i] < 0)
391 re_set_fastmap (fastmap, icase, i);
392 }
393# endif /* _LIBC */
394
395 /* See if we have to start the match at all multibyte characters,
396 i.e. where we would not find an invalid sequence. This only
397 applies to multibyte character sets; for single byte character
398 sets, the SIMPLE_BRACKET again suffices. */
399 if (dfa->mb_cur_max > 1
400 && (cset->nchar_classes || cset->non_match || cset->nranges
401# ifdef _LIBC
402 || cset->nequiv_classes
403# endif /* _LIBC */
404 ))
405 {
/* Try all 256 byte values: C wraps back to 0 after 255, which
   ends the do/while.  A byte for which __mbrtowc returns
   (size_t) -2 is an incomplete sequence, i.e. a lead byte.  */
406 unsigned char c = 0;
407 do
408 {
409 mbstate_t mbs;
410 memset (&mbs, 0, sizeof (mbs));
411 if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
412 re_set_fastmap (fastmap, false, (int) c);
413 }
414 while (++c != 0);
415 }
416
417 else
418 {
419 /* ... Else catch all bytes which can start the mbchars. */
420 for (i = 0; i < cset->nmbchars; ++i)
421 {
422 char buf[256];
423 mbstate_t state;
424 memset (&state, '\0', sizeof (state));
425 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
426 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
427 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
428 {
429 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
430 != (size_t) -1)
431 re_set_fastmap (fastmap, false, *(unsigned char *) buf);
432 }
433 }
434 }
435 }
436#endif /* RE_ENABLE_I18N */
437 else if (type == OP_PERIOD
438#ifdef RE_ENABLE_I18N
439 || type == OP_UTF8_PERIOD
440#endif /* RE_ENABLE_I18N */
441 || type == END_OF_RE)
442 {
/* Any byte may start a match here, so the remaining nodes
   cannot add information: fill the map and stop.  */
443 memset (fastmap, '\1', sizeof (char) * SBC_MAX);
444 if (type == END_OF_RE)
445 bufp->can_be_null = 1;
446 return;
447 }
448 }
449}
450
451/* Entry point for POSIX code. */
452/* regcomp takes a regular expression as a string and compiles it.
453
454 PREG is a regex_t *. We do not expect any fields to be initialized,
455 since POSIX says we shouldn't. Thus, we set
456
457 `buffer' to the compiled pattern;
458 `used' to the length of the compiled pattern;
459 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
460 REG_EXTENDED bit in CFLAGS is set; otherwise, to
461 RE_SYNTAX_POSIX_BASIC;
462 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
463 `fastmap' to an allocated space for the fastmap;
464 `fastmap_accurate' to zero;
465 `re_nsub' to the number of subexpressions in PATTERN.
466
467 PATTERN is the address of the pattern string.
468
469 CFLAGS is a series of bits which affect compilation.
470
471 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
472 use POSIX basic syntax.
473
474 If REG_NEWLINE is set, then . and [^...] don't match newline.
475 Also, regexec will try a match beginning after every newline.
476
477 If REG_ICASE is set, then we consider upper- and lowercase
478 versions of letters to be equivalent when matching.
479
480 If REG_NOSUB is set, then when PREG is passed to regexec, that
481 routine will report only success or failure, and nothing about the
482 registers.
483
484 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
485 the return codes and their meanings.) */
486
487int
488regcomp (regex_t *__restrict preg,
489 const char *__restrict pattern,
490 int cflags)
491{
492 reg_errcode_t ret;
493 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
494 : RE_SYNTAX_POSIX_BASIC);
495
496 preg->buffer = NULL;
497 preg->allocated = 0;
498 preg->used = 0;
499
500 /* Try to allocate space for the fastmap. */
501 preg->fastmap = re_malloc (char, SBC_MAX);
502 if (BE (preg->fastmap == NULL, 0))
503 return REG_ESPACE;
504
505 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
506
507 /* If REG_NEWLINE is set, newlines are treated differently. */
508 if (cflags & REG_NEWLINE)
509 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
510 syntax &= ~RE_DOT_NEWLINE;
511 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
512 /* It also changes the matching behavior. */
513 preg->newline_anchor = 1;
514 }
515 else
516 preg->newline_anchor = 0;
517 preg->no_sub = !!(cflags & REG_NOSUB);
518 preg->translate = NULL;
519
520 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
521
522 /* POSIX doesn't distinguish between an unmatched open-group and an
523 unmatched close-group: both are REG_EPAREN. */
524 if (ret == REG_ERPAREN)
525 ret = REG_EPAREN;
526
527 /* We have already checked preg->fastmap != NULL. */
528 if (BE (ret == REG_NOERROR, 1))
529 /* Compute the fastmap now, since regexec cannot modify the pattern
530 buffer. This function never fails in this implementation. */
531 (void) re_compile_fastmap (preg);
532 else
533 {
534 /* Some error occurred while compiling the expression. */
535 re_free (preg->fastmap);
536 preg->fastmap = NULL;
537 }
538
539 return (int) ret;
540}
541#ifdef _LIBC
542weak_alias (__regcomp, regcomp)
543#endif
544
545/* Returns a message corresponding to an error code, ERRCODE, returned
546 from either regcomp or regexec. We don't use PREG here. */
547
548size_t
549regerror(int errcode, UNUSED_PARAM const regex_t *__restrict preg,
550 char *__restrict errbuf, size_t errbuf_size)
551{
552 const char *msg;
553 size_t msg_size;
554
555 if (BE (errcode < 0
556 || errcode >= (int) (sizeof (__re_error_msgid_idx)
557 / sizeof (__re_error_msgid_idx[0])), 0))
558 /* Only error codes returned by the rest of the code should be passed
559 to this routine. If we are given anything else, or if other regex
560 code generates an invalid error code, then the program has a bug.
561 Dump core so we can fix it. */
562 abort ();
563
564 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
565
566 msg_size = strlen (msg) + 1; /* Includes the null. */
567
568 if (BE (errbuf_size != 0, 1))
569 {
570 if (BE (msg_size > errbuf_size, 0))
571 {
572 memcpy (errbuf, msg, errbuf_size - 1);
573 errbuf[errbuf_size - 1] = 0;
574 }
575 else
576 memcpy (errbuf, msg, msg_size);
577 }
578
579 return msg_size;
580}
581#ifdef _LIBC
582weak_alias (__regerror, regerror)
583#endif
584
585
586#ifdef RE_ENABLE_I18N
587/* This static array is used for the map to single-byte characters when
588 UTF-8 is used. Otherwise we would allocate memory just to initialize
589 it the same all the time. UTF-8 is the preferred encoding so this is
590 a worthwhile optimization. */
/* With GCC 3 or later the map is a constant built at compile time using a
   designated range initializer (a GNU extension).  Otherwise it is a
   writable, zero-initialized object here -- presumably filled in at run
   time elsewhere; confirm against the code that assigns dfa->sb_char.  */
591#if __GNUC__ >= 3
592static const bitset_t utf8_sb_map = {
593 /* Set the first 128 bits. */
594 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
595};
596#else /* ! (__GNUC__ >= 3) */
597static bitset_t utf8_sb_map;
598#endif /* __GNUC__ >= 3 */
599#endif /* RE_ENABLE_I18N */
600
601
602static void
603free_dfa_content (re_dfa_t *dfa)
604{
605 int i, j;
606
607 if (dfa->nodes)
608 for (i = 0; i < dfa->nodes_len; ++i)
609 free_token (dfa->nodes + i);
610 re_free (dfa->nexts);
611 for (i = 0; i < dfa->nodes_len; ++i)
612 {
613 if (dfa->eclosures != NULL)
614 re_node_set_free (dfa->eclosures + i);
615 if (dfa->inveclosures != NULL)
616 re_node_set_free (dfa->inveclosures + i);
617 if (dfa->edests != NULL)
618 re_node_set_free (dfa->edests + i);
619 }
620 re_free (dfa->edests);
621 re_free (dfa->eclosures);
622 re_free (dfa->inveclosures);
623 re_free (dfa->nodes);
624
625 if (dfa->state_table)
626 for (i = 0; i <= dfa->state_hash_mask; ++i)
627 {
628 struct re_state_table_entry *entry = dfa->state_table + i;
629 for (j = 0; j < entry->num; ++j)
630 {
631 re_dfastate_t *state = entry->array[j];
632 free_state (state);
633 }
634 re_free (entry->array);
635 }
636 re_free (dfa->state_table);
637#ifdef RE_ENABLE_I18N
638 if (dfa->sb_char != utf8_sb_map)
639 re_free (dfa->sb_char);
640#endif
641 re_free (dfa->subexp_map);
642#ifdef DEBUG
643 re_free (dfa->re_str);
644#endif
645
646 re_free (dfa);
647}
648
649
650/* Free dynamically allocated space used by PREG. */
651
652void
653regfree (regex_t *preg)
654{
655 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
656 if (BE (dfa != NULL, 1))
657 free_dfa_content (dfa);
658 preg->buffer = NULL;
659 preg->allocated = 0;
660
661 re_free (preg->fastmap);
662 preg->fastmap = NULL;
663
664 re_free (preg->translate);
665 preg->translate = NULL;
666}
667#ifdef _LIBC
668weak_alias (__regfree, regfree)
669#endif
670
671/* Entry points compatible with 4.2 BSD regex library. We don't define
672 them unless specifically requested. */
673
674#if defined _REGEX_RE_COMP || defined _LIBC
675
676/* BSD has one and only one pattern buffer. */
677static struct re_pattern_buffer re_comp_buf;
678
679char *
680# ifdef _LIBC
681/* Make these definitions weak in libc, so POSIX programs can redefine
682 these names if they don't use our functions, and still use
683 regcomp/regexec above without link errors. */
684weak_function
685# endif
686re_comp (s)
687 const char *s;
688{
689 reg_errcode_t ret;
690 char *fastmap;
691
692 if (!s)
693 {
694 if (!re_comp_buf.buffer)
695 return gettext ("No previous regular expression");
696 return 0;
697 }
698
699 if (re_comp_buf.buffer)
700 {
701 fastmap = re_comp_buf.fastmap;
702 re_comp_buf.fastmap = NULL;
703 __regfree (&re_comp_buf);
704 memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
705 re_comp_buf.fastmap = fastmap;
706 }
707
708 if (re_comp_buf.fastmap == NULL)
709 {
710 re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
711 if (re_comp_buf.fastmap == NULL)
712 return (char *) gettext (__re_error_msgid
713 + __re_error_msgid_idx[(int) REG_ESPACE]);
714 }
715
716 /* Since `re_exec' always passes NULL for the `regs' argument, we
717 don't need to initialize the pattern buffer fields which affect it. */
718
719 /* Match anchors at newlines. */
720 re_comp_buf.newline_anchor = 1;
721
722 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
723
724 if (!ret)
725 return NULL;
726
727 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
728 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
729}
730
731#ifdef _LIBC
/* Cleanup routine declared through libc_freeres_fn: releases whatever the
   BSD-compatibility pattern buffer re_comp_buf still owns.  */
732libc_freeres_fn (free_mem)
733{
734 __regfree (&re_comp_buf);
735}
736#endif
737
738#endif /* _REGEX_RE_COMP */
739
740/* Internal entry point.
741 Compile the regular expression PATTERN, whose length is LENGTH.
742 SYNTAX indicates the regular expression's syntax. */
743
/* Steps: reset PREG's bookkeeping fields, make sure PREG->buffer can hold
   a re_dfa_t, initialize the DFA, build the input string object, parse
   the pattern into a tree, analyze it into an NFA, and create the initial
   DFA state.  Returns REG_NOERROR on success.  On every failure after the
   buffer allocation the DFA is freed and PREG->buffer / PREG->allocated
   are reset to NULL / 0 before the error code is returned.  */
744static reg_errcode_t
745re_compile_internal (regex_t *preg, const char * pattern, size_t length,
746 reg_syntax_t syntax)
747{
748 reg_errcode_t err = REG_NOERROR;
749 re_dfa_t *dfa;
750 re_string_t regexp;
751
752 /* Initialize the pattern buffer. */
753 preg->fastmap_accurate = 0;
754 preg->syntax = syntax;
755 preg->not_bol = preg->not_eol = 0;
756 preg->used = 0;
757 preg->re_nsub = 0;
758 preg->can_be_null = 0;
759 preg->regs_allocated = REGS_UNALLOCATED;
760
761 /* Initialize the dfa. */
762 dfa = (re_dfa_t *) preg->buffer;
763 if (BE (preg->allocated < sizeof (re_dfa_t), 0))
764 {
765 /* If zero allocated, but buffer is non-null, try to realloc
766 enough space. This loses if buffer's address is bogus, but
767 that is the user's responsibility. If ->buffer is NULL this
768 is a simple allocation. */
769 dfa = re_realloc (preg->buffer, re_dfa_t, 1);
770 if (dfa == NULL)
771 return REG_ESPACE;
772 preg->allocated = sizeof (re_dfa_t);
773 preg->buffer = (unsigned char *) dfa;
774 }
775 preg->used = sizeof (re_dfa_t);
776
777 err = init_dfa (dfa, length);
778 if (BE (err != REG_NOERROR, 0))
779 {
780 free_dfa_content (dfa);
781 preg->buffer = NULL;
782 preg->allocated = 0;
783 return err;
784 }
785#ifdef DEBUG
/* NOTE(review): the re_malloc result is passed to strncpy without a
   NULL check.  */
786 /* Note: length+1 will not overflow since it is checked in init_dfa. */
787 dfa->re_str = re_malloc (char, length + 1);
788 strncpy (dfa->re_str, pattern, length + 1);
789#endif
790
791 __libc_lock_init (dfa->lock);
792
793 err = re_string_construct (&regexp, pattern, length, preg->translate,
794 syntax & RE_ICASE, dfa);
795 if (BE (err != REG_NOERROR, 0))
796 {
/* Shared failure exit: the later `goto's jump into this block, so
   ERR must already hold the error code when they do.  */
797 re_compile_internal_free_return:
798 free_workarea_compile (preg);
799 re_string_destruct (&regexp);
800 free_dfa_content (dfa);
801 preg->buffer = NULL;
802 preg->allocated = 0;
803 return err;
804 }
805
806 /* Parse the regular expression, and build a structure tree. */
807 preg->re_nsub = 0;
808 dfa->str_tree = parse (&regexp, preg, syntax, &err);
809 if (BE (dfa->str_tree == NULL, 0))
810 goto re_compile_internal_free_return;
811
812 /* Analyze the tree and create the nfa. */
813 err = analyze (preg);
814 if (BE (err != REG_NOERROR, 0))
815 goto re_compile_internal_free_return;
816
817#ifdef RE_ENABLE_I18N
818 /* If possible, do searching in single byte encoding to speed things up. */
819 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
820 optimize_utf8 (dfa);
821#endif
822
823 /* Then create the initial state of the dfa. */
824 err = create_initial_state (dfa);
825
/* The work areas are released on success and failure alike; only the
   DFA itself is kept when ERR is REG_NOERROR.  */
826 /* Release work areas. */
827 free_workarea_compile (preg);
828 re_string_destruct (&regexp);
829
830 if (BE (err != REG_NOERROR, 0))
831 {
832 free_dfa_content (dfa);
833 preg->buffer = NULL;
834 preg->allocated = 0;
835 }
836
837 return err;
838}
839
840/* Initialize DFA. We use the length of the regular expression PAT_LEN
841 as the initial length of some arrays. */
842
843static reg_errcode_t
844init_dfa (re_dfa_t *dfa, size_t pat_len)
845{
846 unsigned int table_size;
847#ifndef _LIBC
848 const char *codeset_name;
849#endif
850
851 memset (dfa, '\0', sizeof (re_dfa_t));
852
853 /* Force allocation of str_tree_storage the first time. */
854 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
855
856 /* Avoid overflows. */
857 if (pat_len == SIZE_MAX)
858 return REG_ESPACE;
859
860 dfa->nodes_alloc = pat_len + 1;
861 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
862
863 /* table_size = 2 ^ ceil(log pat_len) */
864 for (table_size = 1; ; table_size <<= 1)
865 if (table_size > pat_len)
866 break;
867
868 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
869 dfa->state_hash_mask = table_size - 1;
870
871 dfa->mb_cur_max = MB_CUR_MAX;
872#ifdef _LIBC
873 if (dfa->mb_cur_max == 6
874 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
875 dfa->is_utf8 = 1;
876 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
877 != 0);
878#else
879# ifdef HAVE_LANGINFO_CODESET
880 codeset_name = nl_langinfo (CODESET);
881# else
882 codeset_name = getenv ("LC_ALL");
883 if (codeset_name == NULL || codeset_name[0] == '\0')
884 codeset_name = getenv ("LC_CTYPE");
885 if (codeset_name == NULL || codeset_name[0] == '\0')
886 codeset_name = getenv ("LANG");
887 if (codeset_name == NULL)
888 codeset_name = "";
889 else if (strchr (codeset_name, '.') != NULL)
890 codeset_name = strchr (codeset_name, '.') + 1;
891# endif
892
893 /* strcasecmp isn't a standard interface. brute force check */
894#if 0
895 if (strcasecmp (codeset_name, "UTF-8") == 0
896 || strcasecmp (codeset_name, "UTF8") == 0)
897 dfa->is_utf8 = 1;
898#else
899 if ( (codeset_name[0] == 'U' || codeset_name[0] == 'u')
900 && (codeset_name[1] == 'T' || codeset_name[1] == 't')
901 && (codeset_name[2] == 'F' || codeset_name[2] == 'f')
902 && (codeset_name[3] == '-'
903 ? codeset_name[4] == '8' && codeset_name[5] == '\0'
904 : codeset_name[3] == '8' && codeset_name[4] == '\0'))
905 dfa->is_utf8 = 1;
906#endif
907
908 /* We check exhaustively in the loop below if this charset is a
909 superset of ASCII. */
910 dfa->map_notascii = 0;
911#endif
912
913#ifdef RE_ENABLE_I18N
914 if (dfa->mb_cur_max > 1)
915 {
916 if (dfa->is_utf8)
917 {
918#if !defined(__GNUC__) || __GNUC__ < 3
919 static short utf8_sb_map_inited = 0;
920
921 if (! utf8_sb_map_inited)
922 {
923 int i;
924
925 utf8_sb_map_inited = 0;
926 for (i = 0; i <= 0x80 / BITSET_WORD_BITS - 1; i++)
927 utf8_sb_map[i] = BITSET_WORD_MAX;
928 }
929#endif
930 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
931 }
932 else
933 {
934 int i, j, ch;
935
936 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
937 if (BE (dfa->sb_char == NULL, 0))
938 return REG_ESPACE;
939
940 /* Set the bits corresponding to single byte chars. */
941 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
942 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
943 {
944 wint_t wch = __btowc (ch);
945 if (wch != WEOF)
946 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
947# ifndef _LIBC
948 if (isascii (ch) && wch != ch)
949 dfa->map_notascii = 1;
950# endif
951 }
952 }
953 }
954#endif
955
956 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
957 return REG_ESPACE;
958 return REG_NOERROR;
959}
960
961/* Initialize WORD_CHAR table, which indicate which character is
962 "word". In this case "word" means that it is the word construction
963 character used by some operators like "\<", "\>", etc. */
964
965static void
966internal_function
967init_word_char (re_dfa_t *dfa)
968{
969 int i, j, ch;
970 dfa->word_ops_used = 1;
971 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
972 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
973 if (isalnum (ch) || ch == '_')
974 dfa->word_char[i] |= (bitset_word_t) 1 << j;
975}
976
977/* Free the work area which are only used while compiling. */
978
979static void
980free_workarea_compile (regex_t *preg)
981{
982 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
983 bin_tree_storage_t *storage, *next;
984 for (storage = dfa->str_tree_storage; storage; storage = next)
985 {
986 next = storage->next;
987 re_free (storage);
988 }
989 dfa->str_tree_storage = NULL;
990 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
991 dfa->str_tree = NULL;
992 re_free (dfa->org_indices);
993 dfa->org_indices = NULL;
994}
995
996/* Create initial states for all contexts. */
997
998static reg_errcode_t
999create_initial_state (re_dfa_t *dfa)
1000{
1001 int first, i;
1002 reg_errcode_t err;
1003 re_node_set init_nodes;
1004
1005 /* Initial states have the epsilon closure of the node which is
1006 the first node of the regular expression. */
1007 first = dfa->str_tree->first->node_idx;
1008 dfa->init_node = first;
1009 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
1010 if (BE (err != REG_NOERROR, 0))
1011 return err;
1012
1013 /* The back-references which are in initial states can epsilon transit,
1014 since in this case all of the subexpressions can be null.
1015 Then we add epsilon closures of the nodes which are the next nodes of
1016 the back-references. */
1017 if (dfa->nbackref > 0)
1018 for (i = 0; i < init_nodes.nelem; ++i)
1019 {
1020 int node_idx = init_nodes.elems[i];
1021 re_token_type_t type = dfa->nodes[node_idx].type;
1022
1023 int clexp_idx;
1024 if (type != OP_BACK_REF)
1025 continue;
1026 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
1027 {
1028 re_token_t *clexp_node;
1029 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
1030 if (clexp_node->type == OP_CLOSE_SUBEXP
1031 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
1032 break;
1033 }
1034 if (clexp_idx == init_nodes.nelem)
1035 continue;
1036
1037 if (type == OP_BACK_REF)
1038 {
1039 int dest_idx = dfa->edests[node_idx].elems[0];
1040 if (!re_node_set_contains (&init_nodes, dest_idx))
1041 {
1042 err = re_node_set_merge (&init_nodes,
1043 dfa->eclosures + dest_idx);
1044 if (err != REG_NOERROR)
1045 return err;
1046 i = 0;
1047 }
1048 }
1049 }
1050
1051 /* It must be the first time to invoke acquire_state. */
1052 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
1053 /* We don't check ERR here, since the initial state must not be NULL. */
1054 if (BE (dfa->init_state == NULL, 0))
1055 return err;
1056 if (dfa->init_state->has_constraint)
1057 {
1058 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1059 CONTEXT_WORD);
1060 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1061 CONTEXT_NEWLINE);
1062 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1063 &init_nodes,
1064 CONTEXT_NEWLINE
1065 | CONTEXT_BEGBUF);
1066 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
1067 || dfa->init_state_begbuf == NULL, 0))
1068 return err;
1069 }
1070 else
1071 dfa->init_state_word = dfa->init_state_nl
1072 = dfa->init_state_begbuf = dfa->init_state;
1073
1074 re_node_set_free (&init_nodes);
1075 return REG_NOERROR;
1076}
1077
1078#ifdef RE_ENABLE_I18N
1079/* If it is possible to do searching in single byte encoding instead of UTF-8
1080 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1081 DFA nodes where needed. */
1082
1083static void
1084optimize_utf8 (re_dfa_t *dfa)
1085{
1086 int node, i, mb_chars = 0, has_period = 0;
1087
1088 for (node = 0; node < dfa->nodes_len; ++node)
1089 switch (dfa->nodes[node].type)
1090 {
1091 case CHARACTER:
1092 if (dfa->nodes[node].opr.c >= 0x80)
1093 mb_chars = 1;
1094 break;
1095 case ANCHOR:
1096 switch (dfa->nodes[node].opr.ctx_type)
1097 {
1098 case LINE_FIRST:
1099 case LINE_LAST:
1100 case BUF_FIRST:
1101 case BUF_LAST:
1102 break;
1103 default:
1104 /* Word anchors etc. cannot be handled. It's okay to test
1105 opr.ctx_type since constraints (for all DFA nodes) are
1106 created by ORing one or more opr.ctx_type values. */
1107 return;
1108 }
1109 break;
1110 case OP_PERIOD:
1111 has_period = 1;
1112 break;
1113 case OP_BACK_REF:
1114 case OP_ALT:
1115 case END_OF_RE:
1116 case OP_DUP_ASTERISK:
1117 case OP_OPEN_SUBEXP:
1118 case OP_CLOSE_SUBEXP:
1119 break;
1120 case COMPLEX_BRACKET:
1121 return;
1122 case SIMPLE_BRACKET:
1123 /* Just double check. The non-ASCII range starts at 0x80. */
1124 assert (0x80 % BITSET_WORD_BITS == 0);
1125 for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
1126 if (dfa->nodes[node].opr.sbcset[i])
1127 return;
1128 break;
1129 default:
1130 abort ();
1131 }
1132
1133 if (mb_chars || has_period)
1134 for (node = 0; node < dfa->nodes_len; ++node)
1135 {
1136 if (dfa->nodes[node].type == CHARACTER
1137 && dfa->nodes[node].opr.c >= 0x80)
1138 dfa->nodes[node].mb_partial = 0;
1139 else if (dfa->nodes[node].type == OP_PERIOD)
1140 dfa->nodes[node].type = OP_UTF8_PERIOD;
1141 }
1142
1143 /* The search can be in single byte locale. */
1144 dfa->mb_cur_max = 1;
1145 dfa->is_utf8 = 0;
1146 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
1147}
1148#endif
1149
1150/* Analyze the structure tree, and calculate "first", "next", "edest",
1151 "eclosure", and "inveclosure". */
1152
1153static reg_errcode_t
1154analyze (regex_t *preg)
1155{
1156 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1157 reg_errcode_t ret;
1158
1159 /* Allocate arrays. */
1160 dfa->nexts = re_malloc (int, dfa->nodes_alloc);
1161 dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
1162 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1163 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1164 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
1165 || dfa->eclosures == NULL, 0))
1166 return REG_ESPACE;
1167
1168 dfa->subexp_map = re_malloc (int, preg->re_nsub);
1169 if (dfa->subexp_map != NULL)
1170 {
1171 int i;
1172 for (i = 0; i < preg->re_nsub; i++)
1173 dfa->subexp_map[i] = i;
1174 preorder (dfa->str_tree, optimize_subexps, dfa);
1175 for (i = 0; i < preg->re_nsub; i++)
1176 if (dfa->subexp_map[i] != i)
1177 break;
1178 if (i == preg->re_nsub)
1179 {
1180 free (dfa->subexp_map);
1181 dfa->subexp_map = NULL;
1182 }
1183 }
1184
1185 ret = postorder (dfa->str_tree, lower_subexps, preg);
1186 if (BE (ret != REG_NOERROR, 0))
1187 return ret;
1188 ret = postorder (dfa->str_tree, calc_first, dfa);
1189 if (BE (ret != REG_NOERROR, 0))
1190 return ret;
1191 preorder (dfa->str_tree, calc_next, dfa);
1192 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1193 if (BE (ret != REG_NOERROR, 0))
1194 return ret;
1195 ret = calc_eclosure (dfa);
1196 if (BE (ret != REG_NOERROR, 0))
1197 return ret;
1198
1199 /* We only need this during the prune_impossible_nodes pass in regexec.c;
1200 skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
1201 if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
1202 || dfa->nbackref)
1203 {
1204 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1205 if (BE (dfa->inveclosures == NULL, 0))
1206 return REG_ESPACE;
1207 ret = calc_inveclosure (dfa);
1208 }
1209
1210 return ret;
1211}
1212
1213/* Our parse trees are very unbalanced, so we cannot use a stack to
1214 implement parse tree visits. Instead, we use parent pointers and
1215 some hairy code in these two functions. */
1216static reg_errcode_t
1217postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1218 void *extra)
1219{
1220 bin_tree_t *node, *prev;
1221
1222 for (node = root; ; )
1223 {
1224 /* Descend down the tree, preferably to the left (or to the right
1225 if that's the only child). */
1226 while (node->left || node->right)
1227 if (node->left)
1228 node = node->left;
1229 else
1230 node = node->right;
1231
1232 do
1233 {
1234 reg_errcode_t err = fn (extra, node);
1235 if (BE (err != REG_NOERROR, 0))
1236 return err;
1237 if (node->parent == NULL)
1238 return REG_NOERROR;
1239 prev = node;
1240 node = node->parent;
1241 }
1242 /* Go up while we have a node that is reached from the right. */
1243 while (node->right == prev || node->right == NULL);
1244 node = node->right;
1245 }
1246}
1247
1248static reg_errcode_t
1249preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1250 void *extra)
1251{
1252 bin_tree_t *node;
1253
1254 for (node = root; ; )
1255 {
1256 reg_errcode_t err = fn (extra, node);
1257 if (BE (err != REG_NOERROR, 0))
1258 return err;
1259
1260 /* Go to the left node, or up and to the right. */
1261 if (node->left)
1262 node = node->left;
1263 else
1264 {
1265 bin_tree_t *prev = NULL;
1266 while (node->right == prev || node->right == NULL)
1267 {
1268 prev = node;
1269 node = node->parent;
1270 if (!node)
1271 return REG_NOERROR;
1272 }
1273 node = node->right;
1274 }
1275 }
1276}
1277
1278/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
1279 re_search_internal to map the inner one's opr.idx to this one's. Adjust
1280 backreferences as well. Requires a preorder visit. */
1281static reg_errcode_t
1282optimize_subexps (void *extra, bin_tree_t *node)
1283{
1284 re_dfa_t *dfa = (re_dfa_t *) extra;
1285
1286 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1287 {
1288 int idx = node->token.opr.idx;
1289 node->token.opr.idx = dfa->subexp_map[idx];
1290 dfa->used_bkref_map |= 1 << node->token.opr.idx;
1291 }
1292
1293 else if (node->token.type == SUBEXP
1294 && node->left && node->left->token.type == SUBEXP)
1295 {
1296 int other_idx = node->left->token.opr.idx;
1297
1298 node->left = node->left->left;
1299 if (node->left)
1300 node->left->parent = node;
1301
1302 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1303 if (other_idx < BITSET_WORD_BITS)
1304 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
1305 }
1306
1307 return REG_NOERROR;
1308}
1309
1310/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
1311 of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
1312static reg_errcode_t
1313lower_subexps (void *extra, bin_tree_t *node)
1314{
1315 regex_t *preg = (regex_t *) extra;
1316 reg_errcode_t err = REG_NOERROR;
1317
1318 if (node->left && node->left->token.type == SUBEXP)
1319 {
1320 node->left = lower_subexp (&err, preg, node->left);
1321 if (node->left)
1322 node->left->parent = node;
1323 }
1324 if (node->right && node->right->token.type == SUBEXP)
1325 {
1326 node->right = lower_subexp (&err, preg, node->right);
1327 if (node->right)
1328 node->right->parent = node;
1329 }
1330
1331 return err;
1332}
1333
1334static bin_tree_t *
1335lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
1336{
1337 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1338 bin_tree_t *body = node->left;
1339 bin_tree_t *op, *cls, *tree1, *tree;
1340
1341 if (preg->no_sub
1342 /* We do not optimize empty subexpressions, because otherwise we may
1343 have bad CONCAT nodes with NULL children. This is obviously not
1344 very common, so we do not lose much. An example that triggers
1345 this case is the sed "script" /\(\)/x. */
1346 && node->left != NULL
1347 && (node->token.opr.idx >= BITSET_WORD_BITS
1348 || !(dfa->used_bkref_map
1349 & ((bitset_word_t) 1 << node->token.opr.idx))))
1350 return node->left;
1351
1352 /* Convert the SUBEXP node to the concatenation of an
1353 OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
1354 op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1355 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1356 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1357 tree = create_tree (dfa, op, tree1, CONCAT);
1358 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
1359 {
1360 *err = REG_ESPACE;
1361 return NULL;
1362 }
1363
1364 op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
1365 op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
1366 return tree;
1367}
1368
1369/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
1370 nodes. Requires a postorder visit. */
1371static reg_errcode_t
1372calc_first (void *extra, bin_tree_t *node)
1373{
1374 re_dfa_t *dfa = (re_dfa_t *) extra;
1375 if (node->token.type == CONCAT)
1376 {
1377 node->first = node->left->first;
1378 node->node_idx = node->left->node_idx;
1379 }
1380 else
1381 {
1382 node->first = node;
1383 node->node_idx = re_dfa_add_node (dfa, node->token);
1384 if (BE (node->node_idx == -1, 0))
1385 return REG_ESPACE;
1386 if (node->token.type == ANCHOR)
1387 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
1388 }
1389 return REG_NOERROR;
1390}
1391
1392/* Pass 2: compute NEXT on the tree. Preorder visit. */
1393static reg_errcode_t
1394calc_next (UNUSED_PARAM void *extra, bin_tree_t *node)
1395{
1396 switch (node->token.type)
1397 {
1398 case OP_DUP_ASTERISK:
1399 node->left->next = node;
1400 break;
1401 case CONCAT:
1402 node->left->next = node->right->first;
1403 node->right->next = node->next;
1404 break;
1405 default:
1406 if (node->left)
1407 node->left->next = node->next;
1408 if (node->right)
1409 node->right->next = node->next;
1410 break;
1411 }
1412 return REG_NOERROR;
1413}
1414
1415/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
1416static reg_errcode_t
1417link_nfa_nodes (void *extra, bin_tree_t *node)
1418{
1419 re_dfa_t *dfa = (re_dfa_t *) extra;
1420 int idx = node->node_idx;
1421 reg_errcode_t err = REG_NOERROR;
1422
1423 switch (node->token.type)
1424 {
1425 case CONCAT:
1426 break;
1427
1428 case END_OF_RE:
1429 assert (node->next == NULL);
1430 break;
1431
1432 case OP_DUP_ASTERISK:
1433 case OP_ALT:
1434 {
1435 int left, right;
1436 dfa->has_plural_match = 1;
1437 if (node->left != NULL)
1438 left = node->left->first->node_idx;
1439 else
1440 left = node->next->node_idx;
1441 if (node->right != NULL)
1442 right = node->right->first->node_idx;
1443 else
1444 right = node->next->node_idx;
1445 assert (left > -1);
1446 assert (right > -1);
1447 err = re_node_set_init_2 (dfa->edests + idx, left, right);
1448 }
1449 break;
1450
1451 case ANCHOR:
1452 case OP_OPEN_SUBEXP:
1453 case OP_CLOSE_SUBEXP:
1454 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1455 break;
1456
1457 case OP_BACK_REF:
1458 dfa->nexts[idx] = node->next->node_idx;
1459 if (node->token.type == OP_BACK_REF)
1460 err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1461 break;
1462
1463 default:
1464 assert (!IS_EPSILON_NODE (node->token.type));
1465 dfa->nexts[idx] = node->next->node_idx;
1466 break;
1467 }
1468
1469 return err;
1470}
1471
1472/* Duplicate the epsilon closure of the node ROOT_NODE.
1473 Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1474 to their own constraint. */
1475
1476static reg_errcode_t
1477internal_function
1478duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
1479 int root_node, unsigned int init_constraint)
1480{
1481 int org_node, clone_node, ret;
1482 unsigned int constraint = init_constraint;
1483 for (org_node = top_org_node, clone_node = top_clone_node;;)
1484 {
1485 int org_dest, clone_dest;
1486 if (dfa->nodes[org_node].type == OP_BACK_REF)
1487 {
1488 /* If the back reference epsilon-transit, its destination must
1489 also have the constraint. Then duplicate the epsilon closure
1490 of the destination of the back reference, and store it in
1491 edests of the back reference. */
1492 org_dest = dfa->nexts[org_node];
1493 re_node_set_empty (dfa->edests + clone_node);
1494 clone_dest = duplicate_node (dfa, org_dest, constraint);
1495 if (BE (clone_dest == -1, 0))
1496 return REG_ESPACE;
1497 dfa->nexts[clone_node] = dfa->nexts[org_node];
1498 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1499 if (BE (ret < 0, 0))
1500 return REG_ESPACE;
1501 }
1502 else if (dfa->edests[org_node].nelem == 0)
1503 {
1504 /* In case of the node can't epsilon-transit, don't duplicate the
1505 destination and store the original destination as the
1506 destination of the node. */
1507 dfa->nexts[clone_node] = dfa->nexts[org_node];
1508 break;
1509 }
1510 else if (dfa->edests[org_node].nelem == 1)
1511 {
1512 /* In case of the node can epsilon-transit, and it has only one
1513 destination. */
1514 org_dest = dfa->edests[org_node].elems[0];
1515 re_node_set_empty (dfa->edests + clone_node);
1516 /* If the node is root_node itself, it means the epsilon clsoure
1517 has a loop. Then tie it to the destination of the root_node. */
1518 if (org_node == root_node && clone_node != org_node)
1519 {
1520 ret = re_node_set_insert (dfa->edests + clone_node, org_dest);
1521 if (BE (ret < 0, 0))
1522 return REG_ESPACE;
1523 break;
1524 }
1525 /* In case of the node has another constraint, add it. */
1526 constraint |= dfa->nodes[org_node].constraint;
1527 clone_dest = duplicate_node (dfa, org_dest, constraint);
1528 if (BE (clone_dest == -1, 0))
1529 return REG_ESPACE;
1530 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1531 if (BE (ret < 0, 0))
1532 return REG_ESPACE;
1533 }
1534 else /* dfa->edests[org_node].nelem == 2 */
1535 {
1536 /* In case of the node can epsilon-transit, and it has two
1537 destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
1538 org_dest = dfa->edests[org_node].elems[0];
1539 re_node_set_empty (dfa->edests + clone_node);
1540 /* Search for a duplicated node which satisfies the constraint. */
1541 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1542 if (clone_dest == -1)
1543 {
1544 /* There is no such duplicated node, create a new one. */
1545 reg_errcode_t err;
1546 clone_dest = duplicate_node (dfa, org_dest, constraint);
1547 if (BE (clone_dest == -1, 0))
1548 return REG_ESPACE;
1549 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1550 if (BE (ret < 0, 0))
1551 return REG_ESPACE;
1552 err = duplicate_node_closure (dfa, org_dest, clone_dest,
1553 root_node, constraint);
1554 if (BE (err != REG_NOERROR, 0))
1555 return err;
1556 }
1557 else
1558 {
1559 /* There is a duplicated node which satisfies the constraint,
1560 use it to avoid infinite loop. */
1561 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1562 if (BE (ret < 0, 0))
1563 return REG_ESPACE;
1564 }
1565
1566 org_dest = dfa->edests[org_node].elems[1];
1567 clone_dest = duplicate_node (dfa, org_dest, constraint);
1568 if (BE (clone_dest == -1, 0))
1569 return REG_ESPACE;
1570 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1571 if (BE (ret < 0, 0))
1572 return REG_ESPACE;
1573 }
1574 org_node = org_dest;
1575 clone_node = clone_dest;
1576 }
1577 return REG_NOERROR;
1578}
1579
1580/* Search for a node which is duplicated from the node ORG_NODE, and
1581 satisfies the constraint CONSTRAINT. */
1582
1583static int
1584search_duplicated_node (const re_dfa_t *dfa, int org_node,
1585 unsigned int constraint)
1586{
1587 int idx;
1588 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1589 {
1590 if (org_node == dfa->org_indices[idx]
1591 && constraint == dfa->nodes[idx].constraint)
1592 return idx; /* Found. */
1593 }
1594 return -1; /* Not found. */
1595}
1596
1597/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1598 Return the index of the new node, or -1 if insufficient storage is
1599 available. */
1600
1601static int
1602duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
1603{
1604 int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1605 if (BE (dup_idx != -1, 1))
1606 {
1607 dfa->nodes[dup_idx].constraint = constraint;
1608 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
1609 dfa->nodes[dup_idx].duplicated = 1;
1610
1611 /* Store the index of the original node. */
1612 dfa->org_indices[dup_idx] = org_idx;
1613 }
1614 return dup_idx;
1615}
1616
1617static reg_errcode_t
1618calc_inveclosure (re_dfa_t *dfa)
1619{
1620 int src, idx, ret;
1621 for (idx = 0; idx < dfa->nodes_len; ++idx)
1622 re_node_set_init_empty (dfa->inveclosures + idx);
1623
1624 for (src = 0; src < dfa->nodes_len; ++src)
1625 {
1626 int *elems = dfa->eclosures[src].elems;
1627 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1628 {
1629 ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1630 if (BE (ret == -1, 0))
1631 return REG_ESPACE;
1632 }
1633 }
1634
1635 return REG_NOERROR;
1636}
1637
1638/* Calculate "eclosure" for all the node in DFA. */
1639
1640static reg_errcode_t
1641calc_eclosure (re_dfa_t *dfa)
1642{
1643 int node_idx, incomplete;
1644#ifdef DEBUG
1645 assert (dfa->nodes_len > 0);
1646#endif
1647 incomplete = 0;
1648 /* For each nodes, calculate epsilon closure. */
1649 for (node_idx = 0; ; ++node_idx)
1650 {
1651 reg_errcode_t err;
1652 re_node_set eclosure_elem;
1653 if (node_idx == dfa->nodes_len)
1654 {
1655 if (!incomplete)
1656 break;
1657 incomplete = 0;
1658 node_idx = 0;
1659 }
1660
1661#ifdef DEBUG
1662 assert (dfa->eclosures[node_idx].nelem != -1);
1663#endif
1664
1665 /* If we have already calculated, skip it. */
1666 if (dfa->eclosures[node_idx].nelem != 0)
1667 continue;
1668 /* Calculate epsilon closure of `node_idx'. */
1669 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
1670 if (BE (err != REG_NOERROR, 0))
1671 return err;
1672
1673 if (dfa->eclosures[node_idx].nelem == 0)
1674 {
1675 incomplete = 1;
1676 re_node_set_free (&eclosure_elem);
1677 }
1678 }
1679 return REG_NOERROR;
1680}
1681
1682/* Calculate epsilon closure of NODE. */
1683
1684static reg_errcode_t
1685calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
1686{
1687 reg_errcode_t err;
1688 int i;
1689 re_node_set eclosure;
1690 int ret;
1691 int incomplete = 0;
1692 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1693 if (BE (err != REG_NOERROR, 0))
1694 return err;
1695
1696 /* This indicates that we are calculating this node now.
1697 We reference this value to avoid infinite loop. */
1698 dfa->eclosures[node].nelem = -1;
1699
1700 /* If the current node has constraints, duplicate all nodes
1701 since they must inherit the constraints. */
1702 if (dfa->nodes[node].constraint
1703 && dfa->edests[node].nelem
1704 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1705 {
1706 err = duplicate_node_closure (dfa, node, node, node,
1707 dfa->nodes[node].constraint);
1708 if (BE (err != REG_NOERROR, 0))
1709 return err;
1710 }
1711
1712 /* Expand each epsilon destination nodes. */
1713 if (IS_EPSILON_NODE(dfa->nodes[node].type))
1714 for (i = 0; i < dfa->edests[node].nelem; ++i)
1715 {
1716 re_node_set eclosure_elem;
1717 int edest = dfa->edests[node].elems[i];
1718 /* If calculating the epsilon closure of `edest' is in progress,
1719 return intermediate result. */
1720 if (dfa->eclosures[edest].nelem == -1)
1721 {
1722 incomplete = 1;
1723 continue;
1724 }
1725 /* If we haven't calculated the epsilon closure of `edest' yet,
1726 calculate now. Otherwise use calculated epsilon closure. */
1727 if (dfa->eclosures[edest].nelem == 0)
1728 {
1729 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
1730 if (BE (err != REG_NOERROR, 0))
1731 return err;
1732 }
1733 else
1734 eclosure_elem = dfa->eclosures[edest];
1735 /* Merge the epsilon closure of `edest'. */
1736 err = re_node_set_merge (&eclosure, &eclosure_elem);
1737 if (BE (err != REG_NOERROR, 0))
1738 return err;
1739 /* If the epsilon closure of `edest' is incomplete,
1740 the epsilon closure of this node is also incomplete. */
1741 if (dfa->eclosures[edest].nelem == 0)
1742 {
1743 incomplete = 1;
1744 re_node_set_free (&eclosure_elem);
1745 }
1746 }
1747
1748 /* An epsilon closure includes itself. */
1749 ret = re_node_set_insert (&eclosure, node);
1750 if (BE (ret < 0, 0))
1751 return REG_ESPACE;
1752 if (incomplete && !root)
1753 dfa->eclosures[node].nelem = 0;
1754 else
1755 dfa->eclosures[node] = eclosure;
1756 *new_set = eclosure;
1757 return REG_NOERROR;
1758}
1759
1760/* Functions for token which are used in the parser. */
1761
1762/* Fetch a token from INPUT.
1763 We must not use this function inside bracket expressions. */
1764
1765static void
1766internal_function
1767fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
1768{
1769 re_string_skip_bytes (input, peek_token (result, input, syntax));
1770}
1771
1772/* Peek a token from INPUT, and return the length of the token.
1773 We must not use this function inside bracket expressions. */
1774
1775static int
1776internal_function
1777peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1778{
1779 unsigned char c;
1780
1781 if (re_string_eoi (input))
1782 {
1783 token->type = END_OF_RE;
1784 return 0;
1785 }
1786
1787 c = re_string_peek_byte (input, 0);
1788 token->opr.c = c;
1789
1790 token->word_char = 0;
1791#ifdef RE_ENABLE_I18N
1792 token->mb_partial = 0;
1793 if (input->mb_cur_max > 1 &&
1794 !re_string_first_byte (input, re_string_cur_idx (input)))
1795 {
1796 token->type = CHARACTER;
1797 token->mb_partial = 1;
1798 return 1;
1799 }
1800#endif
1801 if (c == '\\')
1802 {
1803 unsigned char c2;
1804 if (re_string_cur_idx (input) + 1 >= re_string_length (input))
1805 {
1806 token->type = BACK_SLASH;
1807 return 1;
1808 }
1809
1810 c2 = re_string_peek_byte_case (input, 1);
1811 token->opr.c = c2;
1812 token->type = CHARACTER;
1813#ifdef RE_ENABLE_I18N
1814 if (input->mb_cur_max > 1)
1815 {
1816 wint_t wc = re_string_wchar_at (input,
1817 re_string_cur_idx (input) + 1);
1818 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1819 }
1820 else
1821#endif
1822 token->word_char = IS_WORD_CHAR (c2) != 0;
1823
1824 switch (c2)
1825 {
1826 case '|':
1827 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
1828 token->type = OP_ALT;
1829 break;
1830 case '1': case '2': case '3': case '4': case '5':
1831 case '6': case '7': case '8': case '9':
1832 if (!(syntax & RE_NO_BK_REFS))
1833 {
1834 token->type = OP_BACK_REF;
1835 token->opr.idx = c2 - '1';
1836 }
1837 break;
1838 case '<':
1839 if (!(syntax & RE_NO_GNU_OPS))
1840 {
1841 token->type = ANCHOR;
1842 token->opr.ctx_type = WORD_FIRST;
1843 }
1844 break;
1845 case '>':
1846 if (!(syntax & RE_NO_GNU_OPS))
1847 {
1848 token->type = ANCHOR;
1849 token->opr.ctx_type = WORD_LAST;
1850 }
1851 break;
1852 case 'b':
1853 if (!(syntax & RE_NO_GNU_OPS))
1854 {
1855 token->type = ANCHOR;
1856 token->opr.ctx_type = WORD_DELIM;
1857 }
1858 break;
1859 case 'B':
1860 if (!(syntax & RE_NO_GNU_OPS))
1861 {
1862 token->type = ANCHOR;
1863 token->opr.ctx_type = NOT_WORD_DELIM;
1864 }
1865 break;
1866 case 'w':
1867 if (!(syntax & RE_NO_GNU_OPS))
1868 token->type = OP_WORD;
1869 break;
1870 case 'W':
1871 if (!(syntax & RE_NO_GNU_OPS))
1872 token->type = OP_NOTWORD;
1873 break;
1874 case 's':
1875 if (!(syntax & RE_NO_GNU_OPS))
1876 token->type = OP_SPACE;
1877 break;
1878 case 'S':
1879 if (!(syntax & RE_NO_GNU_OPS))
1880 token->type = OP_NOTSPACE;
1881 break;
1882 case '`':
1883 if (!(syntax & RE_NO_GNU_OPS))
1884 {
1885 token->type = ANCHOR;
1886 token->opr.ctx_type = BUF_FIRST;
1887 }
1888 break;
1889 case '\'':
1890 if (!(syntax & RE_NO_GNU_OPS))
1891 {
1892 token->type = ANCHOR;
1893 token->opr.ctx_type = BUF_LAST;
1894 }
1895 break;
1896 case '(':
1897 if (!(syntax & RE_NO_BK_PARENS))
1898 token->type = OP_OPEN_SUBEXP;
1899 break;
1900 case ')':
1901 if (!(syntax & RE_NO_BK_PARENS))
1902 token->type = OP_CLOSE_SUBEXP;
1903 break;
1904 case '+':
1905 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1906 token->type = OP_DUP_PLUS;
1907 break;
1908 case '?':
1909 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1910 token->type = OP_DUP_QUESTION;
1911 break;
1912 case '{':
1913 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1914 token->type = OP_OPEN_DUP_NUM;
1915 break;
1916 case '}':
1917 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1918 token->type = OP_CLOSE_DUP_NUM;
1919 break;
1920 default:
1921 break;
1922 }
1923 return 2;
1924 }
1925
1926 token->type = CHARACTER;
1927#ifdef RE_ENABLE_I18N
1928 if (input->mb_cur_max > 1)
1929 {
1930 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1931 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1932 }
1933 else
1934#endif
1935 token->word_char = IS_WORD_CHAR (token->opr.c);
1936
1937 switch (c)
1938 {
1939 case '\n':
1940 if (syntax & RE_NEWLINE_ALT)
1941 token->type = OP_ALT;
1942 break;
1943 case '|':
1944 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
1945 token->type = OP_ALT;
1946 break;
1947 case '*':
1948 token->type = OP_DUP_ASTERISK;
1949 break;
1950 case '+':
1951 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1952 token->type = OP_DUP_PLUS;
1953 break;
1954 case '?':
1955 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1956 token->type = OP_DUP_QUESTION;
1957 break;
1958 case '{':
1959 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1960 token->type = OP_OPEN_DUP_NUM;
1961 break;
1962 case '}':
1963 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1964 token->type = OP_CLOSE_DUP_NUM;
1965 break;
1966 case '(':
1967 if (syntax & RE_NO_BK_PARENS)
1968 token->type = OP_OPEN_SUBEXP;
1969 break;
1970 case ')':
1971 if (syntax & RE_NO_BK_PARENS)
1972 token->type = OP_CLOSE_SUBEXP;
1973 break;
1974 case '[':
1975 token->type = OP_OPEN_BRACKET;
1976 break;
1977 case '.':
1978 token->type = OP_PERIOD;
1979 break;
1980 case '^':
1981 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
1982 re_string_cur_idx (input) != 0)
1983 {
1984 char prev = re_string_peek_byte (input, -1);
1985 if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
1986 break;
1987 }
1988 token->type = ANCHOR;
1989 token->opr.ctx_type = LINE_FIRST;
1990 break;
1991 case '$':
1992 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
1993 re_string_cur_idx (input) + 1 != re_string_length (input))
1994 {
1995 re_token_t next;
1996 re_string_skip_bytes (input, 1);
1997 peek_token (&next, input, syntax);
1998 re_string_skip_bytes (input, -1);
1999 if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
2000 break;
2001 }
2002 token->type = ANCHOR;
2003 token->opr.ctx_type = LINE_LAST;
2004 break;
2005 default:
2006 break;
2007 }
2008 return 1;
2009}
2010
2011/* Peek a token from INPUT, and return the length of the token.
2012 We must not use this function out of bracket expressions. */
2013
2014static int
2015internal_function
2016peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2017{
2018 unsigned char c;
2019 if (re_string_eoi (input))
2020 {
2021 token->type = END_OF_RE;
2022 return 0;
2023 }
2024 c = re_string_peek_byte (input, 0);
2025 token->opr.c = c;
2026
2027#ifdef RE_ENABLE_I18N
2028 if (input->mb_cur_max > 1 &&
2029 !re_string_first_byte (input, re_string_cur_idx (input)))
2030 {
2031 token->type = CHARACTER;
2032 return 1;
2033 }
2034#endif /* RE_ENABLE_I18N */
2035
2036 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
2037 && re_string_cur_idx (input) + 1 < re_string_length (input))
2038 {
2039 /* In this case, '\' escape a character. */
2040 unsigned char c2;
2041 re_string_skip_bytes (input, 1);
2042 c2 = re_string_peek_byte (input, 0);
2043 token->opr.c = c2;
2044 token->type = CHARACTER;
2045 return 1;
2046 }
2047 if (c == '[') /* '[' is a special char in a bracket exps. */
2048 {
2049 unsigned char c2;
2050 int token_len;
2051 if (re_string_cur_idx (input) + 1 < re_string_length (input))
2052 c2 = re_string_peek_byte (input, 1);
2053 else
2054 c2 = 0;
2055 token->opr.c = c2;
2056 token_len = 2;
2057 switch (c2)
2058 {
2059 case '.':
2060 token->type = OP_OPEN_COLL_ELEM;
2061 break;
2062 case '=':
2063 token->type = OP_OPEN_EQUIV_CLASS;
2064 break;
2065 case ':':
2066 if (syntax & RE_CHAR_CLASSES)
2067 {
2068 token->type = OP_OPEN_CHAR_CLASS;
2069 break;
2070 }
2071 /* else fall through. */
2072 default:
2073 token->type = CHARACTER;
2074 token->opr.c = c;
2075 token_len = 1;
2076 break;
2077 }
2078 return token_len;
2079 }
2080 switch (c)
2081 {
2082 case '-':
2083 token->type = OP_CHARSET_RANGE;
2084 break;
2085 case ']':
2086 token->type = OP_CLOSE_BRACKET;
2087 break;
2088 case '^':
2089 token->type = OP_NON_MATCH_LIST;
2090 break;
2091 default:
2092 token->type = CHARACTER;
2093 }
2094 return 1;
2095}
2096
2097/* Functions for parser. */
2098
2099/* Entry point of the parser.
2100 Parse the regular expression REGEXP and return the structure tree.
2101 If an error has occurred, ERR is set by error code, and return NULL.
2102 This function build the following tree, from regular expression <reg_exp>:
2103 CAT
2104 / \
2105 / \
2106 <reg_exp> EOR
2107
2108 CAT means concatenation.
2109 EOR means end of regular expression. */
2110
2111static bin_tree_t *
2112parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
2113 reg_errcode_t *err)
2114{
2115 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2116 bin_tree_t *tree, *eor, *root;
2117 re_token_t current_token;
2118 dfa->syntax = syntax;
2119 fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2120 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
2121 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2122 return NULL;
2123 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2124 if (tree != NULL)
2125 root = create_tree (dfa, tree, eor, CONCAT);
2126 else
2127 root = eor;
2128 if (BE (eor == NULL || root == NULL, 0))
2129 {
2130 *err = REG_ESPACE;
2131 return NULL;
2132 }
2133 return root;
2134}
2135
2136/* This function build the following tree, from regular expression
2137 <branch1>|<branch2>:
2138 ALT
2139 / \
2140 / \
2141 <branch1> <branch2>
2142
2143 ALT means alternative, which represents the operator `|'. */
2144
2145static bin_tree_t *
2146parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2147 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2148{
2149 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2150 bin_tree_t *tree, *branch = NULL;
2151 tree = parse_branch (regexp, preg, token, syntax, nest, err);
2152 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2153 return NULL;
2154
2155 while (token->type == OP_ALT)
2156 {
2157 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2158 if (token->type != OP_ALT && token->type != END_OF_RE
2159 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2160 {
2161 branch = parse_branch (regexp, preg, token, syntax, nest, err);
2162 if (BE (*err != REG_NOERROR && branch == NULL, 0))
2163 return NULL;
2164 }
2165 else
2166 branch = NULL;
2167 tree = create_tree (dfa, tree, branch, OP_ALT);
2168 if (BE (tree == NULL, 0))
2169 {
2170 *err = REG_ESPACE;
2171 return NULL;
2172 }
2173 }
2174 return tree;
2175}
2176
2177/* This function build the following tree, from regular expression
2178 <exp1><exp2>:
2179 CAT
2180 / \
2181 / \
2182 <exp1> <exp2>
2183
2184 CAT means concatenation. */
2185
2186static bin_tree_t *
2187parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
2188 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2189{
2190 bin_tree_t *tree, *exp;
2191 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2192 tree = parse_expression (regexp, preg, token, syntax, nest, err);
2193 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2194 return NULL;
2195
2196 while (token->type != OP_ALT && token->type != END_OF_RE
2197 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2198 {
2199 exp = parse_expression (regexp, preg, token, syntax, nest, err);
2200 if (BE (*err != REG_NOERROR && exp == NULL, 0))
2201 {
2202 return NULL;
2203 }
2204 if (tree != NULL && exp != NULL)
2205 {
2206 tree = create_tree (dfa, tree, exp, CONCAT);
2207 if (tree == NULL)
2208 {
2209 *err = REG_ESPACE;
2210 return NULL;
2211 }
2212 }
2213 else if (tree == NULL)
2214 tree = exp;
2215 /* Otherwise exp == NULL, we don't need to create new tree. */
2216 }
2217 return tree;
2218}
2219
2220/* This function build the following tree, from regular expression a*:
2221 *
2222 |
2223 a
2224*/
2225
2226static bin_tree_t *
2227parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2228 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2229{
2230 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2231 bin_tree_t *tree;
2232 switch (token->type)
2233 {
2234 case CHARACTER:
2235 tree = create_token_tree (dfa, NULL, NULL, token);
2236 if (BE (tree == NULL, 0))
2237 {
2238 *err = REG_ESPACE;
2239 return NULL;
2240 }
2241#ifdef RE_ENABLE_I18N
2242 if (dfa->mb_cur_max > 1)
2243 {
2244 while (!re_string_eoi (regexp)
2245 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
2246 {
2247 bin_tree_t *mbc_remain;
2248 fetch_token (token, regexp, syntax);
2249 mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2250 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2251 if (BE (mbc_remain == NULL || tree == NULL, 0))
2252 {
2253 *err = REG_ESPACE;
2254 return NULL;
2255 }
2256 }
2257 }
2258#endif
2259 break;
2260 case OP_OPEN_SUBEXP:
2261 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
2262 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2263 return NULL;
2264 break;
2265 case OP_OPEN_BRACKET:
2266 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2267 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2268 return NULL;
2269 break;
2270 case OP_BACK_REF:
2271 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
2272 {
2273 *err = REG_ESUBREG;
2274 return NULL;
2275 }
2276 dfa->used_bkref_map |= 1 << token->opr.idx;
2277 tree = create_token_tree (dfa, NULL, NULL, token);
2278 if (BE (tree == NULL, 0))
2279 {
2280 *err = REG_ESPACE;
2281 return NULL;
2282 }
2283 ++dfa->nbackref;
2284 dfa->has_mb_node = 1;
2285 break;
2286 case OP_OPEN_DUP_NUM:
2287 if (syntax & RE_CONTEXT_INVALID_DUP)
2288 {
2289 *err = REG_BADRPT;
2290 return NULL;
2291 }
2292 /* FALLTHROUGH */
2293 case OP_DUP_ASTERISK:
2294 case OP_DUP_PLUS:
2295 case OP_DUP_QUESTION:
2296 if (syntax & RE_CONTEXT_INVALID_OPS)
2297 {
2298 *err = REG_BADRPT;
2299 return NULL;
2300 }
2301 else if (syntax & RE_CONTEXT_INDEP_OPS)
2302 {
2303 fetch_token (token, regexp, syntax);
2304 return parse_expression (regexp, preg, token, syntax, nest, err);
2305 }
2306 /* else fall through */
2307 case OP_CLOSE_SUBEXP:
2308 if ((token->type == OP_CLOSE_SUBEXP) &&
2309 !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2310 {
2311 *err = REG_ERPAREN;
2312 return NULL;
2313 }
2314 /* else fall through */
2315 case OP_CLOSE_DUP_NUM:
2316 /* We treat it as a normal character. */
2317
2318 /* Then we can these characters as normal characters. */
2319 token->type = CHARACTER;
2320 /* mb_partial and word_char bits should be initialized already
2321 by peek_token. */
2322 tree = create_token_tree (dfa, NULL, NULL, token);
2323 if (BE (tree == NULL, 0))
2324 {
2325 *err = REG_ESPACE;
2326 return NULL;
2327 }
2328 break;
2329 case ANCHOR:
2330 if ((token->opr.ctx_type
2331 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
2332 && dfa->word_ops_used == 0)
2333 init_word_char (dfa);
2334 if (token->opr.ctx_type == WORD_DELIM
2335 || token->opr.ctx_type == NOT_WORD_DELIM)
2336 {
2337 bin_tree_t *tree_first, *tree_last;
2338 if (token->opr.ctx_type == WORD_DELIM)
2339 {
2340 token->opr.ctx_type = WORD_FIRST;
2341 tree_first = create_token_tree (dfa, NULL, NULL, token);
2342 token->opr.ctx_type = WORD_LAST;
2343 }
2344 else
2345 {
2346 token->opr.ctx_type = INSIDE_WORD;
2347 tree_first = create_token_tree (dfa, NULL, NULL, token);
2348 token->opr.ctx_type = INSIDE_NOTWORD;
2349 }
2350 tree_last = create_token_tree (dfa, NULL, NULL, token);
2351 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2352 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
2353 {
2354 *err = REG_ESPACE;
2355 return NULL;
2356 }
2357 }
2358 else
2359 {
2360 tree = create_token_tree (dfa, NULL, NULL, token);
2361 if (BE (tree == NULL, 0))
2362 {
2363 *err = REG_ESPACE;
2364 return NULL;
2365 }
2366 }
2367 /* We must return here, since ANCHORs can't be followed
2368 by repetition operators.
2369 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
2370 it must not be "<ANCHOR(^)><REPEAT(*)>". */
2371 fetch_token (token, regexp, syntax);
2372 return tree;
2373 case OP_PERIOD:
2374 tree = create_token_tree (dfa, NULL, NULL, token);
2375 if (BE (tree == NULL, 0))
2376 {
2377 *err = REG_ESPACE;
2378 return NULL;
2379 }
2380 if (dfa->mb_cur_max > 1)
2381 dfa->has_mb_node = 1;
2382 break;
2383 case OP_WORD:
2384 case OP_NOTWORD:
2385 tree = build_charclass_op (dfa, regexp->trans,
2386 "alnum",
2387 "_",
2388 token->type == OP_NOTWORD, err);
2389 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2390 return NULL;
2391 break;
2392 case OP_SPACE:
2393 case OP_NOTSPACE:
2394 tree = build_charclass_op (dfa, regexp->trans,
2395 "space",
2396 "",
2397 token->type == OP_NOTSPACE, err);
2398 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2399 return NULL;
2400 break;
2401 case OP_ALT:
2402 case END_OF_RE:
2403 return NULL;
2404 case BACK_SLASH:
2405 *err = REG_EESCAPE;
2406 return NULL;
2407 default:
2408 /* Must not happen? */
2409#ifdef DEBUG
2410 assert (0);
2411#endif
2412 return NULL;
2413 }
2414 fetch_token (token, regexp, syntax);
2415
2416 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
2417 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
2418 {
2419 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2420 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2421 return NULL;
2422 /* In BRE consecutive duplications are not allowed. */
2423 if ((syntax & RE_CONTEXT_INVALID_DUP)
2424 && (token->type == OP_DUP_ASTERISK
2425 || token->type == OP_OPEN_DUP_NUM))
2426 {
2427 *err = REG_BADRPT;
2428 return NULL;
2429 }
2430 }
2431
2432 return tree;
2433}
2434
2435/* This function build the following tree, from regular expression
2436 (<reg_exp>):
2437 SUBEXP
2438 |
2439 <reg_exp>
2440*/
2441
2442static bin_tree_t *
2443parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2444 reg_syntax_t syntax, int nest, reg_errcode_t *err)
2445{
2446 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2447 bin_tree_t *tree;
2448 size_t cur_nsub;
2449 cur_nsub = preg->re_nsub++;
2450
2451 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2452
2453 /* The subexpression may be a null string. */
2454 if (token->type == OP_CLOSE_SUBEXP)
2455 tree = NULL;
2456 else
2457 {
2458 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2459 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
2460 *err = REG_EPAREN;
2461 if (BE (*err != REG_NOERROR, 0))
2462 return NULL;
2463 }
2464
2465 if (cur_nsub <= '9' - '1')
2466 dfa->completed_bkref_map |= 1 << cur_nsub;
2467
2468 tree = create_tree (dfa, tree, NULL, SUBEXP);
2469 if (BE (tree == NULL, 0))
2470 {
2471 *err = REG_ESPACE;
2472 return NULL;
2473 }
2474 tree->token.opr.idx = cur_nsub;
2475 return tree;
2476}
2477
2478/* This function parse repetition operators like "*", "+", "{1,3}" etc. */
2479
2480static bin_tree_t *
2481parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2482 re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
2483{
2484 bin_tree_t *tree = NULL, *old_tree = NULL;
2485 int i, start, end, start_idx = re_string_cur_idx (regexp);
2486#ifndef RE_TOKEN_INIT_BUG
2487 re_token_t start_token = *token;
2488#else
2489 re_token_t start_token;
2490
2491 memcpy ((void *) &start_token, (void *) token, sizeof start_token);
2492#endif
2493
2494 if (token->type == OP_OPEN_DUP_NUM)
2495 {
2496 end = 0;
2497 start = fetch_number (regexp, token, syntax);
2498 if (start == -1)
2499 {
2500 if (token->type == CHARACTER && token->opr.c == ',')
2501 start = 0; /* We treat "{,m}" as "{0,m}". */
2502 else
2503 {
2504 *err = REG_BADBR; /* <re>{} is invalid. */
2505 return NULL;
2506 }
2507 }
2508 if (BE (start != -2, 1))
2509 {
2510 /* We treat "{n}" as "{n,n}". */
2511 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2512 : ((token->type == CHARACTER && token->opr.c == ',')
2513 ? fetch_number (regexp, token, syntax) : -2));
2514 }
2515 if (BE (start == -2 || end == -2, 0))
2516 {
2517 /* Invalid sequence. */
2518 if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
2519 {
2520 if (token->type == END_OF_RE)
2521 *err = REG_EBRACE;
2522 else
2523 *err = REG_BADBR;
2524
2525 return NULL;
2526 }
2527
2528 /* If the syntax bit is set, rollback. */
2529 re_string_set_index (regexp, start_idx);
2530 *token = start_token;
2531 token->type = CHARACTER;
2532 /* mb_partial and word_char bits should be already initialized by
2533 peek_token. */
2534 return elem;
2535 }
2536
2537 if (BE ((end != -1 && start > end) || token->type != OP_CLOSE_DUP_NUM, 0))
2538 {
2539 /* First number greater than second. */
2540 *err = REG_BADBR;
2541 return NULL;
2542 }
2543 }
2544 else
2545 {
2546 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
2547 end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
2548 }
2549
2550 fetch_token (token, regexp, syntax);
2551
2552 if (BE (elem == NULL, 0))
2553 return NULL;
2554 if (BE (start == 0 && end == 0, 0))
2555 {
2556 postorder (elem, free_tree, NULL);
2557 return NULL;
2558 }
2559
2560 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2561 if (BE (start > 0, 0))
2562 {
2563 tree = elem;
2564 for (i = 2; i <= start; ++i)
2565 {
2566 elem = duplicate_tree (elem, dfa);
2567 tree = create_tree (dfa, tree, elem, CONCAT);
2568 if (BE (elem == NULL || tree == NULL, 0))
2569 goto parse_dup_op_espace;
2570 }
2571
2572 if (start == end)
2573 return tree;
2574
2575 /* Duplicate ELEM before it is marked optional. */
2576 elem = duplicate_tree (elem, dfa);
2577 old_tree = tree;
2578 }
2579 else
2580 old_tree = NULL;
2581
2582 if (elem->token.type == SUBEXP)
2583 postorder (elem, mark_opt_subexp, (void *) (intptr_t) elem->token.opr.idx);
2584
2585 tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
2586 if (BE (tree == NULL, 0))
2587 goto parse_dup_op_espace;
2588
2589 /* This loop is actually executed only when end != -1,
2590 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2591 already created the start+1-th copy. */
2592 for (i = start + 2; i <= end; ++i)
2593 {
2594 elem = duplicate_tree (elem, dfa);
2595 tree = create_tree (dfa, tree, elem, CONCAT);
2596 if (BE (elem == NULL || tree == NULL, 0))
2597 goto parse_dup_op_espace;
2598
2599 tree = create_tree (dfa, tree, NULL, OP_ALT);
2600 if (BE (tree == NULL, 0))
2601 goto parse_dup_op_espace;
2602 }
2603
2604 if (old_tree)
2605 tree = create_tree (dfa, old_tree, tree, CONCAT);
2606
2607 return tree;
2608
2609 parse_dup_op_espace:
2610 *err = REG_ESPACE;
2611 return NULL;
2612}
2613
2614/* Size of the names for collating symbol/equivalence_class/character_class.
2615 I'm not sure, but maybe enough. */
2616#define BRACKET_NAME_BUF_SIZE 32
2617
2618#ifndef _LIBC
2619 /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2620 Build the range expression which starts from START_ELEM, and ends
2621 at END_ELEM. The result are written to MBCSET and SBCSET.
2622 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2623 mbcset->range_ends, is a pointer argument since we may
2624 update it. */
2625
2626static reg_errcode_t
2627internal_function
2628# ifdef RE_ENABLE_I18N
2629build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
2630 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
2631# else /* not RE_ENABLE_I18N */
2632build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2633 bracket_elem_t *end_elem)
2634# endif /* not RE_ENABLE_I18N */
2635{
2636 unsigned int start_ch, end_ch;
2637 /* Equivalence Classes and Character Classes can't be a range start/end. */
2638 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
2639 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
2640 0))
2641 return REG_ERANGE;
2642
2643 /* We can handle no multi character collating elements without libc
2644 support. */
2645 if (BE ((start_elem->type == COLL_SYM
2646 && strlen ((char *) start_elem->opr.name) > 1)
2647 || (end_elem->type == COLL_SYM
2648 && strlen ((char *) end_elem->opr.name) > 1), 0))
2649 return REG_ECOLLATE;
2650
2651# ifdef RE_ENABLE_I18N
2652 {
2653 wchar_t wc;
2654 wint_t start_wc;
2655 wint_t end_wc;
2656 wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2657
2658 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2659 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2660 : 0));
2661 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2662 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2663 : 0));
2664#ifdef GAWK
2665 /*
2666 * Fedora Core 2, maybe others, have broken `btowc' that returns -1
2667 * for any value > 127. Sigh. Note that `start_ch' and `end_ch' are
2668 * unsigned, so we don't have sign extension problems.
2669 */
2670 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2671 ? start_ch : start_elem->opr.wch);
2672 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2673 ? end_ch : end_elem->opr.wch);
2674#else
2675 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2676 ? __btowc (start_ch) : start_elem->opr.wch);
2677 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2678 ? __btowc (end_ch) : end_elem->opr.wch);
2679#endif
2680 if (start_wc == WEOF || end_wc == WEOF)
2681 return REG_ECOLLATE;
2682 cmp_buf[0] = start_wc;
2683 cmp_buf[4] = end_wc;
2684 if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
2685 return REG_ERANGE;
2686
2687 /* Got valid collation sequence values, add them as a new entry.
2688 However, for !_LIBC we have no collation elements: if the
2689 character set is single byte, the single byte character set
2690 that we build below suffices. parse_bracket_exp passes
2691 no MBCSET if dfa->mb_cur_max == 1. */
2692 if (mbcset)
2693 {
2694 /* Check the space of the arrays. */
2695 if (BE (*range_alloc == mbcset->nranges, 0))
2696 {
2697 /* There is not enough space, need realloc. */
2698 wchar_t *new_array_start, *new_array_end;
2699 int new_nranges;
2700
2701 /* +1 in case of mbcset->nranges is 0. */
2702 new_nranges = 2 * mbcset->nranges + 1;
2703 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2704 are NULL if *range_alloc == 0. */
2705 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2706 new_nranges);
2707 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2708 new_nranges);
2709
2710 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2711 return REG_ESPACE;
2712
2713 mbcset->range_starts = new_array_start;
2714 mbcset->range_ends = new_array_end;
2715 *range_alloc = new_nranges;
2716 }
2717
2718 mbcset->range_starts[mbcset->nranges] = start_wc;
2719 mbcset->range_ends[mbcset->nranges++] = end_wc;
2720 }
2721
2722 /* Build the table for single byte characters. */
2723 for (wc = 0; wc < SBC_MAX; ++wc)
2724 {
2725 cmp_buf[2] = wc;
2726 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2727 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2728 bitset_set (sbcset, wc);
2729 }
2730 }
2731# else /* not RE_ENABLE_I18N */
2732 {
2733 unsigned int ch;
2734 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2735 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2736 : 0));
2737 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2738 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2739 : 0));
2740 if (start_ch > end_ch)
2741 return REG_ERANGE;
2742 /* Build the table for single byte characters. */
2743 for (ch = 0; ch < SBC_MAX; ++ch)
2744 if (start_ch <= ch && ch <= end_ch)
2745 bitset_set (sbcset, ch);
2746 }
2747# endif /* not RE_ENABLE_I18N */
2748 return REG_NOERROR;
2749}
2750#endif /* not _LIBC */
2751
2752#ifndef _LIBC
2753/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
2754 Build the collating element which is represented by NAME.
2755 The result are written to MBCSET and SBCSET.
2756 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2757 pointer argument since we may update it. */
2758
2759static reg_errcode_t
2760internal_function
2761# ifdef RE_ENABLE_I18N
2762build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2763 int *coll_sym_alloc, const unsigned char *name)
2764# else /* not RE_ENABLE_I18N */
2765build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2766# endif /* not RE_ENABLE_I18N */
2767{
2768 size_t name_len = strlen ((const char *) name);
2769 if (BE (name_len != 1, 0))
2770 return REG_ECOLLATE;
2771 else
2772 {
2773 bitset_set (sbcset, name[0]);
2774 return REG_NOERROR;
2775 }
2776}
2777#endif /* not _LIBC */
2778
2779/* This function parse bracket expression like "[abc]", "[a-c]",
2780 "[[.a-a.]]" etc. */
2781
2782static bin_tree_t *
2783parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2784 reg_syntax_t syntax, reg_errcode_t *err)
2785{
2786#ifdef _LIBC
2787 const unsigned char *collseqmb;
2788 const char *collseqwc;
2789 uint32_t nrules;
2790 int32_t table_size;
2791 const int32_t *symb_table;
2792 const unsigned char *extra;
2793
2794 /* Local function for parse_bracket_exp used in _LIBC environment.
2795 Seek the collating symbol entry correspondings to NAME.
2796 Return the index of the symbol in the SYMB_TABLE. */
2797
2798 auto inline int32_t
2799 __attribute ((always_inline))
2800 seek_collating_symbol_entry (name, name_len)
2801 const unsigned char *name;
2802 size_t name_len;
2803 {
2804 int32_t hash = elem_hash ((const char *) name, name_len);
2805 int32_t elem = hash % table_size;
2806 if (symb_table[2 * elem] != 0)
2807 {
2808 int32_t second = hash % (table_size - 2) + 1;
2809
2810 do
2811 {
2812 /* First compare the hashing value. */
2813 if (symb_table[2 * elem] == hash
2814 /* Compare the length of the name. */
2815 && name_len == extra[symb_table[2 * elem + 1]]
2816 /* Compare the name. */
2817 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
2818 name_len) == 0)
2819 {
2820 /* Yep, this is the entry. */
2821 break;
2822 }
2823
2824 /* Next entry. */
2825 elem += second;
2826 }
2827 while (symb_table[2 * elem] != 0);
2828 }
2829 return elem;
2830 }
2831
2832 /* Local function for parse_bracket_exp used in _LIBC environment.
2833 Look up the collation sequence value of BR_ELEM.
2834 Return the value if succeeded, UINT_MAX otherwise. */
2835
2836 auto inline unsigned int
2837 __attribute ((always_inline))
2838 lookup_collation_sequence_value (br_elem)
2839 bracket_elem_t *br_elem;
2840 {
2841 if (br_elem->type == SB_CHAR)
2842 {
2843 /*
2844 if (MB_CUR_MAX == 1)
2845 */
2846 if (nrules == 0)
2847 return collseqmb[br_elem->opr.ch];
2848 else
2849 {
2850 wint_t wc = __btowc (br_elem->opr.ch);
2851 return __collseq_table_lookup (collseqwc, wc);
2852 }
2853 }
2854 else if (br_elem->type == MB_CHAR)
2855 {
2856 if (nrules != 0)
2857 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2858 }
2859 else if (br_elem->type == COLL_SYM)
2860 {
2861 size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2862 if (nrules != 0)
2863 {
2864 int32_t elem, idx;
2865 elem = seek_collating_symbol_entry (br_elem->opr.name,
2866 sym_name_len);
2867 if (symb_table[2 * elem] != 0)
2868 {
2869 /* We found the entry. */
2870 idx = symb_table[2 * elem + 1];
2871 /* Skip the name of collating element name. */
2872 idx += 1 + extra[idx];
2873 /* Skip the byte sequence of the collating element. */
2874 idx += 1 + extra[idx];
2875 /* Adjust for the alignment. */
2876 idx = (idx + 3) & ~3;
2877 /* Skip the multibyte collation sequence value. */
2878 idx += sizeof (unsigned int);
2879 /* Skip the wide char sequence of the collating element. */
2880 idx += sizeof (unsigned int) *
2881 (1 + *(unsigned int *) (extra + idx));
2882 /* Return the collation sequence value. */
2883 return *(unsigned int *) (extra + idx);
2884 }
2885 else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
2886 {
2887 /* No valid character. Match it as a single byte
2888 character. */
2889 return collseqmb[br_elem->opr.name[0]];
2890 }
2891 }
2892 else if (sym_name_len == 1)
2893 return collseqmb[br_elem->opr.name[0]];
2894 }
2895 return UINT_MAX;
2896 }
2897
2898 /* Local function for parse_bracket_exp used in _LIBC environment.
2899 Build the range expression which starts from START_ELEM, and ends
2900 at END_ELEM. The result are written to MBCSET and SBCSET.
2901 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2902 mbcset->range_ends, is a pointer argument since we may
2903 update it. */
2904
2905 auto inline reg_errcode_t
2906 __attribute ((always_inline))
2907 build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
2908 re_charset_t *mbcset;
2909 int *range_alloc;
2910 bitset_t sbcset;
2911 bracket_elem_t *start_elem, *end_elem;
2912 {
2913 unsigned int ch;
2914 uint32_t start_collseq;
2915 uint32_t end_collseq;
2916
2917 /* Equivalence Classes and Character Classes can't be a range
2918 start/end. */
2919 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
2920 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
2921 0))
2922 return REG_ERANGE;
2923
2924 start_collseq = lookup_collation_sequence_value (start_elem);
2925 end_collseq = lookup_collation_sequence_value (end_elem);
2926 /* Check start/end collation sequence values. */
2927 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
2928 return REG_ECOLLATE;
2929 if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
2930 return REG_ERANGE;
2931
2932 /* Got valid collation sequence values, add them as a new entry.
2933 However, if we have no collation elements, and the character set
2934 is single byte, the single byte character set that we
2935 build below suffices. */
2936 if (nrules > 0 || dfa->mb_cur_max > 1)
2937 {
2938 /* Check the space of the arrays. */
2939 if (BE (*range_alloc == mbcset->nranges, 0))
2940 {
2941 /* There is not enough space, need realloc. */
2942 uint32_t *new_array_start;
2943 uint32_t *new_array_end;
2944 int new_nranges;
2945
2946 /* +1 in case of mbcset->nranges is 0. */
2947 new_nranges = 2 * mbcset->nranges + 1;
2948 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2949 new_nranges);
2950 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2951 new_nranges);
2952
2953 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2954 return REG_ESPACE;
2955
2956 mbcset->range_starts = new_array_start;
2957 mbcset->range_ends = new_array_end;
2958 *range_alloc = new_nranges;
2959 }
2960
2961 mbcset->range_starts[mbcset->nranges] = start_collseq;
2962 mbcset->range_ends[mbcset->nranges++] = end_collseq;
2963 }
2964
2965 /* Build the table for single byte characters. */
2966 for (ch = 0; ch < SBC_MAX; ch++)
2967 {
2968 uint32_t ch_collseq;
2969 /*
2970 if (MB_CUR_MAX == 1)
2971 */
2972 if (nrules == 0)
2973 ch_collseq = collseqmb[ch];
2974 else
2975 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
2976 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2977 bitset_set (sbcset, ch);
2978 }
2979 return REG_NOERROR;
2980 }
2981
2982 /* Local function for parse_bracket_exp used in _LIBC environment.
2983 Build the collating element which is represented by NAME.
2984 The result are written to MBCSET and SBCSET.
2985 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2986 pointer argument since we may update it. */
2987
2988 auto inline reg_errcode_t
2989 __attribute ((always_inline))
2990 build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
2991 re_charset_t *mbcset;
2992 int *coll_sym_alloc;
2993 bitset_t sbcset;
2994 const unsigned char *name;
2995 {
2996 int32_t elem, idx;
2997 size_t name_len = strlen ((const char *) name);
2998 if (nrules != 0)
2999 {
3000 elem = seek_collating_symbol_entry (name, name_len);
3001 if (symb_table[2 * elem] != 0)
3002 {
3003 /* We found the entry. */
3004 idx = symb_table[2 * elem + 1];
3005 /* Skip the name of collating element name. */
3006 idx += 1 + extra[idx];
3007 }
3008 else if (symb_table[2 * elem] == 0 && name_len == 1)
3009 {
3010 /* No valid character, treat it as a normal
3011 character. */
3012 bitset_set (sbcset, name[0]);
3013 return REG_NOERROR;
3014 }
3015 else
3016 return REG_ECOLLATE;
3017
3018 /* Got valid collation sequence, add it as a new entry. */
3019 /* Check the space of the arrays. */
3020 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
3021 {
3022 /* Not enough, realloc it. */
3023 /* +1 in case of mbcset->ncoll_syms is 0. */
3024 int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
3025 /* Use realloc since mbcset->coll_syms is NULL
3026 if *alloc == 0. */
3027 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
3028 new_coll_sym_alloc);
3029 if (BE (new_coll_syms == NULL, 0))
3030 return REG_ESPACE;
3031 mbcset->coll_syms = new_coll_syms;
3032 *coll_sym_alloc = new_coll_sym_alloc;
3033 }
3034 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
3035 return REG_NOERROR;
3036 }
3037 else
3038 {
3039 if (BE (name_len != 1, 0))
3040 return REG_ECOLLATE;
3041 else
3042 {
3043 bitset_set (sbcset, name[0]);
3044 return REG_NOERROR;
3045 }
3046 }
3047 }
3048#endif
3049
3050 re_token_t br_token;
3051 re_bitset_ptr_t sbcset;
3052#ifdef RE_ENABLE_I18N
3053 re_charset_t *mbcset;
3054 int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
3055 int equiv_class_alloc = 0, char_class_alloc = 0;
3056#endif /* not RE_ENABLE_I18N */
3057 int non_match = 0;
3058 bin_tree_t *work_tree;
3059 int token_len;
3060 int first_round = 1;
3061#ifdef _LIBC
3062 collseqmb = (const unsigned char *)
3063 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3064 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3065 if (nrules)
3066 {
3067 /*
3068 if (MB_CUR_MAX > 1)
3069 */
3070 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3071 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3072 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3073 _NL_COLLATE_SYMB_TABLEMB);
3074 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3075 _NL_COLLATE_SYMB_EXTRAMB);
3076 }
3077#endif
3078 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3079#ifdef RE_ENABLE_I18N
3080 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3081#endif /* RE_ENABLE_I18N */
3082#ifdef RE_ENABLE_I18N
3083 if (BE (sbcset == NULL || mbcset == NULL, 0))
3084#else
3085 if (BE (sbcset == NULL, 0))
3086#endif /* RE_ENABLE_I18N */
3087 {
3088 *err = REG_ESPACE;
3089 return NULL;
3090 }
3091
3092 token_len = peek_token_bracket (token, regexp, syntax);
3093 if (BE (token->type == END_OF_RE, 0))
3094 {
3095 *err = REG_BADPAT;
3096 goto parse_bracket_exp_free_return;
3097 }
3098 if (token->type == OP_NON_MATCH_LIST)
3099 {
3100#ifdef RE_ENABLE_I18N
3101 mbcset->non_match = 1;
3102#endif /* not RE_ENABLE_I18N */
3103 non_match = 1;
3104 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3105 bitset_set (sbcset, '\n');
3106 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3107 token_len = peek_token_bracket (token, regexp, syntax);
3108 if (BE (token->type == END_OF_RE, 0))
3109 {
3110 *err = REG_BADPAT;
3111 goto parse_bracket_exp_free_return;
3112 }
3113 }
3114
3115 /* We treat the first ']' as a normal character. */
3116 if (token->type == OP_CLOSE_BRACKET)
3117 token->type = CHARACTER;
3118
3119 while (1)
3120 {
3121 bracket_elem_t start_elem, end_elem;
3122 unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
3123 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
3124 reg_errcode_t ret;
3125 int token_len2 = 0, is_range_exp = 0;
3126 re_token_t token2;
3127
3128 start_elem.opr.name = start_name_buf;
3129 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3130 syntax, first_round);
3131 if (BE (ret != REG_NOERROR, 0))
3132 {
3133 *err = ret;
3134 goto parse_bracket_exp_free_return;
3135 }
3136 first_round = 0;
3137
3138 /* Get information about the next token. We need it in any case. */
3139 token_len = peek_token_bracket (token, regexp, syntax);
3140
3141 /* Do not check for ranges if we know they are not allowed. */
3142 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3143 {
3144 if (BE (token->type == END_OF_RE, 0))
3145 {
3146 *err = REG_EBRACK;
3147 goto parse_bracket_exp_free_return;
3148 }
3149 if (token->type == OP_CHARSET_RANGE)
3150 {
3151 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
3152 token_len2 = peek_token_bracket (&token2, regexp, syntax);
3153 if (BE (token2.type == END_OF_RE, 0))
3154 {
3155 *err = REG_EBRACK;
3156 goto parse_bracket_exp_free_return;
3157 }
3158 if (token2.type == OP_CLOSE_BRACKET)
3159 {
3160 /* We treat the last '-' as a normal character. */
3161 re_string_skip_bytes (regexp, -token_len);
3162 token->type = CHARACTER;
3163 }
3164 else
3165 is_range_exp = 1;
3166 }
3167 }
3168
3169 if (is_range_exp == 1)
3170 {
3171 end_elem.opr.name = end_name_buf;
3172 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3173 dfa, syntax, 1);
3174 if (BE (ret != REG_NOERROR, 0))
3175 {
3176 *err = ret;
3177 goto parse_bracket_exp_free_return;
3178 }
3179
3180 token_len = peek_token_bracket (token, regexp, syntax);
3181
3182#ifdef _LIBC
3183 *err = build_range_exp (sbcset, mbcset, &range_alloc,
3184 &start_elem, &end_elem);
3185#else
3186# ifdef RE_ENABLE_I18N
3187 *err = build_range_exp (sbcset,
3188 dfa->mb_cur_max > 1 ? mbcset : NULL,
3189 &range_alloc, &start_elem, &end_elem);
3190# else
3191 *err = build_range_exp (sbcset, &start_elem, &end_elem);
3192# endif
3193#endif /* RE_ENABLE_I18N */
3194 if (BE (*err != REG_NOERROR, 0))
3195 goto parse_bracket_exp_free_return;
3196 }
3197 else
3198 {
3199 switch (start_elem.type)
3200 {
3201 case SB_CHAR:
3202 bitset_set (sbcset, start_elem.opr.ch);
3203 break;
3204#ifdef RE_ENABLE_I18N
3205 case MB_CHAR:
3206 /* Check whether the array has enough space. */
3207 if (BE (mbchar_alloc == mbcset->nmbchars, 0))
3208 {
3209 wchar_t *new_mbchars;
3210 /* Not enough, realloc it. */
3211 /* +1 in case of mbcset->nmbchars is 0. */
3212 mbchar_alloc = 2 * mbcset->nmbchars + 1;
3213 /* Use realloc since array is NULL if *alloc == 0. */
3214 new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
3215 mbchar_alloc);
3216 if (BE (new_mbchars == NULL, 0))
3217 goto parse_bracket_exp_espace;
3218 mbcset->mbchars = new_mbchars;
3219 }
3220 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3221 break;
3222#endif /* RE_ENABLE_I18N */
3223 case EQUIV_CLASS:
3224 *err = build_equiv_class (sbcset,
3225#ifdef RE_ENABLE_I18N
3226 mbcset, &equiv_class_alloc,
3227#endif /* RE_ENABLE_I18N */
3228 start_elem.opr.name);
3229 if (BE (*err != REG_NOERROR, 0))
3230 goto parse_bracket_exp_free_return;
3231 break;
3232 case COLL_SYM:
3233 *err = build_collating_symbol (sbcset,
3234#ifdef RE_ENABLE_I18N
3235 mbcset, &coll_sym_alloc,
3236#endif /* RE_ENABLE_I18N */
3237 start_elem.opr.name);
3238 if (BE (*err != REG_NOERROR, 0))
3239 goto parse_bracket_exp_free_return;
3240 break;
3241 case CHAR_CLASS:
3242 *err = build_charclass (regexp->trans, sbcset,
3243#ifdef RE_ENABLE_I18N
3244 mbcset, &char_class_alloc,
3245#endif /* RE_ENABLE_I18N */
3246 (const char *) start_elem.opr.name, syntax);
3247 if (BE (*err != REG_NOERROR, 0))
3248 goto parse_bracket_exp_free_return;
3249 break;
3250 default:
3251 assert (0);
3252 break;
3253 }
3254 }
3255 if (BE (token->type == END_OF_RE, 0))
3256 {
3257 *err = REG_EBRACK;
3258 goto parse_bracket_exp_free_return;
3259 }
3260 if (token->type == OP_CLOSE_BRACKET)
3261 break;
3262 }
3263
3264 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3265
3266 /* If it is non-matching list. */
3267 if (non_match)
3268 bitset_not (sbcset);
3269
3270#ifdef RE_ENABLE_I18N
3271 /* Ensure only single byte characters are set. */
3272 if (dfa->mb_cur_max > 1)
3273 bitset_mask (sbcset, dfa->sb_char);
3274
3275 if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
3276 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
3277 || mbcset->non_match)))
3278 {
3279 bin_tree_t *mbc_tree;
3280 int sbc_idx;
3281 /* Build a tree for complex bracket. */
3282 dfa->has_mb_node = 1;
3283 br_token.type = COMPLEX_BRACKET;
3284 br_token.opr.mbcset = mbcset;
3285 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3286 if (BE (mbc_tree == NULL, 0))
3287 goto parse_bracket_exp_espace;
3288 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
3289 if (sbcset[sbc_idx])
3290 break;
3291 /* If there are no bits set in sbcset, there is no point
3292 of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
3293 if (sbc_idx < BITSET_WORDS)
3294 {
3295 /* Build a tree for simple bracket. */
3296 br_token.type = SIMPLE_BRACKET;
3297 br_token.opr.sbcset = sbcset;
3298 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3299 if (BE (work_tree == NULL, 0))
3300 goto parse_bracket_exp_espace;
3301
3302 /* Then join them by ALT node. */
3303 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3304 if (BE (work_tree == NULL, 0))
3305 goto parse_bracket_exp_espace;
3306 }
3307 else
3308 {
3309 re_free (sbcset);
3310 work_tree = mbc_tree;
3311 }
3312 }
3313 else
3314#endif /* not RE_ENABLE_I18N */
3315 {
3316#ifdef RE_ENABLE_I18N
3317 free_charset (mbcset);
3318#endif
3319 /* Build a tree for simple bracket. */
3320 br_token.type = SIMPLE_BRACKET;
3321 br_token.opr.sbcset = sbcset;
3322 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3323 if (BE (work_tree == NULL, 0))
3324 goto parse_bracket_exp_espace;
3325 }
3326 return work_tree;
3327
3328 parse_bracket_exp_espace:
3329 *err = REG_ESPACE;
3330 parse_bracket_exp_free_return:
3331 re_free (sbcset);
3332#ifdef RE_ENABLE_I18N
3333 free_charset (mbcset);
3334#endif /* RE_ENABLE_I18N */
3335 return NULL;
3336}
3337
3338/* Parse an element in the bracket expression. */
3339
3340static reg_errcode_t
3341parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
3342 re_token_t *token, int token_len,
3343 UNUSED_PARAM re_dfa_t *dfa, reg_syntax_t syntax,
3344 int accept_hyphen)
3345{
3346#ifdef RE_ENABLE_I18N
3347 int cur_char_size;
3348 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3349 if (cur_char_size > 1)
3350 {
3351 elem->type = MB_CHAR;
3352 elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
3353 re_string_skip_bytes (regexp, cur_char_size);
3354 return REG_NOERROR;
3355 }
3356#endif /* RE_ENABLE_I18N */
3357 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3358 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
3359 || token->type == OP_OPEN_EQUIV_CLASS)
3360 return parse_bracket_symbol (elem, regexp, token);
3361 if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
3362 {
3363 /* A '-' must only appear as anything but a range indicator before
3364 the closing bracket. Everything else is an error. */
3365 re_token_t token2;
3366 (void) peek_token_bracket (&token2, regexp, syntax);
3367 if (token2.type != OP_CLOSE_BRACKET)
3368 /* The actual error value is not standardized since this whole
3369 case is undefined. But ERANGE makes good sense. */
3370 return REG_ERANGE;
3371 }
3372 elem->type = SB_CHAR;
3373 elem->opr.ch = token->opr.c;
3374 return REG_NOERROR;
3375}
3376
3377/* Parse a bracket symbol in the bracket expression. Bracket symbols are
3378 such as [:<character_class>:], [.<collating_element>.], and
3379 [=<equivalent_class>=]. */
3380
3381static reg_errcode_t
3382parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
3383 re_token_t *token)
3384{
3385 unsigned char ch, delim = token->opr.c;
3386 int i = 0;
3387 if (re_string_eoi(regexp))
3388 return REG_EBRACK;
3389 for (;; ++i)
3390 {
3391 if (i >= BRACKET_NAME_BUF_SIZE)
3392 return REG_EBRACK;
3393 if (token->type == OP_OPEN_CHAR_CLASS)
3394 ch = re_string_fetch_byte_case (regexp);
3395 else
3396 ch = re_string_fetch_byte (regexp);
3397 if (re_string_eoi(regexp))
3398 return REG_EBRACK;
3399 if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
3400 break;
3401 elem->opr.name[i] = ch;
3402 }
3403 re_string_skip_bytes (regexp, 1);
3404 elem->opr.name[i] = '\0';
3405 switch (token->type)
3406 {
3407 case OP_OPEN_COLL_ELEM:
3408 elem->type = COLL_SYM;
3409 break;
3410 case OP_OPEN_EQUIV_CLASS:
3411 elem->type = EQUIV_CLASS;
3412 break;
3413 case OP_OPEN_CHAR_CLASS:
3414 elem->type = CHAR_CLASS;
3415 break;
3416 default:
3417 break;
3418 }
3419 return REG_NOERROR;
3420}
3421
3422 /* Helper function for parse_bracket_exp.
3423 Build the equivalence class which is represented by NAME.
3424 The result are written to MBCSET and SBCSET.
3425 EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3426 is a pointer argument since we may update it. */
3427
3428static reg_errcode_t
3429#ifdef RE_ENABLE_I18N
3430build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3431 int *equiv_class_alloc, const unsigned char *name)
3432#else /* not RE_ENABLE_I18N */
3433build_equiv_class (bitset_t sbcset, const unsigned char *name)
3434#endif /* not RE_ENABLE_I18N */
3435{
3436#ifdef _LIBC
3437 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3438 if (nrules != 0)
3439 {
3440 const int32_t *table, *indirect;
3441 const unsigned char *weights, *extra, *cp;
3442 unsigned char char_buf[2];
3443 int32_t idx1, idx2;
3444 unsigned int ch;
3445 size_t len;
3446 /* This #include defines a local function! */
3447# include <locale/weight.h>
3448 /* Calculate the index for equivalence class. */
3449 cp = name;
3450 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3451 weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3452 _NL_COLLATE_WEIGHTMB);
3453 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3454 _NL_COLLATE_EXTRAMB);
3455 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3456 _NL_COLLATE_INDIRECTMB);
3457 idx1 = findidx (&cp);
3458 if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
3459 /* This isn't a valid character. */
3460 return REG_ECOLLATE;
3461
3462 /* Build single byte matcing table for this equivalence class. */
3463 char_buf[1] = (unsigned char) '\0';
3464 len = weights[idx1 & 0xffffff];
3465 for (ch = 0; ch < SBC_MAX; ++ch)
3466 {
3467 char_buf[0] = ch;
3468 cp = char_buf;
3469 idx2 = findidx (&cp);
3470/*
3471 idx2 = table[ch];
3472*/
3473 if (idx2 == 0)
3474 /* This isn't a valid character. */
3475 continue;
3476 /* Compare only if the length matches and the collation rule
3477 index is the same. */
3478 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
3479 {
3480 int cnt = 0;
3481
3482 while (cnt <= len &&
3483 weights[(idx1 & 0xffffff) + 1 + cnt]
3484 == weights[(idx2 & 0xffffff) + 1 + cnt])
3485 ++cnt;
3486
3487 if (cnt > len)
3488 bitset_set (sbcset, ch);
3489 }
3490 }
3491 /* Check whether the array has enough space. */
3492 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
3493 {
3494 /* Not enough, realloc it. */
3495 /* +1 in case of mbcset->nequiv_classes is 0. */
3496 int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
3497 /* Use realloc since the array is NULL if *alloc == 0. */
3498 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
3499 int32_t,
3500 new_equiv_class_alloc);
3501 if (BE (new_equiv_classes == NULL, 0))
3502 return REG_ESPACE;
3503 mbcset->equiv_classes = new_equiv_classes;
3504 *equiv_class_alloc = new_equiv_class_alloc;
3505 }
3506 mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3507 }
3508 else
3509#endif /* _LIBC */
3510 {
3511 if (BE (strlen ((const char *) name) != 1, 0))
3512 return REG_ECOLLATE;
3513 bitset_set (sbcset, *name);
3514 }
3515 return REG_NOERROR;
3516}
3517
3518 /* Helper function for parse_bracket_exp.
3519 Build the character class which is represented by NAME.
3520 The result are written to MBCSET and SBCSET.
3521 CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3522 is a pointer argument since we may update it. */
3523
3524static reg_errcode_t
3525#ifdef RE_ENABLE_I18N
3526build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3527 re_charset_t *mbcset, int *char_class_alloc,
3528 const char *class_name, reg_syntax_t syntax)
3529#else /* not RE_ENABLE_I18N */
3530build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3531 const char *class_name, reg_syntax_t syntax)
3532#endif /* not RE_ENABLE_I18N */
3533{
3534 int i;
3535
3536 /* In case of REG_ICASE "upper" and "lower" match the both of
3537 upper and lower cases. */
3538 if ((syntax & RE_ICASE)
3539 && (strcmp (class_name, "upper") == 0 || strcmp (class_name, "lower") == 0))
3540 class_name = "alpha";
3541
3542#ifdef RE_ENABLE_I18N
3543 /* Check the space of the arrays. */
3544 if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
3545 {
3546 /* Not enough, realloc it. */
3547 /* +1 in case of mbcset->nchar_classes is 0. */
3548 int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
3549 /* Use realloc since array is NULL if *alloc == 0. */
3550 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
3551 new_char_class_alloc);
3552 if (BE (new_char_classes == NULL, 0))
3553 return REG_ESPACE;
3554 mbcset->char_classes = new_char_classes;
3555 *char_class_alloc = new_char_class_alloc;
3556 }
3557 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (class_name);
3558#endif /* RE_ENABLE_I18N */
3559
3560#define BUILD_CHARCLASS_LOOP(ctype_func) \
3561 do { \
3562 if (BE (trans != NULL, 0)) \
3563 { \
3564 for (i = 0; i < SBC_MAX; ++i) \
3565 if (ctype_func (i)) \
3566 bitset_set (sbcset, trans[i]); \
3567 } \
3568 else \
3569 { \
3570 for (i = 0; i < SBC_MAX; ++i) \
3571 if (ctype_func (i)) \
3572 bitset_set (sbcset, i); \
3573 } \
3574 } while (0)
3575
3576#if 0
3577 if (strcmp (class_name, "alnum") == 0)
3578 BUILD_CHARCLASS_LOOP (isalnum);
3579 else if (strcmp (class_name, "cntrl") == 0)
3580 BUILD_CHARCLASS_LOOP (iscntrl);
3581 else if (strcmp (class_name, "lower") == 0)
3582 BUILD_CHARCLASS_LOOP (islower);
3583 else if (strcmp (class_name, "space") == 0)
3584 BUILD_CHARCLASS_LOOP (isspace);
3585 else if (strcmp (class_name, "alpha") == 0)
3586 BUILD_CHARCLASS_LOOP (isalpha);
3587 else if (strcmp (class_name, "digit") == 0)
3588 BUILD_CHARCLASS_LOOP (isdigit);
3589 else if (strcmp (class_name, "print") == 0)
3590 BUILD_CHARCLASS_LOOP (isprint);
3591 else if (strcmp (class_name, "upper") == 0)
3592 BUILD_CHARCLASS_LOOP (isupper);
3593 else if (strcmp (class_name, "blank") == 0)
3594#ifndef GAWK
3595 BUILD_CHARCLASS_LOOP (isblank);
3596#else
3597 /* see comments above */
3598 BUILD_CHARCLASS_LOOP (is_blank);
3599#endif
3600 else if (strcmp (class_name, "graph") == 0)
3601 BUILD_CHARCLASS_LOOP (isgraph);
3602 else if (strcmp (class_name, "punct") == 0)
3603 BUILD_CHARCLASS_LOOP (ispunct);
3604 else if (strcmp (class_name, "xdigit") == 0)
3605 BUILD_CHARCLASS_LOOP (isxdigit);
3606 else
3607 return REG_ECTYPE;
3608#else
3609 switch (match_class(class_name)) {
3610 case CCLASS_ALNUM:
3611 BUILD_CHARCLASS_LOOP (isalnum);
3612 break;
3613 case CCLASS_CNTRL:
3614 BUILD_CHARCLASS_LOOP (iscntrl);
3615 break;
3616 case CCLASS_LOWER:
3617 BUILD_CHARCLASS_LOOP (islower);
3618 break;
3619 case CCLASS_SPACE:
3620 BUILD_CHARCLASS_LOOP (isspace);
3621 break;
3622 case CCLASS_ALPHA:
3623 BUILD_CHARCLASS_LOOP (isalpha);
3624 break;
3625 case CCLASS_DIGIT:
3626 BUILD_CHARCLASS_LOOP (isdigit);
3627 break;
3628 case CCLASS_PRINT:
3629 BUILD_CHARCLASS_LOOP (isprint);
3630 break;
3631 case CCLASS_UPPER:
3632 BUILD_CHARCLASS_LOOP (isupper);
3633 break;
3634 case CCLASS_BLANK:
3635#ifndef GAWK
3636 BUILD_CHARCLASS_LOOP (isblank);
3637#else
3638 /* see comments above */
3639 BUILD_CHARCLASS_LOOP (is_blank);
3640#endif
3641 break;
3642 case CCLASS_GRAPH:
3643 BUILD_CHARCLASS_LOOP (isgraph);
3644 break;
3645 case CCLASS_PUNCT:
3646 BUILD_CHARCLASS_LOOP (ispunct);
3647 break;
3648 case CCLASS_XDIGIT:
3649 BUILD_CHARCLASS_LOOP (isxdigit);
3650 break;
3651 default:
3652 return REG_ECTYPE;
3653 }
3654#endif
3655
3656 return REG_NOERROR;
3657}
3658
3659static bin_tree_t *
3660build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3661 const char *class_name,
3662 const char *extra, int non_match,
3663 reg_errcode_t *err)
3664{
3665 re_bitset_ptr_t sbcset;
3666#ifdef RE_ENABLE_I18N
3667 re_charset_t *mbcset;
3668 int alloc = 0;
3669#endif /* not RE_ENABLE_I18N */
3670 reg_errcode_t ret;
3671 re_token_t br_token;
3672 bin_tree_t *tree;
3673
3674 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3675#ifdef RE_ENABLE_I18N
3676 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3677#endif /* RE_ENABLE_I18N */
3678
3679#ifdef RE_ENABLE_I18N
3680 if (BE (sbcset == NULL || mbcset == NULL, 0))
3681#else /* not RE_ENABLE_I18N */
3682 if (BE (sbcset == NULL, 0))
3683#endif /* not RE_ENABLE_I18N */
3684 {
3685 *err = REG_ESPACE;
3686 return NULL;
3687 }
3688
3689 if (non_match)
3690 {
3691#ifdef RE_ENABLE_I18N
3692 mbcset->non_match = 1;
3693#endif /* not RE_ENABLE_I18N */
3694 }
3695
3696 /* We don't care the syntax in this case. */
3697 ret = build_charclass (trans, sbcset,
3698#ifdef RE_ENABLE_I18N
3699 mbcset, &alloc,
3700#endif /* RE_ENABLE_I18N */
3701 class_name, 0);
3702
3703 if (BE (ret != REG_NOERROR, 0))
3704 {
3705 re_free (sbcset);
3706#ifdef RE_ENABLE_I18N
3707 free_charset (mbcset);
3708#endif /* RE_ENABLE_I18N */
3709 *err = ret;
3710 return NULL;
3711 }
3712 /* \w match '_' also. */
3713 for (; *extra; extra++)
3714 bitset_set (sbcset, *extra);
3715
3716 /* If it is non-matching list. */
3717 if (non_match)
3718 bitset_not (sbcset);
3719
3720#ifdef RE_ENABLE_I18N
3721 /* Ensure only single byte characters are set. */
3722 if (dfa->mb_cur_max > 1)
3723 bitset_mask (sbcset, dfa->sb_char);
3724#endif
3725
3726 /* Build a tree for simple bracket. */
3727 br_token.type = SIMPLE_BRACKET;
3728 br_token.opr.sbcset = sbcset;
3729 tree = create_token_tree (dfa, NULL, NULL, &br_token);
3730 if (BE (tree == NULL, 0))
3731 goto build_word_op_espace;
3732
3733#ifdef RE_ENABLE_I18N
3734 if (dfa->mb_cur_max > 1)
3735 {
3736 bin_tree_t *mbc_tree;
3737 /* Build a tree for complex bracket. */
3738 br_token.type = COMPLEX_BRACKET;
3739 br_token.opr.mbcset = mbcset;
3740 dfa->has_mb_node = 1;
3741 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3742 if (BE (mbc_tree == NULL, 0))
3743 goto build_word_op_espace;
3744 /* Then join them by ALT node. */
3745 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3746 if (BE (mbc_tree != NULL, 1))
3747 return tree;
3748 }
3749 else
3750 {
3751 free_charset (mbcset);
3752 return tree;
3753 }
3754#else /* not RE_ENABLE_I18N */
3755 return tree;
3756#endif /* not RE_ENABLE_I18N */
3757
3758 build_word_op_espace:
3759 re_free (sbcset);
3760#ifdef RE_ENABLE_I18N
3761 free_charset (mbcset);
3762#endif /* RE_ENABLE_I18N */
3763 *err = REG_ESPACE;
3764 return NULL;
3765}
3766
3767/* This is intended for the expressions like "a{1,3}".
3768 Fetch a number from `input', and return the number.
3769 Return -1, if the number field is empty like "{,1}".
3770 Return -2, if an error has occurred. */
3771
3772static int
3773fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
3774{
3775 int num = -1;
3776 unsigned char c;
3777 while (1)
3778 {
3779 fetch_token (token, input, syntax);
3780 c = token->opr.c;
3781 if (BE (token->type == END_OF_RE, 0))
3782 return -2;
3783 if (token->type == OP_CLOSE_DUP_NUM || c == ',')
3784 break;
3785 num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
3786 ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
3787 num = (num > RE_DUP_MAX) ? -2 : num;
3788 }
3789 return num;
3790}
3791
#ifdef RE_ENABLE_I18N
/* Release the multibyte character set CSET: first every array it
   owns, then the structure itself.  CSET must not be NULL.  */
static void
free_charset (re_charset_t *cset)
{
# ifdef _LIBC
  /* These arrays are only released in the _LIBC build.  */
  re_free (cset->range_ends);
  re_free (cset->range_starts);
  re_free (cset->equiv_classes);
  re_free (cset->coll_syms);
# endif
  re_free (cset->char_classes);
  re_free (cset->mbchars);

  re_free (cset);
}
#endif /* RE_ENABLE_I18N */
3807
3808/* Functions for binary tree operation. */
3809
3810/* Create a tree node. */
3811
3812static bin_tree_t *
3813create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3814 re_token_type_t type)
3815{
3816 re_token_t t;
3817 t.type = type;
3818 return create_token_tree (dfa, left, right, &t);
3819}
3820
3821static bin_tree_t *
3822create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3823 const re_token_t *token)
3824{
3825 bin_tree_t *tree;
3826 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
3827 {
3828 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
3829
3830 if (storage == NULL)
3831 return NULL;
3832 storage->next = dfa->str_tree_storage;
3833 dfa->str_tree_storage = storage;
3834 dfa->str_tree_storage_idx = 0;
3835 }
3836 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3837
3838 tree->parent = NULL;
3839 tree->left = left;
3840 tree->right = right;
3841 tree->token = *token;
3842 tree->token.duplicated = 0;
3843 tree->token.opt_subexp = 0;
3844 tree->first = NULL;
3845 tree->next = NULL;
3846 tree->node_idx = -1;
3847
3848 if (left != NULL)
3849 left->parent = tree;
3850 if (right != NULL)
3851 right->parent = tree;
3852 return tree;
3853}
3854
3855/* Mark the tree SRC as an optional subexpression.
3856 To be called from preorder or postorder. */
3857
3858static reg_errcode_t
3859mark_opt_subexp (void *extra, bin_tree_t *node)
3860{
3861 int idx = (int) (intptr_t) extra;
3862 if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3863 node->token.opt_subexp = 1;
3864
3865 return REG_NOERROR;
3866}
3867
3868/* Free the allocated memory inside NODE. */
3869
3870static void
3871free_token (re_token_t *node)
3872{
3873#ifdef RE_ENABLE_I18N
3874 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
3875 free_charset (node->opr.mbcset);
3876 else
3877#endif /* RE_ENABLE_I18N */
3878 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3879 re_free (node->opr.sbcset);
3880}
3881
3882/* Worker function for tree walking. Free the allocated memory inside NODE
3883 and its children. */
3884
3885static reg_errcode_t
3886free_tree (UNUSED_PARAM void *extra, bin_tree_t *node)
3887{
3888 free_token (&node->token);
3889 return REG_NOERROR;
3890}
3891
3892
3893/* Duplicate the node SRC, and return new node. This is a preorder
3894 visit similar to the one implemented by the generic visitor, but
3895 we need more infrastructure to maintain two parallel trees --- so,
3896 it's easier to duplicate. */
3897
3898static bin_tree_t *
3899duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
3900{
3901 const bin_tree_t *node;
3902 bin_tree_t *dup_root;
3903 bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
3904
3905 for (node = root; ; )
3906 {
3907 /* Create a new tree and link it back to the current parent. */
3908 *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
3909 if (*p_new == NULL)
3910 return NULL;
3911 (*p_new)->parent = dup_node;
3912 (*p_new)->token.duplicated = 1;
3913 dup_node = *p_new;
3914
3915 /* Go to the left node, or up and to the right. */
3916 if (node->left)
3917 {
3918 node = node->left;
3919 p_new = &dup_node->left;
3920 }
3921 else
3922 {
3923 const bin_tree_t *prev = NULL;
3924 while (node->right == prev || node->right == NULL)
3925 {
3926 prev = node;
3927 node = node->parent;
3928 dup_node = dup_node->parent;
3929 if (!node)
3930 return dup_root;
3931 }
3932 node = node->right;
3933 p_new = &dup_node->right;
3934 }
3935 }
3936}
diff --git a/win32/regex.c b/win32/regex.c
new file mode 100644
index 000000000..e40a2ea01
--- /dev/null
+++ b/win32/regex.c
@@ -0,0 +1,90 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA. */
20
21#define HAVE_LIBINTL_H 0
22#define ENABLE_NLS 0
23#define HAVE_ALLOCA 0
24#define NO_MBSUPPORT 1
25#define GAWK 1
26
27/* Make sure no one compiles this code with a C++ compiler. */
28#ifdef __cplusplus
29# error "This is C code, use a C compiler"
30#endif
31
32#ifdef _LIBC
33/* We have to keep the namespace clean. */
34# define regfree(preg) __regfree (preg)
35# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
36# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
37# define regerror(errcode, preg, errbuf, errbuf_size) \
38 __regerror(errcode, preg, errbuf, errbuf_size)
39# define re_set_registers(bu, re, nu, st, en) \
40 __re_set_registers (bu, re, nu, st, en)
41# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
42 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
43# define re_match(bufp, string, size, pos, regs) \
44 __re_match (bufp, string, size, pos, regs)
45# define re_search(bufp, string, size, startpos, range, regs) \
46 __re_search (bufp, string, size, startpos, range, regs)
47# define re_compile_pattern(pattern, length, bufp) \
48 __re_compile_pattern (pattern, length, bufp)
49# define re_set_syntax(syntax) __re_set_syntax (syntax)
50# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
51 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
52# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
53
54# include "../locale/localeinfo.h"
55#endif
56
57#if defined (_MSC_VER)
58#include <stdio.h> /* for size_t */
59#endif
60
61/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
62 GNU regex allows. Include it before <regex.h>, which correctly
63 #undefs RE_DUP_MAX and sets it to the right value. */
64#include <limits.h>
65#include <stdint.h>
66
67#ifdef GAWK
68#undef alloca
69#define alloca alloca_is_bad_you_should_never_use_it
70#endif
71#include <regex.h>
72#include "regex_internal.h"
73
74#include "regex_internal.c"
75#ifdef GAWK
76#define bool int
77#define true (1)
78#define false (0)
79#endif
80#include "regcomp.c"
81#include "regexec.c"
82
83/* Binary backward compatibility. */
84#ifdef _LIBC
85# include <shlib-compat.h>
86# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
87link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
88int re_max_failures = 2000;
89# endif
90#endif
diff --git a/win32/regex.h b/win32/regex.h
new file mode 100644
index 000000000..61c968387
--- /dev/null
+++ b/win32/regex.h
@@ -0,0 +1,582 @@
1#include <stdio.h>
2#include <stddef.h>
3
4/* Definitions for data structures and routines for the regular
5 expression library.
6 Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008
7 Free Software Foundation, Inc.
8 This file is part of the GNU C Library.
9
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
14
15 The GNU C Library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public
21 License along with the GNU C Library; if not, write to the Free
22 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 02110-1301 USA. */
24
25#ifndef _REGEX_H
26#define _REGEX_H 1
27
28#ifdef HAVE_STDDEF_H
29#include <stddef.h>
30#endif
31
32#ifdef HAVE_SYS_TYPES_H
33#include <sys/types.h>
34#endif
35
36#ifndef _LIBC
37#define __USE_GNU 1
38#endif
39
40/* Allow the use in C++ code. */
41#ifdef __cplusplus
42extern "C" {
43#endif
44
45/* The following two types have to be signed and unsigned integer type
46 wide enough to hold a value of a pointer. For most ANSI compilers
47 ptrdiff_t and size_t should be likely OK. Still size of these two
48 types is 2 for Microsoft C. Ugh... */
49typedef long int s_reg_t;
50typedef unsigned long int active_reg_t;
51
52/* The following bits are used to determine the regexp syntax we
53 recognize. The set/not-set meanings are chosen so that Emacs syntax
54 remains the value 0. The bits are given in alphabetical order, and
55 the definitions shifted by one from the previous bit; thus, when we
56 add or remove a bit, only one other definition need change. */
57typedef unsigned long int reg_syntax_t;
58
59#ifdef __USE_GNU
60/* If this bit is not set, then \ inside a bracket expression is literal.
61 If set, then such a \ quotes the following character. */
62# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
63
64/* If this bit is not set, then + and ? are operators, and \+ and \? are
65 literals.
66 If set, then \+ and \? are operators and + and ? are literals. */
67# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
68
69/* If this bit is set, then character classes are supported. They are:
70 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
71 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
72 If not set, then character classes are not supported. */
73# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
74
75/* If this bit is set, then ^ and $ are always anchors (outside bracket
76 expressions, of course).
77 If this bit is not set, then it depends:
78 ^ is an anchor if it is at the beginning of a regular
79 expression or after an open-group or an alternation operator;
80 $ is an anchor if it is at the end of a regular expression, or
81 before a close-group or an alternation operator.
82
83 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
84 POSIX draft 11.2 says that * etc. in leading positions is undefined.
85 We already implemented a previous draft which made those constructs
86 invalid, though, so we haven't changed the code back. */
87# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
88
89/* If this bit is set, then special characters are always special
90 regardless of where they are in the pattern.
91 If this bit is not set, then special characters are special only in
92 some contexts; otherwise they are ordinary. Specifically,
93 * + ? and intervals are only special when not after the beginning,
94 open-group, or alternation operator. */
95# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
96
97/* If this bit is set, then *, +, ?, and { cannot be first in an re or
98 immediately after an alternation or begin-group operator. */
99# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
100
101/* If this bit is set, then . matches newline.
102 If not set, then it doesn't. */
103# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
104
105/* If this bit is set, then . doesn't match NUL.
106 If not set, then it does. */
107# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
108
109/* If this bit is set, nonmatching lists [^...] do not match newline.
110 If not set, they do. */
111# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
112
113/* If this bit is set, either \{...\} or {...} defines an
114 interval, depending on RE_NO_BK_BRACES.
115 If not set, \{, \}, {, and } are literals. */
116# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
117
118/* If this bit is set, +, ? and | aren't recognized as operators.
119 If not set, they are. */
120# define RE_LIMITED_OPS (RE_INTERVALS << 1)
121
122/* If this bit is set, newline is an alternation operator.
123 If not set, newline is literal. */
124# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
125
126/* If this bit is set, then `{...}' defines an interval, and \{ and \}
127 are literals.
128 If not set, then `\{...\}' defines an interval. */
129# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
130
131/* If this bit is set, (...) defines a group, and \( and \) are literals.
132 If not set, \(...\) defines a group, and ( and ) are literals. */
133# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
134
135/* If this bit is set, then \<digit> matches <digit>.
136 If not set, then \<digit> is a back-reference. */
137# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
138
139/* If this bit is set, then | is an alternation operator, and \| is literal.
140 If not set, then \| is an alternation operator, and | is literal. */
141# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
142
143/* If this bit is set, then an ending range point collating lower
144 than the starting range point, as in [z-a], is invalid.
145 If not set, then when ending range point collates lower than the
146 starting range point, the range is ignored. */
147# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
148
149/* If this bit is set, then an unmatched ) is ordinary.
150 If not set, then an unmatched ) is invalid. */
151# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
152
153/* If this bit is set, succeed as soon as we match the whole pattern,
154 without further backtracking. */
155# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
156
157/* If this bit is set, do not process the GNU regex operators.
158 If not set, then the GNU regex operators are recognized. */
159# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
160
161/* If this bit is set, a syntactically invalid interval is treated as
162 a string of ordinary characters. For example, the ERE 'a{1' is
163 treated as 'a\{1'. */
164# define RE_INVALID_INTERVAL_ORD (RE_NO_GNU_OPS << 1)
165
166/* If this bit is set, then ignore case when matching.
167 If not set, then case is significant. */
168# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
169
170/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
171 for ^, because it is difficult to scan the regex backwards to find
172 whether ^ should be special. */
173# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
174
175/* If this bit is set, then \{ cannot be first in a BRE or
176 immediately after an alternation or begin-group operator. */
177# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
178
179/* If this bit is set, then no_sub will be set to 1 during
180 re_compile_pattern. */
181#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
182#endif
183
184/* This global variable defines the particular regexp syntax to use (for
185 some interfaces). When a regexp is compiled, the syntax used is
186 stored in the pattern buffer, so changing this does not affect
187 already-compiled regexps. */
188extern reg_syntax_t re_syntax_options;
189
190#ifdef __USE_GNU
191/* Define combinations of the above bits for the standard possibilities.
192 (The [[[ comments delimit what gets put into the Texinfo file, so
193 don't delete them!) */
194/* [[[begin syntaxes]]] */
195#define RE_SYNTAX_EMACS 0
196
197#define RE_SYNTAX_AWK \
198 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
199 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
200 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
201 | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
202 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
203
204#define RE_SYNTAX_GNU_AWK \
205 ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
206 | RE_INVALID_INTERVAL_ORD) \
207 & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \
208 | RE_CONTEXT_INVALID_OPS ))
209
210#define RE_SYNTAX_POSIX_AWK \
211 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
212 | RE_INTERVALS | RE_NO_GNU_OPS \
213 | RE_INVALID_INTERVAL_ORD)
214
215#define RE_SYNTAX_GREP \
216 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
217 | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
218 | RE_NEWLINE_ALT)
219
220#define RE_SYNTAX_EGREP \
221 (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
222 | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
223 | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
224 | RE_NO_BK_VBAR)
225
226#define RE_SYNTAX_POSIX_EGREP \
227 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
228 | RE_INVALID_INTERVAL_ORD)
229
230/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
231#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
232
233#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
234
235/* Syntax bits common to both basic and extended POSIX regex syntax. */
236#define _RE_SYNTAX_POSIX_COMMON \
237 (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
238 | RE_INTERVALS | RE_NO_EMPTY_RANGES)
239
240#define RE_SYNTAX_POSIX_BASIC \
241 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
242
243/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
244 RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
245 isn't minimal, since other operators, such as \`, aren't disabled. */
246#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
247 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
248
249#define RE_SYNTAX_POSIX_EXTENDED \
250 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
251 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
252 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
253 | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
254
255/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
256 removed and RE_NO_BK_REFS is added. */
257#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
258 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
259 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
260 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
261 | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
262/* [[[end syntaxes]]] */
263
264/* Maximum number of duplicates an interval can allow. Some systems
265 (erroneously) define this in other header files, but we want our
266 value, so remove any previous define. */
267# ifdef RE_DUP_MAX
268# undef RE_DUP_MAX
269# endif
270/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
271# define RE_DUP_MAX (0x7fff)
272#endif
273
274
275/* POSIX `cflags' bits (i.e., information for `regcomp'). */
276
277/* If this bit is set, then use extended regular expression syntax.
278 If not set, then use basic regular expression syntax. */
279#define REG_EXTENDED 1
280
281/* If this bit is set, then ignore case when matching.
282 If not set, then case is significant. */
283#define REG_ICASE (REG_EXTENDED << 1)
284
285/* If this bit is set, then anchors match at newline
286 characters in the string.
287 If not set, then anchors do not match at newlines. */
288#define REG_NEWLINE (REG_ICASE << 1)
289
290/* If this bit is set, then report only success or fail in regexec.
291 If not set, then returns differ between not matching and errors. */
292#define REG_NOSUB (REG_NEWLINE << 1)
293
294
295/* POSIX `eflags' bits (i.e., information for regexec). */
296
297/* If this bit is set, then the beginning-of-line operator doesn't match
298 the beginning of the string (presumably because it's not the
299 beginning of a line).
300 If not set, then the beginning-of-line operator does match the
301 beginning of the string. */
302#define REG_NOTBOL 1
303
304/* Like REG_NOTBOL, except for the end-of-line. */
305#define REG_NOTEOL (1 << 1)
306
307/* Use PMATCH[0] to delimit the start and end of the search in the
308 buffer. */
309#define REG_STARTEND (1 << 2)
310
311
312/* Error codes for the regex routines. If any error codes are removed,
313 changed, or added, update the `re_error_msg' table in regex.c. */
314typedef enum
315{
316#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K
317 REG_ENOSYS = -1, /* This will never happen for this implementation. */
318#endif
319
320 REG_NOERROR = 0, /* Success. */
321 REG_NOMATCH, /* Didn't find a match (for regexec). */
322
323 /* POSIX regcomp return error codes. (In the order listed in the
324 standard.) */
325 REG_BADPAT, /* Invalid pattern. */
326 REG_ECOLLATE, /* Invalid collating element. */
327 REG_ECTYPE, /* Invalid character class name. */
328 REG_EESCAPE, /* Trailing backslash. */
329 REG_ESUBREG, /* Invalid back reference. */
330 REG_EBRACK, /* Unmatched left bracket. */
331 REG_EPAREN, /* Parenthesis imbalance. */
332 REG_EBRACE, /* Unmatched \{. */
333 REG_BADBR, /* Invalid contents of \{\}. */
334 REG_ERANGE, /* Invalid range end. */
335 REG_ESPACE, /* Ran out of memory. */
336 REG_BADRPT, /* No preceding re for repetition op. */
337
338 /* Error codes we've added. */
339 REG_EEND, /* Premature end. */
340 REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
341 REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
342} reg_errcode_t;
343
344/* This data structure represents a compiled pattern. Before calling
345 the pattern compiler, the fields `buffer', `allocated', `fastmap',
346 `translate', and `no_sub' can be set. After the pattern has been
347 compiled, the `re_nsub' field is available. All other fields are
348 private to the regex routines. */
349
350#ifndef RE_TRANSLATE_TYPE
351# define __RE_TRANSLATE_TYPE unsigned char *
352# ifdef __USE_GNU
353# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
354# endif
355#endif
356
357#ifdef __USE_GNU
358# define __REPB_PREFIX(name) name
359#else
360# define __REPB_PREFIX(name) __##name
361#endif
362
363struct re_pattern_buffer
364{ /* Fields wrapped in __REPB_PREFIX get a `__' prefix unless __USE_GNU is defined. */
365 /* Space that holds the compiled pattern. It is declared as
366 `unsigned char *' because its elements are sometimes used as
367 array indexes. */
368 unsigned char *__REPB_PREFIX(buffer);
369
370 /* Number of bytes to which `buffer' points. */
371 unsigned long int __REPB_PREFIX(allocated);
372
373 /* Number of bytes actually used in `buffer'. */
374 unsigned long int __REPB_PREFIX(used);
375
376 /* Syntax setting with which the pattern was compiled. */
377 reg_syntax_t __REPB_PREFIX(syntax);
378
379 /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
380 fastmap, if there is one, to skip over impossible starting points
381 for matches. */
382 char *__REPB_PREFIX(fastmap);
383
384 /* Either a translate table to apply to all characters before
385 comparing them, or zero for no translation. The translation is
386 applied to a pattern when it is compiled and to a string when it
387 is matched. */
388 __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
389
390 /* Number of subexpressions found by the compiler. */
391 size_t re_nsub;
392
393 /* Zero if this pattern cannot match the empty string, one otherwise.
394 Well, in truth it's used only in `re_search_2', to see whether or
395 not we should use the fastmap, so we don't set this absolutely
396 perfectly; see `re_compile_fastmap' (the `duplicate' case). */
397 unsigned __REPB_PREFIX(can_be_null) : 1;
398
399 /* If REGS_UNALLOCATED, allocate space in the `regs' structure
400 for `max (RE_NREGS, re_nsub + 1)' groups.
401 If REGS_REALLOCATE, reallocate space if necessary.
402 If REGS_FIXED, use what's there. */
403#ifdef __USE_GNU
404# define REGS_UNALLOCATED 0
405# define REGS_REALLOCATE 1
406# define REGS_FIXED 2
407#endif
408 unsigned __REPB_PREFIX(regs_allocated) : 2;
409
410 /* Set to zero when `regex_compile' compiles a pattern; set to one
411 by `re_compile_fastmap' if it updates the fastmap. */
412 unsigned __REPB_PREFIX(fastmap_accurate) : 1;
413
414 /* If set, `re_match_2' does not return information about
415 subexpressions. */
416 unsigned __REPB_PREFIX(no_sub) : 1;
417
418 /* If set, a beginning-of-line anchor doesn't match at the beginning
419 of the string. */
420 unsigned __REPB_PREFIX(not_bol) : 1;
421
422 /* Similarly for an end-of-line anchor: if set, it doesn't match at
423 the end of the string. */
424 unsigned __REPB_PREFIX(not_eol) : 1;
425
426 /* If true, an anchor at a newline matches. */
427 unsigned __REPB_PREFIX(newline_anchor) : 1;
428};
428
429typedef struct re_pattern_buffer regex_t;
430
431/* Type for byte offsets within the string. POSIX mandates this. */
432typedef int regoff_t;
433
434
435#ifdef __USE_GNU
436/* This is the structure we store register match data in. See
437 regex.texinfo for a full description of what registers match. */
438struct re_registers
439{
440 unsigned num_regs; /* Number of registers held in START and END. */
441 regoff_t *start; /* Per-register start offsets (see re_set_registers). */
442 regoff_t *end; /* Per-register end offsets (see re_set_registers). */
443};
444
445
446/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
447 `re_match_2' returns information about at least this many registers
448 the first time a `regs' structure is passed. */
449# ifndef RE_NREGS
450# define RE_NREGS 30
451# endif
452#endif
453
454
455/* POSIX specification for registers. Aside from using different names
456 than `re_registers', POSIX uses an array of structures, instead of a
457 structure of arrays. */
458typedef struct
459{
460 regoff_t rm_so; /* Byte offset from string's start to substring's start. */
461 regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
462} regmatch_t;
463
464/* Declarations for routines. */
465
466#ifdef __USE_GNU
467/* Sets the current default syntax to SYNTAX, and return the old syntax.
468 You can also simply assign to the `re_syntax_options' variable. */
469extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
470
471/* Compile the regular expression PATTERN, with length LENGTH
472 and syntax given by the global `re_syntax_options', into the buffer
473 BUFFER. Return NULL if successful, and an error string if not. */
474extern const char *re_compile_pattern (const char *__pattern, size_t __length,
475 struct re_pattern_buffer *__buffer);
476
477
478/* Compile a fastmap for the compiled pattern in BUFFER; used to
479 accelerate searches. Return 0 if successful and -2 if there was an
480 internal error. */
481extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
482
483
484/* Search in the string STRING (with length LENGTH) for the pattern
485 compiled into BUFFER. Start searching at position START, for RANGE
486 characters. Return the starting position of the match, -1 for no
487 match, or -2 for an internal error. Also return register
488 information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
489extern int re_search (struct re_pattern_buffer *__buffer, const char *__cstring,
490 int __length, int __start, int __range,
491 struct re_registers *__regs);
492
493
494/* Like `re_search', but search in the concatenation of STRING1 and
495 STRING2. Also, stop searching at index START + STOP. */
496extern int re_search_2 (struct re_pattern_buffer *__buffer,
497 const char *__string1, int __length1,
498 const char *__string2, int __length2, int __start,
499 int __range, struct re_registers *__regs, int __stop);
500
501
502/* Like `re_search', but return how many characters in STRING the regexp
503 in BUFFER matched, starting at position START. */
504extern int re_match (struct re_pattern_buffer *__buffer, const char *__cstring,
505 int __length, int __start, struct re_registers *__regs);
506
507
508/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
509extern int re_match_2 (struct re_pattern_buffer *__buffer,
510 const char *__string1, int __length1,
511 const char *__string2, int __length2, int __start,
512 struct re_registers *__regs, int __stop);
513
514
515/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
516 ENDS. Subsequent matches using BUFFER and REGS will use this memory
517 for recording register information. STARTS and ENDS must be
518 allocated with malloc, and must each be at least `NUM_REGS * sizeof
519 (regoff_t)' bytes long.
520
521 If NUM_REGS == 0, then subsequent matches should allocate their own
522 register data.
523
524 Unless this function is called, the first search or match using
525 PATTERN_BUFFER will allocate its own register data, without
526 freeing the old data. */
527extern void re_set_registers (struct re_pattern_buffer *__buffer,
528 struct re_registers *__regs,
529 unsigned int __num_regs,
530 regoff_t *__starts, regoff_t *__ends);
531#endif /* Use GNU */
532
533#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD)
534# ifndef _CRAY
535/* 4.2 bsd compatibility. */
536extern char *re_comp (const char *);
537extern int re_exec (const char *);
538# endif
539#endif
540
541/* GCC 2.95 and later have "__restrict"; C99 compilers have
542 "restrict", and "configure" may have defined "restrict". */
543#ifndef __restrict
544# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
545# if defined restrict || 199901L <= __STDC_VERSION__
546# define __restrict restrict
547# else
548# define __restrict
549# endif
550# endif
551#endif
552/* gcc 3.1 and up support the [restrict] syntax. */
553#ifndef __restrict_arr
554# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
555 && !defined __GNUG__
556# define __restrict_arr __restrict
557# else
558# define __restrict_arr
559# endif
560#endif
561
562/* POSIX compatibility. */
563extern int regcomp (regex_t *__restrict __preg,
564 const char *__restrict __pattern,
565 int __cflags);
566
567extern int regexec (const regex_t *__restrict __preg,
568 const char *__restrict __cstring, size_t __nmatch,
569 regmatch_t __pmatch[__restrict_arr],
570 int __eflags);
571
572extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
573 char *__restrict __errbuf, size_t __errbuf_size);
574
575extern void regfree (regex_t *__preg);
576
577
578#ifdef __cplusplus
579}
580#endif /* C++ */
581
582#endif /* regex.h */
diff --git a/win32/regex_internal.c b/win32/regex_internal.c
new file mode 100644
index 000000000..c33561743
--- /dev/null
+++ b/win32/regex_internal.c
@@ -0,0 +1,1744 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA. */
20
21static void re_string_construct_common (const char *str, int len,
22 re_string_t *pstr,
23 RE_TRANSLATE_TYPE trans, int icase,
24 const re_dfa_t *dfa) internal_function;
25static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
26 const re_node_set *nodes,
27 unsigned int hash) internal_function;
28static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
29 const re_node_set *nodes,
30 unsigned int context,
31 unsigned int hash) internal_function;
32
33#ifdef GAWK
34#undef MAX /* safety */
/* Return the larger of A and B.

   The operands are sizes (this helper is applied to `sizeof' results),
   so the result is returned as size_t as well; returning `int' would
   truncate any value above INT_MAX.  */
static size_t
MAX(size_t a, size_t b)
{
  return (a > b ? a : b);
}
40#endif
41
42/* Functions for string operation. */
43
44/* This function allocate the buffers. It is necessary to call
45 re_string_reconstruct before using the object. */
46
47static reg_errcode_t
48internal_function
49re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
50 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
51{
52 reg_errcode_t ret;
53 int init_buf_len;
54
55 /* Ensure at least one character fits into the buffers. */
56 if (init_len < dfa->mb_cur_max)
57 init_len = dfa->mb_cur_max;
58 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
59 re_string_construct_common (str, len, pstr, trans, icase, dfa);
60
61 ret = re_string_realloc_buffers (pstr, init_buf_len);
62 if (BE (ret != REG_NOERROR, 0))
63 return ret;
64
65 pstr->word_char = dfa->word_char;
66 pstr->word_ops_used = dfa->word_ops_used;
67 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
68 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
69 pstr->valid_raw_len = pstr->valid_len;
70 return REG_NOERROR;
71}
72
73/* This function allocate the buffers, and initialize them. */
74
75static reg_errcode_t
76internal_function
77re_string_construct (re_string_t *pstr, const char *str, int len,
78 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
79{
80 reg_errcode_t ret;
81 memset (pstr, '\0', sizeof (re_string_t));
82 re_string_construct_common (str, len, pstr, trans, icase, dfa);
83
84 if (len > 0)
85 {
86 ret = re_string_realloc_buffers (pstr, len + 1);
87 if (BE (ret != REG_NOERROR, 0))
88 return ret;
89 }
90 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
91
92 if (icase)
93 {
94#ifdef RE_ENABLE_I18N
95 if (dfa->mb_cur_max > 1)
96 {
97 while (1)
98 {
99 ret = build_wcs_upper_buffer (pstr);
100 if (BE (ret != REG_NOERROR, 0))
101 return ret;
102 if (pstr->valid_raw_len >= len)
103 break;
104 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
105 break;
106 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
107 if (BE (ret != REG_NOERROR, 0))
108 return ret;
109 }
110 }
111 else
112#endif /* RE_ENABLE_I18N */
113 build_upper_buffer (pstr);
114 }
115 else
116 {
117#ifdef RE_ENABLE_I18N
118 if (dfa->mb_cur_max > 1)
119 build_wcs_buffer (pstr);
120 else
121#endif /* RE_ENABLE_I18N */
122 {
123 if (trans != NULL)
124 re_string_translate_buffer (pstr);
125 else
126 {
127 pstr->valid_len = pstr->bufs_len;
128 pstr->valid_raw_len = pstr->bufs_len;
129 }
130 }
131 }
132
133 return REG_NOERROR;
134}
135
136/* Helper functions for re_string_allocate, and re_string_construct. */
137
138static reg_errcode_t
139internal_function
140re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
141{
142#ifdef RE_ENABLE_I18N
143 if (pstr->mb_cur_max > 1)
144 {
145 wint_t *new_wcs;
146
147 /* Avoid overflow in realloc. */
148 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int));
149 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
150 return REG_ESPACE;
151
152 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
153 if (BE (new_wcs == NULL, 0))
154 return REG_ESPACE;
155 pstr->wcs = new_wcs;
156 if (pstr->offsets != NULL)
157 {
158 int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
159 if (BE (new_offsets == NULL, 0))
160 return REG_ESPACE;
161 pstr->offsets = new_offsets;
162 }
163 }
164#endif /* RE_ENABLE_I18N */
165 if (pstr->mbs_allocated)
166 {
167 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
168 new_buf_len);
169 if (BE (new_mbs == NULL, 0))
170 return REG_ESPACE;
171 pstr->mbs = new_mbs;
172 }
173 pstr->bufs_len = new_buf_len;
174 return REG_NOERROR;
175}
176
177
178static void
179internal_function
180re_string_construct_common (const char *str, int len, re_string_t *pstr,
181 RE_TRANSLATE_TYPE trans, int icase,
182 const re_dfa_t *dfa)
183{
184 pstr->raw_mbs = (const unsigned char *) str;
185 pstr->len = len;
186 pstr->raw_len = len;
187 pstr->trans = trans;
188 pstr->icase = icase ? 1 : 0;
189 pstr->mbs_allocated = (trans != NULL || icase);
190 pstr->mb_cur_max = dfa->mb_cur_max;
191 pstr->is_utf8 = dfa->is_utf8;
192 pstr->map_notascii = dfa->map_notascii;
193 pstr->stop = pstr->len;
194 pstr->raw_stop = pstr->stop;
195}
196
197#ifdef RE_ENABLE_I18N
198
199/* Build wide character buffer PSTR->WCS.
200 If the byte sequence of the string are:
201 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
202 Then wide character buffer will be:
203 <wc1> , WEOF , <wc2> , WEOF , <wc3>
204 We use WEOF for padding, they indicate that the position isn't
205 a first byte of a multibyte character.
206
207 Note that this function assumes PSTR->VALID_LEN elements are already
208 built and starts from PSTR->VALID_LEN. */
209
210static void
211internal_function
212build_wcs_buffer (re_string_t *pstr)
213{
214#ifdef _LIBC
215 unsigned char buf[MB_LEN_MAX];
216 assert (MB_LEN_MAX >= pstr->mb_cur_max);
217#else
218 unsigned char buf[64];
219#endif
220 mbstate_t prev_st;
221 int byte_idx, end_idx, remain_len;
222 size_t mbclen;
223
224 /* Build the buffers from pstr->valid_len to either pstr->len or
225 pstr->bufs_len. */
226 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
227 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
228 {
229 wchar_t wc;
230 const char *p;
231
232 remain_len = end_idx - byte_idx;
233 prev_st = pstr->cur_state;
234 /* Apply the translation if we need. */
235 if (BE (pstr->trans != NULL, 0))
236 {
237 int i, ch;
238
239 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
240 {
241 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
242 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
243 }
244 p = (const char *) buf;
245 }
246 else
247 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
248 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
249 if (BE (mbclen == (size_t) -2, 0))
250 {
251 /* The buffer doesn't have enough space, finish to build. */
252 pstr->cur_state = prev_st;
253 break;
254 }
255 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
256 {
257 /* We treat these cases as a singlebyte character. */
258 mbclen = 1;
259 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
260 if (BE (pstr->trans != NULL, 0))
261 wc = pstr->trans[wc];
262 pstr->cur_state = prev_st;
263 }
264
265 /* Write wide character and padding. */
266 pstr->wcs[byte_idx++] = wc;
267 /* Write paddings. */
268 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
269 pstr->wcs[byte_idx++] = WEOF;
270 }
271 pstr->valid_len = byte_idx;
272 pstr->valid_raw_len = byte_idx;
273}
274
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
   but for REG_ICASE.

   Starting at PSTR->valid_len, raw input bytes are converted to wide
   characters; lower-case characters are replaced by their upper-case
   form in both PSTR->WCS and PSTR->MBS.  The trailing bytes of a
   multibyte character are marked with WEOF in PSTR->WCS.  If the
   upper-case form has a different byte length than the original,
   PSTR->OFFSETS is allocated on demand and records, for every buffer
   index, the corresponding raw index.

   Returns REG_NOERROR, or REG_ESPACE if PSTR->OFFSETS cannot be
   allocated.  */

static reg_errcode_t
internal_function
build_wcs_upper_buffer (re_string_t *pstr)
{
  mbstate_t prev_st;
  int src_idx, byte_idx, end_idx, remain_len;
  size_t mbclen;
#ifdef _LIBC
  char buf[MB_LEN_MAX];
  assert (MB_LEN_MAX >= pstr->mb_cur_max);
#else
  char buf[64];
#endif

  byte_idx = pstr->valid_len;
  /* Never build past the end of the buffer or of the string.  */
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;

  /* The following optimization assumes that ASCII characters can be
     mapped to wide characters with a simple cast.  */
  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
    {
      /* Fast path: buffer index and raw index are identical here, so
	 BYTE_IDX is used for both.  */
      while (byte_idx < end_idx)
	{
	  wchar_t wc;

	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
	      && mbsinit (&pstr->cur_state))
	    {
	      /* In case of a singlebyte character.  */
	      pstr->mbs[byte_idx]
		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
	      /* The next step uses the assumption that wchar_t is encoded
		 ASCII-safe: all ASCII values can be converted like this.  */
	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
	      ++byte_idx;
	      continue;
	    }

	  remain_len = end_idx - byte_idx;
	  /* Remember the conversion state so it can be restored if the
	     conversion below fails or is incomplete.  */
	  prev_st = pstr->cur_state;
	  mbclen = __mbrtowc (&wc,
			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
			       + byte_idx), remain_len, &pstr->cur_state);
	  /* MBCLEN + 2 > 2 holds exactly when MBCLEN is none of 0,
	     (size_t) -1 and (size_t) -2, i.e. a character was converted.  */
	  if (BE (mbclen + 2 > 2, 1))
	    {
	      wchar_t wcu = wc;
	      if (iswlower (wc))
		{
		  size_t mbcdlen;

		  wcu = towupper (wc);
		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
		  if (BE (mbclen == mbcdlen, 1))
		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
		  else
		    {
		      /* The upper-case form has a different length (or
			 could not be encoded); switch to the general
			 loop below, resuming at this character.  */
		      src_idx = byte_idx;
		      goto offsets_needed;
		    }
		}
	      else
		memcpy (pstr->mbs + byte_idx,
			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
	      pstr->wcs[byte_idx++] = wcu;
	      /* Write paddings.  */
	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
		pstr->wcs[byte_idx++] = WEOF;
	    }
	  else if (mbclen == (size_t) -1 || mbclen == 0)
	    {
	      /* It is an invalid character or '\0'.  Just use the byte.  */
	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
	      pstr->mbs[byte_idx] = ch;
	      /* And also cast it to wide char.  */
	      pstr->wcs[byte_idx++] = (wchar_t) ch;
	      if (BE (mbclen == (size_t) -1, 0))
		pstr->cur_state = prev_st;
	    }
	  else
	    {
	      /* The buffer doesn't have enough space, finish to build.  */
	      pstr->cur_state = prev_st;
	      break;
	    }
	}
      pstr->valid_len = byte_idx;
      pstr->valid_raw_len = byte_idx;
      return REG_NOERROR;
    }
  else
    /* General path: SRC_IDX walks the raw input while BYTE_IDX walks
       the output buffers; the two may diverge.  */
    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
      {
	wchar_t wc;
	const char *p;
	/* Entry point for the fast path above once it finds a character
	   whose case conversion changes its byte length.  */
      offsets_needed:
	remain_len = end_idx - byte_idx;
	prev_st = pstr->cur_state;
	if (BE (pstr->trans != NULL, 0))
	  {
	    int i, ch;

	    /* Translate the next few raw bytes into BUF and convert
	       from there.  */
	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
	      {
		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
		buf[i] = pstr->trans[ch];
	      }
	    p = (const char *) buf;
	  }
	else
	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
	/* True when a character was converted (MBCLEN is not 0, -1, -2).  */
	if (BE (mbclen + 2 > 2, 1))
	  {
	    wchar_t wcu = wc;
	    if (iswlower (wc))
	      {
		size_t mbcdlen;

		wcu = towupper (wc);
		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
		if (BE (mbclen == mbcdlen, 1))
		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
		else if (mbcdlen != (size_t) -1)
		  {
		    /* The upper-case form occupies MBCDLEN bytes instead
		       of MBCLEN: record the index mapping in OFFSETS.  */
		    size_t i;

		    if (byte_idx + mbcdlen > pstr->bufs_len)
		      {
			/* No room for the converted character.  */
			pstr->cur_state = prev_st;
			break;
		      }

		    if (pstr->offsets == NULL)
		      {
			pstr->offsets = re_malloc (int, pstr->bufs_len);

			if (pstr->offsets == NULL)
			  return REG_ESPACE;
		      }
		    if (!pstr->offsets_needed)
		      {
			/* Everything built so far maps one-to-one.  */
			for (i = 0; i < (size_t) byte_idx; ++i)
			  pstr->offsets[i] = i;
			pstr->offsets_needed = 1;
		      }

		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
		    pstr->wcs[byte_idx] = wcu;
		    pstr->offsets[byte_idx] = src_idx;
		    for (i = 1; i < mbcdlen; ++i)
		      {
			/* Extra output bytes all map to the last raw byte
			   of the source character.  */
			pstr->offsets[byte_idx + i]
			  = src_idx + (i < mbclen ? i : mbclen - 1);
			pstr->wcs[byte_idx + i] = WEOF;
		      }
		    /* Adjust the logical length (and stop position) by the
		       size difference and recompute the build limit.  */
		    pstr->len += mbcdlen - mbclen;
		    if (pstr->raw_stop > src_idx)
		      pstr->stop += mbcdlen - mbclen;
		    end_idx = (pstr->bufs_len > pstr->len)
			      ? pstr->len : pstr->bufs_len;
		    byte_idx += mbcdlen;
		    src_idx += mbclen;
		    continue;
		  }
		else
		  /* The upper-case form cannot be encoded; keep the
		     original bytes.  */
		  memcpy (pstr->mbs + byte_idx, p, mbclen);
	      }
	    else
	      memcpy (pstr->mbs + byte_idx, p, mbclen);

	    if (BE (pstr->offsets_needed != 0, 0))
	      {
		size_t i;
		for (i = 0; i < mbclen; ++i)
		  pstr->offsets[byte_idx + i] = src_idx + i;
	      }
	    src_idx += mbclen;

	    pstr->wcs[byte_idx++] = wcu;
	    /* Write paddings.  */
	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
	      pstr->wcs[byte_idx++] = WEOF;
	  }
	else if (mbclen == (size_t) -1 || mbclen == 0)
	  {
	    /* It is an invalid character or '\0'.  Just use the byte.  */
	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];

	    if (BE (pstr->trans != NULL, 0))
	      ch = pstr->trans [ch];
	    pstr->mbs[byte_idx] = ch;

	    if (BE (pstr->offsets_needed != 0, 0))
	      pstr->offsets[byte_idx] = src_idx;
	    ++src_idx;

	    /* And also cast it to wide char.  */
	    pstr->wcs[byte_idx++] = (wchar_t) ch;
	    if (BE (mbclen == (size_t) -1, 0))
	      pstr->cur_state = prev_st;
	  }
	else
	  {
	    /* The buffer doesn't have enough space, finish to build.  */
	    pstr->cur_state = prev_st;
	    break;
	  }
      }
  pstr->valid_len = byte_idx;
  pstr->valid_raw_len = src_idx;
  return REG_NOERROR;
}
490
491/* Skip characters until the index becomes greater than NEW_RAW_IDX.
492 Return the index. */
493
494static int
495internal_function
496re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
497{
498 mbstate_t prev_st;
499 int rawbuf_idx;
500 size_t mbclen;
501 wint_t wc = WEOF;
502
503 /* Skip the characters which are not necessary to check. */
504 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
505 rawbuf_idx < new_raw_idx;)
506 {
507 wchar_t wc2;
508 int remain_len = pstr->len - rawbuf_idx;
509 prev_st = pstr->cur_state;
510 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
511 remain_len, &pstr->cur_state);
512 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
513 {
514 /* We treat these cases as a single byte character. */
515 if (mbclen == 0 || remain_len == 0)
516 wc = L'\0';
517 else
518 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
519 mbclen = 1;
520 pstr->cur_state = prev_st;
521 }
522 else
523 wc = (wint_t) wc2;
524 /* Then proceed the next character. */
525 rawbuf_idx += mbclen;
526 }
527 *last_wc = (wint_t) wc;
528 return rawbuf_idx;
529}
530#endif /* RE_ENABLE_I18N */
531
532/* Build the buffer PSTR->MBS, and apply the translation if we need.
533 This function is used in case of REG_ICASE. */
534
535static void
536internal_function
537build_upper_buffer (re_string_t *pstr)
538{
539 int char_idx, end_idx;
540 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541
542 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
543 {
544 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
545 if (BE (pstr->trans != NULL, 0))
546 ch = pstr->trans[ch];
547 if (islower (ch))
548 pstr->mbs[char_idx] = toupper (ch);
549 else
550 pstr->mbs[char_idx] = ch;
551 }
552 pstr->valid_len = char_idx;
553 pstr->valid_raw_len = char_idx;
554}
555
556/* Apply TRANS to the buffer in PSTR. */
557
558static void
559internal_function
560re_string_translate_buffer (re_string_t *pstr)
561{
562 int buf_idx, end_idx;
563 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
564
565 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
566 {
567 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
568 pstr->mbs[buf_idx] = pstr->trans[ch];
569 }
570
571 pstr->valid_len = buf_idx;
572 pstr->valid_raw_len = buf_idx;
573}
574
575/* This function re-construct the buffers.
576 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
577 convert to upper case in case of REG_ICASE, apply translation. */
578
579static reg_errcode_t
580internal_function
581re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
582{
583 int offset = idx - pstr->raw_mbs_idx;
584 if (BE (offset < 0, 0))
585 {
586 /* Reset buffer. */
587#ifdef RE_ENABLE_I18N
588 if (pstr->mb_cur_max > 1)
589 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
590#endif /* RE_ENABLE_I18N */
591 pstr->len = pstr->raw_len;
592 pstr->stop = pstr->raw_stop;
593 pstr->valid_len = 0;
594 pstr->raw_mbs_idx = 0;
595 pstr->valid_raw_len = 0;
596 pstr->offsets_needed = 0;
597 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
598 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
599 if (!pstr->mbs_allocated)
600 pstr->mbs = (unsigned char *) pstr->raw_mbs;
601 offset = idx;
602 }
603
604 if (BE (offset != 0, 1))
605 {
606 /* Should the already checked characters be kept? */
607 if (BE (offset < pstr->valid_raw_len, 1))
608 {
609 /* Yes, move them to the front of the buffer. */
610#ifdef RE_ENABLE_I18N
611 if (BE (pstr->offsets_needed, 0))
612 {
613 int low = 0, high = pstr->valid_len, mid;
614 do
615 {
616 mid = (high + low) / 2;
617 if (pstr->offsets[mid] > offset)
618 high = mid;
619 else if (pstr->offsets[mid] < offset)
620 low = mid + 1;
621 else
622 break;
623 }
624 while (low < high);
625 if (pstr->offsets[mid] < offset)
626 ++mid;
627 pstr->tip_context = re_string_context_at (pstr, mid - 1,
628 eflags);
629 /* This can be quite complicated, so handle specially
630 only the common and easy case where the character with
631 different length representation of lower and upper
632 case is present at or after offset. */
633 if (pstr->valid_len > offset
634 && mid == offset && pstr->offsets[mid] == offset)
635 {
636 memmove (pstr->wcs, pstr->wcs + offset,
637 (pstr->valid_len - offset) * sizeof (wint_t));
638 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
639 pstr->valid_len -= offset;
640 pstr->valid_raw_len -= offset;
641 for (low = 0; low < pstr->valid_len; low++)
642 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
643 }
644 else
645 {
646 /* Otherwise, just find out how long the partial multibyte
647 character at offset is and fill it with WEOF/255. */
648 pstr->len = pstr->raw_len - idx + offset;
649 pstr->stop = pstr->raw_stop - idx + offset;
650 pstr->offsets_needed = 0;
651 while (mid > 0 && pstr->offsets[mid - 1] == offset)
652 --mid;
653 while (mid < pstr->valid_len)
654 if (pstr->wcs[mid] != WEOF)
655 break;
656 else
657 ++mid;
658 if (mid == pstr->valid_len)
659 pstr->valid_len = 0;
660 else
661 {
662 pstr->valid_len = pstr->offsets[mid] - offset;
663 if (pstr->valid_len)
664 {
665 for (low = 0; low < pstr->valid_len; ++low)
666 pstr->wcs[low] = WEOF;
667 memset (pstr->mbs, 255, pstr->valid_len);
668 }
669 }
670 pstr->valid_raw_len = pstr->valid_len;
671 }
672 }
673 else
674#endif
675 {
676 pstr->tip_context = re_string_context_at (pstr, offset - 1,
677 eflags);
678#ifdef RE_ENABLE_I18N
679 if (pstr->mb_cur_max > 1)
680 memmove (pstr->wcs, pstr->wcs + offset,
681 (pstr->valid_len - offset) * sizeof (wint_t));
682#endif /* RE_ENABLE_I18N */
683 if (BE (pstr->mbs_allocated, 0))
684 memmove (pstr->mbs, pstr->mbs + offset,
685 pstr->valid_len - offset);
686 pstr->valid_len -= offset;
687 pstr->valid_raw_len -= offset;
688#ifdef DEBUG
689 assert (pstr->valid_len > 0);
690#endif
691 }
692 }
693 else
694 {
695#ifdef RE_ENABLE_I18N
696 /* No, skip all characters until IDX. */
697 int prev_valid_len = pstr->valid_len;
698
699 if (BE (pstr->offsets_needed, 0))
700 {
701 pstr->len = pstr->raw_len - idx + offset;
702 pstr->stop = pstr->raw_stop - idx + offset;
703 pstr->offsets_needed = 0;
704 }
705#endif
706 pstr->valid_len = 0;
707#ifdef RE_ENABLE_I18N
708 if (pstr->mb_cur_max > 1)
709 {
710 int wcs_idx;
711 wint_t wc = WEOF;
712
713 if (pstr->is_utf8)
714 {
715 const unsigned char *raw, *p, *end;
716
717 /* Special case UTF-8. Multi-byte chars start with any
718 byte other than 0x80 - 0xbf. */
719 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
720 end = raw + (offset - pstr->mb_cur_max);
721 if (end < pstr->raw_mbs)
722 end = pstr->raw_mbs;
723 p = raw + offset - 1;
724#ifdef _LIBC
725 /* We know the wchar_t encoding is UCS4, so for the simple
726 case, ASCII characters, skip the conversion step. */
727 if (isascii (*p) && BE (pstr->trans == NULL, 1))
728 {
729 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
730 /* pstr->valid_len = 0; */
731 wc = (wchar_t) *p;
732 }
733 else
734#endif
735 for (; p >= end; --p)
736 if ((*p & 0xc0) != 0x80)
737 {
738 mbstate_t cur_state;
739 wchar_t wc2;
740 int mlen = raw + pstr->len - p;
741 unsigned char buf[6];
742 size_t mbclen;
743
744 if (BE (pstr->trans != NULL, 0))
745 {
746 int i = mlen < 6 ? mlen : 6;
747 while (--i >= 0)
748 buf[i] = pstr->trans[p[i]];
749 }
750 /* XXX Don't use mbrtowc, we know which conversion
751 to use (UTF-8 -> UCS4). */
752 memset (&cur_state, 0, sizeof (cur_state));
753 mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
754 &cur_state);
755 if (raw + offset - p <= mbclen
756 && mbclen < (size_t) -2)
757 {
758 memset (&pstr->cur_state, '\0',
759 sizeof (mbstate_t));
760 pstr->valid_len = mbclen - (raw + offset - p);
761 wc = wc2;
762 }
763 break;
764 }
765 }
766
767 if (wc == WEOF)
768 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
769 if (wc == WEOF)
770 pstr->tip_context
771 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
772 else
773 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
774 && IS_WIDE_WORD_CHAR (wc))
775 ? CONTEXT_WORD
776 : ((IS_WIDE_NEWLINE (wc)
777 && pstr->newline_anchor)
778 ? CONTEXT_NEWLINE : 0));
779 if (BE (pstr->valid_len, 0))
780 {
781 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
782 pstr->wcs[wcs_idx] = WEOF;
783 if (pstr->mbs_allocated)
784 memset (pstr->mbs, 255, pstr->valid_len);
785 }
786 pstr->valid_raw_len = pstr->valid_len;
787 }
788 else
789#endif /* RE_ENABLE_I18N */
790 {
791 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
792 pstr->valid_raw_len = 0;
793 if (pstr->trans)
794 c = pstr->trans[c];
795 pstr->tip_context = (bitset_contain (pstr->word_char, c)
796 ? CONTEXT_WORD
797 : ((IS_NEWLINE (c) && pstr->newline_anchor)
798 ? CONTEXT_NEWLINE : 0));
799 }
800 }
801 if (!BE (pstr->mbs_allocated, 0))
802 pstr->mbs += offset;
803 }
804 pstr->raw_mbs_idx = idx;
805 pstr->len -= offset;
806 pstr->stop -= offset;
807
808 /* Then build the buffers. */
809#ifdef RE_ENABLE_I18N
810 if (pstr->mb_cur_max > 1)
811 {
812 if (pstr->icase)
813 {
814 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
815 if (BE (ret != REG_NOERROR, 0))
816 return ret;
817 }
818 else
819 build_wcs_buffer (pstr);
820 }
821 else
822#endif /* RE_ENABLE_I18N */
823 if (BE (pstr->mbs_allocated, 0))
824 {
825 if (pstr->icase)
826 build_upper_buffer (pstr);
827 else if (pstr->trans != NULL)
828 re_string_translate_buffer (pstr);
829 }
830 else
831 pstr->valid_len = pstr->len;
832
833 pstr->cur_idx = 0;
834 return REG_NOERROR;
835}
836
837static unsigned char
838internal_function __attribute ((pure))
839re_string_peek_byte_case (const re_string_t *pstr, int idx)
840{
841 int ch, off;
842
843 /* Handle the common (easiest) cases first. */
844 if (BE (!pstr->mbs_allocated, 1))
845 return re_string_peek_byte (pstr, idx);
846
847#ifdef RE_ENABLE_I18N
848 if (pstr->mb_cur_max > 1
849 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
850 return re_string_peek_byte (pstr, idx);
851#endif
852
853 off = pstr->cur_idx + idx;
854#ifdef RE_ENABLE_I18N
855 if (pstr->offsets_needed)
856 off = pstr->offsets[off];
857#endif
858
859 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
860
861#ifdef RE_ENABLE_I18N
862 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
863 this function returns CAPITAL LETTER I instead of first byte of
864 DOTLESS SMALL LETTER I. The latter would confuse the parser,
865 since peek_byte_case doesn't advance cur_idx in any way. */
866 if (pstr->offsets_needed && !isascii (ch))
867 return re_string_peek_byte (pstr, idx);
868#endif
869
870 return ch;
871}
872
873static unsigned char
874internal_function __attribute ((pure))
875re_string_fetch_byte_case (re_string_t *pstr)
876{
877 if (BE (!pstr->mbs_allocated, 1))
878 return re_string_fetch_byte (pstr);
879
880#ifdef RE_ENABLE_I18N
881 if (pstr->offsets_needed)
882 {
883 int off, ch;
884
885 /* For tr_TR.UTF-8 [[:islower:]] there is
886 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
887 in that case the whole multi-byte character and return
888 the original letter. On the other side, with
889 [[: DOTLESS SMALL LETTER I return [[:I, as doing
890 anything else would complicate things too much. */
891
892 if (!re_string_first_byte (pstr, pstr->cur_idx))
893 return re_string_fetch_byte (pstr);
894
895 off = pstr->offsets[pstr->cur_idx];
896 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
897
898 if (! isascii (ch))
899 return re_string_fetch_byte (pstr);
900
901 re_string_skip_bytes (pstr,
902 re_string_char_size_at (pstr, pstr->cur_idx));
903 return ch;
904 }
905#endif
906
907 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
908}
909
910static void
911internal_function
912re_string_destruct (re_string_t *pstr)
913{
914#ifdef RE_ENABLE_I18N
915 re_free (pstr->wcs);
916 re_free (pstr->offsets);
917#endif /* RE_ENABLE_I18N */
918 if (pstr->mbs_allocated)
919 re_free (pstr->mbs);
920}
921
922/* Return the context at IDX in INPUT. */
923
924static unsigned int
925internal_function
926re_string_context_at (const re_string_t *input, int idx, int eflags)
927{
928 int c;
929 if (BE (idx < 0, 0))
930 /* In this case, we use the value stored in input->tip_context,
931 since we can't know the character in input->mbs[-1] here. */
932 return input->tip_context;
933 if (BE (idx == input->len, 0))
934 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
935 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
936#ifdef RE_ENABLE_I18N
937 if (input->mb_cur_max > 1)
938 {
939 wint_t wc;
940 int wc_idx = idx;
941 while(input->wcs[wc_idx] == WEOF)
942 {
943#ifdef DEBUG
944 /* It must not happen. */
945 assert (wc_idx >= 0);
946#endif
947 --wc_idx;
948 if (wc_idx < 0)
949 return input->tip_context;
950 }
951 wc = input->wcs[wc_idx];
952 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
953 return CONTEXT_WORD;
954 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
955 ? CONTEXT_NEWLINE : 0);
956 }
957 else
958#endif
959 {
960 c = re_string_byte_at (input, idx);
961 if (bitset_contain (input->word_char, c))
962 return CONTEXT_WORD;
963 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
964 }
965}
966
967/* Functions for set operation. */
968
969static reg_errcode_t
970internal_function
971re_node_set_alloc (re_node_set *set, int size)
972{
973 /*
974 * ADR: valgrind says size can be 0, which then doesn't
975 * free the block of size 0. Harumph. This seems
976 * to work ok, though.
977 */
978 if (size == 0)
979 {
980 memset(set, 0, sizeof(*set));
981 return REG_NOERROR;
982 }
983 set->alloc = size;
984 set->nelem = 0;
985 set->elems = re_malloc (int, size);
986 if (BE (set->elems == NULL, 0))
987 return REG_ESPACE;
988 return REG_NOERROR;
989}
990
991static reg_errcode_t
992internal_function
993re_node_set_init_1 (re_node_set *set, int elem)
994{
995 set->alloc = 1;
996 set->nelem = 1;
997 set->elems = re_malloc (int, 1);
998 if (BE (set->elems == NULL, 0))
999 {
1000 set->alloc = set->nelem = 0;
1001 return REG_ESPACE;
1002 }
1003 set->elems[0] = elem;
1004 return REG_NOERROR;
1005}
1006
1007static reg_errcode_t
1008internal_function
1009re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
1010{
1011 set->alloc = 2;
1012 set->elems = re_malloc (int, 2);
1013 if (BE (set->elems == NULL, 0))
1014 return REG_ESPACE;
1015 if (elem1 == elem2)
1016 {
1017 set->nelem = 1;
1018 set->elems[0] = elem1;
1019 }
1020 else
1021 {
1022 set->nelem = 2;
1023 if (elem1 < elem2)
1024 {
1025 set->elems[0] = elem1;
1026 set->elems[1] = elem2;
1027 }
1028 else
1029 {
1030 set->elems[0] = elem2;
1031 set->elems[1] = elem1;
1032 }
1033 }
1034 return REG_NOERROR;
1035}
1036
1037static reg_errcode_t
1038internal_function
1039re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1040{
1041 dest->nelem = src->nelem;
1042 if (src->nelem > 0)
1043 {
1044 dest->alloc = dest->nelem;
1045 dest->elems = re_malloc (int, dest->alloc);
1046 if (BE (dest->elems == NULL, 0))
1047 {
1048 dest->alloc = dest->nelem = 0;
1049 return REG_ESPACE;
1050 }
1051 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1052 }
1053 else
1054 re_node_set_init_empty (dest);
1055 return REG_NOERROR;
1056}
1057
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.
   Note: We assume dest->elems is NULL, when dest->alloc is 0.

   All three sets are walked from their highest element downwards, so
   the code relies on their elements being stored in ascending order.
   The new elements are first collected at the top of DEST's buffer and
   then merged into place.  */

static reg_errcode_t
internal_function
re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
			   const re_node_set *src2)
{
  int i1, i2, is, id, delta, sbase;
  /* An empty operand means an empty intersection: nothing to add.  */
  if (src1->nelem == 0 || src2->nelem == 0)
    return REG_NOERROR;

  /* We need dest->nelem + 2 * elems_in_intersection; this is a
     conservative estimate.  */
  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
    {
      int new_alloc = src1->nelem + src2->nelem + dest->alloc;
      int *new_elems = re_realloc (dest->elems, int, new_alloc);
      if (BE (new_elems == NULL, 0))
	return REG_ESPACE;
      dest->elems = new_elems;
      dest->alloc = new_alloc;
    }

  /* Find the items in the intersection of SRC1 and SRC2, and copy
     into the top of DEST those that are not already in DEST itself.  */
  /* SBASE is one past the top of the scratch area; it moves down as
     elements are stored.  */
  sbase = dest->nelem + src1->nelem + src2->nelem;
  i1 = src1->nelem - 1;
  i2 = src2->nelem - 1;
  id = dest->nelem - 1;
  for (;;)
    {
      if (src1->elems[i1] == src2->elems[i2])
	{
	  /* Try to find the item in DEST.  Maybe we could binary search?  */
	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
	    --id;

	  if (id < 0 || dest->elems[id] != src1->elems[i1])
	    dest->elems[--sbase] = src1->elems[i1];

	  if (--i1 < 0 || --i2 < 0)
	    break;
	}

      /* Lower the highest of the two items.  */
      else if (src1->elems[i1] < src2->elems[i2])
	{
	  if (--i2 < 0)
	    break;
	}
      else
	{
	  if (--i1 < 0)
	    break;
	}
    }

  /* ID: last original DEST element; IS: last scratch element;
     DELTA: number of new elements collected in the scratch area.  */
  id = dest->nelem - 1;
  is = dest->nelem + src1->nelem + src2->nelem - 1;
  delta = is - sbase + 1;

  /* Now copy.  When DELTA becomes zero, the remaining
     DEST elements are already in place; this is more or
     less the same loop that is in re_node_set_merge.  */
  dest->nelem += delta;
  if (delta > 0 && id >= 0)
    for (;;)
      {
	if (dest->elems[is] > dest->elems[id])
	  {
	    /* Copy from the top.  */
	    dest->elems[id + delta--] = dest->elems[is--];
	    if (delta == 0)
	      break;
	  }
	else
	  {
	    /* Slide from the bottom.  */
	    dest->elems[id + delta] = dest->elems[id];
	    if (--id < 0)
	      break;
	  }
      }

  /* Copy remaining SRC elements.  */
  /* DELTA is zero here unless DEST's original elements ran out first,
     in which case the leftover scratch elements go to the front.  */
  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));

  return REG_NOERROR;
}
1149
1150/* Calculate the union set of the sets SRC1 and SRC2. And store it to
1151 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1152
1153static reg_errcode_t
1154internal_function
1155re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1156 const re_node_set *src2)
1157{
1158 int i1, i2, id;
1159 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1160 {
1161 dest->alloc = src1->nelem + src2->nelem;
1162 dest->elems = re_malloc (int, dest->alloc);
1163 if (BE (dest->elems == NULL, 0))
1164 return REG_ESPACE;
1165 }
1166 else
1167 {
1168 if (src1 != NULL && src1->nelem > 0)
1169 return re_node_set_init_copy (dest, src1);
1170 else if (src2 != NULL && src2->nelem > 0)
1171 return re_node_set_init_copy (dest, src2);
1172 else
1173 re_node_set_init_empty (dest);
1174 return REG_NOERROR;
1175 }
1176 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1177 {
1178 if (src1->elems[i1] > src2->elems[i2])
1179 {
1180 dest->elems[id++] = src2->elems[i2++];
1181 continue;
1182 }
1183 if (src1->elems[i1] == src2->elems[i2])
1184 ++i2;
1185 dest->elems[id++] = src1->elems[i1++];
1186 }
1187 if (i1 < src1->nelem)
1188 {
1189 memcpy (dest->elems + id, src1->elems + i1,
1190 (src1->nelem - i1) * sizeof (int));
1191 id += src1->nelem - i1;
1192 }
1193 else if (i2 < src2->nelem)
1194 {
1195 memcpy (dest->elems + id, src2->elems + i2,
1196 (src2->nelem - i2) * sizeof (int));
1197 id += src2->nelem - i2;
1198 }
1199 dest->nelem = id;
1200 return REG_NOERROR;
1201}
1202
/* Calculate the union set of the sets DEST and SRC. And store it to
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.

   SRC may be NULL or empty, in which case DEST is left untouched.
   Both sets are walked from their highest element downwards, so the
   code relies on their elements being stored in ascending order.  The
   elements of SRC missing from DEST are first collected at the top of
   DEST's buffer and then merged into place.  */

static reg_errcode_t
internal_function
re_node_set_merge (re_node_set *dest, const re_node_set *src)
{
  int is, id, sbase, delta;
  if (src == NULL || src->nelem == 0)
    return REG_NOERROR;
  /* Room is needed for DEST's elements plus a scratch area that is
     indexed up to dest->nelem + 2 * src->nelem.  */
  if (dest->alloc < 2 * src->nelem + dest->nelem)
    {
      int new_alloc = 2 * (src->nelem + dest->alloc);
      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
      if (BE (new_buffer == NULL, 0))
	return REG_ESPACE;
      dest->elems = new_buffer;
      dest->alloc = new_alloc;
    }

  /* With an empty DEST the union is simply a copy of SRC.  */
  if (BE (dest->nelem == 0, 0))
    {
      dest->nelem = src->nelem;
      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
      return REG_NOERROR;
    }

  /* Copy into the top of DEST the items of SRC that are not
     found in DEST.  Maybe we could binary search in DEST?  */
  for (sbase = dest->nelem + 2 * src->nelem,
       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
    {
      if (dest->elems[id] == src->elems[is])
	is--, id--;
      else if (dest->elems[id] < src->elems[is])
	dest->elems[--sbase] = src->elems[is--];
      else /* if (dest->elems[id] > src->elems[is]) */
	--id;
    }

  if (is >= 0)
    {
      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
      sbase -= is + 1;
      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
    }

  /* ID: last original DEST element; IS: last scratch element;
     DELTA: number of new elements collected in the scratch area.  */
  id = dest->nelem - 1;
  is = dest->nelem + 2 * src->nelem - 1;
  delta = is - sbase + 1;
  /* Every element of SRC was already present.  */
  if (delta == 0)
    return REG_NOERROR;

  /* Now copy.  When DELTA becomes zero, the remaining
     DEST elements are already in place.  */
  dest->nelem += delta;
  for (;;)
    {
      if (dest->elems[is] > dest->elems[id])
	{
	  /* Copy from the top.  */
	  dest->elems[id + delta--] = dest->elems[is--];
	  if (delta == 0)
	    break;
	}
      else
	{
	  /* Slide from the bottom.  */
	  dest->elems[id + delta] = dest->elems[id];
	  if (--id < 0)
	    {
	      /* Copy remaining SRC elements.  */
	      memcpy (dest->elems, dest->elems + sbase,
		      delta * sizeof (int));
	      break;
	    }
	}
    }

  return REG_NOERROR;
}
1284
1285/* Insert the new element ELEM to the re_node_set* SET.
1286 SET should not already have ELEM.
1287 return -1 if an error has occurred, return 1 otherwise. */
1288
1289static int
1290internal_function
1291re_node_set_insert (re_node_set *set, int elem)
1292{
1293 int idx;
1294 /* In case the set is empty. */
1295 if (set->alloc == 0)
1296 {
1297 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1298 return 1;
1299 else
1300 return -1;
1301 }
1302
1303 if (BE (set->nelem, 0) == 0)
1304 {
1305 /* We already guaranteed above that set->alloc != 0. */
1306 set->elems[0] = elem;
1307 ++set->nelem;
1308 return 1;
1309 }
1310
1311 /* Realloc if we need. */
1312 if (set->alloc == set->nelem)
1313 {
1314 int *new_elems;
1315 set->alloc = set->alloc * 2;
1316 new_elems = re_realloc (set->elems, int, set->alloc);
1317 if (BE (new_elems == NULL, 0))
1318 return -1;
1319 set->elems = new_elems;
1320 }
1321
1322 /* Move the elements which follows the new element. Test the
1323 first element separately to skip a check in the inner loop. */
1324 if (elem < set->elems[0])
1325 {
1326 idx = 0;
1327 for (idx = set->nelem; idx > 0; idx--)
1328 set->elems[idx] = set->elems[idx - 1];
1329 }
1330 else
1331 {
1332 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1333 set->elems[idx] = set->elems[idx - 1];
1334 }
1335
1336 /* Insert the new element. */
1337 set->elems[idx] = elem;
1338 ++set->nelem;
1339 return 1;
1340}
1341
1342/* Insert the new element ELEM to the re_node_set* SET.
1343 SET should not already have any element greater than or equal to ELEM.
1344 Return -1 if an error has occurred, return 1 otherwise. */
1345
1346static int
1347internal_function
1348re_node_set_insert_last (re_node_set *set, int elem)
1349{
1350 /* Realloc if we need. */
1351 if (set->alloc == set->nelem)
1352 {
1353 int *new_elems;
1354 set->alloc = (set->alloc + 1) * 2;
1355 new_elems = re_realloc (set->elems, int, set->alloc);
1356 if (BE (new_elems == NULL, 0))
1357 return -1;
1358 set->elems = new_elems;
1359 }
1360
1361 /* Insert the new element. */
1362 set->elems[set->nelem++] = elem;
1363 return 1;
1364}
1365
1366/* Compare two node sets SET1 and SET2.
1367 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1368
1369static int
1370internal_function __attribute ((pure))
1371re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1372{
1373 int i;
1374 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1375 return 0;
1376 for (i = set1->nelem ; --i >= 0 ; )
1377 if (set1->elems[i] != set2->elems[i])
1378 return 0;
1379 return 1;
1380}
1381
1382/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1383
1384static int
1385internal_function __attribute ((pure))
1386re_node_set_contains (const re_node_set *set, int elem)
1387{
1388 unsigned int idx, right, mid;
1389 if (set->nelem <= 0)
1390 return 0;
1391
1392 /* Binary search the element. */
1393 idx = 0;
1394 right = set->nelem - 1;
1395 while (idx < right)
1396 {
1397 mid = (idx + right) / 2;
1398 if (set->elems[mid] < elem)
1399 idx = mid + 1;
1400 else
1401 right = mid;
1402 }
1403 return set->elems[idx] == elem ? idx + 1 : 0;
1404}
1405
1406static void
1407internal_function
1408re_node_set_remove_at (re_node_set *set, int idx)
1409{
1410 if (idx < 0 || idx >= set->nelem)
1411 return;
1412 --set->nelem;
1413 for (; idx < set->nelem; idx++)
1414 set->elems[idx] = set->elems[idx + 1];
1415}
1416
1417
1418/* Add the token TOKEN to dfa->nodes, and return the index of the token.
1419 Or return -1, if an error has occurred. */
1420
1421static int
1422internal_function
1423re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1424{
1425 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1426 {
1427 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1428 int *new_nexts, *new_indices;
1429 re_node_set *new_edests, *new_eclosures;
1430 re_token_t *new_nodes;
1431
1432 /* Avoid overflows in realloc. */
1433 const size_t max_object_size = MAX (sizeof (re_token_t),
1434 MAX (sizeof (re_node_set),
1435 sizeof (int)));
1436 if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0))
1437 return -1;
1438
1439 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1440 if (BE (new_nodes == NULL, 0))
1441 return -1;
1442 dfa->nodes = new_nodes;
1443 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1444 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1445 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1446 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1447 if (BE (new_nexts == NULL || new_indices == NULL
1448 || new_edests == NULL || new_eclosures == NULL, 0))
1449 return -1;
1450 dfa->nexts = new_nexts;
1451 dfa->org_indices = new_indices;
1452 dfa->edests = new_edests;
1453 dfa->eclosures = new_eclosures;
1454 dfa->nodes_alloc = new_nodes_alloc;
1455 }
1456 dfa->nodes[dfa->nodes_len] = token;
1457 dfa->nodes[dfa->nodes_len].constraint = 0;
1458#ifdef RE_ENABLE_I18N
1459 dfa->nodes[dfa->nodes_len].accept_mb =
1460 (token.type == OP_PERIOD && dfa->mb_cur_max > 1) || token.type == COMPLEX_BRACKET;
1461#endif
1462 dfa->nexts[dfa->nodes_len] = -1;
1463 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1464 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1465 return dfa->nodes_len++;
1466}
1467
1468static inline unsigned int
1469internal_function
1470calc_state_hash (const re_node_set *nodes, unsigned int context)
1471{
1472 unsigned int hash = nodes->nelem + context;
1473 int i;
1474 for (i = 0 ; i < nodes->nelem ; i++)
1475 hash += nodes->elems[i];
1476 return hash;
1477}
1478
1479/* Search for the state whose node_set is equivalent to NODES.
1480 Return the pointer to the state, if we found it in the DFA.
1481 Otherwise create the new one and return it. In case of an error
1482 return NULL and set the error code in ERR.
1483 Note: - We assume NULL as the invalid state, then it is possible that
1484 return value is NULL and ERR is REG_NOERROR.
1485 - We never return non-NULL value in case of any errors, it is for
1486 optimization. */
1487
1488static re_dfastate_t *
1489internal_function
1490re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1491 const re_node_set *nodes)
1492{
1493 unsigned int hash;
1494 re_dfastate_t *new_state;
1495 struct re_state_table_entry *spot;
1496 int i;
1497 if (BE (nodes->nelem == 0, 0))
1498 {
1499 *err = REG_NOERROR;
1500 return NULL;
1501 }
1502 hash = calc_state_hash (nodes, 0);
1503 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1504
1505 for (i = 0 ; i < spot->num ; i++)
1506 {
1507 re_dfastate_t *state = spot->array[i];
1508 if (hash != state->hash)
1509 continue;
1510 if (re_node_set_compare (&state->nodes, nodes))
1511 return state;
1512 }
1513
1514 /* There are no appropriate state in the dfa, create the new one. */
1515 new_state = create_ci_newstate (dfa, nodes, hash);
1516 if (BE (new_state == NULL, 0))
1517 *err = REG_ESPACE;
1518
1519 return new_state;
1520}
1521
1522/* Search for the state whose node_set is equivalent to NODES and
1523 whose context is equivalent to CONTEXT.
1524 Return the pointer to the state, if we found it in the DFA.
1525 Otherwise create the new one and return it. In case of an error
1526 return NULL and set the error code in ERR.
1527 Note: - We assume NULL as the invalid state, then it is possible that
1528 return value is NULL and ERR is REG_NOERROR.
1529 - We never return non-NULL value in case of any errors, it is for
1530 optimization. */
1531
1532static re_dfastate_t *
1533internal_function
1534re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1535 const re_node_set *nodes, unsigned int context)
1536{
1537 unsigned int hash;
1538 re_dfastate_t *new_state;
1539 struct re_state_table_entry *spot;
1540 int i;
1541 if (nodes->nelem == 0)
1542 {
1543 *err = REG_NOERROR;
1544 return NULL;
1545 }
1546 hash = calc_state_hash (nodes, context);
1547 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1548
1549 for (i = 0 ; i < spot->num ; i++)
1550 {
1551 re_dfastate_t *state = spot->array[i];
1552 if (state->hash == hash
1553 && state->context == context
1554 && re_node_set_compare (state->entrance_nodes, nodes))
1555 return state;
1556 }
1557 /* There are no appropriate state in `dfa', create the new one. */
1558 new_state = create_cd_newstate (dfa, nodes, context, hash);
1559 if (BE (new_state == NULL, 0))
1560 *err = REG_ESPACE;
1561
1562 return new_state;
1563}
1564
1565/* Finish initialization of the new state NEWSTATE, and using its hash value
1566 HASH put in the appropriate bucket of DFA's state table. Return value
1567 indicates the error code if failed. */
1568
1569static reg_errcode_t
1570register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1571 unsigned int hash)
1572{
1573 struct re_state_table_entry *spot;
1574 reg_errcode_t err;
1575 int i;
1576
1577 newstate->hash = hash;
1578 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1579 if (BE (err != REG_NOERROR, 0))
1580 return REG_ESPACE;
1581 for (i = 0; i < newstate->nodes.nelem; i++)
1582 {
1583 int elem = newstate->nodes.elems[i];
1584 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1585 if (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0)
1586 return REG_ESPACE;
1587 }
1588
1589 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1590 if (BE (spot->alloc <= spot->num, 0))
1591 {
1592 int new_alloc = 2 * spot->num + 2;
1593 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1594 new_alloc);
1595 if (BE (new_array == NULL, 0))
1596 return REG_ESPACE;
1597 spot->array = new_array;
1598 spot->alloc = new_alloc;
1599 }
1600 spot->array[spot->num++] = newstate;
1601 return REG_NOERROR;
1602}
1603
1604static void
1605free_state (re_dfastate_t *state)
1606{
1607 re_node_set_free (&state->non_eps_nodes);
1608 re_node_set_free (&state->inveclosure);
1609 if (state->entrance_nodes != &state->nodes)
1610 {
1611 re_node_set_free (state->entrance_nodes);
1612 re_free (state->entrance_nodes);
1613 }
1614 re_node_set_free (&state->nodes);
1615 re_free (state->word_trtable);
1616 re_free (state->trtable);
1617 re_free (state);
1618}
1619
1620/* Create the new state which is independ of contexts.
1621 Return the new state if succeeded, otherwise return NULL. */
1622
1623static re_dfastate_t *
1624internal_function
1625create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1626 unsigned int hash)
1627{
1628 int i;
1629 reg_errcode_t err;
1630 re_dfastate_t *newstate;
1631
1632 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1633 if (BE (newstate == NULL, 0))
1634 return NULL;
1635 err = re_node_set_init_copy (&newstate->nodes, nodes);
1636 if (BE (err != REG_NOERROR, 0))
1637 {
1638 re_free (newstate);
1639 return NULL;
1640 }
1641
1642 newstate->entrance_nodes = &newstate->nodes;
1643 for (i = 0 ; i < nodes->nelem ; i++)
1644 {
1645 re_token_t *node = dfa->nodes + nodes->elems[i];
1646 re_token_type_t type = node->type;
1647 if (type == CHARACTER && !node->constraint)
1648 continue;
1649#ifdef RE_ENABLE_I18N
1650 newstate->accept_mb |= node->accept_mb;
1651#endif /* RE_ENABLE_I18N */
1652
1653 /* If the state has the halt node, the state is a halt state. */
1654 if (type == END_OF_RE)
1655 newstate->halt = 1;
1656 else if (type == OP_BACK_REF)
1657 newstate->has_backref = 1;
1658 else if (type == ANCHOR || node->constraint)
1659 newstate->has_constraint = 1;
1660 }
1661 err = register_state (dfa, newstate, hash);
1662 if (BE (err != REG_NOERROR, 0))
1663 {
1664 free_state (newstate);
1665 newstate = NULL;
1666 }
1667 return newstate;
1668}
1669
1670/* Create the new state which is depend on the context CONTEXT.
1671 Return the new state if succeeded, otherwise return NULL. */
1672
1673static re_dfastate_t *
1674internal_function
1675create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1676 unsigned int context, unsigned int hash)
1677{
1678 int i, nctx_nodes = 0;
1679 reg_errcode_t err;
1680 re_dfastate_t *newstate;
1681
1682 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1683 if (BE (newstate == NULL, 0))
1684 return NULL;
1685 err = re_node_set_init_copy (&newstate->nodes, nodes);
1686 if (BE (err != REG_NOERROR, 0))
1687 {
1688 re_free (newstate);
1689 return NULL;
1690 }
1691
1692 newstate->context = context;
1693 newstate->entrance_nodes = &newstate->nodes;
1694
1695 for (i = 0 ; i < nodes->nelem ; i++)
1696 {
1697 re_token_t *node = dfa->nodes + nodes->elems[i];
1698 re_token_type_t type = node->type;
1699 unsigned int constraint = node->constraint;
1700
1701 if (type == CHARACTER && !constraint)
1702 continue;
1703#ifdef RE_ENABLE_I18N
1704 newstate->accept_mb |= node->accept_mb;
1705#endif /* RE_ENABLE_I18N */
1706
1707 /* If the state has the halt node, the state is a halt state. */
1708 if (type == END_OF_RE)
1709 newstate->halt = 1;
1710 else if (type == OP_BACK_REF)
1711 newstate->has_backref = 1;
1712
1713 if (constraint)
1714 {
1715 if (newstate->entrance_nodes == &newstate->nodes)
1716 {
1717 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1718 if (BE (newstate->entrance_nodes == NULL, 0))
1719 {
1720 free_state (newstate);
1721 return NULL;
1722 }
1723 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1724 != REG_NOERROR)
1725 return NULL;
1726 nctx_nodes = 0;
1727 newstate->has_constraint = 1;
1728 }
1729
1730 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1731 {
1732 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1733 ++nctx_nodes;
1734 }
1735 }
1736 }
1737 err = register_state (dfa, newstate, hash);
1738 if (BE (err != REG_NOERROR, 0))
1739 {
1740 free_state (newstate);
1741 newstate = NULL;
1742 }
1743 return newstate;
1744}
diff --git a/win32/regex_internal.h b/win32/regex_internal.h
new file mode 100644
index 000000000..1495059ab
--- /dev/null
+++ b/win32/regex_internal.h
@@ -0,0 +1,810 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2005, 2007, 2008, 2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21#ifndef _REGEX_INTERNAL_H
22#define _REGEX_INTERNAL_H 1
23
24#include <assert.h>
25#include <ctype.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
31# include <langinfo.h>
32#endif
33#if defined HAVE_LOCALE_H || defined _LIBC
34# include <locale.h>
35#endif
36#if defined HAVE_WCHAR_H || defined _LIBC
37# include <wchar.h>
38#endif /* HAVE_WCHAR_H || _LIBC */
39#if defined HAVE_WCTYPE_H || defined _LIBC
40# include <wctype.h>
41#endif /* HAVE_WCTYPE_H || _LIBC */
42#if defined HAVE_STDBOOL_H || defined _LIBC
43# include <stdbool.h>
44#endif /* HAVE_STDBOOL_H || _LIBC */
45#if !defined(ZOS_USS)
46#if defined HAVE_STDINT_H || defined _LIBC
47# include <stdint.h>
48#endif /* HAVE_STDINT_H || _LIBC */
49#endif /* !ZOS_USS */
50#if defined _LIBC
51# include <bits/libc-lock.h>
52#else
53# define __libc_lock_define(CLASS,NAME)
54# define __libc_lock_init(NAME) do { } while (0)
55# define __libc_lock_lock(NAME) do { } while (0)
56# define __libc_lock_unlock(NAME) do { } while (0)
57#endif
58
59#ifndef GAWK
60/* In case that the system doesn't have isblank(). */
61#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
62# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
63#endif
64#else /* GAWK */
65/*
66 * This is a freaking mess. On glibc systems you have to define
67 * a magic constant to get isblank() out of <ctype.h>, since it's
68 * a C99 function. To heck with all that and borrow a page from
69 * dfa.c's book.
70 */
71
/* Return 1 if C is a space or a horizontal tab, 0 otherwise.  */
static int
is_blank (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
      return 1;
    default:
      return 0;
    }
}
77#endif /* GAWK */
78
79#ifdef _LIBC
80# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
81# define _RE_DEFINE_LOCALE_FUNCTIONS 1
82# include <locale/localeinfo.h>
83# include <locale/elem-hash.h>
84# include <locale/coll-lookup.h>
85# endif
86#endif
87
88/* This is for other GNU distributions with internationalized messages. */
89#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
90# include <libintl.h>
91# ifdef _LIBC
92# undef gettext
93# define gettext(msgid) \
94 INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
95# endif
96#else
97# define gettext(msgid) (msgid)
98#endif
99
100#ifndef gettext_noop
101/* This define is so xgettext can find the internationalizable
102 strings. */
103# define gettext_noop(String) String
104#endif
105
106/* For loser systems without the definition. */
107#ifndef SIZE_MAX
108# define SIZE_MAX ((size_t) -1)
109#endif
110
111#ifndef NO_MBSUPPORT
112#include "mbsupport.h" /* gawk */
113#endif
114#ifndef MB_CUR_MAX
115#define MB_CUR_MAX 1
116#endif
117
118#if (defined MBS_SUPPORT) || defined _LIBC
119# define RE_ENABLE_I18N
120#endif
121
122#if __GNUC__ >= 3
123# define BE(expr, val) __builtin_expect (expr, val)
124#else
125# define BE(expr, val) (expr)
126# ifdef inline
127# undef inline
128# endif
129# define inline
130#endif
131
132/* Number of single byte character. */
133#define SBC_MAX 256
134
135#define COLL_ELEM_LEN_MAX 8
136
137/* The character which represents newline. */
138#define NEWLINE_CHAR '\n'
139#define WIDE_NEWLINE_CHAR L'\n'
140
141/* Rename to standard API for using out of glibc. */
142#ifndef _LIBC
143# ifdef __wctype
144# undef __wctype
145# endif
146# define __wctype wctype
147# ifdef __iswctype
148# undef __iswctype
149# endif
150# define __iswctype iswctype
151# define __btowc btowc
152# define __mbrtowc mbrtowc
153#undef __mempcpy /* GAWK */
154# define __mempcpy mempcpy
155# define __wcrtomb wcrtomb
156# define __regfree regfree
157# define attribute_hidden
158#endif /* not _LIBC */
159
160#ifdef __GNUC__
161# define __attribute(arg) __attribute__ (arg)
162#else
163# define __attribute(arg)
164#endif
165
166extern const char __re_error_msgid[] attribute_hidden;
167extern const size_t __re_error_msgid_idx[] attribute_hidden;
168
169/* An integer used to represent a set of bits. It must be unsigned,
170 and must be at least as wide as unsigned int. */
171typedef unsigned long int bitset_word_t;
172/* All bits set in a bitset_word_t. */
173#define BITSET_WORD_MAX ULONG_MAX
174/* Number of bits in a bitset_word_t. */
175#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
176/* Number of bitset_word_t in a bit_set. */
177#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
178typedef bitset_word_t bitset_t[BITSET_WORDS];
179typedef bitset_word_t *re_bitset_ptr_t;
180typedef const bitset_word_t *re_const_bitset_ptr_t;
181
/* Single-bit operations on a bitset.  SET is an array of bitset_word_t
   and I is the bit number.  Both arguments are fully parenthesized so
   that expressions such as `base + k' select the intended word and bit;
   note that I is still evaluated twice.  */
#define bitset_set(set,i) \
  ((set)[(i) / BITSET_WORD_BITS] \
   |= (bitset_word_t) 1 << ((i) % BITSET_WORD_BITS))
#define bitset_clear(set,i) \
  ((set)[(i) / BITSET_WORD_BITS] \
   &= ~((bitset_word_t) 1 << ((i) % BITSET_WORD_BITS)))
#define bitset_contain(set,i) \
  ((set)[(i) / BITSET_WORD_BITS] \
   & ((bitset_word_t) 1 << ((i) % BITSET_WORD_BITS)))
/* Whole-set operations; they always act on sizeof (bitset_t) bytes.  */
#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
191
192#define PREV_WORD_CONSTRAINT 0x0001
193#define PREV_NOTWORD_CONSTRAINT 0x0002
194#define NEXT_WORD_CONSTRAINT 0x0004
195#define NEXT_NOTWORD_CONSTRAINT 0x0008
196#define PREV_NEWLINE_CONSTRAINT 0x0010
197#define NEXT_NEWLINE_CONSTRAINT 0x0020
198#define PREV_BEGBUF_CONSTRAINT 0x0040
199#define NEXT_ENDBUF_CONSTRAINT 0x0080
200#define WORD_DELIM_CONSTRAINT 0x0100
201#define NOT_WORD_DELIM_CONSTRAINT 0x0200
202
203typedef enum
204{
205 INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
206 WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
207 WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
208 INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
209 LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
210 LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
211 BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
212 BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
213 WORD_DELIM = WORD_DELIM_CONSTRAINT,
214 NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
215} re_context_type;
216
/* A set of node indices (indices into the dfa's NODES array).  */
typedef struct
{
  /* Presumably the allocated capacity of ELEMS, in elements - the code
     that maintains it is outside this part of the file.  */
  int alloc;
  /* Number of entries of ELEMS currently in use.  */
  int nelem;
  /* The node indices themselves.  */
  int *elems;
} re_node_set;
223
224typedef enum
225{
226 NON_TYPE = 0,
227
228 /* Node type, These are used by token, node, tree. */
229 CHARACTER = 1,
230 END_OF_RE = 2,
231 SIMPLE_BRACKET = 3,
232 OP_BACK_REF = 4,
233 OP_PERIOD = 5,
234#ifdef RE_ENABLE_I18N
235 COMPLEX_BRACKET = 6,
236 OP_UTF8_PERIOD = 7,
237#endif /* RE_ENABLE_I18N */
238
239 /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
240 when the debugger shows values of this enum type. */
241#define EPSILON_BIT 8
242 OP_OPEN_SUBEXP = EPSILON_BIT | 0,
243 OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
244 OP_ALT = EPSILON_BIT | 2,
245 OP_DUP_ASTERISK = EPSILON_BIT | 3,
246 ANCHOR = EPSILON_BIT | 4,
247
248 /* Tree type, these are used only by tree. */
249 CONCAT = 16,
250 SUBEXP = 17,
251
252 /* Token type, these are used only by token. */
253 OP_DUP_PLUS = 18,
254 OP_DUP_QUESTION,
255 OP_OPEN_BRACKET,
256 OP_CLOSE_BRACKET,
257 OP_CHARSET_RANGE,
258 OP_OPEN_DUP_NUM,
259 OP_CLOSE_DUP_NUM,
260 OP_NON_MATCH_LIST,
261 OP_OPEN_COLL_ELEM,
262 OP_CLOSE_COLL_ELEM,
263 OP_OPEN_EQUIV_CLASS,
264 OP_CLOSE_EQUIV_CLASS,
265 OP_OPEN_CHAR_CLASS,
266 OP_CLOSE_CHAR_CLASS,
267 OP_WORD,
268 OP_NOTWORD,
269 OP_SPACE,
270 OP_NOTSPACE,
271 BACK_SLASH
272
273} re_token_type_t;
274
275#ifdef RE_ENABLE_I18N
276typedef struct
277{
278 /* Multibyte characters. */
279 wchar_t *mbchars;
280
281 /* Collating symbols. */
282# ifdef _LIBC
283 int32_t *coll_syms;
284# endif
285
286 /* Equivalence classes. */
287# ifdef _LIBC
288 int32_t *equiv_classes;
289# endif
290
291 /* Range expressions. */
292# ifdef _LIBC
293 uint32_t *range_starts;
294 uint32_t *range_ends;
295# else /* not _LIBC */
296 wchar_t *range_starts;
297 wchar_t *range_ends;
298# endif /* not _LIBC */
299
300 /* Character classes. */
301 wctype_t *char_classes;
302
303 /* If this character set is the non-matching list. */
304 unsigned int non_match : 1;
305
306 /* # of multibyte characters. */
307 int nmbchars;
308
309 /* # of collating symbols. */
310 int ncoll_syms;
311
312 /* # of equivalence classes. */
313 int nequiv_classes;
314
315 /* # of range expressions. */
316 int nranges;
317
318 /* # of character classes. */
319 int nchar_classes;
320} re_charset_t;
321#endif /* RE_ENABLE_I18N */
322
323typedef struct
324{
325 union
326 {
327 unsigned char c; /* for CHARACTER */
328 re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
329#ifdef RE_ENABLE_I18N
330 re_charset_t *mbcset; /* for COMPLEX_BRACKET */
331#endif /* RE_ENABLE_I18N */
332 int idx; /* for BACK_REF */
333 re_context_type ctx_type; /* for ANCHOR */
334 } opr;
335#if __GNUC__ >= 2
336 re_token_type_t type : 8;
337#else
338 re_token_type_t type;
339#endif
340 unsigned int constraint : 10; /* context constraint */
341 unsigned int duplicated : 1;
342 unsigned int opt_subexp : 1;
343#ifdef RE_ENABLE_I18N
344 unsigned int accept_mb : 1;
345 /* These 2 bits can be moved into the union if needed (e.g. if running out
346 of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
347 unsigned int mb_partial : 1;
348#endif
349 unsigned int word_char : 1;
350} re_token_t;
351
352#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
353
354struct re_string_t
355{
356 /* Indicate the raw buffer which is the original string passed as an
357 argument of regexec(), re_search(), etc.. */
358 const unsigned char *raw_mbs;
359 /* Store the multibyte string. In case of "case insensitive mode" like
360 REG_ICASE, upper cases of the string are stored, otherwise MBS points
361 the same address that RAW_MBS points. */
362 unsigned char *mbs;
363#ifdef RE_ENABLE_I18N
364 /* Store the wide character string which is corresponding to MBS. */
365 wint_t *wcs;
366 int *offsets;
367 mbstate_t cur_state;
368#endif
369 /* Index in RAW_MBS. Each character mbs[i] corresponds to
370 raw_mbs[raw_mbs_idx + i]. */
371 int raw_mbs_idx;
372 /* The length of the valid characters in the buffers. */
373 int valid_len;
374 /* The corresponding number of bytes in raw_mbs array. */
375 int valid_raw_len;
376 /* The length of the buffers MBS and WCS. */
377 int bufs_len;
378 /* The index in MBS, which is updated by re_string_fetch_byte. */
379 int cur_idx;
380 /* length of RAW_MBS array. */
381 int raw_len;
382 /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */
383 int len;
384 /* End of the buffer may be shorter than its length in the cases such
385 as re_match_2, re_search_2. Then, we use STOP for end of the buffer
386 instead of LEN. */
387 int raw_stop;
388 /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */
389 int stop;
390
391 /* The context of mbs[0]. We store the context independently, since
392 the context of mbs[0] may be different from raw_mbs[0], which is
393 the beginning of the input string. */
394 unsigned int tip_context;
395 /* The translation passed as a part of an argument of re_compile_pattern. */
396 RE_TRANSLATE_TYPE trans;
397 /* Copy of re_dfa_t's word_char. */
398 re_const_bitset_ptr_t word_char;
399 /* 1 if REG_ICASE. */
400 unsigned char icase;
401 unsigned char is_utf8;
402 unsigned char map_notascii;
403 unsigned char mbs_allocated;
404 unsigned char offsets_needed;
405 unsigned char newline_anchor;
406 unsigned char word_ops_used;
407 int mb_cur_max;
408};
409typedef struct re_string_t re_string_t;
410
411
412struct re_dfa_t;
413typedef struct re_dfa_t re_dfa_t;
414
415#ifndef _LIBC
416# ifdef __i386__
417# define internal_function __attribute ((regparm (3), stdcall))
418# else
419# define internal_function
420# endif
421#endif
422
423#ifndef NOT_IN_libc
424static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
425 int new_buf_len)
426 internal_function;
427# ifdef RE_ENABLE_I18N
428static void build_wcs_buffer (re_string_t *pstr) internal_function;
429static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
430 internal_function;
431# endif /* RE_ENABLE_I18N */
432static void build_upper_buffer (re_string_t *pstr) internal_function;
433static void re_string_translate_buffer (re_string_t *pstr) internal_function;
434static unsigned int re_string_context_at (const re_string_t *input, int idx,
435 int eflags)
436 internal_function __attribute ((pure));
437#endif
/* Accessors for re_string_t.  PSTR is a pointer to the string object;
   every macro argument is parenthesized so that arbitrary expressions
   may be passed.  */

/* Byte OFFSET positions after the current index, without consuming it.  */
#define re_string_peek_byte(pstr, offset) \
  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
/* Byte at the current index; advances the index by one.  */
#define re_string_fetch_byte(pstr) \
  ((pstr)->mbs[(pstr)->cur_idx++])
/* Nonzero if IDX is the end of the valid data or WCS[IDX] is not WEOF.  */
#define re_string_first_byte(pstr, idx) \
  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
/* Nonzero if neither WCS[IDX] nor its successor (if any) is WEOF.  */
#define re_string_is_single_byte_char(pstr, idx) \
  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
                                || (pstr)->wcs[(idx) + 1] != WEOF))
/* Nonzero once the current index has reached STOP.  */
#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
#define re_string_get_buffer(pstr) ((pstr)->mbs)
#define re_string_length(pstr) ((pstr)->len)
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
454
/* Outside glibc, decide whether a request of N bytes may be served with
   alloca.  __libc_use_alloca (N) evaluates to nonzero when it may.  */
#ifndef _LIBC
# if HAVE_ALLOCA
/* `#ifdef' takes a bare identifier; the former `#ifdef (_MSC_VER)' was
   a preprocessing error whenever HAVE_ALLOCA was enabled.  */
#  ifdef _MSC_VER
/* With MSVC include <malloc.h> rather than <alloca.h> and always take
   the malloc path.  */
#   include <malloc.h>
#   define __libc_use_alloca(n) 0
#  else
#   include <alloca.h>
/* The OS usually guarantees only one guard page at the bottom of the stack,
   and a page size can be as small as 4096 bytes.  So we cannot safely
   allocate anything larger than 4096 bytes.  Also care for the possibility
   of a few compiler-allocated temporary stack slots.  */
#   define __libc_use_alloca(n) ((n) < 4032)
#  endif
# else
/* alloca is implemented with malloc, so just use malloc.  */
#  define __libc_use_alloca(n) 0
# endif
#endif
473
/* Typed allocation helpers: T is the element type, N the element count.
   re_malloc returns uninitialized storage for N objects of type T.  */
#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
/* SunOS 4.1.x realloc doesn't accept null pointers: pre-Standard C.  Sigh.
   A null P therefore goes to calloc, which also zero-fills the new
   array; a non-null P is resized with realloc.  P is evaluated twice,
   and is parenthesized so that any pointer expression may be passed.  */
#define re_realloc(p,t,n) \
  (((p) != NULL) ? (t *) realloc ((p), (n) * sizeof (t)) \
                 : (t *) calloc ((n), sizeof (t)))
#define re_free(p) free (p)
478
479struct bin_tree_t
480{
481 struct bin_tree_t *parent;
482 struct bin_tree_t *left;
483 struct bin_tree_t *right;
484 struct bin_tree_t *first;
485 struct bin_tree_t *next;
486
487 re_token_t token;
488
489 /* `node_idx' is the index in dfa->nodes, if `type' == 0.
490 Otherwise `type' indicate the type of this node. */
491 int node_idx;
492};
493typedef struct bin_tree_t bin_tree_t;
494
495#define BIN_TREE_STORAGE_SIZE \
496 ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
497
498struct bin_tree_storage_t
499{
500 struct bin_tree_storage_t *next;
501 bin_tree_t data[BIN_TREE_STORAGE_SIZE];
502};
503typedef struct bin_tree_storage_t bin_tree_storage_t;
504
/* Context flags.  Each one is a distinct bit, so a context value can be
   tested with the IS_*_CONTEXT macros below.  */
#define CONTEXT_WORD 1
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)

#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
#define IS_ORDINARY_CONTEXT(c) ((c) == 0)

#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)

/* Nonzero if CONSTRAINT contains a PREV_* requirement that CONTEXT does
   not fulfil.  CONSTRAINT is parenthesized in every clause so that an
   expression such as `a | b' is masked as a whole.  */
#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
 ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))

/* Nonzero if CONSTRAINT contains a NEXT_* requirement that CONTEXT does
   not fulfil.  */
#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
 ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
532
533struct re_dfastate_t
534{
535 unsigned int hash;
536 re_node_set nodes;
537 re_node_set non_eps_nodes;
538 re_node_set inveclosure;
539 re_node_set *entrance_nodes;
540 struct re_dfastate_t **trtable, **word_trtable;
541 unsigned int context : 4;
542 unsigned int halt : 1;
543 /* If this state can accept `multi byte'.
544 Note that we refer to multibyte characters, and multi character
545 collating elements as `multi byte'. */
546 unsigned int accept_mb : 1;
547 /* If this state has backreference node(s). */
548 unsigned int has_backref : 1;
549 unsigned int has_constraint : 1;
550};
551typedef struct re_dfastate_t re_dfastate_t;
552
553struct re_state_table_entry
554{
555 int num;
556 int alloc;
557 re_dfastate_t **array;
558};
559
560/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
561
562typedef struct
563{
564 int next_idx;
565 int alloc;
566 re_dfastate_t **array;
567} state_array_t;
568
569/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
570
571typedef struct
572{
573 int node;
574 int str_idx; /* The position NODE match at. */
575 state_array_t path;
576} re_sub_match_last_t;
577
578/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
579 And information about the node, whose type is OP_CLOSE_SUBEXP,
580 corresponding to NODE is stored in LASTS. */
581
582typedef struct
583{
584 int str_idx;
585 int node;
586 state_array_t *path;
587 int alasts; /* Allocation size of LASTS. */
588 int nlasts; /* The number of LASTS. */
589 re_sub_match_last_t **lasts;
590} re_sub_match_top_t;
591
592struct re_backref_cache_entry
593{
594 int node;
595 int str_idx;
596 int subexp_from;
597 int subexp_to;
598 char more;
599 char unused;
600 unsigned short int eps_reachable_subexps_map;
601};
602
603typedef struct
604{
605 /* The string object corresponding to the input string. */
606 re_string_t input;
607#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
608 const re_dfa_t *const dfa;
609#else
610 const re_dfa_t *dfa;
611#endif
612 /* EFLAGS of the argument of regexec. */
613 int eflags;
614 /* Where the matching ends. */
615 int match_last;
616 int last_node;
617 /* The state log used by the matcher. */
618 re_dfastate_t **state_log;
619 int state_log_top;
620 /* Back reference cache. */
621 int nbkref_ents;
622 int abkref_ents;
623 struct re_backref_cache_entry *bkref_ents;
624 int max_mb_elem_len;
625 int nsub_tops;
626 int asub_tops;
627 re_sub_match_top_t **sub_tops;
628} re_match_context_t;
629
630typedef struct
631{
632 re_dfastate_t **sifted_states;
633 re_dfastate_t **limited_states;
634 int last_node;
635 int last_str_idx;
636 re_node_set limits;
637} re_sift_context_t;
638
639struct re_fail_stack_ent_t
640{
641 int idx;
642 int node;
643 regmatch_t *regs;
644 re_node_set eps_via_nodes;
645};
646
647struct re_fail_stack_t
648{
649 int num;
650 int alloc;
651 struct re_fail_stack_ent_t *stack;
652};
653
654struct re_dfa_t
655{
656 re_token_t *nodes;
657 size_t nodes_alloc;
658 size_t nodes_len;
659 int *nexts;
660 int *org_indices;
661 re_node_set *edests;
662 re_node_set *eclosures;
663 re_node_set *inveclosures;
664 struct re_state_table_entry *state_table;
665 re_dfastate_t *init_state;
666 re_dfastate_t *init_state_word;
667 re_dfastate_t *init_state_nl;
668 re_dfastate_t *init_state_begbuf;
669 bin_tree_t *str_tree;
670 bin_tree_storage_t *str_tree_storage;
671 re_bitset_ptr_t sb_char;
672 int str_tree_storage_idx;
673
674 /* number of subexpressions `re_nsub' is in regex_t. */
675 unsigned int state_hash_mask;
676 int init_node;
677 int nbackref; /* The number of backreference in this dfa. */
678
679 /* Bitmap expressing which backreference is used. */
680 bitset_word_t used_bkref_map;
681 bitset_word_t completed_bkref_map;
682
683 unsigned int has_plural_match : 1;
684 /* If this dfa has "multibyte node", which is a backreference or
685 a node which can accept multibyte character or multi character
686 collating element. */
687 unsigned int has_mb_node : 1;
688 unsigned int is_utf8 : 1;
689 unsigned int map_notascii : 1;
690 unsigned int word_ops_used : 1;
691 int mb_cur_max;
692 bitset_t word_char;
693 reg_syntax_t syntax;
694 int *subexp_map;
695#ifdef DEBUG
696 char* re_str;
697#endif
698#if defined _LIBC
699 __libc_lock_define (, lock)
700#endif
701};
702
703#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
704#define re_node_set_remove(set,id) \
705 (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
706#define re_node_set_empty(p) ((p)->nelem = 0)
707#define re_node_set_free(set) re_free ((set)->elems)
708
709
710typedef enum
711{
712 SB_CHAR,
713 MB_CHAR,
714 EQUIV_CLASS,
715 COLL_SYM,
716 CHAR_CLASS
717} bracket_elem_type;
718
719typedef struct
720{
721 bracket_elem_type type;
722 union
723 {
724 unsigned char ch;
725 unsigned char *name;
726 wchar_t wch;
727 } opr;
728} bracket_elem_t;
729
730
731/* Inline functions for bitset operation. */
732static inline void
733bitset_not (bitset_t set)
734{
735 int bitset_i;
736 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
737 set[bitset_i] = ~set[bitset_i];
738}
739
740static inline void
741bitset_merge (bitset_t dest, const bitset_t src)
742{
743 int bitset_i;
744 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
745 dest[bitset_i] |= src[bitset_i];
746}
747
748static inline void
749bitset_mask (bitset_t dest, const bitset_t src)
750{
751 int bitset_i;
752 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
753 dest[bitset_i] &= src[bitset_i];
754}
755
756#ifdef RE_ENABLE_I18N
757/* Inline functions for re_string. */
758static inline int
759internal_function __attribute ((pure))
760re_string_char_size_at (const re_string_t *pstr, int idx)
761{
762 int byte_idx;
763 if (pstr->mb_cur_max == 1)
764 return 1;
765 for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
766 if (pstr->wcs[idx + byte_idx] != WEOF)
767 break;
768 return byte_idx;
769}
770
771static inline wint_t
772internal_function __attribute ((pure))
773re_string_wchar_at (const re_string_t *pstr, int idx)
774{
775 if (pstr->mb_cur_max == 1)
776 return (wint_t) pstr->mbs[idx];
777 return (wint_t) pstr->wcs[idx];
778}
779
780# ifndef NOT_IN_libc
781static int
782internal_function __attribute ((pure))
783re_string_elem_size_at (const re_string_t *pstr, int idx)
784{
785# ifdef _LIBC
786 const unsigned char *p, *extra;
787 const int32_t *table, *indirect;
788 int32_t tmp;
789# include <locale/weight.h>
790 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
791
792 if (nrules != 0)
793 {
794 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
795 extra = (const unsigned char *)
796 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
797 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
798 _NL_COLLATE_INDIRECTMB);
799 p = pstr->mbs + idx;
800 tmp = findidx (&p);
801 return p - pstr->mbs - idx;
802 }
803 else
804# endif /* _LIBC */
805 return 1;
806}
807# endif
808#endif /* RE_ENABLE_I18N */
809
810#endif /* _REGEX_INTERNAL_H */
diff --git a/win32/regexec.c b/win32/regexec.c
new file mode 100644
index 000000000..eb5e1d443
--- /dev/null
+++ b/win32/regexec.c
@@ -0,0 +1,4369 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2005, 2007, 2009, 2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA. */
20
/* Forward declarations of file-local (static) helpers.  Prototypes guarded
   by RE_ENABLE_I18N or _LIBC exist only in those configurations, and the
   prototype of transit_state_sb is compiled out by `#if 0'.  */
21static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22 int n) internal_function;
23static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24static void match_ctx_free (re_match_context_t *cache) internal_function;
25static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
26 int str_idx, int from, int to)
27 internal_function;
28static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
29 internal_function;
30static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
31 int str_idx) internal_function;
32static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33 int node, int str_idx)
34 internal_function;
35static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
36 re_dfastate_t **limited_sts, int last_node,
37 int last_str_idx)
38 internal_function;
39static reg_errcode_t re_search_internal (const regex_t *preg,
40 const char *string, int length,
41 int start, int range, int stop,
42 size_t nmatch, regmatch_t pmatch[],
43 int eflags);
44static int re_search_2_stub (struct re_pattern_buffer *bufp,
45 const char *string1, int length1,
46 const char *string2, int length2,
47 int start, int range, struct re_registers *regs,
48 int stop, int ret_len);
49static int re_search_stub (struct re_pattern_buffer *bufp,
50 const char *string, int length, int start,
51 int range, int stop, struct re_registers *regs,
52 int ret_len);
53static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
54 int nregs, int regs_allocated);
55static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
56static int check_matching (re_match_context_t *mctx, int fl_longest_match,
57 int *p_match_first) internal_function;
58static int check_halt_state_context (const re_match_context_t *mctx,
59 const re_dfastate_t *state, int idx)
60 internal_function;
61static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
62 regmatch_t *prev_idx_match, int cur_node,
63 int cur_idx, int nmatch) internal_function;
64static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
65 int str_idx, int dest_node, int nregs,
66 regmatch_t *regs,
67 re_node_set *eps_via_nodes)
68 internal_function;
69static reg_errcode_t set_regs (const regex_t *preg,
70 const re_match_context_t *mctx,
71 size_t nmatch, regmatch_t *pmatch,
72 int fl_backtrack) internal_function;
73static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
74 internal_function;
75
76#ifdef RE_ENABLE_I18N
77static int sift_states_iter_mb (const re_match_context_t *mctx,
78 re_sift_context_t *sctx,
79 int node_idx, int str_idx, int max_str_idx)
80 internal_function;
81#endif /* RE_ENABLE_I18N */
82static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
83 re_sift_context_t *sctx)
84 internal_function;
85static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
86 re_sift_context_t *sctx, int str_idx,
87 re_node_set *cur_dest)
88 internal_function;
89static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
90 re_sift_context_t *sctx,
91 int str_idx,
92 re_node_set *dest_nodes)
93 internal_function;
94static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
95 re_node_set *dest_nodes,
96 const re_node_set *candidates)
97 internal_function;
98static int check_dst_limits (const re_match_context_t *mctx,
99 re_node_set *limits,
100 int dst_node, int dst_idx, int src_node,
101 int src_idx) internal_function;
102static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
103 int boundaries, int subexp_idx,
104 int from_node, int bkref_idx)
105 internal_function;
106static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
107 int limit, int subexp_idx,
108 int node, int str_idx,
109 int bkref_idx) internal_function;
110static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
111 re_node_set *dest_nodes,
112 const re_node_set *candidates,
113 re_node_set *limits,
114 struct re_backref_cache_entry *bkref_ents,
115 int str_idx) internal_function;
116static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
117 re_sift_context_t *sctx,
118 int str_idx, const re_node_set *candidates)
119 internal_function;
120static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
121 re_dfastate_t **dst,
122 re_dfastate_t **src, int num)
123 internal_function;
124static re_dfastate_t *find_recover_state (reg_errcode_t *err,
125 re_match_context_t *mctx) internal_function;
126static re_dfastate_t *transit_state (reg_errcode_t *err,
127 re_match_context_t *mctx,
128 re_dfastate_t *state) internal_function;
129static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
130 re_match_context_t *mctx,
131 re_dfastate_t *next_state)
132 internal_function;
133static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
134 re_node_set *cur_nodes,
135 int str_idx) internal_function;
136#if 0
137static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
138 re_match_context_t *mctx,
139 re_dfastate_t *pstate)
140 internal_function;
141#endif
142#ifdef RE_ENABLE_I18N
143static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
144 re_dfastate_t *pstate)
145 internal_function;
146#endif /* RE_ENABLE_I18N */
147static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
148 const re_node_set *nodes)
149 internal_function;
150static reg_errcode_t get_subexp (re_match_context_t *mctx,
151 int bkref_node, int bkref_str_idx)
152 internal_function;
153static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
154 const re_sub_match_top_t *sub_top,
155 re_sub_match_last_t *sub_last,
156 int bkref_node, int bkref_str)
157 internal_function;
158static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
159 int subexp_idx, int type) internal_function;
160static reg_errcode_t check_arrival (re_match_context_t *mctx,
161 state_array_t *path, int top_node,
162 int top_str, int last_node, int last_str,
163 int type) internal_function;
164static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
165 int str_idx,
166 re_node_set *cur_nodes,
167 re_node_set *next_nodes)
168 internal_function;
169static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
170 re_node_set *cur_nodes,
171 int ex_subexp, int type)
172 internal_function;
173static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
174 re_node_set *dst_nodes,
175 int target, int ex_subexp,
176 int type) internal_function;
177static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
178 re_node_set *cur_nodes, int cur_str,
179 int subexp_num, int type)
180 internal_function;
181static int build_trtable (const re_dfa_t *dfa,
182 re_dfastate_t *state) internal_function;
183#ifdef RE_ENABLE_I18N
184static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
185 const re_string_t *input, int idx)
186 internal_function;
187# ifdef _LIBC
188static unsigned int find_collation_sequence_value (const unsigned char *mbs,
189 size_t name_len)
190 internal_function;
191# endif /* _LIBC */
192#endif /* RE_ENABLE_I18N */
193static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
194 const re_dfastate_t *state,
195 re_node_set *states_node,
196 bitset_t *states_ch) internal_function;
197static int check_node_accept (const re_match_context_t *mctx,
198 const re_token_t *node, int idx)
199 internal_function;
200static reg_errcode_t extend_buffers (re_match_context_t *mctx)
201 internal_function;
202
203/* Entry point for POSIX code. */
204
205/* regexec searches for a given pattern, specified by PREG, in the
206 string STRING.
207
208 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
209 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
210 least NMATCH elements, and we set them to the offsets of the
211 corresponding matched substrings.
212
213 EFLAGS specifies `execution flags' which affect matching: if
214 REG_NOTBOL is set, then ^ does not match at the beginning of the
215 string; if REG_NOTEOL is set, then $ does not match at the end.
216
217 We return 0 if we find a match and REG_NOMATCH if not. */
218
219int
220regexec (
221 const regex_t *__restrict preg,
222 const char *__restrict string,
223 size_t nmatch,
224 regmatch_t pmatch[],
225 int eflags)
226{
227 reg_errcode_t err;
228 int start, length;
229
230 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
231 return REG_BADPAT;
232
233 if (eflags & REG_STARTEND)
234 {
235 start = pmatch[0].rm_so;
236 length = pmatch[0].rm_eo;
237 }
238 else
239 {
240 start = 0;
241 length = strlen (string);
242 }
243
244 __libc_lock_lock (dfa->lock);
245 if (preg->no_sub)
246 err = re_search_internal (preg, string, length, start, length - start,
247 length, 0, NULL, eflags);
248 else
249 err = re_search_internal (preg, string, length, start, length - start,
250 length, nmatch, pmatch, eflags);
251 __libc_lock_unlock (dfa->lock);
252 return err != REG_NOERROR;
253}
254
/* Symbol-versioning glue, compiled only when building inside libc
   (_LIBC).  regexec is published through versioned_symbol for
   GLIBC_2_3_4.  When SHLIB_COMPAT covers GLIBC_2_0 up to GLIBC_2_3_4, a
   compatibility entry point is also provided under the GLIBC_2_0
   version.  */
255#ifdef _LIBC
256# include <shlib-compat.h>
257versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
258
259# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
260__typeof__ (__regexec) __compat_regexec;
261
/* Compatibility wrapper: forwards to regexec after masking EFLAGS down to
   REG_NOTBOL | REG_NOTEOL, so REG_STARTEND (and any other bit) is dropped
   rather than honoured or rejected.  */
262int
263attribute_compat_text_section
264__compat_regexec (const regex_t *__restrict preg,
265 const char *__restrict string, size_t nmatch,
266 regmatch_t pmatch[], int eflags)
267{
268 return regexec (preg, string, nmatch, pmatch,
269 eflags & (REG_NOTBOL | REG_NOTEOL));
270}
271compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
272# endif
273#endif
274
275/* Entry points for GNU code. */
276
277/* re_match, re_search, re_match_2, re_search_2
278
279 The former two functions operate on STRING with length LENGTH,
280 while the latter two operate on the concatenation of STRING1 and STRING2
281 with lengths LENGTH1 and LENGTH2, respectively.
282
283 re_match() matches the compiled pattern in BUFP against the string,
284 starting at index START.
285
286 re_search() first tries matching at index START, then it tries to match
287 starting from index START + 1, and so on. The last start position tried
288 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
289 way as re_match().)
290
291 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
292 the first STOP characters of the concatenation of the strings should be
293 considered.
294
295 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
296 and all groups are stored in REGS. (For the "_2" variants, the offsets are
297 computed relative to the concatenation, not relative to the individual
298 strings.)
299
300 On success, re_match* functions return the length of the match, re_search*
301 return the position of the start of the match. Return value -1 means no
302 match was found and -2 indicates an internal error. */
303
/* Match the pattern in BUFP against STRING (LENGTH bytes) at exactly
   position START.  Thin wrapper over re_search_stub with a zero range, the
   whole string as the stop limit, and "return the match length" mode.  */
int
re_match (struct re_pattern_buffer *bufp,
          const char *string,
          int length,
          int start,
          struct re_registers *regs)
{
  const int range = 0;    /* Try START only.  */
  const int ret_len = 1;  /* Report the length of the match.  */

  return re_search_stub (bufp, string, length, start, range, length, regs,
                         ret_len);
}
#ifdef _LIBC
weak_alias (__re_match, re_match)
#endif
316
/* Search STRING (LENGTH bytes) for the pattern in BUFP, trying start
   positions from START through START + RANGE.  Thin wrapper over
   re_search_stub with the whole string as the stop limit and "return the
   match position" mode.  */
int
re_search (struct re_pattern_buffer *bufp,
           const char *string,
           int length, int start, int range,
           struct re_registers *regs)
{
  const int ret_len = 0;  /* Report where the match starts.  */

  return re_search_stub (bufp, string, length, start, range, length, regs,
                         ret_len);
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif
328
/* Like re_match, but the subject is the concatenation of STRING1 and
   STRING2, and STOP is passed through as the stop limit.  Delegates to
   re_search_2_stub with a zero range in "return the match length" mode.  */
int
re_match_2 (struct re_pattern_buffer *bufp,
            const char *string1, int length1,
            const char *string2, int length2, int start,
            struct re_registers *regs, int stop)
{
  const int range = 0;    /* Try START only.  */
  const int ret_len = 1;  /* Report the length of the match.  */

  return re_search_2_stub (bufp, string1, length1, string2, length2,
                           start, range, regs, stop, ret_len);
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
341
/* Like re_search, but the subject is the concatenation of STRING1 and
   STRING2, and STOP is passed through as the stop limit.  Delegates to
   re_search_2_stub in "return the match position" mode.  */
int
re_search_2 (struct re_pattern_buffer *bufp,
             const char *string1, int length1,
             const char *string2, int length2, int start,
             int range, struct re_registers *regs, int stop)
{
  const int ret_len = 0;  /* Report where the match starts.  */

  return re_search_2_stub (bufp, string1, length1, string2, length2,
                           start, range, regs, stop, ret_len);
}
#ifdef _LIBC
weak_alias (__re_search_2, re_search_2)
#endif
354
355static int
356re_search_2_stub (struct re_pattern_buffer *bufp,
357 const char *string1, int length1,
358 const char *string2, int length2, int start,
359 int range, struct re_registers *regs,
360 int stop, int ret_len)
361{
362 const char *str;
363 int rval;
364 int len = length1 + length2;
365 int free_str = 0;
366
367 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
368 return -2;
369
370 /* Concatenate the strings. */
371 if (length2 > 0)
372 if (length1 > 0)
373 {
374 char *s = re_malloc (char, len);
375
376 if (BE (s == NULL, 0))
377 return -2;
378 memcpy (s, string1, length1);
379 memcpy (s + length1, string2, length2);
380 str = s;
381 free_str = 1;
382 }
383 else
384 str = string2;
385 else
386 str = string1;
387
388 rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
389 if (free_str)
390 re_free ((char *) str);
391 return rval;
392}
393
394/* The parameters have the same meaning as those of re_search.
395 Additional parameters:
396 If RET_LEN is nonzero the length of the match is returned (re_match style);
397 otherwise the position of the match is returned. */
398
399static int
400re_search_stub (struct re_pattern_buffer *bufp,
401 const char *string, int length, int start,
402 int range, int stop,
403 struct re_registers *regs, int ret_len)
404{
405 reg_errcode_t result;
406 regmatch_t *pmatch;
407 int nregs, rval;
408 int eflags = 0;
409
410 /* Check for out-of-range. */
411 if (BE (start < 0 || start > length, 0))
412 return -1;
413 if (BE (start + range > length, 0))
414 range = length - start;
415 else if (BE (start + range < 0, 0))
416 range = -start;
417
418 __libc_lock_lock (dfa->lock);
419
420 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
421 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
422
423 /* Compile fastmap if we haven't yet. */
424 if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
425 re_compile_fastmap (bufp);
426
427 if (BE (bufp->no_sub, 0))
428 regs = NULL;
429
430 /* We need at least 1 register. */
431 if (regs == NULL)
432 nregs = 1;
433 else if (BE (bufp->regs_allocated == REGS_FIXED &&
434 regs->num_regs < bufp->re_nsub + 1, 0))
435 {
436 nregs = regs->num_regs;
437 if (BE (nregs < 1, 0))
438 {
439 /* Nothing can be copied to regs. */
440 regs = NULL;
441 nregs = 1;
442 }
443 }
444 else
445 nregs = bufp->re_nsub + 1;
446 pmatch = re_malloc (regmatch_t, nregs);
447 if (BE (pmatch == NULL, 0))
448 {
449 rval = -2;
450 goto out;
451 }
452
453 result = re_search_internal (bufp, string, length, start, range, stop,
454 nregs, pmatch, eflags);
455
456 rval = 0;
457
458 /* I hope we needn't fill their regs with -1's when no match was found. */
459 if (result != REG_NOERROR)
460 rval = -1;
461 else if (regs != NULL)
462 {
463 /* If caller wants register contents data back, copy them. */
464 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
465 bufp->regs_allocated);
466 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
467 rval = -2;
468 }
469
470 if (BE (rval == 0, 1))
471 {
472 if (ret_len)
473 {
474 assert (pmatch[0].rm_so == start);
475 rval = pmatch[0].rm_eo - start;
476 }
477 else
478 rval = pmatch[0].rm_so;
479 }
480 re_free (pmatch);
481 out:
482 __libc_lock_unlock (dfa->lock);
483 return rval;
484}
485
486static unsigned
487re_copy_regs (struct re_registers *regs,
488 regmatch_t *pmatch,
489 int nregs, int regs_allocated)
490{
491 int rval = REGS_REALLOCATE;
492 int i;
493 int need_regs = nregs + 1;
494 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
495 uses. */
496
497 /* Have the register data arrays been allocated? */
498 if (regs_allocated == REGS_UNALLOCATED)
499 { /* No. So allocate them with malloc. */
500 regs->start = re_malloc (regoff_t, need_regs);
501 if (BE (regs->start == NULL, 0))
502 return REGS_UNALLOCATED;
503 regs->end = re_malloc (regoff_t, need_regs);
504 if (BE (regs->end == NULL, 0))
505 {
506 re_free (regs->start);
507 return REGS_UNALLOCATED;
508 }
509 regs->num_regs = need_regs;
510 }
511 else if (regs_allocated == REGS_REALLOCATE)
512 { /* Yes. If we need more elements than were already
513 allocated, reallocate them. If we need fewer, just
514 leave it alone. */
515 if (BE (need_regs > regs->num_regs, 0))
516 {
517 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
518 regoff_t *new_end;
519 if (BE (new_start == NULL, 0))
520 return REGS_UNALLOCATED;
521 new_end = re_realloc (regs->end, regoff_t, need_regs);
522 if (BE (new_end == NULL, 0))
523 {
524 re_free (new_start);
525 return REGS_UNALLOCATED;
526 }
527 regs->start = new_start;
528 regs->end = new_end;
529 regs->num_regs = need_regs;
530 }
531 }
532 else
533 {
534 assert (regs_allocated == REGS_FIXED);
535 /* This function may not be called with REGS_FIXED and nregs too big. */
536 assert (regs->num_regs >= nregs);
537 rval = REGS_FIXED;
538 }
539
540 /* Copy the regs. */
541 for (i = 0; i < nregs; ++i)
542 {
543 regs->start[i] = pmatch[i].rm_so;
544 regs->end[i] = pmatch[i].rm_eo;
545 }
546 for ( ; i < regs->num_regs; ++i)
547 regs->start[i] = regs->end[i] = -1;
548
549 return rval;
550}
551
552/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
553 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
554 this memory for recording register information. STARTS and ENDS
555 must be allocated using the malloc library routine, and must each
556 be at least NUM_REGS * sizeof (regoff_t) bytes long.
557
558 If NUM_REGS == 0, then subsequent matches should allocate their own
559 register data.
560
561 Unless this function is called, the first search or match using
562 PATTERN_BUFFER will allocate its own register data, without
563 freeing the old data. */
564
565void
566re_set_registers (struct re_pattern_buffer *bufp,
567 struct re_registers *regs,
568 unsigned num_regs,
569 regoff_t *starts,
570 regoff_t *ends)
571{
572 if (num_regs)
573 {
574 bufp->regs_allocated = REGS_REALLOCATE;
575 regs->num_regs = num_regs;
576 regs->start = starts;
577 regs->end = ends;
578 }
579 else
580 {
581 bufp->regs_allocated = REGS_UNALLOCATED;
582 regs->num_regs = 0;
583 regs->start = regs->end = (regoff_t *) 0;
584 }
585}
586#ifdef _LIBC
587weak_alias (__re_set_registers, re_set_registers)
588#endif
589
590/* Entry points compatible with 4.2 BSD regex library. We don't define
591 them unless specifically requested. */
592
#if defined _REGEX_RE_COMP || defined _LIBC
/* 4.2 BSD compatible matcher: test S against the pattern held in
   re_comp_buf.  Returns 1 when regexec reports a match and 0 otherwise.
   Written as a prototype definition; old-style (K&R) parameter lists are
   no longer valid in C23.  */
int
# ifdef _LIBC
weak_function
# endif
re_exec (const char *s)
{
  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
}
#endif /* _REGEX_RE_COMP || _LIBC */
604
605/* Internal entry point. */
606
607/* Searches for a compiled pattern PREG in the string STRING, whose
608 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
609 meanings as in regexec. START and RANGE have the same meanings
610 as in re_search.
611 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
612 otherwise return the error code.
613 Note: We assume front end functions already check ranges.
614 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
615
/* Outline of the steps below:
   1. Bail out with REG_NOMATCH if the pattern was never compiled.
   2. Collapse the search to position 0 when the pattern is anchored.
   3. Set up the input buffer, the match context and (when needed) the
      state log.
   4. Walk the candidate start positions, using the fastmap when one is
      usable, and run check_matching at each plausible one.
   5. On success fill PMATCH, converting offsets back to positions in
      STRING.
   6. Release everything at free_return and return the error code.  */
616static reg_errcode_t
617re_search_internal (const regex_t *preg,
618 const char *string,
619 int length, int start, int range, int stop,
620 size_t nmatch, regmatch_t pmatch[],
621 int eflags)
622{
623 reg_errcode_t err;
624 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
625 int left_lim, right_lim, incr;
626 int fl_longest_match, match_first, match_kind, match_last = -1;
627 int extra_nmatch;
628 int sb, ch;
629#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
630 re_match_context_t mctx = { .dfa = dfa };
631#else
632 re_match_context_t mctx;
633#endif
/* The fastmap is only consulted when it exists, is marked accurate, RANGE
   is nonzero and the pattern cannot match the empty string.  */
634 char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
635 && range && !preg->can_be_null) ? preg->fastmap : NULL;
636 RE_TRANSLATE_TYPE t = preg->translate;
637
638#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
639 memset (&mctx, '\0', sizeof (re_match_context_t));
640 mctx.dfa = dfa;
641#endif
642
/* PMATCH slots beyond re_nsub + 1 can never receive a group.  Remember how
   many there are (they are set to -1 near the end) and trim NMATCH to the
   slots that can actually be filled.  */
643 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
644 nmatch -= extra_nmatch;
645
646 /* Check if the DFA haven't been compiled. */
647 if (BE (preg->used == 0 || dfa->init_state == NULL
648 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
649 || dfa->init_state_begbuf == NULL, 0))
650 return REG_NOMATCH;
651
652#ifdef DEBUG
653 /* We assume front-end functions already check them. */
654 assert (start + range >= 0 && start + range <= length);
655#endif
656
657 /* If initial states with non-begbuf contexts have no elements,
658 the regex must be anchored. If preg->newline_anchor is set,
659 we'll never use init_state_nl, so do not check it. */
660 if (dfa->init_state->nodes.nelem == 0
661 && dfa->init_state_word->nodes.nelem == 0
662 && (dfa->init_state_nl->nodes.nelem == 0
663 || !preg->newline_anchor))
664 {
/* An anchored pattern can only match at position 0, so give up unless
   one end of the requested range is 0.  */
665 if (start != 0 && start + range != 0)
666 return REG_NOMATCH;
667 start = range = 0;
668 }
669
670 /* We must check the longest matching, if nmatch > 0. */
671 fl_longest_match = (nmatch != 0 || dfa->nbackref);
672
673 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
674 preg->translate, preg->syntax & RE_ICASE, dfa);
675 if (BE (err != REG_NOERROR, 0))
676 goto free_return;
677 mctx.input.stop = stop;
678 mctx.input.raw_stop = stop;
679 mctx.input.newline_anchor = preg->newline_anchor;
680
681 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
682 if (BE (err != REG_NOERROR, 0))
683 goto free_return;
684
685 /* We will log all the DFA states through which the dfa pass,
686 if nmatch > 1, or this dfa has "multibyte node", which is a
687 back-reference or a node which can accept multibyte character or
688 multi character collating element. */
689 if (nmatch > 1 || dfa->has_mb_node)
690 {
691 /* Avoid overflow. */
692 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
693 {
694 err = REG_ESPACE;
695 goto free_return;
696 }
697
698 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
699 if (BE (mctx.state_log == NULL, 0))
700 {
701 err = REG_ESPACE;
702 goto free_return;
703 }
704 }
705 else
706 mctx.state_log = NULL;
707
708 match_first = start;
709 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
710 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
711
712 /* Check incrementally whether or not the input string matches. */
713 incr = (range < 0) ? -1 : 1;
714 left_lim = (range < 0) ? start + range : start;
715 right_lim = (range < 0) ? start : start + range;
716 sb = dfa->mb_cur_max == 1;
/* MATCH_KIND selects the scanning strategy in the switch below.  Without a
   fastmap it is 8.  With one it is a bit mask:
     4  set when the locale is single-byte, or when neither RE_ICASE nor a
        translate table is in effect;
     2  set when searching forward (RANGE >= 0);
     1  set when a translate table is present.  */
717 match_kind =
718 (fastmap
719 ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
720 | (range >= 0 ? 2 : 0)
721 | (t != NULL ? 1 : 0))
722 : 8);
723
724 for (;; match_first += incr)
725 {
726 err = REG_NOMATCH;
727 if (match_first < left_lim || right_lim < match_first)
728 goto free_return;
729
730 /* Advance as rapidly as possible through the string, until we
731 find a plausible place to start matching. This may be done
732 with varying efficiency, so there are various possibilities:
733 only the most common of them are specialized, in order to
734 save on code size. We use a switch statement for speed. */
735 switch (match_kind)
736 {
737 case 8:
738 /* No fastmap. */
739 break;
740
741 case 7:
742 /* Fastmap with single-byte translation, match forward. */
743 while (BE (match_first < right_lim, 1)
744 && !fastmap[t[(unsigned char) string[match_first]]])
745 ++match_first;
746 goto forward_match_found_start_or_reached_end;
747
748 case 6:
749 /* Fastmap without translation, match forward. */
750 while (BE (match_first < right_lim, 1)
751 && !fastmap[(unsigned char) string[match_first]])
752 ++match_first;
753
/* Shared tail of cases 7 and 6: the loops above stop at RIGHT_LIM without
   testing it, so test that last position here (a position at or past
   LENGTH is treated as a 0 byte).  */
754 forward_match_found_start_or_reached_end:
755 if (BE (match_first == right_lim, 0))
756 {
757 ch = match_first >= length
758 ? 0 : (unsigned char) string[match_first];
759 if (!fastmap[t ? t[ch] : ch])
760 goto free_return;
761 }
762 break;
763
764 case 4:
765 case 5:
766 /* Fastmap without multi-byte translation, match backwards. */
767 while (match_first >= left_lim)
768 {
769 ch = match_first >= length
770 ? 0 : (unsigned char) string[match_first];
771 if (fastmap[t ? t[ch] : ch])
772 break;
773 --match_first;
774 }
775 if (match_first < left_lim)
776 goto free_return;
777 break;
778
779 default:
780 /* In this case, we can't determine easily the current byte,
781 since it might be a component byte of a multibyte
782 character. Then we use the constructed buffer instead. */
783 for (;;)
784 {
785 /* If MATCH_FIRST is out of the valid range, reconstruct the
786 buffers. */
787 unsigned int offset = match_first - mctx.input.raw_mbs_idx;
788 if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
789 {
790 err = re_string_reconstruct (&mctx.input, match_first,
791 eflags);
792 if (BE (err != REG_NOERROR, 0))
793 goto free_return;
794
795 offset = match_first - mctx.input.raw_mbs_idx;
796 }
797 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
798 Note that MATCH_FIRST must not be smaller than 0. */
799 ch = (match_first >= length
800 ? 0 : re_string_byte_at (&mctx.input, offset));
801 if (fastmap[ch])
802 break;
803 match_first += incr;
804 if (match_first < left_lim || match_first > right_lim)
805 {
806 err = REG_NOMATCH;
807 goto free_return;
808 }
809 }
810 break;
811 }
812
813 /* Reconstruct the buffers so that the matcher can assume that
814 the matching starts from the beginning of the buffer. */
815 err = re_string_reconstruct (&mctx.input, match_first, eflags);
816 if (BE (err != REG_NOERROR, 0))
817 goto free_return;
818
819#ifdef RE_ENABLE_I18N
820 /* Don't consider this char as a possible match start if it is part,
821 yet isn't the head, of a multibyte character. */
822 if (!sb && !re_string_first_byte (&mctx.input, 0))
823 continue;
824#endif
825
826 /* It seems to be appropriate one, then use the matcher. */
827 /* We assume that the matching starts from 0. */
828 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
/* The result is handled below as: -1 no match at this start position,
   -2 failure (reported as REG_ESPACE), anything else the index where the
   match ends.  */
829 match_last = check_matching (&mctx, fl_longest_match,
830 range >= 0 ? &match_first : NULL);
831 if (match_last != -1)
832 {
833 if (BE (match_last == -2, 0))
834 {
835 err = REG_ESPACE;
836 goto free_return;
837 }
838 else
839 {
840 mctx.match_last = match_last;
841 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
842 {
843 re_dfastate_t *pstate = mctx.state_log[match_last];
844 mctx.last_node = check_halt_state_context (&mctx, pstate,
845 match_last);
846 }
847 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
848 || dfa->nbackref)
849 {
850 err = prune_impossible_nodes (&mctx);
851 if (err == REG_NOERROR)
852 break;
853 if (BE (err != REG_NOMATCH, 0))
854 goto free_return;
/* Pruning rejected this candidate; keep scanning from the next start
   position.  */
855 match_last = -1;
856 }
857 else
858 break; /* We found a match. */
859 }
860 }
861
862 match_ctx_clean (&mctx);
863 }
864
865#ifdef DEBUG
866 assert (match_last != -1);
867 assert (err == REG_NOERROR);
868#endif
869
870 /* Set pmatch[] if we need. */
871 if (nmatch > 0)
872 {
873 int reg_idx;
874
875 /* Initialize registers. */
876 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
877 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
878
879 /* Set the points where matching start/end. */
880 pmatch[0].rm_so = 0;
881 pmatch[0].rm_eo = mctx.match_last;
882
883 if (!preg->no_sub && nmatch > 1)
884 {
885 err = set_regs (preg, &mctx, nmatch, pmatch,
886 dfa->has_plural_match && dfa->nbackref > 0);
887 if (BE (err != REG_NOERROR, 0))
888 goto free_return;
889 }
890
891 /* At last, add the offset to each of the registers, since we slid
892 the buffers so that we could assume that the matching starts
893 from 0. */
894 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
895 if (pmatch[reg_idx].rm_so != -1)
896 {
897#ifdef RE_ENABLE_I18N
898 if (BE (mctx.input.offsets_needed != 0, 0))
899 {
900 pmatch[reg_idx].rm_so =
901 (pmatch[reg_idx].rm_so == mctx.input.valid_len
902 ? mctx.input.valid_raw_len
903 : mctx.input.offsets[pmatch[reg_idx].rm_so]);
904 pmatch[reg_idx].rm_eo =
905 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
906 ? mctx.input.valid_raw_len
907 : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
908 }
909#else
910 assert (mctx.input.offsets_needed == 0);
911#endif
912 pmatch[reg_idx].rm_so += match_first;
913 pmatch[reg_idx].rm_eo += match_first;
914 }
/* Slots the pattern has no group for are reported as unused.  */
915 for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
916 {
917 pmatch[nmatch + reg_idx].rm_so = -1;
918 pmatch[nmatch + reg_idx].rm_eo = -1;
919 }
920
/* Where the sub-expression map sends a group to a different one, copy
   that group's offsets.  */
921 if (dfa->subexp_map)
922 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
923 if (dfa->subexp_map[reg_idx] != reg_idx)
924 {
925 pmatch[reg_idx + 1].rm_so
926 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
927 pmatch[reg_idx + 1].rm_eo
928 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
929 }
930 }
931
/* Common exit for success and failure; ERR holds the result.
   match_ctx_free is called only when the pattern has back-references.  */
932 free_return:
933 re_free (mctx.state_log);
934 if (dfa->nbackref)
935 match_ctx_free (&mctx);
936 re_string_destruct (&mctx.input);
937 return err;
938}
939
940static reg_errcode_t
941prune_impossible_nodes (re_match_context_t *mctx)
942{
943 const re_dfa_t *const dfa = mctx->dfa;
944 int halt_node, match_last;
945 reg_errcode_t ret;
946 re_dfastate_t **sifted_states;
947 re_dfastate_t **lim_states = NULL;
948 re_sift_context_t sctx;
949#ifdef DEBUG
950 assert (mctx->state_log != NULL);
951#endif
952 match_last = mctx->match_last;
953 halt_node = mctx->last_node;
954
955 /* Avoid overflow. */
956 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
957 return REG_ESPACE;
958
959 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
960 if (BE (sifted_states == NULL, 0))
961 {
962 ret = REG_ESPACE;
963 goto free_return;
964 }
965 if (dfa->nbackref)
966 {
967 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
968 if (BE (lim_states == NULL, 0))
969 {
970 ret = REG_ESPACE;
971 goto free_return;
972 }
973 while (1)
974 {
975 memset (lim_states, '\0',
976 sizeof (re_dfastate_t *) * (match_last + 1));
977 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
978 match_last);
979 ret = sift_states_backward (mctx, &sctx);
980 re_node_set_free (&sctx.limits);
981 if (BE (ret != REG_NOERROR, 0))
982 goto free_return;
983 if (sifted_states[0] != NULL || lim_states[0] != NULL)
984 break;
985 do
986 {
987 --match_last;
988 if (match_last < 0)
989 {
990 ret = REG_NOMATCH;
991 goto free_return;
992 }
993 } while (mctx->state_log[match_last] == NULL
994 || !mctx->state_log[match_last]->halt);
995 halt_node = check_halt_state_context (mctx,
996 mctx->state_log[match_last],
997 match_last);
998 }
999 ret = merge_state_array (dfa, sifted_states, lim_states,
1000 match_last + 1);
1001 re_free (lim_states);
1002 lim_states = NULL;
1003 if (BE (ret != REG_NOERROR, 0))
1004 goto free_return;
1005 }
1006 else
1007 {
1008 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1009 ret = sift_states_backward (mctx, &sctx);
1010 re_node_set_free (&sctx.limits);
1011 if (BE (ret != REG_NOERROR, 0))
1012 goto free_return;
1013 if (sifted_states[0] == NULL)
1014 {
1015 ret = REG_NOMATCH;
1016 goto free_return;
1017 }
1018 }
1019 re_free (mctx->state_log);
1020 mctx->state_log = sifted_states;
1021 sifted_states = NULL;
1022 mctx->last_node = halt_node;
1023 mctx->match_last = match_last;
1024 ret = REG_NOERROR;
1025 free_return:
1026 re_free (sifted_states);
1027 re_free (lim_states);
1028 return ret;
1029}
1030
1031/* Acquire an initial state and return it.
1032 We must select appropriate initial state depending on the context,
1033 since initial states may have constraints like "\<", "^", etc.. */
1034
1035static inline re_dfastate_t *
1036__attribute ((always_inline)) internal_function
1037acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1038 int idx)
1039{
1040 const re_dfa_t *const dfa = mctx->dfa;
1041 if (dfa->init_state->has_constraint)
1042 {
1043 unsigned int context;
1044 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1045 if (IS_WORD_CONTEXT (context))
1046 return dfa->init_state_word;
1047 else if (IS_ORDINARY_CONTEXT (context))
1048 return dfa->init_state;
1049 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1050 return dfa->init_state_begbuf;
1051 else if (IS_NEWLINE_CONTEXT (context))
1052 return dfa->init_state_nl;
1053 else if (IS_BEGBUF_CONTEXT (context))
1054 {
1055 /* It is relatively rare case, then calculate on demand. */
1056 return re_acquire_state_context (err, dfa,
1057 dfa->init_state->entrance_nodes,
1058 context);
1059 }
1060 else
1061 /* Must not happen? */
1062 return dfa->init_state;
1063 }
1064 else
1065 return dfa->init_state;
1066}
1067
1068/* Check whether the regular expression match input string INPUT or not,
1069 and return the index where the matching end, return -1 if not match,
1070 or return -2 in case of an error.
1071 FL_LONGEST_MATCH means we want the POSIX longest matching.
1072 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1073 next place where we may want to try matching.
1074 Note that the matcher assume that the matching starts from the current
1075 index of the buffer. */
1076
1077static int
1078internal_function
1079check_matching (re_match_context_t *mctx, int fl_longest_match,
1080 int *p_match_first)
1081{
1082 const re_dfa_t *const dfa = mctx->dfa;
1083 reg_errcode_t err;
1084 int match = 0;
1085 int match_last = -1;
1086 int cur_str_idx = re_string_cur_idx (&mctx->input);
1087 re_dfastate_t *cur_state;
1088 int at_init_state = p_match_first != NULL;
1089 int next_start_idx = cur_str_idx;
1090
1091 err = REG_NOERROR;
1092 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1093 /* An initial state must not be NULL (invalid). */
1094 if (BE (cur_state == NULL, 0))
1095 {
1096 assert (err == REG_ESPACE);
1097 return -2;
1098 }
1099
1100 if (mctx->state_log != NULL)
1101 {
1102 mctx->state_log[cur_str_idx] = cur_state;
1103
1104 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1105 later. E.g. Processing back references. */
1106 if (BE (dfa->nbackref, 0))
1107 {
1108 at_init_state = 0;
1109 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1110 if (BE (err != REG_NOERROR, 0))
1111 return err;
1112
1113 if (cur_state->has_backref)
1114 {
1115 err = transit_state_bkref (mctx, &cur_state->nodes);
1116 if (BE (err != REG_NOERROR, 0))
1117 return err;
1118 }
1119 }
1120 }
1121
1122 /* If the RE accepts NULL string. */
1123 if (BE (cur_state->halt, 0))
1124 {
1125 if (!cur_state->has_constraint
1126 || check_halt_state_context (mctx, cur_state, cur_str_idx))
1127 {
1128 if (!fl_longest_match)
1129 return cur_str_idx;
1130 else
1131 {
1132 match_last = cur_str_idx;
1133 match = 1;
1134 }
1135 }
1136 }
1137
1138 while (!re_string_eoi (&mctx->input))
1139 {
1140 re_dfastate_t *old_state = cur_state;
1141 int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1142
1143 if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1144 || (BE (next_char_idx >= mctx->input.valid_len, 0)
1145 && mctx->input.valid_len < mctx->input.len))
1146 {
1147 err = extend_buffers (mctx);
1148 if (BE (err != REG_NOERROR, 0))
1149 {
1150 assert (err == REG_ESPACE);
1151 return -2;
1152 }
1153 }
1154
1155 cur_state = transit_state (&err, mctx, cur_state);
1156 if (mctx->state_log != NULL)
1157 cur_state = merge_state_with_log (&err, mctx, cur_state);
1158
1159 if (cur_state == NULL)
1160 {
1161 /* Reached the invalid state or an error. Try to recover a valid
1162 state using the state log, if available and if we have not
1163 already found a valid (even if not the longest) match. */
1164 if (BE (err != REG_NOERROR, 0))
1165 return -2;
1166
1167 if (mctx->state_log == NULL
1168 || (match && !fl_longest_match)
1169 || (cur_state = find_recover_state (&err, mctx)) == NULL)
1170 break;
1171 }
1172
1173 if (BE (at_init_state, 0))
1174 {
1175 if (old_state == cur_state)
1176 next_start_idx = next_char_idx;
1177 else
1178 at_init_state = 0;
1179 }
1180
1181 if (cur_state->halt)
1182 {
1183 /* Reached a halt state.
1184 Check the halt state can satisfy the current context. */
1185 if (!cur_state->has_constraint
1186 || check_halt_state_context (mctx, cur_state,
1187 re_string_cur_idx (&mctx->input)))
1188 {
1189 /* We found an appropriate halt state. */
1190 match_last = re_string_cur_idx (&mctx->input);
1191 match = 1;
1192
1193 /* We found a match, do not modify match_first below. */
1194 p_match_first = NULL;
1195 if (!fl_longest_match)
1196 break;
1197 }
1198 }
1199 }
1200
1201 if (p_match_first)
1202 *p_match_first += next_start_idx;
1203
1204 return match_last;
1205}
1206
1207/* Check NODE match the current context. */
1208
1209static int
1210internal_function
1211check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
1212{
1213 re_token_type_t type = dfa->nodes[node].type;
1214 unsigned int constraint = dfa->nodes[node].constraint;
1215 if (type != END_OF_RE)
1216 return 0;
1217 if (!constraint)
1218 return 1;
1219 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1220 return 0;
1221 return 1;
1222}
1223
1224/* Check the halt state STATE match the current context.
1225 Return 0 if not match, if the node, STATE has, is a halt node and
1226 match the context, return the node. */
1227
1228static int
1229internal_function
1230check_halt_state_context (const re_match_context_t *mctx,
1231 const re_dfastate_t *state, int idx)
1232{
1233 int i;
1234 unsigned int context;
1235#ifdef DEBUG
1236 assert (state->halt);
1237#endif
1238 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1239 for (i = 0; i < state->nodes.nelem; ++i)
1240 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1241 return state->nodes.elems[i];
1242 return 0;
1243}
1244
/* Compute the next node to which the "NFA" transits from NODE (the "NFA"
   is the NFA corresponding to the DFA), at string index *PIDX.
   Return the destination node and update EPS_VIA_NODES; when characters
   are consumed, *PIDX is advanced past them and EPS_VIA_NODES is emptied.
   Return -1 if no destination is available from NODE, and -2 if inserting
   into EPS_VIA_NODES or pushing onto the fail stack FS fails.
   FS may be NULL, in which case no alternative is recorded and the
   back-reference and state-log checks guarded by FS are skipped.  */

static int
internal_function
proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
                   int *pidx, int node, re_node_set *eps_via_nodes,
                   struct re_fail_stack_t *fs)
{
  const re_dfa_t *const dfa = mctx->dfa;
  int i, err;
  if (IS_EPSILON_NODE (dfa->nodes[node].type))
    {
      /* Epsilon node: no input is consumed; choose among its epsilon
         destinations that are present in the logged state at *PIDX.  */
      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
      re_node_set *edests = &dfa->edests[node];
      int dest_node;
      /* Remember that we passed through NODE without consuming input.  */
      err = re_node_set_insert (eps_via_nodes, node);
      if (BE (err < 0, 0))
        return -2;
      /* Pick up a valid destination, or return -1 if none is found.  */
      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
        {
          int candidate = edests->elems[i];
          if (!re_node_set_contains (cur_nodes, candidate))
            continue;
          if (dest_node == -1)
            dest_node = candidate;

          else
            {
              /* In order to avoid infinite loop like "(a*)*", return the second
                 epsilon-transition if the first was already considered.  */
              if (re_node_set_contains (eps_via_nodes, dest_node))
                return candidate;

              /* Otherwise, push the second epsilon-transition on the fail stack.  */
              else if (fs != NULL
                       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
                                           eps_via_nodes))
                return -2;

              /* We know we are going to exit.  */
              break;
            }
        }
      return dest_node;
    }
  else
    {
      /* NACCEPTED is the number of bytes NODE consumes at *PIDX, as far
         as it is known before check_node_accept is consulted.  */
      int naccepted = 0;
      re_token_type_t type = dfa->nodes[node].type;

#ifdef RE_ENABLE_I18N
      if (dfa->nodes[node].accept_mb)
        naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
      else
#endif /* RE_ENABLE_I18N */
      if (type == OP_BACK_REF)
        {
          /* A back reference consumes as many bytes as the referenced
             group currently spans in REGS.  */
          int subexp_idx = dfa->nodes[node].opr.idx + 1;
          naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
          if (fs != NULL)
            {
              /* With backtracking enabled, reject a group that is not
                 fully set, or whose text differs from the input at
                 *PIDX.  */
              if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
                return -1;
              else if (naccepted)
                {
                  char *buf = (char *) re_string_get_buffer (&mctx->input);
                  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
                              naccepted) != 0)
                    return -1;
                }
            }

          if (naccepted == 0)
            {
              /* An empty back reference behaves like an epsilon
                 transition to its first destination.  */
              int dest_node;
              err = re_node_set_insert (eps_via_nodes, node);
              if (BE (err < 0, 0))
                return -2;
              dest_node = dfa->edests[node].elems[0];
              if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
                                        dest_node))
                return dest_node;
            }
        }

      if (naccepted != 0
          || check_node_accept (mctx, dfa->nodes + node, *pidx))
        {
          int dest_node = dfa->nexts[node];
          /* Advance by one position for an ordinary node, or by the
             number of bytes accepted above.  */
          *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
          /* When backtracking, the destination must lie within the match
             and be present in the logged state at the new index.  */
          if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
                     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
                                               dest_node)))
            return -1;
          re_node_set_empty (eps_via_nodes);
          return dest_node;
        }
    }
  return -1;
}
1349
1350static reg_errcode_t
1351internal_function
1352push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1353 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1354{
1355 reg_errcode_t err;
1356 int num = fs->num++;
1357 if (fs->num == fs->alloc)
1358 {
1359 struct re_fail_stack_ent_t *new_array;
1360 new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1361 * fs->alloc * 2));
1362 if (new_array == NULL)
1363 return REG_ESPACE;
1364 fs->alloc *= 2;
1365 fs->stack = new_array;
1366 }
1367 fs->stack[num].idx = str_idx;
1368 fs->stack[num].node = dest_node;
1369 fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1370 if (fs->stack[num].regs == NULL)
1371 return REG_ESPACE;
1372 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1373 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1374 return err;
1375}
1376
1377static int
1378internal_function
1379pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
1380 regmatch_t *regs, re_node_set *eps_via_nodes)
1381{
1382 int num = --fs->num;
1383 assert (num >= 0);
1384 *pidx = fs->stack[num].idx;
1385 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1386 re_node_set_free (eps_via_nodes);
1387 re_free (fs->stack[num].regs);
1388 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1389 return fs->stack[num].node;
1390}
1391
1392/* Set the positions where the subexpressions are starts/ends to registers
1393 PMATCH.
1394 Note: We assume that pmatch[0] is already set, and
1395 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1396
1397static reg_errcode_t
1398internal_function
1399set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1400 regmatch_t *pmatch, int fl_backtrack)
1401{
1402 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1403 int idx, cur_node;
1404 re_node_set eps_via_nodes;
1405 struct re_fail_stack_t *fs;
1406 struct re_fail_stack_t fs_body = { 0, 2, NULL };
1407 regmatch_t *prev_idx_match;
1408 int prev_idx_match_malloced = 0;
1409
1410#ifdef DEBUG
1411 assert (nmatch > 1);
1412 assert (mctx->state_log != NULL);
1413#endif
1414 if (fl_backtrack)
1415 {
1416 fs = &fs_body;
1417 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1418 if (fs->stack == NULL)
1419 return REG_ESPACE;
1420 }
1421 else
1422 fs = NULL;
1423
1424 cur_node = dfa->init_node;
1425 re_node_set_init_empty (&eps_via_nodes);
1426
1427#ifdef HAVE_ALLOCA
1428 if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1429 prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1430 else
1431#endif
1432 {
1433 prev_idx_match = re_malloc (regmatch_t, nmatch);
1434 if (prev_idx_match == NULL)
1435 {
1436 free_fail_stack_return (fs);
1437 return REG_ESPACE;
1438 }
1439 prev_idx_match_malloced = 1;
1440 }
1441 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1442
1443 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1444 {
1445 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1446
1447 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1448 {
1449 int reg_idx;
1450 if (fs)
1451 {
1452 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1453 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1454 break;
1455 if (reg_idx == nmatch)
1456 {
1457 re_node_set_free (&eps_via_nodes);
1458 if (prev_idx_match_malloced)
1459 re_free (prev_idx_match);
1460 return free_fail_stack_return (fs);
1461 }
1462 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1463 &eps_via_nodes);
1464 }
1465 else
1466 {
1467 re_node_set_free (&eps_via_nodes);
1468 if (prev_idx_match_malloced)
1469 re_free (prev_idx_match);
1470 return REG_NOERROR;
1471 }
1472 }
1473
1474 /* Proceed to next node. */
1475 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1476 &eps_via_nodes, fs);
1477
1478 if (BE (cur_node < 0, 0))
1479 {
1480 if (BE (cur_node == -2, 0))
1481 {
1482 re_node_set_free (&eps_via_nodes);
1483 if (prev_idx_match_malloced)
1484 re_free (prev_idx_match);
1485 free_fail_stack_return (fs);
1486 return REG_ESPACE;
1487 }
1488 if (fs)
1489 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1490 &eps_via_nodes);
1491 else
1492 {
1493 re_node_set_free (&eps_via_nodes);
1494 if (prev_idx_match_malloced)
1495 re_free (prev_idx_match);
1496 return REG_NOMATCH;
1497 }
1498 }
1499 }
1500 re_node_set_free (&eps_via_nodes);
1501 if (prev_idx_match_malloced)
1502 re_free (prev_idx_match);
1503 return free_fail_stack_return (fs);
1504}
1505
1506static reg_errcode_t
1507internal_function
1508free_fail_stack_return (struct re_fail_stack_t *fs)
1509{
1510 if (fs)
1511 {
1512 int fs_idx;
1513 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1514 {
1515 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1516 re_free (fs->stack[fs_idx].regs);
1517 }
1518 re_free (fs->stack);
1519 }
1520 return REG_NOERROR;
1521}
1522
1523static void
1524internal_function
1525update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1526 regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
1527{
1528 int type = dfa->nodes[cur_node].type;
1529 if (type == OP_OPEN_SUBEXP)
1530 {
1531 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1532
1533 /* We are at the first node of this sub expression. */
1534 if (reg_num < nmatch)
1535 {
1536 pmatch[reg_num].rm_so = cur_idx;
1537 pmatch[reg_num].rm_eo = -1;
1538 }
1539 }
1540 else if (type == OP_CLOSE_SUBEXP)
1541 {
1542 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1543 if (reg_num < nmatch)
1544 {
1545 /* We are at the last node of this sub expression. */
1546 if (pmatch[reg_num].rm_so < cur_idx)
1547 {
1548 pmatch[reg_num].rm_eo = cur_idx;
1549 /* This is a non-empty match or we are not inside an optional
1550 subexpression. Accept this right away. */
1551 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1552 }
1553 else
1554 {
1555 if (dfa->nodes[cur_node].opt_subexp
1556 && prev_idx_match[reg_num].rm_so != -1)
1557 /* We transited through an empty match for an optional
1558 subexpression, like (a?)*, and this is not the subexp's
1559 first match. Copy back the old content of the registers
1560 so that matches of an inner subexpression are undone as
1561 well, like in ((a?))*. */
1562 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1563 else
1564 /* We completed a subexpression, but it may be part of
1565 an optional one, so do not update PREV_IDX_MATCH. */
1566 pmatch[reg_num].rm_eo = cur_idx;
1567 }
1568 }
1569 }
1570}
1571
1572/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1573 and sift the nodes in each states according to the following rules.
1574 Updated state_log will be wrote to STATE_LOG.
1575
1576 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1577 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1578 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1579 the LAST_NODE, we throw away the node `a'.
1580 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1581 string `s' and transit to `b':
1582 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1583 away the node `a'.
1584 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1585 thrown away, we throw away the node `a'.
1586 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1587 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1588 node `a'.
1589 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1590 we throw away the node `a'. */
1591
1592#define STATE_NODE_CONTAINS(state,node) \
1593 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1594
1595static reg_errcode_t
1596internal_function
1597sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1598{
1599 reg_errcode_t err;
1600 int null_cnt = 0;
1601 int str_idx = sctx->last_str_idx;
1602 re_node_set cur_dest;
1603
1604#ifdef DEBUG
1605 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1606#endif
1607
1608 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1609 transit to the last_node and the last_node itself. */
1610 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1611 if (BE (err != REG_NOERROR, 0))
1612 return err;
1613 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1614 if (BE (err != REG_NOERROR, 0))
1615 goto free_return;
1616
1617 /* Then check each states in the state_log. */
1618 while (str_idx > 0)
1619 {
1620 /* Update counters. */
1621 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1622 if (null_cnt > mctx->max_mb_elem_len)
1623 {
1624 memset (sctx->sifted_states, '\0',
1625 sizeof (re_dfastate_t *) * str_idx);
1626 re_node_set_free (&cur_dest);
1627 return REG_NOERROR;
1628 }
1629 re_node_set_empty (&cur_dest);
1630 --str_idx;
1631
1632 if (mctx->state_log[str_idx])
1633 {
1634 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1635 if (BE (err != REG_NOERROR, 0))
1636 goto free_return;
1637 }
1638
1639 /* Add all the nodes which satisfy the following conditions:
1640 - It can epsilon transit to a node in CUR_DEST.
1641 - It is in CUR_SRC.
1642 And update state_log. */
1643 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1644 if (BE (err != REG_NOERROR, 0))
1645 goto free_return;
1646 }
1647 err = REG_NOERROR;
1648 free_return:
1649 re_node_set_free (&cur_dest);
1650 return err;
1651}
1652
1653static reg_errcode_t
1654internal_function
1655build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1656 int str_idx, re_node_set *cur_dest)
1657{
1658 const re_dfa_t *const dfa = mctx->dfa;
1659 const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1660 int i;
1661
1662 /* Then build the next sifted state.
1663 We build the next sifted state on `cur_dest', and update
1664 `sifted_states[str_idx]' with `cur_dest'.
1665 Note:
1666 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1667 `cur_src' points the node_set of the old `state_log[str_idx]'
1668 (with the epsilon nodes pre-filtered out). */
1669 for (i = 0; i < cur_src->nelem; i++)
1670 {
1671 int prev_node = cur_src->elems[i];
1672 int naccepted = 0;
1673 int ret;
1674
1675#ifdef DEBUG
1676 re_token_type_t type = dfa->nodes[prev_node].type;
1677 assert (!IS_EPSILON_NODE (type));
1678#endif
1679#ifdef RE_ENABLE_I18N
1680 /* If the node may accept `multi byte'. */
1681 if (dfa->nodes[prev_node].accept_mb)
1682 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1683 str_idx, sctx->last_str_idx);
1684#endif /* RE_ENABLE_I18N */
1685
1686 /* We don't check backreferences here.
1687 See update_cur_sifted_state(). */
1688 if (!naccepted
1689 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1690 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1691 dfa->nexts[prev_node]))
1692 naccepted = 1;
1693
1694 if (naccepted == 0)
1695 continue;
1696
1697 if (sctx->limits.nelem)
1698 {
1699 int to_idx = str_idx + naccepted;
1700 if (check_dst_limits (mctx, &sctx->limits,
1701 dfa->nexts[prev_node], to_idx,
1702 prev_node, str_idx))
1703 continue;
1704 }
1705 ret = re_node_set_insert (cur_dest, prev_node);
1706 if (BE (ret == -1, 0))
1707 return REG_ESPACE;
1708 }
1709
1710 return REG_NOERROR;
1711}
1712
1713/* Helper functions. */
1714
1715static reg_errcode_t
1716internal_function
1717clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
1718{
1719 int top = mctx->state_log_top;
1720
1721 if (next_state_log_idx >= mctx->input.bufs_len
1722 || (next_state_log_idx >= mctx->input.valid_len
1723 && mctx->input.valid_len < mctx->input.len))
1724 {
1725 reg_errcode_t err;
1726 err = extend_buffers (mctx);
1727 if (BE (err != REG_NOERROR, 0))
1728 return err;
1729 }
1730
1731 if (top < next_state_log_idx)
1732 {
1733 memset (mctx->state_log + top + 1, '\0',
1734 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1735 mctx->state_log_top = next_state_log_idx;
1736 }
1737 return REG_NOERROR;
1738}
1739
1740static reg_errcode_t
1741internal_function
1742merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1743 re_dfastate_t **src, int num)
1744{
1745 int st_idx;
1746 reg_errcode_t err;
1747 for (st_idx = 0; st_idx < num; ++st_idx)
1748 {
1749 if (dst[st_idx] == NULL)
1750 dst[st_idx] = src[st_idx];
1751 else if (src[st_idx] != NULL)
1752 {
1753 re_node_set merged_set;
1754 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1755 &src[st_idx]->nodes);
1756 if (BE (err != REG_NOERROR, 0))
1757 return err;
1758 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1759 re_node_set_free (&merged_set);
1760 if (BE (err != REG_NOERROR, 0))
1761 return err;
1762 }
1763 }
1764 return REG_NOERROR;
1765}
1766
1767static reg_errcode_t
1768internal_function
1769update_cur_sifted_state (const re_match_context_t *mctx,
1770 re_sift_context_t *sctx, int str_idx,
1771 re_node_set *dest_nodes)
1772{
1773 const re_dfa_t *const dfa = mctx->dfa;
1774 reg_errcode_t err = REG_NOERROR;
1775 const re_node_set *candidates;
1776 candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1777 : &mctx->state_log[str_idx]->nodes);
1778
1779 if (dest_nodes->nelem == 0)
1780 sctx->sifted_states[str_idx] = NULL;
1781 else
1782 {
1783 if (candidates)
1784 {
1785 /* At first, add the nodes which can epsilon transit to a node in
1786 DEST_NODE. */
1787 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1788 if (BE (err != REG_NOERROR, 0))
1789 return err;
1790
1791 /* Then, check the limitations in the current sift_context. */
1792 if (sctx->limits.nelem)
1793 {
1794 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1795 mctx->bkref_ents, str_idx);
1796 if (BE (err != REG_NOERROR, 0))
1797 return err;
1798 }
1799 }
1800
1801 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1802 if (BE (err != REG_NOERROR, 0))
1803 return err;
1804 }
1805
1806 if (candidates && mctx->state_log[str_idx]->has_backref)
1807 {
1808 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1809 if (BE (err != REG_NOERROR, 0))
1810 return err;
1811 }
1812 return REG_NOERROR;
1813}
1814
1815static reg_errcode_t
1816internal_function
1817add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1818 const re_node_set *candidates)
1819{
1820 reg_errcode_t err = REG_NOERROR;
1821 int i;
1822
1823 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1824 if (BE (err != REG_NOERROR, 0))
1825 return err;
1826
1827 if (!state->inveclosure.alloc)
1828 {
1829 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1830 if (BE (err != REG_NOERROR, 0))
1831 return REG_ESPACE;
1832 for (i = 0; i < dest_nodes->nelem; i++)
1833 {
1834 err = re_node_set_merge (&state->inveclosure,
1835 dfa->inveclosures + dest_nodes->elems[i]);
1836 if (BE (err != REG_NOERROR, 0))
1837 return REG_ESPACE;
1838 }
1839 }
1840 return re_node_set_add_intersect (dest_nodes, candidates,
1841 &state->inveclosure);
1842}
1843
1844static reg_errcode_t
1845internal_function
1846sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1847 const re_node_set *candidates)
1848{
1849 int ecl_idx;
1850 reg_errcode_t err;
1851 re_node_set *inv_eclosure = dfa->inveclosures + node;
1852 re_node_set except_nodes;
1853 re_node_set_init_empty (&except_nodes);
1854 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1855 {
1856 int cur_node = inv_eclosure->elems[ecl_idx];
1857 if (cur_node == node)
1858 continue;
1859 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1860 {
1861 int edst1 = dfa->edests[cur_node].elems[0];
1862 int edst2 = ((dfa->edests[cur_node].nelem > 1)
1863 ? dfa->edests[cur_node].elems[1] : -1);
1864 if ((!re_node_set_contains (inv_eclosure, edst1)
1865 && re_node_set_contains (dest_nodes, edst1))
1866 || (edst2 > 0
1867 && !re_node_set_contains (inv_eclosure, edst2)
1868 && re_node_set_contains (dest_nodes, edst2)))
1869 {
1870 err = re_node_set_add_intersect (&except_nodes, candidates,
1871 dfa->inveclosures + cur_node);
1872 if (BE (err != REG_NOERROR, 0))
1873 {
1874 re_node_set_free (&except_nodes);
1875 return err;
1876 }
1877 }
1878 }
1879 }
1880 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1881 {
1882 int cur_node = inv_eclosure->elems[ecl_idx];
1883 if (!re_node_set_contains (&except_nodes, cur_node))
1884 {
1885 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1886 re_node_set_remove_at (dest_nodes, idx);
1887 }
1888 }
1889 re_node_set_free (&except_nodes);
1890 return REG_NOERROR;
1891}
1892
1893static int
1894internal_function
1895check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
1896 int dst_node, int dst_idx, int src_node, int src_idx)
1897{
1898 const re_dfa_t *const dfa = mctx->dfa;
1899 int lim_idx, src_pos, dst_pos;
1900
1901 int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1902 int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1903 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1904 {
1905 int subexp_idx;
1906 struct re_backref_cache_entry *ent;
1907 ent = mctx->bkref_ents + limits->elems[lim_idx];
1908 subexp_idx = dfa->nodes[ent->node].opr.idx;
1909
1910 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1911 subexp_idx, dst_node, dst_idx,
1912 dst_bkref_idx);
1913 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1914 subexp_idx, src_node, src_idx,
1915 src_bkref_idx);
1916
1917 /* In case of:
1918 <src> <dst> ( <subexp> )
1919 ( <subexp> ) <src> <dst>
1920 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1921 if (src_pos == dst_pos)
1922 continue; /* This is unrelated limitation. */
1923 else
1924 return 1;
1925 }
1926 return 0;
1927}
1928
/* Helper of check_dst_limits_calc_pos for a string index that lies on a
   boundary of the subexpression SUBEXP_IDX.
   BOUNDARIES is a bit mask: bit 1 is tested together with OP_OPEN_SUBEXP
   nodes and bit 2 together with OP_CLOSE_SUBEXP nodes.
   The epsilon closure of FROM_NODE is scanned: return -1 when an opening
   node of the subexpression is found (bit 1 set), 0 when a closing node
   is found (bit 2 set); otherwise return 1 if bit 2 is set and 0 if not.
   BKREF_IDX is the index of the first back-reference cache entry to
   consult for OP_BACK_REF nodes, or -1 for none.  */
static int
internal_function
check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
                             int subexp_idx, int from_node, int bkref_idx)
{
  const re_dfa_t *const dfa = mctx->dfa;
  const re_node_set *eclosures = dfa->eclosures + from_node;
  int node_idx;

  /* Else, we are on the boundary: examine the nodes on the epsilon
     closure.  */
  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
    {
      int node = eclosures->elems[node_idx];
      switch (dfa->nodes[node].type)
        {
        case OP_BACK_REF:
          if (bkref_idx != -1)
            {
              struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
              /* Walk the run of cache entries starting at BKREF_IDX; the
                 `more' flag of an entry says whether another follows.
                 Note that `continue' jumps to the loop condition, which
                 also advances ENT.  */
              do
                {
                  int dst, cpos;

                  if (ent->node != node)
                    continue;

                  /* Skip entries already known not to reach this
                     subexpression (the map only covers indices below
                     BITSET_WORD_BITS).  */
                  if (subexp_idx < BITSET_WORD_BITS
                      && !(ent->eps_reachable_subexps_map
                           & ((bitset_word_t) 1 << subexp_idx)))
                    continue;

                  /* Recurse trying to reach the OP_OPEN_SUBEXP and
                     OP_CLOSE_SUBEXP cases below.  But, if the
                     destination node is the same node as the source
                     node, don't recurse because it would cause an
                     infinite loop: a regex that exhibits this behavior
                     is ()\1*\1*  */
                  dst = dfa->edests[node].elems[0];
                  if (dst == from_node)
                    {
                      if (boundaries & 1)
                        return -1;
                      else /* if (boundaries & 2) */
                        return 0;
                    }

                  cpos =
                    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
                                                 dst, bkref_idx);
                  if (cpos == -1 /* && (boundaries & 1) */)
                    return -1;
                  if (cpos == 0 && (boundaries & 2))
                    return 0;

                  /* Nothing was reached through this entry: clear its
                     bit so that later calls skip it.  */
                  if (subexp_idx < BITSET_WORD_BITS)
                    ent->eps_reachable_subexps_map
                      &= ~((bitset_word_t) 1 << subexp_idx);
                }
              while (ent++->more);
            }
          break;

        case OP_OPEN_SUBEXP:
          if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
            return -1;
          break;

        case OP_CLOSE_SUBEXP:
          if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
            return 0;
          break;

        default:
            break;
        }
    }

  return (boundaries & 2) ? 1 : 0;
}
2009
2010static int
2011internal_function
2012check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
2013 int subexp_idx, int from_node, int str_idx,
2014 int bkref_idx)
2015{
2016 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2017 int boundaries;
2018
2019 /* If we are outside the range of the subexpression, return -1 or 1. */
2020 if (str_idx < lim->subexp_from)
2021 return -1;
2022
2023 if (lim->subexp_to < str_idx)
2024 return 1;
2025
2026 /* If we are within the subexpression, return 0. */
2027 boundaries = (str_idx == lim->subexp_from);
2028 boundaries |= (str_idx == lim->subexp_to) << 1;
2029 if (boundaries == 0)
2030 return 0;
2031
2032 /* Else, examine epsilon closure. */
2033 return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2034 from_node, bkref_idx);
2035}
2036
/* Check the limitations of sub expressions LIMITS, and remove the nodes
   which are against limitations from DEST_NODES.
   LIMITS holds indices into BKREF_ENTS; STR_IDX is the string index of
   the state being sifted, and CANDIDATES is passed on to
   sub_epsilon_src_nodes, which performs the removals.
   Return REG_NOERROR, or the first error reported by
   sub_epsilon_src_nodes.  */

static reg_errcode_t
internal_function
check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
                     const re_node_set *candidates, re_node_set *limits,
                     struct re_backref_cache_entry *bkref_ents, int str_idx)
{
  reg_errcode_t err;
  int node_idx, lim_idx;

  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
    {
      int subexp_idx;
      struct re_backref_cache_entry *ent;
      ent = bkref_ents + limits->elems[lim_idx];

      /* Only indices after the start of the subexpression and up to the
         position of the back reference are affected.  */
      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
        continue; /* This is unrelated limitation.  */

      subexp_idx = dfa->nodes[ent->node].opr.idx;
      if (ent->subexp_to == str_idx)
        {
          /* We are at the end of the subexpression: look for its open
             and close nodes among DEST_NODES.  */
          int ops_node = -1;
          int cls_node = -1;
          for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
            {
              int node = dest_nodes->elems[node_idx];
              re_token_type_t type = dfa->nodes[node].type;
              if (type == OP_OPEN_SUBEXP
                  && subexp_idx == dfa->nodes[node].opr.idx)
                ops_node = node;
              else if (type == OP_CLOSE_SUBEXP
                       && subexp_idx == dfa->nodes[node].opr.idx)
                cls_node = node;
            }

          /* Check the limitation of the open subexpression.  */
          /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
          if (ops_node >= 0)
            {
              err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
                                           candidates);
              if (BE (err != REG_NOERROR, 0))
                return err;
            }

          /* Check the limitation of the close subexpression.  */
          if (cls_node >= 0)
            for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
              {
                int node = dest_nodes->elems[node_idx];
                if (!re_node_set_contains (dfa->inveclosures + node,
                                           cls_node)
                    && !re_node_set_contains (dfa->eclosures + node,
                                              cls_node))
                  {
                    /* It is against this limitation.
                       Remove it from the current sifted state.  */
                    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
                                                 candidates);
                    if (BE (err != REG_NOERROR, 0))
                      return err;
                    /* DEST_NODES was modified by the call above, so
                       look at this position again.  */
                    --node_idx;
                  }
              }
        }
      else /* (ent->subexp_to != str_idx)  */
        {
          for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
            {
              int node = dest_nodes->elems[node_idx];
              re_token_type_t type = dfa->nodes[node].type;
              if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
                {
                  if (subexp_idx != dfa->nodes[node].opr.idx)
                    continue;
                  /* It is against this limitation.
                     Remove it from the current sifted state.  */
                  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
                                               candidates);
                  if (BE (err != REG_NOERROR, 0))
                    return err;
                }
            }
        }
    }
  return REG_NOERROR;
}
2127
2128static reg_errcode_t
2129internal_function
2130sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2131 int str_idx, const re_node_set *candidates)
2132{
2133 const re_dfa_t *const dfa = mctx->dfa;
2134 reg_errcode_t err;
2135 int node_idx, node;
2136 re_sift_context_t local_sctx;
2137 int first_idx = search_cur_bkref_entry (mctx, str_idx);
2138
2139 if (first_idx == -1)
2140 return REG_NOERROR;
2141
2142 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
2143
2144 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2145 {
2146 int enabled_idx;
2147 re_token_type_t type;
2148 struct re_backref_cache_entry *entry;
2149 node = candidates->elems[node_idx];
2150 type = dfa->nodes[node].type;
2151 /* Avoid infinite loop for the REs like "()\1+". */
2152 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2153 continue;
2154 if (type != OP_BACK_REF)
2155 continue;
2156
2157 entry = mctx->bkref_ents + first_idx;
2158 enabled_idx = first_idx;
2159 do
2160 {
2161 int subexp_len;
2162 int to_idx;
2163 int dst_node;
2164 int ret;
2165 re_dfastate_t *cur_state;
2166
2167 if (entry->node != node)
2168 continue;
2169 subexp_len = entry->subexp_to - entry->subexp_from;
2170 to_idx = str_idx + subexp_len;
2171 dst_node = (subexp_len ? dfa->nexts[node]
2172 : dfa->edests[node].elems[0]);
2173
2174 if (to_idx > sctx->last_str_idx
2175 || sctx->sifted_states[to_idx] == NULL
2176 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2177 || check_dst_limits (mctx, &sctx->limits, node,
2178 str_idx, dst_node, to_idx))
2179 continue;
2180
2181 if (local_sctx.sifted_states == NULL)
2182 {
2183 local_sctx = *sctx;
2184 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2185 if (BE (err != REG_NOERROR, 0))
2186 goto free_return;
2187 }
2188 local_sctx.last_node = node;
2189 local_sctx.last_str_idx = str_idx;
2190 ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2191 if (BE (ret < 0, 0))
2192 {
2193 err = REG_ESPACE;
2194 goto free_return;
2195 }
2196 cur_state = local_sctx.sifted_states[str_idx];
2197 err = sift_states_backward (mctx, &local_sctx);
2198 if (BE (err != REG_NOERROR, 0))
2199 goto free_return;
2200 if (sctx->limited_states != NULL)
2201 {
2202 err = merge_state_array (dfa, sctx->limited_states,
2203 local_sctx.sifted_states,
2204 str_idx + 1);
2205 if (BE (err != REG_NOERROR, 0))
2206 goto free_return;
2207 }
2208 local_sctx.sifted_states[str_idx] = cur_state;
2209 re_node_set_remove (&local_sctx.limits, enabled_idx);
2210
2211 /* mctx->bkref_ents may have changed, reload the pointer. */
2212 entry = mctx->bkref_ents + enabled_idx;
2213 }
2214 while (enabled_idx++, entry++->more);
2215 }
2216 err = REG_NOERROR;
2217 free_return:
2218 if (local_sctx.sifted_states != NULL)
2219 {
2220 re_node_set_free (&local_sctx.limits);
2221 }
2222
2223 return err;
2224}
2225
2226
#ifdef RE_ENABLE_I18N
/* Ask check_node_accept_bytes() how many bytes node NODE_IDX accepts at
   STR_IDX.  The result is forced to 0 when it is positive, the
   destination index does not exceed MAX_STR_IDX, and the sifted state at
   the destination does not contain the node's successor.  In every other
   case the value from check_node_accept_bytes() is returned unchanged.  */

static int
internal_function
sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
                     int node_idx, int str_idx, int max_str_idx)
{
  const re_dfa_t *const dfa = mctx->dfa;
  int naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input,
                                           str_idx);

  if (naccepted <= 0 || str_idx + naccepted > max_str_idx)
    return naccepted;

  /* The destination state still holds the successor node, so the node
     can accept `naccepted' bytes of input.  */
  if (STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
                           dfa->nexts[node_idx]))
    return naccepted;

  /* The destination was already thrown away.  */
  return 0;
}
#endif /* RE_ENABLE_I18N */
2249
2250
2251/* Functions for state transition. */
2252
2253/* Return the next state to which the current state STATE will transit by
2254 accepting the current input byte, and update STATE_LOG if necessary.
2255 If STATE can accept a multibyte char/collating element/back reference
2256 update the destination of STATE_LOG. */
2257
2258static re_dfastate_t *
2259internal_function
2260transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2261 re_dfastate_t *state)
2262{
2263 re_dfastate_t **trtable;
2264 unsigned char ch;
2265
2266#ifdef RE_ENABLE_I18N
2267 /* If the current state can accept multibyte. */
2268 if (BE (state->accept_mb, 0))
2269 {
2270 *err = transit_state_mb (mctx, state);
2271 if (BE (*err != REG_NOERROR, 0))
2272 return NULL;
2273 }
2274#endif /* RE_ENABLE_I18N */
2275
2276 /* Then decide the next state with the single byte. */
2277#if 0
2278 if (0)
2279 /* don't use transition table */
2280 return transit_state_sb (err, mctx, state);
2281#endif
2282
2283 /* Use transition table */
2284 ch = re_string_fetch_byte (&mctx->input);
2285 for (;;)
2286 {
2287 trtable = state->trtable;
2288 if (BE (trtable != NULL, 1))
2289 return trtable[ch];
2290
2291 trtable = state->word_trtable;
2292 if (BE (trtable != NULL, 1))
2293 {
2294 unsigned int context;
2295 context
2296 = re_string_context_at (&mctx->input,
2297 re_string_cur_idx (&mctx->input) - 1,
2298 mctx->eflags);
2299 if (IS_WORD_CONTEXT (context))
2300 return trtable[ch + SBC_MAX];
2301 else
2302 return trtable[ch];
2303 }
2304
2305 if (!build_trtable (mctx->dfa, state))
2306 {
2307 *err = REG_ESPACE;
2308 return NULL;
2309 }
2310
2311 /* Retry, we now have a transition table. */
2312 }
2313}
2314
2315/* Update the state_log if we need */
2316static re_dfastate_t *
2317internal_function
2318merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2319 re_dfastate_t *next_state)
2320{
2321 const re_dfa_t *const dfa = mctx->dfa;
2322 int cur_idx = re_string_cur_idx (&mctx->input);
2323
2324 if (cur_idx > mctx->state_log_top)
2325 {
2326 mctx->state_log[cur_idx] = next_state;
2327 mctx->state_log_top = cur_idx;
2328 }
2329 else if (mctx->state_log[cur_idx] == NULL)
2330 {
2331 mctx->state_log[cur_idx] = next_state;
2332 }
2333 else
2334 {
2335 re_dfastate_t *pstate;
2336 unsigned int context;
2337 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2338 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2339 the destination of a multibyte char/collating element/
2340 back reference. Then the next state is the union set of
2341 these destinations and the results of the transition table. */
2342 pstate = mctx->state_log[cur_idx];
2343 log_nodes = pstate->entrance_nodes;
2344 if (next_state != NULL)
2345 {
2346 table_nodes = next_state->entrance_nodes;
2347 *err = re_node_set_init_union (&next_nodes, table_nodes,
2348 log_nodes);
2349 if (BE (*err != REG_NOERROR, 0))
2350 return NULL;
2351 }
2352 else
2353 next_nodes = *log_nodes;
2354 /* Note: We already add the nodes of the initial state,
2355 then we don't need to add them here. */
2356
2357 context = re_string_context_at (&mctx->input,
2358 re_string_cur_idx (&mctx->input) - 1,
2359 mctx->eflags);
2360 next_state = mctx->state_log[cur_idx]
2361 = re_acquire_state_context (err, dfa, &next_nodes, context);
2362 /* We don't need to check errors here, since the return value of
2363 this function is next_state and ERR is already set. */
2364
2365 if (table_nodes != NULL)
2366 re_node_set_free (&next_nodes);
2367 }
2368
2369 if (BE (dfa->nbackref, 0) && next_state != NULL)
2370 {
2371 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2372 later. We must check them here, since the back references in the
2373 next state might use them. */
2374 *err = check_subexp_matching_top (mctx, &next_state->nodes,
2375 cur_idx);
2376 if (BE (*err != REG_NOERROR, 0))
2377 return NULL;
2378
2379 /* If the next state has back references. */
2380 if (next_state->has_backref)
2381 {
2382 *err = transit_state_bkref (mctx, &next_state->nodes);
2383 if (BE (*err != REG_NOERROR, 0))
2384 return NULL;
2385 next_state = mctx->state_log[cur_idx];
2386 }
2387 }
2388
2389 return next_state;
2390}
2391
2392/* Skip bytes in the input that correspond to part of a
2393 multi-byte match, then look in the log for a state
2394 from which to restart matching. */
2395static re_dfastate_t *
2396internal_function
2397find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2398{
2399 re_dfastate_t *cur_state;
2400 do
2401 {
2402 int max = mctx->state_log_top;
2403 int cur_str_idx = re_string_cur_idx (&mctx->input);
2404
2405 do
2406 {
2407 if (++cur_str_idx > max)
2408 return NULL;
2409 re_string_skip_bytes (&mctx->input, 1);
2410 }
2411 while (mctx->state_log[cur_str_idx] == NULL);
2412
2413 cur_state = merge_state_with_log (err, mctx, NULL);
2414 }
2415 while (*err == REG_NOERROR && cur_state == NULL);
2416 return cur_state;
2417}
2418
2419/* Helper functions for transit_state. */
2420
2421/* From the node set CUR_NODES, pick up the nodes whose types are
2422 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2423 expression. And register them to use them later for evaluating the
2424 correspoding back references. */
2425
2426static reg_errcode_t
2427internal_function
2428check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2429 int str_idx)
2430{
2431 const re_dfa_t *const dfa = mctx->dfa;
2432 int node_idx;
2433 reg_errcode_t err;
2434
2435 /* TODO: This isn't efficient.
2436 Because there might be more than one nodes whose types are
2437 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2438 nodes.
2439 E.g. RE: (a){2} */
2440 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2441 {
2442 int node = cur_nodes->elems[node_idx];
2443 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2444 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2445 && (dfa->used_bkref_map
2446 & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2447 {
2448 err = match_ctx_add_subtop (mctx, node, str_idx);
2449 if (BE (err != REG_NOERROR, 0))
2450 return err;
2451 }
2452 }
2453 return REG_NOERROR;
2454}
2455
#if 0
/* Table-free variant of the single-byte transition (currently compiled
   out).  Collect the epsilon closures of the successors of every node in
   STATE that accepts the current input byte, acquire the matching state,
   and advance the input by one byte.  */

static re_dfastate_t *
transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
                  re_dfastate_t *state)
{
  const re_dfa_t *const dfa = mctx->dfa;
  int str_idx = re_string_cur_idx (&mctx->input);
  re_node_set dests;
  re_dfastate_t *result;
  unsigned int context;
  int i;

  *err = re_node_set_alloc (&dests, state->nodes.nelem + 1);
  if (BE (*err != REG_NOERROR, 0))
    return NULL;

  for (i = 0; i < state->nodes.nelem; ++i)
    {
      int node = state->nodes.elems[i];

      if (!check_node_accept (mctx, dfa->nodes + node, str_idx))
        continue;

      *err = re_node_set_merge (&dests, dfa->eclosures + dfa->nexts[node]);
      if (BE (*err != REG_NOERROR, 0))
        {
          re_node_set_free (&dests);
          return NULL;
        }
    }

  context = re_string_context_at (&mctx->input, str_idx, mctx->eflags);
  result = re_acquire_state_context (err, dfa, &dests, context);
  /* No error check needed here: the return value of this function is
     the acquired state and ERR is already set.  */

  re_node_set_free (&dests);
  re_string_skip_bytes (&mctx->input, 1);
  return result;
}
#endif
2497
#ifdef RE_ENABLE_I18N
/* For each node of PSTATE that can accept multibyte input at the current
   position, add the epsilon closure of the node's successor to the
   state_log entry at the position just past the accepted bytes.
   Returns REG_NOERROR on success or the first error encountered.  */

static reg_errcode_t
internal_function
transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
{
  const re_dfa_t *const dfa = mctx->dfa;
  int n;

  for (n = 0; n < pstate->nodes.nelem; ++n)
    {
      int src_node = pstate->nodes.elems[n];
      const re_token_t *tok = dfa->nodes + src_node;
      re_node_set merged, *closure;
      re_dfastate_t *prev_dest;
      reg_errcode_t err;
      unsigned int context;
      int naccepted, dest_idx;

      if (!tok->accept_mb)
        continue;

      if (tok->constraint)
        {
          context = re_string_context_at (&mctx->input,
                                          re_string_cur_idx (&mctx->input),
                                          mctx->eflags);
          if (NOT_SATISFY_NEXT_CONSTRAINT (tok->constraint, context))
            continue;
        }

      /* How many bytes can the node accept?  */
      naccepted = check_node_accept_bytes (dfa, src_node, &mctx->input,
                                           re_string_cur_idx (&mctx->input));
      if (naccepted == 0)
        continue;

      /* The node accepts `naccepted' bytes.  */
      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
      if (mctx->max_mb_elem_len < naccepted)
        mctx->max_mb_elem_len = naccepted;

      err = clean_state_log_if_needed (mctx, dest_idx);
      if (BE (err != REG_NOERROR, 0))
        return err;
#ifdef DEBUG
      assert (dfa->nexts[src_node] != -1);
#endif
      closure = dfa->eclosures + dfa->nexts[src_node];

      prev_dest = mctx->state_log[dest_idx];
      if (prev_dest == NULL)
        /* Nothing logged yet: borrow the closure without copying.  */
        merged = *closure;
      else
        {
          err = re_node_set_init_union (&merged, prev_dest->entrance_nodes,
                                        closure);
          if (BE (err != REG_NOERROR, 0))
            return err;
        }

      context = re_string_context_at (&mctx->input, dest_idx - 1,
                                      mctx->eflags);
      mctx->state_log[dest_idx]
        = re_acquire_state_context (&err, dfa, &merged, context);

      /* MERGED owns storage only when it was built by the union.  */
      if (prev_dest != NULL)
        re_node_set_free (&merged);
      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
        return err;
    }
  return REG_NOERROR;
}
#endif /* RE_ENABLE_I18N */
2568
2569static reg_errcode_t
2570internal_function
2571transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2572{
2573 const re_dfa_t *const dfa = mctx->dfa;
2574 reg_errcode_t err;
2575 int i;
2576 int cur_str_idx = re_string_cur_idx (&mctx->input);
2577
2578 for (i = 0; i < nodes->nelem; ++i)
2579 {
2580 int dest_str_idx, prev_nelem, bkc_idx;
2581 int node_idx = nodes->elems[i];
2582 unsigned int context;
2583 const re_token_t *node = dfa->nodes + node_idx;
2584 re_node_set *new_dest_nodes;
2585
2586 /* Check whether `node' is a backreference or not. */
2587 if (node->type != OP_BACK_REF)
2588 continue;
2589
2590 if (node->constraint)
2591 {
2592 context = re_string_context_at (&mctx->input, cur_str_idx,
2593 mctx->eflags);
2594 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2595 continue;
2596 }
2597
2598 /* `node' is a backreference.
2599 Check the substring which the substring matched. */
2600 bkc_idx = mctx->nbkref_ents;
2601 err = get_subexp (mctx, node_idx, cur_str_idx);
2602 if (BE (err != REG_NOERROR, 0))
2603 goto free_return;
2604
2605 /* And add the epsilon closures (which is `new_dest_nodes') of
2606 the backreference to appropriate state_log. */
2607#ifdef DEBUG
2608 assert (dfa->nexts[node_idx] != -1);
2609#endif
2610 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2611 {
2612 int subexp_len;
2613 re_dfastate_t *dest_state;
2614 struct re_backref_cache_entry *bkref_ent;
2615 bkref_ent = mctx->bkref_ents + bkc_idx;
2616 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2617 continue;
2618 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2619 new_dest_nodes = (subexp_len == 0
2620 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2621 : dfa->eclosures + dfa->nexts[node_idx]);
2622 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2623 - bkref_ent->subexp_from);
2624 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2625 mctx->eflags);
2626 dest_state = mctx->state_log[dest_str_idx];
2627 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2628 : mctx->state_log[cur_str_idx]->nodes.nelem);
2629 /* Add `new_dest_node' to state_log. */
2630 if (dest_state == NULL)
2631 {
2632 mctx->state_log[dest_str_idx]
2633 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2634 context);
2635 if (BE (mctx->state_log[dest_str_idx] == NULL
2636 && err != REG_NOERROR, 0))
2637 goto free_return;
2638 }
2639 else
2640 {
2641 re_node_set dest_nodes;
2642 err = re_node_set_init_union (&dest_nodes,
2643 dest_state->entrance_nodes,
2644 new_dest_nodes);
2645 if (BE (err != REG_NOERROR, 0))
2646 {
2647 re_node_set_free (&dest_nodes);
2648 goto free_return;
2649 }
2650 mctx->state_log[dest_str_idx]
2651 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2652 re_node_set_free (&dest_nodes);
2653 if (BE (mctx->state_log[dest_str_idx] == NULL
2654 && err != REG_NOERROR, 0))
2655 goto free_return;
2656 }
2657 /* We need to check recursively if the backreference can epsilon
2658 transit. */
2659 if (subexp_len == 0
2660 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2661 {
2662 err = check_subexp_matching_top (mctx, new_dest_nodes,
2663 cur_str_idx);
2664 if (BE (err != REG_NOERROR, 0))
2665 goto free_return;
2666 err = transit_state_bkref (mctx, new_dest_nodes);
2667 if (BE (err != REG_NOERROR, 0))
2668 goto free_return;
2669 }
2670 }
2671 }
2672 err = REG_NOERROR;
2673 free_return:
2674 return err;
2675}
2676
2677/* Enumerate all the candidates which the backreference BKREF_NODE can match
2678 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2679 Note that we might collect inappropriate candidates here.
2680 However, the cost of checking them strictly here is too high, then we
2681 delay these checking for prune_impossible_nodes(). */
2682
2683static reg_errcode_t
2684internal_function
2685get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
2686{
2687 const re_dfa_t *const dfa = mctx->dfa;
2688 int subexp_num, sub_top_idx;
2689 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2690 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2691 int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2692 if (cache_idx != -1)
2693 {
2694 const struct re_backref_cache_entry *entry
2695 = mctx->bkref_ents + cache_idx;
2696 do
2697 if (entry->node == bkref_node)
2698 return REG_NOERROR; /* We already checked it. */
2699 while (entry++->more);
2700 }
2701
2702 subexp_num = dfa->nodes[bkref_node].opr.idx;
2703
2704 /* For each sub expression */
2705 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2706 {
2707 reg_errcode_t err;
2708 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2709 re_sub_match_last_t *sub_last;
2710 int sub_last_idx, sl_str, bkref_str_off;
2711
2712 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2713 continue; /* It isn't related. */
2714
2715 sl_str = sub_top->str_idx;
2716 bkref_str_off = bkref_str_idx;
2717 /* At first, check the last node of sub expressions we already
2718 evaluated. */
2719 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2720 {
2721 int sl_str_diff;
2722 sub_last = sub_top->lasts[sub_last_idx];
2723 sl_str_diff = sub_last->str_idx - sl_str;
2724 /* The matched string by the sub expression match with the substring
2725 at the back reference? */
2726 if (sl_str_diff > 0)
2727 {
2728 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2729 {
2730 /* Not enough chars for a successful match. */
2731 if (bkref_str_off + sl_str_diff > mctx->input.len)
2732 break;
2733
2734 err = clean_state_log_if_needed (mctx,
2735 bkref_str_off
2736 + sl_str_diff);
2737 if (BE (err != REG_NOERROR, 0))
2738 return err;
2739 buf = (const char *) re_string_get_buffer (&mctx->input);
2740 }
2741 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2742 /* We don't need to search this sub expression any more. */
2743 break;
2744 }
2745 bkref_str_off += sl_str_diff;
2746 sl_str += sl_str_diff;
2747 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2748 bkref_str_idx);
2749
2750 /* Reload buf, since the preceding call might have reallocated
2751 the buffer. */
2752 buf = (const char *) re_string_get_buffer (&mctx->input);
2753
2754 if (err == REG_NOMATCH)
2755 continue;
2756 if (BE (err != REG_NOERROR, 0))
2757 return err;
2758 }
2759
2760 if (sub_last_idx < sub_top->nlasts)
2761 continue;
2762 if (sub_last_idx > 0)
2763 ++sl_str;
2764 /* Then, search for the other last nodes of the sub expression. */
2765 for (; sl_str <= bkref_str_idx; ++sl_str)
2766 {
2767 int cls_node, sl_str_off;
2768 const re_node_set *nodes;
2769 sl_str_off = sl_str - sub_top->str_idx;
2770 /* The matched string by the sub expression match with the substring
2771 at the back reference? */
2772 if (sl_str_off > 0)
2773 {
2774 if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2775 {
2776 /* If we are at the end of the input, we cannot match. */
2777 if (bkref_str_off >= mctx->input.len)
2778 break;
2779
2780 err = extend_buffers (mctx);
2781 if (BE (err != REG_NOERROR, 0))
2782 return err;
2783
2784 buf = (const char *) re_string_get_buffer (&mctx->input);
2785 }
2786 if (buf [bkref_str_off++] != buf[sl_str - 1])
2787 break; /* We don't need to search this sub expression
2788 any more. */
2789 }
2790 if (mctx->state_log[sl_str] == NULL)
2791 continue;
2792 /* Does this state have a ')' of the sub expression? */
2793 nodes = &mctx->state_log[sl_str]->nodes;
2794 cls_node = find_subexp_node (dfa, nodes, subexp_num,
2795 OP_CLOSE_SUBEXP);
2796 if (cls_node == -1)
2797 continue; /* No. */
2798 if (sub_top->path == NULL)
2799 {
2800 sub_top->path = calloc (sizeof (state_array_t),
2801 sl_str - sub_top->str_idx + 1);
2802 if (sub_top->path == NULL)
2803 return REG_ESPACE;
2804 }
2805 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2806 in the current context? */
2807 err = check_arrival (mctx, sub_top->path, sub_top->node,
2808 sub_top->str_idx, cls_node, sl_str,
2809 OP_CLOSE_SUBEXP);
2810 if (err == REG_NOMATCH)
2811 continue;
2812 if (BE (err != REG_NOERROR, 0))
2813 return err;
2814 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2815 if (BE (sub_last == NULL, 0))
2816 return REG_ESPACE;
2817 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2818 bkref_str_idx);
2819 if (err == REG_NOMATCH)
2820 continue;
2821 }
2822 }
2823 return REG_NOERROR;
2824}
2825
2826/* Helper functions for get_subexp(). */
2827
2828/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2829 If it can arrive, register the sub expression expressed with SUB_TOP
2830 and SUB_LAST. */
2831
2832static reg_errcode_t
2833internal_function
2834get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2835 re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
2836{
2837 reg_errcode_t err;
2838 int to_idx;
2839 /* Can the subexpression arrive the back reference? */
2840 err = check_arrival (mctx, &sub_last->path, sub_last->node,
2841 sub_last->str_idx, bkref_node, bkref_str,
2842 OP_OPEN_SUBEXP);
2843 if (err != REG_NOERROR)
2844 return err;
2845 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2846 sub_last->str_idx);
2847 if (BE (err != REG_NOERROR, 0))
2848 return err;
2849 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2850 return clean_state_log_if_needed (mctx, to_idx);
2851}
2852
2853/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2854 Search '(' if FL_OPEN, or search ')' otherwise.
2855 TODO: This function isn't efficient...
2856 Because there might be more than one nodes whose types are
2857 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2858 nodes.
2859 E.g. RE: (a){2} */
2860
2861static int
2862internal_function
2863find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2864 int subexp_idx, int type)
2865{
2866 int cls_idx;
2867 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2868 {
2869 int cls_node = nodes->elems[cls_idx];
2870 const re_token_t *node = dfa->nodes + cls_node;
2871 if (node->type == type
2872 && node->opr.idx == subexp_idx)
2873 return cls_node;
2874 }
2875 return -1;
2876}
2877
2878/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2879 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2880 heavily reused.
2881 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2882
2883static reg_errcode_t
2884internal_function
2885check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
2886 int top_str, int last_node, int last_str, int type)
2887{
2888 const re_dfa_t *const dfa = mctx->dfa;
2889 reg_errcode_t err = REG_NOERROR;
2890 int subexp_num, backup_cur_idx, str_idx, null_cnt;
2891 re_dfastate_t *cur_state = NULL;
2892 re_node_set *cur_nodes, next_nodes;
2893 re_dfastate_t **backup_state_log;
2894 unsigned int context;
2895
2896 subexp_num = dfa->nodes[top_node].opr.idx;
2897 /* Extend the buffer if we need. */
2898 if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2899 {
2900 re_dfastate_t **new_array;
2901 int old_alloc = path->alloc;
2902 path->alloc += last_str + mctx->max_mb_elem_len + 1;
2903 new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2904 if (BE (new_array == NULL, 0))
2905 {
2906 path->alloc = old_alloc;
2907 return REG_ESPACE;
2908 }
2909 path->array = new_array;
2910 memset (new_array + old_alloc, '\0',
2911 sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2912 }
2913
2914 str_idx = path->next_idx ? path->next_idx : top_str;
2915
2916 /* Temporary modify MCTX. */
2917 backup_state_log = mctx->state_log;
2918 backup_cur_idx = mctx->input.cur_idx;
2919 mctx->state_log = path->array;
2920 mctx->input.cur_idx = str_idx;
2921
2922 /* Setup initial node set. */
2923 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2924 if (str_idx == top_str)
2925 {
2926 err = re_node_set_init_1 (&next_nodes, top_node);
2927 if (BE (err != REG_NOERROR, 0))
2928 return err;
2929 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2930 if (BE (err != REG_NOERROR, 0))
2931 {
2932 re_node_set_free (&next_nodes);
2933 return err;
2934 }
2935 }
2936 else
2937 {
2938 cur_state = mctx->state_log[str_idx];
2939 if (cur_state && cur_state->has_backref)
2940 {
2941 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2942 if (BE (err != REG_NOERROR, 0))
2943 return err;
2944 }
2945 else
2946 re_node_set_init_empty (&next_nodes);
2947 }
2948 if (str_idx == top_str || (cur_state && cur_state->has_backref))
2949 {
2950 if (next_nodes.nelem)
2951 {
2952 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2953 subexp_num, type);
2954 if (BE (err != REG_NOERROR, 0))
2955 {
2956 re_node_set_free (&next_nodes);
2957 return err;
2958 }
2959 }
2960 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2961 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2962 {
2963 re_node_set_free (&next_nodes);
2964 return err;
2965 }
2966 mctx->state_log[str_idx] = cur_state;
2967 }
2968
2969 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2970 {
2971 re_node_set_empty (&next_nodes);
2972 if (mctx->state_log[str_idx + 1])
2973 {
2974 err = re_node_set_merge (&next_nodes,
2975 &mctx->state_log[str_idx + 1]->nodes);
2976 if (BE (err != REG_NOERROR, 0))
2977 {
2978 re_node_set_free (&next_nodes);
2979 return err;
2980 }
2981 }
2982 if (cur_state)
2983 {
2984 err = check_arrival_add_next_nodes (mctx, str_idx,
2985 &cur_state->non_eps_nodes,
2986 &next_nodes);
2987 if (BE (err != REG_NOERROR, 0))
2988 {
2989 re_node_set_free (&next_nodes);
2990 return err;
2991 }
2992 }
2993 ++str_idx;
2994 if (next_nodes.nelem)
2995 {
2996 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2997 if (BE (err != REG_NOERROR, 0))
2998 {
2999 re_node_set_free (&next_nodes);
3000 return err;
3001 }
3002 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3003 subexp_num, type);
3004 if (BE (err != REG_NOERROR, 0))
3005 {
3006 re_node_set_free (&next_nodes);
3007 return err;
3008 }
3009 }
3010 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3011 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3012 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3013 {
3014 re_node_set_free (&next_nodes);
3015 return err;
3016 }
3017 mctx->state_log[str_idx] = cur_state;
3018 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3019 }
3020 re_node_set_free (&next_nodes);
3021 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3022 : &mctx->state_log[last_str]->nodes);
3023 path->next_idx = str_idx;
3024
3025 /* Fix MCTX. */
3026 mctx->state_log = backup_state_log;
3027 mctx->input.cur_idx = backup_cur_idx;
3028
3029 /* Then check the current node set has the node LAST_NODE. */
3030 if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3031 return REG_NOERROR;
3032
3033 return REG_NOMATCH;
3034}
3035
3036/* Helper functions for check_arrival. */
3037
3038/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3039 to NEXT_NODES.
3040 TODO: This function is similar to the functions transit_state*(),
3041 however this function has many additional works.
3042 Can't we unify them? */
3043
3044static reg_errcode_t
3045internal_function
3046check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
3047 re_node_set *cur_nodes, re_node_set *next_nodes)
3048{
3049 const re_dfa_t *const dfa = mctx->dfa;
3050 int result;
3051 int cur_idx;
3052#ifdef RE_ENABLE_I18N
3053 reg_errcode_t err = REG_NOERROR;
3054#endif
3055 re_node_set union_set;
3056 re_node_set_init_empty (&union_set);
3057 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3058 {
3059 int naccepted = 0;
3060 int cur_node = cur_nodes->elems[cur_idx];
3061#ifdef DEBUG
3062 re_token_type_t type = dfa->nodes[cur_node].type;
3063 assert (!IS_EPSILON_NODE (type));
3064#endif
3065#ifdef RE_ENABLE_I18N
3066 /* If the node may accept `multi byte'. */
3067 if (dfa->nodes[cur_node].accept_mb)
3068 {
3069 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3070 str_idx);
3071 if (naccepted > 1)
3072 {
3073 re_dfastate_t *dest_state;
3074 int next_node = dfa->nexts[cur_node];
3075 int next_idx = str_idx + naccepted;
3076 dest_state = mctx->state_log[next_idx];
3077 re_node_set_empty (&union_set);
3078 if (dest_state)
3079 {
3080 err = re_node_set_merge (&union_set, &dest_state->nodes);
3081 if (BE (err != REG_NOERROR, 0))
3082 {
3083 re_node_set_free (&union_set);
3084 return err;
3085 }
3086 }
3087 result = re_node_set_insert (&union_set, next_node);
3088 if (BE (result < 0, 0))
3089 {
3090 re_node_set_free (&union_set);
3091 return REG_ESPACE;
3092 }
3093 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3094 &union_set);
3095 if (BE (mctx->state_log[next_idx] == NULL
3096 && err != REG_NOERROR, 0))
3097 {
3098 re_node_set_free (&union_set);
3099 return err;
3100 }
3101 }
3102 }
3103#endif /* RE_ENABLE_I18N */
3104 if (naccepted
3105 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3106 {
3107 result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3108 if (BE (result < 0, 0))
3109 {
3110 re_node_set_free (&union_set);
3111 return REG_ESPACE;
3112 }
3113 }
3114 }
3115 re_node_set_free (&union_set);
3116 return REG_NOERROR;
3117}
3118
3119/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3120 CUR_NODES, however exclude the nodes which are:
3121 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3122 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3123*/
3124
3125static reg_errcode_t
3126internal_function
3127check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3128 int ex_subexp, int type)
3129{
3130 reg_errcode_t err;
3131 int idx, outside_node;
3132 re_node_set new_nodes;
3133#ifdef DEBUG
3134 assert (cur_nodes->nelem);
3135#endif
3136 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3137 if (BE (err != REG_NOERROR, 0))
3138 return err;
3139 /* Create a new node set NEW_NODES with the nodes which are epsilon
3140 closures of the node in CUR_NODES. */
3141
3142 for (idx = 0; idx < cur_nodes->nelem; ++idx)
3143 {
3144 int cur_node = cur_nodes->elems[idx];
3145 const re_node_set *eclosure = dfa->eclosures + cur_node;
3146 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3147 if (outside_node == -1)
3148 {
3149 /* There are no problematic nodes, just merge them. */
3150 err = re_node_set_merge (&new_nodes, eclosure);
3151 if (BE (err != REG_NOERROR, 0))
3152 {
3153 re_node_set_free (&new_nodes);
3154 return err;
3155 }
3156 }
3157 else
3158 {
3159 /* There are problematic nodes, re-calculate incrementally. */
3160 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3161 ex_subexp, type);
3162 if (BE (err != REG_NOERROR, 0))
3163 {
3164 re_node_set_free (&new_nodes);
3165 return err;
3166 }
3167 }
3168 }
3169 re_node_set_free (cur_nodes);
3170 *cur_nodes = new_nodes;
3171 return REG_NOERROR;
3172}
3173
3174/* Helper function for check_arrival_expand_ecl.
3175 Check incrementally the epsilon closure of TARGET, and if it isn't
3176 problematic append it to DST_NODES. */
3177
3178static reg_errcode_t
3179internal_function
3180check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3181 int target, int ex_subexp, int type)
3182{
3183 int cur_node;
3184 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3185 {
3186 int err;
3187
3188 if (dfa->nodes[cur_node].type == type
3189 && dfa->nodes[cur_node].opr.idx == ex_subexp)
3190 {
3191 if (type == OP_CLOSE_SUBEXP)
3192 {
3193 err = re_node_set_insert (dst_nodes, cur_node);
3194 if (BE (err == -1, 0))
3195 return REG_ESPACE;
3196 }
3197 break;
3198 }
3199 err = re_node_set_insert (dst_nodes, cur_node);
3200 if (BE (err == -1, 0))
3201 return REG_ESPACE;
3202 if (dfa->edests[cur_node].nelem == 0)
3203 break;
3204 if (dfa->edests[cur_node].nelem == 2)
3205 {
3206 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3207 dfa->edests[cur_node].elems[1],
3208 ex_subexp, type);
3209 if (BE (err != REG_NOERROR, 0))
3210 return err;
3211 }
3212 cur_node = dfa->edests[cur_node].elems[0];
3213 }
3214 return REG_NOERROR;
3215}
3216
3217
3218/* For all the back references in the current state, calculate the
3219 destination of the back references by the appropriate entry
3220 in MCTX->BKREF_ENTS. */
3221
3222static reg_errcode_t
3223internal_function
3224expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3225 int cur_str, int subexp_num, int type)
3226{
3227 const re_dfa_t *const dfa = mctx->dfa;
3228 reg_errcode_t err;
3229 int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3230 struct re_backref_cache_entry *ent;
3231
3232 if (cache_idx_start == -1)
3233 return REG_NOERROR;
3234
3235 restart:
3236 ent = mctx->bkref_ents + cache_idx_start;
3237 do
3238 {
3239 int to_idx, next_node;
3240
3241 /* Is this entry ENT is appropriate? */
3242 if (!re_node_set_contains (cur_nodes, ent->node))
3243 continue; /* No. */
3244
3245 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3246 /* Calculate the destination of the back reference, and append it
3247 to MCTX->STATE_LOG. */
3248 if (to_idx == cur_str)
3249 {
3250 /* The backreference did epsilon transit, we must re-check all the
3251 node in the current state. */
3252 re_node_set new_dests;
3253 reg_errcode_t err2, err3;
3254 next_node = dfa->edests[ent->node].elems[0];
3255 if (re_node_set_contains (cur_nodes, next_node))
3256 continue;
3257 err = re_node_set_init_1 (&new_dests, next_node);
3258 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3259 err3 = re_node_set_merge (cur_nodes, &new_dests);
3260 re_node_set_free (&new_dests);
3261 if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3262 || err3 != REG_NOERROR, 0))
3263 {
3264 err = (err != REG_NOERROR ? err
3265 : (err2 != REG_NOERROR ? err2 : err3));
3266 return err;
3267 }
3268 /* TODO: It is still inefficient... */
3269 goto restart;
3270 }
3271 else
3272 {
3273 re_node_set union_set;
3274 next_node = dfa->nexts[ent->node];
3275 if (mctx->state_log[to_idx])
3276 {
3277 int ret;
3278 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3279 next_node))
3280 continue;
3281 err = re_node_set_init_copy (&union_set,
3282 &mctx->state_log[to_idx]->nodes);
3283 ret = re_node_set_insert (&union_set, next_node);
3284 if (BE (err != REG_NOERROR || ret < 0, 0))
3285 {
3286 re_node_set_free (&union_set);
3287 err = err != REG_NOERROR ? err : REG_ESPACE;
3288 return err;
3289 }
3290 }
3291 else
3292 {
3293 err = re_node_set_init_1 (&union_set, next_node);
3294 if (BE (err != REG_NOERROR, 0))
3295 return err;
3296 }
3297 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3298 re_node_set_free (&union_set);
3299 if (BE (mctx->state_log[to_idx] == NULL
3300 && err != REG_NOERROR, 0))
3301 return err;
3302 }
3303 }
3304 while (ent++->more);
3305 return REG_NOERROR;
3306}
3307
3308/* Build transition table for the state.
3309 Return 1 if succeeded, otherwise return NULL. */
3310
3311static int
3312internal_function
3313build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3314{
3315 reg_errcode_t err;
3316 int i, j, ch, need_word_trtable = 0;
3317 bitset_word_t elem, mask;
3318 bool dests_node_malloced = false;
3319 bool dest_states_malloced = false;
3320 int ndests; /* Number of the destination states from `state'. */
3321 re_dfastate_t **trtable;
3322 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3323 re_node_set follows, *dests_node;
3324 bitset_t *dests_ch;
3325 bitset_t acceptable;
3326
3327 struct dests_alloc
3328 {
3329 re_node_set dests_node[SBC_MAX];
3330 bitset_t dests_ch[SBC_MAX];
3331 } *dests_alloc;
3332
3333 /* We build DFA states which corresponds to the destination nodes
3334 from `state'. `dests_node[i]' represents the nodes which i-th
3335 destination state contains, and `dests_ch[i]' represents the
3336 characters which i-th destination state accepts. */
3337#ifdef HAVE_ALLOCA
3338 if (__libc_use_alloca (sizeof (struct dests_alloc)))
3339 dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3340 else
3341#endif
3342 {
3343 dests_alloc = re_malloc (struct dests_alloc, 1);
3344 if (BE (dests_alloc == NULL, 0))
3345 return 0;
3346 dests_node_malloced = true;
3347 }
3348 dests_node = dests_alloc->dests_node;
3349 dests_ch = dests_alloc->dests_ch;
3350
3351 /* Initialize transiton table. */
3352 state->word_trtable = state->trtable = NULL;
3353
3354 /* At first, group all nodes belonging to `state' into several
3355 destinations. */
3356 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3357 if (BE (ndests <= 0, 0))
3358 {
3359 if (dests_node_malloced)
3360 free (dests_alloc);
3361 /* Return 0 in case of an error, 1 otherwise. */
3362 if (ndests == 0)
3363 {
3364 state->trtable = (re_dfastate_t **)
3365 calloc (sizeof (re_dfastate_t *), SBC_MAX);
3366 return 1;
3367 }
3368 return 0;
3369 }
3370
3371 err = re_node_set_alloc (&follows, ndests + 1);
3372 if (BE (err != REG_NOERROR, 0))
3373 goto out_free;
3374
3375 /* Avoid arithmetic overflow in size calculation. */
3376 if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3377 / (3 * sizeof (re_dfastate_t *)))
3378 < ndests),
3379 0))
3380 goto out_free;
3381
3382#ifdef HAVE_ALLOCA
3383 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3384 + ndests * 3 * sizeof (re_dfastate_t *)))
3385 dest_states = (re_dfastate_t **)
3386 alloca (ndests * 3 * sizeof (re_dfastate_t *));
3387 else
3388#endif
3389 {
3390 dest_states = (re_dfastate_t **)
3391 malloc (ndests * 3 * sizeof (re_dfastate_t *));
3392 if (BE (dest_states == NULL, 0))
3393 {
3394out_free:
3395 if (dest_states_malloced)
3396 free (dest_states);
3397 re_node_set_free (&follows);
3398 for (i = 0; i < ndests; ++i)
3399 re_node_set_free (dests_node + i);
3400 if (dests_node_malloced)
3401 free (dests_alloc);
3402 return 0;
3403 }
3404 dest_states_malloced = true;
3405 }
3406 dest_states_word = dest_states + ndests;
3407 dest_states_nl = dest_states_word + ndests;
3408 bitset_empty (acceptable);
3409
3410 /* Then build the states for all destinations. */
3411 for (i = 0; i < ndests; ++i)
3412 {
3413 int next_node;
3414 re_node_set_empty (&follows);
3415 /* Merge the follows of this destination states. */
3416 for (j = 0; j < dests_node[i].nelem; ++j)
3417 {
3418 next_node = dfa->nexts[dests_node[i].elems[j]];
3419 if (next_node != -1)
3420 {
3421 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3422 if (BE (err != REG_NOERROR, 0))
3423 goto out_free;
3424 }
3425 }
3426 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3427 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3428 goto out_free;
3429 /* If the new state has context constraint,
3430 build appropriate states for these contexts. */
3431 if (dest_states[i]->has_constraint)
3432 {
3433 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3434 CONTEXT_WORD);
3435 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3436 goto out_free;
3437
3438 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3439 need_word_trtable = 1;
3440
3441 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3442 CONTEXT_NEWLINE);
3443 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3444 goto out_free;
3445 }
3446 else
3447 {
3448 dest_states_word[i] = dest_states[i];
3449 dest_states_nl[i] = dest_states[i];
3450 }
3451 bitset_merge (acceptable, dests_ch[i]);
3452 }
3453
3454 if (!BE (need_word_trtable, 0))
3455 {
3456 /* We don't care about whether the following character is a word
3457 character, or we are in a single-byte character set so we can
3458 discern by looking at the character code: allocate a
3459 256-entry transition table. */
3460 trtable = state->trtable =
3461 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3462 if (BE (trtable == NULL, 0))
3463 goto out_free;
3464
3465 /* For all characters ch...: */
3466 for (i = 0; i < BITSET_WORDS; ++i)
3467 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3468 elem;
3469 mask <<= 1, elem >>= 1, ++ch)
3470 if (BE (elem & 1, 0))
3471 {
3472 /* There must be exactly one destination which accepts
3473 character ch. See group_nodes_into_DFAstates. */
3474 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3475 ;
3476
3477 /* j-th destination accepts the word character ch. */
3478 if (dfa->word_char[i] & mask)
3479 trtable[ch] = dest_states_word[j];
3480 else
3481 trtable[ch] = dest_states[j];
3482 }
3483 }
3484 else
3485 {
3486 /* We care about whether the following character is a word
3487 character, and we are in a multi-byte character set: discern
3488 by looking at the character code: build two 256-entry
3489 transition tables, one starting at trtable[0] and one
3490 starting at trtable[SBC_MAX]. */
3491 trtable = state->word_trtable =
3492 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3493 if (BE (trtable == NULL, 0))
3494 goto out_free;
3495
3496 /* For all characters ch...: */
3497 for (i = 0; i < BITSET_WORDS; ++i)
3498 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3499 elem;
3500 mask <<= 1, elem >>= 1, ++ch)
3501 if (BE (elem & 1, 0))
3502 {
3503 /* There must be exactly one destination which accepts
3504 character ch. See group_nodes_into_DFAstates. */
3505 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3506 ;
3507
3508 /* j-th destination accepts the word character ch. */
3509 trtable[ch] = dest_states[j];
3510 trtable[ch + SBC_MAX] = dest_states_word[j];
3511 }
3512 }
3513
3514 /* new line */
3515 if (bitset_contain (acceptable, NEWLINE_CHAR))
3516 {
3517 /* The current state accepts newline character. */
3518 for (j = 0; j < ndests; ++j)
3519 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3520 {
3521 /* k-th destination accepts newline character. */
3522 trtable[NEWLINE_CHAR] = dest_states_nl[j];
3523 if (need_word_trtable)
3524 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3525 /* There must be only one destination which accepts
3526 newline. See group_nodes_into_DFAstates. */
3527 break;
3528 }
3529 }
3530
3531 if (dest_states_malloced)
3532 free (dest_states);
3533
3534 re_node_set_free (&follows);
3535 for (i = 0; i < ndests; ++i)
3536 re_node_set_free (dests_node + i);
3537
3538 if (dests_node_malloced)
3539 free (dests_alloc);
3540
3541 return 1;
3542}
3543
3544/* Group all nodes belonging to STATE into several destinations.
3545 Then for all destinations, set the nodes belonging to the destination
3546 to DESTS_NODE[i] and set the characters accepted by the destination
3547 to DEST_CH[i]. This function return the number of destinations. */
3548
3549static int
3550internal_function
3551group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3552 re_node_set *dests_node, bitset_t *dests_ch)
3553{
3554 reg_errcode_t err;
3555 int result;
3556 int i, j, k;
3557 int ndests; /* Number of the destinations from `state'. */
3558 bitset_t accepts; /* Characters a node can accept. */
3559 const re_node_set *cur_nodes = &state->nodes;
3560 bitset_empty (accepts);
3561 ndests = 0;
3562
3563 /* For all the nodes belonging to `state', */
3564 for (i = 0; i < cur_nodes->nelem; ++i)
3565 {
3566 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3567 re_token_type_t type = node->type;
3568 unsigned int constraint = node->constraint;
3569
3570 /* Enumerate all single byte character this node can accept. */
3571 if (type == CHARACTER)
3572 bitset_set (accepts, node->opr.c);
3573 else if (type == SIMPLE_BRACKET)
3574 {
3575 bitset_merge (accepts, node->opr.sbcset);
3576 }
3577 else if (type == OP_PERIOD)
3578 {
3579#ifdef RE_ENABLE_I18N
3580 if (dfa->mb_cur_max > 1)
3581 bitset_merge (accepts, dfa->sb_char);
3582 else
3583#endif
3584 bitset_set_all (accepts);
3585 if (!(dfa->syntax & RE_DOT_NEWLINE))
3586 bitset_clear (accepts, '\n');
3587 if (dfa->syntax & RE_DOT_NOT_NULL)
3588 bitset_clear (accepts, '\0');
3589 }
3590#ifdef RE_ENABLE_I18N
3591 else if (type == OP_UTF8_PERIOD)
3592 {
3593 memset (accepts, '\xff', sizeof (bitset_t) / 2);
3594 if (!(dfa->syntax & RE_DOT_NEWLINE))
3595 bitset_clear (accepts, '\n');
3596 if (dfa->syntax & RE_DOT_NOT_NULL)
3597 bitset_clear (accepts, '\0');
3598 }
3599#endif
3600 else
3601 continue;
3602
3603 /* Check the `accepts' and sift the characters which are not
3604 match it the context. */
3605 if (constraint)
3606 {
3607 if (constraint & NEXT_NEWLINE_CONSTRAINT)
3608 {
3609 bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3610 bitset_empty (accepts);
3611 if (accepts_newline)
3612 bitset_set (accepts, NEWLINE_CHAR);
3613 else
3614 continue;
3615 }
3616 if (constraint & NEXT_ENDBUF_CONSTRAINT)
3617 {
3618 bitset_empty (accepts);
3619 continue;
3620 }
3621
3622 if (constraint & NEXT_WORD_CONSTRAINT)
3623 {
3624 bitset_word_t any_set = 0;
3625 if (type == CHARACTER && !node->word_char)
3626 {
3627 bitset_empty (accepts);
3628 continue;
3629 }
3630#ifdef RE_ENABLE_I18N
3631 if (dfa->mb_cur_max > 1)
3632 for (j = 0; j < BITSET_WORDS; ++j)
3633 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3634 else
3635#endif
3636 for (j = 0; j < BITSET_WORDS; ++j)
3637 any_set |= (accepts[j] &= dfa->word_char[j]);
3638 if (!any_set)
3639 continue;
3640 }
3641 if (constraint & NEXT_NOTWORD_CONSTRAINT)
3642 {
3643 bitset_word_t any_set = 0;
3644 if (type == CHARACTER && node->word_char)
3645 {
3646 bitset_empty (accepts);
3647 continue;
3648 }
3649#ifdef RE_ENABLE_I18N
3650 if (dfa->mb_cur_max > 1)
3651 for (j = 0; j < BITSET_WORDS; ++j)
3652 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3653 else
3654#endif
3655 for (j = 0; j < BITSET_WORDS; ++j)
3656 any_set |= (accepts[j] &= ~dfa->word_char[j]);
3657 if (!any_set)
3658 continue;
3659 }
3660 }
3661
3662 /* Then divide `accepts' into DFA states, or create a new
3663 state. Above, we make sure that accepts is not empty. */
3664 for (j = 0; j < ndests; ++j)
3665 {
3666 bitset_t intersec; /* Intersection sets, see below. */
3667 bitset_t remains;
3668 /* Flags, see below. */
3669 bitset_word_t has_intersec, not_subset, not_consumed;
3670
3671 /* Optimization, skip if this state doesn't accept the character. */
3672 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3673 continue;
3674
3675 /* Enumerate the intersection set of this state and `accepts'. */
3676 has_intersec = 0;
3677 for (k = 0; k < BITSET_WORDS; ++k)
3678 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3679 /* And skip if the intersection set is empty. */
3680 if (!has_intersec)
3681 continue;
3682
3683 /* Then check if this state is a subset of `accepts'. */
3684 not_subset = not_consumed = 0;
3685 for (k = 0; k < BITSET_WORDS; ++k)
3686 {
3687 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3688 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3689 }
3690
3691 /* If this state isn't a subset of `accepts', create a
3692 new group state, which has the `remains'. */
3693 if (not_subset)
3694 {
3695 bitset_copy (dests_ch[ndests], remains);
3696 bitset_copy (dests_ch[j], intersec);
3697 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3698 if (BE (err != REG_NOERROR, 0))
3699 goto error_return;
3700 ++ndests;
3701 }
3702
3703 /* Put the position in the current group. */
3704 result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3705 if (BE (result < 0, 0))
3706 goto error_return;
3707
3708 /* If all characters are consumed, go to next node. */
3709 if (!not_consumed)
3710 break;
3711 }
3712 /* Some characters remain, create a new group. */
3713 if (j == ndests)
3714 {
3715 bitset_copy (dests_ch[ndests], accepts);
3716 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3717 if (BE (err != REG_NOERROR, 0))
3718 goto error_return;
3719 ++ndests;
3720 bitset_empty (accepts);
3721 }
3722 }
3723 return ndests;
3724 error_return:
3725 for (j = 0; j < ndests; ++j)
3726 re_node_set_free (dests_node + j);
3727 return -1;
3728}
3729
3730#ifdef RE_ENABLE_I18N
3731/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3732 Return the number of the bytes the node accepts.
3733 STR_IDX is the current index of the input string.
3734
3735 This function handles the nodes which can accept one character, or
3736 one collating element like '.', '[a-z]', opposite to the other nodes
3737 can only accept one byte. */
3738
3739static int
3740internal_function
3741check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3742 const re_string_t *input, int str_idx)
3743{
3744 const re_token_t *node = dfa->nodes + node_idx;
3745 int char_len, elem_len;
3746 int i;
3747 wint_t wc;
3748
3749 if (BE (node->type == OP_UTF8_PERIOD, 0))
3750 {
3751 unsigned char c = re_string_byte_at (input, str_idx), d;
3752 if (BE (c < 0xc2, 1))
3753 return 0;
3754
3755 if (str_idx + 2 > input->len)
3756 return 0;
3757
3758 d = re_string_byte_at (input, str_idx + 1);
3759 if (c < 0xe0)
3760 return (d < 0x80 || d > 0xbf) ? 0 : 2;
3761 else if (c < 0xf0)
3762 {
3763 char_len = 3;
3764 if (c == 0xe0 && d < 0xa0)
3765 return 0;
3766 }
3767 else if (c < 0xf8)
3768 {
3769 char_len = 4;
3770 if (c == 0xf0 && d < 0x90)
3771 return 0;
3772 }
3773 else if (c < 0xfc)
3774 {
3775 char_len = 5;
3776 if (c == 0xf8 && d < 0x88)
3777 return 0;
3778 }
3779 else if (c < 0xfe)
3780 {
3781 char_len = 6;
3782 if (c == 0xfc && d < 0x84)
3783 return 0;
3784 }
3785 else
3786 return 0;
3787
3788 if (str_idx + char_len > input->len)
3789 return 0;
3790
3791 for (i = 1; i < char_len; ++i)
3792 {
3793 d = re_string_byte_at (input, str_idx + i);
3794 if (d < 0x80 || d > 0xbf)
3795 return 0;
3796 }
3797 return char_len;
3798 }
3799
3800 char_len = re_string_char_size_at (input, str_idx);
3801 if (node->type == OP_PERIOD)
3802 {
3803 if (char_len <= 1)
3804 return 0;
3805 /* FIXME: I don't think this if is needed, as both '\n'
3806 and '\0' are char_len == 1. */
3807 /* '.' accepts any one character except the following two cases. */
3808 if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3809 re_string_byte_at (input, str_idx) == '\n') ||
3810 ((dfa->syntax & RE_DOT_NOT_NULL) &&
3811 re_string_byte_at (input, str_idx) == '\0'))
3812 return 0;
3813 return char_len;
3814 }
3815
3816 elem_len = re_string_elem_size_at (input, str_idx);
3817 wc = __btowc(*(input->mbs+str_idx));
3818 if (((elem_len <= 1 && char_len <= 1) || char_len == 0) && (wc != WEOF && wc < SBC_MAX))
3819 return 0;
3820
3821 if (node->type == COMPLEX_BRACKET)
3822 {
3823 const re_charset_t *cset = node->opr.mbcset;
3824# ifdef _LIBC
3825 const unsigned char *pin
3826 = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3827 int j;
3828 uint32_t nrules;
3829# endif /* _LIBC */
3830 int match_len = 0;
3831 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3832 ? re_string_wchar_at (input, str_idx) : 0);
3833
3834 /* match with multibyte character? */
3835 for (i = 0; i < cset->nmbchars; ++i)
3836 if (wc == cset->mbchars[i])
3837 {
3838 match_len = char_len;
3839 goto check_node_accept_bytes_match;
3840 }
3841 /* match with character_class? */
3842 for (i = 0; i < cset->nchar_classes; ++i)
3843 {
3844 wctype_t wt = cset->char_classes[i];
3845 if (__iswctype (wc, wt))
3846 {
3847 match_len = char_len;
3848 goto check_node_accept_bytes_match;
3849 }
3850 }
3851
3852# ifdef _LIBC
3853 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3854 if (nrules != 0)
3855 {
3856 unsigned int in_collseq = 0;
3857 const int32_t *table, *indirect;
3858 const unsigned char *weights, *extra;
3859 const char *collseqwc;
3860 /* This #include defines a local function! */
3861# include <locale/weight.h>
3862
3863 /* match with collating_symbol? */
3864 if (cset->ncoll_syms)
3865 extra = (const unsigned char *)
3866 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3867 for (i = 0; i < cset->ncoll_syms; ++i)
3868 {
3869 const unsigned char *coll_sym = extra + cset->coll_syms[i];
3870 /* Compare the length of input collating element and
3871 the length of current collating element. */
3872 if (*coll_sym != elem_len)
3873 continue;
3874 /* Compare each bytes. */
3875 for (j = 0; j < *coll_sym; j++)
3876 if (pin[j] != coll_sym[1 + j])
3877 break;
3878 if (j == *coll_sym)
3879 {
3880 /* Match if every bytes is equal. */
3881 match_len = j;
3882 goto check_node_accept_bytes_match;
3883 }
3884 }
3885
3886 if (cset->nranges)
3887 {
3888 if (elem_len <= char_len)
3889 {
3890 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3891 in_collseq = __collseq_table_lookup (collseqwc, wc);
3892 }
3893 else
3894 in_collseq = find_collation_sequence_value (pin, elem_len);
3895 }
3896 /* match with range expression? */
3897 for (i = 0; i < cset->nranges; ++i)
3898 if (cset->range_starts[i] <= in_collseq
3899 && in_collseq <= cset->range_ends[i])
3900 {
3901 match_len = elem_len;
3902 goto check_node_accept_bytes_match;
3903 }
3904
3905 /* match with equivalence_class? */
3906 if (cset->nequiv_classes)
3907 {
3908 const unsigned char *cp = pin;
3909 table = (const int32_t *)
3910 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3911 weights = (const unsigned char *)
3912 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3913 extra = (const unsigned char *)
3914 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3915 indirect = (const int32_t *)
3916 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3917 int32_t idx = findidx (&cp);
3918 if (idx > 0)
3919 for (i = 0; i < cset->nequiv_classes; ++i)
3920 {
3921 int32_t equiv_class_idx = cset->equiv_classes[i];
3922 size_t weight_len = weights[idx & 0xffffff];
3923 if (weight_len == weights[equiv_class_idx & 0xffffff]
3924 && (idx >> 24) == (equiv_class_idx >> 24))
3925 {
3926 int cnt = 0;
3927
3928 idx &= 0xffffff;
3929 equiv_class_idx &= 0xffffff;
3930
3931 while (cnt <= weight_len
3932 && (weights[equiv_class_idx + 1 + cnt]
3933 == weights[idx + 1 + cnt]))
3934 ++cnt;
3935 if (cnt > weight_len)
3936 {
3937 match_len = elem_len;
3938 goto check_node_accept_bytes_match;
3939 }
3940 }
3941 }
3942 }
3943 }
3944 else
3945# endif /* _LIBC */
3946 {
3947 /* match with range expression? */
3948#if __GNUC__ >= 2
3949 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3950#else
3951 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3952 cmp_buf[2] = wc;
3953#endif
3954 for (i = 0; i < cset->nranges; ++i)
3955 {
3956 cmp_buf[0] = cset->range_starts[i];
3957 cmp_buf[4] = cset->range_ends[i];
3958 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3959 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3960 {
3961 match_len = char_len;
3962 goto check_node_accept_bytes_match;
3963 }
3964 }
3965 }
3966 check_node_accept_bytes_match:
3967 if (!cset->non_match)
3968 return match_len;
3969 else
3970 {
3971 if (match_len > 0)
3972 return 0;
3973 else
3974 return (elem_len > char_len) ? elem_len : char_len;
3975 }
3976 }
3977 return 0;
3978}
3979
3980# ifdef _LIBC
3981static unsigned int
3982internal_function
3983find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3984{
3985 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3986 if (nrules == 0)
3987 {
3988 if (mbs_len == 1)
3989 {
3990 /* No valid character. Match it as a single byte character. */
3991 const unsigned char *collseq = (const unsigned char *)
3992 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3993 return collseq[mbs[0]];
3994 }
3995 return UINT_MAX;
3996 }
3997 else
3998 {
3999 int32_t idx;
4000 const unsigned char *extra = (const unsigned char *)
4001 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4002 int32_t extrasize = (const unsigned char *)
4003 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4004
4005 for (idx = 0; idx < extrasize;)
4006 {
4007 int mbs_cnt, found = 0;
4008 int32_t elem_mbs_len;
4009 /* Skip the name of collating element name. */
4010 idx = idx + extra[idx] + 1;
4011 elem_mbs_len = extra[idx++];
4012 if (mbs_len == elem_mbs_len)
4013 {
4014 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4015 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4016 break;
4017 if (mbs_cnt == elem_mbs_len)
4018 /* Found the entry. */
4019 found = 1;
4020 }
4021 /* Skip the byte sequence of the collating element. */
4022 idx += elem_mbs_len;
4023 /* Adjust for the alignment. */
4024 idx = (idx + 3) & ~3;
4025 /* Skip the collation sequence value. */
4026 idx += sizeof (uint32_t);
4027 /* Skip the wide char sequence of the collating element. */
4028 idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4029 /* If we found the entry, return the sequence value. */
4030 if (found)
4031 return *(uint32_t *) (extra + idx);
4032 /* Skip the collation sequence value. */
4033 idx += sizeof (uint32_t);
4034 }
4035 return UINT_MAX;
4036 }
4037}
4038# endif /* _LIBC */
4039#endif /* RE_ENABLE_I18N */
4040
4041/* Check whether the node accepts the byte which is IDX-th
4042 byte of the INPUT. */
4043
4044static int
4045internal_function
4046check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4047 int idx)
4048{
4049 unsigned char ch;
4050 ch = re_string_byte_at (&mctx->input, idx);
4051 switch (node->type)
4052 {
4053 case CHARACTER:
4054 if (node->opr.c != ch)
4055 return 0;
4056 break;
4057
4058 case SIMPLE_BRACKET:
4059 if (!bitset_contain (node->opr.sbcset, ch))
4060 return 0;
4061 break;
4062
4063#ifdef RE_ENABLE_I18N
4064 case OP_UTF8_PERIOD:
4065 if (ch >= 0x80)
4066 return 0;
4067 /* FALLTHROUGH */
4068#endif
4069 case OP_PERIOD:
4070 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4071 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4072 return 0;
4073 break;
4074
4075 default:
4076 return 0;
4077 }
4078
4079 if (node->constraint)
4080 {
4081 /* The node has constraints. Check whether the current context
4082 satisfies the constraints. */
4083 unsigned int context = re_string_context_at (&mctx->input, idx,
4084 mctx->eflags);
4085 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4086 return 0;
4087 }
4088
4089 return 1;
4090}
4091
4092/* Extend the buffers, if the buffers have run out. */
4093
4094static reg_errcode_t
4095internal_function
4096extend_buffers (re_match_context_t *mctx)
4097{
4098 reg_errcode_t ret;
4099 re_string_t *pstr = &mctx->input;
4100
4101 /* Avoid overflow. */
4102 if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4103 return REG_ESPACE;
4104
4105 /* Double the lengthes of the buffers. */
4106 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4107 if (BE (ret != REG_NOERROR, 0))
4108 return ret;
4109
4110 if (mctx->state_log != NULL)
4111 {
4112 /* And double the length of state_log. */
4113 /* XXX We have no indication of the size of this buffer. If this
4114 allocation fail we have no indication that the state_log array
4115 does not have the right size. */
4116 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4117 pstr->bufs_len + 1);
4118 if (BE (new_array == NULL, 0))
4119 return REG_ESPACE;
4120 mctx->state_log = new_array;
4121 }
4122
4123 /* Then reconstruct the buffers. */
4124 if (pstr->icase)
4125 {
4126#ifdef RE_ENABLE_I18N
4127 if (pstr->mb_cur_max > 1)
4128 {
4129 ret = build_wcs_upper_buffer (pstr);
4130 if (BE (ret != REG_NOERROR, 0))
4131 return ret;
4132 }
4133 else
4134#endif /* RE_ENABLE_I18N */
4135 build_upper_buffer (pstr);
4136 }
4137 else
4138 {
4139#ifdef RE_ENABLE_I18N
4140 if (pstr->mb_cur_max > 1)
4141 build_wcs_buffer (pstr);
4142 else
4143#endif /* RE_ENABLE_I18N */
4144 {
4145 if (pstr->trans != NULL)
4146 re_string_translate_buffer (pstr);
4147 }
4148 }
4149 return REG_NOERROR;
4150}
4151
4152
4153/* Functions for matching context. */
4154
4155/* Initialize MCTX. */
4156
4157static reg_errcode_t
4158internal_function
4159match_ctx_init (re_match_context_t *mctx, int eflags, int n)
4160{
4161 mctx->eflags = eflags;
4162 mctx->match_last = -1;
4163 if (n > 0)
4164 {
4165 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4166 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4167 if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4168 return REG_ESPACE;
4169 }
4170 /* Already zero-ed by the caller.
4171 else
4172 mctx->bkref_ents = NULL;
4173 mctx->nbkref_ents = 0;
4174 mctx->nsub_tops = 0; */
4175 mctx->abkref_ents = n;
4176 mctx->max_mb_elem_len = 1;
4177 mctx->asub_tops = n;
4178 return REG_NOERROR;
4179}
4180
4181/* Clean the entries which depend on the current input in MCTX.
4182 This function must be invoked when the matcher changes the start index
4183 of the input, or changes the input string. */
4184
4185static void
4186internal_function
4187match_ctx_clean (re_match_context_t *mctx)
4188{
4189 int st_idx;
4190 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4191 {
4192 int sl_idx;
4193 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4194 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4195 {
4196 re_sub_match_last_t *last = top->lasts[sl_idx];
4197 re_free (last->path.array);
4198 re_free (last);
4199 }
4200 re_free (top->lasts);
4201 if (top->path)
4202 {
4203 re_free (top->path->array);
4204 re_free (top->path);
4205 }
4206 free (top);
4207 }
4208
4209 mctx->nsub_tops = 0;
4210 mctx->nbkref_ents = 0;
4211}
4212
4213/* Free all the memory associated with MCTX. */
4214
4215static void
4216internal_function
4217match_ctx_free (re_match_context_t *mctx)
4218{
4219 /* First, free all the memory associated with MCTX->SUB_TOPS. */
4220 match_ctx_clean (mctx);
4221 re_free (mctx->sub_tops);
4222 re_free (mctx->bkref_ents);
4223}
4224
4225/* Add a new backreference entry to MCTX.
4226 Note that we assume that caller never call this function with duplicate
4227 entry, and call with STR_IDX which isn't smaller than any existing entry.
4228*/
4229
4230static reg_errcode_t
4231internal_function
4232match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4233 int to)
4234{
4235 if (mctx->nbkref_ents >= mctx->abkref_ents)
4236 {
4237 struct re_backref_cache_entry* new_entry;
4238 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4239 mctx->abkref_ents * 2);
4240 if (BE (new_entry == NULL, 0))
4241 {
4242 re_free (mctx->bkref_ents);
4243 return REG_ESPACE;
4244 }
4245 mctx->bkref_ents = new_entry;
4246 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4247 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4248 mctx->abkref_ents *= 2;
4249 }
4250 if (mctx->nbkref_ents > 0
4251 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4252 mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4253
4254 mctx->bkref_ents[mctx->nbkref_ents].node = node;
4255 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4256 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4257 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4258
4259 /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4260 If bit N is clear, means that this entry won't epsilon-transition to
4261 an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4262 it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4263 such node.
4264
4265 A backreference does not epsilon-transition unless it is empty, so set
4266 to all zeros if FROM != TO. */
4267 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4268 = (from == to ? ~0 : 0);
4269
4270 mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4271 if (mctx->max_mb_elem_len < to - from)
4272 mctx->max_mb_elem_len = to - from;
4273 return REG_NOERROR;
4274}
4275
4276/* Search for the first entry which has the same str_idx, or -1 if none is
4277 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4278
4279static int
4280internal_function
4281search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
4282{
4283 int left, right, mid, last;
4284 last = right = mctx->nbkref_ents;
4285 for (left = 0; left < right;)
4286 {
4287 mid = (left + right) / 2;
4288 if (mctx->bkref_ents[mid].str_idx < str_idx)
4289 left = mid + 1;
4290 else
4291 right = mid;
4292 }
4293 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4294 return left;
4295 else
4296 return -1;
4297}
4298
4299/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4300 at STR_IDX. */
4301
4302static reg_errcode_t
4303internal_function
4304match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
4305{
4306#ifdef DEBUG
4307 assert (mctx->sub_tops != NULL);
4308 assert (mctx->asub_tops > 0);
4309#endif
4310 if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4311 {
4312 int new_asub_tops = mctx->asub_tops * 2;
4313 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4314 re_sub_match_top_t *,
4315 new_asub_tops);
4316 if (BE (new_array == NULL, 0))
4317 return REG_ESPACE;
4318 mctx->sub_tops = new_array;
4319 mctx->asub_tops = new_asub_tops;
4320 }
4321 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4322 if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4323 return REG_ESPACE;
4324 mctx->sub_tops[mctx->nsub_tops]->node = node;
4325 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4326 return REG_NOERROR;
4327}
4328
4329/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4330 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4331
4332static re_sub_match_last_t *
4333internal_function
4334match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
4335{
4336 re_sub_match_last_t *new_entry;
4337 if (BE (subtop->nlasts == subtop->alasts, 0))
4338 {
4339 int new_alasts = 2 * subtop->alasts + 1;
4340 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4341 re_sub_match_last_t *,
4342 new_alasts);
4343 if (BE (new_array == NULL, 0))
4344 return NULL;
4345 subtop->lasts = new_array;
4346 subtop->alasts = new_alasts;
4347 }
4348 new_entry = calloc (1, sizeof (re_sub_match_last_t));
4349 if (BE (new_entry != NULL, 1))
4350 {
4351 subtop->lasts[subtop->nlasts] = new_entry;
4352 new_entry->node = node;
4353 new_entry->str_idx = str_idx;
4354 ++subtop->nlasts;
4355 }
4356 return new_entry;
4357}
4358
4359static void
4360internal_function
4361sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4362 re_dfastate_t **limited_sts, int last_node, int last_str_idx)
4363{
4364 sctx->sifted_states = sifted_sts;
4365 sctx->limited_states = limited_sts;
4366 sctx->last_node = last_node;
4367 sctx->last_str_idx = last_str_idx;
4368 re_node_set_init_empty (&sctx->limits);
4369}
diff --git a/win32/resources/COPYING_CCBYSA3 b/win32/resources/COPYING_CCBYSA3
new file mode 100644
index 000000000..fc45d7818
--- /dev/null
+++ b/win32/resources/COPYING_CCBYSA3
@@ -0,0 +1,7 @@
1This work is licenced under the Creative Commons Attribution-Share Alike 3.0
2United States License. To view a copy of this licence, visit
3http://creativecommons.org/licenses/by-sa/3.0/ or send a letter to Creative
4Commons, 171 Second Street, Suite 300, San Francisco, California 94105, USA.
5
6When attributing the artwork, using "GNOME Project" is enough.
7Please link to http://www.gnome.org where available.
diff --git a/win32/resources/Kbuild.src b/win32/resources/Kbuild.src
new file mode 100644
index 000000000..d056a5964
--- /dev/null
+++ b/win32/resources/Kbuild.src
@@ -0,0 +1,29 @@
1# Makefile for busybox
2#
3# Copyright (C) 2018 by R M Yorston <rmy@pobox.com>
4#
5# Licensed under GPLv2, see file LICENSE in this source tree.
6
7obj-y :=
8
9obj-$(CONFIG_FEATURE_RESOURCES) += resources.o
10
11# return commit level if available or 0
12bb_level = $(or $(word 2,$(subst -, ,$1)),0)
13
14WRFLAGS := -D"BB_VER=$(BB_VER)" \
15 -D"BB_VERSION=$(VERSION)" -D"BB_PATCHLEVEL=$(PATCHLEVEL)" \
16 -D"BB_SUBLEVEL=$(SUBLEVEL)" \
17 -D"BB_EXTRAVERSION=$(call bb_level,$(EXTRAVERSION))" \
18 --include-dir=$(objtree)/include --include-dir=$(objtree)/win32/resources
19
20quiet_cmd_windres = WINDRES $@
21 cmd_windres = $(WINDRES) $(WRFLAGS) $< $@
22
23%.o: %.rc FORCE
24 $(call if_changed,windres)
25
26win32/resources/resources.o: win32/resources/resources.rc .config
27win32/resources/resources.o: win32/resources/aterm.ico win32/resources/sterm.ico
28win32/resources/resources.o: win32/resources/utf8.manifest
29win32/resources/resources.o: win32/resources/app.manifest
diff --git a/win32/resources/README b/win32/resources/README
new file mode 100644
index 000000000..33a245386
--- /dev/null
+++ b/win32/resources/README
@@ -0,0 +1,9 @@
1The icons are based on those for GNOME terminal in the Adwaita theme.
2
3They were generated by importing the 16x16, 32x32 and 48x48 PNG files
4into GIMP as separate layers then exporting as a single .ico file.
5
6The original files are dual-licensed under either the GNU LGPL v3 or
7Creative Commons Attribution-Share Alike 3.0 United States License.
8
9The .ico files are licensed under the latter.
diff --git a/win32/resources/app.manifest b/win32/resources/app.manifest
new file mode 100644
index 000000000..5e76b7b8e
--- /dev/null
+++ b/win32/resources/app.manifest
@@ -0,0 +1,24 @@
1<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
3 <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
4 <security>
5 <requestedPrivileges>
6 <requestedExecutionLevel level="asInvoker"/>
7 </requestedPrivileges>
8 </security>
9 </trustInfo>
10 <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
11 <application>
12 <!--The ID below indicates application support for Windows Vista -->
13 <supportedOS Id="{e2011457-1546-43c5-a5fe-008deee3d3f0}"/>
14 <!--The ID below indicates application support for Windows 7 -->
15 <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
16 <!--The ID below indicates application support for Windows 8 -->
17 <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
18 <!--The ID below indicates application support for Windows 8.1 -->
19 <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
20 <!--The ID below indicates application support for Windows 10 -->
21 <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
22 </application>
23 </compatibility>
24</assembly>
diff --git a/win32/resources/aterm.ico b/win32/resources/aterm.ico
new file mode 100644
index 000000000..e680216a2
--- /dev/null
+++ b/win32/resources/aterm.ico
Binary files differ
diff --git a/win32/resources/dummy.c b/win32/resources/dummy.c
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/resources/dummy.c
diff --git a/win32/resources/resources.rc b/win32/resources/resources.rc
new file mode 100644
index 000000000..5d06dda3c
--- /dev/null
+++ b/win32/resources/resources.rc
@@ -0,0 +1,45 @@
1#include <autoconf.h>
2#define xstr(s) str(s)
3#define str(s) #s
4
5#if ENABLE_FEATURE_ICON_ATERM || ENABLE_FEATURE_ICON_ALL
61 ICON "aterm.ico"
7#endif
8#if ENABLE_FEATURE_ICON_STERM || ENABLE_FEATURE_ICON_ALL
92 ICON "sterm.ico"
10#endif
11
12#if ENABLE_FEATURE_VERSIONINFO
131 VERSIONINFO
14FILEVERSION BB_VERSION,BB_PATCHLEVEL,BB_SUBLEVEL,BB_EXTRAVERSION
15PRODUCTVERSION BB_VERSION,BB_PATCHLEVEL,BB_SUBLEVEL,BB_EXTRAVERSION
16BEGIN
17 BLOCK "StringFileInfo"
18 BEGIN
19 BLOCK "080904E4"
20 BEGIN
21 VALUE "CompanyName", "frippery.org"
22 VALUE "FileDescription", "BusyBox multi-call binary"
23 VALUE "FileVersion", xstr(BB_VER)
24 VALUE "InternalName", "busybox"
25 VALUE "LegalCopyright", "(C) 1998-2022 Many authors"
26 VALUE "OriginalFilename", "busybox.exe"
27 VALUE "ProductName", "busybox-w32"
28 VALUE "ProductVersion", xstr(BB_VER)
29 END
30 END
31 BLOCK "VarFileInfo"
32 BEGIN
33 VALUE "Translation", 0x809, 1252
34 END
35END
36#endif
37
38/* Hardcode numeric value for MANIFEST for llvm windres */
39#if ENABLE_FEATURE_UTF8_MANIFEST
401 24 "utf8.manifest"
41#endif
42
43#if ENABLE_FEATURE_APP_MANIFEST
441 24 "app.manifest"
45#endif
diff --git a/win32/resources/sterm.ico b/win32/resources/sterm.ico
new file mode 100644
index 000000000..b9125b34d
--- /dev/null
+++ b/win32/resources/sterm.ico
Binary files differ
diff --git a/win32/resources/utf8.manifest b/win32/resources/utf8.manifest
new file mode 100644
index 000000000..efe6a3d2f
--- /dev/null
+++ b/win32/resources/utf8.manifest
@@ -0,0 +1,30 @@
1<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2<assembly manifestVersion="1.0" xmlns="urn:schemas-microsoft-com:asm.v1">
3 <assemblyIdentity type="win32" name="busybox.exe" version="6.0.0.0"/>
4 <application>
5 <windowsSettings>
6 <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
7 </windowsSettings>
8 </application>
9 <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
10 <security>
11 <requestedPrivileges>
12 <requestedExecutionLevel level="asInvoker"/>
13 </requestedPrivileges>
14 </security>
15 </trustInfo>
16 <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
17 <application>
18 <!--The ID below indicates application support for Windows Vista -->
19 <supportedOS Id="{e2011457-1546-43c5-a5fe-008deee3d3f0}"/>
20 <!--The ID below indicates application support for Windows 7 -->
21 <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
22 <!--The ID below indicates application support for Windows 8 -->
23 <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
24 <!--The ID below indicates application support for Windows 8.1 -->
25 <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
26 <!--The ID below indicates application support for Windows 10 -->
27 <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
28 </application>
29 </compatibility>
30</assembly>
diff --git a/win32/sched.h b/win32/sched.h
new file mode 100644
index 000000000..128bfe698
--- /dev/null
+++ b/win32/sched.h
@@ -0,0 +1 @@
/* Stand-in for POSIX sched_yield(): does nothing and reports success.
   POSIX declares the function as returning int (0 on success), so keep
   that shape for callers that test the result.  */
static inline int sched_yield(void) { return 0; }
diff --git a/win32/select.c b/win32/select.c
new file mode 100644
index 000000000..2be221ac8
--- /dev/null
+++ b/win32/select.c
@@ -0,0 +1,592 @@
1/* Emulation for select(2)
2 Contributed by Paolo Bonzini.
3
4 Copyright 2008-2021 Free Software Foundation, Inc.
5
6 This file is part of gnulib.
7
8 This file is free software: you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as
10 published by the Free Software Foundation; either version 2.1 of the
11 License, or (at your option) any later version.
12
13 This file is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
20
21#include "libbb.h"
22
23/* Specification. */
24#include <sys/select.h>
25
26#if defined _WIN32 && ! defined __CYGWIN__
27/* Native Windows. */
28
29#include <malloc.h>
30#include <assert.h>
31#include <sys/types.h>
32#include <errno.h>
33#include <limits.h>
34
35#include <winsock2.h>
36#include <windows.h>
37#include <io.h>
38#include <stdio.h>
39#include <conio.h>
40#include <time.h>
41
42/* Get the overridden 'struct timeval'. */
43
44#undef select
45
46/* Don't assume that UNICODE is not defined. */
47#undef GetModuleHandle
48#define GetModuleHandle GetModuleHandleA
49#undef PeekConsoleInput
50#define PeekConsoleInput PeekConsoleInputA
51#undef CreateEvent
52#define CreateEvent CreateEventA
53#undef PeekMessage
54#define PeekMessage PeekMessageA
55#undef DispatchMessage
56#define DispatchMessage DispatchMessageA
57
58/* Avoid warnings from gcc -Wcast-function-type. */
59#define GetProcAddress \
60 (void *) GetProcAddress
61
/* A pair of FD_SETSIZE-bit maps indexed by libc descriptor number:
   descriptor FD is bit (FD & (CHAR_BIT - 1)) of byte FD / CHAR_BIT.
   IN holds the descriptors the caller asked about, OUT the ones that
   windows_poll_handle found ready.  */
struct bitset {
  unsigned char in[FD_SETSIZE / CHAR_BIT];
  unsigned char out[FD_SETSIZE / CHAR_BIT];
};
66
/* Declare data structures for ntdll functions. */

/* Buffer handed to NtQueryInformationFile together with the
   FilePipeLocalInformation class.  In this file only OutboundQuota and
   WriteQuotaAvailable are read back (see windows_poll_handle).  */
typedef struct _FILE_PIPE_LOCAL_INFORMATION {
  ULONG NamedPipeType;
  ULONG NamedPipeConfiguration;
  ULONG MaximumInstances;
  ULONG CurrentInstances;
  ULONG InboundQuota;
  ULONG ReadDataAvailable;
  ULONG OutboundQuota;
  ULONG WriteQuotaAvailable;
  ULONG NamedPipeState;
  ULONG NamedPipeEnd;
} FILE_PIPE_LOCAL_INFORMATION, *PFILE_PIPE_LOCAL_INFORMATION;
80
/* Status block argument of NtQueryInformationFile.  This file zeroes it
   before the call and never inspects it afterwards.  */
typedef struct _IO_STATUS_BLOCK
{
  union {
    DWORD Status;
    PVOID Pointer;
  } u;
  ULONG_PTR Information;
} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;
89
/* Information class selector for NtQueryInformationFile.  Only the one
   class this file requests is declared.  */
typedef enum _FILE_INFORMATION_CLASS {
  FilePipeLocalInformation = 24
} FILE_INFORMATION_CLASS, *PFILE_INFORMATION_CLASS;
93
/* Pointer type for NtQueryInformationFile, which windows_poll_handle looks
   up in ntdll.dll at run time through GetProcAddress.  */
typedef DWORD (WINAPI *PNtQueryInformationFile)
  (HANDLE, IO_STATUS_BLOCK *, VOID *, ULONG, FILE_INFORMATION_CLASS);
96
97#ifndef PIPE_BUF
98#define PIPE_BUF 512
99#endif
100
101static BOOL IsConsoleHandle (HANDLE h)
102{
103 DWORD mode;
104 return GetConsoleMode (h, &mode) != 0;
105}
106
107static BOOL
108IsSocketHandle (HANDLE h)
109{
110 WSANETWORKEVENTS ev;
111
112 if (IsConsoleHandle (h))
113 return FALSE;
114
115 /* Under Wine, it seems that getsockopt returns 0 for pipes too.
116 WSAEnumNetworkEvents instead distinguishes the two correctly. */
117 ev.lNetworkEvents = 0xDEADBEEF;
118 WSAEnumNetworkEvents ((SOCKET) h, NULL, &ev);
119 return ev.lNetworkEvents != 0xDEADBEEF;
120}
121
122/* Compute output fd_sets for libc descriptor FD (whose Windows handle is
123 H). */
124
125static int
126windows_poll_handle (HANDLE h, int fd,
127 struct bitset *rbits,
128 struct bitset *wbits,
129 struct bitset *xbits)
130{
131 BOOL read, write, except;
132 int i, ret;
133 INPUT_RECORD *irbuffer;
134 DWORD avail, nbuffer;
135 BOOL bRet;
136 IO_STATUS_BLOCK iosb;
137 FILE_PIPE_LOCAL_INFORMATION fpli;
138 static PNtQueryInformationFile NtQueryInformationFile;
139 static BOOL once_only;
140
141 read = write = except = FALSE;
142 switch (GetFileType (h))
143 {
144 case FILE_TYPE_DISK:
145 read = TRUE;
146 write = TRUE;
147 break;
148
149 case FILE_TYPE_PIPE:
150 if (!once_only)
151 {
152 NtQueryInformationFile = (PNtQueryInformationFile)
153 GetProcAddress (GetModuleHandle ("ntdll.dll"),
154 "NtQueryInformationFile");
155 once_only = TRUE;
156 }
157
158 if (PeekNamedPipe (h, NULL, 0, NULL, &avail, NULL) != 0)
159 {
160 if (avail)
161 read = TRUE;
162 }
163 else if (GetLastError () == ERROR_BROKEN_PIPE)
164 read = TRUE;
165
166 else
167 {
168 /* It was the write-end of the pipe. Check if it is writable.
169 If NtQueryInformationFile fails, optimistically assume the pipe is
170 writable. This could happen on Windows 9x, where
171 NtQueryInformationFile is not available, or if we inherit a pipe
172 that doesn't permit FILE_READ_ATTRIBUTES access on the write end
173 (I think this should not happen since Windows XP SP2; WINE seems
174 fine too). Otherwise, ensure that enough space is available for
175 atomic writes. */
176 memset (&iosb, 0, sizeof (iosb));
177 memset (&fpli, 0, sizeof (fpli));
178
179 if (!NtQueryInformationFile
180 || NtQueryInformationFile (h, &iosb, &fpli, sizeof (fpli),
181 FilePipeLocalInformation)
182 || fpli.WriteQuotaAvailable >= PIPE_BUF
183 || (fpli.OutboundQuota < PIPE_BUF &&
184 fpli.WriteQuotaAvailable == fpli.OutboundQuota))
185 write = TRUE;
186 }
187 break;
188
189 case FILE_TYPE_CHAR:
190 write = TRUE;
191 if (!(rbits->in[fd / CHAR_BIT] & (1 << (fd & (CHAR_BIT - 1)))))
192 break;
193
194 ret = WaitForSingleObject (h, 0);
195 if (ret == WAIT_OBJECT_0)
196 {
197 if (!IsConsoleHandle (h))
198 {
199 read = TRUE;
200 break;
201 }
202
203 nbuffer = avail = 0;
204 bRet = GetNumberOfConsoleInputEvents (h, &nbuffer);
205
206 /* Screen buffers handles are filtered earlier. */
207 assert (bRet);
208 if (nbuffer == 0)
209 {
210 except = TRUE;
211 break;
212 }
213
214 irbuffer = (INPUT_RECORD *) alloca (nbuffer * sizeof (INPUT_RECORD));
215 bRet = PeekConsoleInput (h, irbuffer, nbuffer, &avail);
216 if (!bRet || avail == 0)
217 {
218 except = TRUE;
219 break;
220 }
221
222 for (i = 0; i < avail; i++)
223 if (irbuffer[i].EventType == KEY_EVENT &&
224 irbuffer[i].Event.KeyEvent.bKeyDown)
225 read = TRUE;
226 }
227 break;
228
229 default:
230 ret = WaitForSingleObject (h, 0);
231 write = TRUE;
232 if (ret == WAIT_OBJECT_0)
233 read = TRUE;
234
235 break;
236 }
237
238 ret = 0;
239 if (read && (rbits->in[fd / CHAR_BIT] & (1 << (fd & (CHAR_BIT - 1)))))
240 {
241 rbits->out[fd / CHAR_BIT] |= (1 << (fd & (CHAR_BIT - 1)));
242 ret++;
243 }
244
245 if (write && (wbits->in[fd / CHAR_BIT] & (1 << (fd & (CHAR_BIT - 1)))))
246 {
247 wbits->out[fd / CHAR_BIT] |= (1 << (fd & (CHAR_BIT - 1)));
248 ret++;
249 }
250
251 if (except && (xbits->in[fd / CHAR_BIT] & (1 << (fd & (CHAR_BIT - 1)))))
252 {
253 xbits->out[fd / CHAR_BIT] |= (1 << (fd & (CHAR_BIT - 1)));
254 ret++;
255 }
256
257 return ret;
258}
259
260int
261mingw_select (int nfds, fd_set *rfds, fd_set *wfds, fd_set *xfds,
262 struct timeval *timeout)
263#undef timeval
264{
265 static struct timeval tv0;
266 static HANDLE hEvent;
267 HANDLE h, handle_array[FD_SETSIZE + 2];
268 fd_set handle_rfds, handle_wfds, handle_xfds;
269 struct bitset rbits, wbits, xbits;
270 unsigned char anyfds_in[FD_SETSIZE / CHAR_BIT];
271 DWORD ret, wait_timeout, nhandles, nsock, nbuffer;
272 MSG msg;
273 int i, fd, rc;
274 clock_t tend = 0;
275
276 if (nfds > FD_SETSIZE)
277 nfds = FD_SETSIZE;
278
279 if (!timeout)
280 wait_timeout = INFINITE;
281 else
282 {
283 wait_timeout = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
284
285 /* select is also used as a portable usleep. */
286 if (!rfds && !wfds && !xfds)
287 {
288 Sleep (wait_timeout);
289 return 0;
290 }
291 }
292
293 if (!hEvent)
294 hEvent = CreateEvent (NULL, FALSE, FALSE, NULL);
295
296 handle_array[0] = hEvent;
297 nhandles = 1;
298 nsock = 0;
299
300 /* Copy descriptors to bitsets. At the same time, eliminate
301 bits in the "wrong" direction for console input buffers
302 and screen buffers, because screen buffers are waitable
303 and they will block until a character is available. */
304 memset (&rbits, 0, sizeof (rbits));
305 memset (&wbits, 0, sizeof (wbits));
306 memset (&xbits, 0, sizeof (xbits));
307 memset (anyfds_in, 0, sizeof (anyfds_in));
308 if (rfds)
309 for (i = 0; i < rfds->fd_count; i++)
310 {
311 fd = rfds->fd_array[i];
312 h = (HANDLE) _get_osfhandle (fd);
313 if (IsConsoleHandle (h)
314 && !GetNumberOfConsoleInputEvents (h, &nbuffer))
315 continue;
316
317 rbits.in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
318 anyfds_in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
319 }
320 else
321 rfds = (fd_set *) alloca (sizeof (fd_set));
322
323 if (wfds)
324 for (i = 0; i < wfds->fd_count; i++)
325 {
326 fd = wfds->fd_array[i];
327 h = (HANDLE) _get_osfhandle (fd);
328 if (IsConsoleHandle (h)
329 && GetNumberOfConsoleInputEvents (h, &nbuffer))
330 continue;
331
332 wbits.in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
333 anyfds_in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
334 }
335 else
336 wfds = (fd_set *) alloca (sizeof (fd_set));
337
338 if (xfds)
339 for (i = 0; i < xfds->fd_count; i++)
340 {
341 fd = xfds->fd_array[i];
342 xbits.in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
343 anyfds_in[fd / CHAR_BIT] |= 1 << (fd & (CHAR_BIT - 1));
344 }
345 else
346 xfds = (fd_set *) alloca (sizeof (fd_set));
347
348 /* Zero all the fd_sets, including the application's. */
349 FD_ZERO (rfds);
350 FD_ZERO (wfds);
351 FD_ZERO (xfds);
352 FD_ZERO (&handle_rfds);
353 FD_ZERO (&handle_wfds);
354 FD_ZERO (&handle_xfds);
355
356 /* Classify handles. Create fd sets for sockets, poll the others. */
357 for (i = 0; i < nfds; i++)
358 {
359 if ((anyfds_in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1)))) == 0)
360 continue;
361
362 h = (HANDLE) _get_osfhandle (i);
363 if (!h)
364 {
365 errno = EBADF;
366 return -1;
367 }
368
369 if (IsSocketHandle (h))
370 {
371 int requested = FD_CLOSE;
372
373 /* See above; socket handles are mapped onto select, but we
374 need to map descriptors to handles. */
375 if (rbits.in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
376 {
377 requested |= FD_READ | FD_ACCEPT;
378 FD_SET ((SOCKET) h, rfds);
379 FD_SET ((SOCKET) h, &handle_rfds);
380 }
381 if (wbits.in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
382 {
383 requested |= FD_WRITE | FD_CONNECT;
384 FD_SET ((SOCKET) h, wfds);
385 FD_SET ((SOCKET) h, &handle_wfds);
386 }
387 if (xbits.in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
388 {
389 requested |= FD_OOB;
390 FD_SET ((SOCKET) h, xfds);
391 FD_SET ((SOCKET) h, &handle_xfds);
392 }
393
394 WSAEventSelect ((SOCKET) h, hEvent, requested);
395 nsock++;
396 }
397 else
398 {
399 handle_array[nhandles++] = h;
400
401 /* Poll now. If we get an event, do not wait below. */
402 if (wait_timeout != 0
403 && windows_poll_handle (h, i, &rbits, &wbits, &xbits))
404 wait_timeout = 0;
405 }
406 }
407
408 /* Place a sentinel at the end of the array. */
409 handle_array[nhandles] = NULL;
410
411 /* When will the waiting period expire? */
412 if (wait_timeout != INFINITE)
413 tend = clock () + wait_timeout;
414
415restart:
416 if (wait_timeout == 0 || nsock == 0)
417 rc = 0;
418 else
419 {
420 /* See if we need to wait in the loop below. If any select is ready,
421 do MsgWaitForMultipleObjects anyway to dispatch messages, but
422 no need to call select again. */
423 rc = select (0, &handle_rfds, &handle_wfds, &handle_xfds, &tv0);
424 if (rc == 0)
425 {
426 /* Restore the fd_sets for the other select we do below. */
427 memcpy (&handle_rfds, rfds, sizeof (fd_set));
428 memcpy (&handle_wfds, wfds, sizeof (fd_set));
429 memcpy (&handle_xfds, xfds, sizeof (fd_set));
430 }
431 else
432 wait_timeout = 0;
433 }
434
435 /* How much is left to wait? */
436 if (wait_timeout != INFINITE)
437 {
438 clock_t tnow = clock ();
439 if (tend >= tnow)
440 wait_timeout = tend - tnow;
441 else
442 wait_timeout = 0;
443 }
444
445 for (;;)
446 {
447 ret = MsgWaitForMultipleObjects (nhandles, handle_array, FALSE,
448 wait_timeout, QS_ALLINPUT);
449
450 if (ret == WAIT_OBJECT_0 + nhandles)
451 {
452 /* new input of some other kind */
453 BOOL bRet;
454 while ((bRet = PeekMessage (&msg, NULL, 0, 0, PM_REMOVE)) != 0)
455 {
456 TranslateMessage (&msg);
457 DispatchMessage (&msg);
458 }
459 }
460 else
461 break;
462 }
463
464 /* If we haven't done it yet, check the status of the sockets. */
465 if (rc == 0 && nsock > 0)
466 rc = select (0, &handle_rfds, &handle_wfds, &handle_xfds, &tv0);
467
468 if (nhandles > 1)
469 {
470 /* Count results that are not counted in the return value of select. */
471 nhandles = 1;
472 for (i = 0; i < nfds; i++)
473 {
474 if ((anyfds_in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1)))) == 0)
475 continue;
476
477 h = (HANDLE) _get_osfhandle (i);
478 if (h == handle_array[nhandles])
479 {
480 /* Not a socket. */
481 nhandles++;
482 windows_poll_handle (h, i, &rbits, &wbits, &xbits);
483 if (rbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1)))
484 || wbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1)))
485 || xbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
486 rc++;
487 }
488 }
489
490 if (rc == 0
491 && (wait_timeout == INFINITE
492 /* If NHANDLES > 1, but no bits are set, it means we've
493 been told incorrectly that some handle was signaled.
494 This happens with anonymous pipes, which always cause
495 MsgWaitForMultipleObjects to exit immediately, but no
496 data is found ready to be read by windows_poll_handle.
497 To avoid a total failure (whereby we return zero and
498 don't wait at all), let's poll in a more busy loop. */
499 || (wait_timeout != 0 && nhandles > 1)))
500 {
501 /* Sleep 1 millisecond to avoid busy wait and retry with the
502 original fd_sets. */
503 memcpy (&handle_rfds, rfds, sizeof (fd_set));
504 memcpy (&handle_wfds, wfds, sizeof (fd_set));
505 memcpy (&handle_xfds, xfds, sizeof (fd_set));
506 SleepEx (1, TRUE);
507 goto restart;
508 }
509 if (timeout && wait_timeout == 0 && rc == 0)
510 timeout->tv_sec = timeout->tv_usec = 0;
511 }
512
513 /* Now fill in the results. */
514 FD_ZERO (rfds);
515 FD_ZERO (wfds);
516 FD_ZERO (xfds);
517 nhandles = 1;
518 for (i = 0; i < nfds; i++)
519 {
520 if ((anyfds_in[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1)))) == 0)
521 continue;
522
523 h = (HANDLE) _get_osfhandle (i);
524 if (h != handle_array[nhandles])
525 {
526 /* Perform handle->descriptor mapping. */
527 SOCKET s = (SOCKET) h;
528 WSAEventSelect (s, NULL, 0);
529 if (FD_ISSET (s, &handle_rfds))
530 FD_SET (i, rfds);
531 if (FD_ISSET (s, &handle_wfds))
532 FD_SET (i, wfds);
533 if (FD_ISSET (s, &handle_xfds))
534 FD_SET (i, xfds);
535 }
536 else
537 {
538 /* Not a socket. */
539 nhandles++;
540 if (rbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
541 FD_SET (i, rfds);
542 if (wbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
543 FD_SET (i, wfds);
544 if (xbits.out[i / CHAR_BIT] & (1 << (i & (CHAR_BIT - 1))))
545 FD_SET (i, xfds);
546 }
547 }
548
549 return rc;
550}
551
552#else /* ! Native Windows. */
553
554#include <stddef.h> /* NULL */
555#include <errno.h>
556#include <unistd.h>
557
558#undef select
559
/* Wrapper around the system select() that papers over two platform bugs.
   Fails with EINVAL when NFDS is negative or larger than FD_SETSIZE, and
   returns -1 as soon as dup2 (fd, fd) fails for a descriptor below NFDS
   that is a member of one of the sets (errno is whatever dup2 left).
   Otherwise the result is that of select().  */
int
rpl_select (int nfds, fd_set *rfds, fd_set *wfds, fd_set *xfds,
            struct timeval *timeout)
{
  int fd;

  /* FreeBSD 8.2 has a bug: it does not always detect invalid fds.  */
  if (nfds < 0 || nfds > FD_SETSIZE)
    {
      errno = EINVAL;
      return -1;
    }

  for (fd = 0; fd < nfds; fd++)
    {
      int wanted = (rfds != NULL && FD_ISSET (fd, rfds))
                   || (wfds != NULL && FD_ISSET (fd, wfds))
                   || (xfds != NULL && FD_ISSET (fd, xfds));

      if (!wanted)
        continue;
      /* Duplicating a descriptor onto itself only succeeds if it is open.  */
      if (dup2 (fd, fd) != fd)
        return -1;
    }

  /* Interix 3.5 has a bug: it does not support nfds == 0.  */
  if (nfds != 0)
    return select (nfds, rfds, wfds, xfds, timeout);
  return select (1, NULL, NULL, NULL, timeout);
}
591
592#endif
diff --git a/win32/sh_random.c b/win32/sh_random.c
new file mode 100644
index 000000000..10e942e80
--- /dev/null
+++ b/win32/sh_random.c
@@ -0,0 +1,59 @@
1#include "libbb.h"
2#include <ntsecapi.h>
3#include "../shell/random.h"
4
5/*
6 * Obtain a few bytes of random-ish data to initialise the generator.
7 * This is unlikely to be very robust: don't rely on it for
8 * anything that needs to be secure.
9 */
10static void get_entropy(uint32_t state[2])
11{
12#if defined(__MINGW64_VERSION_MAJOR) && \
13 (__MINGW64_VERSION_MAJOR >= 7 || defined(__MINGW64__))
14 if (!RtlGenRandom(state, sizeof(state[0])*2))
15#endif
16 GetSystemTimeAsFileTime((FILETIME *)state);
17
18#if 0
19 {
20 unsigned char *p = (unsigned char *)state;
21 int j;
22
23 for (j=0; j<8; ++j) {
24 fprintf(stderr, "%02x", *p++);
25 if ((j&3) == 3) {
26 fprintf(stderr, " ");
27 }
28 }
29 fprintf(stderr, "\n");
30 }
31#endif
32}
33
34ssize_t get_random_bytes(void *buf, ssize_t count)
35{
36 static random_t rnd;
37 ssize_t save_count = count;
38 uint32_t value;
39 unsigned char *ptr = (unsigned char *)&value;
40
41 if (buf == NULL || count < 0) {
42 errno = EINVAL;
43 return -1;
44 }
45
46 if (UNINITED_RANDOM_T(&rnd)) {
47 uint32_t state[2] = {0, 0};
48
49 get_entropy(state);
50 INIT_RANDOM_T(&rnd, state[0] ? state[0] : 1, state[1]);
51 }
52
53 for (;count > 0; buf+=4, count-=4) {
54 value = full_random(&rnd);
55 memcpy(buf, ptr, count >= 4 ? 4 : count);
56 }
57
58 return save_count;
59}
diff --git a/win32/statfs.c b/win32/statfs.c
new file mode 100644
index 000000000..97b3ce679
--- /dev/null
+++ b/win32/statfs.c
@@ -0,0 +1,70 @@
1#include <sys/statfs.h>
2#include "libbb.h"
3
4/*
5 * Code from libguestfs (with addition of GetVolumeInformation call)
6 */
7int statfs(const char *file, struct statfs *buf)
8{
9 ULONGLONG free_bytes_available; /* for user - similar to bavail */
10 ULONGLONG total_number_of_bytes;
11 ULONGLONG total_number_of_free_bytes; /* for everyone - bfree */
12 DWORD serial, namelen, flags;
13 char fsname[100];
14 struct mntent *mnt;
15 /* Valid filesystem names don't seem to be documented. The following
16 * are present in Wine (dlls/kernel32/volume.c). */
17#define FS_NAMES "NTFS\0FAT\0FAT32\0CDFS\0UDF\0"
18 int fstypes[] = {0, 0x5346544e, 0x4006, 0x4006, 0x9660, 0x15013346};
19
20 if ( (mnt=find_mount_point(file, 0)) == NULL ) {
21 return -1;
22 }
23
24 file = mnt->mnt_dir;
25 if ( !GetDiskFreeSpaceEx(file, (PULARGE_INTEGER) &free_bytes_available,
26 (PULARGE_INTEGER) &total_number_of_bytes,
27 (PULARGE_INTEGER) &total_number_of_free_bytes) ) {
28 errno = err_win_to_posix();
29 return -1;
30 }
31
32 if ( !GetVolumeInformation(file, NULL, 0, &serial, &namelen, &flags,
33 fsname, 100) ) {
34 errno = err_win_to_posix();
35 return -1;
36 }
37
38 memset(buf, 0, sizeof(*buf));
39
40 /* XXX I couldn't determine how to get block size. MSDN has a
41 * unhelpful hard-coded list here:
42 * http://support.microsoft.com/kb/140365
43 * but this depends on the filesystem type, the size of the disk and
44 * the version of Windows. So this code assumes the disk is NTFS
45 * and the version of Windows is >= Win2K.
46 */
47 if (total_number_of_bytes < UINT64_C(16) * 1024 * 1024 * 1024 * 1024)
48 buf->f_bsize = 4096;
49 else if (total_number_of_bytes < UINT64_C(32) * 1024 * 1024 * 1024 * 1024)
50 buf->f_bsize = 8192;
51 else if (total_number_of_bytes < UINT64_C(64) * 1024 * 1024 * 1024 * 1024)
52 buf->f_bsize = 16384;
53 else if (total_number_of_bytes < UINT64_C(128) * 1024 * 1024 * 1024 * 1024)
54 buf->f_bsize = 32768;
55 else
56 buf->f_bsize = 65536;
57
58 buf->f_type = fstypes[index_in_strings(FS_NAMES, fsname)+1];
59 buf->f_frsize = buf->f_bsize;
60 buf->f_blocks = total_number_of_bytes / buf->f_bsize;
61 buf->f_bfree = total_number_of_free_bytes / buf->f_bsize;
62 buf->f_bavail = free_bytes_available / buf->f_bsize;
63 //buf->f_files = 0;
64 //buf->f_ffree = 0;
65 buf->f_fsid = serial;
66 //buf->f_flag = 0;
67 buf->f_namelen = namelen;
68
69 return 0;
70}
diff --git a/win32/strndup.c b/win32/strndup.c
new file mode 100644
index 000000000..4d04609f6
--- /dev/null
+++ b/win32/strndup.c
@@ -0,0 +1,36 @@
1/* A replacement function, for systems that lack strndup.
2
3 Copyright (C) 1996-1998, 2001-2003, 2005-2007, 2009-2020 Free Software
4 Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <https://www.gnu.org/licenses/>. */
18
19#include "libbb.h"
20
21#include <string.h>
22
23#include <stdlib.h>
24
/* Return a freshly malloc'd, NUL-terminated copy of at most N leading
   bytes of S.  Returns NULL if the allocation fails.  The caller owns
   the result and releases it with free().  */
char *
strndup (char const *s, size_t n)
{
  /* Length to copy: up to the first NUL if one occurs within the first
     N bytes, otherwise exactly N.  */
  char const *nul = memchr (s, '\0', n);
  size_t copy_len = nul != NULL ? (size_t) (nul - s) : n;
  char *dup = malloc (copy_len + 1);

  if (dup != NULL)
    {
      memcpy (dup, s, copy_len);
      dup[copy_len] = '\0';
    }
  return dup;
}
diff --git a/win32/strptime.c b/win32/strptime.c
new file mode 100644
index 000000000..3205b95a2
--- /dev/null
+++ b/win32/strptime.c
@@ -0,0 +1,603 @@
1/* Copyright (C) 2002, 2004-2005, 2007, 2009-2020 Free Software Foundation,
2 Inc.
3 This file is part of the GNU C Library.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, see <https://www.gnu.org/licenses/>. */
17
18/*
19 * File from gnulib (https://www.gnu.org/software/gnulib/), processed with
20 * coan source -U_LIBC -U_NL_CURRENT -UHAVE_TM_GMTOFF strptime.c
21 * and lightly edited.
22 *
23 * A form of support for tm_gmtoff was later restored.
24 */
25
26#include "libbb.h"
27#include <time.h>
28
29#include <assert.h>
30#include <ctype.h>
31#include <limits.h>
32#include <string.h>
33#include <stdbool.h>
34
35
36enum ptime_locale_status { not, loc, raw };
37
38
39
/* Helper macros for __strptime_internal.  They are not self-contained:
   they return from the enclosing function and use its locals `rp', `val',
   `tm', `decided', `era_cnt' and `gmtoff'.  */

/* Return NULL from the enclosing function unless CH1 equals CH2.  Note
   that this expands to a bare `if' statement, not a do/while(0) block.  */
#define match_char(ch1, ch2) if (ch1 != ch2) return NULL
/* Compare CS1 case-insensitively against the start of S2.  On a match,
   advance S2 past the matched text and yield 1; otherwise yield 0 and
   leave S2 unchanged.  */
# define match_string(cs1, s2) \
  (strncasecmp ((cs1), (s2), strlen (cs1)) ? 0 : ((s2) += strlen (cs1), 1))
/* We intentionally do not use isdigit() for testing because this will
   lead to problems with the wide character version.  */
/* Skip spaces at `rp', then accumulate decimal digits into `val'.  At
   least one digit is required.  Reading stops after N digits, at the
   first non-digit, or once another digit could push `val' beyond TO.
   Returns NULL from the enclosing function if there is no digit or the
   value lies outside [FROM, TO].  */
#define get_number(from, to, n) \
  do {									      \
    int __n = n;							      \
    val = 0;								      \
    while (*rp == ' ')							      \
      ++rp;								      \
    if (*rp < '0' || *rp > '9')						      \
      return NULL;							      \
    do {								      \
      val *= 10;							      \
      val += *rp++ - '0';						      \
    } while (--__n > 0 && val * 10 <= to && *rp >= '0' && *rp <= '9');	      \
    if (val < from || val > to)						      \
      return NULL;							      \
  } while (0)
# define get_alt_number(from, to, n) \
  /* We don't have the alternate representation.  */			      \
  get_number(from, to, n)
/* Parse the input at `rp' with the nested format NEW_FMT, updating `rp'.
   Yields zero if NEW_FMT is empty or the nested parse fails.  */
#define recursive(new_fmt) \
  (*(new_fmt) != '\0'							      \
   && (rp = __strptime_internal (rp, (new_fmt), tm,			      \
				 decided, era_cnt, gmtoff)) != NULL)
68
69
/* Full English weekday names, indexed by tm_wday (0 = Sunday).  */
static char const weekday_name[][10] =
  {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  };
/* Three-letter weekday abbreviations, same indexing as weekday_name.  */
static char const ab_weekday_name[][4] =
  {
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
  };
/* Full English month names, indexed by tm_mon (0 = January).  */
static char const month_name[][10] =
  {
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
  };
/* Three-letter month abbreviations, same indexing as month_name.  */
static char const ab_month_name[][4] =
  {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
/* Fixed formats and strings used where a locale would otherwise supply
   them: %c, %x/%D, the %p strings, %r and %X/%T respectively.  */
# define HERE_D_T_FMT "%a %b %e %H:%M:%S %Y"
# define HERE_D_FMT "%m/%d/%y"
# define HERE_AM_STR "AM"
# define HERE_PM_STR "PM"
# define HERE_T_FMT_AMPM "%I:%M:%S %p"
# define HERE_T_FMT "%H:%M:%S"

/* Number of days in the year before the start of each month; row 0 is
   for normal years, row 1 for leap years.  Entry 12 is the length of the
   whole year.  */
static const unsigned short int __mon_yday[2][13] =
  {
    /* Normal years.  */
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
    /* Leap years.  */
    { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
  };
103
/* White-space test for a character taken from a `char' string.
   isspace() is only defined for EOF and values representable as
   unsigned char; a plain `char' holding a byte >= 0x80 is negative where
   char is signed, so convert through unsigned char first.  */
# define ISSPACE(Ch) isspace ((unsigned char) (Ch))
105
106
107
108
109#ifndef __isleap
110/* Nonzero if YEAR is a leap year (every 4 years,
111 except every 100th isn't, and every 400th is). */
112# define __isleap(year) \
113 ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
114#endif
115
116/* Compute the day of the week. */
117static void
118day_of_the_week (struct tm *tm)
119{
120 /* We know that January 1st 1970 was a Thursday (= 4). Compute the
121 difference between this data in the one on TM and so determine
122 the weekday. */
123 int corr_year = 1900 + tm->tm_year - (tm->tm_mon < 2);
124 int corr_quad = corr_year / 4;
125 int wday = (-473
126 + (365 * (tm->tm_year - 70))
127 + corr_quad
128 - ((corr_quad + (corr_quad < 0)) / 25 - (corr_quad < 0))
129 + ((corr_quad / 25) / 4)
130 + __mon_yday[0][tm->tm_mon]
131 + tm->tm_mday - 1);
132 tm->tm_wday = ((wday % 7) + 7) % 7;
133}
134
135/* Compute the day of the year. */
136static void
137day_of_the_year (struct tm *tm)
138{
139 tm->tm_yday = (__mon_yday[__isleap (1900 + tm->tm_year)][tm->tm_mon]
140 + (tm->tm_mday - 1));
141}
142
143
/* Parse the text at RP according to the strptime-style format FMT and
   store the decoded fields in TM.

   Returns a pointer to the first input character not consumed, or NULL
   as soon as the input fails to match the format.  Fields of TM that the
   format does not mention are only modified by the post-processing at
   the end (weekday / day-of-year / month / day derivation).

   DECIDED is consulted by %a/%A and set to `raw' when a weekday or month
   name matches.  ERA_CNT is only compared against -1 after parsing.  If
   GMTOFF is non-NULL, %z stores the parsed UTC offset there in seconds
   (negative for a leading '-', 0 for a literal 'Z').

   The get_number, match_char, match_string and recursive macros used
   below operate on this function's locals and may return from it.  */
static char *
__strptime_internal (const char *rp, const char *fmt, struct tm *tm,
                     enum ptime_locale_status *decided, int era_cnt,
                     long *gmtoff)
{

  int cnt;
  size_t val;
  int have_I, is_pm;
  int century, want_century;
  int want_era;
  int have_wday, want_xday;
  int have_yday;
  int have_mon, have_mday;
  int have_uweek, have_wweek;
  int week_no;

  have_I = is_pm = 0;
  century = -1;
  want_century = 0;
  want_era = 0;
  week_no = 0;

  have_wday = want_xday = have_yday = have_mon = have_mday = have_uweek = 0;
  have_wweek = 0;

  while (*fmt != '\0')
    {
      /* A white space in the format string matches zero or more white
         space characters in the input string.  */
      if (ISSPACE (*fmt))
        {
          while (ISSPACE (*rp))
            ++rp;
          ++fmt;
          continue;
        }

      /* Any character but '%' must be matched by the same character
         in the input string.  */
      if (*fmt != '%')
        {
          match_char (*fmt++, *rp++);
          continue;
        }

      ++fmt;
      /* We need this for handling the 'E' modifier.  */
    start_over:

      switch (*fmt++)
        {
        case '%':
          /* Match the '%' character itself.  */
          match_char ('%', *rp++);
          break;
        case 'a':
        case 'A':
          /* Match day of week.  */
          for (cnt = 0; cnt < 7; ++cnt)
            {
              if (*decided != loc
                  && (match_string (weekday_name[cnt], rp)
                      || match_string (ab_weekday_name[cnt], rp)))
                {
                  *decided = raw;
                  break;
                }
            }
          if (cnt == 7)
            /* Does not match a weekday name.  */
            return NULL;
          tm->tm_wday = cnt;
          have_wday = 1;
          break;
        case 'b':
        case 'B':
        case 'h':
          /* Match month name.  */
          for (cnt = 0; cnt < 12; ++cnt)
            {
              if (match_string (month_name[cnt], rp)
                  || match_string (ab_month_name[cnt], rp))
                {
                  *decided = raw;
                  break;
                }
            }
          if (cnt == 12)
            /* Does not match a month name.  */
            return NULL;
          tm->tm_mon = cnt;
          want_xday = 1;
          break;
        case 'c':
          /* Match locale's date and time format.  */
          if (!recursive (HERE_D_T_FMT))
            return NULL;
          want_xday = 1;
          break;
        case 'C':
          /* Match century number.  */
          get_number (0, 99, 2);
          century = val;
          want_xday = 1;
          break;
        case 'd':
        case 'e':
          /* Match day of month.  */
          get_number (1, 31, 2);
          tm->tm_mday = val;
          have_mday = 1;
          want_xday = 1;
          break;
        case 'F':
          if (!recursive ("%Y-%m-%d"))
            return NULL;
          want_xday = 1;
          break;
        case 'x':
          /* Fall through.  */
        case 'D':
          /* Match standard day format.  */
          if (!recursive (HERE_D_FMT))
            return NULL;
          want_xday = 1;
          break;
        case 'k':
        case 'H':
          /* Match hour in 24-hour clock.  */
          get_number (0, 23, 2);
          tm->tm_hour = val;
          have_I = 0;
          break;
        case 'l':
          /* Match hour in 12-hour clock.  GNU extension.  */
        case 'I':
          /* Match hour in 12-hour clock.  */
          get_number (1, 12, 2);
          tm->tm_hour = val % 12;
          have_I = 1;
          break;
        case 'j':
          /* Match day number of year.  */
          get_number (1, 366, 3);
          tm->tm_yday = val - 1;
          have_yday = 1;
          break;
        case 'm':
          /* Match number of month.  */
          get_number (1, 12, 2);
          tm->tm_mon = val - 1;
          have_mon = 1;
          want_xday = 1;
          break;
        case 'M':
          /* Match minute.  */
          get_number (0, 59, 2);
          tm->tm_min = val;
          break;
        case 'n':
        case 't':
          /* Match any white space.  */
          while (ISSPACE (*rp))
            ++rp;
          break;
        case 'p':
          /* Match locale's equivalent of AM/PM.  */
          if (!match_string (HERE_AM_STR, rp))
            {
              if (match_string (HERE_PM_STR, rp))
                is_pm = 1;
              else
                return NULL;
            }
          break;
        case 'q':
          /* Match quarter of year.  GNU extension.  */
          get_number (1, 4, 1);
          tm->tm_mon = (val - 1) * 3;
          tm->tm_mday = 1;
          have_mon = 1;
          have_mday = 1;
          want_xday = 1;
          break;
        case 'r':
          if (!recursive (HERE_T_FMT_AMPM))
            return NULL;
          break;
        case 'R':
          if (!recursive ("%H:%M"))
            return NULL;
          break;
        case 's':
          {
            /* The number of seconds may be very high so we cannot use
               the 'get_number' macro.  Instead read the number
               character for character and construct the result while
               doing this.  */
            time_t secs = 0;
            if (*rp < '0' || *rp > '9')
              /* We need at least one digit.  */
              return NULL;

            do
              {
                secs *= 10;
                secs += *rp++ - '0';
              }
            while (*rp >= '0' && *rp <= '9');

            if (localtime_r (&secs, tm) == NULL)
              /* Error in function.  */
              return NULL;
          }
          break;
        case 'S':
          get_number (0, 61, 2);
          tm->tm_sec = val;
          break;
        case 'X':
          /* Fall through.  */
        case 'T':
          if (!recursive (HERE_T_FMT))
            return NULL;
          break;
        case 'u':
          get_number (1, 7, 1);
          tm->tm_wday = val % 7;
          have_wday = 1;
          break;
        case 'g':
          get_number (0, 99, 2);
          /* XXX This cannot determine any field in TM.  */
          break;
        case 'G':
          if (*rp < '0' || *rp > '9')
            return NULL;
          /* XXX Ignore the number since we would need some more
             information to compute a real date.  */
          do
            ++rp;
          while (*rp >= '0' && *rp <= '9');
          break;
        case 'U':
          get_number (0, 53, 2);
          week_no = val;
          have_uweek = 1;
          break;
        case 'W':
          get_number (0, 53, 2);
          week_no = val;
          have_wweek = 1;
          break;
        case 'V':
          get_number (0, 53, 2);
          /* XXX This cannot determine any field in TM without some
             information.  */
          break;
        case 'w':
          /* Match number of weekday.  */
          get_number (0, 6, 1);
          tm->tm_wday = val;
          have_wday = 1;
          break;
        case 'y':
          /* Match year within century.  */
          get_number (0, 99, 2);
          /* The "Year 2000: The Millennium Rollover" paper suggests that
             values in the range 69-99 refer to the twentieth century.  */
          tm->tm_year = val >= 69 ? val : val + 100;
          /* Indicate that we want to use the century, if specified.  */
          want_century = 1;
          want_xday = 1;
          break;
        case 'Y':
          /* Match year including century number.  */
          get_number (0, 9999, 4);
          tm->tm_year = val - 1900;
          want_century = 0;
          want_xday = 1;
          break;
        case 'Z':
          /* XXX How to handle this?  Currently %Z consumes no input.  */
          break;
        case 'z':
          /* We recognize three formats: if two digits are given, these
             specify hours.  If four digits are used, minutes are
             also specified.  A literal 'Z' means a zero offset.  */
          {
            bool neg;
            int n;

            val = 0;
            while (*rp == ' ')
              ++rp;
            if (*rp == 'Z') {
              ++rp;
              if (gmtoff)
                *gmtoff = 0;
              /* Leaves the switch, not just this block.  */
              break;
            }
            if (*rp != '+' && *rp != '-')
              return NULL;
            neg = *rp++ == '-';
            n = 0;
            while (n < 4 && *rp >= '0' && *rp <= '9')
              {
                val = val * 10 + *rp++ - '0';
                ++n;
              }
            if (n == 2)
              val *= 100;
            else if (n != 4)
              /* Only two or four digits recognized.  */
              return NULL;
            else
              {
                /* We have to convert the minutes into decimal.  */
                if (val % 100 >= 60)
                  return NULL;
                val = (val / 100) * 100 + ((val % 100) * 50) / 30;
              }
            /* VAL is now the offset in hundredths of an hour.  */
            if (val > 1200)
              return NULL;
            if (gmtoff) {
              *gmtoff = (val * 3600) / 100;
              if (neg)
                *gmtoff = -*gmtoff;
            }
          }
          break;
        case 'E':
          /* We have no information about the era format.  Just use
             the normal format.  */
          if (strchr("cCyYxX", *fmt) == NULL)
            /* This is an illegal format.  */
            return NULL;

          goto start_over;
        case 'O':
          /* We don't have an alternative number format.  Just use
             the normal format.  */
          if (strchr("deHImMqSUWVwy", *fmt) == NULL)
            /* This is an illegal format.  */
            return NULL;

          goto start_over;
        default:
          return NULL;
        }
    }

  if (have_I && is_pm)
    tm->tm_hour += 12;

  if (century != -1)
    {
      if (want_century)
        tm->tm_year = tm->tm_year % 100 + (century - 19) * 100;
      else
        /* Only the century, but not the year.  Strange, but so be it.  */
        tm->tm_year = (century - 19) * 100;
    }

  /* NOTE(review): want_era is never set to non-zero in this function, so
     the branch below cannot currently be taken.  */
  if (era_cnt != -1)
    {
    }
  else
    if (want_era)
      {
        /* No era found but we have seen an E modifier.  Rectify some
           values.  */
        if (want_century && century == -1 && tm->tm_year < 69)
          tm->tm_year += 100;
      }

  if (want_xday && !have_wday)
    {
      if ( !(have_mon && have_mday) && have_yday)
        {
          /* We don't have tm_mon and/or tm_mday, compute them.  */
          /* NOTE(review): the scan below has no upper bound on t_mon; it
             relies on tm_yday being smaller than the year length in the
             table (e.g. %j 366 in a non-leap year is not).  */
          int t_mon = 0;
          while (__mon_yday[__isleap(1900 + tm->tm_year)][t_mon] <= tm->tm_yday)
            t_mon++;
          if (!have_mon)
            tm->tm_mon = t_mon - 1;
          if (!have_mday)
            tm->tm_mday =
              (tm->tm_yday
               - __mon_yday[__isleap(1900 + tm->tm_year)][t_mon - 1] + 1);
        }
      day_of_the_week (tm);
    }

  if (want_xday && !have_yday)
    day_of_the_year (tm);

  if ((have_uweek || have_wweek) && have_wday)
    {
      int save_wday = tm->tm_wday;
      int save_mday = tm->tm_mday;
      int save_mon = tm->tm_mon;
      /* %U weeks start on Sunday, %W weeks on Monday.  */
      int w_offset = have_uweek ? 0 : 1;

      /* Find the weekday of January 1st of the year in TM.  */
      tm->tm_mday = 1;
      tm->tm_mon = 0;
      day_of_the_week (tm);
      if (have_mday)
        tm->tm_mday = save_mday;
      if (have_mon)
        tm->tm_mon = save_mon;

      if (!have_yday)
        tm->tm_yday = ((7 - (tm->tm_wday - w_offset)) % 7
                       + (week_no - 1) *7
                       + save_wday - w_offset);

      if (!have_mday || !have_mon)
        {
          /* NOTE(review): same unbounded table scan as above; tm_yday
             computed from the week number is not range-checked first.  */
          int t_mon = 0;
          while (__mon_yday[__isleap(1900 + tm->tm_year)][t_mon]
                 <= tm->tm_yday)
            t_mon++;
          if (!have_mon)
            tm->tm_mon = t_mon - 1;
          if (!have_mday)
            tm->tm_mday =
              (tm->tm_yday
               - __mon_yday[__isleap(1900 + tm->tm_year)][t_mon - 1] + 1);
        }

      tm->tm_wday = save_wday;
    }

  return (char *) rp;
}
583
584
585char *
586strptime (const char *buf, const char *format, struct tm *tm)
587{
588 enum ptime_locale_status decided;
589
590 decided = raw;
591 return __strptime_internal (buf, format, tm, &decided, -1, NULL);
592}
593
594char *
595mingw_strptime (const char *buf, const char *format, struct tm *tm,
596 long *gmtoff)
597{
598 enum ptime_locale_status decided;
599
600 decided = raw;
601 return __strptime_internal (buf, format, tm, &decided, -1, gmtoff);
602}
603
diff --git a/win32/strverscmp.c b/win32/strverscmp.c
new file mode 100644
index 000000000..05dc60c39
--- /dev/null
+++ b/win32/strverscmp.c
@@ -0,0 +1,62 @@
1/*
2 strverscmp from musl (https://www.musl-libc.org/).
3
4 MIT licensed:
5
6----------------------------------------------------------------------
7Copyright © 2005-2020 Rich Felker, et al.
8
9Permission is hereby granted, free of charge, to any person obtaining
10a copy of this software and associated documentation files (the
11"Software"), to deal in the Software without restriction, including
12without limitation the rights to use, copy, modify, merge, publish,
13distribute, sublicense, and/or sell copies of the Software, and to
14permit persons to whom the Software is furnished to do so, subject to
15the following conditions:
16
17The above copyright notice and this permission notice shall be
18included in all copies or substantial portions of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27----------------------------------------------------------------------
28*/
29#include "libbb.h"
30#include <ctype.h>
31#include <string.h>
32
/*
 * Compare two strings so that embedded runs of digits order by numeric
 * value rather than byte by byte.  Returns zero if the strings are equal,
 * otherwise a negative or positive value as l0 sorts before or after r0.
 */
int strverscmp(const char *l0, const char *r0)
{
	const unsigned char *lhs = (const void *)l0;
	const unsigned char *rhs = (const void *)r0;
	size_t diff = 0;	/* index of the first differing byte */
	size_t run_start = 0;	/* start of the digit run ending the common prefix */
	int all_zeros = 1;	/* that digit run consists solely of '0' */

	/* Walk the common prefix, tracking its trailing digit run. */
	while (lhs[diff] == rhs[diff]) {
		int c = lhs[diff];

		if (c == '\0')
			return 0;
		if (!isdigit(c)) {
			run_start = diff + 1;
			all_zeros = 1;
		} else if (c != '0') {
			all_zeros = 0;
		}
		diff++;
	}

	if ((unsigned)(lhs[run_start] - '1') < 9U
	 && (unsigned)(rhs[run_start] - '1') < 9U) {
		/* Both digit runs start with a nonzero digit: the longer
		 * run is the larger number. */
		size_t k = diff;

		while (isdigit(lhs[k])) {
			if (!isdigit(rhs[k]))
				return 1;
			k++;
		}
		if (isdigit(rhs[k]))
			return -1;
	} else if (all_zeros && run_start < diff
	 && (isdigit(lhs[diff]) || isdigit(rhs[diff]))) {
		/* The shared part of the digit run is all zeros: digits
		 * order less than non-digits. */
		return (unsigned char)(lhs[diff] - '0')
		     - (unsigned char)(rhs[diff] - '0');
	}

	return lhs[diff] - rhs[diff];
}
diff --git a/win32/sys/inotify.h b/win32/sys/inotify.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/inotify.h
diff --git a/win32/sys/ioctl.h b/win32/sys/ioctl.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/ioctl.h
diff --git a/win32/sys/mman.h b/win32/sys/mman.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/mman.h
diff --git a/win32/sys/resource.h b/win32/sys/resource.h
new file mode 100644
index 000000000..3220d8112
--- /dev/null
+++ b/win32/sys/resource.h
@@ -0,0 +1,11 @@
1#ifndef _SYS_RESOURCE_H
2#define _SYS_RESOURCE_H 1
3
4#include <time.h>
5
/* Minimal stand-in for the POSIX resource-usage record; only the two
   time fields are provided.  */
struct rusage {
	struct timeval ru_utime;	/* presumably user CPU time, as in POSIX -- confirm with the code that fills it */
	struct timeval ru_stime;	/* presumably system CPU time, as in POSIX -- confirm with the code that fills it */
};
10
11#endif
diff --git a/win32/sys/select.h b/win32/sys/select.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/select.h
diff --git a/win32/sys/socket.h b/win32/sys/socket.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/socket.h
diff --git a/win32/sys/statfs.h b/win32/sys/statfs.h
new file mode 100644
index 000000000..498f41e50
--- /dev/null
+++ b/win32/sys/statfs.h
@@ -0,0 +1,22 @@
1#ifndef _SYS_STATFS_H
2#define _SYS_STATFS_H 1
3
4#include <stdint.h>
5
/* Filesystem statistics as filled in by the win32 statfs().  The
   per-field notes describe what that implementation stores.  */
struct statfs {
	int f_type;		/* magic number selected from the volume's filesystem name; 0 if not recognised */
	uint64_t f_bsize;	/* block size, guessed from the total size of the volume */
	uint64_t f_frsize;	/* set equal to f_bsize */
	uint64_t f_blocks;	/* total bytes on the volume / f_bsize */
	uint64_t f_bfree;	/* total free bytes / f_bsize */
	uint64_t f_bavail;	/* free bytes available to the caller / f_bsize */
	uint64_t f_files;	/* not filled in (left zero) */
	uint64_t f_ffree;	/* not filled in (left zero) */
	uint64_t f_fsid;	/* volume serial number */
	uint64_t f_flag;	/* not filled in (left zero) */
	uint64_t f_namelen;	/* name length value reported by GetVolumeInformation */
};
19
20extern int statfs(const char *file, struct statfs *buf);
21
22#endif
diff --git a/win32/sys/statvfs.h b/win32/sys/statvfs.h
new file mode 100644
index 000000000..ceb9ee353
--- /dev/null
+++ b/win32/sys/statvfs.h
@@ -0,0 +1,3 @@
#include <sys/statfs.h>

/* statvfs is provided as a plain alias: because this is an object-like
   macro it renames both the function and the struct tag to statfs.  */
#define statvfs statfs
diff --git a/win32/sys/syscall.h b/win32/sys/syscall.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/syscall.h
diff --git a/win32/sys/sysmacros.h b/win32/sys/sysmacros.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/sysmacros.h
diff --git a/win32/sys/times.h b/win32/sys/times.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/times.h
diff --git a/win32/sys/un.h b/win32/sys/un.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/un.h
diff --git a/win32/sys/utsname.h b/win32/sys/utsname.h
new file mode 100644
index 000000000..6f12efd58
--- /dev/null
+++ b/win32/sys/utsname.h
@@ -0,0 +1,66 @@
1/* Copyright (C) 1991,92,94,96,97,99,2002 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
18
19/*
20 * POSIX Standard: 4.4 System Identification <sys/utsname.h>
21 */
22
23#ifndef _SYS_UTSNAME_H
24#define _SYS_UTSNAME_H 1
25
26#define _UTSNAME_LENGTH 65
27
28#ifndef _UTSNAME_SYSNAME_LENGTH
29# define _UTSNAME_SYSNAME_LENGTH _UTSNAME_LENGTH
30#endif
31#ifndef _UTSNAME_NODENAME_LENGTH
32# define _UTSNAME_NODENAME_LENGTH _UTSNAME_LENGTH
33#endif
34#ifndef _UTSNAME_RELEASE_LENGTH
35# define _UTSNAME_RELEASE_LENGTH _UTSNAME_LENGTH
36#endif
37#ifndef _UTSNAME_VERSION_LENGTH
38# define _UTSNAME_VERSION_LENGTH _UTSNAME_LENGTH
39#endif
40#ifndef _UTSNAME_MACHINE_LENGTH
41# define _UTSNAME_MACHINE_LENGTH _UTSNAME_LENGTH
42#endif
43
44/* Structure describing the system and machine. */
45struct utsname
46 {
47 /* Name of the implementation of the operating system. */
48 char sysname[_UTSNAME_SYSNAME_LENGTH];
49
50 /* Name of this node on the network. */
51 char nodename[_UTSNAME_NODENAME_LENGTH];
52
53 /* Current release level of this implementation. */
54 char release[_UTSNAME_RELEASE_LENGTH];
55 /* Current version level of this release. */
56 char version[_UTSNAME_VERSION_LENGTH];
57
58 /* Name of the hardware type the system is running on. */
59 char machine[_UTSNAME_MACHINE_LENGTH];
60 };
61
62/* Put information about the system in NAME. */
63extern int uname (struct utsname *__name);
64
65
66#endif /* sys/utsname.h */
diff --git a/win32/sys/wait.h b/win32/sys/wait.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/win32/sys/wait.h
diff --git a/win32/system.c b/win32/system.c
new file mode 100644
index 000000000..c718d9948
--- /dev/null
+++ b/win32/system.c
@@ -0,0 +1,22 @@
1#include "libbb.h"
2
3int mingw_system(const char *cmd)
4{
5 const char *argv[4] = { "sh", "-c", cmd, NULL };
6 intptr_t proc;
7 HANDLE h;
8 DWORD ret = 0;
9
10 if (cmd == NULL)
11 return 1;
12
13 if ((proc=mingw_spawn_proc(argv)) == -1)
14 return -1;
15
16 h = (HANDLE)proc;
17 WaitForSingleObject(h, INFINITE);
18 GetExitCodeProcess(h, &ret);
19 CloseHandle(h);
20
21 return exit_code_to_wait_status(ret);
22}
diff --git a/win32/termios.c b/win32/termios.c
new file mode 100644
index 000000000..f18ff7c3b
--- /dev/null
+++ b/win32/termios.c
@@ -0,0 +1,128 @@
1#include "libbb.h"
2
3int tcsetattr(int fd, int mode UNUSED_PARAM, const struct termios *t)
4{
5 if (terminal_mode(FALSE) & VT_INPUT) {
6 HANDLE h = (HANDLE)_get_osfhandle(fd);
7 if (!SetConsoleMode(h, t->imode)) {
8 errno = err_win_to_posix();
9 return -1;
10 }
11 }
12
13 return 0;
14}
15
16int tcgetattr(int fd, struct termios *t)
17{
18 if (terminal_mode(FALSE) & VT_INPUT) {
19 HANDLE h = (HANDLE)_get_osfhandle(fd);
20 if (!GetConsoleMode(h, &t->imode)) {
21 errno = err_win_to_posix();
22 return -1;
23 }
24 }
25 t->c_cc[VINTR] = 3; // ctrl-c
26 t->c_cc[VEOF] = 4; // ctrl-d
27
28 return 0;
29}
30
/*
 * Read one key press from the console attached to standard input.
 *
 * fd must be 0, otherwise the program dies with an error message.  buf
 * is unused.  If timeout is positive it is passed to
 * WaitForSingleObject() before each read, and the function gives up when
 * that wait does not signal.
 *
 * Returns the character read, or a KEYCODE_* value for the cursor and
 * editing keys and the recognised Alt combinations, or -1 when the
 * handle is invalid, the wait does not signal, or reading fails.
 * The console mode is set to 0 while reading and restored before
 * returning.
 */
int64_t FAST_FUNC windows_read_key(int fd, char *buf UNUSED_PARAM, int timeout)
{
	HANDLE cin = GetStdHandle(STD_INPUT_HANDLE);
	INPUT_RECORD record;
	DWORD nevent_out, mode;
	int ret = -1;
	DWORD alt_pressed = FALSE;
	DWORD state;

	if (fd != 0)
		bb_error_msg_and_die("read_key only works on stdin");
	if (cin == INVALID_HANDLE_VALUE)
		return -1;
	/* NOTE(review): the result of GetConsoleMode() is not checked; if it
	 * fails, `mode' is restored below without having been set. */
	GetConsoleMode(cin, &mode);
	SetConsoleMode(cin, 0);

	while (1) {
		errno = 0;
		if (timeout > 0) {
			if (WaitForSingleObject(cin, timeout) != WAIT_OBJECT_0)
				goto done;
		}
		if (!readConsoleInput_utf8(cin, &record, 1, &nevent_out))
			goto done;

		/* only keyboard events are of interest */
		if (record.EventType != KEY_EVENT)
			continue;

		state = record.Event.KeyEvent.dwControlKeyState;
		if (!record.Event.KeyEvent.bKeyDown) {
			/* ignore all key up events except Alt */
			if (!(alt_pressed && (state & LEFT_ALT_PRESSED) == 0 &&
					record.Event.KeyEvent.wVirtualKeyCode == VK_MENU))
				continue;
		}
		/* remember whether left Alt is held, for the release test above */
		alt_pressed = state & LEFT_ALT_PRESSED;

		if (!record.Event.KeyEvent.uChar.AsciiChar) {
			/* the event carries no character: a special key */
			if (alt_pressed && !(state & ENHANCED_KEY)) {
				/* keys on numeric pad used to enter character codes */
				switch (record.Event.KeyEvent.wVirtualKeyCode) {
				case VK_NUMPAD0: case VK_INSERT:
				case VK_NUMPAD1: case VK_END:
				case VK_NUMPAD2: case VK_DOWN:
				case VK_NUMPAD3: case VK_NEXT:
				case VK_NUMPAD4: case VK_LEFT:
				case VK_NUMPAD5: case VK_CLEAR:
				case VK_NUMPAD6: case VK_RIGHT:
				case VK_NUMPAD7: case VK_HOME:
				case VK_NUMPAD8: case VK_UP:
				case VK_NUMPAD9: case VK_PRIOR:
					continue;
				}
			}

			switch (record.Event.KeyEvent.wVirtualKeyCode) {
			case VK_DELETE: ret = KEYCODE_DELETE; break;
			case VK_INSERT: ret = KEYCODE_INSERT; break;
			case VK_UP: ret = KEYCODE_UP; break;
			case VK_DOWN: ret = KEYCODE_DOWN; break;
			case VK_RIGHT: ret = KEYCODE_RIGHT; break;
			case VK_LEFT: ret = KEYCODE_LEFT; break;
			case VK_HOME: ret = KEYCODE_HOME; break;
			case VK_END: ret = KEYCODE_END; break;
			case VK_PRIOR: ret = KEYCODE_PAGEUP; break;
			case VK_NEXT: ret = KEYCODE_PAGEDOWN; break;
			default:
				alt_pressed = FALSE;
				continue;
			}

			/* modifiers are encoded by clearing bits in the keycode:
			 * Alt clears 0x20, Ctrl 0x40 and Shift 0x80 */
			if (state & (RIGHT_ALT_PRESSED|LEFT_ALT_PRESSED))
				ret &= ~0x20;
			if (state & (RIGHT_CTRL_PRESSED|LEFT_CTRL_PRESSED))
				ret &= ~0x40;
			if (state & SHIFT_PRESSED)
				ret &= ~0x80;
			goto done;
		}
		if ( (record.Event.KeyEvent.uChar.AsciiChar & 0x80) == 0x80 ) {
			/* character with the high bit set: converted in place by
			 * conToCharBuffA() (presumably from the console code
			 * page -- confirm against its definition) */
			char *s = &record.Event.KeyEvent.uChar.AsciiChar;
			conToCharBuffA(s, 1);
		}
		ret = record.Event.KeyEvent.uChar.AsciiChar;
		if (state & (RIGHT_ALT_PRESSED|LEFT_ALT_PRESSED)) {
			/* Alt combinations that have dedicated keycodes */
			switch (ret) {
			case '\b': ret = KEYCODE_ALT_BACKSPACE; goto done;
			case 'b': ret = KEYCODE_ALT_LEFT; goto done;
			case 'd': ret = KEYCODE_ALT_D; goto done;
			case 'f': ret = KEYCODE_ALT_RIGHT; goto done;
			}
		}
		break;
	}
 done:
	/* put the console mode back before returning */
	SetConsoleMode(cin, mode);
	return ret;
}
diff --git a/win32/termios.h b/win32/termios.h
new file mode 100644
index 000000000..8408aa3e3
--- /dev/null
+++ b/win32/termios.h
@@ -0,0 +1,31 @@
/* Minimal termios definitions for the win32 port: only what the console
   wrappers tcgetattr()/tcsetattr() need.  */
#ifndef TERMIOS_H
#define TERMIOS_H

/* Indices into termios.c_cc. */
#define VINTR 0
#define VEOF 1

/* Queue selector and tcsetattr() action constants.  The win32
   tcsetattr() ignores its mode argument. */
#define TCIFLUSH 0
#define TCSAFLUSH 1
#define TCSANOW 2
#define TCSADRAIN 3
#define TCSADFLUSH 4

typedef unsigned char cc_t;
typedef unsigned int speed_t;

/* Number of entries in termios.c_cc. */
#define NCCS 2
struct termios {
	cc_t c_cc[NCCS];	/* control characters; tcgetattr() sets VINTR and VEOF */
	unsigned long imode;	/* console input mode, as used with Get/SetConsoleMode */
	unsigned long omode;	/* presumably the console output mode -- not used by tcgetattr/tcsetattr; confirm */
};

/* Terminal window size in character cells and pixels. */
struct winsize {
	unsigned short ws_row, ws_col;
	unsigned short ws_xpixel, ws_ypixel;
};

int tcgetattr(int fd, struct termios *t);
int tcsetattr(int fd, int mode, const struct termios *t);

#endif /* TERMIOS_H */
diff --git a/win32/timegm.c b/win32/timegm.c
new file mode 100644
index 000000000..ac39a26f5
--- /dev/null
+++ b/win32/timegm.c
@@ -0,0 +1,133 @@
1/*
2 timegm from musl (https://www.musl-libc.org/).
3
4 MIT licensed:
5
6----------------------------------------------------------------------
7Copyright © 2005-2020 Rich Felker, et al.
8
9Permission is hereby granted, free of charge, to any person obtaining
10a copy of this software and associated documentation files (the
11"Software"), to deal in the Software without restriction, including
12without limitation the rights to use, copy, modify, merge, publish,
13distribute, sublicense, and/or sell copies of the Software, and to
14permit persons to whom the Software is furnished to do so, subject to
15the following conditions:
16
17The above copyright notice and this permission notice shall be
18included in all copies or substantial portions of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27----------------------------------------------------------------------
28*/
29#include "libbb.h"
30
/*
 * Convert a year, counted from 1900 as in struct tm, to the number of
 * seconds between the Unix epoch and 00:00:00 UTC on January 1st of
 * that year.
 *
 * If is_leap is not NULL, *is_leap is set to 1 when the year is a leap
 * year and to 0 otherwise.  is_leap may be NULL on every path.
 */
static long long __year_to_secs(long long year, int *is_leap)
{
	int cycles, centuries, leaps, rem;
	int unused_leap;

	/* Fast path for years 2..138 (1902..2038): every fourth year is a
	 * leap year in that range and the result fits in an int. */
	if (year-2ULL <= 136) {
		int y = year;
		leaps = (y-68)>>2;
		if (!((y-68)&3)) {
			/* the leap day of the year itself is not yet past */
			leaps--;
			if (is_leap) *is_leap = 1;
		} else if (is_leap) *is_leap = 0;
		return 31536000*(y-70) + 86400*leaps;
	}

	/* The general path stores and reads *is_leap unconditionally, so
	 * give it somewhere to write when the caller passed NULL. */
	if (!is_leap) is_leap = &unused_leap;

	/* Work relative to the year 2000 in 400-year Gregorian cycles. */
	cycles = (year-100) / 400;
	rem = (year-100) % 400;
	if (rem < 0) {
		cycles--;
		rem += 400;
	}
	if (!rem) {
		/* first year of a cycle: divisible by 400, a leap year */
		*is_leap = 1;
		centuries = 0;
		leaps = 0;
	} else {
		if (rem >= 200) {
			if (rem >= 300) centuries = 3, rem -= 300;
			else centuries = 2, rem -= 200;
		} else {
			if (rem >= 100) centuries = 1, rem -= 100;
			else centuries = 0;
		}
		if (!rem) {
			/* century year not divisible by 400: not a leap year */
			*is_leap = 0;
			leaps = 0;
		} else {
			leaps = rem / 4U;
			rem %= 4U;
			*is_leap = !rem;
		}
	}

	/* 97 leap days per 400-year cycle, 24 per century; the leap day of
	 * the year itself has not happened yet on January 1st. */
	leaps += 97*cycles + 24*centuries - *is_leap;

	return (year-100) * 31536000LL + leaps * 86400LL + 946684800 + 86400;
}
77
/*
 * Seconds from the start of a year to the start of the given month
 * (0 = January .. 11 = December).  A non-zero is_leap adds the leap day
 * for March and later months.
 */
static int __month_to_secs(int month, int is_leap)
{
	static const int days_before_month[] = {
		0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
	};
	int days = days_before_month[month];

	if (is_leap && month >= 2)
		days++;
	return days * 86400;
}
88
/*
 * Convert the broken-down UTC time in tm to seconds since the Unix
 * epoch.  A month outside 0..11 is folded into the year; the remaining
 * fields are used as they are.
 */
static long long __tm_to_secs(const struct tm *tm)
{
	long long year = tm->tm_year;
	int month = tm->tm_mon;
	int is_leap;
	long long secs;

	if (month < 0 || month >= 12) {
		int carry = month / 12;

		month %= 12;
		if (month < 0) {
			/* C division truncates towards zero; borrow a year */
			carry--;
			month += 12;
		}
		year += carry;
	}

	secs = __year_to_secs(year, &is_leap);
	secs += __month_to_secs(month, is_leap);
	secs += 86400LL * (tm->tm_mday-1);
	secs += 3600LL * tm->tm_hour;
	secs += 60LL * tm->tm_min;
	secs += tm->tm_sec;
	return secs;
}
112
/*
 * Restricted version of timegm:
 *
 *   it doesn't normalise its argument
 *   its return value is limited to the range Microsoft supports
 *
 * Returns the time in seconds since the epoch, or -1 with errno set to
 * EOVERFLOW when the result falls outside that range.
 */
time_t timegm(struct tm *tm)
{
#ifdef _USE_32BIT_TIME_T
	const long long latest = INT_MAX; /* 2038-01-19 03:14:07Z */
#else
	const long long latest = 32535215999; /* 3000-12-31 23:59:59Z */
#endif
	long long secs = __tm_to_secs(tm);

	if (secs >= 0 && secs <= latest)
		return secs;

	errno = EOVERFLOW;
	return -1;
}
diff --git a/win32/uname.c b/win32/uname.c
new file mode 100644
index 000000000..9474e5c04
--- /dev/null
+++ b/win32/uname.c
@@ -0,0 +1,47 @@
1#include "libbb.h"
2/* After libbb.h, since it needs sys/types.h on some systems */
3#include <sys/utsname.h>
4
5int uname(struct utsname *name)
6{
7 const char *unk = "unknown";
8 OSVERSIONINFO os_info;
9 SYSTEM_INFO sys_info;
10
11 strcpy(name->sysname, "Windows_NT");
12
13 if ( gethostname(name->nodename, sizeof(name->nodename)) != 0 ) {
14 strcpy(name->nodename, unk);
15 }
16
17 memset(&os_info, 0, sizeof(OSVERSIONINFO));
18 os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
19
20 GetVersionEx(&os_info);
21 sprintf(name->release, "%u.%u", (unsigned int)os_info.dwMajorVersion,
22 (unsigned int)os_info.dwMinorVersion);
23 sprintf(name->version, "%u", (unsigned int)os_info.dwBuildNumber);
24
25 GetSystemInfo(&sys_info);
26 switch (sys_info.wProcessorArchitecture) {
27 case PROCESSOR_ARCHITECTURE_AMD64:
28 strcpy(name->machine, "x86_64");
29 break;
30 case PROCESSOR_ARCHITECTURE_INTEL:
31 strcpy(name->machine, "i686");
32 if (sys_info.wProcessorLevel < 6) {
33 name->machine[1] = '3';
34 }
35 break;
36#if defined(PROCESSOR_ARCHITECTURE_ARM64)
37 case PROCESSOR_ARCHITECTURE_ARM64:
38 strcpy(name->machine, "aarch64");
39 break;
40#endif
41 default:
42 strcpy(name->machine, unk);
43 break;
44 }
45
46 return 0;
47}
diff --git a/win32/winansi.c b/win32/winansi.c
new file mode 100644
index 000000000..c7529c453
--- /dev/null
+++ b/win32/winansi.c
@@ -0,0 +1,1608 @@
1/*
2 * Copyright 2008 Peter Harris <git@peter.is-a-geek.org>
3 */
4
5#include "libbb.h"
6#include <windows.h>
7#include "lazyload.h"
8#undef PACKED
9
10static BOOL charToConBuffA(LPSTR s, DWORD len);
11static BOOL charToConA(LPSTR s);
12
13static int conv_fwriteCon(FILE *stream, char *buf, size_t siz);
14static int conv_writeCon(int fd, char *buf, size_t siz);
15
16/*
17 Functions to be wrapped:
18*/
19#undef vfprintf
20#undef vprintf
21#undef printf
22#undef fprintf
23#undef fputs
24#undef fputc
25#undef putchar
26#undef fwrite
27#undef puts
28#undef write
29#undef read
30#undef fread
31#undef getc
32#undef fgets
33
34#define FOREGROUND_ALL (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE)
35#define BACKGROUND_ALL (BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE)
36
37static WORD plain_attr = 0xffff;
38static WORD current_attr;
39
40static HANDLE get_console(void)
41{
42 return GetStdHandle(STD_OUTPUT_HANDLE);
43}
44
45static WORD get_console_attr(void)
46{
47 CONSOLE_SCREEN_BUFFER_INFO sbi;
48
49 if (GetConsoleScreenBufferInfo(get_console(), &sbi))
50 return sbi.wAttributes;
51
52 return FOREGROUND_ALL;
53}
54
55static int is_console(int fd)
56{
57 if (plain_attr == 0xffff)
58 current_attr = plain_attr = get_console_attr();
59 return isatty(fd) && get_console() != INVALID_HANDLE_VALUE;
60}
61
62static ALWAYS_INLINE int is_console_in(int fd)
63{
64 return isatty(fd) && GetStdHandle(STD_INPUT_HANDLE) != INVALID_HANDLE_VALUE;
65}
66
67static int is_wine(void)
68{
69 DECLARE_PROC_ADDR(const char *, wine_get_version, void);
70
71 return INIT_PROC_ADDR(ntdll.dll, wine_get_version) != NULL;
72}
73
74#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
75#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004
76#endif
77
78#ifndef DISABLE_NEWLINE_AUTO_RETURN
79#define DISABLE_NEWLINE_AUTO_RETURN 0x0008
80#endif
81
82#ifndef ENABLE_VIRTUAL_TERMINAL_INPUT
83#define ENABLE_VIRTUAL_TERMINAL_INPUT 0x0200
84#endif
85
86int FAST_FUNC terminal_mode(int reset)
87{
88 static int mode = -1;
89
90#if ENABLE_FEATURE_EURO
91 if (mode < 0) {
92 if (GetConsoleCP() == 850 && GetConsoleOutputCP() == 850) {
93 SetConsoleCP(858);
94 SetConsoleOutputCP(858);
95 }
96 }
97#endif
98
99 if (mode < 0 || reset) {
100 HANDLE h;
101 DWORD oldmode, newmode;
102 const char *term = getenv(BB_TERMINAL_MODE);
103 const char *skip = getenv(BB_SKIP_ANSI_EMULATION);
104
105 if (term) {
106 mode = atoi(term);
107 } else if (skip) {
108 mode = atoi(skip);
109 if (mode == 2)
110 mode = 5;
111 else if (mode != 1)
112 mode = 0;
113 } else {
114 mode = (getenv("CONEMUPID") != NULL || is_wine()) ? 0 :
115 CONFIG_TERMINAL_MODE;
116 }
117
118 if (mode < 0 || mode > 5)
119 mode = CONFIG_TERMINAL_MODE;
120
121 if (is_console(STDOUT_FILENO)) {
122 h = get_console();
123 if (GetConsoleMode(h, &oldmode)) {
124 // Try to recover from mode 0 induced by SSH.
125 newmode = oldmode == 0 ? 3 : oldmode;
126 // Turn off DISABLE_NEWLINE_AUTO_RETURN induced by Gradle?
127 newmode &= ~DISABLE_NEWLINE_AUTO_RETURN;
128
129 if ((mode & VT_OUTPUT)) {
130 newmode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING;
131 } else if (mode < 4) {
132 newmode &= ~ENABLE_VIRTUAL_TERMINAL_PROCESSING;
133 } else if ((oldmode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)) {
134 mode |= VT_OUTPUT;
135 }
136
137 if (newmode != oldmode) {
138 if (!SetConsoleMode(h, newmode)) {
139 if (mode >= 4)
140 mode &= ~VT_OUTPUT;
141 newmode &= ~ENABLE_VIRTUAL_TERMINAL_PROCESSING;
142 SetConsoleMode(h, newmode);
143 }
144 }
145 }
146 }
147
148 if (is_console_in(STDIN_FILENO)) {
149 h = GetStdHandle(STD_INPUT_HANDLE);
150 if (GetConsoleMode(h, &oldmode)) {
151 // Try to recover from mode 0 induced by SSH.
152 newmode = oldmode == 0 ? 0x1f7 : oldmode;
153
154 if (mode < 4) {
155 if ((mode & VT_INPUT))
156 newmode |= ENABLE_VIRTUAL_TERMINAL_INPUT;
157 else
158 newmode &= ~ENABLE_VIRTUAL_TERMINAL_INPUT;
159 } else if ((oldmode & ENABLE_VIRTUAL_TERMINAL_INPUT)) {
160 mode |= VT_INPUT;
161 }
162
163 if (newmode != oldmode) {
164 if (!SetConsoleMode(h, newmode)) {
165 if (mode >= 4)
166 mode &= ~VT_INPUT;
167 // Failure to set the new mode seems to leave
168 // the flag set. Forcibly unset it.
169 newmode &= ~ENABLE_VIRTUAL_TERMINAL_INPUT;
170 SetConsoleMode(h, newmode);
171 }
172 }
173 }
174 }
175 }
176
177 return mode;
178}
179
/* Set the console window title to str. */
void set_title(const char *str)
{
	(void)SetConsoleTitle(str);
}
184
185int get_title(char *buf, int len)
186{
187 return GetConsoleTitle(buf, len);
188}
189
190static HANDLE dup_handle(HANDLE h)
191{
192 HANDLE h2;
193
194 if (!DuplicateHandle(GetCurrentProcess(), h, GetCurrentProcess(),
195 &h2, 0, TRUE, DUPLICATE_SAME_ACCESS))
196 return INVALID_HANDLE_VALUE;
197 return h2;
198}
199
200static void use_alt_buffer(int flag)
201{
202 static HANDLE console_orig = INVALID_HANDLE_VALUE;
203 HANDLE console, h;
204
205 if (flag) {
206 SECURITY_ATTRIBUTES sa;
207 CONSOLE_SCREEN_BUFFER_INFO sbi;
208
209 if (console_orig != INVALID_HANDLE_VALUE)
210 return;
211
212 console = get_console();
213 console_orig = dup_handle(console);
214
215 // handle should be inheritable
216 memset(&sa, 0, sizeof(sa));
217 sa.nLength = sizeof(sa);
218 /* sa.lpSecurityDescriptor = NULL; - memset did it */
219 sa.bInheritHandle = TRUE;
220
221 // create new alternate buffer
222 h = CreateConsoleScreenBuffer(GENERIC_READ|GENERIC_WRITE,
223 FILE_SHARE_READ|FILE_SHARE_WRITE, &sa,
224 CONSOLE_TEXTMODE_BUFFER, NULL);
225 if (h == INVALID_HANDLE_VALUE)
226 return;
227
228 if (GetConsoleScreenBufferInfo(console, &sbi))
229 SetConsoleScreenBufferSize(h, sbi.dwSize);
230 }
231 else {
232 if (console_orig == INVALID_HANDLE_VALUE)
233 return;
234
235 // revert to original buffer
236 h = dup_handle(console_orig);
237 console_orig = INVALID_HANDLE_VALUE;
238 if (h == INVALID_HANDLE_VALUE)
239 return;
240 }
241
242 console = h;
243 SetConsoleActiveScreenBuffer(console);
244 close(STDOUT_FILENO);
245 _open_osfhandle((intptr_t)console, O_RDWR|O_BINARY);
246}
247
248static void clear_buffer(DWORD len, COORD pos)
249{
250 HANDLE console = get_console();
251 DWORD dummy;
252
253 FillConsoleOutputCharacterA(console, ' ', len, pos, &dummy);
254 FillConsoleOutputAttribute(console, plain_attr, len, pos, &dummy);
255}
256
257static void erase_in_line(void)
258{
259 HANDLE console = get_console();
260 CONSOLE_SCREEN_BUFFER_INFO sbi;
261
262 if (!GetConsoleScreenBufferInfo(console, &sbi))
263 return;
264 clear_buffer(sbi.dwSize.X - sbi.dwCursorPosition.X, sbi.dwCursorPosition);
265}
266
267static void erase_till_end_of_screen(void)
268{
269 HANDLE console = get_console();
270 CONSOLE_SCREEN_BUFFER_INFO sbi;
271 DWORD len;
272
273 if(!GetConsoleScreenBufferInfo(console, &sbi))
274 return;
275 len = sbi.dwSize.X - sbi.dwCursorPosition.X +
276 sbi.dwSize.X * (sbi.srWindow.Bottom - sbi.dwCursorPosition.Y);
277 clear_buffer(len, sbi.dwCursorPosition);
278}
279
280void reset_screen(void)
281{
282 HANDLE console = get_console();
283 CONSOLE_SCREEN_BUFFER_INFO sbi;
284 COORD pos = { 0, 0 };
285
286 /* move to start of screen buffer and clear it all */
287 if (!GetConsoleScreenBufferInfo(console, &sbi))
288 return;
289 SetConsoleCursorPosition(console, pos);
290 clear_buffer(sbi.dwSize.X * sbi.dwSize.Y, pos);
291}
292
293void move_cursor_row(int n)
294{
295 HANDLE console = get_console();
296 CONSOLE_SCREEN_BUFFER_INFO sbi;
297
298 if(!GetConsoleScreenBufferInfo(console, &sbi))
299 return;
300 sbi.dwCursorPosition.Y += n;
301 SetConsoleCursorPosition(console, sbi.dwCursorPosition);
302}
303
304static void move_cursor_column(int n)
305{
306 HANDLE console = get_console();
307 CONSOLE_SCREEN_BUFFER_INFO sbi;
308
309 if (!GetConsoleScreenBufferInfo(console, &sbi))
310 return;
311 sbi.dwCursorPosition.X += n;
312 SetConsoleCursorPosition(console, sbi.dwCursorPosition);
313}
314
315static void move_cursor(int x, int y)
316{
317 HANDLE console = get_console();
318 COORD pos;
319 CONSOLE_SCREEN_BUFFER_INFO sbi;
320
321 if (!GetConsoleScreenBufferInfo(console, &sbi))
322 return;
323 pos.X = sbi.srWindow.Left + x;
324 pos.Y = sbi.srWindow.Top + y;
325 SetConsoleCursorPosition(console, pos);
326}
327
328static const unsigned char colour_1bit[16] = {
329 /* Black */ 0,
330 /* Red */ FOREGROUND_RED,
331 /* Green */ FOREGROUND_GREEN,
332 /* Yellow */ FOREGROUND_RED | FOREGROUND_GREEN,
333 /* Blue */ FOREGROUND_BLUE,
334 /* Magenta */ FOREGROUND_RED | FOREGROUND_BLUE,
335 /* Cyan */ FOREGROUND_GREEN | FOREGROUND_BLUE,
336 /* White */ FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE,
337 /* ... and again but brighter */
338 FOREGROUND_INTENSITY,
339 FOREGROUND_RED | FOREGROUND_INTENSITY,
340 FOREGROUND_GREEN | FOREGROUND_INTENSITY,
341 FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY,
342 FOREGROUND_BLUE | FOREGROUND_INTENSITY,
343 FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_INTENSITY,
344 FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY,
345 FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY
346};
347
348#if !ENABLE_FEATURE_IMPROVED_COLOUR_MAPPING
349static WORD rgb_to_console(int *rgb)
350{
351 int dark = 0, bright;
352 WORD attr = 0;
353
354 if (rgb[0] > 85)
355 attr |= FOREGROUND_RED;
356 else
357 ++dark;
358
359 if (rgb[1] > 85)
360 attr |= FOREGROUND_GREEN;
361 else
362 ++dark;
363
364 if (rgb[2] > 85)
365 attr |= FOREGROUND_BLUE;
366 else
367 ++dark;
368
369 /* increase intensity if all components are either bright or
370 * dark and at least one is bright */
371 bright = (rgb[0] > 171) + (rgb[1] > 171) + (rgb[2] > 171);
372 if (bright + dark == 3 && dark != 3) {
373 attr |= FOREGROUND_INTENSITY;
374 }
375
376 return attr;
377}
378#else
379#include <math.h>
380
/*
 * Standard console colours in LAB colour space.  Rows are in the same
 * order as colour_1bit[]; each row holds L, a and b.  The table is
 * only ever read, hence const.
 */
static const float colour_lab[16][3] = {
	{-0.000000, 0.000000, 0.000000},
	{25.530788, 48.055233, 38.059635},
	{46.228817, -51.699638, 49.897949},
	{51.868336, -12.930751, 56.677288},
	{12.975313, 47.507763, -64.704285},
	{29.782101, 58.939846, -36.497940},
	{48.256081, -28.841570, -8.481050},
	{77.704361, 0.004262, -0.008416},
	{53.585018, 0.003129, -0.006235},
	{53.232883, 80.109299, 67.220078},
	{87.737038, -86.184654, 83.181168},
	{97.138245, -21.555901, 94.482483},
	{32.302586, 79.196678, -107.863686},
	{60.319931, 98.254234, -60.842991},
	{91.116524, -48.079609, -14.138126},
	{100.000000, 0.005245, -0.010419},
};
400
401/* Convert RGB to XYZ and XYZ to LAB. See:
402 * http://www.easyrgb.com/en/math.php#text1 */
403static void rgb2lab(const int *rgb, float *lab)
404{
405 float var_RGB[3], var_XYZ[3];
406 int i;
407
408 for (i = 0; i < 3; ++i) {
409 var_RGB[i] = rgb[i]/255.0f;
410 if (var_RGB[i] > 0.04045f)
411 var_RGB[i] = pow((var_RGB[i] + 0.055f) / 1.055f, 2.4f);
412 else
413 var_RGB[i] /= 12.92f;
414 }
415
416 /* use equal energy reference values */
417 var_XYZ[0] = var_RGB[0]*0.4124f + var_RGB[1]*0.3576f + var_RGB[2]*0.1805f;
418 var_XYZ[1] = var_RGB[0]*0.2126f + var_RGB[1]*0.7152f + var_RGB[2]*0.0722f;
419 var_XYZ[2] = var_RGB[0]*0.0193f + var_RGB[1]*0.1192f + var_RGB[2]*0.9505f;
420
421 for (i = 0; i < 3; ++i) {
422 if (var_XYZ[i] > 0.008856f)
423 var_XYZ[i] = pow(var_XYZ[i], 1.0f / 3.0f);
424 else
425 var_XYZ[i] = 7.787f * var_XYZ[i] + 16.0f / 116.0f;
426 }
427
428 lab[0] = 116.0f * var_XYZ[1] - 16.0f;
429 lab[1] = 500.0f * (var_XYZ[0] - var_XYZ[1]);
430 lab[2] = 200.0f * (var_XYZ[1] - var_XYZ[2]);
431}
432
433static WORD rgb_to_console(int *rgb)
434{
435 int i, imin = 0;
436 float deltamin = 1.0e20;
437
438 /* Use 1976 CIE deltaE to find closest console colour. See:
439 * https://zschuessler.github.io/DeltaE/learn */
440 for (i = 0; i < 16; ++i) {
441 float lab[3], dl, da, db, delta;
442
443 rgb2lab(rgb, lab);
444 dl = colour_lab[i][0] - lab[0];
445 da = colour_lab[i][1] - lab[1];
446 db = colour_lab[i][2] - lab[2];
447 delta = dl * dl + da * da + db *db;
448 if (delta < deltamin) {
449 imin = i;
450 deltamin = delta;
451 }
452 }
453 return colour_1bit[imin];
454}
455#endif
456
457/* 24-bit colour */
458static char *process_24bit(char *str, WORD *attr)
459{
460 int count;
461 int rgb[3];
462
463 for (count = 0; count < 3; ++count) {
464 rgb[count] = strtol(str, (char **)&str, 10);
465 if (*str == ';')
466 ++str;
467 }
468
469 *attr = rgb_to_console(rgb);
470
471 return *(str - 1) == ';' ? str - 1 : str;
472}
473
474/* 8-bit colour */
475static char *process_8bit(char *str, WORD *attr)
476{
477 int val = strtol(str, &str, 10);
478
479 if (val < 16) {
480 *attr = colour_1bit[val];
481 }
482 else if (val < 232) {
483 int i, rgb[3];
484
485 val -= 16;
486 for (i = 2; i >= 0; --i) {
487 rgb[i] = (val % 6) * 42 + 21;
488 val /= 6;
489 }
490
491 *attr = rgb_to_console(rgb);
492 }
493 else if (val < 238) {
494 /* black */
495 *attr = 0;
496 }
497 else if (val < 244) {
498 /* bright black */
499 *attr = FOREGROUND_INTENSITY;
500 }
501 else if (val < 250) {
502 /* white */
503 *attr = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE;
504 }
505 else if (val < 256) {
506 /* bright white */
507 *attr = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE |
508 FOREGROUND_INTENSITY;
509 }
510
511 return str;
512}
513
514static char *process_colour(char *str, WORD *attr)
515{
516 long val = strtol(str, (char **)&str, 10);
517
518 *attr = 0xffff; /* error return */
519 switch (val) {
520 case 2:
521 str = process_24bit(str + 1, attr);
522 break;
523 case 5:
524 str = process_8bit(str + 1, attr);
525 break;
526 default:
527 break;
528 }
529
530 return str;
531}
532
533/* On input pos points to the start of a suspected escape sequence.
534 * If a valid sequence is found return a pointer to the character
535 * following it, otherwise return the original pointer. */
536static char *process_escape(char *pos)
537{
538 char *str, *func;
539 char *bel;
540 size_t len;
541 WORD t, attr = current_attr;
542 static int reverse = 0;
543
544 switch (pos[1]) {
545 case '[':
546 /* go ahead and process "\033[" sequence */
547 break;
548 case ']':
549 if ((pos[2] == '0' || pos[2] == '2') && pos[3] == ';' &&
550 (bel=strchr(pos+4, '\007')) && bel - pos < 260) {
551 /* set console title */
552 *bel++ = '\0';
553 charToConA(pos+4);
554 SetConsoleTitle(pos+4);
555 return bel;
556 }
557 /* invalid "\033]" sequence, fall through */
558 default:
559 return pos;
560 }
561
562 str = pos + 2;
563 len = strspn(str, "0123456789;");
564 func = str + len;
565 switch (*func) {
566 case 'm':
567 do {
568 long val = strtol(str, (char **)&str, 10);
569 switch (val) {
570 case 0: /* reset */
571 attr = plain_attr;
572 reverse = 0;
573 break;
574 case 1: /* bold */
575 attr |= FOREGROUND_INTENSITY;
576 break;
577 case 2: /* faint */
578 case 22: /* normal */
579 attr &= ~FOREGROUND_INTENSITY;
580 break;
581 case 3: /* italic */
582 /* Unsupported */
583 break;
584 case 4: /* underline */
585 case 21: /* double underline */
586 /* Wikipedia says this flag does nothing */
587 /* Furthermore, mingw doesn't define this flag
588 attr |= COMMON_LVB_UNDERSCORE; */
589 break;
590 case 24: /* no underline */
591 /* attr &= ~COMMON_LVB_UNDERSCORE; */
592 break;
593 case 5: /* slow blink */
594 case 6: /* fast blink */
595 /* We don't have blink, but we do have
596 background intensity */
597 attr |= BACKGROUND_INTENSITY;
598 break;
599 case 25: /* no blink */
600 attr &= ~BACKGROUND_INTENSITY;
601 break;
602 case 7: /* reverse video on */
603 reverse = 1;
604 break;
605 case 27: /* reverse video off */
606 reverse = 0;
607 break;
608 case 8: /* conceal */
609 case 9: /* strike through */
610 case 28: /* reveal */
611 /* Unsupported */
612 break;
613
614 /* Foreground colours */
615 case 30: /* Black */
616 case 31: /* Red */
617 case 32: /* Green */
618 case 33: /* Yellow */
619 case 34: /* Blue */
620 case 35: /* Magenta */
621 case 36: /* Cyan */
622 case 37: /* White */
623 attr &= ~FOREGROUND_ALL;
624 attr |= colour_1bit[val - 30];
625 break;
626 case 38: /* 8/24 bit */
627 str = process_colour(str + 1, &t);
628 if (t != 0xffff) {
629 attr &= ~(FOREGROUND_ALL|FOREGROUND_INTENSITY);
630 attr |= t;
631 }
632 break;
633 case 39: /* reset */
634 attr &= ~FOREGROUND_ALL;
635 attr |= (plain_attr & FOREGROUND_ALL);
636 break;
637
638 /* Background colours */
639 case 40: /* Black */
640 case 41: /* Red */
641 case 42: /* Green */
642 case 43: /* Yellow */
643 case 44: /* Blue */
644 case 45: /* Magenta */
645 case 46: /* Cyan */
646 case 47: /* White */
647 attr &= ~BACKGROUND_ALL;
648 attr |= colour_1bit[val - 40] << 4;
649 break;
650 case 48: /* 8/24 bit */
651 str = process_colour(str + 1, &t);
652 if (t != 0xffff) {
653 attr &= ~(BACKGROUND_ALL|BACKGROUND_INTENSITY);
654 attr |= t << 4;
655 }
656 break;
657 case 49: /* reset */
658 attr &= ~BACKGROUND_ALL;
659 attr |= (plain_attr & BACKGROUND_ALL);
660 break;
661
662 default:
663 /* Unsupported code */
664 return pos;
665 }
666 str++;
667 } while (str < func);
668
669 current_attr = attr;
670 if (reverse)
671 attr = ((attr >> 4) & 0xf) | ((attr << 4) & 0xf0);
672 SetConsoleTextAttribute(get_console(), attr);
673 break;
674 case 'A': /* up */
675 move_cursor_row(-strtol(str, (char **)&str, 10));
676 break;
677 case 'B': /* down */
678 move_cursor_row(strtol(str, (char **)&str, 10));
679 break;
680 case 'C': /* forward */
681 move_cursor_column(strtol(str, (char **)&str, 10));
682 break;
683 case 'D': /* back */
684 move_cursor_column(-strtol(str, (char **)&str, 10));
685 break;
686 case 'H':
687 if (!len)
688 move_cursor(0, 0);
689 else {
690 int row, col = 1;
691
692 row = strtol(str, (char **)&str, 10);
693 if (*str == ';') {
694 col = strtol(str+1, (char **)&str, 10);
695 }
696 move_cursor(col > 0 ? col-1 : 0, row > 0 ? row-1 : 0);
697 }
698 break;
699 case 'J':
700 erase_till_end_of_screen();
701 break;
702 case 'K':
703 erase_in_line();
704 break;
705 case '?':
706 if (strncmp(str+1, "1049", 4) == 0 &&
707 (str[5] == 'h' || str[5] == 'l') ) {
708 use_alt_buffer(str[5] == 'h');
709 func = str + 5;
710 break;
711 }
712 /* fall through */
713 default:
714 /* Unsupported code */
715 return pos;
716 }
717
718 return (char *)func + 1;
719}
720
721static BOOL charToConBuffA(LPSTR s, DWORD len)
722{
723 UINT acp = GetACP(), conocp = GetConsoleOutputCP();
724 CPINFO acp_info, con_info;
725 WCHAR *buf;
726
727 if (acp == conocp)
728 return TRUE;
729
730 if (!s || !GetCPInfo(acp, &acp_info) || !GetCPInfo(conocp, &con_info) ||
731 con_info.MaxCharSize > acp_info.MaxCharSize ||
732 (len == 1 && acp_info.MaxCharSize != 1))
733 return FALSE;
734
735 terminal_mode(FALSE);
736 buf = xmalloc(len*sizeof(WCHAR));
737 MultiByteToWideChar(CP_ACP, 0, s, len, buf, len);
738 WideCharToMultiByte(conocp, 0, buf, len, s, len, NULL, NULL);
739 free(buf);
740 return TRUE;
741}
742
743static BOOL charToConA(LPSTR s)
744{
745 if (!s)
746 return FALSE;
747 return charToConBuffA(s, strlen(s)+1);
748}
749
750BOOL conToCharBuffA(LPSTR s, DWORD len)
751{
752 UINT acp = GetACP(), conicp = GetConsoleCP();
753 CPINFO acp_info, con_info;
754 WCHAR *buf;
755
756 if (acp == conicp
757#if ENABLE_FEATURE_UTF8_INPUT
758 // if acp is UTF8 then we got UTF8 via readConsoleInput_utf8
759 || acp == CP_UTF8
760#endif
761 )
762 return TRUE;
763
764 if (!s || !GetCPInfo(acp, &acp_info) || !GetCPInfo(conicp, &con_info) ||
765 acp_info.MaxCharSize > con_info.MaxCharSize ||
766 (len == 1 && con_info.MaxCharSize != 1))
767 return FALSE;
768
769 terminal_mode(FALSE);
770 buf = xmalloc(len*sizeof(WCHAR));
771 MultiByteToWideChar(conicp, 0, s, len, buf, len);
772 WideCharToMultiByte(CP_ACP, 0, buf, len, s, len, NULL, NULL);
773 free(buf);
774 return TRUE;
775}
776
777static int ansi_emulate(const char *s, FILE *stream)
778{
779 int rv = 0;
780 const unsigned char *t;
781 char *pos, *str;
782 size_t cur_len;
783 static size_t max_len = 0;
784 static char *mem = NULL;
785
786 /* if no special treatment is required output the string as-is */
787 for ( t=(unsigned char *)s; *t; ++t ) {
788 if ( *t == '\033' || *t > 0x7f ) {
789 break;
790 }
791 }
792
793 if ( *t == '\0' ) {
794 return fputs(s, stream) == EOF ? EOF : strlen(s);
795 }
796
797 /*
798 * Make a writable copy of the string and retain array for reuse.
799 * The test above guarantees that the string length won't be zero
800 * so the array will always be allocated.
801 */
802 cur_len = strlen(s);
803 if ( cur_len > max_len ) {
804 free(mem);
805 mem = xstrdup(s);
806 max_len = cur_len;
807 }
808 else {
809 strcpy(mem, s);
810 }
811 pos = str = mem;
812
813 while (*pos) {
814 pos = strchr(str, '\033');
815 if (pos && !(terminal_mode(FALSE) & VT_OUTPUT)) {
816 size_t len = pos - str;
817
818 if (len) {
819 if (conv_fwriteCon(stream, str, len) == EOF)
820 return EOF;
821 rv += len;
822 }
823
824 if (fflush(stream) == EOF)
825 return EOF;
826
827 str = process_escape(pos);
828 if (str == pos) {
829 if (fputc('\033', stream) == EOF)
830 return EOF;
831 ++str;
832 }
833 rv += str - pos;
834 pos = str;
835
836 if (fflush(stream) == EOF)
837 return EOF;
838
839 } else {
840 size_t len = strlen(str);
841 rv += len;
842 return conv_fwriteCon(stream, str, len) == EOF ? EOF : rv;
843 }
844 }
845 return rv;
846}
847
/* putchar() replacement: winansi_fputc() on stdout. */
int winansi_putchar(int c)
{
	int rv = winansi_fputc(c, stdout);

	return rv;
}
852
/*
 * puts() replacement: the string goes through winansi_fputs(), the
 * newline through plain putchar().  Returns 0, or EOF on error.
 */
int winansi_puts(const char *s)
{
	if (winansi_fputs(s, stdout) == EOF)
		return EOF;
	if (putchar('\n') == EOF)
		return EOF;
	return 0;
}
857
858static sighandler_t sigpipe_handler = SIG_DFL;
859
860#undef signal
861sighandler_t winansi_signal(int signum, sighandler_t handler)
862{
863 sighandler_t old;
864
865 if (signum == SIGPIPE) {
866 old = sigpipe_handler;
867 sigpipe_handler = handler;
868 return old;
869 }
870 return signal(signum, handler);
871}
872
873static void check_pipe_fd(int fd)
874{
875 int error = GetLastError();
876
877 if ((error == ERROR_NO_DATA &&
878 GetFileType((HANDLE)_get_osfhandle(fd)) == FILE_TYPE_PIPE) ||
879 error == ERROR_BROKEN_PIPE) {
880 if (sigpipe_handler == SIG_DFL)
881 exit(128+SIGPIPE);
882 else /* SIG_IGN */
883 errno = EPIPE;
884 }
885}
886
/*
 * Run the broken pipe check on the file descriptor of a stream which
 * has its error indicator set.
 */
static void check_pipe(FILE *stream)
{
	int fd = fileno(stream);

	if (fd == -1)
		return;
	if (ferror(stream))
		check_pipe_fd(fd);
}
895
/*
 * fwrite() replacement.
 *
 * ANSI emulation is applied to byte-oriented writes (size or nmemb is
 * 1) to a console.  Everything else is passed to fwrite(), followed by
 * a broken pipe check if the write fell short.
 *
 * ansi_emulate() treats its input as a string, so data containing NUL
 * bytes is also passed to fwrite() unchanged: it would otherwise be
 * cut short at the first NUL while success was reported.
 *
 * Returns the value from fwrite() or, when emulating, nmemb on
 * success and 0 on error.
 */
size_t winansi_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{
	size_t lsize, lmemb, ret;
	char *str;
	int rv;

	lsize = MIN(size, nmemb);
	lmemb = MAX(size, nmemb);
	if (lsize != 1 || !is_console(fileno(stream)) ||
			memchr(ptr, '\0', lmemb) != NULL) {
		SetLastError(0);
		if ((ret=fwrite(ptr, size, nmemb, stream)) < nmemb)
			check_pipe(stream);
		return ret;
	}

	/* ansi_emulate() needs a NUL-terminated string */
	str = xmalloc(lmemb+1);
	memcpy(str, ptr, lmemb);
	str[lmemb] = '\0';

	rv = ansi_emulate(str, stream);
	free(str);

	return rv == EOF ? 0 : nmemb;
}
920
/*
 * fputs() replacement: ANSI emulation for consoles (returning 0 or
 * EOF), plain fputs() plus a broken pipe check for everything else.
 */
int winansi_fputs(const char *str, FILE *stream)
{
	int ret;

	if (is_console(fileno(stream)))
		return ansi_emulate(str, stream) == EOF ? EOF : 0;

	SetLastError(0);
	ret = fputs(str, stream);
	if (ret == EOF)
		check_pipe(stream);
	return ret;
}
934
/*
 * fputc() replacement.  Only bytes above 0x7f written to a console
 * need code page conversion; anything else goes straight to fputc(),
 * with a broken pipe check on failure.
 */
int winansi_fputc(int c, FILE *stream)
{
	char byte = c;
	int ret;

	if ((unsigned char)c > 0x7f && is_console(fileno(stream))) {
		if (conv_fwriteCon(stream, &byte, 1) == EOF)
			return EOF;
		return (unsigned char)c;
	}

	SetLastError(0);
	ret = fputc(c, stream);
	if (ret == EOF)
		check_pipe(stream);
	return ret;
}
950
951#if !defined(__USE_MINGW_ANSI_STDIO) || !__USE_MINGW_ANSI_STDIO
/*
 * Prior to Windows 10 vsnprintf was incompatible with the C99 standard.
 * Implement a replacement using _vsnprintf.
 *
 * Returns the length the complete output would have, not counting the
 * terminating NUL, or -1 on error.  Output written to buf is always
 * NUL-terminated.  As in C99 nothing at all is written when size is 0,
 * in which case buf may be NULL.
 */
int winansi_vsnprintf(char *buf, size_t size, const char *format, va_list list)
{
	int len;	/* signed: _vsnprintf reports errors as negative values */
	va_list list2;

	/* first pass just measures the output */
	va_copy(list2, list);
	len = _vsnprintf(NULL, 0, format, list2);
	va_end(list2);
	if (len < 0)
		return -1;

	if (size > 0) {
		/* _vsnprintf doesn't terminate truncated output */
		_vsnprintf(buf, size, format, list);
		buf[size-1] = '\0';
	}
	return len;
}
971#endif
972
/*
 * vfprintf() replacement.
 *
 * For a console the output is formatted into a buffer (on the stack if
 * it fits, otherwise allocated) and passed to ansi_emulate().  If the
 * stream isn't a console or formatting fails, the arguments are handed
 * to vfprintf() followed by a broken pipe check.
 *
 * Returns the value from ansi_emulate() or vfprintf().
 */
int winansi_vfprintf(FILE *stream, const char *format, va_list list)
{
	int len, rv;
	char small_buf[256];
	char *buf = small_buf;
	va_list cp;

	if (!is_console(fileno(stream)))
		goto abort;

	va_copy(cp, list);
	len = vsnprintf(small_buf, sizeof(small_buf), format, cp);
	va_end(cp);

	/* test for failure first: a negative length mustn't take part
	 * in the unsigned comparison below */
	if (len < 0)
		goto abort;

	if ((size_t)len > sizeof(small_buf) - 1) {
		buf = xmalloc(len + 1);
		va_copy(cp, list);
		len = vsnprintf(buf, len + 1, format, cp);
		va_end(cp);

		if (len < 0) {
			free(buf);
			goto abort;
		}
	}

	rv = ansi_emulate(buf, stream);

	if (buf != small_buf)
		free(buf);
	return rv;

abort:
	SetLastError(0);
	if ((rv=vfprintf(stream, format, list)) == EOF || ferror(stream) != 0)
		check_pipe(stream);
	return rv;
}
1009
/* fprintf() replacement built on winansi_vfprintf(). */
int winansi_fprintf(FILE *stream, const char *format, ...)
{
	va_list args;
	int result;

	va_start(args, format);
	result = winansi_vfprintf(stream, format, args);
	va_end(args);

	return result;
}
1021
/* printf() replacement: winansi_vfprintf() on stdout. */
int winansi_printf(const char *format, ...)
{
	va_list args;
	int result;

	va_start(args, format);
	result = winansi_vfprintf(stdout, format, args);
	va_end(args);

	return result;
}
1033
1034static int ansi_emulate_write(int fd, const void *buf, size_t count)
1035{
1036 int rv = 0, i;
1037 int special = FALSE, has_null = FALSE;
1038 const unsigned char *s = (const unsigned char *)buf;
1039 char *pos, *str;
1040 size_t len, out_len;
1041 static size_t max_len = 0;
1042 static char *mem = NULL;
1043
1044 for ( i=0; i<count; ++i ) {
1045 if ( s[i] == '\033' || s[i] > 0x7f ) {
1046 special = TRUE;
1047 }
1048 else if ( !s[i] ) {
1049 has_null = TRUE;
1050 }
1051 }
1052
1053 /*
1054 * If no special treatment is required or the data contains NUL
1055 * characters output the string as-is.
1056 */
1057 if ( !special || has_null ) {
1058 return write(fd, buf, count);
1059 }
1060
1061 /* make a writable copy of the data and retain array for reuse */
1062 if ( count > max_len ) {
1063 free(mem);
1064 mem = malloc(count+1);
1065 max_len = count;
1066 }
1067 memcpy(mem, buf, count);
1068 mem[count] = '\0';
1069 pos = str = mem;
1070
1071 /* we've checked the data doesn't contain any NULs */
1072 while (*pos) {
1073 pos = strchr(str, '\033');
1074 if (pos && !(terminal_mode(FALSE) & VT_OUTPUT)) {
1075 len = pos - str;
1076
1077 if (len) {
1078 out_len = conv_writeCon(fd, str, len);
1079 if (out_len == -1)
1080 return -1;
1081 rv += out_len;
1082 }
1083
1084 str = process_escape(pos);
1085 if (str == pos) {
1086 if (write(fd, pos, 1) == -1)
1087 return -1;
1088 ++str;
1089 }
1090 rv += str - pos;
1091 pos = str;
1092 } else {
1093 len = strlen(str);
1094 out_len = conv_writeCon(fd, str, len);
1095 return (out_len == -1) ? -1 : rv+out_len;
1096 }
1097 }
1098 return rv;
1099}
1100
1101int winansi_write(int fd, const void *buf, size_t count)
1102{
1103 if (!is_console(fd)) {
1104 int ret;
1105
1106 SetLastError(0);
1107 if ((ret=write(fd, buf, count)) == -1) {
1108 check_pipe_fd(fd);
1109 }
1110 return ret;
1111 }
1112
1113 return ansi_emulate_write(fd, buf, count);
1114}
1115
1116int winansi_read(int fd, void *buf, size_t count)
1117{
1118 int rv;
1119
1120 rv = mingw_read(fd, buf, count);
1121 if (!is_console_in(fd))
1122 return rv;
1123
1124 if ( rv > 0 ) {
1125 conToCharBuffA(buf, rv);
1126 }
1127
1128 return rv;
1129}
1130
/*
 * fread() replacement: data obtained from a console is converted from
 * the console code page in place.  Returns the number of items read,
 * as reported by fread().
 */
size_t winansi_fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
	/* size_t as returned by fread: the count isn't squeezed
	 * through an int */
	size_t rv = fread(ptr, size, nmemb, stream);

	if (!is_console_in(fileno(stream)))
		return rv;

	if (rv > 0)
		conToCharBuffA(ptr, rv * size);

	return rv;
}
1144
/*
 * getc() replacement: a character obtained from a console is
 * converted from the console code page.  Returns the character or
 * EOF.
 */
int winansi_getc(FILE *stream)
{
	unsigned char ch;
	int rv = _getc_nolock(stream);

	if (!is_console_in(fileno(stream)) || rv == EOF)
		return rv;

	ch = (unsigned char)rv;
	conToCharBuffA((char *)&ch, 1);
	return (int)ch;
}
1162
/* getchar() replacement: winansi_getc() on stdin. */
int winansi_getchar(void)
{
	int rv = winansi_getc(stdin);

	return rv;
}
1167
/*
 * fgets() replacement: a line obtained from a console is converted
 * from the console code page in place.  Returns the value from
 * fgets().
 */
char *winansi_fgets(char *s, int size, FILE *stream)
{
	char *line = fgets(s, size, stream);

	if (is_console_in(fileno(stream)) && line != NULL)
		conToCharBuffA(s, strlen(s));

	return line;
}
1181
1182/* Ensure that isatty(fd) returns 0 for the NUL device */
1183int mingw_isatty(int fd)
1184{
1185 int result = _isatty(fd);
1186
1187 if (result) {
1188 HANDLE handle = (HANDLE) _get_osfhandle(fd);
1189 DWORD mode;
1190
1191 if (handle == INVALID_HANDLE_VALUE)
1192 return 0;
1193
1194 /* check if its a device (i.e. console, printer, serial port) */
1195 if (GetFileType(handle) != FILE_TYPE_CHAR)
1196 return 0;
1197
1198 if (!GetConsoleMode(handle, &mode))
1199 return 0;
1200 }
1201
1202 return result;
1203}
1204
1205#if ENABLE_FEATURE_UTF8_INPUT
1206// intentionally also converts invalid values (surrogate halfs, too big)
1207static int toutf8(DWORD cp, unsigned char *buf) {
1208 if (cp <= 0x7f) {
1209 *buf = cp;
1210 return 1;
1211 }
1212 if (cp <= 0x7ff) {
1213 *buf++ = 0xc0 | (cp >> 6);
1214 *buf = 0x80 | (cp & 0x3f);
1215 return 2;
1216 }
1217 if (cp <= 0xffff) {
1218 *buf++ = 0xe0 | (cp >> 12);
1219 *buf++ = 0x80 | ((cp >> 6) & 0x3f);
1220 *buf = 0x80 | (cp & 0x3f);
1221 return 3;
1222 }
1223 if (cp <= 0x10ffff) {
1224 *buf++ = 0xf0 | (cp >> 18);
1225 *buf++ = 0x80 | ((cp >> 12) & 0x3f);
1226 *buf++ = 0x80 | ((cp >> 6) & 0x3f);
1227 *buf = 0x80 | (cp & 0x3f);
1228 return 4;
1229 }
1230 // invalid. returning 0 works in our context because it's delivered
1231 // as a key event, where 0 values are typically ignored by the caller
1232 *buf = 0;
1233 return 1;
1234}
1235
1236// peek into the console input queue and try to find a key-up event of
1237// a surrugate-2nd-half, at which case eat the console events up to this
1238// one (excluding), and combine the pair values into *ph1
1239static void maybeEatUpto2ndHalfUp(HANDLE h, DWORD *ph1)
1240{
1241 // Peek into the queue arbitrary 16 records deep
1242 INPUT_RECORD r[16];
1243 DWORD got;
1244 int i;
1245
1246 if (!PeekConsoleInputW(h, r, 16, &got))
1247 return;
1248
1249 // we're conservative, and abort the search on anything which
1250 // seems out of place, like non-key event, non-2nd-half, etc.
1251 // search from 1 because i==0 is still the 1st half down record.
1252 for (i = 1; i < got; ++i) {
1253 DWORD h2;
1254 int is2nd, isdown;
1255
1256 if (r[i].EventType != KEY_EVENT)
1257 return;
1258
1259 isdown = r[i].Event.KeyEvent.bKeyDown;
1260 h2 = r[i].Event.KeyEvent.uChar.UnicodeChar;
1261 is2nd = h2 >= 0xDC00 && h2 <= 0xDFFF;
1262
1263 // skip 0 values, keyup of 1st half, and keydown of a 2nd half, if any
1264 if (!h2 || (h2 == *ph1 && !isdown) || (is2nd && isdown))
1265 continue;
1266
1267 if (!is2nd)
1268 return;
1269
1270 // got 2nd-half-up. eat the events up to this, combine the values
1271 ReadConsoleInputW(h, r, i, &got);
1272 *ph1 = 0x10000 + (((*ph1 & ~0xD800) << 10) | (h2 & ~0xDC00));
1273 return;
1274 }
1275}
1276
1277// if the codepoint is a key-down event, remember it, else if
1278// it's a key-up event with matching prior down - forget the down,
1279// else (up without matching prior key-down) - change it to down.
1280// We remember few prior key-down events so that a sequence
1281// like X-down Y-down X-up Y-up won't trigger this hack for Y-up.
1282// When up is changed into down there won't be further key-up event,
1283// but that's OK because the caller ignores key-up events anyway.
1284static void maybe_change_up_to_down(wchar_t key, BOOL *isdown)
1285{
1286 #define DOWN_BUF_SIZ 8
1287 static wchar_t downbuf[DOWN_BUF_SIZ] = {0};
1288 static int pos = 0;
1289
1290 if (*isdown) {
1291 downbuf[pos++] = key;
1292 pos = pos % DOWN_BUF_SIZ;
1293 return;
1294 }
1295
1296 // the missing-key-down issue was only observed with unicode values,
1297 // so limit this hack to non-ASCII-7 values.
1298 // also, launching a new shell/read process from CLI captures
1299 // an ENTER-up event without prior down at this new process, which
1300 // would otherwise change it to down - creating a wrong ENTER keypress.
1301 if (key <= 127)
1302 return;
1303
1304 // key up, try to match a prior down
1305 for (int i = 0; i < DOWN_BUF_SIZ; ++i) {
1306 if (downbuf[i] == key) {
1307 downbuf[i] = 0; // "forget" this down
1308 return;
1309 }
1310 }
1311
1312 // no prior key-down - replace the up with down
1313 *isdown = TRUE;
1314}
1315
1316/*
1317 * readConsoleInput_utf8 behaves similar enough to ReadConsoleInputA when
1318 * the console (input) CP is UTF8, but addressed two issues:
1319 * - It depend on the console CP, while we use ReadConsoleInputW internally.
1320 * - ReadConsoleInputA with Console CP of UTF8 (65001) is buggy:
1321 * - Doesn't work on Windows 7 (reads 0 or '?' for non-ASCII codepoints).
1322 * - When used at the cmd.exe console - but not Windows Terminal:
1323 * sometimes only key-up events arrive without the expected prior key-down.
1324 * Seems to depend both on the console CP and the entered/pasted codepoint.
1325 * - If reading one record at a time (which is how we use it), then input
1326 * codepoints of U+0800 or higher crash the console/terminal window.
1327 * (tested on Windows 10.0.19045.3086: console and Windows Terminal 1.17)
1328 * Example: U+0C80 (UTF8: 0xE0 0xB2 0x80): "ಀ"
1329 * Example: U+1F600 (UTF8: 0xF0 0x9F 0x98 0x80): "😀"
1330 * - If reading more than one record at a time:
1331 * - Unknown whether it can still crash in some cases (was not observed).
1332 * - Codepoints above U+FFFF are broken, and arrive as
1333 * U+FFFD REPLACEMENT CHARACTER "�"
1334 * - Few more codepoints to test the issues above (and below):
1335 * - U+0500 (UTF8: 0xD4, 0x80): "Ԁ" (OK in UTF8 CP, else maybe no key-down)
1336 * - U+07C0 (UTF8: 0xDF, 0x80): "߀" (might exhibit missing key-down)
1337 *
1338 * So this function uses ReadConsoleInputW and then delivers it as UTF8:
1339 * - Works with any console CP, in Windows terminal and Windows 7/10 console.
1340 * - Surrogate pairs are combined and delivered as a single UTF8 codepoint.
1341 * - Ignore occasional intermediate control events between the halfs.
1342 * - If we can't find the 2nd half, or if for some reason we get a 2nd half
1343 * wiithout the 1st, deliver the half we got as UTF8 (a-la WTF8).
1344 * - The "sometimes key-down is missing" issue at the cmd.exe console happens
1345 * also when using ReadConsoleInputW (for U+0080 or higher), so handle it.
1346 * This can also happen with surrogate pairs.
1347 * - Up to 4-bytes state is maintained for a single UTF8 codepoint buffer.
1348 *
1349 * Gotchas (could be solved, but currently there's no need):
1350 * - We support reading one record at a time, else fail - to make it obvious.
1351 * - We have a state which is hidden from PeekConsoleInput - so not in sync.
1352 * - We don't deliver key-up events in some cases: when working around
1353 * the "missing key-down" issue, and with combined surrogate halfs value.
1354 */
1355BOOL readConsoleInput_utf8(HANDLE h, INPUT_RECORD *r, DWORD len, DWORD *got)
1356{
1357 static unsigned char u8buf[4]; // any single codepoint in UTF8
1358 static int u8pos = 0, u8len = 0;
1359 static INPUT_RECORD srec;
1360
1361 if (len != 1)
1362 return FALSE;
1363
1364 // if ACP is UTF8 then we read UTF8 regardless of console (in) CP
1365 if (GetConsoleCP() != CP_UTF8 && GetACP() != CP_UTF8)
1366 return ReadConsoleInput(h, r, len, got);
1367
1368 if (u8pos == u8len) {
1369 DWORD codepoint;
1370
1371 // wait-and-peek rather than read to keep the last processed record
1372 // at the console queue until we deliver all of its products, so
1373 // that external WaitForSingleObject(h) shows there's data ready.
1374 if (WaitForSingleObject(h, INFINITE) != WAIT_OBJECT_0)
1375 return FALSE;
1376 if (!PeekConsoleInputW(h, r, 1, got))
1377 return FALSE;
1378 if (*got == 0)
1379 return TRUE;
1380 if (r->EventType != KEY_EVENT)
1381 return ReadConsoleInput(h, r, 1, got);
1382
1383 srec = *r;
1384 codepoint = srec.Event.KeyEvent.uChar.UnicodeChar;
1385
1386 // Observed when pasting unicode at cmd.exe console (but not
1387 // windows terminal), we sometimes get key-up event without
1388 // a prior matching key-down (or with key-down codepoint 0),
1389 // so this call would change the up into down in such case.
1390 // E.g. pastes fixed by this hack: U+1F600 "😀", or U+0C80 "ಀ"
1391 if (codepoint)
1392 maybe_change_up_to_down(codepoint, &srec.Event.KeyEvent.bKeyDown);
1393
1394 // if it's a 1st (high) surrogate pair half, try to eat upto and
1395 // excluding the 2nd (low) half, and combine them into codepoint.
1396 // this does not interfere with the missing-key-down workaround
1397 // (no issue if the down-buffer has 1st-half-down without up).
1398 if (codepoint >= 0xD800 && codepoint <= 0xDBFF)
1399 maybeEatUpto2ndHalfUp(h, &codepoint);
1400
1401 u8len = toutf8(codepoint, u8buf);
1402 u8pos = 0;
1403 }
1404
1405 *r = srec;
1406 r->Event.KeyEvent.uChar.AsciiChar = (char)u8buf[u8pos++];
1407 if (u8pos == u8len) // consume the record which generated this buffer
1408 ReadConsoleInputW(h, &srec, 1, got);
1409 *got = 1;
1410 return TRUE;
1411}
1412#else
1413/*
1414 * In Windows 10 and 11 using ReadConsoleInputA() with a console input
1415 * code page of CP_UTF8 can crash the console/terminal. Avoid this by
1416 * using ReadConsoleInputW() in that case.
1417 */
1418BOOL readConsoleInput_utf8(HANDLE h, INPUT_RECORD *r, DWORD len, DWORD *got)
1419{
1420 if (GetConsoleCP() != CP_UTF8)
1421 return ReadConsoleInput(h, r, len, got);
1422
1423 if (ReadConsoleInputW(h, r, len, got)) {
1424 wchar_t uchar = r->Event.KeyEvent.uChar.UnicodeChar;
1425 char achar = uchar & 0x7f;
1426 if (achar != uchar)
1427 achar = '?';
1428 r->Event.KeyEvent.uChar.AsciiChar = achar;
1429 return TRUE;
1430 }
1431 return FALSE;
1432}
1433#endif
1434
1435#if ENABLE_FEATURE_UTF8_OUTPUT
1436// Write u8buf as if the console output CP is UTF8 - regardless of the CP.
1437// fd should be associated with a console output.
1438// Return: 0 on successful write[s], else -1 (e.g. if fd is not a console).
1439//
1440// Up to 3 bytes of an incomplete codepoint may be buffered from prior call[s].
1441// All the completed codepoints in one call are written using WriteConsoleW.
1442// Bad sequence of any length (till ASCII7 or UTF8 lead) prints 1 subst wchar.
1443//
1444// note: one console is assumed, and the (3 bytes) buffer is shared regardless
1445// of the original output stream (stdout/err), or even if the handle is
1446// of a different console. This can result in invalid codepoints output
1447// if streams are multiplexed mid-codepoint (same as elsewhere?)
1448static int writeCon_utf8(int fd, const char *u8buf, size_t u8siz)
1449{
1450 static int state = 0; // -1: bad, 0-3: remaining cp bytes (0: done/new)
1451 static uint32_t codepoint = 0; // accumulated from up to 4 UTF8 bytes
1452
1453 // not a state, only avoids re-alloc on every call
1454 static const int wbufwsiz = 4096;
1455 static wchar_t *wbuf = 0;
1456
1457 HANDLE h = (HANDLE)_get_osfhandle(fd);
1458 int wlen = 0;
1459
1460 if (!wbuf)
1461 wbuf = xmalloc(wbufwsiz * sizeof(wchar_t));
1462
1463 // ASCII7 uses least logic, then UTF8 continuations, UTF8 lead, errors
1464 while (u8siz--) {
1465 unsigned char c = *u8buf++;
1466 int topbits = 0;
1467
1468 while (c & (0x80 >> topbits))
1469 ++topbits;
1470
1471 if (state == 0 && topbits == 0) {
1472 // valid ASCII7, state remains 0
1473 codepoint = c;
1474
1475 } else if (state > 0 && topbits == 1) {
1476 // valid continuation byte
1477 codepoint = (codepoint << 6) | (c & 0x3f);
1478 if (--state)
1479 continue;
1480
1481 } else if (state == 0 && topbits >= 2 && topbits <= 4) {
1482 // valid UTF8 lead of 2/3/4 bytes codepoint
1483 codepoint = c & (0x7f >> topbits);
1484 state = topbits - 1; // remaining bytes after lead
1485 continue;
1486
1487 } else {
1488 // already bad (state<0), or unexpected c at state 0-3.
1489 // placeholder is added only at the 1st (state>=0).
1490 // regardless, c may be valid to reprocess as state 0
1491 // (even when it's the 1st unexpected in state 1/2/3)
1492 int placeholder_done = state < 0;
1493
1494 if (topbits < 5 && topbits != 1) {
1495 --u8buf; // valid for state 0, reprocess
1496 ++u8siz;
1497 state = 0;
1498 } else {
1499 state = -1; // set/keep bad state
1500 }
1501
1502 if (placeholder_done)
1503 continue;
1504
1505 // 1st unexpected char, add placeholder
1506 codepoint = CONFIG_SUBST_WCHAR;
1507 }
1508
1509 // codepoint is complete
1510 // we don't reject surrogate halves, reserved, etc
1511 if (codepoint < 0x10000) {
1512 wbuf[wlen++] = codepoint;
1513 } else {
1514 // generate a surrogates pair (wbuf has room for 2+)
1515 codepoint -= 0x10000;
1516 wbuf[wlen++] = 0xd800 | (codepoint >> 10);
1517 wbuf[wlen++] = 0xdc00 | (codepoint & 0x3ff);
1518 }
1519
1520 // flush if we have less than two empty spaces
1521 if (wlen > wbufwsiz - 2) {
1522 if (!WriteConsoleW(h, wbuf, wlen, 0, 0))
1523 return -1;
1524 wlen = 0;
1525 }
1526 }
1527
1528 if (wlen && !WriteConsoleW(h, wbuf, wlen, 0, 0))
1529 return -1;
1530 return 0;
1531}
1532#endif
1533
1534void console_write(const char *str, int len)
1535{
1536 char *buf = xmemdup(str, len);
1537 int fd = _open("CONOUT$", _O_WRONLY);
1538 conv_writeCon(fd, buf, len);
1539 close(fd);
1540 free(buf);
1541}
1542
// Tell whether console output conversion is enabled (non-zero) or not (0).
// A locale of exactly "C" disables the conversion, so that the source
// data is interpreted only by the console according to its output CP.
// The locale is the first one which is set of LC_ALL, LC_CTYPE and LANG.
// The environment is examined on the first call only; the result is cached.
static int conout_conv_enabled(void)
{
	static int enabled, tested; /* = 0 */
	// keep in sync with [re]init_unicode at libbb/unicode.c
	static const char *const names[] = { "LC_ALL", "LC_CTYPE", "LANG" };
	const char *val = 0;
	int i;

	if (tested)
		return enabled;

	// first variable which exists wins, even if its value is empty
	for (i = 0; !val && i < 3; ++i)
		val = getenv(names[i]);

	enabled = !val || val[0] != 'C' || val[1] != '\0';
	tested = 1;
	return enabled;
}
1561
1562// TODO: improvements:
1563//
1564// 1. currently conv_[f]writeCon modify buf inplace, which means the caller
1565// typically has to make a writable copy first just for this.
1566// Sometimes it allocates a big copy once, and calls us with substrings.
1567// Instead, we could make a writable copy here - it's not used later anyway.
1568// To avoid the performance hit of many small allocations, we could use
1569// a local buffer for short strings, and allocate only if it doesn't fit
1570// (or maybe just reuse the local buffer with substring iterations).
1571//
1572// 2. Instead of converting from ACP to the console out CP - which guarantees
1573// potential data-loss if they differ, we could convert it to wchar_t and
1574// write it using WriteConsoleW. This should prevent all output data-loss.
1575// care should be taken with DBCS codepages (e.g. 936) or other multi-byte
1576// because then converting on arbitrary substring boundaries can fail.
1577
// Write siz bytes of buf to stream, applying the console output conversion
// unless it's disabled (see conout_conv_enabled):
// - with UTF8 output support: if the console output CP is not UTF8 then
//   the stream is flushed and buf is written using writeCon_utf8.
// - without it: buf is converted inplace using charToConBuffA and then
//   written to the stream.
// Returns EOF on error, 0 on success.
static int conv_fwriteCon(FILE *stream, char *buf, size_t siz)
{
	size_t written;

	if (conout_conv_enabled()) {
#if ENABLE_FEATURE_UTF8_OUTPUT
		if (GetConsoleOutputCP() != CP_UTF8) {
			// writeCon_utf8 is unbuffered, so first push out whatever
			// the stream still holds
			fflush(stream);
			if (writeCon_utf8(fileno(stream), buf, siz))
				return EOF;
			return 0;
		}
#else
		charToConBuffA(buf, siz);
#endif
	}

	written = fwrite(buf, 1, siz, stream);
	if (written < siz)
		return EOF;
	return 0;
}
1594
// Write siz bytes of buf to fd using the low level write, applying the
// console output conversion unless it's disabled (see conout_conv_enabled):
// - with UTF8 output support: if the console output CP is not UTF8 then
//   buf is written using writeCon_utf8.
// - without it: buf is converted inplace using charToConBuffA and then
//   written to fd.
// Returns -1 on error, actually-written bytes on success.
static int conv_writeCon(int fd, char *buf, size_t siz)
{
	if (conout_conv_enabled()) {
#if ENABLE_FEATURE_UTF8_OUTPUT
		if (GetConsoleOutputCP() != CP_UTF8) {
			// writeCon_utf8 either fails or consumes everything
			if (writeCon_utf8(fd, buf, siz))
				return -1;
			return siz;
		}
#else
		charToConBuffA(buf, siz);
#endif
	}

	return write(fd, buf, siz);
}