diff options
author | Nguyễn Thái Ngọc Duy <pclouds@gmail.com> | 2009-04-22 21:54:40 +1000 |
---|---|---|
committer | Nguyễn Thái Ngọc Duy <pclouds@gmail.com> | 2009-04-23 04:44:30 +1000 |
commit | 014aabc8c0d20b8ab6e9af7f9234c76a46a3014f (patch) | |
tree | d76dfae5b9b609151ca9fbea4e72f61067513931 | |
parent | 37ec9a93b0d1ba7d3252f36842155194c7f0c4c0 (diff) | |
download | busybox-w32-014aabc8c0d20b8ab6e9af7f9234c76a46a3014f.tar.gz busybox-w32-014aabc8c0d20b8ab6e9af7f9234c76a46a3014f.tar.bz2 busybox-w32-014aabc8c0d20b8ab6e9af7f9234c76a46a3014f.zip |
import (unchanged) git files from e56b799d6ad8afba4168fffa7218d44c041a72d2
-rw-r--r-- | include/fnmatch.h | 84 | ||||
-rw-r--r-- | include/mingw.h | 235 | ||||
-rw-r--r-- | include/quote.h | 68 | ||||
-rw-r--r-- | include/regex.h | 490 | ||||
-rw-r--r-- | include/run-command.h | 93 | ||||
-rw-r--r-- | include/strbuf.h | 137 | ||||
-rw-r--r-- | libbb/fnmatch.c | 488 | ||||
-rw-r--r-- | libbb/mingw.c | 1141 | ||||
-rw-r--r-- | libbb/quote.c | 478 | ||||
-rw-r--r-- | libbb/regex.c | 4927 | ||||
-rw-r--r-- | libbb/run-command.c | 399 | ||||
-rw-r--r-- | libbb/setenv.c | 34 | ||||
-rw-r--r-- | libbb/strbuf.c | 376 | ||||
-rw-r--r-- | libbb/strlcpy.c | 13 | ||||
-rw-r--r-- | libbb/trace.c | 127 | ||||
-rw-r--r-- | libbb/usage.c | 80 | ||||
-rw-r--r-- | libbb/win32.h | 34 | ||||
-rw-r--r-- | libbb/winansi.c | 357 | ||||
-rw-r--r-- | libbb/write_or_die.c | 86 |
19 files changed, 9647 insertions, 0 deletions
diff --git a/include/fnmatch.h b/include/fnmatch.h new file mode 100644 index 000000000..cc3ec3794 --- /dev/null +++ b/include/fnmatch.h | |||
@@ -0,0 +1,84 @@ | |||
1 | /* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc. | ||
2 | This file is part of the GNU C Library. | ||
3 | |||
4 | The GNU C Library is free software; you can redistribute it and/or | ||
5 | modify it under the terms of the GNU Library General Public License as | ||
6 | published by the Free Software Foundation; either version 2 of the | ||
7 | License, or (at your option) any later version. | ||
8 | |||
9 | The GNU C Library is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | Library General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU Library General Public | ||
15 | License along with the GNU C Library; see the file COPYING.LIB. If not, | ||
16 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
17 | Boston, MA 02111-1307, USA. */ | ||
18 | |||
19 | #ifndef _FNMATCH_H | ||
20 | #define _FNMATCH_H 1 | ||
21 | |||
22 | #ifdef __cplusplus | ||
23 | extern "C" { | ||
24 | #endif | ||
25 | |||
26 | #if defined __cplusplus || (defined __STDC__ && __STDC__) || defined WINDOWS32 | ||
27 | # if !defined __GLIBC__ || !defined __P | ||
28 | # undef __P | ||
29 | # define __P(protos) protos | ||
30 | # endif | ||
31 | #else /* Not C++ or ANSI C. */ | ||
32 | # undef __P | ||
33 | # define __P(protos) () | ||
34 | /* We can get away without defining `const' here only because in this file | ||
35 | it is used only inside the prototype for `fnmatch', which is elided in | ||
36 | non-ANSI C where `const' is problematical. */ | ||
37 | #endif /* C++ or ANSI C. */ | ||
38 | |||
39 | #ifndef const | ||
40 | # if (defined __STDC__ && __STDC__) || defined __cplusplus | ||
41 | # define __const const | ||
42 | # else | ||
43 | # define __const | ||
44 | # endif | ||
45 | #endif | ||
46 | |||
47 | /* We #undef these before defining them because some losing systems | ||
48 | (HP-UX A.08.07 for example) define these in <unistd.h>. */ | ||
49 | #undef FNM_PATHNAME | ||
50 | #undef FNM_NOESCAPE | ||
51 | #undef FNM_PERIOD | ||
52 | |||
53 | /* Bits set in the FLAGS argument to `fnmatch'. */ | ||
54 | #define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */ | ||
55 | #define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */ | ||
56 | #define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */ | ||
57 | |||
58 | #if !defined _POSIX_C_SOURCE || _POSIX_C_SOURCE < 2 || defined _GNU_SOURCE | ||
59 | # define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */ | ||
60 | # define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */ | ||
61 | # define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */ | ||
62 | #endif | ||
63 | |||
64 | /* Value returned by `fnmatch' if STRING does not match PATTERN. */ | ||
65 | #define FNM_NOMATCH 1 | ||
66 | |||
67 | /* This value is returned if the implementation does not support | ||
68 | `fnmatch'. Since this is not the case here it will never be | ||
69 | returned but the conformance test suites still require the symbol | ||
70 | to be defined. */ | ||
71 | #ifdef _XOPEN_SOURCE | ||
72 | # define FNM_NOSYS (-1) | ||
73 | #endif | ||
74 | |||
75 | /* Match NAME against the filename pattern PATTERN, | ||
76 | returning zero if it matches, FNM_NOMATCH if not. */ | ||
77 | extern int fnmatch __P ((__const char *__pattern, __const char *__name, | ||
78 | int __flags)); | ||
79 | |||
80 | #ifdef __cplusplus | ||
81 | } | ||
82 | #endif | ||
83 | |||
84 | #endif /* fnmatch.h */ | ||
diff --git a/include/mingw.h b/include/mingw.h new file mode 100644 index 000000000..762eb143a --- /dev/null +++ b/include/mingw.h | |||
@@ -0,0 +1,235 @@ | |||
1 | #include <winsock2.h> | ||
2 | |||
3 | /* | ||
4 | * things that are not available in header files | ||
5 | */ | ||
6 | |||
7 | typedef int pid_t; | ||
8 | #define hstrerror strerror | ||
9 | |||
10 | #define S_IFLNK 0120000 /* Symbolic link */ | ||
11 | #define S_ISLNK(x) (((x) & S_IFMT) == S_IFLNK) | ||
12 | #define S_ISSOCK(x) 0 | ||
13 | #define S_IRGRP 0 | ||
14 | #define S_IWGRP 0 | ||
15 | #define S_IXGRP 0 | ||
16 | #define S_ISGID 0 | ||
17 | #define S_IROTH 0 | ||
18 | #define S_IXOTH 0 | ||
19 | |||
20 | #define WIFEXITED(x) ((unsigned)(x) < 259) /* STILL_ACTIVE */ | ||
21 | #define WEXITSTATUS(x) ((x) & 0xff) | ||
22 | #define WIFSIGNALED(x) ((unsigned)(x) > 259) | ||
23 | |||
24 | #define SIGHUP 1 | ||
25 | #define SIGQUIT 3 | ||
26 | #define SIGKILL 9 | ||
27 | #define SIGPIPE 13 | ||
28 | #define SIGALRM 14 | ||
29 | #define SIGCHLD 17 | ||
30 | |||
31 | #define F_GETFD 1 | ||
32 | #define F_SETFD 2 | ||
33 | #define FD_CLOEXEC 0x1 | ||
34 | |||
35 | struct passwd { | ||
36 | char *pw_name; | ||
37 | char *pw_gecos; | ||
38 | char *pw_dir; | ||
39 | }; | ||
40 | |||
41 | struct pollfd { | ||
42 | int fd; /* file descriptor */ | ||
43 | short events; /* requested events */ | ||
44 | short revents; /* returned events */ | ||
45 | }; | ||
46 | #define POLLIN 1 | ||
47 | #define POLLHUP 2 | ||
48 | |||
49 | typedef void (__cdecl *sig_handler_t)(int); | ||
50 | struct sigaction { | ||
51 | sig_handler_t sa_handler; | ||
52 | unsigned sa_flags; | ||
53 | }; | ||
54 | #define sigemptyset(x) (void)0 | ||
55 | #define SA_RESTART 0 | ||
56 | |||
57 | struct itimerval { | ||
58 | struct timeval it_value, it_interval; | ||
59 | }; | ||
60 | #define ITIMER_REAL 0 | ||
61 | |||
62 | /* | ||
63 | * trivial stubs | ||
64 | */ | ||
65 | |||
/* Always fails: sets errno to ENOSYS and returns -1.  No argument is used. */
static inline int readlink(const char *path, char *buf, size_t bufsiz)
{
	errno = ENOSYS;
	return -1;
}

/* Always fails: sets errno to ENOSYS and returns -1.  No argument is used. */
static inline int symlink(const char *oldpath, const char *newpath)
{
	errno = ENOSYS;
	return -1;
}

/* Always fails: sets errno to ENOSYS and returns -1.  No argument is used. */
static inline int fchmod(int fildes, mode_t mode)
{
	errno = ENOSYS;
	return -1;
}

/* Always fails: sets errno to ENOSYS and returns -1. */
static inline int fork(void)
{
	errno = ENOSYS;
	return -1;
}

/* No timer is armed; always returns 0. */
static inline unsigned int alarm(unsigned int seconds)
{
	return 0;
}

/* Does nothing and reports success (0). */
static inline int fsync(int fd)
{
	return 0;
}

/* Reports a fixed parent process id of 1. */
static inline int getppid(void)
{
	return 1;
}

/* Does nothing. */
static inline void sync(void)
{
}

/*
 * Reports a fixed user id of 1.  Declared with (void) so that this is a
 * real prototype, like the other stubs, and stray arguments are diagnosed.
 */
static inline int getuid(void)
{
	return 1;
}

/* No lookup is performed; always returns NULL. */
static inline struct passwd *getpwnam(const char *name)
{
	return NULL;
}
86 | static inline int fcntl(int fd, int cmd, long arg) | ||
87 | { | ||
88 | if (cmd == F_GETFD || cmd == F_SETFD) | ||
89 | return 0; | ||
90 | errno = EINVAL; | ||
91 | return -1; | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * simple adaptors | ||
96 | */ | ||
97 | |||
/*
 * Two-argument mkdir() front end: the mode is dropped and the path is
 * handed to the one-argument mkdir() that is in scope at this point.
 * Returns whatever that call returns.
 */
static inline int mingw_mkdir(const char *path, int mode)
{
	int rc = mkdir(path);

	return rc;
}
/* Defined after the wrapper so the call inside it is not redirected. */
#define mkdir mingw_mkdir
103 | |||
/*
 * unlink() wrapper that first sets the file's mode to 0666.  The chmod()
 * result is not checked; only the unlink() status is returned.
 */
static inline int mingw_unlink(const char *pathname)
{
	/* read-only files cannot be removed, so lift that restriction first */
	(void)chmod(pathname, 0666);

	return unlink(pathname);
}
/* Defined after the wrapper so the call inside it is not redirected. */
#define unlink mingw_unlink
111 | |||
/*
 * waitpid() layered on _cwait().  Only options == 0 is handled, in which
 * case the result of _cwait(status, pid, 0) is returned; any other options
 * value fails with errno set to EINVAL and a return value of -1.
 */
static inline int waitpid(pid_t pid, unsigned *status, unsigned options)
{
	if (options != 0) {
		errno = EINVAL;
		return -1;
	}
	return _cwait(status, pid, 0);
}
119 | |||
120 | /* | ||
121 | * implementations of missing functions | ||
122 | */ | ||
123 | |||
124 | int pipe(int filedes[2]); | ||
125 | unsigned int sleep (unsigned int seconds); | ||
126 | int mkstemp(char *template); | ||
127 | int gettimeofday(struct timeval *tv, void *tz); | ||
128 | int poll(struct pollfd *ufds, unsigned int nfds, int timeout); | ||
129 | struct tm *gmtime_r(const time_t *timep, struct tm *result); | ||
130 | struct tm *localtime_r(const time_t *timep, struct tm *result); | ||
131 | int getpagesize(void); /* defined in MinGW's libgcc.a */ | ||
132 | struct passwd *getpwuid(int uid); | ||
133 | int setitimer(int type, struct itimerval *in, struct itimerval *out); | ||
134 | int sigaction(int sig, struct sigaction *in, struct sigaction *out); | ||
135 | int link(const char *oldpath, const char *newpath); | ||
136 | |||
137 | /* | ||
138 | * replacements of existing functions | ||
139 | */ | ||
140 | |||
141 | int mingw_open (const char *filename, int oflags, ...); | ||
142 | #define open mingw_open | ||
143 | |||
144 | char *mingw_getcwd(char *pointer, int len); | ||
145 | #define getcwd mingw_getcwd | ||
146 | |||
147 | char *mingw_getenv(const char *name); | ||
148 | #define getenv mingw_getenv | ||
149 | |||
150 | struct hostent *mingw_gethostbyname(const char *host); | ||
151 | #define gethostbyname mingw_gethostbyname | ||
152 | |||
153 | int mingw_socket(int domain, int type, int protocol); | ||
154 | #define socket mingw_socket | ||
155 | |||
156 | int mingw_connect(int sockfd, struct sockaddr *sa, size_t sz); | ||
157 | #define connect mingw_connect | ||
158 | |||
159 | int mingw_rename(const char*, const char*); | ||
160 | #define rename mingw_rename | ||
161 | |||
162 | #ifdef USE_WIN32_MMAP | ||
163 | int mingw_getpagesize(void); | ||
164 | #define getpagesize mingw_getpagesize | ||
165 | #endif | ||
166 | |||
167 | /* Use mingw_lstat() instead of lstat()/stat() and | ||
168 | * mingw_fstat() instead of fstat() on Windows. | ||
169 | */ | ||
170 | #define off_t off64_t | ||
171 | #define stat _stati64 | ||
172 | #define lseek _lseeki64 | ||
173 | int mingw_lstat(const char *file_name, struct stat *buf); | ||
174 | int mingw_fstat(int fd, struct stat *buf); | ||
175 | #define fstat mingw_fstat | ||
176 | #define lstat mingw_lstat | ||
177 | #define _stati64(x,y) mingw_lstat(x,y) | ||
178 | |||
179 | int mingw_utime(const char *file_name, const struct utimbuf *times); | ||
180 | #define utime mingw_utime | ||
181 | |||
182 | pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env); | ||
183 | void mingw_execvp(const char *cmd, char *const *argv); | ||
184 | #define execvp mingw_execvp | ||
185 | |||
186 | static inline unsigned int git_ntohl(unsigned int x) | ||
187 | { return (unsigned int)ntohl(x); } | ||
188 | #define ntohl git_ntohl | ||
189 | |||
190 | sig_handler_t mingw_signal(int sig, sig_handler_t handler); | ||
191 | #define signal mingw_signal | ||
192 | |||
193 | /* | ||
194 | * ANSI emulation wrappers | ||
195 | */ | ||
196 | |||
197 | int winansi_fputs(const char *str, FILE *stream); | ||
198 | int winansi_printf(const char *format, ...) __attribute__((format (printf, 1, 2))); | ||
199 | int winansi_fprintf(FILE *stream, const char *format, ...) __attribute__((format (printf, 2, 3))); | ||
200 | #define fputs winansi_fputs | ||
201 | #define printf(...) winansi_printf(__VA_ARGS__) | ||
202 | #define fprintf(...) winansi_fprintf(__VA_ARGS__) | ||
203 | |||
204 | /* | ||
205 | * git specific compatibility | ||
206 | */ | ||
207 | |||
/*
 * True when PATH starts with a letter followed by ':' (e.g. "C:...").
 * The character is converted to unsigned char before it reaches isalpha():
 * passing a negative plain-char value to a <ctype.h> function is undefined.
 * PATH is evaluated more than once, so it must be free of side effects.
 */
#define has_dos_drive_prefix(path) \
	(isalpha((unsigned char)*(path)) && (path)[1] == ':')
/* True for '/' and for '\\'.  C is evaluated twice. */
#define is_dir_sep(c) ((c) == '/' || (c) == '\\')
/* NOTE(review): presumably the separator between entries of a PATH-style list. */
#define PATH_SEP ';'
/* printf conversion used for uintmax_t values ("I64u" is the Microsoft CRT spelling). */
#define PRIuMAX "I64u"
212 | |||
213 | void mingw_open_html(const char *path); | ||
214 | #define open_html mingw_open_html | ||
215 | |||
216 | /* | ||
217 | * helpers | ||
218 | */ | ||
219 | |||
220 | char **copy_environ(void); | ||
221 | void free_environ(char **env); | ||
222 | char **env_setenv(char **env, const char *name); | ||
223 | |||
224 | /* | ||
225 | * A replacement of main() that ensures that argv[0] has a path | ||
226 | */ | ||
227 | |||
228 | #define main(c,v) dummy_decl_mingw_main(); \ | ||
229 | static int mingw_main(); \ | ||
230 | int main(int argc, const char **argv) \ | ||
231 | { \ | ||
232 | argv[0] = xstrdup(_pgmptr); \ | ||
233 | return mingw_main(argc, argv); \ | ||
234 | } \ | ||
235 | static int mingw_main(c,v) | ||
diff --git a/include/quote.h b/include/quote.h new file mode 100644 index 000000000..66730f2bf --- /dev/null +++ b/include/quote.h | |||
@@ -0,0 +1,68 @@ | |||
#ifndef QUOTE_H
#define QUOTE_H

#include <stddef.h>
#include <stdio.h>

/*
 * Only pointers to struct strbuf appear below, so a forward declaration is
 * all this header needs.  Without one in scope, the first prototype would
 * declare a struct strbuf whose scope ends with that parameter list, which
 * is a different (incompatible) type from the one callers pass in.
 */
struct strbuf;

/* Help to copy the thing properly quoted for the shell safety.
 * any single quote is replaced with '\'', any exclamation point
 * is replaced with '\!', and the whole thing is enclosed in a
 * single quote pair.
 *
 * For example, if you are passing the result to system() as an
 * argument:
 *
 * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
 *
 * would be appropriate.  If the system() is going to call ssh to
 * run the command on the other side:
 *
 * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
 * sprintf(rcmd, "ssh %s %s", sq_quote(host), sq_quote(cmd));
 *
 * Note that the above examples leak memory!  Remember to free result from
 * sq_quote() in a real application.
 *
 * sq_quote() is used above for illustration only; it is not declared in
 * this header.  The functions declared here take a struct strbuf or a
 * FILE stream as destination; sq_quote_buf() and sq_quote_argv() return
 * nothing.
 */

extern void sq_quote_print(FILE *stream, const char *src);

extern void sq_quote_buf(struct strbuf *, const char *src);
extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);

/* This unwraps what sq_quote() produces in place, but returns
 * NULL if the input does not look like what sq_quote would have
 * produced.
 */
extern char *sq_dequote(char *);

/*
 * Same as the above, but can be used to unwrap many arguments in the
 * same string separated by space. "next" is changed to point to the
 * next argument that should be passed as first parameter. When there
 * is no more argument to be dequoted, "next" is updated to point to NULL.
 */
extern char *sq_dequote_step(char *arg, char **next);
extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc);

extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq);
extern void quote_two_c_style(struct strbuf *, const char *, const char *, int);

extern void write_name_quoted(const char *name, FILE *, int terminator);
extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
				 const char *name, FILE *, int terminator);

/* quote path as relative to the given prefix */
extern char *quote_path_relative(const char *in, int len,
				 struct strbuf *out, const char *prefix);

/* quoting as a string literal for other languages */
extern void perl_quote_print(FILE *stream, const char *src);
extern void python_quote_print(FILE *stream, const char *src);
extern void tcl_quote_print(FILE *stream, const char *src);

#endif
diff --git a/include/regex.h b/include/regex.h new file mode 100644 index 000000000..6eb64f140 --- /dev/null +++ b/include/regex.h | |||
@@ -0,0 +1,490 @@ | |||
1 | /* Definitions for data structures and routines for the regular | ||
2 | expression library, version 0.12. | ||
3 | |||
4 | Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. | ||
5 | |||
6 | This program is free software; you can redistribute it and/or modify | ||
7 | it under the terms of the GNU General Public License as published by | ||
8 | the Free Software Foundation; either version 2, or (at your option) | ||
9 | any later version. | ||
10 | |||
11 | This program is distributed in the hope that it will be useful, | ||
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | GNU General Public License for more details. | ||
15 | |||
16 | You should have received a copy of the GNU General Public License | ||
17 | along with this program; if not, write to the Free Software | ||
18 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ | ||
19 | |||
20 | #ifndef __REGEXP_LIBRARY_H__ | ||
21 | #define __REGEXP_LIBRARY_H__ | ||
22 | |||
23 | /* POSIX says that <sys/types.h> must be included (by the caller) before | ||
24 | <regex.h>. */ | ||
25 | |||
26 | #ifdef VMS | ||
27 | /* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it | ||
28 | should be there. */ | ||
29 | #include <stddef.h> | ||
30 | #endif | ||
31 | |||
32 | |||
33 | /* The following bits are used to determine the regexp syntax we | ||
34 | recognize. The set/not-set meanings are chosen so that Emacs syntax | ||
35 | remains the value 0. The bits are given in alphabetical order, and | ||
36 | the definitions shifted by one from the previous bit; thus, when we | ||
37 | add or remove a bit, only one other definition need change. */ | ||
38 | typedef unsigned reg_syntax_t; | ||
39 | |||
40 | /* If this bit is not set, then \ inside a bracket expression is literal. | ||
41 | If set, then such a \ quotes the following character. */ | ||
42 | #define RE_BACKSLASH_ESCAPE_IN_LISTS (1) | ||
43 | |||
44 | /* If this bit is not set, then + and ? are operators, and \+ and \? are | ||
45 | literals. | ||
46 | If set, then \+ and \? are operators and + and ? are literals. */ | ||
47 | #define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) | ||
48 | |||
49 | /* If this bit is set, then character classes are supported. They are: | ||
50 | [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], | ||
51 | [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. | ||
52 | If not set, then character classes are not supported. */ | ||
53 | #define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) | ||
54 | |||
55 | /* If this bit is set, then ^ and $ are always anchors (outside bracket | ||
56 | expressions, of course). | ||
57 | If this bit is not set, then it depends: | ||
58 | ^ is an anchor if it is at the beginning of a regular | ||
59 | expression or after an open-group or an alternation operator; | ||
60 | $ is an anchor if it is at the end of a regular expression, or | ||
61 | before a close-group or an alternation operator. | ||
62 | |||
63 | This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because | ||
64 | POSIX draft 11.2 says that * etc. in leading positions is undefined. | ||
65 | We already implemented a previous draft which made those constructs | ||
66 | invalid, though, so we haven't changed the code back. */ | ||
67 | #define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) | ||
68 | |||
69 | /* If this bit is set, then special characters are always special | ||
70 | regardless of where they are in the pattern. | ||
71 | If this bit is not set, then special characters are special only in | ||
72 | some contexts; otherwise they are ordinary. Specifically, | ||
73 | * + ? and intervals are only special when not after the beginning, | ||
74 | open-group, or alternation operator. */ | ||
75 | #define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) | ||
76 | |||
77 | /* If this bit is set, then *, +, ?, and { cannot be first in an re or | ||
78 | immediately after an alternation or begin-group operator. */ | ||
79 | #define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) | ||
80 | |||
81 | /* If this bit is set, then . matches newline. | ||
82 | If not set, then it doesn't. */ | ||
83 | #define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) | ||
84 | |||
85 | /* If this bit is set, then . doesn't match NUL. | ||
86 | If not set, then it does. */ | ||
87 | #define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) | ||
88 | |||
89 | /* If this bit is set, nonmatching lists [^...] do not match newline. | ||
90 | If not set, they do. */ | ||
91 | #define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) | ||
92 | |||
93 | /* If this bit is set, either \{...\} or {...} defines an | ||
94 | interval, depending on RE_NO_BK_BRACES. | ||
95 | If not set, \{, \}, {, and } are literals. */ | ||
96 | #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) | ||
97 | |||
98 | /* If this bit is set, +, ? and | aren't recognized as operators. | ||
99 | If not set, they are. */ | ||
100 | #define RE_LIMITED_OPS (RE_INTERVALS << 1) | ||
101 | |||
102 | /* If this bit is set, newline is an alternation operator. | ||
103 | If not set, newline is literal. */ | ||
104 | #define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) | ||
105 | |||
106 | /* If this bit is set, then `{...}' defines an interval, and \{ and \} | ||
107 | are literals. | ||
108 | If not set, then `\{...\}' defines an interval. */ | ||
109 | #define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) | ||
110 | |||
111 | /* If this bit is set, (...) defines a group, and \( and \) are literals. | ||
112 | If not set, \(...\) defines a group, and ( and ) are literals. */ | ||
113 | #define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) | ||
114 | |||
115 | /* If this bit is set, then \<digit> matches <digit>. | ||
116 | If not set, then \<digit> is a back-reference. */ | ||
117 | #define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) | ||
118 | |||
119 | /* If this bit is set, then | is an alternation operator, and \| is literal. | ||
120 | If not set, then \| is an alternation operator, and | is literal. */ | ||
121 | #define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) | ||
122 | |||
123 | /* If this bit is set, then a starting range point collating higher | ||
124 | than the ending range point, as in [z-a], is invalid. | ||
125 | If not set, then when the starting range point collates higher than | ||
126 | the ending range point, the range is ignored. */ | ||
127 | #define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) | ||
128 | |||
129 | /* If this bit is set, then an unmatched ) is ordinary. | ||
130 | If not set, then an unmatched ) is invalid. */ | ||
131 | #define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) | ||
132 | |||
133 | /* This global variable defines the particular regexp syntax to use (for | ||
134 | some interfaces). When a regexp is compiled, the syntax used is | ||
135 | stored in the pattern buffer, so changing this does not affect | ||
136 | already-compiled regexps. */ | ||
137 | extern reg_syntax_t re_syntax_options; | ||
138 | |||
139 | /* Define combinations of the above bits for the standard possibilities. | ||
140 | (The [[[ comments delimit what gets put into the Texinfo file, so | ||
141 | don't delete them!) */ | ||
142 | /* [[[begin syntaxes]]] */ | ||
143 | #define RE_SYNTAX_EMACS 0 | ||
144 | |||
145 | #define RE_SYNTAX_AWK \ | ||
146 | (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ | ||
147 | | RE_NO_BK_PARENS | RE_NO_BK_REFS \ | ||
148 | | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ | ||
149 | | RE_UNMATCHED_RIGHT_PAREN_ORD) | ||
150 | |||
151 | #define RE_SYNTAX_POSIX_AWK \ | ||
152 | (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) | ||
153 | |||
154 | #define RE_SYNTAX_GREP \ | ||
155 | (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ | ||
156 | | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ | ||
157 | | RE_NEWLINE_ALT) | ||
158 | |||
159 | #define RE_SYNTAX_EGREP \ | ||
160 | (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ | ||
161 | | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ | ||
162 | | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ | ||
163 | | RE_NO_BK_VBAR) | ||
164 | |||
165 | #define RE_SYNTAX_POSIX_EGREP \ | ||
166 | (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) | ||
167 | |||
168 | /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ | ||
169 | #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC | ||
170 | |||
171 | #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC | ||
172 | |||
173 | /* Syntax bits common to both basic and extended POSIX regex syntax. */ | ||
174 | #define _RE_SYNTAX_POSIX_COMMON \ | ||
175 | (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ | ||
176 | | RE_INTERVALS | RE_NO_EMPTY_RANGES) | ||
177 | |||
178 | #define RE_SYNTAX_POSIX_BASIC \ | ||
179 | (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) | ||
180 | |||
181 | /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes | ||
182 | RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this | ||
183 | isn't minimal, since other operators, such as \`, aren't disabled. */ | ||
184 | #define RE_SYNTAX_POSIX_MINIMAL_BASIC \ | ||
185 | (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) | ||
186 | |||
187 | #define RE_SYNTAX_POSIX_EXTENDED \ | ||
188 | (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ | ||
189 | | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ | ||
190 | | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ | ||
191 | | RE_UNMATCHED_RIGHT_PAREN_ORD) | ||
192 | |||
193 | /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS | ||
194 | replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ | ||
195 | #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ | ||
196 | (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ | ||
197 | | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ | ||
198 | | RE_NO_BK_PARENS | RE_NO_BK_REFS \ | ||
199 | | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) | ||
200 | /* [[[end syntaxes]]] */ | ||
201 | |||
202 | /* Maximum number of duplicates an interval can allow. Some systems | ||
203 | (erroneously) define this in other header files, but we want our | ||
204 | value, so remove any previous define. */ | ||
205 | #ifdef RE_DUP_MAX | ||
206 | #undef RE_DUP_MAX | ||
207 | #endif | ||
208 | #define RE_DUP_MAX ((1 << 15) - 1) | ||
209 | |||
210 | |||
211 | /* POSIX `cflags' bits (i.e., information for `regcomp'). */ | ||
212 | |||
213 | /* If this bit is set, then use extended regular expression syntax. | ||
214 | If not set, then use basic regular expression syntax. */ | ||
215 | #define REG_EXTENDED 1 | ||
216 | |||
217 | /* If this bit is set, then ignore case when matching. | ||
218 | If not set, then case is significant. */ | ||
219 | #define REG_ICASE (REG_EXTENDED << 1) | ||
220 | |||
221 | /* If this bit is set, then anchors (^ and $) also match at newline | ||
222 | characters in the string. | ||
223 | If not set, then anchors do not match at newlines. */ | ||
224 | #define REG_NEWLINE (REG_ICASE << 1) | ||
225 | |||
226 | /* If this bit is set, then report only success or fail in regexec. | ||
227 | If not set, then returns differ between not matching and errors. */ | ||
228 | #define REG_NOSUB (REG_NEWLINE << 1) | ||
229 | |||
230 | |||
231 | /* POSIX `eflags' bits (i.e., information for regexec). */ | ||
232 | |||
233 | /* If this bit is set, then the beginning-of-line operator doesn't match | ||
234 | the beginning of the string (presumably because it's not the | ||
235 | beginning of a line). | ||
236 | If not set, then the beginning-of-line operator does match the | ||
237 | beginning of the string. */ | ||
238 | #define REG_NOTBOL 1 | ||
239 | |||
240 | /* Like REG_NOTBOL, except for the end-of-line. */ | ||
241 | #define REG_NOTEOL (1 << 1) | ||
242 | |||
243 | |||
244 | /* If any error codes are removed, changed, or added, update the | ||
245 | `re_error_msg' table in regex.c. */ | ||
246 | typedef enum | ||
247 | { | ||
248 | REG_NOERROR = 0, /* Success. */ | ||
249 | REG_NOMATCH, /* Didn't find a match (for regexec). */ | ||
250 | |||
251 | /* POSIX regcomp return error codes. (In the order listed in the | ||
252 | standard.) */ | ||
253 | REG_BADPAT, /* Invalid pattern. */ | ||
254 | REG_ECOLLATE, /* Not implemented. */ | ||
255 | REG_ECTYPE, /* Invalid character class name. */ | ||
256 | REG_EESCAPE, /* Trailing backslash. */ | ||
257 | REG_ESUBREG, /* Invalid back reference. */ | ||
258 | REG_EBRACK, /* Unmatched left bracket. */ | ||
259 | REG_EPAREN, /* Parenthesis imbalance. */ | ||
260 | REG_EBRACE, /* Unmatched \{. */ | ||
261 | REG_BADBR, /* Invalid contents of \{\}. */ | ||
262 | REG_ERANGE, /* Invalid range end. */ | ||
263 | REG_ESPACE, /* Ran out of memory. */ | ||
264 | REG_BADRPT, /* No preceding re for repetition op. */ | ||
265 | |||
266 | /* Error codes we've added. */ | ||
267 | REG_EEND, /* Premature end. */ | ||
268 | REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ | ||
269 | REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ | ||
270 | } reg_errcode_t; | ||
271 | |||
272 | /* This data structure represents a compiled pattern. Before calling | ||
273 | the pattern compiler, the fields `buffer', `allocated', `fastmap', | ||
274 | `translate', and `no_sub' can be set. After the pattern has been | ||
275 | compiled, the `re_nsub' field is available. All other fields are | ||
276 | private to the regex routines. */ | ||
277 | |||
278 | struct re_pattern_buffer | ||
279 | { | ||
280 | /* [[[begin pattern_buffer]]] */ | ||
281 | /* Space that holds the compiled pattern. It is declared as | ||
282 | `unsigned char *' because its elements are | ||
283 | sometimes used as array indexes. */ | ||
284 | unsigned char *buffer; | ||
285 | |||
286 | /* Number of bytes to which `buffer' points. */ | ||
287 | unsigned long allocated; | ||
288 | |||
289 | /* Number of bytes actually used in `buffer'. */ | ||
290 | unsigned long used; | ||
291 | |||
292 | /* Syntax setting with which the pattern was compiled. */ | ||
293 | reg_syntax_t syntax; | ||
294 | |||
295 | /* Pointer to a fastmap, if any, otherwise zero. re_search uses | ||
296 | the fastmap, if there is one, to skip over impossible | ||
297 | starting points for matches. */ | ||
298 | char *fastmap; | ||
299 | |||
300 | /* Either a translate table to apply to all characters before | ||
301 | comparing them, or zero for no translation. The translation | ||
302 | is applied to a pattern when it is compiled and to a string | ||
303 | when it is matched. */ | ||
304 | char *translate; | ||
305 | |||
306 | /* Number of subexpressions found by the compiler. */ | ||
307 | size_t re_nsub; | ||
308 | |||
309 | /* Zero if this pattern cannot match the empty string, one else. | ||
310 | Well, in truth it's used only in `re_search_2', to see | ||
311 | whether or not we should use the fastmap, so we don't set | ||
312 | this absolutely perfectly; see `re_compile_fastmap' (the | ||
313 | `duplicate' case). */ | ||
314 | unsigned can_be_null : 1; | ||
315 | |||
316 | /* If REGS_UNALLOCATED, allocate space in the `regs' structure | ||
317 | for `max (RE_NREGS, re_nsub + 1)' groups. | ||
318 | If REGS_REALLOCATE, reallocate space if necessary. | ||
319 | If REGS_FIXED, use what's there. */ | ||
320 | #define REGS_UNALLOCATED 0 | ||
321 | #define REGS_REALLOCATE 1 | ||
322 | #define REGS_FIXED 2 | ||
323 | unsigned regs_allocated : 2; | ||
324 | |||
325 | /* Set to zero when `regex_compile' compiles a pattern; set to one | ||
326 | by `re_compile_fastmap' if it updates the fastmap. */ | ||
327 | unsigned fastmap_accurate : 1; | ||
328 | |||
329 | /* If set, `re_match_2' does not return information about | ||
330 | subexpressions. */ | ||
331 | unsigned no_sub : 1; | ||
332 | |||
333 | /* If set, a beginning-of-line anchor doesn't match at the | ||
334 | beginning of the string. */ | ||
335 | unsigned not_bol : 1; | ||
336 | |||
337 | /* Similarly for an end-of-line anchor. */ | ||
338 | unsigned not_eol : 1; | ||
339 | |||
340 | /* If true, an anchor at a newline matches. */ | ||
341 | unsigned newline_anchor : 1; | ||
342 | |||
343 | /* [[[end pattern_buffer]]] */ | ||
344 | }; | ||
345 | |||
346 | typedef struct re_pattern_buffer regex_t; | ||
347 | |||
348 | |||
349 | /* search.c (search_buffer) in Emacs needs this one opcode value. It is | ||
350 | defined both in `regex.c' and here. */ | ||
351 | #define RE_EXACTN_VALUE 1 | ||
352 | |||
353 | /* Type for byte offsets within the string. POSIX mandates this. */ | ||
354 | typedef int regoff_t; | ||
355 | |||
356 | |||
357 | /* This is the structure we store register match data in. See | ||
358 | regex.texinfo for a full description of what registers match. */ | ||
359 | struct re_registers | ||
360 | { | ||
361 | unsigned num_regs; | ||
362 | regoff_t *start; | ||
363 | regoff_t *end; | ||
364 | }; | ||
365 | |||
366 | |||
367 | /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, | ||
368 | `re_match_2' returns information about at least this many registers | ||
369 | the first time a `regs' structure is passed. */ | ||
370 | #ifndef RE_NREGS | ||
371 | #define RE_NREGS 30 | ||
372 | #endif | ||
373 | |||
374 | |||
375 | /* POSIX specification for registers. Aside from the different names than | ||
376 | `re_registers', POSIX uses an array of structures, instead of a | ||
377 | structure of arrays. */ | ||
378 | typedef struct | ||
379 | { | ||
380 | regoff_t rm_so; /* Byte offset from string's start to substring's start. */ | ||
381 | regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ | ||
382 | } regmatch_t; | ||
383 | |||
384 | /* Declarations for routines. */ | ||
385 | |||
386 | /* To avoid duplicating every routine declaration -- once with a | ||
387 | prototype (if we are ANSI), and once without (if we aren't) -- we | ||
388 | use the following macro to declare argument types. This | ||
389 | unfortunately clutters up the declarations a bit, but I think it's | ||
390 | worth it. */ | ||
391 | |||
392 | #if __STDC__ | ||
393 | |||
394 | #define _RE_ARGS(args) args | ||
395 | |||
396 | #else /* not __STDC__ */ | ||
397 | |||
398 | #define _RE_ARGS(args) () | ||
399 | |||
400 | #endif /* not __STDC__ */ | ||
401 | |||
402 | /* Sets the current default syntax to SYNTAX, and return the old syntax. | ||
403 | You can also simply assign to the `re_syntax_options' variable. */ | ||
404 | extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); | ||
405 | |||
406 | /* Compile the regular expression PATTERN, with length LENGTH | ||
407 | and syntax given by the global `re_syntax_options', into the buffer | ||
408 | BUFFER. Return NULL if successful, and an error string if not. */ | ||
409 | extern const char *re_compile_pattern | ||
410 | _RE_ARGS ((const char *pattern, int length, | ||
411 | struct re_pattern_buffer *buffer)); | ||
412 | |||
413 | |||
414 | /* Compile a fastmap for the compiled pattern in BUFFER; used to | ||
415 | accelerate searches. Return 0 if successful and -2 if there was an | ||
416 | internal error. */ | ||
417 | extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); | ||
418 | |||
419 | |||
420 | /* Search in the string STRING (with length LENGTH) for the pattern | ||
421 | compiled into BUFFER. Start searching at position START, for RANGE | ||
422 | characters. Return the starting position of the match, -1 for no | ||
423 | match, or -2 for an internal error. Also return register | ||
424 | information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */ | ||
425 | extern int re_search | ||
426 | _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, | ||
427 | int length, int start, int range, struct re_registers *regs)); | ||
428 | |||
429 | |||
430 | /* Like `re_search', but search in the concatenation of STRING1 and | ||
431 | STRING2. Also, stop searching at index START + STOP. */ | ||
432 | extern int re_search_2 | ||
433 | _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, | ||
434 | int length1, const char *string2, int length2, | ||
435 | int start, int range, struct re_registers *regs, int stop)); | ||
436 | |||
437 | |||
438 | /* Like `re_search', but return how many characters in STRING the regexp | ||
439 | in BUFFER matched, starting at position START. */ | ||
440 | extern int re_match | ||
441 | _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, | ||
442 | int length, int start, struct re_registers *regs)); | ||
443 | |||
444 | |||
445 | /* Relates to `re_match' as `re_search_2' relates to `re_search'. */ | ||
446 | extern int re_match_2 | ||
447 | _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, | ||
448 | int length1, const char *string2, int length2, | ||
449 | int start, struct re_registers *regs, int stop)); | ||
450 | |||
451 | |||
452 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and | ||
453 | ENDS. Subsequent matches using BUFFER and REGS will use this memory | ||
454 | for recording register information. STARTS and ENDS must be | ||
455 | allocated with malloc, and must each be at least `NUM_REGS * sizeof | ||
456 | (regoff_t)' bytes long. | ||
457 | |||
458 | If NUM_REGS == 0, then subsequent matches should allocate their own | ||
459 | register data. | ||
460 | |||
461 | Unless this function is called, the first search or match using | ||
462 | PATTERN_BUFFER will allocate its own register data, without | ||
463 | freeing the old data. */ | ||
464 | extern void re_set_registers | ||
465 | _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, | ||
466 | unsigned num_regs, regoff_t *starts, regoff_t *ends)); | ||
467 | |||
468 | /* 4.2 bsd compatibility. */ | ||
469 | extern char *re_comp _RE_ARGS ((const char *)); | ||
470 | extern int re_exec _RE_ARGS ((const char *)); | ||
471 | |||
472 | /* POSIX compatibility. */ | ||
473 | extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); | ||
474 | extern int regexec | ||
475 | _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, | ||
476 | regmatch_t pmatch[], int eflags)); | ||
477 | extern size_t regerror | ||
478 | _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, | ||
479 | size_t errbuf_size)); | ||
480 | extern void regfree _RE_ARGS ((regex_t *preg)); | ||
481 | |||
482 | #endif /* not __REGEXP_LIBRARY_H__ */ | ||
483 | |||
484 | /* | ||
485 | Local variables: | ||
486 | make-backup-files: t | ||
487 | version-control: t | ||
488 | trim-versions-without-asking: nil | ||
489 | End: | ||
490 | */ | ||
diff --git a/include/run-command.h b/include/run-command.h new file mode 100644 index 000000000..e34550284 --- /dev/null +++ b/include/run-command.h | |||
@@ -0,0 +1,93 @@ | |||
1 | #ifndef RUN_COMMAND_H | ||
2 | #define RUN_COMMAND_H | ||
3 | |||
/*
 * Error codes of the run-command machinery: seven consecutive values
 * starting at 10000.  IS_RUN_COMMAND_ERR(x) is true when the negation
 * of x is at least ERR_RUN_COMMAND_FORK.
 */
enum {
	ERR_RUN_COMMAND_FORK              = 10000,
	ERR_RUN_COMMAND_EXEC              = 10001,
	ERR_RUN_COMMAND_PIPE              = 10002,
	ERR_RUN_COMMAND_WAITPID           = 10003,
	ERR_RUN_COMMAND_WAITPID_WRONG_PID = 10004,
	ERR_RUN_COMMAND_WAITPID_SIGNAL    = 10005,
	ERR_RUN_COMMAND_WAITPID_NOEXIT    = 10006,
};
#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK)
14 | |||
struct child_process {
	const char **argv;
	pid_t pid;
	/*
	 * How to set up the .in, .out and .err channels:
	 *
	 *  0   No redirection; the child inherits stdin, stdout and
	 *      stderr from the parent.
	 *
	 * -1   A pipe is allocated:
	 *        .in         receives the writable pipe end; the parent
	 *                    writes to it and the readable end becomes
	 *                    the child's stdin
	 *        .out, .err  receive the readable pipe end; the parent
	 *                    reads from it and the writable end becomes
	 *                    the child's stdout/stderr
	 *      The caller of start_command() must close the returned
	 *      FDs once it is done reading from/writing to them!
	 *
	 * > 0  The channel is attached to that particular FD:
	 *        .in   a readable FD, becomes the child's stdin
	 *        .out  a writable FD, becomes the child's stdout/stderr
	 *        .err > 0 is not supported
	 *      start_command() closes the specified FD, even in case
	 *      of errors!
	 */
	int in;
	int out;
	int err;
	const char *dir;
	const char *const *env;
	unsigned no_stdin:1;
	unsigned no_stdout:1;
	unsigned no_stderr:1;
	unsigned git_cmd:1; /* set if this is to be a git sub-command */
	unsigned stdout_to_stderr:1;
	void (*preexec_cb)(void);
};
48 | |||
49 | int start_command(struct child_process *); | ||
50 | int finish_command(struct child_process *); | ||
51 | int run_command(struct child_process *); | ||
52 | |||
53 | extern int run_hook(const char *index_file, const char *name, ...); | ||
54 | |||
55 | #define RUN_COMMAND_NO_STDIN 1 | ||
56 | #define RUN_GIT_CMD 2 /*If this is to be git sub-command */ | ||
57 | #define RUN_COMMAND_STDOUT_TO_STDERR 4 | ||
58 | int run_command_v_opt(const char **argv, int opt); | ||
59 | |||
60 | /* | ||
61 | * env (the environment) is to be formatted like environ: "VAR=VALUE". | ||
62 | * To unset an environment variable use just "VAR". | ||
63 | */ | ||
64 | int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env); | ||
65 | |||
66 | /* | ||
67 | * The purpose of the following functions is to feed a pipe by running | ||
68 | * a function asynchronously and providing output that the caller reads. | ||
69 | * | ||
70 | * It is expected that no synchronization and mutual exclusion between | ||
71 | * the caller and the feed function is necessary so that the function | ||
72 | * can run in a thread without interfering with the caller. | ||
73 | */ | ||
struct async {
	/*
	 * The feed function: it writes to fd and closes it, and
	 * returns 0 on success, non-zero on failure.
	 */
	int (*proc)(int fd, void *data);
	void *data;
	int out;	/* the caller reads from here and closes it */
#ifdef __MINGW32__
	HANDLE tid;
	int fd_for_proc;
#else
	pid_t pid;
#endif
};
89 | |||
90 | int start_async(struct async *async); | ||
91 | int finish_async(struct async *async); | ||
92 | |||
93 | #endif | ||
diff --git a/include/strbuf.h b/include/strbuf.h new file mode 100644 index 000000000..9ee908a3e --- /dev/null +++ b/include/strbuf.h | |||
@@ -0,0 +1,137 @@ | |||
1 | #ifndef STRBUF_H | ||
2 | #define STRBUF_H | ||
3 | |||
4 | /* | ||
5 | * Strbufs can be used in many ways: as a byte array, or to store arbitrarily | ||
6 | * long, overflow-safe strings. | ||
7 | * | ||
8 | * Strbufs have some invariants that are very important to keep in mind: | ||
9 | * | ||
10 | * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to | ||
11 | * build complex strings/buffers whose final size isn't easily known. | ||
12 | * | ||
13 | * It is NOT legal to copy the ->buf pointer away. | ||
14 | * `strbuf_detach' is the operation that detaches a buffer from its shell | ||
15 | * while keeping the shell valid wrt its invariants. | ||
16 | * | ||
17 | * 2. the ->buf member is a byte array that has at least ->len + 1 bytes | ||
18 | * allocated. The extra byte is used to store a '\0', allowing the ->buf | ||
19 | * member to be a valid C-string. Every strbuf function ensures this | ||
20 | * invariant is preserved. | ||
21 | * | ||
22 | * Note that it is OK to "play" with the buffer directly if you work it | ||
23 | * that way: | ||
24 | * | ||
25 | * strbuf_grow(sb, SOME_SIZE); | ||
26 | * ... Here, the memory array starting at sb->buf, and of length | ||
27 | * ... strbuf_avail(sb) is all yours, and you are sure that | ||
28 | * ... strbuf_avail(sb) is at least SOME_SIZE. | ||
29 | * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE); | ||
30 | * | ||
31 | * Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb). | ||
32 | * | ||
33 | * Doing so is safe, though if it has to be done in many places, adding the | ||
34 | * missing API to the strbuf module is the way to go. | ||
35 | * | ||
36 | * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1 | ||
37 | * even if it's true in the current implementation. Alloc is somehow a | ||
38 | * "private" member that should not be messed with. | ||
39 | */ | ||
40 | |||
41 | #include <assert.h> | ||
42 | |||
/* Buffer that STRBUF_INIT points ->buf at. */
extern char strbuf_slopbuf[];

struct strbuf {
	size_t alloc;	/* treat as private; 0 in a STRBUF_INIT strbuf */
	size_t len;	/* length of the contents; buf[len] holds the '\0' */
	char *buf;
};

/* Static initializer: alloc 0, len 0, buf set to strbuf_slopbuf. */
#define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
51 | |||
52 | /*----- strbuf life cycle -----*/ | ||
53 | extern void strbuf_init(struct strbuf *, size_t); | ||
54 | extern void strbuf_release(struct strbuf *); | ||
55 | extern char *strbuf_detach(struct strbuf *, size_t *); | ||
56 | extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); | ||
57 | static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { | ||
58 | struct strbuf tmp = *a; | ||
59 | *a = *b; | ||
60 | *b = tmp; | ||
61 | } | ||
62 | |||
63 | /*----- strbuf size related -----*/ | ||
64 | static inline size_t strbuf_avail(const struct strbuf *sb) { | ||
65 | return sb->alloc ? sb->alloc - sb->len - 1 : 0; | ||
66 | } | ||
67 | |||
68 | extern void strbuf_grow(struct strbuf *, size_t); | ||
69 | |||
70 | static inline void strbuf_setlen(struct strbuf *sb, size_t len) { | ||
71 | if (!sb->alloc) | ||
72 | strbuf_grow(sb, 0); | ||
73 | assert(len < sb->alloc); | ||
74 | sb->len = len; | ||
75 | sb->buf[len] = '\0'; | ||
76 | } | ||
77 | #define strbuf_reset(sb) strbuf_setlen(sb, 0) | ||
78 | |||
79 | /*----- content related -----*/ | ||
80 | extern void strbuf_trim(struct strbuf *); | ||
81 | extern void strbuf_rtrim(struct strbuf *); | ||
82 | extern void strbuf_ltrim(struct strbuf *); | ||
83 | extern int strbuf_cmp(const struct strbuf *, const struct strbuf *); | ||
84 | extern void strbuf_tolower(struct strbuf *); | ||
85 | |||
86 | extern struct strbuf **strbuf_split(const struct strbuf *, int delim); | ||
87 | extern void strbuf_list_free(struct strbuf **); | ||
88 | |||
89 | /*----- add data in your buffer -----*/ | ||
90 | static inline void strbuf_addch(struct strbuf *sb, int c) { | ||
91 | strbuf_grow(sb, 1); | ||
92 | sb->buf[sb->len++] = c; | ||
93 | sb->buf[sb->len] = '\0'; | ||
94 | } | ||
95 | |||
96 | extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t); | ||
97 | extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); | ||
98 | |||
99 | /* splice pos..pos+len with given data */ | ||
100 | extern void strbuf_splice(struct strbuf *, size_t pos, size_t len, | ||
101 | const void *, size_t); | ||
102 | |||
103 | extern void strbuf_add(struct strbuf *, const void *, size_t); | ||
/* Append the NUL-terminated string S to SB by way of strbuf_add(). */
static inline void strbuf_addstr(struct strbuf *sb, const char *s)
{
	size_t n = strlen(s);

	strbuf_add(sb, s, n);
}
107 | static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) { | ||
108 | strbuf_add(sb, sb2->buf, sb2->len); | ||
109 | } | ||
110 | extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len); | ||
111 | |||
112 | typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context); | ||
113 | extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context); | ||
114 | struct strbuf_expand_dict_entry { | ||
115 | const char *placeholder; | ||
116 | const char *value; | ||
117 | }; | ||
118 | extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context); | ||
119 | |||
120 | __attribute__((format(printf,2,3))) | ||
121 | extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); | ||
122 | |||
123 | extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); | ||
124 | /* XXX: if read fails, any partial read is undone */ | ||
125 | extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); | ||
126 | extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); | ||
127 | extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); | ||
128 | |||
129 | extern int strbuf_getline(struct strbuf *, FILE *, int); | ||
130 | |||
131 | extern void stripspace(struct strbuf *buf, int skip_comments); | ||
132 | extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env); | ||
133 | |||
134 | extern int strbuf_branchname(struct strbuf *sb, const char *name); | ||
135 | extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); | ||
136 | |||
137 | #endif /* STRBUF_H */ | ||
diff --git a/libbb/fnmatch.c b/libbb/fnmatch.c new file mode 100644 index 000000000..1f4ead5f9 --- /dev/null +++ b/libbb/fnmatch.c | |||
@@ -0,0 +1,488 @@ | |||
1 | /* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc. | ||
2 | This file is part of the GNU C Library. | ||
3 | |||
4 | This library is free software; you can redistribute it and/or | ||
5 | modify it under the terms of the GNU Library General Public License as | ||
6 | published by the Free Software Foundation; either version 2 of the | ||
7 | License, or (at your option) any later version. | ||
8 | |||
9 | This library is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | Library General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU Library General Public | ||
15 | License along with this library; see the file COPYING.LIB. If not, | ||
16 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
17 | Boston, MA 02111-1307, USA. */ | ||
18 | |||
19 | #if HAVE_CONFIG_H | ||
20 | # include <config.h> | ||
21 | #endif | ||
22 | |||
23 | /* Enable GNU extensions in fnmatch.h. */ | ||
24 | #ifndef _GNU_SOURCE | ||
25 | # define _GNU_SOURCE 1 | ||
26 | #endif | ||
27 | |||
28 | #include <errno.h> | ||
29 | #include <fnmatch.h> | ||
30 | #include <ctype.h> | ||
31 | |||
32 | #if HAVE_STRING_H || defined _LIBC | ||
33 | # include <string.h> | ||
34 | #else | ||
35 | # include <strings.h> | ||
36 | #endif | ||
37 | |||
38 | #if defined STDC_HEADERS || defined _LIBC | ||
39 | # include <stdlib.h> | ||
40 | #endif | ||
41 | |||
42 | /* For platforms which support the ISO C amendment 1 functionality we | ||
43 | support user defined character classes. */ | ||
44 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) | ||
45 | /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ | ||
46 | # include <wchar.h> | ||
47 | # include <wctype.h> | ||
48 | #endif | ||
49 | |||
50 | /* Comment out all this code if we are using the GNU C Library, and are not | ||
51 | actually compiling the library itself. This code is part of the GNU C | ||
52 | Library, but also included in many other GNU distributions. Compiling | ||
53 | and linking in this code is a waste when using the GNU C library | ||
54 | (especially if it is a shared library). Rather than having every GNU | ||
55 | program understand `configure --with-gnu-libc' and omit the object files, | ||
56 | it is simpler to just do this in the source for each such file. */ | ||
57 | |||
58 | #if defined _LIBC || !defined __GNU_LIBRARY__ | ||
59 | |||
60 | |||
61 | # if defined STDC_HEADERS || !defined isascii | ||
62 | # define ISASCII(c) 1 | ||
63 | # else | ||
64 | # define ISASCII(c) isascii(c) | ||
65 | # endif | ||
66 | |||
67 | # ifdef isblank | ||
68 | # define ISBLANK(c) (ISASCII (c) && isblank (c)) | ||
69 | # else | ||
70 | # define ISBLANK(c) ((c) == ' ' || (c) == '\t') | ||
71 | # endif | ||
72 | # ifdef isgraph | ||
73 | # define ISGRAPH(c) (ISASCII (c) && isgraph (c)) | ||
74 | # else | ||
75 | # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) | ||
76 | # endif | ||
77 | |||
78 | # define ISPRINT(c) (ISASCII (c) && isprint (c)) | ||
79 | # define ISDIGIT(c) (ISASCII (c) && isdigit (c)) | ||
80 | # define ISALNUM(c) (ISASCII (c) && isalnum (c)) | ||
81 | # define ISALPHA(c) (ISASCII (c) && isalpha (c)) | ||
82 | # define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) | ||
83 | # define ISLOWER(c) (ISASCII (c) && islower (c)) | ||
84 | # define ISPUNCT(c) (ISASCII (c) && ispunct (c)) | ||
85 | # define ISSPACE(c) (ISASCII (c) && isspace (c)) | ||
86 | # define ISUPPER(c) (ISASCII (c) && isupper (c)) | ||
87 | # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) | ||
88 | |||
89 | # define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) | ||
90 | |||
91 | # if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) | ||
92 | /* The GNU C library provides support for user-defined character classes | ||
93 | and the functions from ISO C amendment 1. */ | ||
94 | # ifdef CHARCLASS_NAME_MAX | ||
95 | # define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX | ||
96 | # else | ||
97 | /* This shouldn't happen but some implementation might still have this | ||
98 | problem. Use a reasonable default value. */ | ||
99 | # define CHAR_CLASS_MAX_LENGTH 256 | ||
100 | # endif | ||
101 | |||
102 | # ifdef _LIBC | ||
103 | # define IS_CHAR_CLASS(string) __wctype (string) | ||
104 | # else | ||
105 | # define IS_CHAR_CLASS(string) wctype (string) | ||
106 | # endif | ||
107 | # else | ||
108 | # define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ | ||
109 | |||
110 | # define IS_CHAR_CLASS(string) \ | ||
111 | (STREQ (string, "alpha") || STREQ (string, "upper") \ | ||
112 | || STREQ (string, "lower") || STREQ (string, "digit") \ | ||
113 | || STREQ (string, "alnum") || STREQ (string, "xdigit") \ | ||
114 | || STREQ (string, "space") || STREQ (string, "print") \ | ||
115 | || STREQ (string, "punct") || STREQ (string, "graph") \ | ||
116 | || STREQ (string, "cntrl") || STREQ (string, "blank")) | ||
117 | # endif | ||
118 | |||
119 | /* Avoid depending on library functions or files | ||
120 | whose names are inconsistent. */ | ||
121 | |||
122 | # if !defined _LIBC && !defined getenv | ||
123 | extern char *getenv (); | ||
124 | # endif | ||
125 | |||
126 | # ifndef errno | ||
127 | extern int errno; | ||
128 | # endif | ||
129 | |||
130 | /* This function doesn't exist on most systems. */ | ||
131 | |||
132 | # if !defined HAVE___STRCHRNUL && !defined _LIBC | ||
/* Return a pointer to the first occurrence of C in S, or to the
   terminating null byte of S when C does not occur.  */
static char *
__strchrnul (s, c)
     const char *s;
     int c;
{
  char *hit = strchr (s, c);

  /* No occurrence of C: hand back the end of the string instead.  */
  return hit != NULL ? hit : strchr (s, '\0');
}
143 | # endif | ||
144 | |||
145 | # ifndef internal_function | ||
146 | /* Inside GNU libc we mark some function in a special way. In other | ||
147 | environments simply ignore the marking. */ | ||
148 | # define internal_function | ||
149 | # endif | ||
150 | |||
151 | /* Match STRING against the filename pattern PATTERN, returning zero if | ||
152 | it matches, nonzero if not. */ | ||
153 | static int internal_fnmatch __P ((const char *pattern, const char *string, | ||
154 | int no_leading_period, int flags)) | ||
155 | internal_function; | ||
156 | static int | ||
157 | internal_function | ||
158 | internal_fnmatch (pattern, string, no_leading_period, flags) | ||
159 | const char *pattern; | ||
160 | const char *string; | ||
161 | int no_leading_period; | ||
162 | int flags; | ||
163 | { | ||
164 | register const char *p = pattern, *n = string; | ||
165 | register unsigned char c; | ||
166 | |||
167 | /* Note that this evaluates C many times. */ | ||
168 | # ifdef _LIBC | ||
169 | # define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c)) | ||
170 | # else | ||
171 | # define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c)) | ||
172 | # endif | ||
173 | |||
174 | while ((c = *p++) != '\0') | ||
175 | { | ||
176 | c = FOLD (c); | ||
177 | |||
178 | switch (c) | ||
179 | { | ||
180 | case '?': | ||
181 | if (*n == '\0') | ||
182 | return FNM_NOMATCH; | ||
183 | else if (*n == '/' && (flags & FNM_FILE_NAME)) | ||
184 | return FNM_NOMATCH; | ||
185 | else if (*n == '.' && no_leading_period | ||
186 | && (n == string | ||
187 | || (n[-1] == '/' && (flags & FNM_FILE_NAME)))) | ||
188 | return FNM_NOMATCH; | ||
189 | break; | ||
190 | |||
191 | case '\\': | ||
192 | if (!(flags & FNM_NOESCAPE)) | ||
193 | { | ||
194 | c = *p++; | ||
195 | if (c == '\0') | ||
196 | /* Trailing \ loses. */ | ||
197 | return FNM_NOMATCH; | ||
198 | c = FOLD (c); | ||
199 | } | ||
200 | if (FOLD ((unsigned char) *n) != c) | ||
201 | return FNM_NOMATCH; | ||
202 | break; | ||
203 | |||
204 | case '*': | ||
205 | if (*n == '.' && no_leading_period | ||
206 | && (n == string | ||
207 | || (n[-1] == '/' && (flags & FNM_FILE_NAME)))) | ||
208 | return FNM_NOMATCH; | ||
209 | |||
210 | for (c = *p++; c == '?' || c == '*'; c = *p++) | ||
211 | { | ||
212 | if (*n == '/' && (flags & FNM_FILE_NAME)) | ||
213 | /* A slash does not match a wildcard under FNM_FILE_NAME. */ | ||
214 | return FNM_NOMATCH; | ||
215 | else if (c == '?') | ||
216 | { | ||
217 | /* A ? needs to match one character. */ | ||
218 | if (*n == '\0') | ||
219 | /* There isn't another character; no match. */ | ||
220 | return FNM_NOMATCH; | ||
221 | else | ||
222 | /* One character of the string is consumed in matching | ||
223 | this ? wildcard, so *??? won't match if there are | ||
224 | less than three characters. */ | ||
225 | ++n; | ||
226 | } | ||
227 | } | ||
228 | |||
229 | if (c == '\0') | ||
230 | /* The wildcard(s) is/are the last element of the pattern. | ||
231 | If the name is a file name and contains another slash | ||
232 | this does mean it cannot match. */ | ||
233 | return ((flags & FNM_FILE_NAME) && strchr (n, '/') != NULL | ||
234 | ? FNM_NOMATCH : 0); | ||
235 | else | ||
236 | { | ||
237 | const char *endp; | ||
238 | |||
239 | endp = __strchrnul (n, (flags & FNM_FILE_NAME) ? '/' : '\0'); | ||
240 | |||
241 | if (c == '[') | ||
242 | { | ||
243 | int flags2 = ((flags & FNM_FILE_NAME) | ||
244 | ? flags : (flags & ~FNM_PERIOD)); | ||
245 | |||
246 | for (--p; n < endp; ++n) | ||
247 | if (internal_fnmatch (p, n, | ||
248 | (no_leading_period | ||
249 | && (n == string | ||
250 | || (n[-1] == '/' | ||
251 | && (flags | ||
252 | & FNM_FILE_NAME)))), | ||
253 | flags2) | ||
254 | == 0) | ||
255 | return 0; | ||
256 | } | ||
257 | else if (c == '/' && (flags & FNM_FILE_NAME)) | ||
258 | { | ||
259 | while (*n != '\0' && *n != '/') | ||
260 | ++n; | ||
261 | if (*n == '/' | ||
262 | && (internal_fnmatch (p, n + 1, flags & FNM_PERIOD, | ||
263 | flags) == 0)) | ||
264 | return 0; | ||
265 | } | ||
266 | else | ||
267 | { | ||
268 | int flags2 = ((flags & FNM_FILE_NAME) | ||
269 | ? flags : (flags & ~FNM_PERIOD)); | ||
270 | |||
271 | if (c == '\\' && !(flags & FNM_NOESCAPE)) | ||
272 | c = *p; | ||
273 | c = FOLD (c); | ||
274 | for (--p; n < endp; ++n) | ||
275 | if (FOLD ((unsigned char) *n) == c | ||
276 | && (internal_fnmatch (p, n, | ||
277 | (no_leading_period | ||
278 | && (n == string | ||
279 | || (n[-1] == '/' | ||
280 | && (flags | ||
281 | & FNM_FILE_NAME)))), | ||
282 | flags2) == 0)) | ||
283 | return 0; | ||
284 | } | ||
285 | } | ||
286 | |||
287 | /* If we come here no match is possible with the wildcard. */ | ||
288 | return FNM_NOMATCH; | ||
289 | |||
290 | case '[': | ||
291 | { | ||
292 | /* Nonzero if the sense of the character class is inverted. */ | ||
293 | static int posixly_correct; | ||
294 | register int not; | ||
295 | char cold; | ||
296 | |||
297 | if (posixly_correct == 0) | ||
298 | posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | ||
299 | |||
300 | if (*n == '\0') | ||
301 | return FNM_NOMATCH; | ||
302 | |||
303 | if (*n == '.' && no_leading_period && (n == string | ||
304 | || (n[-1] == '/' | ||
305 | && (flags | ||
306 | & FNM_FILE_NAME)))) | ||
307 | return FNM_NOMATCH; | ||
308 | |||
309 | if (*n == '/' && (flags & FNM_FILE_NAME)) | ||
310 | /* `/' cannot be matched. */ | ||
311 | return FNM_NOMATCH; | ||
312 | |||
313 | not = (*p == '!' || (posixly_correct < 0 && *p == '^')); | ||
314 | if (not) | ||
315 | ++p; | ||
316 | |||
317 | c = *p++; | ||
318 | for (;;) | ||
319 | { | ||
320 | unsigned char fn = FOLD ((unsigned char) *n); | ||
321 | |||
322 | if (!(flags & FNM_NOESCAPE) && c == '\\') | ||
323 | { | ||
324 | if (*p == '\0') | ||
325 | return FNM_NOMATCH; | ||
326 | c = FOLD ((unsigned char) *p); | ||
327 | ++p; | ||
328 | |||
329 | if (c == fn) | ||
330 | goto matched; | ||
331 | } | ||
332 | else if (c == '[' && *p == ':') | ||
333 | { | ||
334 | /* Leave room for the null. */ | ||
335 | char str[CHAR_CLASS_MAX_LENGTH + 1]; | ||
336 | size_t c1 = 0; | ||
337 | # if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) | ||
338 | wctype_t wt; | ||
339 | # endif | ||
340 | const char *startp = p; | ||
341 | |||
342 | for (;;) | ||
343 | { | ||
344 | if (c1 == CHAR_CLASS_MAX_LENGTH) | ||
345 | /* The name is too long and therefore the pattern | ||
346 | is ill-formed. */ | ||
347 | return FNM_NOMATCH; | ||
348 | |||
349 | c = *++p; | ||
350 | if (c == ':' && p[1] == ']') | ||
351 | { | ||
352 | p += 2; | ||
353 | break; | ||
354 | } | ||
355 | if (c < 'a' || c >= 'z') | ||
356 | { | ||
357 | /* This cannot possibly be a character class name. | ||
358 | Match it as a normal range. */ | ||
359 | p = startp; | ||
360 | c = '['; | ||
361 | goto normal_bracket; | ||
362 | } | ||
363 | str[c1++] = c; | ||
364 | } | ||
365 | str[c1] = '\0'; | ||
366 | |||
367 | # if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) | ||
368 | wt = IS_CHAR_CLASS (str); | ||
369 | if (wt == 0) | ||
370 | /* Invalid character class name. */ | ||
371 | return FNM_NOMATCH; | ||
372 | |||
373 | if (__iswctype (__btowc ((unsigned char) *n), wt)) | ||
374 | goto matched; | ||
375 | # else | ||
376 | if ((STREQ (str, "alnum") && ISALNUM ((unsigned char) *n)) | ||
377 | || (STREQ (str, "alpha") && ISALPHA ((unsigned char) *n)) | ||
378 | || (STREQ (str, "blank") && ISBLANK ((unsigned char) *n)) | ||
379 | || (STREQ (str, "cntrl") && ISCNTRL ((unsigned char) *n)) | ||
380 | || (STREQ (str, "digit") && ISDIGIT ((unsigned char) *n)) | ||
381 | || (STREQ (str, "graph") && ISGRAPH ((unsigned char) *n)) | ||
382 | || (STREQ (str, "lower") && ISLOWER ((unsigned char) *n)) | ||
383 | || (STREQ (str, "print") && ISPRINT ((unsigned char) *n)) | ||
384 | || (STREQ (str, "punct") && ISPUNCT ((unsigned char) *n)) | ||
385 | || (STREQ (str, "space") && ISSPACE ((unsigned char) *n)) | ||
386 | || (STREQ (str, "upper") && ISUPPER ((unsigned char) *n)) | ||
387 | || (STREQ (str, "xdigit") && ISXDIGIT ((unsigned char) *n))) | ||
388 | goto matched; | ||
389 | # endif | ||
390 | } | ||
391 | else if (c == '\0') | ||
392 | /* [ (unterminated) loses. */ | ||
393 | return FNM_NOMATCH; | ||
394 | else | ||
395 | { | ||
396 | normal_bracket: | ||
397 | if (FOLD (c) == fn) | ||
398 | goto matched; | ||
399 | |||
400 | cold = c; | ||
401 | c = *p++; | ||
402 | |||
403 | if (c == '-' && *p != ']') | ||
404 | { | ||
405 | /* It is a range. */ | ||
406 | unsigned char cend = *p++; | ||
407 | if (!(flags & FNM_NOESCAPE) && cend == '\\') | ||
408 | cend = *p++; | ||
409 | if (cend == '\0') | ||
410 | return FNM_NOMATCH; | ||
411 | |||
412 | if (cold <= fn && fn <= FOLD (cend)) | ||
413 | goto matched; | ||
414 | |||
415 | c = *p++; | ||
416 | } | ||
417 | } | ||
418 | |||
419 | if (c == ']') | ||
420 | break; | ||
421 | } | ||
422 | |||
423 | if (!not) | ||
424 | return FNM_NOMATCH; | ||
425 | break; | ||
426 | |||
427 | matched: | ||
428 | /* Skip the rest of the [...] that already matched. */ | ||
429 | while (c != ']') | ||
430 | { | ||
431 | if (c == '\0') | ||
432 | /* [... (unterminated) loses. */ | ||
433 | return FNM_NOMATCH; | ||
434 | |||
435 | c = *p++; | ||
436 | if (!(flags & FNM_NOESCAPE) && c == '\\') | ||
437 | { | ||
438 | if (*p == '\0') | ||
439 | return FNM_NOMATCH; | ||
440 | /* XXX 1003.2d11 is unclear if this is right. */ | ||
441 | ++p; | ||
442 | } | ||
443 | else if (c == '[' && *p == ':') | ||
444 | { | ||
445 | do | ||
446 | if (*++p == '\0') | ||
447 | return FNM_NOMATCH; | ||
448 | while (*p != ':' || p[1] == ']'); | ||
449 | p += 2; | ||
450 | c = *p; | ||
451 | } | ||
452 | } | ||
453 | if (not) | ||
454 | return FNM_NOMATCH; | ||
455 | } | ||
456 | break; | ||
457 | |||
458 | default: | ||
459 | if (c != FOLD ((unsigned char) *n)) | ||
460 | return FNM_NOMATCH; | ||
461 | } | ||
462 | |||
463 | ++n; | ||
464 | } | ||
465 | |||
466 | if (*n == '\0') | ||
467 | return 0; | ||
468 | |||
469 | if ((flags & FNM_LEADING_DIR) && *n == '/') | ||
470 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ | ||
471 | return 0; | ||
472 | |||
473 | return FNM_NOMATCH; | ||
474 | |||
475 | # undef FOLD | ||
476 | } | ||
477 | |||
478 | |||
/* Public entry point: match STRING against PATTERN under FLAGS.  The
   leading-period restriction handed to internal_fnmatch is the
   FNM_PERIOD bit of FLAGS.  */
int
fnmatch (pattern, string, flags)
     const char *pattern;
     const char *string;
     int flags;
{
  int leading_period = flags & FNM_PERIOD;

  return internal_fnmatch (pattern, string, leading_period, flags);
}
487 | |||
488 | #endif /* _LIBC or not __GNU_LIBRARY__. */ | ||
diff --git a/libbb/mingw.c b/libbb/mingw.c new file mode 100644 index 000000000..2839d9df6 --- /dev/null +++ b/libbb/mingw.c | |||
@@ -0,0 +1,1141 @@ | |||
1 | #include "../git-compat-util.h" | ||
2 | #include "win32.h" | ||
3 | #include "../strbuf.h" | ||
4 | |||
5 | unsigned int _CRT_fmode = _O_BINARY; | ||
6 | |||
7 | static int err_win_to_posix(DWORD winerr) | ||
8 | { | ||
9 | int error = ENOSYS; | ||
10 | switch(winerr) { | ||
11 | case ERROR_ACCESS_DENIED: error = EACCES; break; | ||
12 | case ERROR_ACCOUNT_DISABLED: error = EACCES; break; | ||
13 | case ERROR_ACCOUNT_RESTRICTION: error = EACCES; break; | ||
14 | case ERROR_ALREADY_ASSIGNED: error = EBUSY; break; | ||
15 | case ERROR_ALREADY_EXISTS: error = EEXIST; break; | ||
16 | case ERROR_ARITHMETIC_OVERFLOW: error = ERANGE; break; | ||
17 | case ERROR_BAD_COMMAND: error = EIO; break; | ||
18 | case ERROR_BAD_DEVICE: error = ENODEV; break; | ||
19 | case ERROR_BAD_DRIVER_LEVEL: error = ENXIO; break; | ||
20 | case ERROR_BAD_EXE_FORMAT: error = ENOEXEC; break; | ||
21 | case ERROR_BAD_FORMAT: error = ENOEXEC; break; | ||
22 | case ERROR_BAD_LENGTH: error = EINVAL; break; | ||
23 | case ERROR_BAD_PATHNAME: error = ENOENT; break; | ||
24 | case ERROR_BAD_PIPE: error = EPIPE; break; | ||
25 | case ERROR_BAD_UNIT: error = ENODEV; break; | ||
26 | case ERROR_BAD_USERNAME: error = EINVAL; break; | ||
27 | case ERROR_BROKEN_PIPE: error = EPIPE; break; | ||
28 | case ERROR_BUFFER_OVERFLOW: error = ENAMETOOLONG; break; | ||
29 | case ERROR_BUSY: error = EBUSY; break; | ||
30 | case ERROR_BUSY_DRIVE: error = EBUSY; break; | ||
31 | case ERROR_CALL_NOT_IMPLEMENTED: error = ENOSYS; break; | ||
32 | case ERROR_CANNOT_MAKE: error = EACCES; break; | ||
33 | case ERROR_CANTOPEN: error = EIO; break; | ||
34 | case ERROR_CANTREAD: error = EIO; break; | ||
35 | case ERROR_CANTWRITE: error = EIO; break; | ||
36 | case ERROR_CRC: error = EIO; break; | ||
37 | case ERROR_CURRENT_DIRECTORY: error = EACCES; break; | ||
38 | case ERROR_DEVICE_IN_USE: error = EBUSY; break; | ||
39 | case ERROR_DEV_NOT_EXIST: error = ENODEV; break; | ||
40 | case ERROR_DIRECTORY: error = EINVAL; break; | ||
41 | case ERROR_DIR_NOT_EMPTY: error = ENOTEMPTY; break; | ||
42 | case ERROR_DISK_CHANGE: error = EIO; break; | ||
43 | case ERROR_DISK_FULL: error = ENOSPC; break; | ||
44 | case ERROR_DRIVE_LOCKED: error = EBUSY; break; | ||
45 | case ERROR_ENVVAR_NOT_FOUND: error = EINVAL; break; | ||
46 | case ERROR_EXE_MARKED_INVALID: error = ENOEXEC; break; | ||
47 | case ERROR_FILENAME_EXCED_RANGE: error = ENAMETOOLONG; break; | ||
48 | case ERROR_FILE_EXISTS: error = EEXIST; break; | ||
49 | case ERROR_FILE_INVALID: error = ENODEV; break; | ||
50 | case ERROR_FILE_NOT_FOUND: error = ENOENT; break; | ||
51 | case ERROR_GEN_FAILURE: error = EIO; break; | ||
52 | case ERROR_HANDLE_DISK_FULL: error = ENOSPC; break; | ||
53 | case ERROR_INSUFFICIENT_BUFFER: error = ENOMEM; break; | ||
54 | case ERROR_INVALID_ACCESS: error = EACCES; break; | ||
55 | case ERROR_INVALID_ADDRESS: error = EFAULT; break; | ||
56 | case ERROR_INVALID_BLOCK: error = EFAULT; break; | ||
57 | case ERROR_INVALID_DATA: error = EINVAL; break; | ||
58 | case ERROR_INVALID_DRIVE: error = ENODEV; break; | ||
59 | case ERROR_INVALID_EXE_SIGNATURE: error = ENOEXEC; break; | ||
60 | case ERROR_INVALID_FLAGS: error = EINVAL; break; | ||
61 | case ERROR_INVALID_FUNCTION: error = ENOSYS; break; | ||
62 | case ERROR_INVALID_HANDLE: error = EBADF; break; | ||
63 | case ERROR_INVALID_LOGON_HOURS: error = EACCES; break; | ||
64 | case ERROR_INVALID_NAME: error = EINVAL; break; | ||
65 | case ERROR_INVALID_OWNER: error = EINVAL; break; | ||
66 | case ERROR_INVALID_PARAMETER: error = EINVAL; break; | ||
67 | case ERROR_INVALID_PASSWORD: error = EPERM; break; | ||
68 | case ERROR_INVALID_PRIMARY_GROUP: error = EINVAL; break; | ||
69 | case ERROR_INVALID_SIGNAL_NUMBER: error = EINVAL; break; | ||
70 | case ERROR_INVALID_TARGET_HANDLE: error = EIO; break; | ||
71 | case ERROR_INVALID_WORKSTATION: error = EACCES; break; | ||
72 | case ERROR_IO_DEVICE: error = EIO; break; | ||
73 | case ERROR_IO_INCOMPLETE: error = EINTR; break; | ||
74 | case ERROR_LOCKED: error = EBUSY; break; | ||
75 | case ERROR_LOCK_VIOLATION: error = EACCES; break; | ||
76 | case ERROR_LOGON_FAILURE: error = EACCES; break; | ||
77 | case ERROR_MAPPED_ALIGNMENT: error = EINVAL; break; | ||
78 | case ERROR_META_EXPANSION_TOO_LONG: error = E2BIG; break; | ||
79 | case ERROR_MORE_DATA: error = EPIPE; break; | ||
80 | case ERROR_NEGATIVE_SEEK: error = ESPIPE; break; | ||
81 | case ERROR_NOACCESS: error = EFAULT; break; | ||
82 | case ERROR_NONE_MAPPED: error = EINVAL; break; | ||
83 | case ERROR_NOT_ENOUGH_MEMORY: error = ENOMEM; break; | ||
84 | case ERROR_NOT_READY: error = EAGAIN; break; | ||
85 | case ERROR_NOT_SAME_DEVICE: error = EXDEV; break; | ||
86 | case ERROR_NO_DATA: error = EPIPE; break; | ||
87 | case ERROR_NO_MORE_SEARCH_HANDLES: error = EIO; break; | ||
88 | case ERROR_NO_PROC_SLOTS: error = EAGAIN; break; | ||
89 | case ERROR_NO_SUCH_PRIVILEGE: error = EACCES; break; | ||
90 | case ERROR_OPEN_FAILED: error = EIO; break; | ||
91 | case ERROR_OPEN_FILES: error = EBUSY; break; | ||
92 | case ERROR_OPERATION_ABORTED: error = EINTR; break; | ||
93 | case ERROR_OUTOFMEMORY: error = ENOMEM; break; | ||
94 | case ERROR_PASSWORD_EXPIRED: error = EACCES; break; | ||
95 | case ERROR_PATH_BUSY: error = EBUSY; break; | ||
96 | case ERROR_PATH_NOT_FOUND: error = ENOENT; break; | ||
97 | case ERROR_PIPE_BUSY: error = EBUSY; break; | ||
98 | case ERROR_PIPE_CONNECTED: error = EPIPE; break; | ||
99 | case ERROR_PIPE_LISTENING: error = EPIPE; break; | ||
100 | case ERROR_PIPE_NOT_CONNECTED: error = EPIPE; break; | ||
101 | case ERROR_PRIVILEGE_NOT_HELD: error = EACCES; break; | ||
102 | case ERROR_READ_FAULT: error = EIO; break; | ||
103 | case ERROR_SEEK: error = EIO; break; | ||
104 | case ERROR_SEEK_ON_DEVICE: error = ESPIPE; break; | ||
105 | case ERROR_SHARING_BUFFER_EXCEEDED: error = ENFILE; break; | ||
106 | case ERROR_SHARING_VIOLATION: error = EACCES; break; | ||
107 | case ERROR_STACK_OVERFLOW: error = ENOMEM; break; | ||
108 | case ERROR_SWAPERROR: error = ENOENT; break; | ||
109 | case ERROR_TOO_MANY_MODULES: error = EMFILE; break; | ||
110 | case ERROR_TOO_MANY_OPEN_FILES: error = EMFILE; break; | ||
111 | case ERROR_UNRECOGNIZED_MEDIA: error = ENXIO; break; | ||
112 | case ERROR_UNRECOGNIZED_VOLUME: error = ENODEV; break; | ||
113 | case ERROR_WAIT_NO_CHILDREN: error = ECHILD; break; | ||
114 | case ERROR_WRITE_FAULT: error = EIO; break; | ||
115 | case ERROR_WRITE_PROTECT: error = EROFS; break; | ||
116 | } | ||
117 | return error; | ||
118 | } | ||
119 | |||
120 | #undef open | ||
121 | int mingw_open (const char *filename, int oflags, ...) | ||
122 | { | ||
123 | va_list args; | ||
124 | unsigned mode; | ||
125 | va_start(args, oflags); | ||
126 | mode = va_arg(args, int); | ||
127 | va_end(args); | ||
128 | |||
129 | if (!strcmp(filename, "/dev/null")) | ||
130 | filename = "nul"; | ||
131 | int fd = open(filename, oflags, mode); | ||
132 | if (fd < 0 && (oflags & O_CREAT) && errno == EACCES) { | ||
133 | DWORD attrs = GetFileAttributes(filename); | ||
134 | if (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY)) | ||
135 | errno = EISDIR; | ||
136 | } | ||
137 | return fd; | ||
138 | } | ||
139 | |||
140 | static inline time_t filetime_to_time_t(const FILETIME *ft) | ||
141 | { | ||
142 | long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; | ||
143 | winTime -= 116444736000000000LL; /* Windows to Unix Epoch conversion */ | ||
144 | winTime /= 10000000; /* Nano to seconds resolution */ | ||
145 | return (time_t)winTime; | ||
146 | } | ||
147 | |||
148 | /* We keep the do_lstat code in a separate function to avoid recursion. | ||
149 | * When a path ends with a slash, the stat will fail with ENOENT. In | ||
150 | * this case, we strip the trailing slashes and stat again. | ||
151 | */ | ||
152 | static int do_lstat(const char *file_name, struct stat *buf) | ||
153 | { | ||
154 | WIN32_FILE_ATTRIBUTE_DATA fdata; | ||
155 | |||
156 | if (!(errno = get_file_attr(file_name, &fdata))) { | ||
157 | buf->st_ino = 0; | ||
158 | buf->st_gid = 0; | ||
159 | buf->st_uid = 0; | ||
160 | buf->st_nlink = 1; | ||
161 | buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes); | ||
162 | buf->st_size = fdata.nFileSizeLow | | ||
163 | (((off_t)fdata.nFileSizeHigh)<<32); | ||
164 | buf->st_dev = buf->st_rdev = 0; /* not used by Git */ | ||
165 | buf->st_atime = filetime_to_time_t(&(fdata.ftLastAccessTime)); | ||
166 | buf->st_mtime = filetime_to_time_t(&(fdata.ftLastWriteTime)); | ||
167 | buf->st_ctime = filetime_to_time_t(&(fdata.ftCreationTime)); | ||
168 | return 0; | ||
169 | } | ||
170 | return -1; | ||
171 | } | ||
172 | |||
173 | /* We provide our own lstat/fstat functions, since the provided | ||
174 | * lstat/fstat functions are so slow. These stat functions are | ||
175 | * tailored for Git's usage (read: fast), and are not meant to be | ||
176 | * complete. Note that Git stat()s are redirected to mingw_lstat() | ||
177 | * too, since Windows doesn't really handle symlinks that well. | ||
178 | */ | ||
179 | int mingw_lstat(const char *file_name, struct stat *buf) | ||
180 | { | ||
181 | int namelen; | ||
182 | static char alt_name[PATH_MAX]; | ||
183 | |||
184 | if (!do_lstat(file_name, buf)) | ||
185 | return 0; | ||
186 | |||
187 | /* if file_name ended in a '/', Windows returned ENOENT; | ||
188 | * try again without trailing slashes | ||
189 | */ | ||
190 | if (errno != ENOENT) | ||
191 | return -1; | ||
192 | |||
193 | namelen = strlen(file_name); | ||
194 | if (namelen && file_name[namelen-1] != '/') | ||
195 | return -1; | ||
196 | while (namelen && file_name[namelen-1] == '/') | ||
197 | --namelen; | ||
198 | if (!namelen || namelen >= PATH_MAX) | ||
199 | return -1; | ||
200 | |||
201 | memcpy(alt_name, file_name, namelen); | ||
202 | alt_name[namelen] = 0; | ||
203 | return do_lstat(alt_name, buf); | ||
204 | } | ||
205 | |||
206 | #undef fstat | ||
207 | int mingw_fstat(int fd, struct stat *buf) | ||
208 | { | ||
209 | HANDLE fh = (HANDLE)_get_osfhandle(fd); | ||
210 | BY_HANDLE_FILE_INFORMATION fdata; | ||
211 | |||
212 | if (fh == INVALID_HANDLE_VALUE) { | ||
213 | errno = EBADF; | ||
214 | return -1; | ||
215 | } | ||
216 | /* direct non-file handles to MS's fstat() */ | ||
217 | if (GetFileType(fh) != FILE_TYPE_DISK) | ||
218 | return _fstati64(fd, buf); | ||
219 | |||
220 | if (GetFileInformationByHandle(fh, &fdata)) { | ||
221 | buf->st_ino = 0; | ||
222 | buf->st_gid = 0; | ||
223 | buf->st_uid = 0; | ||
224 | buf->st_nlink = 1; | ||
225 | buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes); | ||
226 | buf->st_size = fdata.nFileSizeLow | | ||
227 | (((off_t)fdata.nFileSizeHigh)<<32); | ||
228 | buf->st_dev = buf->st_rdev = 0; /* not used by Git */ | ||
229 | buf->st_atime = filetime_to_time_t(&(fdata.ftLastAccessTime)); | ||
230 | buf->st_mtime = filetime_to_time_t(&(fdata.ftLastWriteTime)); | ||
231 | buf->st_ctime = filetime_to_time_t(&(fdata.ftCreationTime)); | ||
232 | return 0; | ||
233 | } | ||
234 | errno = EBADF; | ||
235 | return -1; | ||
236 | } | ||
237 | |||
238 | static inline void time_t_to_filetime(time_t t, FILETIME *ft) | ||
239 | { | ||
240 | long long winTime = t * 10000000LL + 116444736000000000LL; | ||
241 | ft->dwLowDateTime = winTime; | ||
242 | ft->dwHighDateTime = winTime >> 32; | ||
243 | } | ||
244 | |||
245 | int mingw_utime (const char *file_name, const struct utimbuf *times) | ||
246 | { | ||
247 | FILETIME mft, aft; | ||
248 | int fh, rc; | ||
249 | |||
250 | /* must have write permission */ | ||
251 | if ((fh = open(file_name, O_RDWR | O_BINARY)) < 0) | ||
252 | return -1; | ||
253 | |||
254 | time_t_to_filetime(times->modtime, &mft); | ||
255 | time_t_to_filetime(times->actime, &aft); | ||
256 | if (!SetFileTime((HANDLE)_get_osfhandle(fh), NULL, &aft, &mft)) { | ||
257 | errno = EINVAL; | ||
258 | rc = -1; | ||
259 | } else | ||
260 | rc = 0; | ||
261 | close(fh); | ||
262 | return rc; | ||
263 | } | ||
264 | |||
/*
 * sleep() implemented with the Win32 Sleep() call, which is given the
 * requested duration multiplied by 1000.  Always returns 0.
 */
unsigned int sleep (unsigned int seconds)
{
	unsigned int millis = seconds * 1000;

	Sleep(millis);
	return 0;
}
270 | |||
/*
 * mkstemp() built on mktemp(): fill in the template, then create and
 * open the resulting file read/write with mode 0600.
 *
 * The file is opened with O_EXCL so that a file which appeared under
 * the generated name between mktemp() and open() is never silently
 * reused; open() fails in that case.
 *
 * Returns the file descriptor, or -1 if mktemp() fails (or a negative
 * value from open()).
 */
int mkstemp(char *template)
{
	char *filename = mktemp(template);
	if (filename == NULL)
		return -1;
	return open(filename, O_RDWR | O_CREAT | O_EXCL, 0600);
}
278 | |||
279 | int gettimeofday(struct timeval *tv, void *tz) | ||
280 | { | ||
281 | SYSTEMTIME st; | ||
282 | struct tm tm; | ||
283 | GetSystemTime(&st); | ||
284 | tm.tm_year = st.wYear-1900; | ||
285 | tm.tm_mon = st.wMonth-1; | ||
286 | tm.tm_mday = st.wDay; | ||
287 | tm.tm_hour = st.wHour; | ||
288 | tm.tm_min = st.wMinute; | ||
289 | tm.tm_sec = st.wSecond; | ||
290 | tv->tv_sec = tm_to_time_t(&tm); | ||
291 | if (tv->tv_sec < 0) | ||
292 | return -1; | ||
293 | tv->tv_usec = st.wMilliseconds*1000; | ||
294 | return 0; | ||
295 | } | ||
296 | |||
297 | int pipe(int filedes[2]) | ||
298 | { | ||
299 | int fd; | ||
300 | HANDLE h[2], parent; | ||
301 | |||
302 | if (_pipe(filedes, 8192, 0) < 0) | ||
303 | return -1; | ||
304 | |||
305 | parent = GetCurrentProcess(); | ||
306 | |||
307 | if (!DuplicateHandle (parent, (HANDLE)_get_osfhandle(filedes[0]), | ||
308 | parent, &h[0], 0, FALSE, DUPLICATE_SAME_ACCESS)) { | ||
309 | close(filedes[0]); | ||
310 | close(filedes[1]); | ||
311 | return -1; | ||
312 | } | ||
313 | if (!DuplicateHandle (parent, (HANDLE)_get_osfhandle(filedes[1]), | ||
314 | parent, &h[1], 0, FALSE, DUPLICATE_SAME_ACCESS)) { | ||
315 | close(filedes[0]); | ||
316 | close(filedes[1]); | ||
317 | CloseHandle(h[0]); | ||
318 | return -1; | ||
319 | } | ||
320 | fd = _open_osfhandle((int)h[0], O_NOINHERIT); | ||
321 | if (fd < 0) { | ||
322 | close(filedes[0]); | ||
323 | close(filedes[1]); | ||
324 | CloseHandle(h[0]); | ||
325 | CloseHandle(h[1]); | ||
326 | return -1; | ||
327 | } | ||
328 | close(filedes[0]); | ||
329 | filedes[0] = fd; | ||
330 | fd = _open_osfhandle((int)h[1], O_NOINHERIT); | ||
331 | if (fd < 0) { | ||
332 | close(filedes[0]); | ||
333 | close(filedes[1]); | ||
334 | CloseHandle(h[1]); | ||
335 | return -1; | ||
336 | } | ||
337 | close(filedes[1]); | ||
338 | filedes[1] = fd; | ||
339 | return 0; | ||
340 | } | ||
341 | |||
342 | int poll(struct pollfd *ufds, unsigned int nfds, int timeout) | ||
343 | { | ||
344 | int i, pending; | ||
345 | |||
346 | if (timeout >= 0) { | ||
347 | if (nfds == 0) { | ||
348 | Sleep(timeout); | ||
349 | return 0; | ||
350 | } | ||
351 | return errno = EINVAL, error("poll timeout not supported"); | ||
352 | } | ||
353 | |||
354 | /* When there is only one fd to wait for, then we pretend that | ||
355 | * input is available and let the actual wait happen when the | ||
356 | * caller invokes read(). | ||
357 | */ | ||
358 | if (nfds == 1) { | ||
359 | if (!(ufds[0].events & POLLIN)) | ||
360 | return errno = EINVAL, error("POLLIN not set"); | ||
361 | ufds[0].revents = POLLIN; | ||
362 | return 0; | ||
363 | } | ||
364 | |||
365 | repeat: | ||
366 | pending = 0; | ||
367 | for (i = 0; i < nfds; i++) { | ||
368 | DWORD avail = 0; | ||
369 | HANDLE h = (HANDLE) _get_osfhandle(ufds[i].fd); | ||
370 | if (h == INVALID_HANDLE_VALUE) | ||
371 | return -1; /* errno was set */ | ||
372 | |||
373 | if (!(ufds[i].events & POLLIN)) | ||
374 | return errno = EINVAL, error("POLLIN not set"); | ||
375 | |||
376 | /* this emulation works only for pipes */ | ||
377 | if (!PeekNamedPipe(h, NULL, 0, NULL, &avail, NULL)) { | ||
378 | int err = GetLastError(); | ||
379 | if (err == ERROR_BROKEN_PIPE) { | ||
380 | ufds[i].revents = POLLHUP; | ||
381 | pending++; | ||
382 | } else { | ||
383 | errno = EINVAL; | ||
384 | return error("PeekNamedPipe failed," | ||
385 | " GetLastError: %u", err); | ||
386 | } | ||
387 | } else if (avail) { | ||
388 | ufds[i].revents = POLLIN; | ||
389 | pending++; | ||
390 | } else | ||
391 | ufds[i].revents = 0; | ||
392 | } | ||
393 | if (!pending) { | ||
394 | /* The only times that we spin here is when the process | ||
395 | * that is connected through the pipes is waiting for | ||
396 | * its own input data to become available. But since | ||
397 | * the process (pack-objects) is itself CPU intensive, | ||
398 | * it will happily pick up the time slice that we are | ||
399 | * relinguishing here. | ||
400 | */ | ||
401 | Sleep(0); | ||
402 | goto repeat; | ||
403 | } | ||
404 | return 0; | ||
405 | } | ||
406 | |||
/*
 * gmtime_r() built on gmtime(): the broken-down time is copied into
 * *result.  Returns result, or NULL when gmtime() itself returns NULL
 * (the time cannot be represented); *result is not written then.
 */
struct tm *gmtime_r(const time_t *timep, struct tm *result)
{
	/* gmtime() in MSVCRT.DLL is thread-safe, but not reentrant */
	struct tm *tmp = gmtime(timep);

	if (!tmp)
		return NULL;
	memcpy(result, tmp, sizeof(struct tm));
	return result;
}
413 | |||
/*
 * localtime_r() built on localtime(): the broken-down time is copied
 * into *result.  Returns result, or NULL when localtime() itself
 * returns NULL; *result is not written then.
 */
struct tm *localtime_r(const time_t *timep, struct tm *result)
{
	/* localtime() in MSVCRT.DLL is thread-safe, but not reentrant */
	struct tm *tmp = localtime(timep);

	if (!tmp)
		return NULL;
	memcpy(result, tmp, sizeof(struct tm));
	return result;
}
420 | |||
421 | #undef getcwd | ||
/*
 * getcwd() wrapper that rewrites every backslash in the returned
 * directory to a forward slash.  Returns what getcwd() returned; on
 * failure (NULL) the buffer is not examined.
 */
char *mingw_getcwd(char *pointer, int len)
{
	char *cwd = getcwd(pointer, len);
	char *c;

	if (cwd == NULL)
		return cwd;

	for (c = pointer; *c != '\0'; c++) {
		if (*c == '\\')
			*c = '/';
	}
	return cwd;
}
433 | |||
434 | #undef getenv | ||
/*
 * getenv() wrapper.  A lookup of "TMPDIR" that finds nothing falls
 * back to "TMP" and then to "TEMP"; every other name is passed
 * through to getenv() unchanged.
 */
char *mingw_getenv(const char *name)
{
	char *value = getenv(name);

	if (value || strcmp(name, "TMPDIR"))
		return value;

	/* on Windows it is TMP and TEMP */
	value = getenv("TMP");
	return value ? value : getenv("TEMP");
}
446 | |||
/*
 * Quote one command line argument for a Windows command line.
 *
 * See http://msdn2.microsoft.com/en-us/library/17w5ykft(vs.71).aspx
 * (Parsing C++ Command-Line Arguments)
 *
 * Returns arg itself when nothing has to be quoted; otherwise returns
 * a buffer obtained from xmalloc() that the caller must free (callers
 * detect this by comparing the result with arg).
 *
 * In the quoted form:
 *  - the argument is wrapped in double quotes,
 *  - every '"' is preceded by a backslash,
 *  - a run of backslashes directly before a '"' or at the very end of
 *    the argument is doubled, so that it cannot escape the following
 *    (embedded or closing) quote.
 */
static const char *quote_arg(const char *arg)
{
	/* count chars to quote */
	int len = 0, n = 0;
	int force_quotes = 0;
	char *q, *d;
	const char *p = arg;
	if (!*p) force_quotes = 1;
	while (*p) {
		if (isspace(*p) || *p == '*' || *p == '?' || *p == '{' || *p == '\'')
			force_quotes = 1;
		else if (*p == '"')
			n++;
		else if (*p == '\\') {
			int count = 0;
			while (*p == '\\') {
				count++;
				p++;
				len++;
			}
			/* the run needs doubling before a quote or the end */
			if (*p == '"' || !*p)
				n += count*2 + 1;
			continue;
		}
		len++;
		p++;
	}
	if (!force_quotes && n == 0)
		return arg;

	/* insert \ where necessary */
	d = q = xmalloc(len+n+3);
	*d++ = '"';
	while (*arg) {
		if (*arg == '"')
			*d++ = '\\';
		else if (*arg == '\\') {
			int count = 0;
			while (*arg == '\\') {
				count++;
				*d++ = *arg++;
			}
			if (*arg == '"' || !*arg) {
				while (count-- > 0)
					*d++ = '\\';
				/*
				 * At the end of the argument: stop here, so
				 * the terminator is not copied and arg is
				 * not advanced past it.
				 */
				if (!*arg)
					break;
				*d++ = '\\';
			}
		}
		*d++ = *arg++;
	}
	*d++ = '"';
	*d++ = 0;
	return q;
}
505 | |||
/*
 * Determine the interpreter named on the "#!" line of the file cmd.
 *
 * Returns the part of the first line after its last '/' (or, if there
 * is no '/', after its last '\\'), cut at the first space, i.e. the
 * interpreter's base name without options.  Returns NULL when cmd ends
 * in ".exe", cannot be opened, is too short, does not start with "#!",
 * has no line terminator within the first 99 bytes, or has no
 * directory separator on that line.
 *
 * The line may end in "\n" or "\r\n".  The result points into a static
 * buffer that is overwritten by the next call.
 */
static const char *parse_interpreter(const char *cmd)
{
	static char buf[100];
	char *p, *opt;
	int n, fd;

	/* don't even try a .exe */
	n = strlen(cmd);
	if (n >= 4 && !strcasecmp(cmd+n-4, ".exe"))
		return NULL;

	fd = open(cmd, O_RDONLY);
	if (fd < 0)
		return NULL;
	n = read(fd, buf, sizeof(buf)-1);
	close(fd);
	if (n < 4)	/* at least '#!/x' and not error */
		return NULL;

	if (buf[0] != '#' || buf[1] != '!')
		return NULL;
	buf[n] = '\0';
	/* cut at the first CR or LF so a CRLF file does not leave a '\r' */
	p = buf + strcspn(buf, "\r\n");
	if (!*p)
		return NULL;

	*p = '\0';
	if (!(p = strrchr(buf+2, '/')) && !(p = strrchr(buf+2, '\\')))
		return NULL;
	/* strip options */
	if ((opt = strchr(p+1, ' ')))
		*opt = '\0';
	return p+1;
}
540 | |||
/*
 * Splits the PATH into parts.
 *
 * Returns a NULL-terminated array of the non-empty, ';'-separated
 * components of the PATH environment variable.  Array and strings are
 * allocated and are released with free_path_split().  Returns NULL
 * when PATH is unset, empty, or contains only separators.
 */
static char **get_path_split(void)
{
	char *p, **path, *envpath = getenv("PATH");
	int i, n = 0;

	if (!envpath || !*envpath)
		return NULL;

	/* work on a private copy; the ';' are overwritten with NULs */
	envpath = xstrdup(envpath);
	p = envpath;
	while (p) {
		char *dir = p;
		p = strchr(p, ';');
		if (p) *p++ = '\0';
		if (*dir) {	/* not earlier, catches series of ; */
			++n;
		}
	}
	if (!n) {
		/* nothing usable in PATH; do not leak the copy */
		free(envpath);
		return NULL;
	}

	path = xmalloc((n+1)*sizeof(char*));
	p = envpath;
	i = 0;
	do {
		if (*p)
			path[i++] = xstrdup(p);
		p = p+strlen(p)+1;
	} while (i < n);
	path[i] = NULL;

	free(envpath);

	return path;
}
579 | |||
/*
 * Release a NULL-terminated string array together with every string
 * in it.  A NULL array is accepted and ignored.
 */
static void free_path_split(char **path)
{
	char **entry;

	if (path == NULL)
		return;

	for (entry = path; *entry != NULL; entry++)
		free(*entry);
	free(path);
}
590 | |||
591 | /* | ||
592 | * exe_only means that we only want to detect .exe files, but not scripts | ||
593 | * (which do not have an extension) | ||
594 | */ | ||
595 | static char *lookup_prog(const char *dir, const char *cmd, int isexe, int exe_only) | ||
596 | { | ||
597 | char path[MAX_PATH]; | ||
598 | snprintf(path, sizeof(path), "%s/%s.exe", dir, cmd); | ||
599 | |||
600 | if (!isexe && access(path, F_OK) == 0) | ||
601 | return xstrdup(path); | ||
602 | path[strlen(path)-4] = '\0'; | ||
603 | if ((!exe_only || isexe) && access(path, F_OK) == 0) | ||
604 | if (!(GetFileAttributes(path) & FILE_ATTRIBUTE_DIRECTORY)) | ||
605 | return xstrdup(path); | ||
606 | return NULL; | ||
607 | } | ||
608 | |||
/*
 * Determines the absolute path of cmd using the split path in path.
 * If cmd contains a slash or backslash, no lookup is performed and a
 * copy of cmd is returned.
 *
 * path may be NULL (get_path_split() returns NULL for an empty PATH);
 * no directory is searched in that case.
 *
 * Returns an allocated string the caller must free, or NULL when cmd
 * was not found.
 */
static char *path_lookup(const char *cmd, char **path, int exe_only)
{
	char *prog = NULL;
	int len = strlen(cmd);
	int isexe = len >= 4 && !strcasecmp(cmd+len-4, ".exe");

	if (strchr(cmd, '/') || strchr(cmd, '\\'))
		prog = xstrdup(cmd);

	/* guard against a NULL list before looking at its entries */
	while (!prog && path && *path)
		prog = lookup_prog(*path++, cmd, isexe, exe_only);

	return prog;
}
627 | |||
/*
 * qsort() comparison callback for arrays of strings: orders two
 * entries with strcasecmp(), i.e. ignoring case.
 */
static int env_compare(const void *a, const void *b)
{
	const char *lhs = *(char *const *)a;
	const char *rhs = *(char *const *)b;

	return strcasecmp(lhs, rhs);
}
634 | |||
635 | static pid_t mingw_spawnve(const char *cmd, const char **argv, char **env, | ||
636 | int prepend_cmd) | ||
637 | { | ||
638 | STARTUPINFO si; | ||
639 | PROCESS_INFORMATION pi; | ||
640 | struct strbuf envblk, args; | ||
641 | unsigned flags; | ||
642 | BOOL ret; | ||
643 | |||
644 | /* Determine whether or not we are associated to a console */ | ||
645 | HANDLE cons = CreateFile("CONOUT$", GENERIC_WRITE, | ||
646 | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, | ||
647 | FILE_ATTRIBUTE_NORMAL, NULL); | ||
648 | if (cons == INVALID_HANDLE_VALUE) { | ||
649 | /* There is no console associated with this process. | ||
650 | * Since the child is a console process, Windows | ||
651 | * would normally create a console window. But | ||
652 | * since we'll be redirecting std streams, we do | ||
653 | * not need the console. | ||
654 | * It is necessary to use DETACHED_PROCESS | ||
655 | * instead of CREATE_NO_WINDOW to make ssh | ||
656 | * recognize that it has no console. | ||
657 | */ | ||
658 | flags = DETACHED_PROCESS; | ||
659 | } else { | ||
660 | /* There is already a console. If we specified | ||
661 | * DETACHED_PROCESS here, too, Windows would | ||
662 | * disassociate the child from the console. | ||
663 | * The same is true for CREATE_NO_WINDOW. | ||
664 | * Go figure! | ||
665 | */ | ||
666 | flags = 0; | ||
667 | CloseHandle(cons); | ||
668 | } | ||
669 | memset(&si, 0, sizeof(si)); | ||
670 | si.cb = sizeof(si); | ||
671 | si.dwFlags = STARTF_USESTDHANDLES; | ||
672 | si.hStdInput = (HANDLE) _get_osfhandle(0); | ||
673 | si.hStdOutput = (HANDLE) _get_osfhandle(1); | ||
674 | si.hStdError = (HANDLE) _get_osfhandle(2); | ||
675 | |||
676 | /* concatenate argv, quoting args as we go */ | ||
677 | strbuf_init(&args, 0); | ||
678 | if (prepend_cmd) { | ||
679 | char *quoted = (char *)quote_arg(cmd); | ||
680 | strbuf_addstr(&args, quoted); | ||
681 | if (quoted != cmd) | ||
682 | free(quoted); | ||
683 | } | ||
684 | for (; *argv; argv++) { | ||
685 | char *quoted = (char *)quote_arg(*argv); | ||
686 | if (*args.buf) | ||
687 | strbuf_addch(&args, ' '); | ||
688 | strbuf_addstr(&args, quoted); | ||
689 | if (quoted != *argv) | ||
690 | free(quoted); | ||
691 | } | ||
692 | |||
693 | if (env) { | ||
694 | int count = 0; | ||
695 | char **e, **sorted_env; | ||
696 | |||
697 | for (e = env; *e; e++) | ||
698 | count++; | ||
699 | |||
700 | /* environment must be sorted */ | ||
701 | sorted_env = xmalloc(sizeof(*sorted_env) * (count + 1)); | ||
702 | memcpy(sorted_env, env, sizeof(*sorted_env) * (count + 1)); | ||
703 | qsort(sorted_env, count, sizeof(*sorted_env), env_compare); | ||
704 | |||
705 | strbuf_init(&envblk, 0); | ||
706 | for (e = sorted_env; *e; e++) { | ||
707 | strbuf_addstr(&envblk, *e); | ||
708 | strbuf_addch(&envblk, '\0'); | ||
709 | } | ||
710 | free(sorted_env); | ||
711 | } | ||
712 | |||
713 | memset(&pi, 0, sizeof(pi)); | ||
714 | ret = CreateProcess(cmd, args.buf, NULL, NULL, TRUE, flags, | ||
715 | env ? envblk.buf : NULL, NULL, &si, &pi); | ||
716 | |||
717 | if (env) | ||
718 | strbuf_release(&envblk); | ||
719 | strbuf_release(&args); | ||
720 | |||
721 | if (!ret) { | ||
722 | errno = ENOENT; | ||
723 | return -1; | ||
724 | } | ||
725 | CloseHandle(pi.hThread); | ||
726 | return (pid_t)pi.hProcess; | ||
727 | } | ||
728 | |||
/*
 * Spawn cmd after resolving it through PATH.
 *
 * If the resolved program names an interpreter (parse_interpreter()),
 * the interpreter is looked up (.exe only) and started with the
 * program's path temporarily substituted for argv[0]; argv[0] is
 * restored before returning.  Otherwise the program is started
 * directly.
 *
 * Returns what mingw_spawnve() returns, or -1 with errno set to
 * ENOENT when the program or its interpreter cannot be found.
 */
pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env)
{
	char **path = get_path_split();
	char *prog = path_lookup(cmd, path, 0);
	const char *interpr;
	pid_t pid;

	if (!prog) {
		errno = ENOENT;
		free_path_split(path);
		return -1;
	}

	interpr = parse_interpreter(prog);
	if (!interpr) {
		pid = mingw_spawnve(prog, argv, env, 0);
	} else {
		const char *saved_argv0 = argv[0];
		char *iprog = path_lookup(interpr, path, 1);

		argv[0] = prog;
		if (iprog) {
			pid = mingw_spawnve(iprog, argv, env, 1);
			free(iprog);
		} else {
			errno = ENOENT;
			pid = -1;
		}
		argv[0] = saved_argv0;
	}

	free(prog);
	free_path_split(path);
	return pid;
}
763 | |||
/*
 * If cmd is a script with a "#!" line, run it through its interpreter.
 *
 * Returns 0 when cmd names no interpreter or the interpreter is not
 * found in PATH, and 1 when the interpreter was found but could not be
 * started.  When the interpreter was started, this function does not
 * return: it waits for the child and exits with the status obtained
 * from waitpid() (255 if waitpid() fails).
 *
 * The interpreter is started with cmd in place of argv[0], followed by
 * the remaining elements of argv.
 */
static int try_shell_exec(const char *cmd, char *const *argv, char **env)
{
	const char *interpr = parse_interpreter(cmd);
	char **path;
	char *prog;
	int pid = 0;

	if (!interpr)
		return 0;
	path = get_path_split();
	prog = path_lookup(interpr, path, 1);
	if (prog) {
		int argc = 0;
		const char **argv2;
		while (argv[argc]) argc++;
		/*
		 * Room for cmd, argv[1..argc-1] and the terminating NULL.
		 * The terminator is stored explicitly so that an empty
		 * argv still yields a properly terminated {cmd, NULL}.
		 */
		argv2 = xmalloc(sizeof(*argv2) * (argc+2));
		argv2[0] = cmd;	/* full path to the script file */
		if (argc > 1)
			memcpy(&argv2[1], &argv[1], sizeof(*argv2) * (argc-1));
		argv2[argc ? argc : 1] = NULL;
		pid = mingw_spawnve(prog, argv2, env, 1);
		if (pid >= 0) {
			int status;
			if (waitpid(pid, &status, 0) < 0)
				status = 255;
			exit(status);
		}
		pid = 1;	/* indicate that we tried but failed */
		free(prog);
		free(argv2);
	}
	free_path_split(path);
	return pid;
}
796 | |||
/*
 * Emulate execve(): run cmd, wait for it and exit with its status.
 *
 * A script is handed to try_shell_exec() first; if that reports that
 * it attempted the script (non-zero), nothing else is done.  Otherwise
 * cmd is spawned directly.  The function returns only if nothing could
 * be started; a failing waitpid() results in exit status 255.
 */
static void mingw_execve(const char *cmd, char *const *argv, char *const *env)
{
	int pid, status;

	/* check if git_command is a shell script */
	if (try_shell_exec(cmd, argv, (char **)env))
		return;

	pid = mingw_spawnve(cmd, (const char **)argv, (char **)env, 0);
	if (pid < 0)
		return;
	if (waitpid(pid, &status, 0) < 0)
		status = 255;
	exit(status);
}
811 | |||
812 | void mingw_execvp(const char *cmd, char *const *argv) | ||
813 | { | ||
814 | char **path = get_path_split(); | ||
815 | char *prog = path_lookup(cmd, path, 0); | ||
816 | |||
817 | if (prog) { | ||
818 | mingw_execve(prog, argv, environ); | ||
819 | free(prog); | ||
820 | } else | ||
821 | errno = ENOENT; | ||
822 | |||
823 | free_path_split(path); | ||
824 | } | ||
825 | |||
826 | char **copy_environ() | ||
827 | { | ||
828 | char **env; | ||
829 | int i = 0; | ||
830 | while (environ[i]) | ||
831 | i++; | ||
832 | env = xmalloc((i+1)*sizeof(*env)); | ||
833 | for (i = 0; environ[i]; i++) | ||
834 | env[i] = xstrdup(environ[i]); | ||
835 | env[i] = NULL; | ||
836 | return env; | ||
837 | } | ||
838 | |||
/*
 * Free every string of the NULL-terminated array env and then the
 * array itself.  env must not be NULL.
 */
void free_environ(char **env)
{
	char **entry = env;

	while (*entry)
		free(*entry++);
	free(env);
}
846 | |||
/*
 * Find the variable whose name is the first nmln characters of name
 * in the NULL-terminated array env of "NAME=value" strings.
 *
 * Names are compared without regard to case, matching the ordering
 * env_compare() applies to the same strings (Windows treats "Path"
 * and "PATH" as one variable).
 *
 * Returns the index of the matching entry, or -1 if there is none.
 */
static int lookup_env(char **env, const char *name, size_t nmln)
{
	int i;

	for (i = 0; env[i]; i++) {
		if (0 == strncasecmp(env[i], name, nmln)
		    && '=' == env[i][nmln])
			/* matches */
			return i;
	}
	return -1;
}
859 | |||
/*
 * Set or unset a variable in the environment array env.
 *
 * If name contains '=' ("NAME=value") the variable is set: an existing
 * entry is replaced, otherwise the array is grown by one and the new
 * entry appended.  Without '=' the variable is unset: an existing
 * entry is freed and the following entries are moved down by one.
 *
 * Returns the array to use from now on; it may differ from env because
 * of the reallocation.
 */
char **env_setenv(char **env, const char *name)
{
	char *eq = strchrnul(name, '=');
	int setting = *eq != '\0';
	int i = lookup_env(env, name, eq-name);

	if (i >= 0) {
		/* the variable exists: replace or remove it */
		free(env[i]);
		if (setting) {
			env[i] = xstrdup(name);
		} else {
			while (env[i]) {
				env[i] = env[i+1];
				i++;
			}
		}
		return env;
	}

	if (setting) {
		/* not present yet: append */
		int count = 0;

		while (env[count])
			count++;
		env = xrealloc(env, (count+2)*sizeof(*env));
		env[count] = xstrdup(name);
		env[count+1] = NULL;
	}
	return env;
}
887 | |||
888 | /* this is the first function to call into WS_32; initialize it */ | ||
889 | #undef gethostbyname | ||
890 | struct hostent *mingw_gethostbyname(const char *host) | ||
891 | { | ||
892 | WSADATA wsa; | ||
893 | |||
894 | if (WSAStartup(MAKEWORD(2,2), &wsa)) | ||
895 | die("unable to initialize winsock subsystem, error %d", | ||
896 | WSAGetLastError()); | ||
897 | atexit((void(*)(void)) WSACleanup); | ||
898 | return gethostbyname(host); | ||
899 | } | ||
900 | |||
901 | int mingw_socket(int domain, int type, int protocol) | ||
902 | { | ||
903 | int sockfd; | ||
904 | SOCKET s = WSASocket(domain, type, protocol, NULL, 0, 0); | ||
905 | if (s == INVALID_SOCKET) { | ||
906 | /* | ||
907 | * WSAGetLastError() values are regular BSD error codes | ||
908 | * biased by WSABASEERR. | ||
909 | * However, strerror() does not know about networking | ||
910 | * specific errors, which are values beginning at 38 or so. | ||
911 | * Therefore, we choose to leave the biased error code | ||
912 | * in errno so that _if_ someone looks up the code somewhere, | ||
913 | * then it is at least the number that are usually listed. | ||
914 | */ | ||
915 | errno = WSAGetLastError(); | ||
916 | return -1; | ||
917 | } | ||
918 | /* convert into a file descriptor */ | ||
919 | if ((sockfd = _open_osfhandle(s, O_RDWR|O_BINARY)) < 0) { | ||
920 | closesocket(s); | ||
921 | return error("unable to make a socket file descriptor: %s", | ||
922 | strerror(errno)); | ||
923 | } | ||
924 | return sockfd; | ||
925 | } | ||
926 | |||
927 | #undef connect | ||
928 | int mingw_connect(int sockfd, struct sockaddr *sa, size_t sz) | ||
929 | { | ||
930 | SOCKET s = (SOCKET)_get_osfhandle(sockfd); | ||
931 | return connect(s, sa, sz); | ||
932 | } | ||
933 | |||
934 | #undef rename | ||
935 | int mingw_rename(const char *pold, const char *pnew) | ||
936 | { | ||
937 | DWORD attrs; | ||
938 | |||
939 | /* | ||
940 | * Try native rename() first to get errno right. | ||
941 | * It is based on MoveFile(), which cannot overwrite existing files. | ||
942 | */ | ||
943 | if (!rename(pold, pnew)) | ||
944 | return 0; | ||
945 | if (errno != EEXIST) | ||
946 | return -1; | ||
947 | if (MoveFileEx(pold, pnew, MOVEFILE_REPLACE_EXISTING)) | ||
948 | return 0; | ||
949 | /* TODO: translate more errors */ | ||
950 | if (GetLastError() == ERROR_ACCESS_DENIED && | ||
951 | (attrs = GetFileAttributes(pnew)) != INVALID_FILE_ATTRIBUTES) { | ||
952 | if (attrs & FILE_ATTRIBUTE_DIRECTORY) { | ||
953 | errno = EISDIR; | ||
954 | return -1; | ||
955 | } | ||
956 | if ((attrs & FILE_ATTRIBUTE_READONLY) && | ||
957 | SetFileAttributes(pnew, attrs & ~FILE_ATTRIBUTE_READONLY)) { | ||
958 | if (MoveFileEx(pold, pnew, MOVEFILE_REPLACE_EXISTING)) | ||
959 | return 0; | ||
960 | /* revert file attributes on failure */ | ||
961 | SetFileAttributes(pnew, attrs); | ||
962 | } | ||
963 | } | ||
964 | errno = EACCES; | ||
965 | return -1; | ||
966 | } | ||
967 | |||
968 | struct passwd *getpwuid(int uid) | ||
969 | { | ||
970 | static char user_name[100]; | ||
971 | static struct passwd p; | ||
972 | |||
973 | DWORD len = sizeof(user_name); | ||
974 | if (!GetUserName(user_name, &len)) | ||
975 | return NULL; | ||
976 | p.pw_name = user_name; | ||
977 | p.pw_gecos = "unknown"; | ||
978 | p.pw_dir = NULL; | ||
979 | return &p; | ||
980 | } | ||
981 | |||
982 | static HANDLE timer_event; | ||
983 | static HANDLE timer_thread; | ||
984 | static int timer_interval; | ||
985 | static int one_shot; | ||
986 | static sig_handler_t timer_fn = SIG_DFL; | ||
987 | |||
988 | /* The timer works like this: | ||
989 | * The thread, ticktack(), is a trivial routine that most of the time | ||
990 | * only waits to receive the signal to terminate. The main thread tells | ||
991 | * the thread to terminate by setting the timer_event to the signalled | ||
992 | * state. | ||
993 | * But ticktack() interrupts the wait state after the timer's interval | ||
994 | * length to call the signal handler. | ||
995 | */ | ||
996 | |||
997 | static __stdcall unsigned ticktack(void *dummy) | ||
998 | { | ||
999 | while (WaitForSingleObject(timer_event, timer_interval) == WAIT_TIMEOUT) { | ||
1000 | if (timer_fn == SIG_DFL) | ||
1001 | die("Alarm"); | ||
1002 | if (timer_fn != SIG_IGN) | ||
1003 | timer_fn(SIGALRM); | ||
1004 | if (one_shot) | ||
1005 | break; | ||
1006 | } | ||
1007 | return 0; | ||
1008 | } | ||
1009 | |||
1010 | static int start_timer_thread(void) | ||
1011 | { | ||
1012 | timer_event = CreateEvent(NULL, FALSE, FALSE, NULL); | ||
1013 | if (timer_event) { | ||
1014 | timer_thread = (HANDLE) _beginthreadex(NULL, 0, ticktack, NULL, 0, NULL); | ||
1015 | if (!timer_thread ) | ||
1016 | return errno = ENOMEM, | ||
1017 | error("cannot start timer thread"); | ||
1018 | } else | ||
1019 | return errno = ENOMEM, | ||
1020 | error("cannot allocate resources for timer"); | ||
1021 | return 0; | ||
1022 | } | ||
1023 | |||
1024 | static void stop_timer_thread(void) | ||
1025 | { | ||
1026 | if (timer_event) | ||
1027 | SetEvent(timer_event); /* tell thread to terminate */ | ||
1028 | if (timer_thread) { | ||
1029 | int rc = WaitForSingleObject(timer_thread, 1000); | ||
1030 | if (rc == WAIT_TIMEOUT) | ||
1031 | error("timer thread did not terminate timely"); | ||
1032 | else if (rc != WAIT_OBJECT_0) | ||
1033 | error("waiting for timer thread failed: %lu", | ||
1034 | GetLastError()); | ||
1035 | CloseHandle(timer_thread); | ||
1036 | } | ||
1037 | if (timer_event) | ||
1038 | CloseHandle(timer_event); | ||
1039 | timer_event = NULL; | ||
1040 | timer_thread = NULL; | ||
1041 | } | ||
1042 | |||
/* Return nonzero when both timevals hold the same seconds and microseconds. */
static inline int is_timeval_eq(const struct timeval *i1, const struct timeval *i2)
{
	if (i1->tv_sec != i2->tv_sec)
		return 0;
	return i1->tv_usec == i2->tv_usec;
}
1047 | |||
1048 | int setitimer(int type, struct itimerval *in, struct itimerval *out) | ||
1049 | { | ||
1050 | static const struct timeval zero; | ||
1051 | static int atexit_done; | ||
1052 | |||
1053 | if (out != NULL) | ||
1054 | return errno = EINVAL, | ||
1055 | error("setitimer param 3 != NULL not implemented"); | ||
1056 | if (!is_timeval_eq(&in->it_interval, &zero) && | ||
1057 | !is_timeval_eq(&in->it_interval, &in->it_value)) | ||
1058 | return errno = EINVAL, | ||
1059 | error("setitimer: it_interval must be zero or eq it_value"); | ||
1060 | |||
1061 | if (timer_thread) | ||
1062 | stop_timer_thread(); | ||
1063 | |||
1064 | if (is_timeval_eq(&in->it_value, &zero) && | ||
1065 | is_timeval_eq(&in->it_interval, &zero)) | ||
1066 | return 0; | ||
1067 | |||
1068 | timer_interval = in->it_value.tv_sec * 1000 + in->it_value.tv_usec / 1000; | ||
1069 | one_shot = is_timeval_eq(&in->it_interval, &zero); | ||
1070 | if (!atexit_done) { | ||
1071 | atexit(stop_timer_thread); | ||
1072 | atexit_done = 1; | ||
1073 | } | ||
1074 | return start_timer_thread(); | ||
1075 | } | ||
1076 | |||
1077 | int sigaction(int sig, struct sigaction *in, struct sigaction *out) | ||
1078 | { | ||
1079 | if (sig != SIGALRM) | ||
1080 | return errno = EINVAL, | ||
1081 | error("sigaction only implemented for SIGALRM"); | ||
1082 | if (out != NULL) | ||
1083 | return errno = EINVAL, | ||
1084 | error("sigaction: param 3 != NULL not implemented"); | ||
1085 | |||
1086 | timer_fn = in->sa_handler; | ||
1087 | return 0; | ||
1088 | } | ||
1089 | |||
1090 | #undef signal | ||
1091 | sig_handler_t mingw_signal(int sig, sig_handler_t handler) | ||
1092 | { | ||
1093 | if (sig != SIGALRM) | ||
1094 | return signal(sig, handler); | ||
1095 | sig_handler_t old = timer_fn; | ||
1096 | timer_fn = handler; | ||
1097 | return old; | ||
1098 | } | ||
1099 | |||
1100 | static const char *make_backslash_path(const char *path) | ||
1101 | { | ||
1102 | static char buf[PATH_MAX + 1]; | ||
1103 | char *c; | ||
1104 | |||
1105 | if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) | ||
1106 | die("Too long path: %.*s", 60, path); | ||
1107 | |||
1108 | for (c = buf; *c; c++) { | ||
1109 | if (*c == '/') | ||
1110 | *c = '\\'; | ||
1111 | } | ||
1112 | return buf; | ||
1113 | } | ||
1114 | |||
/*
 * Announce on stdout that a browser is being launched, then pass the
 * backslash form of "unixpath" to ShellExecute() with the "open" verb.
 * The ShellExecute() result is not inspected.
 */
void mingw_open_html(const char *unixpath)
{
	const char *target = make_backslash_path(unixpath);

	fputs("Launching default browser to display HTML ...\n", stdout);
	ShellExecute(NULL, "open", target, NULL, "\\", 0);
}
1121 | |||
1122 | int link(const char *oldpath, const char *newpath) | ||
1123 | { | ||
1124 | typedef BOOL WINAPI (*T)(const char*, const char*, LPSECURITY_ATTRIBUTES); | ||
1125 | static T create_hard_link = NULL; | ||
1126 | if (!create_hard_link) { | ||
1127 | create_hard_link = (T) GetProcAddress( | ||
1128 | GetModuleHandle("kernel32.dll"), "CreateHardLinkA"); | ||
1129 | if (!create_hard_link) | ||
1130 | create_hard_link = (T)-1; | ||
1131 | } | ||
1132 | if (create_hard_link == (T)-1) { | ||
1133 | errno = ENOSYS; | ||
1134 | return -1; | ||
1135 | } | ||
1136 | if (!create_hard_link(newpath, oldpath, NULL)) { | ||
1137 | errno = err_win_to_posix(GetLastError()); | ||
1138 | return -1; | ||
1139 | } | ||
1140 | return 0; | ||
1141 | } | ||
diff --git a/libbb/quote.c b/libbb/quote.c new file mode 100644 index 000000000..7a49fcf69 --- /dev/null +++ b/libbb/quote.c | |||
@@ -0,0 +1,478 @@ | |||
1 | #include "cache.h" | ||
2 | #include "quote.h" | ||
3 | |||
4 | int quote_path_fully = 1; | ||
5 | |||
/* Help to copy the thing properly quoted for the shell safety.
 * Any single quote is replaced with '\'', any exclamation point
 * is replaced with '\!', and the whole thing is enclosed in a
 * single quote pair.
 *
 * E.g.
 *  original     sq_quote     result
 *  name     ==> name      ==> 'name'
 *  a b      ==> a b       ==> 'a b'
 *  a'b      ==> a'\''b    ==> 'a'\''b'
 *  a!b      ==> a'\!'b    ==> 'a'\!'b'
 */

/* Return 1 for the two characters that must leave the single quotes. */
static inline int need_bs_quote(char c)
{
	switch (c) {
	case '\'':
	case '!':
		return 1;
	default:
		return 0;
	}
}
21 | |||
22 | void sq_quote_buf(struct strbuf *dst, const char *src) | ||
23 | { | ||
24 | char *to_free = NULL; | ||
25 | |||
26 | if (dst->buf == src) | ||
27 | to_free = strbuf_detach(dst, NULL); | ||
28 | |||
29 | strbuf_addch(dst, '\''); | ||
30 | while (*src) { | ||
31 | size_t len = strcspn(src, "'!"); | ||
32 | strbuf_add(dst, src, len); | ||
33 | src += len; | ||
34 | while (need_bs_quote(*src)) { | ||
35 | strbuf_addstr(dst, "'\\"); | ||
36 | strbuf_addch(dst, *src++); | ||
37 | strbuf_addch(dst, '\''); | ||
38 | } | ||
39 | } | ||
40 | strbuf_addch(dst, '\''); | ||
41 | free(to_free); | ||
42 | } | ||
43 | |||
/* Write the shell-quoted form of "src" to "stream". */
void sq_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('\'', stream);
	for (p = src; *p; p++) {
		if (!need_bs_quote(*p)) {
			fputc(*p, stream);
			continue;
		}
		/* close the quote, emit \c, reopen the quote */
		fputs("'\\", stream);
		fputc(*p, stream);
		fputc('\'', stream);
	}
	fputc('\'', stream);
}
60 | |||
61 | void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) | ||
62 | { | ||
63 | int i; | ||
64 | |||
65 | /* Copy into destination buffer. */ | ||
66 | strbuf_grow(dst, 255); | ||
67 | for (i = 0; argv[i]; ++i) { | ||
68 | strbuf_addch(dst, ' '); | ||
69 | sq_quote_buf(dst, argv[i]); | ||
70 | if (maxlen && dst->len > maxlen) | ||
71 | die("Too many or long arguments"); | ||
72 | } | ||
73 | } | ||
74 | |||
/*
 * Undo sq_quote on the first quoted word of "arg", in place.
 *
 * "arg" must start with a single quote.  On success the dequoted,
 * NUL-terminated word is left at "arg" and "arg" is returned.  If the
 * word ends the string, *next (when given) is set to NULL.  If the word
 * is followed by whitespace, "next" must be non-NULL and is pointed at
 * the first character after that whitespace.  Returns NULL on
 * malformed input; the buffer may have been partially rewritten.
 */
char *sq_dequote_step(char *arg, char **next)
{
	char *in = arg;
	char *out = arg;

	if (*in != '\'')
		return NULL;

	for (;;) {
		char ch = *++in;

		if (ch == '\0')
			return NULL;	/* ran off the end inside quotes */
		if (ch != '\'') {
			*out++ = ch;
			continue;
		}

		/* We stepped out of sq */
		ch = *++in;
		if (ch == '\0') {
			*out = 0;
			if (next)
				*next = NULL;
			return arg;
		}
		if (ch == '\\') {
			/* expect \c followed by a reopening quote */
			ch = *++in;
			if (need_bs_quote(ch) && *++in == '\'') {
				*out++ = ch;
				continue;
			}
			/* otherwise treat wherever "in" stopped as a separator */
		}

		if (!next || !isspace(*in))
			return NULL;
		do {
			ch = *++in;
		} while (isspace(ch));
		*out = 0;
		*next = in;
		return arg;
	}
}
117 | |||
/*
 * Dequote a string that consists of exactly one quoted word, in place.
 * Returns "arg" on success and NULL otherwise.
 */
char *sq_dequote(char *arg)
{
	char *word = sq_dequote_step(arg, NULL);

	return word;
}
122 | |||
/*
 * Split "arg" into its quoted words, dequoting each in place and
 * appending the resulting pointers to the growable array
 * (*argv, *nr, *alloc).  Returns 0 on success (also for an empty
 * string) and -1 as soon as a word fails to dequote.
 */
int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
{
	char *cursor = arg;

	if (*arg == '\0')
		return 0;

	while (cursor) {
		/* the step function moves cursor to the next word, or NULL */
		char *word = sq_dequote_step(cursor, &cursor);

		if (!word)
			return -1;
		ALLOC_GROW(*argv, *nr + 1, *alloc);
		(*argv)[(*nr)++] = word;
	}
	return 0;
}
139 | |||
/*
 * Per-byte quoting class for C-style quoting:
 *    1: always quote, as octal
 *    0: quote as octal only if (quote_path_fully)
 *   -1: never quote
 *    c: quote as "\\c"
 */
#define X8(x)   x, x, x, x, x, x, x, x
#define X16(x)  X8(x), X8(x)
static signed char const sq_lookup[256] = {
	/* 0x00 - 0x06 */
	1, 1, 1, 1, 1, 1, 1,
	/* 0x07 - 0x0d: \a \b \t \n \v \f \r */
	'a', 'b', 't', 'n', 'v', 'f', 'r',
	/* 0x0e - 0x0f */
	1, 1,
	/* 0x10 - 0x1f */
	X16(1),
	/* 0x20 - 0x21 */
	-1, -1,
	/* 0x22: double quote */
	'"',
	/* 0x23 - 0x27 */
	-1, -1, -1, -1, -1,
	/* 0x28 - 0x57 */
	X16(-1), X16(-1), X16(-1),
	/* 0x58 - 0x5b */
	-1, -1, -1, -1,
	/* 0x5c: backslash */
	'\\',
	/* 0x5d - 0x5f */
	-1, -1, -1,
	/* 0x60 - 0x77 */
	X16(-1), X8(-1),
	/* 0x78 - 0x7e */
	-1, -1, -1, -1, -1, -1, -1,
	/* 0x7f */
	1,
	/* 0x80 - 0xff: not listed, hence 0 */
};
159 | |||
160 | static inline int sq_must_quote(char c) | ||
161 | { | ||
162 | return sq_lookup[(unsigned char)c] + quote_path_fully > 0; | ||
163 | } | ||
164 | |||
/*
 * Return the length of the longest prefix of "s" that needs no quoting,
 * limited to "maxlen" bytes when maxlen is not negative.  The scan stops
 * at the first \0 because that byte is marked as needing an escape.
 */
static size_t next_quote_pos(const char *s, ssize_t maxlen)
{
	size_t n = 0;

	if (maxlen < 0) {
		while (!sq_must_quote(s[n]))
			n++;
		return n;
	}

	while (n < maxlen && !sq_must_quote(s[n]))
		n++;
	return n;
}
178 | |||
179 | /* | ||
180 | * C-style name quoting. | ||
181 | * | ||
182 | * (1) if sb and fp are both NULL, inspect the input name and counts the | ||
183 | * number of bytes that are needed to hold c_style quoted version of name, | ||
184 | * counting the double quotes around it but not terminating NUL, and | ||
185 | * returns it. | ||
186 | * However, if name does not need c_style quoting, it returns 0. | ||
187 | * | ||
188 | * (2) if sb or fp are not NULL, it emits the c_style quoted version | ||
189 | * of name, enclosed with double quotes if asked and needed only. | ||
190 | * Return value is the same as in (1). | ||
191 | */ | ||
192 | static size_t quote_c_style_counted(const char *name, ssize_t maxlen, | ||
193 | struct strbuf *sb, FILE *fp, int no_dq) | ||
194 | { | ||
195 | #undef EMIT | ||
196 | #define EMIT(c) \ | ||
197 | do { \ | ||
198 | if (sb) strbuf_addch(sb, (c)); \ | ||
199 | if (fp) fputc((c), fp); \ | ||
200 | count++; \ | ||
201 | } while (0) | ||
202 | #define EMITBUF(s, l) \ | ||
203 | do { \ | ||
204 | if (sb) strbuf_add(sb, (s), (l)); \ | ||
205 | if (fp) fwrite((s), (l), 1, fp); \ | ||
206 | count += (l); \ | ||
207 | } while (0) | ||
208 | |||
209 | size_t len, count = 0; | ||
210 | const char *p = name; | ||
211 | |||
212 | for (;;) { | ||
213 | int ch; | ||
214 | |||
215 | len = next_quote_pos(p, maxlen); | ||
216 | if (len == maxlen || !p[len]) | ||
217 | break; | ||
218 | |||
219 | if (!no_dq && p == name) | ||
220 | EMIT('"'); | ||
221 | |||
222 | EMITBUF(p, len); | ||
223 | EMIT('\\'); | ||
224 | p += len; | ||
225 | ch = (unsigned char)*p++; | ||
226 | if (sq_lookup[ch] >= ' ') { | ||
227 | EMIT(sq_lookup[ch]); | ||
228 | } else { | ||
229 | EMIT(((ch >> 6) & 03) + '0'); | ||
230 | EMIT(((ch >> 3) & 07) + '0'); | ||
231 | EMIT(((ch >> 0) & 07) + '0'); | ||
232 | } | ||
233 | } | ||
234 | |||
235 | EMITBUF(p, len); | ||
236 | if (p == name) /* no ending quote needed */ | ||
237 | return 0; | ||
238 | |||
239 | if (!no_dq) | ||
240 | EMIT('"'); | ||
241 | return count; | ||
242 | } | ||
243 | |||
/*
 * C-style quote a NUL-terminated "name"; see quote_c_style_counted()
 * for the meaning of sb, fp, nodq and the return value.
 */
size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
{
	const ssize_t whole_string = -1;

	return quote_c_style_counted(name, whole_string, sb, fp, nodq);
}
248 | |||
/*
 * Append "prefix" immediately followed by "path" to "sb".  If either
 * part needs C-style quoting, both are escaped and (unless nodq) the
 * pair is wrapped in one set of double quotes; otherwise both are
 * appended verbatim.
 */
void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
{
	int must_quote = quote_c_style(prefix, NULL, NULL, 0) ||
			 quote_c_style(path, NULL, NULL, 0);

	if (!must_quote) {
		strbuf_addstr(sb, prefix);
		strbuf_addstr(sb, path);
		return;
	}

	if (!nodq)
		strbuf_addch(sb, '"');
	/* the parts themselves are emitted without their own quotes */
	quote_c_style(prefix, sb, NULL, 1);
	quote_c_style(path, sb, NULL, 1);
	if (!nodq)
		strbuf_addch(sb, '"');
}
264 | |||
/*
 * Write "name" followed by the "terminator" byte to "fp".  With a NUL
 * terminator the name is written verbatim; otherwise it is C-style
 * quoted when needed.
 */
void write_name_quoted(const char *name, FILE *fp, int terminator)
{
	if (!terminator)
		fputs(name, fp);
	else
		quote_c_style(name, NULL, fp, 0);
	fputc(terminator, fp);
}
274 | |||
/*
 * Write the first "pfxlen" bytes of "pfx", then "name", then the
 * "terminator" byte to "fp".  With a non-NUL terminator, the pair is
 * C-style quoted inside one set of double quotes if either part needs
 * it; otherwise both are written verbatim.
 */
extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
				 const char *name, FILE *fp, int terminator)
{
	int needquote = 0;

	if (terminator)
		needquote = next_quote_pos(pfx, pfxlen) < pfxlen
			|| name[next_quote_pos(name, -1)];

	if (!needquote) {
		fwrite(pfx, pfxlen, 1, fp);
		fputs(name, fp);
	} else {
		fputc('"', fp);
		quote_c_style_counted(pfx, pfxlen, NULL, fp, 1);
		quote_c_style(name, NULL, fp, 1);
		fputc('"', fp);
	}
	fputc(terminator, fp);
}
295 | |||
296 | /* quote path as relative to the given prefix */ | ||
297 | char *quote_path_relative(const char *in, int len, | ||
298 | struct strbuf *out, const char *prefix) | ||
299 | { | ||
300 | int needquote; | ||
301 | |||
302 | if (len < 0) | ||
303 | len = strlen(in); | ||
304 | |||
305 | /* "../" prefix itself does not need quoting, but "in" might. */ | ||
306 | needquote = next_quote_pos(in, len) < len; | ||
307 | strbuf_setlen(out, 0); | ||
308 | strbuf_grow(out, len); | ||
309 | |||
310 | if (needquote) | ||
311 | strbuf_addch(out, '"'); | ||
312 | if (prefix) { | ||
313 | int off = 0; | ||
314 | while (prefix[off] && off < len && prefix[off] == in[off]) | ||
315 | if (prefix[off] == '/') { | ||
316 | prefix += off + 1; | ||
317 | in += off + 1; | ||
318 | len -= off + 1; | ||
319 | off = 0; | ||
320 | } else | ||
321 | off++; | ||
322 | |||
323 | for (; *prefix; prefix++) | ||
324 | if (*prefix == '/') | ||
325 | strbuf_addstr(out, "../"); | ||
326 | } | ||
327 | |||
328 | quote_c_style_counted (in, len, out, NULL, 1); | ||
329 | |||
330 | if (needquote) | ||
331 | strbuf_addch(out, '"'); | ||
332 | if (!out->len) | ||
333 | strbuf_addstr(out, "./"); | ||
334 | |||
335 | return out->buf; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * C-style name unquoting. | ||
340 | * | ||
341 | * Quoted should point at the opening double quote. | ||
342 | * + Returns 0 if it was able to unquote the string properly, and appends the | ||
343 | * result in the strbuf `sb'. | ||
344 | * + Returns -1 in case of error, and doesn't touch the strbuf. Though note | ||
345 | * that this function will allocate memory in the strbuf, so calling | ||
346 | * strbuf_release is mandatory whichever result unquote_c_style returns. | ||
347 | * | ||
348 | * Updates endp pointer to point at one past the ending double quote if given. | ||
349 | */ | ||
350 | int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp) | ||
351 | { | ||
352 | size_t oldlen = sb->len, len; | ||
353 | int ch, ac; | ||
354 | |||
355 | if (*quoted++ != '"') | ||
356 | return -1; | ||
357 | |||
358 | for (;;) { | ||
359 | len = strcspn(quoted, "\"\\"); | ||
360 | strbuf_add(sb, quoted, len); | ||
361 | quoted += len; | ||
362 | |||
363 | switch (*quoted++) { | ||
364 | case '"': | ||
365 | if (endp) | ||
366 | *endp = quoted; | ||
367 | return 0; | ||
368 | case '\\': | ||
369 | break; | ||
370 | default: | ||
371 | goto error; | ||
372 | } | ||
373 | |||
374 | switch ((ch = *quoted++)) { | ||
375 | case 'a': ch = '\a'; break; | ||
376 | case 'b': ch = '\b'; break; | ||
377 | case 'f': ch = '\f'; break; | ||
378 | case 'n': ch = '\n'; break; | ||
379 | case 'r': ch = '\r'; break; | ||
380 | case 't': ch = '\t'; break; | ||
381 | case 'v': ch = '\v'; break; | ||
382 | |||
383 | case '\\': case '"': | ||
384 | break; /* verbatim */ | ||
385 | |||
386 | /* octal values with first digit over 4 overflow */ | ||
387 | case '0': case '1': case '2': case '3': | ||
388 | ac = ((ch - '0') << 6); | ||
389 | if ((ch = *quoted++) < '0' || '7' < ch) | ||
390 | goto error; | ||
391 | ac |= ((ch - '0') << 3); | ||
392 | if ((ch = *quoted++) < '0' || '7' < ch) | ||
393 | goto error; | ||
394 | ac |= (ch - '0'); | ||
395 | ch = ac; | ||
396 | break; | ||
397 | default: | ||
398 | goto error; | ||
399 | } | ||
400 | strbuf_addch(sb, ch); | ||
401 | } | ||
402 | |||
403 | error: | ||
404 | strbuf_setlen(sb, oldlen); | ||
405 | return -1; | ||
406 | } | ||
407 | |||
408 | /* quoting as a string literal for other languages */ | ||
409 | |||
/*
 * Write "src" to "stream" as a single-quoted Perl string literal:
 * single quotes and backslashes are preceded by a backslash.
 */
void perl_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('\'', stream);
	for (p = src; *p != '\0'; p++) {
		switch (*p) {
		case '\'':
		case '\\':
			fputc('\\', stream);
			break;
		default:
			break;
		}
		fputc(*p, stream);
	}
	fputc('\'', stream);
}
424 | |||
/*
 * Write "src" to "stream" as a single-quoted Python string literal:
 * newlines become \n, single quotes and backslashes are preceded by a
 * backslash.
 */
void python_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('\'', stream);
	for (p = src; *p != '\0'; p++) {
		char c = *p;

		if (c == '\n') {
			fputs("\\n", stream);
			continue;
		}
		if (c == '\'' || c == '\\')
			fputc('\\', stream);
		fputc(c, stream);
	}
	fputc('\'', stream);
}
445 | |||
/*
 * Write "src" to "stream" as a double-quoted Tcl string: brackets,
 * braces, '$', backslash and double quote get a leading backslash, and
 * form feed, CR, LF, tab and vertical tab use their letter escapes.
 */
void tcl_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('"', stream);
	for (p = src; *p != '\0'; p++) {
		const char c = *p;

		switch (c) {
		case '\f':
			fputs("\\f", stream);
			break;
		case '\r':
			fputs("\\r", stream);
			break;
		case '\n':
			fputs("\\n", stream);
			break;
		case '\t':
			fputs("\\t", stream);
			break;
		case '\v':
			fputs("\\v", stream);
			break;
		case '[': case ']':
		case '{': case '}':
		case '$': case '\\': case '"':
			fputc('\\', stream);
			/* fall through: the character itself follows */
		default:
			fputc(c, stream);
			break;
		}
	}
	fputc('"', stream);
}
diff --git a/libbb/regex.c b/libbb/regex.c new file mode 100644 index 000000000..87b33e466 --- /dev/null +++ b/libbb/regex.c | |||
@@ -0,0 +1,4927 @@ | |||
1 | /* Extended regular expression matching and search library, | ||
2 | version 0.12. | ||
3 | (Implements POSIX draft P10003.2/D11.2, except for | ||
4 | internationalization features.) | ||
5 | |||
6 | Copyright (C) 1993 Free Software Foundation, Inc. | ||
7 | |||
8 | This program is free software; you can redistribute it and/or modify | ||
9 | it under the terms of the GNU General Public License as published by | ||
10 | the Free Software Foundation; either version 2, or (at your option) | ||
11 | any later version. | ||
12 | |||
13 | This program is distributed in the hope that it will be useful, | ||
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | GNU General Public License for more details. | ||
17 | |||
18 | You should have received a copy of the GNU General Public License | ||
19 | along with this program; if not, write to the Free Software | ||
20 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ | ||
21 | |||
22 | /* AIX requires this to be the first thing in the file. */ | ||
23 | #if defined (_AIX) && !defined (REGEX_MALLOC) | ||
24 | #pragma alloca | ||
25 | #endif | ||
26 | |||
27 | #define _GNU_SOURCE | ||
28 | |||
29 | /* We need this for `regex.h', and perhaps for the Emacs include files. */ | ||
30 | #include <sys/types.h> | ||
31 | |||
32 | /* We used to test for `BSTRING' here, but only GCC and Emacs define | ||
33 | `BSTRING', as far as I know, and neither of them use this code. */ | ||
34 | #include <string.h> | ||
35 | #ifndef bcmp | ||
36 | #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) | ||
37 | #endif | ||
38 | #ifndef bcopy | ||
39 | #define bcopy(s, d, n) memcpy ((d), (s), (n)) | ||
40 | #endif | ||
41 | #ifndef bzero | ||
42 | #define bzero(s, n) memset ((s), 0, (n)) | ||
43 | #endif | ||
44 | |||
45 | #include <stdlib.h> | ||
46 | |||
47 | |||
48 | /* Define the syntax stuff for \<, \>, etc. */ | ||
49 | |||
50 | /* This must be nonzero for the wordchar and notwordchar pattern | ||
51 | commands in re_match_2. */ | ||
52 | #ifndef Sword | ||
53 | #define Sword 1 | ||
54 | #endif | ||
55 | |||
56 | #ifdef SYNTAX_TABLE | ||
57 | |||
58 | extern char *re_syntax_table; | ||
59 | |||
60 | #else /* not SYNTAX_TABLE */ | ||
61 | |||
62 | /* How many characters in the character set. */ | ||
63 | #define CHAR_SET_SIZE 256 | ||
64 | |||
65 | static char re_syntax_table[CHAR_SET_SIZE]; | ||
66 | |||
67 | static void | ||
68 | init_syntax_once () | ||
69 | { | ||
70 | register int c; | ||
71 | static int done = 0; | ||
72 | |||
73 | if (done) | ||
74 | return; | ||
75 | |||
76 | bzero (re_syntax_table, sizeof re_syntax_table); | ||
77 | |||
78 | for (c = 'a'; c <= 'z'; c++) | ||
79 | re_syntax_table[c] = Sword; | ||
80 | |||
81 | for (c = 'A'; c <= 'Z'; c++) | ||
82 | re_syntax_table[c] = Sword; | ||
83 | |||
84 | for (c = '0'; c <= '9'; c++) | ||
85 | re_syntax_table[c] = Sword; | ||
86 | |||
87 | re_syntax_table['_'] = Sword; | ||
88 | |||
89 | done = 1; | ||
90 | } | ||
91 | |||
92 | #endif /* not SYNTAX_TABLE */ | ||
93 | |||
94 | #define SYNTAX(c) re_syntax_table[c] | ||
95 | |||
96 | |||
97 | /* Get the interface, including the syntax bits. */ | ||
98 | #include "regex.h" | ||
99 | |||
100 | /* isalpha etc. are used for the character classes. */ | ||
101 | #include <ctype.h> | ||
102 | |||
103 | #ifndef isascii | ||
104 | #define isascii(c) 1 | ||
105 | #endif | ||
106 | |||
107 | #ifdef isblank | ||
108 | #define ISBLANK(c) (isascii (c) && isblank (c)) | ||
109 | #else | ||
110 | #define ISBLANK(c) ((c) == ' ' || (c) == '\t') | ||
111 | #endif | ||
112 | #ifdef isgraph | ||
113 | #define ISGRAPH(c) (isascii (c) && isgraph (c)) | ||
114 | #else | ||
115 | #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) | ||
116 | #endif | ||
117 | |||
118 | #define ISPRINT(c) (isascii (c) && isprint (c)) | ||
119 | #define ISDIGIT(c) (isascii (c) && isdigit (c)) | ||
120 | #define ISALNUM(c) (isascii (c) && isalnum (c)) | ||
121 | #define ISALPHA(c) (isascii (c) && isalpha (c)) | ||
122 | #define ISCNTRL(c) (isascii (c) && iscntrl (c)) | ||
123 | #define ISLOWER(c) (isascii (c) && islower (c)) | ||
124 | #define ISPUNCT(c) (isascii (c) && ispunct (c)) | ||
125 | #define ISSPACE(c) (isascii (c) && isspace (c)) | ||
126 | #define ISUPPER(c) (isascii (c) && isupper (c)) | ||
127 | #define ISXDIGIT(c) (isascii (c) && isxdigit (c)) | ||
128 | |||
129 | #ifndef NULL | ||
130 | #define NULL 0 | ||
131 | #endif | ||
132 | |||
133 | /* We remove any previous definition of `SIGN_EXTEND_CHAR', | ||
134 | since ours (we hope) works properly with all combinations of | ||
135 | machines, compilers, `char' and `unsigned char' argument types. | ||
136 | (Per Bothner suggested the basic approach.) */ | ||
137 | #undef SIGN_EXTEND_CHAR | ||
138 | #if __STDC__ | ||
139 | #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) | ||
140 | #else /* not __STDC__ */ | ||
141 | /* As in Harbison and Steele. */ | ||
142 | #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) | ||
143 | #endif | ||
144 | |||
145 | /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we | ||
146 | use `alloca' instead of `malloc'. This is because using malloc in | ||
147 | re_search* or re_match* could cause memory leaks when C-g is used in | ||
148 | Emacs; also, malloc is slower and causes storage fragmentation. On | ||
149 | the other hand, malloc is more portable, and easier to debug. | ||
150 | |||
151 | Because we sometimes use alloca, some routines have to be macros, | ||
152 | not functions -- `alloca'-allocated space disappears at the end of the | ||
153 | function it is called in. */ | ||
154 | |||
155 | #ifdef REGEX_MALLOC | ||
156 | |||
157 | #define REGEX_ALLOCATE malloc | ||
158 | #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) | ||
159 | |||
160 | #else /* not REGEX_MALLOC */ | ||
161 | |||
162 | /* Emacs already defines alloca, sometimes. */ | ||
163 | #ifndef alloca | ||
164 | |||
165 | /* Make alloca work the best possible way. */ | ||
166 | #ifdef __GNUC__ | ||
167 | #define alloca __builtin_alloca | ||
168 | #else /* not __GNUC__ */ | ||
169 | #if HAVE_ALLOCA_H | ||
170 | #include <alloca.h> | ||
171 | #else /* not __GNUC__ or HAVE_ALLOCA_H */ | ||
172 | #ifndef _AIX /* Already did AIX, up at the top. */ | ||
173 | char *alloca (); | ||
174 | #endif /* not _AIX */ | ||
175 | #endif /* not HAVE_ALLOCA_H */ | ||
176 | #endif /* not __GNUC__ */ | ||
177 | |||
178 | #endif /* not alloca */ | ||
179 | |||
180 | #define REGEX_ALLOCATE alloca | ||
181 | |||
182 | /* Assumes a `char *destination' variable. */ | ||
183 | #define REGEX_REALLOCATE(source, osize, nsize) \ | ||
184 | (destination = (char *) alloca (nsize), \ | ||
185 | bcopy (source, destination, osize), \ | ||
186 | destination) | ||
187 | |||
188 | #endif /* not REGEX_MALLOC */ | ||
189 | |||
190 | |||
191 | /* True if `size1' is non-NULL and PTR is pointing anywhere inside | ||
192 | `string1' or just past its end. This works if PTR is NULL, which is | ||
193 | a good thing. */ | ||
194 | #define FIRST_STRING_P(ptr) \ | ||
195 | (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) | ||
196 | |||
197 | /* (Re)Allocate N items of type T using malloc, or fail. */ | ||
198 | #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) | ||
199 | #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) | ||
200 | #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) | ||
201 | |||
202 | #define BYTEWIDTH 8 /* In bits. */ | ||
203 | |||
204 | #define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) | ||
205 | |||
206 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) | ||
207 | #define MIN(a, b) ((a) < (b) ? (a) : (b)) | ||
208 | |||
209 | typedef char boolean; | ||
210 | #define false 0 | ||
211 | #define true 1 | ||
212 | |||
213 | /* These are the command codes that appear in compiled regular | ||
214 | expressions; each one is stored as a single byte. Some opcodes are followed by argument bytes. A | ||
215 | command code can specify any interpretation whatsoever for its | ||
216 | arguments. Zero bytes may appear in the compiled regular expression. | ||
217 | |||
218 | The value of `exactn' is needed in search.c (search_buffer) in Emacs. | ||
219 | So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of | ||
220 | `exactn' we use here must also be 1. */ | ||
221 | |||
222 | typedef enum | ||
223 | { | ||
224 | no_op = 0, | ||
225 | |||
226 | /* Followed by one byte giving n, then by n literal bytes. */ | ||
227 | exactn = 1, | ||
228 | |||
229 | /* Matches any (more or less) character. */ | ||
230 | anychar, | ||
231 | |||
232 | /* Matches any one char belonging to specified set. First | ||
233 | following byte is number of bitmap bytes. Then come bytes | ||
234 | for a bitmap saying which chars are in. Bits in each byte | ||
235 | are ordered low-bit-first. A character is in the set if its | ||
236 | bit is 1. A character too large to have a bit in the map is | ||
237 | automatically not in the set. */ | ||
238 | charset, | ||
239 | |||
240 | /* Same parameters as charset, but match any character that is | ||
241 | not one of those specified. */ | ||
242 | charset_not, | ||
243 | |||
244 | /* Start remembering the text that is matched, for storing in a | ||
245 | register. Followed by one byte with the register number, in | ||
246 | the range 0 to one less than the pattern buffer's re_nsub | ||
247 | field. Then followed by one byte with the number of groups | ||
248 | inner to this one. (This last has to be part of the | ||
249 | start_memory only because we need it in the on_failure_jump | ||
250 | of re_match_2.) */ | ||
251 | start_memory, | ||
252 | |||
253 | /* Stop remembering the text that is matched and store it in a | ||
254 | memory register. Followed by one byte with the register | ||
255 | number, in the range 0 to one less than `re_nsub' in the | ||
256 | pattern buffer, and one byte with the number of inner groups, | ||
257 | just like `start_memory'. (We need the number of inner | ||
258 | groups here because we don't have any easy way of finding the | ||
259 | corresponding start_memory when we're at a stop_memory.) */ | ||
260 | stop_memory, | ||
261 | |||
262 | /* Match a duplicate of something remembered. Followed by one | ||
263 | byte containing the register number. */ | ||
264 | duplicate, | ||
265 | |||
266 | /* Fail unless at beginning of line. */ | ||
267 | begline, | ||
268 | |||
269 | /* Fail unless at end of line. */ | ||
270 | endline, | ||
271 | |||
272 | /* Succeeds if at beginning of buffer (if emacs) or at beginning | ||
273 | of string to be matched (if not). */ | ||
274 | begbuf, | ||
275 | |||
276 | /* Analogously, for end of buffer/string. */ | ||
277 | endbuf, | ||
278 | |||
279 | /* Followed by two-byte relative address to which to jump. */ | ||
280 | jump, | ||
281 | |||
282 | /* Same as jump, but marks the end of an alternative. */ | ||
283 | jump_past_alt, | ||
284 | |||
285 | /* Followed by two-byte relative address of place to resume at | ||
286 | in case of failure. */ | ||
287 | on_failure_jump, | ||
288 | |||
289 | /* Like on_failure_jump, but pushes a placeholder instead of the | ||
290 | current string position when executed. */ | ||
291 | on_failure_keep_string_jump, | ||
292 | |||
293 | /* Throw away latest failure point and then jump to following | ||
294 | two-byte relative address. */ | ||
295 | pop_failure_jump, | ||
296 | |||
297 | /* Change to pop_failure_jump if we know we won't have to backtrack to | ||
298 | match; otherwise change to jump. This is used to jump | ||
299 | back to the beginning of a repeat. If what follows this jump | ||
300 | clearly won't match what the repeat does, such that we can be | ||
301 | sure that there is no use backtracking out of repetitions | ||
302 | already matched, then we change it to a pop_failure_jump. | ||
303 | Followed by two-byte address. */ | ||
304 | maybe_pop_jump, | ||
305 | |||
306 | /* Jump to following two-byte address, and push a dummy failure | ||
307 | point. This failure point will be thrown away if an attempt | ||
308 | is made to use it for a failure. A `+' construct makes this | ||
309 | before the first repeat. Also used as an intermediary kind | ||
310 | of jump when compiling an alternative. */ | ||
311 | dummy_failure_jump, | ||
312 | |||
313 | /* Push a dummy failure point and continue. Used at the end of | ||
314 | alternatives. */ | ||
315 | push_dummy_failure, | ||
316 | |||
317 | /* Followed by two-byte relative address and two-byte number n. | ||
318 | After matching N times, jump to the address upon failure. */ | ||
319 | succeed_n, | ||
320 | |||
321 | /* Followed by two-byte relative address, and two-byte number n. | ||
322 | Jump to the address N times, then fail. */ | ||
323 | jump_n, | ||
324 | |||
325 | /* Set the following two-byte relative address to the | ||
326 | subsequent two-byte number. The address *includes* the two | ||
327 | bytes of number. */ | ||
328 | set_number_at, | ||
329 | |||
330 | wordchar, /* Matches any word-constituent character. */ | ||
331 | notwordchar, /* Matches any char that is not a word-constituent. */ | ||
332 | |||
333 | wordbeg, /* Succeeds if at word beginning. */ | ||
334 | wordend, /* Succeeds if at word end. */ | ||
335 | |||
336 | wordbound, /* Succeeds if at a word boundary. */ | ||
337 | notwordbound /* Succeeds if not at a word boundary. */ | ||
338 | |||
339 | #ifdef emacs | ||
340 | ,before_dot, /* Succeeds if before point. */ | ||
341 | at_dot, /* Succeeds if at point. */ | ||
342 | after_dot, /* Succeeds if after point. */ | ||
343 | |||
344 | /* Matches any character whose syntax is specified. Followed by | ||
345 | a byte which contains a syntax code, e.g., Sword. */ | ||
346 | syntaxspec, | ||
347 | |||
348 | /* Matches any character whose syntax is not that specified. */ | ||
349 | notsyntaxspec | ||
350 | #endif /* emacs */ | ||
351 | } re_opcode_t; | ||
352 | |||
353 | /* Common operations on the compiled pattern. */ | ||
354 | |||
355 | /* Store NUMBER in two contiguous bytes starting at DESTINATION, low-order byte first. Each argument is evaluated twice. */ | ||
356 | |||
357 | #define STORE_NUMBER(destination, number) \ | ||
358 | do { \ | ||
359 | (destination)[0] = (number) & 0377; \ | ||
360 | (destination)[1] = (number) >> 8; \ | ||
361 | } while (0) | ||
362 | |||
363 | /* Same as STORE_NUMBER, except increment DESTINATION to | ||
364 | the byte after where the number is stored. Therefore, DESTINATION | ||
365 | must be an lvalue. */ | ||
366 | |||
367 | #define STORE_NUMBER_AND_INCR(destination, number) \ | ||
368 | do { \ | ||
369 | STORE_NUMBER (destination, number); \ | ||
370 | (destination) += 2; \ | ||
371 | } while (0) | ||
372 | |||
373 | /* Put into DESTINATION the number stored in the two contiguous bytes starting | ||
374 | at SOURCE: the first byte is the low-order part; the second is passed through SIGN_EXTEND_CHAR and shifted up to form the high-order part. */ | ||
375 | |||
376 | #define EXTRACT_NUMBER(destination, source) \ | ||
377 | do { \ | ||
378 | (destination) = *(source) & 0377; \ | ||
379 | (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ | ||
380 | } while (0) | ||
381 | /* Function form of EXTRACT_NUMBER: store in *DEST the two-byte number found at SOURCE (low byte first, second byte run through SIGN_EXTEND_CHAR). */ | ||
382 | #ifdef DEBUG | ||
383 | static void | ||
384 | extract_number (dest, source) | ||
385 | int *dest; | ||
386 | unsigned char *source; | ||
387 | { | ||
388 | const int high_part = SIGN_EXTEND_CHAR (source[1]); | ||
389 | const int low_part = source[0] & 0377; | ||
390 | *dest = low_part + (high_part << 8); | ||
391 | } | ||
392 | |||
393 | #ifndef EXTRACT_MACROS /* Define EXTRACT_MACROS to keep the macro form, i.e. to debug the macros. */ | ||
394 | #undef EXTRACT_NUMBER | ||
395 | #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) | ||
396 | #endif /* not EXTRACT_MACROS */ | ||
397 | |||
398 | #endif /* DEBUG */ | ||
399 | |||
400 | /* Same as EXTRACT_NUMBER, then advance SOURCE past the two bytes just read. | ||
401 | SOURCE must therefore be an lvalue. */ | ||
402 | |||
403 | #define EXTRACT_NUMBER_AND_INCR(destination, source) \ | ||
404 | do { \ | ||
405 | EXTRACT_NUMBER (destination, source); \ | ||
406 | (source) += 2; \ | ||
407 | } while (0) | ||
408 | /* Function form of EXTRACT_NUMBER_AND_INCR: extract the number at *SOURCE into *DESTINATION, then step *SOURCE over its two bytes. */ | ||
409 | #ifdef DEBUG | ||
410 | static void | ||
411 | extract_number_and_incr (destination, source) | ||
412 | int *destination; | ||
413 | unsigned char **source; | ||
414 | { | ||
415 | extract_number (destination, source[0]); | ||
416 | source[0] = source[0] + 2; | ||
417 | } | ||
418 | |||
419 | #ifndef EXTRACT_MACROS | ||
420 | #undef EXTRACT_NUMBER_AND_INCR | ||
421 | #define EXTRACT_NUMBER_AND_INCR(dest, src) \ | ||
422 | extract_number_and_incr (&dest, &src) | ||
423 | #endif /* not EXTRACT_MACROS */ | ||
424 | |||
425 | #endif /* DEBUG */ | ||
426 | |||
427 | /* If DEBUG is defined, Regex prints many voluminous messages about what | ||
428 | it is doing (if the variable `debug' is nonzero). If linked with the | ||
429 | main program in `iregex.c', you can enter patterns and strings | ||
430 | interactively. And if linked with the main program in `main.c' and | ||
431 | the other test files, you can run the already-written tests. */ | ||
432 | |||
433 | #ifdef DEBUG | ||
434 | |||
435 | /* We use standard I/O for debugging. */ | ||
436 | #include <stdio.h> | ||
437 | |||
438 | /* It is useful to test things that ``must'' be true when debugging. */ | ||
439 | #include <assert.h> | ||
440 | |||
441 | static int debug = 0; /* Nonzero turns on the output of the DEBUG_PRINT* macros below. */ | ||
442 | |||
443 | #define DEBUG_STATEMENT(e) e /* Expands to E itself. */ | ||
444 | #define DEBUG_PRINT1(x) if (debug) printf (x) /* Each DEBUG_PRINT* macro expands to a bare, unbraced `if': beware of a following `else'. */ | ||
445 | #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) | ||
446 | #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) | ||
447 | #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) | ||
448 | #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) /* P is not used. */ \ | ||
449 | if (debug) print_partial_compiled_pattern (s, e) | ||
450 | #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ | ||
451 | if (debug) print_double_string (w, s1, sz1, s2, sz2) | ||
452 | |||
453 | |||
454 | extern void printchar (); /* Old-style declaration; every call in this section passes a single character value. */ | ||
455 | |||
456 | /* Print every character whose FASTMAP entry is nonzero, showing each run of two or more consecutive characters as FIRST-LAST, then end the line. */ | ||
457 | |||
458 | void | ||
459 | print_fastmap (fastmap) | ||
460 | char *fastmap; | ||
461 | { | ||
462 | const unsigned limit = 1 << BYTEWIDTH; | ||
463 | unsigned pos = 0; | ||
464 | |||
465 | while (pos < limit) | ||
466 | { | ||
467 | const unsigned run_start = pos++; | ||
468 | |||
469 | if (!fastmap[run_start]) | ||
470 | continue; | ||
471 | |||
472 | printchar (run_start); | ||
473 | /* Swallow the rest of the run that begins at RUN_START. */ | ||
474 | while (pos < limit && fastmap[pos]) | ||
475 | pos++; | ||
476 | |||
477 | if (pos - run_start > 1) | ||
478 | { | ||
479 | printf ("-"); | ||
480 | printchar (pos - 1); | ||
481 | } | ||
482 | } | ||
483 | putchar ('\n'); | ||
484 | } | ||
485 | |||
486 | |||
487 | /* Print the compiled pattern between START and END (END itself excluded) in human-readable form: | ||
488 | one `/name' per command followed by its arguments, then a closing `/' and newline. A null START prints "(null)". */ | ||
489 | |||
490 | void | ||
491 | print_partial_compiled_pattern (start, end) | ||
492 | unsigned char *start; | ||
493 | unsigned char *end; | ||
494 | { | ||
495 | int mcnt, mcnt2; | ||
496 | unsigned char *p = start; | ||
497 | unsigned char *pend = end; | ||
498 | |||
499 | if (start == NULL) | ||
500 | { | ||
501 | printf ("(null)\n"); | ||
502 | return; | ||
503 | } | ||
504 | |||
505 | /* Loop over pattern commands. */ | ||
506 | while (p < pend) | ||
507 | { | ||
508 | switch ((re_opcode_t) *p++) | ||
509 | { | ||
510 | case no_op: | ||
511 | printf ("/no_op"); | ||
512 | break; | ||
513 | |||
514 | case exactn: | ||
515 | mcnt = *p++; | ||
516 | printf ("/exactn/%d", mcnt); | ||
517 | do | ||
518 | { | ||
519 | putchar ('/'); | ||
520 | printchar (*p++); | ||
521 | } | ||
522 | while (--mcnt); | ||
523 | break; | ||
524 | |||
525 | case start_memory: | ||
526 | mcnt = *p++; | ||
527 | printf ("/start_memory/%d/%d", mcnt, *p++); | ||
528 | break; | ||
529 | |||
530 | case stop_memory: | ||
531 | mcnt = *p++; | ||
532 | printf ("/stop_memory/%d/%d", mcnt, *p++); | ||
533 | break; | ||
534 | |||
535 | case duplicate: | ||
536 | printf ("/duplicate/%d", *p++); | ||
537 | break; | ||
538 | |||
539 | case anychar: | ||
540 | printf ("/anychar"); | ||
541 | break; | ||
542 | |||
543 | case charset: | ||
544 | case charset_not: | ||
545 | { | ||
546 | register int c; | ||
547 | |||
548 | printf ("/charset%s", | ||
549 | (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); | ||
550 | |||
551 | assert (p + *p < pend); /* The whole bitmap must lie inside the pattern. */ | ||
552 | |||
553 | for (c = 0; c < *p; c++) | ||
554 | { | ||
555 | unsigned bit; | ||
556 | unsigned char map_byte = p[1 + c]; | ||
557 | |||
558 | putchar ('/'); | ||
559 | |||
560 | for (bit = 0; bit < BYTEWIDTH; bit++) | ||
561 | if (map_byte & (1 << bit)) | ||
562 | printchar (c * BYTEWIDTH + bit); | ||
563 | } | ||
564 | p += 1 + *p; /* Skip the length byte and the bitmap. */ | ||
565 | break; | ||
566 | } | ||
567 | |||
568 | case begline: | ||
569 | printf ("/begline"); | ||
570 | break; | ||
571 | |||
572 | case endline: | ||
573 | printf ("/endline"); | ||
574 | break; | ||
575 | |||
576 | case on_failure_jump: | ||
577 | extract_number_and_incr (&mcnt, &p); | ||
578 | printf ("/on_failure_jump/0/%d", mcnt); | ||
579 | break; | ||
580 | |||
581 | case on_failure_keep_string_jump: | ||
582 | extract_number_and_incr (&mcnt, &p); | ||
583 | printf ("/on_failure_keep_string_jump/0/%d", mcnt); | ||
584 | break; | ||
585 | |||
586 | case dummy_failure_jump: | ||
587 | extract_number_and_incr (&mcnt, &p); | ||
588 | printf ("/dummy_failure_jump/0/%d", mcnt); | ||
589 | break; | ||
590 | |||
591 | case push_dummy_failure: | ||
592 | printf ("/push_dummy_failure"); | ||
593 | break; | ||
594 | |||
595 | case maybe_pop_jump: | ||
596 | extract_number_and_incr (&mcnt, &p); | ||
597 | printf ("/maybe_pop_jump/0/%d", mcnt); | ||
598 | break; | ||
599 | |||
600 | case pop_failure_jump: | ||
601 | extract_number_and_incr (&mcnt, &p); | ||
602 | printf ("/pop_failure_jump/0/%d", mcnt); | ||
603 | break; | ||
604 | |||
605 | case jump_past_alt: | ||
606 | extract_number_and_incr (&mcnt, &p); | ||
607 | printf ("/jump_past_alt/0/%d", mcnt); | ||
608 | break; | ||
609 | |||
610 | case jump: | ||
611 | extract_number_and_incr (&mcnt, &p); | ||
612 | printf ("/jump/0/%d", mcnt); | ||
613 | break; | ||
614 | |||
615 | case succeed_n: | ||
616 | extract_number_and_incr (&mcnt, &p); | ||
617 | extract_number_and_incr (&mcnt2, &p); | ||
618 | printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2); | ||
619 | break; | ||
620 | |||
621 | case jump_n: | ||
622 | extract_number_and_incr (&mcnt, &p); | ||
623 | extract_number_and_incr (&mcnt2, &p); | ||
624 | printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2); | ||
625 | break; | ||
626 | |||
627 | case set_number_at: | ||
628 | extract_number_and_incr (&mcnt, &p); | ||
629 | extract_number_and_incr (&mcnt2, &p); | ||
630 | printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2); | ||
631 | break; | ||
632 | |||
633 | case wordbound: | ||
634 | printf ("/wordbound"); | ||
635 | break; | ||
636 | |||
637 | case notwordbound: | ||
638 | printf ("/notwordbound"); | ||
639 | break; | ||
640 | |||
641 | case wordbeg: | ||
642 | printf ("/wordbeg"); | ||
643 | break; | ||
644 | |||
645 | case wordend: | ||
646 | printf ("/wordend"); | ||
647 | break; /* Keep `wordend' from falling through into the next case. */ | ||
648 | #ifdef emacs | ||
649 | case before_dot: | ||
650 | printf ("/before_dot"); | ||
651 | break; | ||
652 | |||
653 | case at_dot: | ||
654 | printf ("/at_dot"); | ||
655 | break; | ||
656 | |||
657 | case after_dot: | ||
658 | printf ("/after_dot"); | ||
659 | break; | ||
660 | |||
661 | case syntaxspec: | ||
662 | printf ("/syntaxspec"); | ||
663 | mcnt = *p++; | ||
664 | printf ("/%d", mcnt); | ||
665 | break; | ||
666 | |||
667 | case notsyntaxspec: | ||
668 | printf ("/notsyntaxspec"); | ||
669 | mcnt = *p++; | ||
670 | printf ("/%d", mcnt); | ||
671 | break; | ||
672 | #endif /* emacs */ | ||
673 | |||
674 | case wordchar: | ||
675 | printf ("/wordchar"); | ||
676 | break; | ||
677 | |||
678 | case notwordchar: | ||
679 | printf ("/notwordchar"); | ||
680 | break; | ||
681 | |||
682 | case begbuf: | ||
683 | printf ("/begbuf"); | ||
684 | break; | ||
685 | |||
686 | case endbuf: | ||
687 | printf ("/endbuf"); | ||
688 | break; | ||
689 | |||
690 | default: /* Unknown opcode byte: print its numeric value. */ | ||
691 | printf ("?%d", *(p-1)); | ||
692 | } | ||
693 | } | ||
694 | printf ("/\n"); | ||
695 | } | ||
696 | |||
697 | /* Print the whole compiled pattern held in BUFP, followed by its bookkeeping fields. The integer fields whose types are set by regex.h (not visible here) are cast to unsigned long so that each argument matches its printf conversion. */ | ||
698 | void | ||
699 | print_compiled_pattern (bufp) | ||
700 | struct re_pattern_buffer *bufp; | ||
701 | { | ||
702 | unsigned char *buffer = bufp->buffer; | ||
703 | |||
704 | print_partial_compiled_pattern (buffer, buffer + bufp->used); | ||
705 | printf ("%lu bytes used/%lu bytes allocated.\n", (unsigned long) bufp->used, (unsigned long) bufp->allocated); | ||
706 | |||
707 | if (bufp->fastmap_accurate && bufp->fastmap) /* Only show a fastmap that exists and is marked up to date. */ | ||
708 | { | ||
709 | printf ("fastmap: "); | ||
710 | print_fastmap (bufp->fastmap); | ||
711 | } | ||
712 | |||
713 | printf ("re_nsub: %lu\t", (unsigned long) bufp->re_nsub); | ||
714 | printf ("regs_alloc: %d\t", bufp->regs_allocated); | ||
715 | printf ("can_be_null: %d\t", bufp->can_be_null); | ||
716 | printf ("newline_anchor: %d\n", bufp->newline_anchor); | ||
717 | printf ("no_sub: %d\t", bufp->no_sub); | ||
718 | printf ("not_bol: %d\t", bufp->not_bol); | ||
719 | printf ("not_eol: %d\t", bufp->not_eol); | ||
720 | printf ("syntax: %lu\n", (unsigned long) bufp->syntax); | ||
721 | /* Perhaps we should print the translate table? */ | ||
722 | } | ||
723 | |||
724 | |||
725 | void | ||
726 | print_double_string (where, string1, size1, string2, size2) /* Print the text from WHERE to the end of the STRING1/STRING2 pair: when FIRST_STRING_P (where) holds, the rest of STRING1 and then all of STRING2; otherwise STRING2 from WHERE on. A null WHERE prints "(null)". */ | ||
727 | const char *where; | ||
728 | const char *string1; | ||
729 | const char *string2; | ||
730 | int size1; | ||
731 | int size2; | ||
732 | { | ||
733 | unsigned this_char; /* Index into whichever string is being printed. */ | ||
734 | |||
735 | if (where == NULL) | ||
736 | printf ("(null)"); | ||
737 | else | ||
738 | { | ||
739 | if (FIRST_STRING_P (where)) /* Macro defined outside this section; NOTE(review): presumably tests whether WHERE points into STRING1 -- confirm. */ | ||
740 | { | ||
741 | for (this_char = where - string1; this_char < size1; this_char++) | ||
742 | printchar (string1[this_char]); | ||
743 | |||
744 | where = string2; /* Continue from the start of the second string. */ | ||
745 | } | ||
746 | |||
747 | for (this_char = where - string2; this_char < size2; this_char++) | ||
748 | printchar (string2[this_char]); | ||
749 | } | ||
750 | } | ||
751 | |||
752 | #else /* not DEBUG */ | ||
753 | |||
754 | #undef assert | ||
755 | #define assert(e) /* Without DEBUG, assertions expand to nothing and E is never evaluated. */ | ||
756 | |||
757 | #define DEBUG_STATEMENT(e) /* Without DEBUG, every debugging macro below expands to nothing. */ | ||
758 | #define DEBUG_PRINT1(x) | ||
759 | #define DEBUG_PRINT2(x1, x2) | ||
760 | #define DEBUG_PRINT3(x1, x2, x3) | ||
761 | #define DEBUG_PRINT4(x1, x2, x3, x4) | ||
762 | #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) | ||
763 | #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) | ||
764 | |||
765 | #endif /* not DEBUG */ | ||
766 | |||
767 | /* Set by `re_set_syntax' to the current regexp syntax to recognize; starts out as RE_SYNTAX_EMACS. Can | ||
768 | also be assigned to arbitrarily: each pattern buffer stores its own | ||
769 | syntax, so it can be changed between regex compilations. */ | ||
770 | reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; | ||
771 | |||
772 | |||
773 | /* Choose the exact regexp syntax to use for compilation, so that the | ||
774 | historically different, mutually incompatible syntaxes of various | ||
775 | utilities can all be accommodated. | ||
776 | |||
777 | SYNTAX is a bit mask made up of the bits defined in regex.h; it is stored | ||
778 | in `re_syntax_options', and the value it replaces is returned. */ | ||
779 | |||
780 | reg_syntax_t | ||
781 | re_set_syntax (syntax) | ||
782 | reg_syntax_t syntax; | ||
783 | { | ||
784 | const reg_syntax_t previous = re_syntax_options; | ||
785 | |||
786 | re_syntax_options = syntax; | ||
787 | return previous; | ||
788 | } | ||
789 | |||
790 | /* This table gives an error message for each of the error codes listed | ||
791 | in regex.h, in the order named by the per-entry comments; the order here has to be the same as there. The REG_NOERROR entry is NULL. */ | ||
792 | |||
793 | static const char *re_error_msg[] = | ||
794 | { NULL, /* REG_NOERROR */ | ||
795 | "No match", /* REG_NOMATCH */ | ||
796 | "Invalid regular expression", /* REG_BADPAT */ | ||
797 | "Invalid collation character", /* REG_ECOLLATE */ | ||
798 | "Invalid character class name", /* REG_ECTYPE */ | ||
799 | "Trailing backslash", /* REG_EESCAPE */ | ||
800 | "Invalid back reference", /* REG_ESUBREG */ | ||
801 | "Unmatched [ or [^", /* REG_EBRACK */ | ||
802 | "Unmatched ( or \\(", /* REG_EPAREN */ | ||
803 | "Unmatched \\{", /* REG_EBRACE */ | ||
804 | "Invalid content of \\{\\}", /* REG_BADBR */ | ||
805 | "Invalid range end", /* REG_ERANGE */ | ||
806 | "Memory exhausted", /* REG_ESPACE */ | ||
807 | "Invalid preceding regular expression", /* REG_BADRPT */ | ||
808 | "Premature end of regular expression", /* REG_EEND */ | ||
809 | "Regular expression too big", /* REG_ESIZE */ | ||
810 | "Unmatched ) or \\)", /* REG_ERPAREN */ | ||
811 | }; | ||
812 | |||
813 | /* Subroutine declarations and macros for regex_compile. The declarations are old-style (no parameter lists), so calls to these functions are not type-checked. */ | ||
814 | |||
815 | static void store_op1 (), store_op2 (); | ||
816 | static void insert_op1 (), insert_op2 (); | ||
817 | static boolean at_begline_loc_p (), at_endline_loc_p (); | ||
818 | static boolean group_in_compile_stack (); | ||
819 | static reg_errcode_t compile_range (); | ||
820 | |||
821 | /* Fetch the next character of the uncompiled pattern into C, translating | ||
822 | it if `translate' is non-null. The character is first cast to unsigned char | ||
823 | so that it can safely index `translate'. Uses `p', `pend' and `translate' from the | ||
824 | enclosing function, and makes that function return REG_EEND if P has reached PEND. */ | ||
825 | #define PATFETCH(c) \ | ||
826 | do {if (p == pend) return REG_EEND; \ | ||
827 | c = (unsigned char) *p++; \ | ||
828 | if (translate) c = translate[c]; \ | ||
829 | } while (0) | ||
830 | |||
831 | /* Fetch the next character in the uncompiled pattern, with no | ||
832 | translation. Also returns REG_EEND from the enclosing function at end of pattern. */ | ||
833 | #define PATFETCH_RAW(c) \ | ||
834 | do {if (p == pend) return REG_EEND; \ | ||
835 | c = (unsigned char) *p++; \ | ||
836 | } while (0) | ||
837 | |||
838 | /* Go backwards one character in the pattern (steps `p'; no bounds check). */ | ||
839 | #define PATUNFETCH p-- | ||
840 | |||
841 | |||
842 | /* If `translate' is non-null, return translate[D], else just D. We | ||
843 | cast the subscript to translate because some data is declared as | ||
844 | `char *', to avoid warnings when a string constant is passed. But | ||
845 | when we use a character as a subscript we must make it unsigned. */ | ||
846 | #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) | ||
847 | |||
848 | |||
849 | /* Macros for outputting the compiled pattern into `buffer'. They use `b' (the append position) and `bufp' from the enclosing function. */ | ||
850 | |||
851 | /* If the buffer isn't allocated when it comes in, use this. */ | ||
852 | #define INIT_BUF_SIZE 32 | ||
853 | |||
854 | /* Make sure we have at least N more bytes of space in buffer. Expands to a bare `while' around EXTEND_BUFFER, which can return an error code from the enclosing function. */ | ||
855 | #define GET_BUFFER_SPACE(n) \ | ||
856 | while (b - bufp->buffer + (n) > bufp->allocated) \ | ||
857 | EXTEND_BUFFER () | ||
858 | |||
859 | /* Make sure we have one more byte of buffer space and then add C to it. */ | ||
860 | #define BUF_PUSH(c) \ | ||
861 | do { \ | ||
862 | GET_BUFFER_SPACE (1); \ | ||
863 | *b++ = (unsigned char) (c); \ | ||
864 | } while (0) | ||
865 | |||
866 | |||
867 | /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ | ||
868 | #define BUF_PUSH_2(c1, c2) \ | ||
869 | do { \ | ||
870 | GET_BUFFER_SPACE (2); \ | ||
871 | *b++ = (unsigned char) (c1); \ | ||
872 | *b++ = (unsigned char) (c2); \ | ||
873 | } while (0) | ||
874 | |||
875 | |||
876 | /* As with BUF_PUSH_2, except for three bytes. */ | ||
877 | #define BUF_PUSH_3(c1, c2, c3) \ | ||
878 | do { \ | ||
879 | GET_BUFFER_SPACE (3); \ | ||
880 | *b++ = (unsigned char) (c1); \ | ||
881 | *b++ = (unsigned char) (c2); \ | ||
882 | *b++ = (unsigned char) (c3); \ | ||
883 | } while (0) | ||
884 | |||
885 | |||
886 | /* Store a jump with opcode OP at LOC to location TO. We store a | ||
887 | relative address offset by the three bytes the jump itself occupies. LOC is evaluated twice. */ | ||
888 | #define STORE_JUMP(op, loc, to) \ | ||
889 | store_op1 (op, loc, (to) - (loc) - 3) | ||
890 | |||
891 | /* Likewise, for a two-argument jump. */ | ||
892 | #define STORE_JUMP2(op, loc, to, arg) \ | ||
893 | store_op2 (op, loc, (to) - (loc) - 3, arg) | ||
894 | |||
895 | /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ | ||
896 | #define INSERT_JUMP(op, loc, to) \ | ||
897 | insert_op1 (op, loc, (to) - (loc) - 3, b) | ||
898 | |||
899 | /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ | ||
900 | #define INSERT_JUMP2(op, loc, to, arg) \ | ||
901 | insert_op2 (op, loc, (to) - (loc) - 3, arg, b) | ||
902 | |||
903 | |||
904 | /* This is not an arbitrary limit: the arguments which represent offsets | ||
905 | into the pattern are two bytes long. So if 2^16 bytes turns out to | ||
906 | be too small, many things would have to change. */ | ||
907 | #define MAX_BUF_SIZE (1L << 16) | ||
908 | |||
909 | |||
910 | /* Double the size of the buffer with realloc (never beyond MAX_BUF_SIZE) and | ||
911 | reset the pointers that pointed into the old block to point to the | ||
912 | correct places in the new one. Makes the enclosing function return REG_ESIZE if the buffer is already | ||
913 | MAX_BUF_SIZE bytes, or REG_ESPACE if realloc fails (bufp->buffer is then NULL and the old block is not freed). */ | ||
914 | #define EXTEND_BUFFER() \ | ||
915 | do { \ | ||
916 | unsigned char *old_buffer = bufp->buffer; \ | ||
917 | if (bufp->allocated == MAX_BUF_SIZE) \ | ||
918 | return REG_ESIZE; \ | ||
919 | bufp->allocated <<= 1; \ | ||
920 | if (bufp->allocated > MAX_BUF_SIZE) \ | ||
921 | bufp->allocated = MAX_BUF_SIZE; \ | ||
922 | bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ | ||
923 | if (bufp->buffer == NULL) \ | ||
924 | return REG_ESPACE; \ | ||
925 | /* If the buffer moved, move all the pointers into it; the optional ones are adjusted only when non-null. */ \ | ||
926 | if (old_buffer != bufp->buffer) \ | ||
927 | { \ | ||
928 | b = (b - old_buffer) + bufp->buffer; \ | ||
929 | begalt = (begalt - old_buffer) + bufp->buffer; \ | ||
930 | if (fixup_alt_jump) \ | ||
931 | fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ | ||
932 | if (laststart) \ | ||
933 | laststart = (laststart - old_buffer) + bufp->buffer; \ | ||
934 | if (pending_exact) \ | ||
935 | pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ | ||
936 | } \ | ||
937 | } while (0) | ||
938 | |||
939 | |||
940 | /* Since we have one byte reserved for the register number argument to | ||
941 | {start,stop}_memory, the maximum number of groups we can report | ||
942 | things about is what fits in that byte. */ | ||
943 | #define MAX_REGNUM 255 | ||
944 | |||
945 | /* But patterns can have more than `MAX_REGNUM' registers. We just | ||
946 | ignore the excess. */ | ||
947 | typedef unsigned regnum_t; | ||
948 | |||
949 | |||
950 | /* Macros for the compile stack. */ | ||
951 | |||
952 | /* Since offsets can go either forwards or backwards, this type needs to | ||
953 | be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ | ||
954 | typedef int pattern_offset_t; | ||
955 | |||
956 | typedef struct /* One compile stack entry. NOTE(review): presumably the state saved for one unclosed group, with pointers kept as offsets -- confirm in regex_compile. */ | ||
957 | { | ||
958 | pattern_offset_t begalt_offset; | ||
959 | pattern_offset_t fixup_alt_jump; | ||
960 | pattern_offset_t inner_group_offset; | ||
961 | pattern_offset_t laststart_offset; | ||
962 | regnum_t regnum; | ||
963 | } compile_stack_elt_t; | ||
964 | |||
965 | |||
966 | typedef struct | ||
967 | { | ||
968 | compile_stack_elt_t *stack; | ||
969 | unsigned size; /* Number of elements allocated in `stack'. */ | ||
970 | unsigned avail; /* Offset of next open position. */ | ||
971 | } compile_stack_type; | ||
972 | |||
973 | |||
974 | #define INIT_COMPILE_STACK_SIZE 32 /* Counted in stack elements, not bytes. */ | ||
975 | |||
976 | #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) /* These macros refer to a variable named `compile_stack' in the enclosing scope. */ | ||
977 | #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) | ||
978 | |||
979 | /* The next available element (the slot at index `avail', i.e. one past the last element in use). */ | ||
980 | #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) | ||
981 | |||
982 | |||
983 | /* Set the bit for character C in a list: the bitmap starts at `b' (taken from the enclosing scope) and bits within a byte are numbered from the low end. C is evaluated twice and is parenthesized at both uses, so it may be any expression. */ | ||
984 | #define SET_LIST_BIT(c) \ | ||
985 | (b[((unsigned char) (c)) / BYTEWIDTH] \ | ||
986 | |= 1 << (((unsigned char) (c)) % BYTEWIDTH)) | ||
987 | |||
988 | |||
989 | /* Get the next unsigned number in the uncompiled pattern: fetch characters into `c' (caller's scope) while they are digits, accumulating their decimal value in NUM. A negative NUM is reset to 0 before the first digit is added and is left untouched when no digit is found. There is no overflow check. Expands to a bare { } block, not a do-while. */ | ||
990 | #define GET_UNSIGNED_NUMBER(num) \ | ||
991 | { if (p != pend) \ | ||
992 | { \ | ||
993 | PATFETCH (c); \ | ||
994 | while (ISDIGIT (c)) \ | ||
995 | { \ | ||
996 | if (num < 0) \ | ||
997 | num = 0; \ | ||
998 | num = num * 10 + c - '0'; \ | ||
999 | if (p == pend) \ | ||
1000 | break; \ | ||
1001 | PATFETCH (c); \ | ||
1002 | } \ | ||
1003 | } \ | ||
1004 | } | ||
1005 | |||
1006 | #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ | ||
1007 | |||
1008 | #define IS_CHAR_CLASS(string) /* Nonzero iff STRING is exactly one of the twelve class names listed; STRING is evaluated repeatedly. */ \ | ||
1009 | (STREQ (string, "alpha") || STREQ (string, "upper") \ | ||
1010 | || STREQ (string, "lower") || STREQ (string, "digit") \ | ||
1011 | || STREQ (string, "alnum") || STREQ (string, "xdigit") \ | ||
1012 | || STREQ (string, "space") || STREQ (string, "print") \ | ||
1013 | || STREQ (string, "punct") || STREQ (string, "graph") \ | ||
1014 | || STREQ (string, "cntrl") || STREQ (string, "blank")) | ||
1015 | |||
1016 | /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. | ||
1017 | Returns one of error codes defined in `regex.h', or zero for success. | ||
1018 | |||
1019 | Assumes the `allocated' (and perhaps `buffer') and `translate' | ||
1020 | fields are set in BUFP on entry. | ||
1021 | |||
1022 | If it succeeds, results are put in BUFP (if it returns an error, the | ||
1023 | contents of BUFP are undefined): | ||
1024 | `buffer' is the compiled pattern; | ||
1025 | `syntax' is set to SYNTAX; | ||
1026 | `used' is set to the length of the compiled pattern; | ||
1027 | `fastmap_accurate' is zero; | ||
1028 | `re_nsub' is the number of subexpressions in PATTERN; | ||
1029 | `not_bol' and `not_eol' are zero; | ||
1030 | |||
1031 | The `fastmap' and `newline_anchor' fields are neither | ||
1032 | examined nor set. */ | ||
1033 | |||
1034 | static reg_errcode_t | ||
1035 | regex_compile (pattern, size, syntax, bufp) | ||
1036 | const char *pattern; | ||
1037 | int size; | ||
1038 | reg_syntax_t syntax; | ||
1039 | struct re_pattern_buffer *bufp; | ||
1040 | { | ||
1041 | /* We fetch characters from PATTERN here. Even though PATTERN is | ||
1042 | `char *' (i.e., signed), we declare these variables as unsigned, so | ||
1043 | they can be reliably used as array indices. */ | ||
1044 | register unsigned char c, c1; | ||
1045 | |||
1046 | /* A random temporary spot in PATTERN. */ | ||
1047 | const char *p1; | ||
1048 | |||
1049 | /* Points to the end of the buffer, where we should append. */ | ||
1050 | register unsigned char *b; | ||
1051 | |||
1052 | /* Keeps track of unclosed groups. */ | ||
1053 | compile_stack_type compile_stack; | ||
1054 | |||
1055 | /* Points to the current (ending) position in the pattern. */ | ||
1056 | const char *p = pattern; | ||
1057 | const char *pend = pattern + size; | ||
1058 | |||
1059 | /* How to translate the characters in the pattern. */ | ||
1060 | char *translate = bufp->translate; | ||
1061 | |||
1062 | /* Address of the count-byte of the most recently inserted `exactn' | ||
1063 | command. This makes it possible to tell if a new exact-match | ||
1064 | character can be added to that command or if the character requires | ||
1065 | a new `exactn' command. */ | ||
1066 | unsigned char *pending_exact = 0; | ||
1067 | |||
1068 | /* Address of start of the most recently finished expression. | ||
1069 | This tells, e.g., postfix * where to find the start of its | ||
1070 | operand. Reset at the beginning of groups and alternatives. */ | ||
1071 | unsigned char *laststart = 0; | ||
1072 | |||
1073 | /* Address of beginning of regexp, or inside of last group. */ | ||
1074 | unsigned char *begalt; | ||
1075 | |||
1076 | /* Place in the uncompiled pattern (i.e., the {) to | ||
1077 | which to go back if the interval is invalid. */ | ||
1078 | const char *beg_interval; | ||
1079 | |||
1080 | /* Address of the place where a forward jump should go to the end of | ||
1081 | the containing expression. Each alternative of an `or' -- except the | ||
1082 | last -- ends with a forward jump of this sort. */ | ||
1083 | unsigned char *fixup_alt_jump = 0; | ||
1084 | |||
1085 | /* Counts open-groups as they are encountered. Remembered for the | ||
1086 | matching close-group on the compile stack, so the same register | ||
1087 | number is put in the stop_memory as the start_memory. */ | ||
1088 | regnum_t regnum = 0; | ||
1089 | |||
1090 | #ifdef DEBUG | ||
1091 | DEBUG_PRINT1 ("\nCompiling pattern: "); | ||
1092 | if (debug) | ||
1093 | { | ||
1094 | unsigned debug_count; | ||
1095 | |||
1096 | for (debug_count = 0; debug_count < size; debug_count++) | ||
1097 | printchar (pattern[debug_count]); | ||
1098 | putchar ('\n'); | ||
1099 | } | ||
1100 | #endif /* DEBUG */ | ||
1101 | |||
1102 | /* Initialize the compile stack. */ | ||
1103 | compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); | ||
1104 | if (compile_stack.stack == NULL) | ||
1105 | return REG_ESPACE; | ||
1106 | |||
1107 | compile_stack.size = INIT_COMPILE_STACK_SIZE; | ||
1108 | compile_stack.avail = 0; | ||
1109 | |||
1110 | /* Initialize the pattern buffer. */ | ||
1111 | bufp->syntax = syntax; | ||
1112 | bufp->fastmap_accurate = 0; | ||
1113 | bufp->not_bol = bufp->not_eol = 0; | ||
1114 | |||
1115 | /* Set `used' to zero, so that if we return an error, the pattern | ||
1116 | printer (for debugging) will think there's no pattern. We reset it | ||
1117 | at the end. */ | ||
1118 | bufp->used = 0; | ||
1119 | |||
1120 | /* Always count groups, whether or not bufp->no_sub is set. */ | ||
1121 | bufp->re_nsub = 0; | ||
1122 | |||
1123 | #if !defined (emacs) && !defined (SYNTAX_TABLE) | ||
1124 | /* Initialize the syntax table. */ | ||
1125 | init_syntax_once (); | ||
1126 | #endif | ||
1127 | |||
1128 | if (bufp->allocated == 0) | ||
1129 | { | ||
1130 | if (bufp->buffer) | ||
1131 | { /* If zero allocated, but buffer is non-null, try to realloc | ||
1132 | enough space. This loses if buffer's address is bogus, but | ||
1133 | that is the user's responsibility. */ | ||
1134 | RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); | ||
1135 | } | ||
1136 | else | ||
1137 | { /* Caller did not allocate a buffer. Do it for them. */ | ||
1138 | bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); | ||
1139 | } | ||
1140 | if (!bufp->buffer) return REG_ESPACE; | ||
1141 | |||
1142 | bufp->allocated = INIT_BUF_SIZE; | ||
1143 | } | ||
1144 | |||
1145 | begalt = b = bufp->buffer; | ||
1146 | |||
1147 | /* Loop through the uncompiled pattern until we're at the end. */ | ||
1148 | while (p != pend) | ||
1149 | { | ||
1150 | PATFETCH (c); | ||
1151 | |||
1152 | switch (c) | ||
1153 | { | ||
1154 | case '^': | ||
1155 | { | ||
1156 | if ( /* If at start of pattern, it's an operator. */ | ||
1157 | p == pattern + 1 | ||
1158 | /* If context independent, it's an operator. */ | ||
1159 | || syntax & RE_CONTEXT_INDEP_ANCHORS | ||
1160 | /* Otherwise, depends on what's come before. */ | ||
1161 | || at_begline_loc_p (pattern, p, syntax)) | ||
1162 | BUF_PUSH (begline); | ||
1163 | else | ||
1164 | goto normal_char; | ||
1165 | } | ||
1166 | break; | ||
1167 | |||
1168 | |||
1169 | case '$': | ||
1170 | { | ||
1171 | if ( /* If at end of pattern, it's an operator. */ | ||
1172 | p == pend | ||
1173 | /* If context independent, it's an operator. */ | ||
1174 | || syntax & RE_CONTEXT_INDEP_ANCHORS | ||
1175 | /* Otherwise, depends on what's next. */ | ||
1176 | || at_endline_loc_p (p, pend, syntax)) | ||
1177 | BUF_PUSH (endline); | ||
1178 | else | ||
1179 | goto normal_char; | ||
1180 | } | ||
1181 | break; | ||
1182 | |||
1183 | |||
1184 | case '+': | ||
1185 | case '?': | ||
1186 | if ((syntax & RE_BK_PLUS_QM) | ||
1187 | || (syntax & RE_LIMITED_OPS)) | ||
1188 | goto normal_char; | ||
1189 | handle_plus: | ||
1190 | case '*': | ||
1191 | /* If there is no previous pattern... */ | ||
1192 | if (!laststart) | ||
1193 | { | ||
1194 | if (syntax & RE_CONTEXT_INVALID_OPS) | ||
1195 | return REG_BADRPT; | ||
1196 | else if (!(syntax & RE_CONTEXT_INDEP_OPS)) | ||
1197 | goto normal_char; | ||
1198 | } | ||
1199 | |||
1200 | { | ||
1201 | /* Are we optimizing this jump? */ | ||
1202 | boolean keep_string_p = false; | ||
1203 | |||
1204 | /* 1 means zero (many) matches is allowed. */ | ||
1205 | char zero_times_ok = 0, many_times_ok = 0; | ||
1206 | |||
1207 | /* If there is a sequence of repetition chars, collapse it | ||
1208 | down to just one (the right one). We can't combine | ||
1209 | interval operators with these because of, e.g., `a{2}*', | ||
1210 | which should only match an even number of `a's. */ | ||
1211 | |||
1212 | for (;;) | ||
1213 | { | ||
1214 | zero_times_ok |= c != '+'; | ||
1215 | many_times_ok |= c != '?'; | ||
1216 | |||
1217 | if (p == pend) | ||
1218 | break; | ||
1219 | |||
1220 | PATFETCH (c); | ||
1221 | |||
1222 | if (c == '*' | ||
1223 | || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) | ||
1224 | ; | ||
1225 | |||
1226 | else if (syntax & RE_BK_PLUS_QM && c == '\\') | ||
1227 | { | ||
1228 | if (p == pend) return REG_EESCAPE; | ||
1229 | |||
1230 | PATFETCH (c1); | ||
1231 | if (!(c1 == '+' || c1 == '?')) | ||
1232 | { | ||
1233 | PATUNFETCH; | ||
1234 | PATUNFETCH; | ||
1235 | break; | ||
1236 | } | ||
1237 | |||
1238 | c = c1; | ||
1239 | } | ||
1240 | else | ||
1241 | { | ||
1242 | PATUNFETCH; | ||
1243 | break; | ||
1244 | } | ||
1245 | |||
1246 | /* If we get here, we found another repeat character. */ | ||
1247 | } | ||
1248 | |||
1249 | /* Star, etc. applied to an empty pattern is equivalent | ||
1250 | to an empty pattern. */ | ||
1251 | if (!laststart) | ||
1252 | break; | ||
1253 | |||
1254 | /* Now we know whether or not zero matches is allowed | ||
1255 | and also whether or not two or more matches is allowed. */ | ||
1256 | if (many_times_ok) | ||
1257 | { /* More than one repetition is allowed, so put in at the | ||
1258 | end a backward relative jump from `b' to before the next | ||
1259 | jump we're going to put in below (which jumps from | ||
1260 | laststart to after this jump). | ||
1261 | |||
1262 | But if we are at the `*' in the exact sequence `.*\n', | ||
1263 | insert an unconditional jump backwards to the ., | ||
1264 | instead of the beginning of the loop. This way we only | ||
1265 | push a failure point once, instead of every time | ||
1266 | through the loop. */ | ||
1267 | assert (p - 1 > pattern); | ||
1268 | |||
1269 | /* Allocate the space for the jump. */ | ||
1270 | GET_BUFFER_SPACE (3); | ||
1271 | |||
1272 | /* We know we are not at the first character of the pattern, | ||
1273 | because laststart was nonzero. And we've already | ||
1274 | incremented `p', by the way, to be the character after | ||
1275 | the `*'. Do we have to do something analogous here | ||
1276 | for null bytes, because of RE_DOT_NOT_NULL? */ | ||
1277 | if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') | ||
1278 | && zero_times_ok | ||
1279 | && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') | ||
1280 | && !(syntax & RE_DOT_NEWLINE)) | ||
1281 | { /* We have .*\n. */ | ||
1282 | STORE_JUMP (jump, b, laststart); | ||
1283 | keep_string_p = true; | ||
1284 | } | ||
1285 | else | ||
1286 | /* Anything else. */ | ||
1287 | STORE_JUMP (maybe_pop_jump, b, laststart - 3); | ||
1288 | |||
1289 | /* We've added more stuff to the buffer. */ | ||
1290 | b += 3; | ||
1291 | } | ||
1292 | |||
1293 | /* On failure, jump from laststart to b + 3, which will be the | ||
1294 | end of the buffer after this jump is inserted. */ | ||
1295 | GET_BUFFER_SPACE (3); | ||
1296 | INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump | ||
1297 | : on_failure_jump, | ||
1298 | laststart, b + 3); | ||
1299 | pending_exact = 0; | ||
1300 | b += 3; | ||
1301 | |||
1302 | if (!zero_times_ok) | ||
1303 | { | ||
1304 | /* At least one repetition is required, so insert a | ||
1305 | `dummy_failure_jump' before the initial | ||
1306 | `on_failure_jump' instruction of the loop. This | ||
1307 | effects a skip over that instruction the first time | ||
1308 | we hit that loop. */ | ||
1309 | GET_BUFFER_SPACE (3); | ||
1310 | INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); | ||
1311 | b += 3; | ||
1312 | } | ||
1313 | } | ||
1314 | break; | ||
1315 | |||
1316 | |||
1317 | case '.': | ||
1318 | laststart = b; | ||
1319 | BUF_PUSH (anychar); | ||
1320 | break; | ||
1321 | |||
1322 | |||
1323 | case '[': | ||
1324 | { | ||
1325 | boolean had_char_class = false; | ||
1326 | |||
1327 | if (p == pend) return REG_EBRACK; | ||
1328 | |||
1329 | /* Ensure that we have enough space to push a charset: the | ||
1330 | opcode, the length count, and the bitset; 34 bytes in all. */ | ||
1331 | GET_BUFFER_SPACE (34); | ||
1332 | |||
1333 | laststart = b; | ||
1334 | |||
1335 | /* We test `*p == '^' twice, instead of using an if | ||
1336 | statement, so we only need one BUF_PUSH. */ | ||
1337 | BUF_PUSH (*p == '^' ? charset_not : charset); | ||
1338 | if (*p == '^') | ||
1339 | p++; | ||
1340 | |||
1341 | /* Remember the first position in the bracket expression. */ | ||
1342 | p1 = p; | ||
1343 | |||
1344 | /* Push the number of bytes in the bitmap. */ | ||
1345 | BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); | ||
1346 | |||
1347 | /* Clear the whole map. */ | ||
1348 | bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); | ||
1349 | |||
1350 | /* charset_not matches newline according to a syntax bit. */ | ||
1351 | if ((re_opcode_t) b[-2] == charset_not | ||
1352 | && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) | ||
1353 | SET_LIST_BIT ('\n'); | ||
1354 | |||
1355 | /* Read in characters and ranges, setting map bits. */ | ||
1356 | for (;;) | ||
1357 | { | ||
1358 | if (p == pend) return REG_EBRACK; | ||
1359 | |||
1360 | PATFETCH (c); | ||
1361 | |||
1362 | /* \ might escape characters inside [...] and [^...]. */ | ||
1363 | if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') | ||
1364 | { | ||
1365 | if (p == pend) return REG_EESCAPE; | ||
1366 | |||
1367 | PATFETCH (c1); | ||
1368 | SET_LIST_BIT (c1); | ||
1369 | continue; | ||
1370 | } | ||
1371 | |||
1372 | /* Could be the end of the bracket expression. If it's | ||
1373 | not (i.e., when the bracket expression is `[]' so | ||
1374 | far), the ']' character bit gets set way below. */ | ||
1375 | if (c == ']' && p != p1 + 1) | ||
1376 | break; | ||
1377 | |||
1378 | /* Look ahead to see if it's a range when the last thing | ||
1379 | was a character class. */ | ||
1380 | if (had_char_class && c == '-' && *p != ']') | ||
1381 | return REG_ERANGE; | ||
1382 | |||
1383 | /* Look ahead to see if it's a range when the last thing | ||
1384 | was a character: if this is a hyphen not at the | ||
1385 | beginning or the end of a list, then it's the range | ||
1386 | operator. */ | ||
1387 | if (c == '-' | ||
1388 | && !(p - 2 >= pattern && p[-2] == '[') | ||
1389 | && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') | ||
1390 | && *p != ']') | ||
1391 | { | ||
1392 | reg_errcode_t ret | ||
1393 | = compile_range (&p, pend, translate, syntax, b); | ||
1394 | if (ret != REG_NOERROR) return ret; | ||
1395 | } | ||
1396 | |||
1397 | else if (p[0] == '-' && p[1] != ']') | ||
1398 | { /* This handles ranges made up of characters only. */ | ||
1399 | reg_errcode_t ret; | ||
1400 | |||
1401 | /* Move past the `-'. */ | ||
1402 | PATFETCH (c1); | ||
1403 | |||
1404 | ret = compile_range (&p, pend, translate, syntax, b); | ||
1405 | if (ret != REG_NOERROR) return ret; | ||
1406 | } | ||
1407 | |||
1408 | /* See if we're at the beginning of a possible character | ||
1409 | class. */ | ||
1410 | |||
1411 | else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') | ||
1412 | { /* Leave room for the null. */ | ||
1413 | char str[CHAR_CLASS_MAX_LENGTH + 1]; | ||
1414 | |||
1415 | PATFETCH (c); | ||
1416 | c1 = 0; | ||
1417 | |||
1418 | /* If pattern is `[[:'. */ | ||
1419 | if (p == pend) return REG_EBRACK; | ||
1420 | |||
1421 | for (;;) | ||
1422 | { | ||
1423 | PATFETCH (c); | ||
1424 | if (c == ':' || c == ']' || p == pend | ||
1425 | || c1 == CHAR_CLASS_MAX_LENGTH) | ||
1426 | break; | ||
1427 | str[c1++] = c; | ||
1428 | } | ||
1429 | str[c1] = '\0'; | ||
1430 | |||
1431 | /* If isn't a word bracketed by `[:' and:`]': | ||
1432 | undo the ending character, the letters, and leave | ||
1433 | the leading `:' and `[' (but set bits for them). */ | ||
1434 | if (c == ':' && *p == ']') | ||
1435 | { | ||
1436 | int ch; | ||
1437 | boolean is_alnum = STREQ (str, "alnum"); | ||
1438 | boolean is_alpha = STREQ (str, "alpha"); | ||
1439 | boolean is_blank = STREQ (str, "blank"); | ||
1440 | boolean is_cntrl = STREQ (str, "cntrl"); | ||
1441 | boolean is_digit = STREQ (str, "digit"); | ||
1442 | boolean is_graph = STREQ (str, "graph"); | ||
1443 | boolean is_lower = STREQ (str, "lower"); | ||
1444 | boolean is_print = STREQ (str, "print"); | ||
1445 | boolean is_punct = STREQ (str, "punct"); | ||
1446 | boolean is_space = STREQ (str, "space"); | ||
1447 | boolean is_upper = STREQ (str, "upper"); | ||
1448 | boolean is_xdigit = STREQ (str, "xdigit"); | ||
1449 | |||
1450 | if (!IS_CHAR_CLASS (str)) return REG_ECTYPE; | ||
1451 | |||
1452 | /* Throw away the ] at the end of the character | ||
1453 | class. */ | ||
1454 | PATFETCH (c); | ||
1455 | |||
1456 | if (p == pend) return REG_EBRACK; | ||
1457 | |||
1458 | for (ch = 0; ch < 1 << BYTEWIDTH; ch++) | ||
1459 | { | ||
1460 | if ( (is_alnum && ISALNUM (ch)) | ||
1461 | || (is_alpha && ISALPHA (ch)) | ||
1462 | || (is_blank && ISBLANK (ch)) | ||
1463 | || (is_cntrl && ISCNTRL (ch)) | ||
1464 | || (is_digit && ISDIGIT (ch)) | ||
1465 | || (is_graph && ISGRAPH (ch)) | ||
1466 | || (is_lower && ISLOWER (ch)) | ||
1467 | || (is_print && ISPRINT (ch)) | ||
1468 | || (is_punct && ISPUNCT (ch)) | ||
1469 | || (is_space && ISSPACE (ch)) | ||
1470 | || (is_upper && ISUPPER (ch)) | ||
1471 | || (is_xdigit && ISXDIGIT (ch))) | ||
1472 | SET_LIST_BIT (ch); | ||
1473 | } | ||
1474 | had_char_class = true; | ||
1475 | } | ||
1476 | else | ||
1477 | { | ||
1478 | c1++; | ||
1479 | while (c1--) | ||
1480 | PATUNFETCH; | ||
1481 | SET_LIST_BIT ('['); | ||
1482 | SET_LIST_BIT (':'); | ||
1483 | had_char_class = false; | ||
1484 | } | ||
1485 | } | ||
1486 | else | ||
1487 | { | ||
1488 | had_char_class = false; | ||
1489 | SET_LIST_BIT (c); | ||
1490 | } | ||
1491 | } | ||
1492 | |||
1493 | /* Discard any (non)matching list bytes that are all 0 at the | ||
1494 | end of the map. Decrease the map-length byte too. */ | ||
1495 | while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) | ||
1496 | b[-1]--; | ||
1497 | b += b[-1]; | ||
1498 | } | ||
1499 | break; | ||
1500 | |||
1501 | |||
1502 | case '(': | ||
1503 | if (syntax & RE_NO_BK_PARENS) | ||
1504 | goto handle_open; | ||
1505 | else | ||
1506 | goto normal_char; | ||
1507 | |||
1508 | |||
1509 | case ')': | ||
1510 | if (syntax & RE_NO_BK_PARENS) | ||
1511 | goto handle_close; | ||
1512 | else | ||
1513 | goto normal_char; | ||
1514 | |||
1515 | |||
1516 | case '\n': | ||
1517 | if (syntax & RE_NEWLINE_ALT) | ||
1518 | goto handle_alt; | ||
1519 | else | ||
1520 | goto normal_char; | ||
1521 | |||
1522 | |||
1523 | case '|': | ||
1524 | if (syntax & RE_NO_BK_VBAR) | ||
1525 | goto handle_alt; | ||
1526 | else | ||
1527 | goto normal_char; | ||
1528 | |||
1529 | |||
1530 | case '{': | ||
1531 | if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) | ||
1532 | goto handle_interval; | ||
1533 | else | ||
1534 | goto normal_char; | ||
1535 | |||
1536 | |||
1537 | case '\\': | ||
1538 | if (p == pend) return REG_EESCAPE; | ||
1539 | |||
1540 | /* Do not translate the character after the \, so that we can | ||
1541 | distinguish, e.g., \B from \b, even if we normally would | ||
1542 | translate, e.g., B to b. */ | ||
1543 | PATFETCH_RAW (c); | ||
1544 | |||
1545 | switch (c) | ||
1546 | { | ||
1547 | case '(': | ||
1548 | if (syntax & RE_NO_BK_PARENS) | ||
1549 | goto normal_backslash; | ||
1550 | |||
1551 | handle_open: | ||
1552 | bufp->re_nsub++; | ||
1553 | regnum++; | ||
1554 | |||
1555 | if (COMPILE_STACK_FULL) | ||
1556 | { | ||
1557 | RETALLOC (compile_stack.stack, compile_stack.size << 1, | ||
1558 | compile_stack_elt_t); | ||
1559 | if (compile_stack.stack == NULL) return REG_ESPACE; | ||
1560 | |||
1561 | compile_stack.size <<= 1; | ||
1562 | } | ||
1563 | |||
1564 | /* These are the values to restore when we hit end of this | ||
1565 | group. They are all relative offsets, so that if the | ||
1566 | whole pattern moves because of realloc, they will still | ||
1567 | be valid. */ | ||
1568 | COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; | ||
1569 | COMPILE_STACK_TOP.fixup_alt_jump | ||
1570 | = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; | ||
1571 | COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; | ||
1572 | COMPILE_STACK_TOP.regnum = regnum; | ||
1573 | |||
1574 | /* We will eventually replace the 0 with the number of | ||
1575 | groups inner to this one. But do not push a | ||
1576 | start_memory for groups beyond the last one we can | ||
1577 | represent in the compiled pattern. */ | ||
1578 | if (regnum <= MAX_REGNUM) | ||
1579 | { | ||
1580 | COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; | ||
1581 | BUF_PUSH_3 (start_memory, regnum, 0); | ||
1582 | } | ||
1583 | |||
1584 | compile_stack.avail++; | ||
1585 | |||
1586 | fixup_alt_jump = 0; | ||
1587 | laststart = 0; | ||
1588 | begalt = b; | ||
1589 | /* If we've reached MAX_REGNUM groups, then this open | ||
1590 | won't actually generate any code, so we'll have to | ||
1591 | clear pending_exact explicitly. */ | ||
1592 | pending_exact = 0; | ||
1593 | break; | ||
1594 | |||
1595 | |||
1596 | case ')': | ||
1597 | if (syntax & RE_NO_BK_PARENS) goto normal_backslash; | ||
1598 | |||
1599 | if (COMPILE_STACK_EMPTY) | ||
1600 | { | ||
1601 | if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) | ||
1602 | goto normal_backslash; | ||
1603 | else | ||
1604 | return REG_ERPAREN; | ||
1605 | } | ||
1606 | |||
1607 | handle_close: | ||
1608 | if (fixup_alt_jump) | ||
1609 | { /* Push a dummy failure point at the end of the | ||
1610 | alternative for a possible future | ||
1611 | `pop_failure_jump' to pop. See comments at | ||
1612 | `push_dummy_failure' in `re_match_2'. */ | ||
1613 | BUF_PUSH (push_dummy_failure); | ||
1614 | |||
1615 | /* We allocated space for this jump when we assigned | ||
1616 | to `fixup_alt_jump', in the `handle_alt' case below. */ | ||
1617 | STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); | ||
1618 | } | ||
1619 | |||
1620 | /* See similar code for backslashed left paren above. */ | ||
1621 | if (COMPILE_STACK_EMPTY) | ||
1622 | { | ||
1623 | if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) | ||
1624 | goto normal_char; | ||
1625 | else | ||
1626 | return REG_ERPAREN; | ||
1627 | } | ||
1628 | |||
1629 | /* Since we just checked for an empty stack above, this | ||
1630 | ``can't happen''. */ | ||
1631 | assert (compile_stack.avail != 0); | ||
1632 | { | ||
1633 | /* We don't just want to restore into `regnum', because | ||
1634 | later groups should continue to be numbered higher, | ||
1635 | as in `(ab)c(de)' -- the second group is #2. */ | ||
1636 | regnum_t this_group_regnum; | ||
1637 | |||
1638 | compile_stack.avail--; | ||
1639 | begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; | ||
1640 | fixup_alt_jump | ||
1641 | = COMPILE_STACK_TOP.fixup_alt_jump | ||
1642 | ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 | ||
1643 | : 0; | ||
1644 | laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; | ||
1645 | this_group_regnum = COMPILE_STACK_TOP.regnum; | ||
1646 | /* If we've reached MAX_REGNUM groups, then this open | ||
1647 | won't actually generate any code, so we'll have to | ||
1648 | clear pending_exact explicitly. */ | ||
1649 | pending_exact = 0; | ||
1650 | |||
1651 | /* We're at the end of the group, so now we know how many | ||
1652 | groups were inside this one. */ | ||
1653 | if (this_group_regnum <= MAX_REGNUM) | ||
1654 | { | ||
1655 | unsigned char *inner_group_loc | ||
1656 | = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; | ||
1657 | |||
1658 | *inner_group_loc = regnum - this_group_regnum; | ||
1659 | BUF_PUSH_3 (stop_memory, this_group_regnum, | ||
1660 | regnum - this_group_regnum); | ||
1661 | } | ||
1662 | } | ||
1663 | break; | ||
1664 | |||
1665 | |||
1666 | case '|': /* `\|'. */ | ||
1667 | if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) | ||
1668 | goto normal_backslash; | ||
1669 | handle_alt: | ||
1670 | if (syntax & RE_LIMITED_OPS) | ||
1671 | goto normal_char; | ||
1672 | |||
1673 | /* Insert before the previous alternative a jump which | ||
1674 | jumps to this alternative if the former fails. */ | ||
1675 | GET_BUFFER_SPACE (3); | ||
1676 | INSERT_JUMP (on_failure_jump, begalt, b + 6); | ||
1677 | pending_exact = 0; | ||
1678 | b += 3; | ||
1679 | |||
1680 | /* The alternative before this one has a jump after it | ||
1681 | which gets executed if it gets matched. Adjust that | ||
1682 | jump so it will jump to this alternative's analogous | ||
1683 | jump (put in below, which in turn will jump to the next | ||
1684 | (if any) alternative's such jump, etc.). The last such | ||
1685 | jump jumps to the correct final destination. A picture: | ||
1686 | _____ _____ | ||
1687 | | | | | | ||
1688 | | v | v | ||
1689 | a | b | c | ||
1690 | |||
1691 | If we are at `b', then fixup_alt_jump right now points to a | ||
1692 | three-byte space after `a'. We'll put in the jump, set | ||
1693 | fixup_alt_jump to right after `b', and leave behind three | ||
1694 | bytes which we'll fill in when we get to after `c'. */ | ||
1695 | |||
1696 | if (fixup_alt_jump) | ||
1697 | STORE_JUMP (jump_past_alt, fixup_alt_jump, b); | ||
1698 | |||
1699 | /* Mark and leave space for a jump after this alternative, | ||
1700 | to be filled in later either by next alternative or | ||
1701 | when know we're at the end of a series of alternatives. */ | ||
1702 | fixup_alt_jump = b; | ||
1703 | GET_BUFFER_SPACE (3); | ||
1704 | b += 3; | ||
1705 | |||
1706 | laststart = 0; | ||
1707 | begalt = b; | ||
1708 | break; | ||
1709 | |||
1710 | |||
1711 | case '{': | ||
1712 | /* If \{ is a literal. */ | ||
1713 | if (!(syntax & RE_INTERVALS) | ||
1714 | /* If we're at `\{' and it's not the open-interval | ||
1715 | operator. */ | ||
1716 | || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) | ||
1717 | || (p - 2 == pattern && p == pend)) | ||
1718 | goto normal_backslash; | ||
1719 | |||
1720 | handle_interval: | ||
1721 | { | ||
1722 | /* If got here, then the syntax allows intervals. */ | ||
1723 | |||
1724 | /* At least (most) this many matches must be made. */ | ||
1725 | int lower_bound = -1, upper_bound = -1; | ||
1726 | |||
1727 | beg_interval = p - 1; | ||
1728 | |||
1729 | if (p == pend) | ||
1730 | { | ||
1731 | if (syntax & RE_NO_BK_BRACES) | ||
1732 | goto unfetch_interval; | ||
1733 | else | ||
1734 | return REG_EBRACE; | ||
1735 | } | ||
1736 | |||
1737 | GET_UNSIGNED_NUMBER (lower_bound); | ||
1738 | |||
1739 | if (c == ',') | ||
1740 | { | ||
1741 | GET_UNSIGNED_NUMBER (upper_bound); | ||
1742 | if (upper_bound < 0) upper_bound = RE_DUP_MAX; | ||
1743 | } | ||
1744 | else | ||
1745 | /* Interval such as `{1}' => match exactly once. */ | ||
1746 | upper_bound = lower_bound; | ||
1747 | |||
1748 | if (lower_bound < 0 || upper_bound > RE_DUP_MAX | ||
1749 | || lower_bound > upper_bound) | ||
1750 | { | ||
1751 | if (syntax & RE_NO_BK_BRACES) | ||
1752 | goto unfetch_interval; | ||
1753 | else | ||
1754 | return REG_BADBR; | ||
1755 | } | ||
1756 | |||
1757 | if (!(syntax & RE_NO_BK_BRACES)) | ||
1758 | { | ||
1759 | if (c != '\\') return REG_EBRACE; | ||
1760 | |||
1761 | PATFETCH (c); | ||
1762 | } | ||
1763 | |||
1764 | if (c != '}') | ||
1765 | { | ||
1766 | if (syntax & RE_NO_BK_BRACES) | ||
1767 | goto unfetch_interval; | ||
1768 | else | ||
1769 | return REG_BADBR; | ||
1770 | } | ||
1771 | |||
1772 | /* We just parsed a valid interval. */ | ||
1773 | |||
1774 | /* If it's invalid to have no preceding re. */ | ||
1775 | if (!laststart) | ||
1776 | { | ||
1777 | if (syntax & RE_CONTEXT_INVALID_OPS) | ||
1778 | return REG_BADRPT; | ||
1779 | else if (syntax & RE_CONTEXT_INDEP_OPS) | ||
1780 | laststart = b; | ||
1781 | else | ||
1782 | goto unfetch_interval; | ||
1783 | } | ||
1784 | |||
1785 | /* If the upper bound is zero, don't want to succeed at | ||
1786 | all; jump from `laststart' to `b + 3', which will be | ||
1787 | the end of the buffer after we insert the jump. */ | ||
1788 | if (upper_bound == 0) | ||
1789 | { | ||
1790 | GET_BUFFER_SPACE (3); | ||
1791 | INSERT_JUMP (jump, laststart, b + 3); | ||
1792 | b += 3; | ||
1793 | } | ||
1794 | |||
1795 | /* Otherwise, we have a nontrivial interval. When | ||
1796 | we're all done, the pattern will look like: | ||
1797 | set_number_at <jump count> <upper bound> | ||
1798 | set_number_at <succeed_n count> <lower bound> | ||
1799 | succeed_n <after jump addr> <succed_n count> | ||
1800 | <body of loop> | ||
1801 | jump_n <succeed_n addr> <jump count> | ||
1802 | (The upper bound and `jump_n' are omitted if | ||
1803 | `upper_bound' is 1, though.) */ | ||
1804 | else | ||
1805 | { /* If the upper bound is > 1, we need to insert | ||
1806 | more at the end of the loop. */ | ||
1807 | unsigned nbytes = 10 + (upper_bound > 1) * 10; | ||
1808 | |||
1809 | GET_BUFFER_SPACE (nbytes); | ||
1810 | |||
1811 | /* Initialize lower bound of the `succeed_n', even | ||
1812 | though it will be set during matching by its | ||
1813 | attendant `set_number_at' (inserted next), | ||
1814 | because `re_compile_fastmap' needs to know. | ||
1815 | Jump to the `jump_n' we might insert below. */ | ||
1816 | INSERT_JUMP2 (succeed_n, laststart, | ||
1817 | b + 5 + (upper_bound > 1) * 5, | ||
1818 | lower_bound); | ||
1819 | b += 5; | ||
1820 | |||
1821 | /* Code to initialize the lower bound. Insert | ||
1822 | before the `succeed_n'. The `5' is the last two | ||
1823 | bytes of this `set_number_at', plus 3 bytes of | ||
1824 | the following `succeed_n'. */ | ||
1825 | insert_op2 (set_number_at, laststart, 5, lower_bound, b); | ||
1826 | b += 5; | ||
1827 | |||
1828 | if (upper_bound > 1) | ||
1829 | { /* More than one repetition is allowed, so | ||
1830 | append a backward jump to the `succeed_n' | ||
1831 | that starts this interval. | ||
1832 | |||
1833 | When we've reached this during matching, | ||
1834 | we'll have matched the interval once, so | ||
1835 | jump back only `upper_bound - 1' times. */ | ||
1836 | STORE_JUMP2 (jump_n, b, laststart + 5, | ||
1837 | upper_bound - 1); | ||
1838 | b += 5; | ||
1839 | |||
1840 | /* The location we want to set is the second | ||
1841 | parameter of the `jump_n'; that is `b-2' as | ||
1842 | an absolute address. `laststart' will be | ||
1843 | the `set_number_at' we're about to insert; | ||
1844 | `laststart+3' the number to set, the source | ||
1845 | for the relative address. But we are | ||
1846 | inserting into the middle of the pattern -- | ||
1847 | so everything is getting moved up by 5. | ||
1848 | Conclusion: (b - 2) - (laststart + 3) + 5, | ||
1849 | i.e., b - laststart. | ||
1850 | |||
1851 | We insert this at the beginning of the loop | ||
1852 | so that if we fail during matching, we'll | ||
1853 | reinitialize the bounds. */ | ||
1854 | insert_op2 (set_number_at, laststart, b - laststart, | ||
1855 | upper_bound - 1, b); | ||
1856 | b += 5; | ||
1857 | } | ||
1858 | } | ||
1859 | pending_exact = 0; | ||
1860 | beg_interval = NULL; | ||
1861 | } | ||
1862 | break; | ||
1863 | |||
1864 | unfetch_interval: | ||
1865 | /* If an invalid interval, match the characters as literals. */ | ||
1866 | assert (beg_interval); | ||
1867 | p = beg_interval; | ||
1868 | beg_interval = NULL; | ||
1869 | |||
1870 | /* normal_char and normal_backslash need `c'. */ | ||
1871 | PATFETCH (c); | ||
1872 | |||
1873 | if (!(syntax & RE_NO_BK_BRACES)) | ||
1874 | { | ||
1875 | if (p > pattern && p[-1] == '\\') | ||
1876 | goto normal_backslash; | ||
1877 | } | ||
1878 | goto normal_char; | ||
1879 | |||
1880 | #ifdef emacs | ||
1881 | /* There is no way to specify the before_dot and after_dot | ||
1882 | operators. rms says this is ok. --karl */ | ||
1883 | case '=': | ||
1884 | BUF_PUSH (at_dot); | ||
1885 | break; | ||
1886 | |||
1887 | case 's': | ||
1888 | laststart = b; | ||
1889 | PATFETCH (c); | ||
1890 | BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); | ||
1891 | break; | ||
1892 | |||
1893 | case 'S': | ||
1894 | laststart = b; | ||
1895 | PATFETCH (c); | ||
1896 | BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); | ||
1897 | break; | ||
1898 | #endif /* emacs */ | ||
1899 | |||
1900 | |||
1901 | case 'w': | ||
1902 | laststart = b; | ||
1903 | BUF_PUSH (wordchar); | ||
1904 | break; | ||
1905 | |||
1906 | |||
1907 | case 'W': | ||
1908 | laststart = b; | ||
1909 | BUF_PUSH (notwordchar); | ||
1910 | break; | ||
1911 | |||
1912 | |||
1913 | case '<': | ||
1914 | BUF_PUSH (wordbeg); | ||
1915 | break; | ||
1916 | |||
1917 | case '>': | ||
1918 | BUF_PUSH (wordend); | ||
1919 | break; | ||
1920 | |||
1921 | case 'b': | ||
1922 | BUF_PUSH (wordbound); | ||
1923 | break; | ||
1924 | |||
1925 | case 'B': | ||
1926 | BUF_PUSH (notwordbound); | ||
1927 | break; | ||
1928 | |||
1929 | case '`': | ||
1930 | BUF_PUSH (begbuf); | ||
1931 | break; | ||
1932 | |||
1933 | case '\'': | ||
1934 | BUF_PUSH (endbuf); | ||
1935 | break; | ||
1936 | |||
1937 | case '1': case '2': case '3': case '4': case '5': | ||
1938 | case '6': case '7': case '8': case '9': | ||
1939 | if (syntax & RE_NO_BK_REFS) | ||
1940 | goto normal_char; | ||
1941 | |||
1942 | c1 = c - '0'; | ||
1943 | |||
1944 | if (c1 > regnum) | ||
1945 | return REG_ESUBREG; | ||
1946 | |||
1947 | /* Can't back reference to a subexpression if inside of it. */ | ||
1948 | if (group_in_compile_stack (compile_stack, c1)) | ||
1949 | goto normal_char; | ||
1950 | |||
1951 | laststart = b; | ||
1952 | BUF_PUSH_2 (duplicate, c1); | ||
1953 | break; | ||
1954 | |||
1955 | |||
1956 | case '+': | ||
1957 | case '?': | ||
1958 | if (syntax & RE_BK_PLUS_QM) | ||
1959 | goto handle_plus; | ||
1960 | else | ||
1961 | goto normal_backslash; | ||
1962 | |||
1963 | default: | ||
1964 | normal_backslash: | ||
1965 | /* You might think it would be useful for \ to mean | ||
1966 | not to translate; but if we don't translate it | ||
1967 | it will never match anything. */ | ||
1968 | c = TRANSLATE (c); | ||
1969 | goto normal_char; | ||
1970 | } | ||
1971 | break; | ||
1972 | |||
1973 | |||
1974 | default: | ||
1975 | /* Expects the character in `c'. */ | ||
1976 | normal_char: | ||
1977 | /* If no exactn currently being built. */ | ||
1978 | if (!pending_exact | ||
1979 | |||
1980 | /* If last exactn not at current position. */ | ||
1981 | || pending_exact + *pending_exact + 1 != b | ||
1982 | |||
1983 | /* We have only one byte following the exactn for the count. */ | ||
1984 | || *pending_exact == (1 << BYTEWIDTH) - 1 | ||
1985 | |||
1986 | /* If followed by a repetition operator. */ | ||
1987 | || *p == '*' || *p == '^' | ||
1988 | || ((syntax & RE_BK_PLUS_QM) | ||
1989 | ? *p == '\\' && (p[1] == '+' || p[1] == '?') | ||
1990 | : (*p == '+' || *p == '?')) | ||
1991 | || ((syntax & RE_INTERVALS) | ||
1992 | && ((syntax & RE_NO_BK_BRACES) | ||
1993 | ? *p == '{' | ||
1994 | : (p[0] == '\\' && p[1] == '{')))) | ||
1995 | { | ||
1996 | /* Start building a new exactn. */ | ||
1997 | |||
1998 | laststart = b; | ||
1999 | |||
2000 | BUF_PUSH_2 (exactn, 0); | ||
2001 | pending_exact = b - 1; | ||
2002 | } | ||
2003 | |||
2004 | BUF_PUSH (c); | ||
2005 | (*pending_exact)++; | ||
2006 | break; | ||
2007 | } /* switch (c) */ | ||
2008 | } /* while p != pend */ | ||
2009 | |||
2010 | |||
2011 | /* Through the pattern now. */ | ||
2012 | |||
2013 | if (fixup_alt_jump) | ||
2014 | STORE_JUMP (jump_past_alt, fixup_alt_jump, b); | ||
2015 | |||
2016 | if (!COMPILE_STACK_EMPTY) | ||
2017 | return REG_EPAREN; | ||
2018 | |||
2019 | free (compile_stack.stack); | ||
2020 | |||
2021 | /* We have succeeded; set the length of the buffer. */ | ||
2022 | bufp->used = b - bufp->buffer; | ||
2023 | |||
2024 | #ifdef DEBUG | ||
2025 | if (debug) | ||
2026 | { | ||
2027 | DEBUG_PRINT1 ("\nCompiled pattern: "); | ||
2028 | print_compiled_pattern (bufp); | ||
2029 | } | ||
2030 | #endif /* DEBUG */ | ||
2031 | |||
2032 | return REG_NOERROR; | ||
2033 | } /* regex_compile */ | ||
2034 | |||
2035 | /* Subroutines for `regex_compile'. */ | ||
2036 | |||
2037 | /* Store OP at LOC followed by two-byte integer parameter ARG. */ | ||
2038 | |||
2039 | static void | ||
2040 | store_op1 (op, loc, arg) | ||
2041 | re_opcode_t op; | ||
2042 | unsigned char *loc; | ||
2043 | int arg; | ||
2044 | { | ||
2045 | *loc = (unsigned char) op; | ||
2046 | STORE_NUMBER (loc + 1, arg); | ||
2047 | } | ||
2048 | |||
2049 | |||
2050 | /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ | ||
2051 | |||
2052 | static void | ||
2053 | store_op2 (op, loc, arg1, arg2) | ||
2054 | re_opcode_t op; | ||
2055 | unsigned char *loc; | ||
2056 | int arg1, arg2; | ||
2057 | { | ||
2058 | *loc = (unsigned char) op; | ||
2059 | STORE_NUMBER (loc + 1, arg1); | ||
2060 | STORE_NUMBER (loc + 3, arg2); | ||
2061 | } | ||
2062 | |||
2063 | |||
2064 | /* Copy the bytes from LOC to END to open up three bytes of space at LOC | ||
2065 | for OP followed by two-byte integer parameter ARG. */ | ||
2066 | |||
2067 | static void | ||
2068 | insert_op1 (op, loc, arg, end) | ||
2069 | re_opcode_t op; | ||
2070 | unsigned char *loc; | ||
2071 | int arg; | ||
2072 | unsigned char *end; | ||
2073 | { | ||
2074 | register unsigned char *pfrom = end; | ||
2075 | register unsigned char *pto = end + 3; | ||
2076 | |||
2077 | while (pfrom != loc) | ||
2078 | *--pto = *--pfrom; | ||
2079 | |||
2080 | store_op1 (op, loc, arg); | ||
2081 | } | ||
2082 | |||
2083 | |||
2084 | /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ | ||
2085 | |||
2086 | static void | ||
2087 | insert_op2 (op, loc, arg1, arg2, end) | ||
2088 | re_opcode_t op; | ||
2089 | unsigned char *loc; | ||
2090 | int arg1, arg2; | ||
2091 | unsigned char *end; | ||
2092 | { | ||
2093 | register unsigned char *pfrom = end; | ||
2094 | register unsigned char *pto = end + 5; | ||
2095 | |||
2096 | while (pfrom != loc) | ||
2097 | *--pto = *--pfrom; | ||
2098 | |||
2099 | store_op2 (op, loc, arg1, arg2); | ||
2100 | } | ||
2101 | |||
2102 | |||
2103 | /* P points to just after a ^ in PATTERN. Return true if that ^ comes | ||
2104 | after an alternative or a begin-subexpression. We assume there is at | ||
2105 | least one character before the ^. */ | ||
2106 | |||
2107 | static boolean | ||
2108 | at_begline_loc_p (pattern, p, syntax) | ||
2109 | const char *pattern, *p; | ||
2110 | reg_syntax_t syntax; | ||
2111 | { | ||
2112 | const char *prev = p - 2; | ||
2113 | boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; | ||
2114 | |||
2115 | return | ||
2116 | /* After a subexpression? */ | ||
2117 | (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) | ||
2118 | /* After an alternative? */ | ||
2119 | || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); | ||
2120 | } | ||
2121 | |||
2122 | |||
2123 | /* The dual of at_begline_loc_p. This one is for $. We assume there is | ||
2124 | at least one character after the $, i.e., `P < PEND'. */ | ||
2125 | |||
2126 | static boolean | ||
2127 | at_endline_loc_p (p, pend, syntax) | ||
2128 | const char *p, *pend; | ||
2129 | int syntax; | ||
2130 | { | ||
2131 | const char *next = p; | ||
2132 | boolean next_backslash = *next == '\\'; | ||
2133 | const char *next_next = p + 1 < pend ? p + 1 : NULL; | ||
2134 | |||
2135 | return | ||
2136 | /* Before a subexpression? */ | ||
2137 | (syntax & RE_NO_BK_PARENS ? *next == ')' | ||
2138 | : next_backslash && next_next && *next_next == ')') | ||
2139 | /* Before an alternative? */ | ||
2140 | || (syntax & RE_NO_BK_VBAR ? *next == '|' | ||
2141 | : next_backslash && next_next && *next_next == '|'); | ||
2142 | } | ||
2143 | |||
2144 | |||
2145 | /* Returns true if REGNUM is in one of COMPILE_STACK's elements and | ||
2146 | false if it's not. */ | ||
2147 | |||
2148 | static boolean | ||
2149 | group_in_compile_stack (compile_stack, regnum) | ||
2150 | compile_stack_type compile_stack; | ||
2151 | regnum_t regnum; | ||
2152 | { | ||
2153 | int this_element; | ||
2154 | |||
2155 | for (this_element = compile_stack.avail - 1; | ||
2156 | this_element >= 0; | ||
2157 | this_element--) | ||
2158 | if (compile_stack.stack[this_element].regnum == regnum) | ||
2159 | return true; | ||
2160 | |||
2161 | return false; | ||
2162 | } | ||
2163 | |||
2164 | |||
2165 | /* Read the ending character of a range (in a bracket expression) from the | ||
2166 | uncompiled pattern *P_PTR (which ends at PEND). We assume the | ||
2167 | starting character is in `P[-2]'. (`P[-1]' is the character `-'.) | ||
2168 | Then we set the translation of all bits between the starting and | ||
2169 | ending characters (inclusive) in the compiled pattern B. | ||
2170 | |||
2171 | Return an error code. | ||
2172 | |||
2173 | We use these short variable names so we can use the same macros as | ||
2174 | `regex_compile' itself. */ | ||
2175 | |||
2176 | static reg_errcode_t | ||
2177 | compile_range (p_ptr, pend, translate, syntax, b) | ||
2178 | const char **p_ptr, *pend; | ||
2179 | char *translate; | ||
2180 | reg_syntax_t syntax; | ||
2181 | unsigned char *b; | ||
2182 | { | ||
2183 | unsigned this_char; | ||
2184 | |||
2185 | const char *p = *p_ptr; | ||
2186 | int range_start, range_end; | ||
2187 | |||
2188 | if (p == pend) | ||
2189 | return REG_ERANGE; | ||
2190 | |||
2191 | /* Even though the pattern is a signed `char *', we need to fetch | ||
2192 | with unsigned char *'s; if the high bit of the pattern character | ||
2193 | is set, the range endpoints will be negative if we fetch using a | ||
2194 | signed char *. | ||
2195 | |||
2196 | We also want to fetch the endpoints without translating them; the | ||
2197 | appropriate translation is done in the bit-setting loop below. */ | ||
2198 | range_start = ((unsigned char *) p)[-2]; | ||
2199 | range_end = ((unsigned char *) p)[0]; | ||
2200 | |||
2201 | /* Have to increment the pointer into the pattern string, so the | ||
2202 | caller isn't still at the ending character. */ | ||
2203 | (*p_ptr)++; | ||
2204 | |||
2205 | /* If the start is after the end, the range is empty. */ | ||
2206 | if (range_start > range_end) | ||
2207 | return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; | ||
2208 | |||
2209 | /* Here we see why `this_char' has to be larger than an `unsigned | ||
2210 | char' -- the range is inclusive, so if `range_end' == 0xff | ||
2211 | (assuming 8-bit characters), we would otherwise go into an infinite | ||
2212 | loop, since all characters <= 0xff. */ | ||
2213 | for (this_char = range_start; this_char <= range_end; this_char++) | ||
2214 | { | ||
2215 | SET_LIST_BIT (TRANSLATE (this_char)); | ||
2216 | } | ||
2217 | |||
2218 | return REG_NOERROR; | ||
2219 | } | ||
2220 | |||
2221 | /* Failure stack declarations and macros; both re_compile_fastmap and | ||
2222 | re_match_2 use a failure stack. These have to be macros because of | ||
2223 | REGEX_ALLOCATE. */ | ||
2224 | |||
2225 | |||
/* How many failure-stack slots INIT_FAIL_STACK allocates up front.  The
   stack is grown on demand, so this is a starting size and not a cap.
   It may be overridden by defining it before this point.  */
#if !defined (INIT_FAILURE_ALLOC)
#define INIT_FAILURE_ALLOC 5
#endif

/* Approximate ceiling on the number of failure points kept on the stack
   (it would be exact if every failure point used MAX_FAILURE_ITEMS
   slots).  It is a variable only so that users of regex can assign to
   it; this code never changes it.  */
int re_max_failures = 2000;
2238 | |||
/* One slot of the failure stack.  */
typedef const unsigned char *fail_stack_elt_t;

/* A growable stack of failure-stack slots.  */
typedef struct
{
  fail_stack_elt_t *stack;      /* Storage for the slots.  */
  unsigned size;                /* Number of slots allocated.  */
  unsigned avail;               /* Offset of next open position.  */
} fail_stack_type;

/* Predicates and accessors.  All but FAIL_STACK_PTR_EMPTY operate on a
   variable named `fail_stack'; FAIL_STACK_PTR_EMPTY operates on a pointer
   named `fail_stack_ptr'.  Note that FAIL_STACK_TOP designates the slot
   at index `avail', i.e. the next open position.  */
#define FAIL_STACK_EMPTY() (!fail_stack.avail)
#define FAIL_STACK_PTR_EMPTY() (!fail_stack_ptr->avail)
#define FAIL_STACK_FULL() (fail_stack.size == fail_stack.avail)
#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
2252 | |||
2253 | |||
/* Set up the variable `fail_stack' as an empty stack with room for
   INIT_FAILURE_ALLOC slots obtained from REGEX_ALLOCATE.  Executes
   `return -2' in the enclosing function when the allocation yields
   NULL.  */

#define INIT_FAIL_STACK()                                               \
  do {                                                                  \
    fail_stack.stack = (fail_stack_elt_t *)                             \
      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));  \
                                                                        \
    if (fail_stack.stack == NULL)                                       \
      return -2;                                                        \
                                                                        \
    fail_stack.avail = 0;                                               \
    fail_stack.size = INIT_FAILURE_ALLOC;                               \
  } while (0)
2267 | |||
2268 | |||
/* Grow FAIL_STACK to twice its current number of slots, unless its size
   already exceeds `re_max_failures * MAX_FAILURE_ITEMS'.

   Evaluates to 1 when the stack was grown, and to 0 when it was already
   too large or REGEX_REALLOCATE produced NULL.

   REGEX_REALLOCATE requires `destination' be declared.  */

#define DOUBLE_FAIL_STACK(fail_stack)                                   \
  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS              \
   ? 0                                                                  \
   : ((fail_stack).stack = (fail_stack_elt_t *)                         \
        REGEX_REALLOCATE ((fail_stack).stack,                           \
          (fail_stack).size * sizeof (fail_stack_elt_t),                \
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
                                                                        \
      (fail_stack).stack != NULL                                        \
      ? ((fail_stack).size <<= 1, 1)                                    \
      : 0))
2288 | |||
2289 | |||
/* Push PATTERN_OP on FAIL_STACK, doubling the stack first when it is
   full.

   Evaluates to 1 when the item was pushed and to 0 when the stack was
   full and DOUBLE_FAIL_STACK could not enlarge it.

   The fullness test is written against the FAIL_STACK argument itself
   (not a variable that merely happens to be called `fail_stack'), and
   PATTERN_OP is parenthesized, so the macro behaves the same whatever
   expressions the caller passes.  */
#define PUSH_PATTERN_OP(pattern_op, fail_stack)                         \
  (((fail_stack).avail == (fail_stack).size                             \
    && !DOUBLE_FAIL_STACK (fail_stack))                                 \
   ? 0                                                                  \
   : ((fail_stack).stack[(fail_stack).avail++] = (pattern_op),          \
      1))
2300 | |||
/* Push ITEM, converted to fail_stack_elt_t, onto the variable
   `fail_stack'.  No bounds check is made, so the caller must already
   have ensured there is room; intended for use from within
   `PUSH_FAILURE_POINT'.

   ITEM is parenthesized so that the cast applies to the whole argument
   rather than to its first operand only.  */
#define PUSH_FAILURE_ITEM(item)                                         \
  (fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) (item))

/* The complement operation.  Assumes `fail_stack' is nonempty.  */
#define POP_FAILURE_ITEM() (fail_stack.stack[--fail_stack.avail])

/* Failure point ids are pushed and popped only when debugging; in other
   builds these expand to nothing.  */
#ifdef DEBUG
#define DEBUG_PUSH PUSH_FAILURE_ITEM
#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
#else
#define DEBUG_PUSH(item)
#define DEBUG_POP(item_addr)
#endif
2318 | |||
2319 | |||
/* Save on the failure stack everything needed to resume matching from
   this point should a later part of the match fail.

   Pushed, in order: for each register from lowest_active_reg through
   highest_active_reg its start, end and info word; then
   lowest_active_reg, highest_active_reg, PATTERN_PLACE, STRING_PLACE
   and, when debugging, the failure id.

   Uses the variables fail_stack, regstart, regend, reg_info,
   lowest_active_reg and highest_active_reg; num_regs must be declared
   because DOUBLE_FAIL_STACK consults MAX_FAILURE_ITEMS.  A local
   `destination' is declared here for REGEX_REALLOCATE.

   Does `return FAILURE_CODE' if the stack cannot be grown enough.  */

#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
  do { \
    char *destination; \
    /* Kept as int so that, when no registers are saved, the arithmetic \
       of 0 + -1 isn't done as unsigned.  */ \
    int this_reg; \
    \
    DEBUG_STATEMENT (failure_id++); \
    DEBUG_STATEMENT (nfailure_points_pushed++); \
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
    DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \
    DEBUG_PRINT2 (" size: %d\n", (fail_stack).size); \
    \
    DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
    DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
    \
    /* Grow the stack until everything pushed below will fit.  */ \
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
      { \
        if (!DOUBLE_FAIL_STACK (fail_stack)) \
          return failure_code; \
        \
        DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
                      (fail_stack).size); \
        DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS); \
      } \
    \
    /* Registers go first.  */ \
    DEBUG_PRINT1 ("\n"); \
    \
    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
         this_reg++) \
      { \
        DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
        DEBUG_STATEMENT (num_regs_pushed++); \
        \
        DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
        PUSH_FAILURE_ITEM (regstart[this_reg]); \
        \
        DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
        PUSH_FAILURE_ITEM (regend[this_reg]); \
        \
        DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
        DEBUG_PRINT2 (" match_null=%d", \
                      REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
        DEBUG_PRINT2 (" matched_something=%d", \
                      MATCHED_SOMETHING (reg_info[this_reg])); \
        DEBUG_PRINT2 (" ever_matched=%d", \
                      EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
        DEBUG_PRINT1 ("\n"); \
        PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
      } \
    \
    /* Then the bounds of the active register range ...  */ \
    DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg); \
    PUSH_FAILURE_ITEM (lowest_active_reg); \
    \
    DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg); \
    PUSH_FAILURE_ITEM (highest_active_reg); \
    \
    /* ... the place in the pattern ...  */ \
    DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
    PUSH_FAILURE_ITEM (pattern_place); \
    \
    /* ... the place in the string ...  */ \
    DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
                               size2); \
    DEBUG_PRINT1 ("'\n"); \
    PUSH_FAILURE_ITEM (string_place); \
    \
    /* ... and, only when debugging, the failure id.  */ \
    DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
    DEBUG_PUSH (failure_id); \
  } while (0)
2402 | |||
/* Slots used per saved register: its start, its end and its info
   word.  */
#define NUM_REG_ITEMS 3

/* Slots used per failure point apart from the registers: the low and
   high active register numbers, the pattern place and the string place,
   plus the failure point id when debugging.  */
#ifdef DEBUG
#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
#else
#define NUM_NONREG_ITEMS 4
#endif

/* Upper bound on the slots one failure point can take; reads the
   variable `num_regs'.  */
#define MAX_FAILURE_ITEMS (NUM_NONREG_ITEMS + NUM_REG_ITEMS * (num_regs - 1))

/* Slots the next failure point will actually take; reads the variables
   `lowest_active_reg' and `highest_active_reg'.  */
#define NUM_FAILURE_ITEMS \
  (NUM_NONREG_ITEMS \
   + NUM_REG_ITEMS * (highest_active_reg - lowest_active_reg + 1))

/* Slots still free in FAIL_STACK before it has to grow.  */
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2424 | |||
2425 | |||
/* Undo one PUSH_FAILURE_POINT, popping the items in the reverse of the
   order in which they were pushed.

   The saved state is restored into the parameters, all of which must be
   lvalues:
     STR -- the saved data position (left untouched when the saved value
            is NULL).
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Also assumes the variables `fail_stack' and (if debugging), `bufp',
   `pend', `string1', `size1', `string2', and `size2'.  */

#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info) \
{ \
  DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
  int this_reg; \
  const unsigned char *string_temp; \
  \
  assert (!FAIL_STACK_EMPTY ()); \
  \
  /* Remove failure points and point to how many regs pushed.  */ \
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
  DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
  DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
  \
  assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
  \
  DEBUG_POP (&failure_id); \
  DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
  \
  /* A NULL string location was saved by an on_failure_keep_string_jump \
     opcode; discard it so the current string position is retained.  */ \
  string_temp = POP_FAILURE_ITEM (); \
  if (string_temp != NULL) \
    str = (const char *) string_temp; \
  \
  DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
  DEBUG_PRINT1 ("'\n"); \
  \
  pat = (unsigned char *) POP_FAILURE_ITEM (); \
  DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
  \
  /* The active register range comes off high bound first.  */ \
  high_reg = (unsigned) POP_FAILURE_ITEM (); \
  DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
  \
  low_reg = (unsigned) POP_FAILURE_ITEM (); \
  DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
  \
  /* Registers were pushed low to high, so they come off high to low, \
     each as info word, end, start.  */ \
  for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
    { \
      DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
      \
      reg_info[this_reg].word = POP_FAILURE_ITEM (); \
      DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
      \
      regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
      DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
      \
      regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
      DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
    } \
  \
  DEBUG_STATEMENT (nfailure_points_popped++); \
} /* POP_FAILURE_POINT */
2494 | |||
2495 | /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in | ||
2496 | BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible | ||
2497 | characters can start a string that matches the pattern. This fastmap | ||
2498 | is used by re_search to skip quickly over impossible starting points. | ||
2499 | |||
2500 | The caller must supply the address of a (1 << BYTEWIDTH)-byte data | ||
2501 | area as BUFP->fastmap. | ||
2502 | |||
2503 | We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in | ||
2504 | the pattern buffer. | ||
2505 | |||
2506 | Returns 0 if we succeed, -2 if an internal error. */ | ||
2507 | |||
2508 | int | ||
2509 | re_compile_fastmap (bufp) | ||
2510 | struct re_pattern_buffer *bufp; | ||
2511 | { | ||
2512 | int j, k; | ||
2513 | fail_stack_type fail_stack; | ||
2514 | #ifndef REGEX_MALLOC | ||
2515 | char *destination; | ||
2516 | #endif | ||
2517 | /* We don't push any register information onto the failure stack. */ | ||
2518 | unsigned num_regs = 0; | ||
2519 | |||
2520 | register char *fastmap = bufp->fastmap; | ||
2521 | unsigned char *pattern = bufp->buffer; | ||
2522 | unsigned long size = bufp->used; | ||
2523 | const unsigned char *p = pattern; | ||
2524 | register unsigned char *pend = pattern + size; | ||
2525 | |||
2526 | /* Assume that each path through the pattern can be null until | ||
2527 | proven otherwise. We set this false at the bottom of switch | ||
2528 | statement, to which we get only if a particular path doesn't | ||
2529 | match the empty string. */ | ||
2530 | boolean path_can_be_null = true; | ||
2531 | |||
2532 | /* We aren't doing a `succeed_n' to begin with. */ | ||
2533 | boolean succeed_n_p = false; | ||
2534 | |||
2535 | assert (fastmap != NULL && p != NULL); | ||
2536 | |||
2537 | INIT_FAIL_STACK (); | ||
2538 | bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ | ||
2539 | bufp->fastmap_accurate = 1; /* It will be when we're done. */ | ||
2540 | bufp->can_be_null = 0; | ||
2541 | |||
2542 | while (p != pend || !FAIL_STACK_EMPTY ()) | ||
2543 | { | ||
2544 | if (p == pend) | ||
2545 | { | ||
2546 | bufp->can_be_null |= path_can_be_null; | ||
2547 | |||
2548 | /* Reset for next path. */ | ||
2549 | path_can_be_null = true; | ||
2550 | |||
2551 | p = fail_stack.stack[--fail_stack.avail]; | ||
2552 | } | ||
2553 | |||
2554 | /* We should never be about to go beyond the end of the pattern. */ | ||
2555 | assert (p < pend); | ||
2556 | |||
2557 | #ifdef SWITCH_ENUM_BUG | ||
2558 | switch ((int) ((re_opcode_t) *p++)) | ||
2559 | #else | ||
2560 | switch ((re_opcode_t) *p++) | ||
2561 | #endif | ||
2562 | { | ||
2563 | |||
2564 | /* I guess the idea here is to simply not bother with a fastmap | ||
2565 | if a backreference is used, since it's too hard to figure out | ||
2566 | the fastmap for the corresponding group. Setting | ||
2567 | `can_be_null' stops `re_search_2' from using the fastmap, so | ||
2568 | that is all we do. */ | ||
2569 | case duplicate: | ||
2570 | bufp->can_be_null = 1; | ||
2571 | return 0; | ||
2572 | |||
2573 | |||
2574 | /* Following are the cases which match a character. These end | ||
2575 | with `break'. */ | ||
2576 | |||
2577 | case exactn: | ||
2578 | fastmap[p[1]] = 1; | ||
2579 | break; | ||
2580 | |||
2581 | |||
2582 | case charset: | ||
2583 | for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) | ||
2584 | if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) | ||
2585 | fastmap[j] = 1; | ||
2586 | break; | ||
2587 | |||
2588 | |||
2589 | case charset_not: | ||
2590 | /* Chars beyond end of map must be allowed. */ | ||
2591 | for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) | ||
2592 | fastmap[j] = 1; | ||
2593 | |||
2594 | for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) | ||
2595 | if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) | ||
2596 | fastmap[j] = 1; | ||
2597 | break; | ||
2598 | |||
2599 | |||
2600 | case wordchar: | ||
2601 | for (j = 0; j < (1 << BYTEWIDTH); j++) | ||
2602 | if (SYNTAX (j) == Sword) | ||
2603 | fastmap[j] = 1; | ||
2604 | break; | ||
2605 | |||
2606 | |||
2607 | case notwordchar: | ||
2608 | for (j = 0; j < (1 << BYTEWIDTH); j++) | ||
2609 | if (SYNTAX (j) != Sword) | ||
2610 | fastmap[j] = 1; | ||
2611 | break; | ||
2612 | |||
2613 | |||
2614 | case anychar: | ||
2615 | /* `.' matches anything ... */ | ||
2616 | for (j = 0; j < (1 << BYTEWIDTH); j++) | ||
2617 | fastmap[j] = 1; | ||
2618 | |||
2619 | /* ... except perhaps newline. */ | ||
2620 | if (!(bufp->syntax & RE_DOT_NEWLINE)) | ||
2621 | fastmap['\n'] = 0; | ||
2622 | |||
2623 | /* Return if we have already set `can_be_null'; if we have, | ||
2624 | then the fastmap is irrelevant. Something's wrong here. */ | ||
2625 | else if (bufp->can_be_null) | ||
2626 | return 0; | ||
2627 | |||
2628 | /* Otherwise, have to check alternative paths. */ | ||
2629 | break; | ||
2630 | |||
2631 | |||
2632 | #ifdef emacs | ||
2633 | case syntaxspec: | ||
2634 | k = *p++; | ||
2635 | for (j = 0; j < (1 << BYTEWIDTH); j++) | ||
2636 | if (SYNTAX (j) == (enum syntaxcode) k) | ||
2637 | fastmap[j] = 1; | ||
2638 | break; | ||
2639 | |||
2640 | |||
2641 | case notsyntaxspec: | ||
2642 | k = *p++; | ||
2643 | for (j = 0; j < (1 << BYTEWIDTH); j++) | ||
2644 | if (SYNTAX (j) != (enum syntaxcode) k) | ||
2645 | fastmap[j] = 1; | ||
2646 | break; | ||
2647 | |||
2648 | |||
2649 | /* All cases after this match the empty string. These end with | ||
2650 | `continue'. */ | ||
2651 | |||
2652 | |||
2653 | case before_dot: | ||
2654 | case at_dot: | ||
2655 | case after_dot: | ||
2656 | continue; | ||
2657 | #endif /* not emacs */ | ||
2658 | |||
2659 | |||
2660 | case no_op: | ||
2661 | case begline: | ||
2662 | case endline: | ||
2663 | case begbuf: | ||
2664 | case endbuf: | ||
2665 | case wordbound: | ||
2666 | case notwordbound: | ||
2667 | case wordbeg: | ||
2668 | case wordend: | ||
2669 | case push_dummy_failure: | ||
2670 | continue; | ||
2671 | |||
2672 | |||
2673 | case jump_n: | ||
2674 | case pop_failure_jump: | ||
2675 | case maybe_pop_jump: | ||
2676 | case jump: | ||
2677 | case jump_past_alt: | ||
2678 | case dummy_failure_jump: | ||
2679 | EXTRACT_NUMBER_AND_INCR (j, p); | ||
2680 | p += j; | ||
2681 | if (j > 0) | ||
2682 | continue; | ||
2683 | |||
2684 | /* Jump backward implies we just went through the body of a | ||
2685 | loop and matched nothing. Opcode jumped to should be | ||
2686 | `on_failure_jump' or `succeed_n'. Just treat it like an | ||
2687 | ordinary jump. For a * loop, it has pushed its failure | ||
2688 | point already; if so, discard that as redundant. */ | ||
2689 | if ((re_opcode_t) *p != on_failure_jump | ||
2690 | && (re_opcode_t) *p != succeed_n) | ||
2691 | continue; | ||
2692 | |||
2693 | p++; | ||
2694 | EXTRACT_NUMBER_AND_INCR (j, p); | ||
2695 | p += j; | ||
2696 | |||
2697 | /* If what's on the stack is where we are now, pop it. */ | ||
2698 | if (!FAIL_STACK_EMPTY () | ||
2699 | && fail_stack.stack[fail_stack.avail - 1] == p) | ||
2700 | fail_stack.avail--; | ||
2701 | |||
2702 | continue; | ||
2703 | |||
2704 | |||
2705 | case on_failure_jump: | ||
2706 | case on_failure_keep_string_jump: | ||
2707 | handle_on_failure_jump: | ||
2708 | EXTRACT_NUMBER_AND_INCR (j, p); | ||
2709 | |||
2710 | /* For some patterns, e.g., `(a?)?', `p+j' here points to the | ||
2711 | end of the pattern. We don't want to push such a point, | ||
2712 | since when we restore it above, entering the switch will | ||
2713 | increment `p' past the end of the pattern. We don't need | ||
2714 | to push such a point since we obviously won't find any more | ||
2715 | fastmap entries beyond `pend'. Such a pattern can match | ||
2716 | the null string, though. */ | ||
2717 | if (p + j < pend) | ||
2718 | { | ||
2719 | if (!PUSH_PATTERN_OP (p + j, fail_stack)) | ||
2720 | return -2; | ||
2721 | } | ||
2722 | else | ||
2723 | bufp->can_be_null = 1; | ||
2724 | |||
2725 | if (succeed_n_p) | ||
2726 | { | ||
2727 | EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ | ||
2728 | succeed_n_p = false; | ||
2729 | } | ||
2730 | |||
2731 | continue; | ||
2732 | |||
2733 | |||
2734 | case succeed_n: | ||
2735 | /* Get to the number of times to succeed. */ | ||
2736 | p += 2; | ||
2737 | |||
2738 | /* Increment p past the n for when k != 0. */ | ||
2739 | EXTRACT_NUMBER_AND_INCR (k, p); | ||
2740 | if (k == 0) | ||
2741 | { | ||
2742 | p -= 4; | ||
2743 | succeed_n_p = true; /* Spaghetti code alert. */ | ||
2744 | goto handle_on_failure_jump; | ||
2745 | } | ||
2746 | continue; | ||
2747 | |||
2748 | |||
2749 | case set_number_at: | ||
2750 | p += 4; | ||
2751 | continue; | ||
2752 | |||
2753 | |||
2754 | case start_memory: | ||
2755 | case stop_memory: | ||
2756 | p += 2; | ||
2757 | continue; | ||
2758 | |||
2759 | |||
2760 | default: | ||
2761 | abort (); /* We have listed all the cases. */ | ||
2762 | } /* switch *p++ */ | ||
2763 | |||
2764 | /* Getting here means we have found the possible starting | ||
2765 | characters for one path of the pattern -- and that the empty | ||
2766 | string does not match. We need not follow this path further. | ||
2767 | Instead, look at the next alternative (remembered on the | ||
2768 | stack), or quit if no more. The test at the top of the loop | ||
2769 | does these things. */ | ||
2770 | path_can_be_null = false; | ||
2771 | p = pend; | ||
2772 | } /* while p */ | ||
2773 | |||
2774 | /* Set `can_be_null' for the last path (also the first path, if the | ||
2775 | pattern is empty). */ | ||
2776 | bufp->can_be_null |= path_can_be_null; | ||
2777 | return 0; | ||
2778 | } /* re_compile_fastmap */ | ||
2779 | |||
2780 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and | ||
2781 | ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use | ||
2782 | this memory for recording register information. STARTS and ENDS | ||
2783 | must be allocated using the malloc library routine, and must each | ||
2784 | be at least NUM_REGS * sizeof (regoff_t) bytes long. | ||
2785 | |||
2786 | If NUM_REGS == 0, then subsequent matches should allocate their own | ||
2787 | register data. | ||
2788 | |||
2789 | Unless this function is called, the first search or match using | ||
2790 | PATTERN_BUFFER will allocate its own register data, without | ||
2791 | freeing the old data. */ | ||
2792 | |||
2793 | void | ||
2794 | re_set_registers (bufp, regs, num_regs, starts, ends) | ||
2795 | struct re_pattern_buffer *bufp; | ||
2796 | struct re_registers *regs; | ||
2797 | unsigned num_regs; | ||
2798 | regoff_t *starts, *ends; | ||
2799 | { | ||
2800 | if (num_regs) | ||
2801 | { | ||
2802 | bufp->regs_allocated = REGS_REALLOCATE; | ||
2803 | regs->num_regs = num_regs; | ||
2804 | regs->start = starts; | ||
2805 | regs->end = ends; | ||
2806 | } | ||
2807 | else | ||
2808 | { | ||
2809 | bufp->regs_allocated = REGS_UNALLOCATED; | ||
2810 | regs->num_regs = 0; | ||
2811 | regs->start = regs->end = (regoff_t) 0; | ||
2812 | } | ||
2813 | } | ||
2814 | |||
2815 | /* Searching routines. */ | ||
2816 | |||
/* Single-string convenience wrapper around re_search_2: STRING (of
   length SIZE) is passed as the second string with an empty first
   string, and the stop index is SIZE, so the caller cannot limit where
   matching stops.  Returns whatever re_search_2 returns.  */

int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  int stop = size;

  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
                      regs, stop);
}
2830 | |||
2831 | |||
2832 | /* Using the compiled pattern in BUFP->buffer, first tries to match the | ||
2833 | virtual concatenation of STRING1 and STRING2, starting first at index | ||
2834 | STARTPOS, then at STARTPOS + 1, and so on. | ||
2835 | |||
2836 | STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. | ||
2837 | |||
2838 | RANGE is how far to scan while trying to match. RANGE = 0 means try | ||
2839 | only at STARTPOS; in general, the last start tried is STARTPOS + | ||
2840 | RANGE. | ||
2841 | |||
2842 | In REGS, return the indices of the virtual concatenation of STRING1 | ||
2843 | and STRING2 that matched the entire BUFP->buffer and its contained | ||
2844 | subexpressions. | ||
2845 | |||
2846 | Do not consider matching one past the index STOP in the virtual | ||
2847 | concatenation of STRING1 and STRING2. | ||
2848 | |||
2849 | We return either the position in the strings at which the match was | ||
2850 | found, -1 if no match, or -2 if error (such as failure | ||
2851 | stack overflow). */ | ||
2852 | |||
2853 | int | ||
2854 | re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | ||
2855 | struct re_pattern_buffer *bufp; | ||
2856 | const char *string1, *string2; | ||
2857 | int size1, size2; | ||
2858 | int startpos; | ||
2859 | int range; | ||
2860 | struct re_registers *regs; | ||
2861 | int stop; | ||
2862 | { | ||
2863 | int val; | ||
2864 | register char *fastmap = bufp->fastmap; | ||
2865 | register char *translate = bufp->translate; | ||
2866 | int total_size = size1 + size2; | ||
2867 | int endpos = startpos + range; | ||
2868 | |||
2869 | /* Check for out-of-range STARTPOS. */ | ||
2870 | if (startpos < 0 || startpos > total_size) | ||
2871 | return -1; | ||
2872 | |||
2873 | /* Fix up RANGE if it might eventually take us outside | ||
2874 | the virtual concatenation of STRING1 and STRING2. */ | ||
2875 | if (endpos < -1) | ||
2876 | range = -1 - startpos; | ||
2877 | else if (endpos > total_size) | ||
2878 | range = total_size - startpos; | ||
2879 | |||
2880 | /* If the search isn't to be a backwards one, don't waste time in a | ||
2881 | search for a pattern that must be anchored. */ | ||
2882 | if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) | ||
2883 | { | ||
2884 | if (startpos > 0) | ||
2885 | return -1; | ||
2886 | else | ||
2887 | range = 1; | ||
2888 | } | ||
2889 | |||
2890 | /* Update the fastmap now if not correct already. */ | ||
2891 | if (fastmap && !bufp->fastmap_accurate) | ||
2892 | if (re_compile_fastmap (bufp) == -2) | ||
2893 | return -2; | ||
2894 | |||
2895 | /* Loop through the string, looking for a place to start matching. */ | ||
2896 | for (;;) | ||
2897 | { | ||
2898 | /* If a fastmap is supplied, skip quickly over characters that | ||
2899 | cannot be the start of a match. If the pattern can match the | ||
2900 | null string, however, we don't need to skip characters; we want | ||
2901 | the first null string. */ | ||
2902 | if (fastmap && startpos < total_size && !bufp->can_be_null) | ||
2903 | { | ||
2904 | if (range > 0) /* Searching forwards. */ | ||
2905 | { | ||
2906 | register const char *d; | ||
2907 | register int lim = 0; | ||
2908 | int irange = range; | ||
2909 | |||
2910 | if (startpos < size1 && startpos + range >= size1) | ||
2911 | lim = range - (size1 - startpos); | ||
2912 | |||
2913 | d = (startpos >= size1 ? string2 - size1 : string1) + startpos; | ||
2914 | |||
2915 | /* Written out as an if-else to avoid testing `translate' | ||
2916 | inside the loop. */ | ||
2917 | if (translate) | ||
2918 | while (range > lim | ||
2919 | && !fastmap[(unsigned char) | ||
2920 | translate[(unsigned char) *d++]]) | ||
2921 | range--; | ||
2922 | else | ||
2923 | while (range > lim && !fastmap[(unsigned char) *d++]) | ||
2924 | range--; | ||
2925 | |||
2926 | startpos += irange - range; | ||
2927 | } | ||
2928 | else /* Searching backwards. */ | ||
2929 | { | ||
2930 | register char c = (size1 == 0 || startpos >= size1 | ||
2931 | ? string2[startpos - size1] | ||
2932 | : string1[startpos]); | ||
2933 | |||
2934 | if (!fastmap[(unsigned char) TRANSLATE (c)]) | ||
2935 | goto advance; | ||
2936 | } | ||
2937 | } | ||
2938 | |||
2939 | /* If can't match the null string, and that's all we have left, fail. */ | ||
2940 | if (range >= 0 && startpos == total_size && fastmap | ||
2941 | && !bufp->can_be_null) | ||
2942 | return -1; | ||
2943 | |||
2944 | val = re_match_2 (bufp, string1, size1, string2, size2, | ||
2945 | startpos, regs, stop); | ||
2946 | if (val >= 0) | ||
2947 | return startpos; | ||
2948 | |||
2949 | if (val == -2) | ||
2950 | return -2; | ||
2951 | |||
2952 | advance: | ||
2953 | if (!range) | ||
2954 | break; | ||
2955 | else if (range > 0) | ||
2956 | { | ||
2957 | range--; | ||
2958 | startpos++; | ||
2959 | } | ||
2960 | else | ||
2961 | { | ||
2962 | range++; | ||
2963 | startpos--; | ||
2964 | } | ||
2965 | } | ||
2966 | return -1; | ||
2967 | } /* re_search_2 */ | ||
2968 | |||
2969 | /* Declarations and macros for re_match_2. */ | ||
2970 | |||
2971 | static int bcmp_translate (); /* Forward declarations with empty (unprototyped) parameter lists, so calls are not argument-checked. */ | ||
2972 | static boolean alt_match_null_string_p (), | ||
2973 | common_op_match_null_string_p (), | ||
2974 | group_match_null_string_p (); | ||
2975 | |||
2976 | /* Structure for per-register (a.k.a. per-group) information. | ||
2977 | This must not be longer than one word, because we push this value | ||
2978 | onto the failure stack. Other register information, such as the | ||
2979 | starting and ending positions (which are addresses), and the list of | ||
2980 | inner groups (which is a bit list) are maintained in separate | ||
2981 | variables. | ||
2982 | |||
2983 | We are making a (strictly speaking) nonportable assumption here: that | ||
2984 | the compiler will pack our bit fields into something that fits into | ||
2985 | the type of `word', i.e., something that fits into one item on the | ||
2986 | failure stack. The five bits used below are accessed only through the macros that follow. */ | ||
2987 | typedef union | ||
2988 | { | ||
2989 | fail_stack_elt_t word; | ||
2990 | struct | ||
2991 | { | ||
2992 | /* This field is one if this group can match the empty string, | ||
2993 | zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ | ||
2994 | #define MATCH_NULL_UNSET_VALUE 3 | ||
2995 | unsigned match_null_string_p : 2; /* Two bits wide so that MATCH_NULL_UNSET_VALUE (3) fits. */ | ||
2996 | unsigned is_active : 1; /* re_match_2 sets this at start_memory and clears it at stop_memory. */ | ||
2997 | unsigned matched_something : 1; /* Cleared at start_memory; set by SET_REGS_MATCHED. */ | ||
2998 | unsigned ever_matched_something : 1; /* Set together with matched_something by SET_REGS_MATCHED. */ | ||
2999 | } bits; | ||
3000 | } register_info_type; | ||
3001 | |||
3002 | #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) /* Accessors: R is a register_info_type; each expands to an lvalue. */ | ||
3003 | #define IS_ACTIVE(R) ((R).bits.is_active) | ||
3004 | #define MATCHED_SOMETHING(R) ((R).bits.matched_something) | ||
3005 | #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) | ||
3006 | |||
3007 | |||
3008 | /* Call this when we have matched a real character; it sets the | ||
3009 | `matched_something' and `ever_matched_something' flags of every `reg_info' | ||
3010 | entry from `lowest_active_reg' through `highest_active_reg'. Those three names must be variables in scope where the macro is expanded. */ | ||
3011 | #define SET_REGS_MATCHED() \ | ||
3012 | do \ | ||
3013 | { \ | ||
3014 | unsigned r; \ | ||
3015 | for (r = lowest_active_reg; r <= highest_active_reg; r++) \ | ||
3016 | { \ | ||
3017 | MATCHED_SOMETHING (reg_info[r]) \ | ||
3018 | = EVER_MATCHED_SOMETHING (reg_info[r]) \ | ||
3019 | = 1; \ | ||
3020 | } \ | ||
3021 | } \ | ||
3022 | while (0) | ||
3023 | |||
3024 | |||
3025 | /* This converts PTR, a pointer into one of the search strings `string1' | ||
3026 | and `string2', into an offset within their virtual concatenation: a pointer into `string2' gets `size1' added to its distance from `string2'. */ | ||
3027 | #define POINTER_TO_OFFSET(ptr) \ | ||
3028 | (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) | ||
3029 | |||
3030 | /* Registers are set to a sentinel when they haven't yet matched. The sentinel is the pointer value (char *) -1. */ | ||
3031 | #define REG_UNSET_VALUE ((char *) -1) | ||
3032 | #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) | ||
3033 | |||
3034 | |||
3035 | /* Macros for dealing with the split strings in re_match_2. They refer to its local variables by name. */ | ||
3036 | |||
3037 | #define MATCHING_IN_FIRST_STRING (dend == end_match_1) | ||
3038 | |||
3039 | /* Call before fetching a character with *d. This switches over to | ||
3040 | string2 if necessary. If `d' has reached `dend' and `dend' is already `end_match_2', control goes to the `fail' label instead. */ | ||
3041 | #define PREFETCH() \ | ||
3042 | while (d == dend) \ | ||
3043 | { \ | ||
3044 | /* End of string2 => fail. */ \ | ||
3045 | if (dend == end_match_2) \ | ||
3046 | goto fail; \ | ||
3047 | /* End of string1 => advance to string2. */ \ | ||
3048 | d = string2; \ | ||
3049 | dend = end_match_2; \ | ||
3050 | } | ||
3051 | |||
3052 | |||
3053 | /* Test if at very beginning or at very end of the virtual concatenation | ||
3054 | of `string1' and `string2'. If only one string, it's `string2'. Note that AT_STRINGS_BEG is also true whenever `size2' is zero. */ | ||
3055 | #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) | ||
3056 | #define AT_STRINGS_END(d) ((d) == end2) | ||
3057 | |||
3058 | |||
3059 | /* Test if D points to a character which is word-constituent. We have | ||
3060 | two special cases to check for: if past the end of string1, look at | ||
3061 | the first character in string2; and if before the beginning of | ||
3062 | string2, look at the last character in string1. */ | ||
3063 | #define WORDCHAR_P(d) \ | ||
3064 | (SYNTAX ((d) == end1 ? *string2 \ | ||
3065 | : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ | ||
3066 | == Sword) | ||
3067 | |||
3068 | /* Test if the character before D and the one at D differ with respect | ||
3069 | to being word-constituent. The very beginning and the very end of the strings also count as boundaries, and are tested first so that WORDCHAR_P is not evaluated there. */ | ||
3070 | #define AT_WORD_BOUNDARY(d) \ | ||
3071 | (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ | ||
3072 | || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) | ||
3073 | |||
3074 | |||
/* Free everything we malloc.  */
#ifdef REGEX_MALLOC
/* Free VAR and reset it to NULL.  The do/while (0) wrapper makes the
   expansion a single statement, so `if (cond) FREE_VAR (p);' only
   touches P when COND holds.  free (NULL) is a no-op, so no guard is
   needed.  VAR is evaluated twice and must be a modifiable lvalue.  */
#define FREE_VAR(var)                                                   \
  do                                                                    \
    {                                                                   \
      free (var);                                                       \
      (var) = NULL;                                                     \
    }                                                                   \
  while (0)
/* Release every heap-allocated work area named below and reset the
   pointers; the names are resolved where the macro is expanded.  */
#define FREE_VARIABLES()                                                \
  do {                                                                  \
    FREE_VAR (fail_stack.stack);                                        \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
  } while (0)
#else /* not REGEX_MALLOC */
/* Some MIPS systems (at least) want this to free alloca'd storage.  */
#define FREE_VARIABLES() alloca (0)
#endif /* not REGEX_MALLOC */
3095 | |||
3096 | |||
3097 | /* These values must meet several constraints. They must not be valid | ||
3098 | register values; since we have a limit of 255 registers (because | ||
3099 | we use only one byte in the pattern for the register number), we can | ||
3100 | use numbers larger than 255. They must differ by 1, because of | ||
3101 | NUM_FAILURE_ITEMS above. And the value for the lowest register must | ||
3102 | be larger than the value for the highest register, so we do not try | ||
3103 | to actually save any registers when none are active. As defined here, the `highest' sentinel is 1 << BYTEWIDTH and the `lowest' sentinel is one more, so a loop from lowest to highest runs zero times. */ | ||
3104 | #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) | ||
3105 | #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) | ||
3106 | |||
3107 | /* Matching routines. */ | ||
3108 | |||
#ifndef emacs   /* Emacs never uses this.  */
/* Single-string front end to re_match_2: match BUFP against the SIZE
   bytes at STRING, starting at offset POS, and hand back whatever
   re_match_2 returns.  */

int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  /* The text is passed as the second string with an absent (NULL,
     zero-length) first string; SIZE also serves as the stop offset.  */
  int match_result = re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);

  return match_result;
}
#endif /* not emacs */
3122 | |||
3123 | |||
3124 | /* re_match_2 matches the compiled pattern in BUFP against the | ||
3125 | (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 | ||
3126 | and SIZE2, respectively). We start matching at POS, and stop | ||
3127 | matching at STOP. | ||
3128 | |||
3129 | If REGS is non-null and the `no_sub' field of BUFP is zero, we | ||
3130 | store offsets for the substring each group matched in REGS. See the | ||
3131 | documentation for exactly how many groups we fill. | ||
3132 | |||
3133 | We return -1 if no match, -2 if an internal error (such as the | ||
3134 | failure stack overflowing). Otherwise, we return the length of the | ||
3135 | matched substring. */ | ||
3136 | |||
3137 | int | ||
3138 | re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | ||
3139 | struct re_pattern_buffer *bufp; | ||
3140 | const char *string1, *string2; | ||
3141 | int size1, size2; | ||
3142 | int pos; | ||
3143 | struct re_registers *regs; | ||
3144 | int stop; | ||
3145 | { | ||
3146 | /* General temporaries. */ | ||
3147 | int mcnt; | ||
3148 | unsigned char *p1; | ||
3149 | |||
3150 | /* Just past the end of the corresponding string. */ | ||
3151 | const char *end1, *end2; | ||
3152 | |||
3153 | /* Pointers into string1 and string2, just past the last characters in | ||
3154 | each to consider matching. */ | ||
3155 | const char *end_match_1, *end_match_2; | ||
3156 | |||
3157 | /* Where we are in the data, and the end of the current string. */ | ||
3158 | const char *d, *dend; | ||
3159 | |||
3160 | /* Where we are in the pattern, and the end of the pattern. */ | ||
3161 | unsigned char *p = bufp->buffer; | ||
3162 | register unsigned char *pend = p + bufp->used; | ||
3163 | |||
3164 | /* We use this to map every character in the string. */ | ||
3165 | char *translate = bufp->translate; | ||
3166 | |||
3167 | /* Failure point stack. Each place that can handle a failure further | ||
3168 | down the line pushes a failure point on this stack. It consists of | ||
3169 | restart, regend, and reg_info for all registers corresponding to | ||
3170 | the subexpressions we're currently inside, plus the number of such | ||
3171 | registers, and, finally, two char *'s. The first char * is where | ||
3172 | to resume scanning the pattern; the second one is where to resume | ||
3173 | scanning the strings. If the latter is zero, the failure point is | ||
3174 | a ``dummy''; if a failure happens and the failure point is a dummy, | ||
3175 | it gets discarded and the next next one is tried. */ | ||
3176 | fail_stack_type fail_stack; | ||
3177 | #ifdef DEBUG | ||
3178 | static unsigned failure_id = 0; | ||
3179 | unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; | ||
3180 | #endif | ||
3181 | |||
3182 | /* We fill all the registers internally, independent of what we | ||
3183 | return, for use in backreferences. The number here includes | ||
3184 | an element for register zero. */ | ||
3185 | unsigned num_regs = bufp->re_nsub + 1; | ||
3186 | |||
3187 | /* The currently active registers. */ | ||
3188 | unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; | ||
3189 | unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; | ||
3190 | |||
3191 | /* Information on the contents of registers. These are pointers into | ||
3192 | the input strings; they record just what was matched (on this | ||
3193 | attempt) by a subexpression part of the pattern, that is, the | ||
3194 | regnum-th regstart pointer points to where in the pattern we began | ||
3195 | matching and the regnum-th regend points to right after where we | ||
3196 | stopped matching the regnum-th subexpression. (The zeroth register | ||
3197 | keeps track of what the whole pattern matches.) */ | ||
3198 | const char **regstart = NULL, **regend = NULL; | ||
3199 | |||
3200 | /* If a group that's operated upon by a repetition operator fails to | ||
3201 | match anything, then the register for its start will need to be | ||
3202 | restored because it will have been set to wherever in the string we | ||
3203 | are when we last see its open-group operator. Similarly for a | ||
3204 | register's end. */ | ||
3205 | const char **old_regstart = NULL, **old_regend = NULL; | ||
3206 | |||
3207 | /* The is_active field of reg_info helps us keep track of which (possibly | ||
3208 | nested) subexpressions we are currently in. The matched_something | ||
3209 | field of reg_info[reg_num] helps us tell whether or not we have | ||
3210 | matched any of the pattern so far this time through the reg_num-th | ||
3211 | subexpression. These two fields get reset each time through any | ||
3212 | loop their register is in. */ | ||
3213 | register_info_type *reg_info = NULL; | ||
3214 | |||
3215 | /* The following record the register info as found in the above | ||
3216 | variables when we find a match better than any we've seen before. | ||
3217 | This happens as we backtrack through the failure points, which in | ||
3218 | turn happens only if we have not yet matched the entire string. */ | ||
3219 | unsigned best_regs_set = false; | ||
3220 | const char **best_regstart = NULL, **best_regend = NULL; | ||
3221 | |||
3222 | /* Logically, this is `best_regend[0]'. But we don't want to have to | ||
3223 | allocate space for that if we're not allocating space for anything | ||
3224 | else (see below). Also, we never need info about register 0 for | ||
3225 | any of the other register vectors, and it seems rather a kludge to | ||
3226 | treat `best_regend' differently than the rest. So we keep track of | ||
3227 | the end of the best match so far in a separate variable. We | ||
3228 | initialize this to NULL so that when we backtrack the first time | ||
3229 | and need to test it, it's not garbage. */ | ||
3230 | const char *match_end = NULL; | ||
3231 | |||
3232 | /* Used when we pop values we don't care about. */ | ||
3233 | const char **reg_dummy = NULL; | ||
3234 | register_info_type *reg_info_dummy = NULL; | ||
3235 | |||
3236 | #ifdef DEBUG | ||
3237 | /* Counts the total number of registers pushed. */ | ||
3238 | unsigned num_regs_pushed = 0; | ||
3239 | #endif | ||
3240 | |||
3241 | DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); | ||
3242 | |||
3243 | INIT_FAIL_STACK (); | ||
3244 | |||
3245 | /* Do not bother to initialize all the register variables if there are | ||
3246 | no groups in the pattern, as it takes a fair amount of time. If | ||
3247 | there are groups, we include space for register 0 (the whole | ||
3248 | pattern), even though we never use it, since it simplifies the | ||
3249 | array indexing. We should fix this. */ | ||
3250 | if (bufp->re_nsub) | ||
3251 | { | ||
3252 | regstart = REGEX_TALLOC (num_regs, const char *); | ||
3253 | regend = REGEX_TALLOC (num_regs, const char *); | ||
3254 | old_regstart = REGEX_TALLOC (num_regs, const char *); | ||
3255 | old_regend = REGEX_TALLOC (num_regs, const char *); | ||
3256 | best_regstart = REGEX_TALLOC (num_regs, const char *); | ||
3257 | best_regend = REGEX_TALLOC (num_regs, const char *); | ||
3258 | reg_info = REGEX_TALLOC (num_regs, register_info_type); | ||
3259 | reg_dummy = REGEX_TALLOC (num_regs, const char *); | ||
3260 | reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); | ||
3261 | |||
3262 | if (!(regstart && regend && old_regstart && old_regend && reg_info | ||
3263 | && best_regstart && best_regend && reg_dummy && reg_info_dummy)) | ||
3264 | { | ||
3265 | FREE_VARIABLES (); | ||
3266 | return -2; | ||
3267 | } | ||
3268 | } | ||
3269 | #ifdef REGEX_MALLOC | ||
3270 | else | ||
3271 | { | ||
3272 | /* We must initialize all our variables to NULL, so that | ||
3273 | `FREE_VARIABLES' doesn't try to free them. */ | ||
3274 | regstart = regend = old_regstart = old_regend = best_regstart | ||
3275 | = best_regend = reg_dummy = NULL; | ||
3276 | reg_info = reg_info_dummy = (register_info_type *) NULL; | ||
3277 | } | ||
3278 | #endif /* REGEX_MALLOC */ | ||
3279 | |||
3280 | /* The starting position is bogus. */ | ||
3281 | if (pos < 0 || pos > size1 + size2) | ||
3282 | { | ||
3283 | FREE_VARIABLES (); | ||
3284 | return -1; | ||
3285 | } | ||
3286 | |||
3287 | /* Initialize subexpression text positions to -1 to mark ones that no | ||
3288 | start_memory/stop_memory has been seen for. Also initialize the | ||
3289 | register information struct. */ | ||
3290 | for (mcnt = 1; mcnt < num_regs; mcnt++) | ||
3291 | { | ||
3292 | regstart[mcnt] = regend[mcnt] | ||
3293 | = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; | ||
3294 | |||
3295 | REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; | ||
3296 | IS_ACTIVE (reg_info[mcnt]) = 0; | ||
3297 | MATCHED_SOMETHING (reg_info[mcnt]) = 0; | ||
3298 | EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; | ||
3299 | } | ||
3300 | |||
3301 | /* We move `string1' into `string2' if the latter's empty -- but not if | ||
3302 | `string1' is null. */ | ||
3303 | if (size2 == 0 && string1 != NULL) | ||
3304 | { | ||
3305 | string2 = string1; | ||
3306 | size2 = size1; | ||
3307 | string1 = 0; | ||
3308 | size1 = 0; | ||
3309 | } | ||
3310 | end1 = string1 + size1; | ||
3311 | end2 = string2 + size2; | ||
3312 | |||
3313 | /* Compute where to stop matching, within the two strings. */ | ||
3314 | if (stop <= size1) | ||
3315 | { | ||
3316 | end_match_1 = string1 + stop; | ||
3317 | end_match_2 = string2; | ||
3318 | } | ||
3319 | else | ||
3320 | { | ||
3321 | end_match_1 = end1; | ||
3322 | end_match_2 = string2 + stop - size1; | ||
3323 | } | ||
3324 | |||
3325 | /* `p' scans through the pattern as `d' scans through the data. | ||
3326 | `dend' is the end of the input string that `d' points within. `d' | ||
3327 | is advanced into the following input string whenever necessary, but | ||
3328 | this happens before fetching; therefore, at the beginning of the | ||
3329 | loop, `d' can be pointing at the end of a string, but it cannot | ||
3330 | equal `string2'. */ | ||
3331 | if (size1 > 0 && pos <= size1) | ||
3332 | { | ||
3333 | d = string1 + pos; | ||
3334 | dend = end_match_1; | ||
3335 | } | ||
3336 | else | ||
3337 | { | ||
3338 | d = string2 + pos - size1; | ||
3339 | dend = end_match_2; | ||
3340 | } | ||
3341 | |||
3342 | DEBUG_PRINT1 ("The compiled pattern is: "); | ||
3343 | DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); | ||
3344 | DEBUG_PRINT1 ("The string to match is: `"); | ||
3345 | DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); | ||
3346 | DEBUG_PRINT1 ("'\n"); | ||
3347 | |||
3348 | /* This loops over pattern commands. It exits by returning from the | ||
3349 | function if the match is complete, or it drops through if the match | ||
3350 | fails at this starting point in the input data. */ | ||
3351 | for (;;) | ||
3352 | { | ||
3353 | DEBUG_PRINT2 ("\n0x%x: ", p); | ||
3354 | |||
3355 | if (p == pend) | ||
3356 | { /* End of pattern means we might have succeeded. */ | ||
3357 | DEBUG_PRINT1 ("end of pattern ... "); | ||
3358 | |||
3359 | /* If we haven't matched the entire string, and we want the | ||
3360 | longest match, try backtracking. */ | ||
3361 | if (d != end_match_2) | ||
3362 | { | ||
3363 | DEBUG_PRINT1 ("backtracking.\n"); | ||
3364 | |||
3365 | if (!FAIL_STACK_EMPTY ()) | ||
3366 | { /* More failure points to try. */ | ||
3367 | boolean same_str_p = (FIRST_STRING_P (match_end) | ||
3368 | == MATCHING_IN_FIRST_STRING); | ||
3369 | |||
3370 | /* If exceeds best match so far, save it. */ | ||
3371 | if (!best_regs_set | ||
3372 | || (same_str_p && d > match_end) | ||
3373 | || (!same_str_p && !MATCHING_IN_FIRST_STRING)) | ||
3374 | { | ||
3375 | best_regs_set = true; | ||
3376 | match_end = d; | ||
3377 | |||
3378 | DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); | ||
3379 | |||
3380 | for (mcnt = 1; mcnt < num_regs; mcnt++) | ||
3381 | { | ||
3382 | best_regstart[mcnt] = regstart[mcnt]; | ||
3383 | best_regend[mcnt] = regend[mcnt]; | ||
3384 | } | ||
3385 | } | ||
3386 | goto fail; | ||
3387 | } | ||
3388 | |||
3389 | /* If no failure points, don't restore garbage. */ | ||
3390 | else if (best_regs_set) | ||
3391 | { | ||
3392 | restore_best_regs: | ||
3393 | /* Restore best match. It may happen that `dend == | ||
3394 | end_match_1' while the restored d is in string2. | ||
3395 | For example, the pattern `x.*y.*z' against the | ||
3396 | strings `x-' and `y-z-', if the two strings are | ||
3397 | not consecutive in memory. */ | ||
3398 | DEBUG_PRINT1 ("Restoring best registers.\n"); | ||
3399 | |||
3400 | d = match_end; | ||
3401 | dend = ((d >= string1 && d <= end1) | ||
3402 | ? end_match_1 : end_match_2); | ||
3403 | |||
3404 | for (mcnt = 1; mcnt < num_regs; mcnt++) | ||
3405 | { | ||
3406 | regstart[mcnt] = best_regstart[mcnt]; | ||
3407 | regend[mcnt] = best_regend[mcnt]; | ||
3408 | } | ||
3409 | } | ||
3410 | } /* d != end_match_2 */ | ||
3411 | |||
3412 | DEBUG_PRINT1 ("Accepting match.\n"); | ||
3413 | |||
3414 | /* If caller wants register contents data back, do it. */ | ||
3415 | if (regs && !bufp->no_sub) | ||
3416 | { | ||
3417 | /* Have the register data arrays been allocated? */ | ||
3418 | if (bufp->regs_allocated == REGS_UNALLOCATED) | ||
3419 | { /* No. So allocate them with malloc. We need one | ||
3420 | extra element beyond `num_regs' for the `-1' marker | ||
3421 | GNU code uses. */ | ||
3422 | regs->num_regs = MAX (RE_NREGS, num_regs + 1); | ||
3423 | regs->start = TALLOC (regs->num_regs, regoff_t); | ||
3424 | regs->end = TALLOC (regs->num_regs, regoff_t); | ||
3425 | if (regs->start == NULL || regs->end == NULL) | ||
3426 | return -2; | ||
3427 | bufp->regs_allocated = REGS_REALLOCATE; | ||
3428 | } | ||
3429 | else if (bufp->regs_allocated == REGS_REALLOCATE) | ||
3430 | { /* Yes. If we need more elements than were already | ||
3431 | allocated, reallocate them. If we need fewer, just | ||
3432 | leave it alone. */ | ||
3433 | if (regs->num_regs < num_regs + 1) | ||
3434 | { | ||
3435 | regs->num_regs = num_regs + 1; | ||
3436 | RETALLOC (regs->start, regs->num_regs, regoff_t); | ||
3437 | RETALLOC (regs->end, regs->num_regs, regoff_t); | ||
3438 | if (regs->start == NULL || regs->end == NULL) | ||
3439 | return -2; | ||
3440 | } | ||
3441 | } | ||
3442 | else | ||
3443 | assert (bufp->regs_allocated == REGS_FIXED); | ||
3444 | |||
3445 | /* Convert the pointer data in `regstart' and `regend' to | ||
3446 | indices. Register zero has to be set differently, | ||
3447 | since we haven't kept track of any info for it. */ | ||
3448 | if (regs->num_regs > 0) | ||
3449 | { | ||
3450 | regs->start[0] = pos; | ||
3451 | regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1 | ||
3452 | : d - string2 + size1); | ||
3453 | } | ||
3454 | |||
3455 | /* Go through the first `min (num_regs, regs->num_regs)' | ||
3456 | registers, since that is all we initialized. */ | ||
3457 | for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) | ||
3458 | { | ||
3459 | if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) | ||
3460 | regs->start[mcnt] = regs->end[mcnt] = -1; | ||
3461 | else | ||
3462 | { | ||
3463 | regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]); | ||
3464 | regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]); | ||
3465 | } | ||
3466 | } | ||
3467 | |||
3468 | /* If the regs structure we return has more elements than | ||
3469 | were in the pattern, set the extra elements to -1. If | ||
3470 | we (re)allocated the registers, this is the case, | ||
3471 | because we always allocate enough to have at least one | ||
3472 | -1 at the end. */ | ||
3473 | for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) | ||
3474 | regs->start[mcnt] = regs->end[mcnt] = -1; | ||
3475 | } /* regs && !bufp->no_sub */ | ||
3476 | |||
3477 | FREE_VARIABLES (); | ||
3478 | DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", | ||
3479 | nfailure_points_pushed, nfailure_points_popped, | ||
3480 | nfailure_points_pushed - nfailure_points_popped); | ||
3481 | DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); | ||
3482 | |||
3483 | mcnt = d - pos - (MATCHING_IN_FIRST_STRING | ||
3484 | ? string1 | ||
3485 | : string2 - size1); | ||
3486 | |||
3487 | DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); | ||
3488 | |||
3489 | return mcnt; | ||
3490 | } | ||
3491 | |||
3492 | /* Otherwise match next pattern command. */ | ||
3493 | #ifdef SWITCH_ENUM_BUG | ||
3494 | switch ((int) ((re_opcode_t) *p++)) | ||
3495 | #else | ||
3496 | switch ((re_opcode_t) *p++) | ||
3497 | #endif | ||
3498 | { | ||
3499 | /* Ignore these. Used to ignore the n of succeed_n's which | ||
3500 | currently have n == 0. */ | ||
3501 | case no_op: | ||
3502 | DEBUG_PRINT1 ("EXECUTING no_op.\n"); | ||
3503 | break; | ||
3504 | |||
3505 | |||
3506 | /* Match the next n pattern characters exactly. The following | ||
3507 | byte in the pattern defines n, and the n bytes after that | ||
3508 | are the characters to match. */ | ||
3509 | case exactn: | ||
3510 | mcnt = *p++; | ||
3511 | DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); | ||
3512 | |||
3513 | /* This is written out as an if-else so we don't waste time | ||
3514 | testing `translate' inside the loop. */ | ||
3515 | if (translate) | ||
3516 | { | ||
3517 | do | ||
3518 | { | ||
3519 | PREFETCH (); | ||
3520 | if (translate[(unsigned char) *d++] != (char) *p++) | ||
3521 | goto fail; | ||
3522 | } | ||
3523 | while (--mcnt); | ||
3524 | } | ||
3525 | else | ||
3526 | { | ||
3527 | do | ||
3528 | { | ||
3529 | PREFETCH (); | ||
3530 | if (*d++ != (char) *p++) goto fail; | ||
3531 | } | ||
3532 | while (--mcnt); | ||
3533 | } | ||
3534 | SET_REGS_MATCHED (); | ||
3535 | break; | ||
3536 | |||
3537 | |||
3538 | /* Match any character except possibly a newline or a null. */ | ||
3539 | case anychar: | ||
3540 | DEBUG_PRINT1 ("EXECUTING anychar.\n"); | ||
3541 | |||
3542 | PREFETCH (); | ||
3543 | |||
3544 | if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') | ||
3545 | || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) | ||
3546 | goto fail; | ||
3547 | |||
3548 | SET_REGS_MATCHED (); | ||
3549 | DEBUG_PRINT2 (" Matched `%d'.\n", *d); | ||
3550 | d++; | ||
3551 | break; | ||
3552 | |||
3553 | |||
3554 | case charset: | ||
3555 | case charset_not: | ||
3556 | { | ||
3557 | register unsigned char c; | ||
3558 | boolean not = (re_opcode_t) *(p - 1) == charset_not; | ||
3559 | |||
3560 | DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); | ||
3561 | |||
3562 | PREFETCH (); | ||
3563 | c = TRANSLATE (*d); /* The character to match. */ | ||
3564 | |||
3565 | /* Cast to `unsigned' instead of `unsigned char' in case the | ||
3566 | bit list is a full 32 bytes long. */ | ||
3567 | if (c < (unsigned) (*p * BYTEWIDTH) | ||
3568 | && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | ||
3569 | not = !not; | ||
3570 | |||
3571 | p += 1 + *p; | ||
3572 | |||
3573 | if (!not) goto fail; | ||
3574 | |||
3575 | SET_REGS_MATCHED (); | ||
3576 | d++; | ||
3577 | break; | ||
3578 | } | ||
3579 | |||
3580 | |||
3581 | /* The beginning of a group is represented by start_memory. | ||
3582 | The arguments are the register number in the next byte, and the | ||
3583 | number of groups inner to this one in the next. The text | ||
3584 | matched within the group is recorded (in the internal | ||
3585 | registers data structure) under the register number. */ | ||
3586 | case start_memory: | ||
3587 | DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); | ||
3588 | |||
3589 | /* Find out if this group can match the empty string. */ | ||
3590 | p1 = p; /* To send to group_match_null_string_p. */ | ||
3591 | |||
3592 | if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) | ||
3593 | REG_MATCH_NULL_STRING_P (reg_info[*p]) | ||
3594 | = group_match_null_string_p (&p1, pend, reg_info); | ||
3595 | |||
3596 | /* Save the position in the string where we were the last time | ||
3597 | we were at this open-group operator in case the group is | ||
3598 | operated upon by a repetition operator, e.g., with `(a*)*b' | ||
3599 | against `ab'; then we want to ignore where we are now in | ||
3600 | the string in case this attempt to match fails. */ | ||
3601 | old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) | ||
3602 | ? REG_UNSET (regstart[*p]) ? d : regstart[*p] | ||
3603 | : regstart[*p]; | ||
3604 | DEBUG_PRINT2 (" old_regstart: %d\n", | ||
3605 | POINTER_TO_OFFSET (old_regstart[*p])); | ||
3606 | |||
3607 | regstart[*p] = d; | ||
3608 | DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); | ||
3609 | |||
3610 | IS_ACTIVE (reg_info[*p]) = 1; | ||
3611 | MATCHED_SOMETHING (reg_info[*p]) = 0; | ||
3612 | |||
3613 | /* This is the new highest active register. */ | ||
3614 | highest_active_reg = *p; | ||
3615 | |||
3616 | /* If nothing was active before, this is the new lowest active | ||
3617 | register. */ | ||
3618 | if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) | ||
3619 | lowest_active_reg = *p; | ||
3620 | |||
3621 | /* Move past the register number and inner group count. */ | ||
3622 | p += 2; | ||
3623 | break; | ||
3624 | |||
3625 | |||
3626 | /* The stop_memory opcode represents the end of a group. Its | ||
3627 | arguments are the same as start_memory's: the register | ||
3628 | number, and the number of inner groups. */ | ||
3629 | case stop_memory: | ||
3630 | DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); | ||
3631 | |||
3632 | /* We need to save the string position the last time we were at | ||
3633 | this close-group operator in case the group is operated | ||
3634 | upon by a repetition operator, e.g., with `((a*)*(b*)*)*' | ||
3635 | against `aba'; then we want to ignore where we are now in | ||
3636 | the string in case this attempt to match fails. */ | ||
3637 | old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) | ||
3638 | ? REG_UNSET (regend[*p]) ? d : regend[*p] | ||
3639 | : regend[*p]; | ||
3640 | DEBUG_PRINT2 (" old_regend: %d\n", | ||
3641 | POINTER_TO_OFFSET (old_regend[*p])); | ||
3642 | |||
3643 | regend[*p] = d; | ||
3644 | DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); | ||
3645 | |||
3646 | /* This register isn't active anymore. */ | ||
3647 | IS_ACTIVE (reg_info[*p]) = 0; | ||
3648 | |||
3649 | /* If this was the only register active, nothing is active | ||
3650 | anymore. */ | ||
3651 | if (lowest_active_reg == highest_active_reg) | ||
3652 | { | ||
3653 | lowest_active_reg = NO_LOWEST_ACTIVE_REG; | ||
3654 | highest_active_reg = NO_HIGHEST_ACTIVE_REG; | ||
3655 | } | ||
3656 | else | ||
3657 | { /* We must scan for the new highest active register, since | ||
3658 | it isn't necessarily one less than now: consider | ||
3659 | (a(b)c(d(e)f)g). When group 3 ends, after the f), the | ||
3660 | new highest active register is 1. */ | ||
3661 | unsigned char r = *p - 1; | ||
3662 | while (r > 0 && !IS_ACTIVE (reg_info[r])) | ||
3663 | r--; | ||
3664 | |||
3665 | /* If we end up at register zero, that means that we saved | ||
3666 | the registers as the result of an `on_failure_jump', not | ||
3667 | a `start_memory', and we jumped to past the innermost | ||
3668 | `stop_memory'. For example, in ((.)*) we save | ||
3669 | registers 1 and 2 as a result of the *, but when we pop | ||
3670 | back to the second ), we are at the stop_memory 1. | ||
3671 | Thus, nothing is active. */ | ||
3672 | if (r == 0) | ||
3673 | { | ||
3674 | lowest_active_reg = NO_LOWEST_ACTIVE_REG; | ||
3675 | highest_active_reg = NO_HIGHEST_ACTIVE_REG; | ||
3676 | } | ||
3677 | else | ||
3678 | highest_active_reg = r; | ||
3679 | } | ||
3680 | |||
3681 | /* If just failed to match something this time around with a | ||
3682 | group that's operated on by a repetition operator, try to | ||
3683 | force exit from the ``loop'', and restore the register | ||
3684 | information for this group that we had before trying this | ||
3685 | last match. */ | ||
3686 | if ((!MATCHED_SOMETHING (reg_info[*p]) | ||
3687 | || (re_opcode_t) p[-3] == start_memory) | ||
3688 | && (p + 2) < pend) | ||
3689 | { | ||
3690 | boolean is_a_jump_n = false; | ||
3691 | |||
3692 | p1 = p + 2; | ||
3693 | mcnt = 0; | ||
3694 | switch ((re_opcode_t) *p1++) | ||
3695 | { | ||
3696 | case jump_n: | ||
3697 | is_a_jump_n = true; | ||
3698 | case pop_failure_jump: | ||
3699 | case maybe_pop_jump: | ||
3700 | case jump: | ||
3701 | case dummy_failure_jump: | ||
3702 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
3703 | if (is_a_jump_n) | ||
3704 | p1 += 2; | ||
3705 | break; | ||
3706 | |||
3707 | default: | ||
3708 | /* do nothing */ ; | ||
3709 | } | ||
3710 | p1 += mcnt; | ||
3711 | |||
3712 | /* If the next operation is a jump backwards in the pattern | ||
3713 | to an on_failure_jump right before the start_memory | ||
3714 | corresponding to this stop_memory, exit from the loop | ||
3715 | by forcing a failure after pushing on the stack the | ||
3716 | on_failure_jump's jump in the pattern, and d. */ | ||
3717 | if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump | ||
3718 | && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) | ||
3719 | { | ||
3720 | /* If this group ever matched anything, then restore | ||
3721 | what its registers were before trying this last | ||
3722 | failed match, e.g., with `(a*)*b' against `ab' for | ||
3723 | regstart[1], and, e.g., with `((a*)*(b*)*)*' | ||
3724 | against `aba' for regend[3]. | ||
3725 | |||
3726 | Also restore the registers for inner groups for, | ||
3727 | e.g., `((a*)(b*))*' against `aba' (register 3 would | ||
3728 | otherwise get trashed). */ | ||
3729 | |||
3730 | if (EVER_MATCHED_SOMETHING (reg_info[*p])) | ||
3731 | { | ||
3732 | unsigned r; | ||
3733 | |||
3734 | EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; | ||
3735 | |||
3736 | /* Restore this and inner groups' (if any) registers. */ | ||
3737 | for (r = *p; r < *p + *(p + 1); r++) | ||
3738 | { | ||
3739 | regstart[r] = old_regstart[r]; | ||
3740 | |||
3741 | /* xx why this test? */ | ||
3742 | if ((int) old_regend[r] >= (int) regstart[r]) | ||
3743 | regend[r] = old_regend[r]; | ||
3744 | } | ||
3745 | } | ||
3746 | p1++; | ||
3747 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
3748 | PUSH_FAILURE_POINT (p1 + mcnt, d, -2); | ||
3749 | |||
3750 | goto fail; | ||
3751 | } | ||
3752 | } | ||
3753 | |||
3754 | /* Move past the register number and the inner group count. */ | ||
3755 | p += 2; | ||
3756 | break; | ||
3757 | |||
3758 | |||
3759 | /* \<digit> has been turned into a `duplicate' command which is | ||
3760 | followed by the numeric value of <digit> as the register number. */ | ||
3761 | case duplicate: | ||
3762 | { | ||
3763 | register const char *d2, *dend2; | ||
3764 | int regno = *p++; /* Get which register to match against. */ | ||
3765 | DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); | ||
3766 | |||
3767 | /* Can't back reference a group which we've never matched. */ | ||
3768 | if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) | ||
3769 | goto fail; | ||
3770 | |||
3771 | /* Where in input to try to start matching. */ | ||
3772 | d2 = regstart[regno]; | ||
3773 | |||
3774 | /* Where to stop matching; if both the place to start and | ||
3775 | the place to stop matching are in the same string, then | ||
3776 | set to the place to stop, otherwise, for now have to use | ||
3777 | the end of the first string. */ | ||
3778 | |||
3779 | dend2 = ((FIRST_STRING_P (regstart[regno]) | ||
3780 | == FIRST_STRING_P (regend[regno])) | ||
3781 | ? regend[regno] : end_match_1); | ||
3782 | for (;;) | ||
3783 | { | ||
3784 | /* If necessary, advance to next segment in register | ||
3785 | contents. */ | ||
3786 | while (d2 == dend2) | ||
3787 | { | ||
3788 | if (dend2 == end_match_2) break; | ||
3789 | if (dend2 == regend[regno]) break; | ||
3790 | |||
3791 | /* End of string1 => advance to string2. */ | ||
3792 | d2 = string2; | ||
3793 | dend2 = regend[regno]; | ||
3794 | } | ||
3795 | /* At end of register contents => success */ | ||
3796 | if (d2 == dend2) break; | ||
3797 | |||
3798 | /* If necessary, advance to next segment in data. */ | ||
3799 | PREFETCH (); | ||
3800 | |||
3801 | /* How many characters left in this segment to match. */ | ||
3802 | mcnt = dend - d; | ||
3803 | |||
3804 | /* Want how many consecutive characters we can match in | ||
3805 | one shot, so, if necessary, adjust the count. */ | ||
3806 | if (mcnt > dend2 - d2) | ||
3807 | mcnt = dend2 - d2; | ||
3808 | |||
3809 | /* Compare that many; failure if mismatch, else move | ||
3810 | past them. */ | ||
3811 | if (translate | ||
3812 | ? bcmp_translate (d, d2, mcnt, translate) | ||
3813 | : bcmp (d, d2, mcnt)) | ||
3814 | goto fail; | ||
3815 | d += mcnt, d2 += mcnt; | ||
3816 | } | ||
3817 | } | ||
3818 | break; | ||
3819 | |||
3820 | |||
3821 | /* begline matches the empty string at the beginning of the string | ||
3822 | (unless `not_bol' is set in `bufp'), and, if | ||
3823 | `newline_anchor' is set, after newlines. */ | ||
3824 | case begline: | ||
3825 | DEBUG_PRINT1 ("EXECUTING begline.\n"); | ||
3826 | |||
3827 | if (AT_STRINGS_BEG (d)) | ||
3828 | { | ||
3829 | if (!bufp->not_bol) break; | ||
3830 | } | ||
3831 | else if (d[-1] == '\n' && bufp->newline_anchor) | ||
3832 | { | ||
3833 | break; | ||
3834 | } | ||
3835 | /* In all other cases, we fail. */ | ||
3836 | goto fail; | ||
3837 | |||
3838 | |||
3839 | /* endline is the dual of begline. */ | ||
3840 | case endline: | ||
3841 | DEBUG_PRINT1 ("EXECUTING endline.\n"); | ||
3842 | |||
3843 | if (AT_STRINGS_END (d)) | ||
3844 | { | ||
3845 | if (!bufp->not_eol) break; | ||
3846 | } | ||
3847 | |||
3848 | /* We have to ``prefetch'' the next character. */ | ||
3849 | else if ((d == end1 ? *string2 : *d) == '\n' | ||
3850 | && bufp->newline_anchor) | ||
3851 | { | ||
3852 | break; | ||
3853 | } | ||
3854 | goto fail; | ||
3855 | |||
3856 | |||
3857 | /* Match at the very beginning of the data. */ | ||
3858 | case begbuf: | ||
3859 | DEBUG_PRINT1 ("EXECUTING begbuf.\n"); | ||
3860 | if (AT_STRINGS_BEG (d)) | ||
3861 | break; | ||
3862 | goto fail; | ||
3863 | |||
3864 | |||
3865 | /* Match at the very end of the data. */ | ||
3866 | case endbuf: | ||
3867 | DEBUG_PRINT1 ("EXECUTING endbuf.\n"); | ||
3868 | if (AT_STRINGS_END (d)) | ||
3869 | break; | ||
3870 | goto fail; | ||
3871 | |||
3872 | |||
3873 | /* on_failure_keep_string_jump is used to optimize `.*\n'. It | ||
3874 | pushes NULL as the value for the string on the stack. Then | ||
3875 | `pop_failure_point' will keep the current value for the | ||
3876 | string, instead of restoring it. To see why, consider | ||
3877 | matching `foo\nbar' against `.*\n'. The .* matches the foo; | ||
3878 | then the . fails against the \n. But the next thing we want | ||
3879 | to do is match the \n against the \n; if we restored the | ||
3880 | string value, we would be back at the foo. | ||
3881 | |||
3882 | Because this is used only in specific cases, we don't need to | ||
3883 | check all the things that `on_failure_jump' does, to make | ||
3884 | sure the right things get saved on the stack. Hence we don't | ||
3885 | share its code. The only reason to push anything on the | ||
3886 | stack at all is that otherwise we would have to change | ||
3887 | `anychar's code to do something besides goto fail in this | ||
3888 | case; that seems worse than this. */ | ||
3889 | case on_failure_keep_string_jump: | ||
3890 | DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); | ||
3891 | |||
3892 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | ||
3893 | DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); | ||
3894 | |||
3895 | PUSH_FAILURE_POINT (p + mcnt, NULL, -2); | ||
3896 | break; | ||
3897 | |||
3898 | |||
3899 | /* Uses of on_failure_jump: | ||
3900 | |||
3901 | Each alternative starts with an on_failure_jump that points | ||
3902 | to the beginning of the next alternative. Each alternative | ||
3903 | except the last ends with a jump that in effect jumps past | ||
3904 | the rest of the alternatives. (They really jump to the | ||
3905 | ending jump of the following alternative, because tensioning | ||
3906 | these jumps is a hassle.) | ||
3907 | |||
3908 | Repeats start with an on_failure_jump that points past both | ||
3909 | the repetition text and either the following jump or | ||
3910 | pop_failure_jump back to this on_failure_jump. */ | ||
3911 | case on_failure_jump: | ||
3912 | on_failure: | ||
3913 | DEBUG_PRINT1 ("EXECUTING on_failure_jump"); | ||
3914 | |||
3915 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | ||
3916 | DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); | ||
3917 | |||
3918 | /* If this on_failure_jump comes right before a group (i.e., | ||
3919 | the original * applied to a group), save the information | ||
3920 | for that group and all inner ones, so that if we fail back | ||
3921 | to this point, the group's information will be correct. | ||
3922 | For example, in \(a*\)*\1, we need the preceding group, | ||
3923 | and in \(\(a*\)b*\)\2, we need the inner group. */ | ||
3924 | |||
3925 | /* We can't use `p' to check ahead because we push | ||
3926 | a failure point to `p + mcnt' after we do this. */ | ||
3927 | p1 = p; | ||
3928 | |||
3929 | /* We need to skip no_op's before we look for the | ||
3930 | start_memory in case this on_failure_jump is happening as | ||
3931 | the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 | ||
3932 | against aba. */ | ||
3933 | while (p1 < pend && (re_opcode_t) *p1 == no_op) | ||
3934 | p1++; | ||
3935 | |||
3936 | if (p1 < pend && (re_opcode_t) *p1 == start_memory) | ||
3937 | { | ||
3938 | /* We have a new highest active register now. This will | ||
3939 | get reset at the start_memory we are about to get to, | ||
3940 | but we will have saved all the registers relevant to | ||
3941 | this repetition op, as described above. */ | ||
3942 | highest_active_reg = *(p1 + 1) + *(p1 + 2); | ||
3943 | if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) | ||
3944 | lowest_active_reg = *(p1 + 1); | ||
3945 | } | ||
3946 | |||
3947 | DEBUG_PRINT1 (":\n"); | ||
3948 | PUSH_FAILURE_POINT (p + mcnt, d, -2); | ||
3949 | break; | ||
3950 | |||
3951 | |||
3952 | /* A smart repeat ends with `maybe_pop_jump'. | ||
3953 | We change it to either `pop_failure_jump' or `jump'. */ | ||
3954 | case maybe_pop_jump: | ||
3955 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | ||
3956 | DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); | ||
3957 | { | ||
3958 | register unsigned char *p2 = p; | ||
3959 | |||
3960 | /* Compare the beginning of the repeat with what in the | ||
3961 | pattern follows its end. If we can establish that there | ||
3962 | is nothing that they would both match, i.e., that we | ||
3963 | would have to backtrack because of (as in, e.g., `a*a') | ||
3964 | then we can change to pop_failure_jump, because we'll | ||
3965 | never have to backtrack. | ||
3966 | |||
3967 | This is not true in the case of alternatives: in | ||
3968 | `(a|ab)*' we do need to backtrack to the `ab' alternative | ||
3969 | (e.g., if the string was `ab'). But instead of trying to | ||
3970 | detect that here, the alternative has put on a dummy | ||
3971 | failure point which is what we will end up popping. */ | ||
3972 | |||
3973 | /* Skip over open/close-group commands. */ | ||
3974 | while (p2 + 2 < pend | ||
3975 | && ((re_opcode_t) *p2 == stop_memory | ||
3976 | || (re_opcode_t) *p2 == start_memory)) | ||
3977 | p2 += 3; /* Skip over args, too. */ | ||
3978 | |||
3979 | /* If we're at the end of the pattern, we can change. */ | ||
3980 | if (p2 == pend) | ||
3981 | { | ||
3982 | /* Consider what happens when matching ":\(.*\)" | ||
3983 | against ":/". I don't really understand this code | ||
3984 | yet. */ | ||
3985 | p[-3] = (unsigned char) pop_failure_jump; | ||
3986 | DEBUG_PRINT1 | ||
3987 | (" End of pattern: change to `pop_failure_jump'.\n"); | ||
3988 | } | ||
3989 | |||
3990 | else if ((re_opcode_t) *p2 == exactn | ||
3991 | || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) | ||
3992 | { | ||
3993 | register unsigned char c | ||
3994 | = *p2 == (unsigned char) endline ? '\n' : p2[2]; | ||
3995 | p1 = p + mcnt; | ||
3996 | |||
3997 | /* p1[0] ... p1[2] are the `on_failure_jump' corresponding | ||
3998 | to the `maybe_finalize_jump' of this case. Examine what | ||
3999 | follows. */ | ||
4000 | if ((re_opcode_t) p1[3] == exactn && p1[5] != c) | ||
4001 | { | ||
4002 | p[-3] = (unsigned char) pop_failure_jump; | ||
4003 | DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | ||
4004 | c, p1[5]); | ||
4005 | } | ||
4006 | |||
4007 | else if ((re_opcode_t) p1[3] == charset | ||
4008 | || (re_opcode_t) p1[3] == charset_not) | ||
4009 | { | ||
4010 | int not = (re_opcode_t) p1[3] == charset_not; | ||
4011 | |||
4012 | if (c < (unsigned char) (p1[4] * BYTEWIDTH) | ||
4013 | && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | ||
4014 | not = !not; | ||
4015 | |||
4016 | /* `not' is equal to 1 if c would match, which means | ||
4017 | that we can't change to pop_failure_jump. */ | ||
4018 | if (!not) | ||
4019 | { | ||
4020 | p[-3] = (unsigned char) pop_failure_jump; | ||
4021 | DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); | ||
4022 | } | ||
4023 | } | ||
4024 | } | ||
4025 | } | ||
4026 | p -= 2; /* Point at relative address again. */ | ||
4027 | if ((re_opcode_t) p[-1] != pop_failure_jump) | ||
4028 | { | ||
4029 | p[-1] = (unsigned char) jump; | ||
4030 | DEBUG_PRINT1 (" Match => jump.\n"); | ||
4031 | goto unconditional_jump; | ||
4032 | } | ||
4033 | /* Note fall through. */ | ||
4034 | |||
4035 | |||
4036 | /* The end of a simple repeat has a pop_failure_jump back to | ||
4037 | its matching on_failure_jump, where the latter will push a | ||
4038 | failure point. The pop_failure_jump takes off failure | ||
4039 | points put on by this pop_failure_jump's matching | ||
4040 | on_failure_jump; we got through the pattern to here from the | ||
4041 | matching on_failure_jump, so didn't fail. */ | ||
4042 | case pop_failure_jump: | ||
4043 | { | ||
4044 | /* We need to pass separate storage for the lowest and | ||
4045 | highest registers, even though we don't care about the | ||
4046 | actual values. Otherwise, we will restore only one | ||
4047 | register from the stack, since lowest will == highest in | ||
4048 | `pop_failure_point'. */ | ||
4049 | unsigned dummy_low_reg, dummy_high_reg; | ||
4050 | unsigned char *pdummy; | ||
4051 | const char *sdummy; | ||
4052 | |||
4053 | DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); | ||
4054 | POP_FAILURE_POINT (sdummy, pdummy, | ||
4055 | dummy_low_reg, dummy_high_reg, | ||
4056 | reg_dummy, reg_dummy, reg_info_dummy); | ||
4057 | } | ||
4058 | /* Note fall through. */ | ||
4059 | |||
4060 | |||
4061 | /* Unconditionally jump (without popping any failure points). */ | ||
4062 | case jump: | ||
4063 | unconditional_jump: | ||
4064 | EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ | ||
4065 | DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); | ||
4066 | p += mcnt; /* Do the jump. */ | ||
4067 | DEBUG_PRINT2 ("(to 0x%x).\n", p); | ||
4068 | break; | ||
4069 | |||
4070 | |||
4071 | /* We need this opcode so we can detect where alternatives end | ||
4072 | in `group_match_null_string_p' et al. */ | ||
4073 | case jump_past_alt: | ||
4074 | DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); | ||
4075 | goto unconditional_jump; | ||
4076 | |||
4077 | |||
4078 | /* Normally, the on_failure_jump pushes a failure point, which | ||
4079 | then gets popped at pop_failure_jump. We will end up at | ||
4080 | pop_failure_jump, also, and with a pattern of, say, `a+', we | ||
4081 | are skipping over the on_failure_jump, so we have to push | ||
4082 | something meaningless for pop_failure_jump to pop. */ | ||
4083 | case dummy_failure_jump: | ||
4084 | DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); | ||
4085 | /* It doesn't matter what we push for the string here. What | ||
4086 | the code at `fail' tests is the value for the pattern. */ | ||
4087 | PUSH_FAILURE_POINT (0, 0, -2); | ||
4088 | goto unconditional_jump; | ||
4089 | |||
4090 | |||
4091 | /* At the end of an alternative, we need to push a dummy failure | ||
4092 | point in case we are followed by a `pop_failure_jump', because | ||
4093 | we don't want the failure point for the alternative to be | ||
4094 | popped. For example, matching `(a|ab)*' against `aab' | ||
4095 | requires that we match the `ab' alternative. */ | ||
4096 | case push_dummy_failure: | ||
4097 | DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); | ||
4098 | /* See comments just above at `dummy_failure_jump' about the | ||
4099 | two zeroes. */ | ||
4100 | PUSH_FAILURE_POINT (0, 0, -2); | ||
4101 | break; | ||
4102 | |||
4103 | /* Have to succeed matching what follows at least n times. | ||
4104 | After that, handle like `on_failure_jump'. */ | ||
4105 | case succeed_n: | ||
4106 | EXTRACT_NUMBER (mcnt, p + 2); | ||
4107 | DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); | ||
4108 | |||
4109 | assert (mcnt >= 0); | ||
4110 | /* Originally, this is how many times we HAVE to succeed. */ | ||
4111 | if (mcnt > 0) | ||
4112 | { | ||
4113 | mcnt--; | ||
4114 | p += 2; | ||
4115 | STORE_NUMBER_AND_INCR (p, mcnt); | ||
4116 | DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); | ||
4117 | } | ||
4118 | else if (mcnt == 0) | ||
4119 | { | ||
4120 | DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); | ||
4121 | p[2] = (unsigned char) no_op; | ||
4122 | p[3] = (unsigned char) no_op; | ||
4123 | goto on_failure; | ||
4124 | } | ||
4125 | break; | ||
4126 | |||
4127 | case jump_n: | ||
4128 | EXTRACT_NUMBER (mcnt, p + 2); | ||
4129 | DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); | ||
4130 | |||
4131 | /* Originally, this is how many times we CAN jump. */ | ||
4132 | if (mcnt) | ||
4133 | { | ||
4134 | mcnt--; | ||
4135 | STORE_NUMBER (p + 2, mcnt); | ||
4136 | goto unconditional_jump; | ||
4137 | } | ||
4138 | /* If don't have to jump any more, skip over the rest of command. */ | ||
4139 | else | ||
4140 | p += 4; | ||
4141 | break; | ||
4142 | |||
4143 | case set_number_at: | ||
4144 | { | ||
4145 | DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); | ||
4146 | |||
4147 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | ||
4148 | p1 = p + mcnt; | ||
4149 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | ||
4150 | DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); | ||
4151 | STORE_NUMBER (p1, mcnt); | ||
4152 | break; | ||
4153 | } | ||
4154 | |||
4155 | case wordbound: | ||
4156 | DEBUG_PRINT1 ("EXECUTING wordbound.\n"); | ||
4157 | if (AT_WORD_BOUNDARY (d)) | ||
4158 | break; | ||
4159 | goto fail; | ||
4160 | |||
4161 | case notwordbound: | ||
4162 | DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); | ||
4163 | if (AT_WORD_BOUNDARY (d)) | ||
4164 | goto fail; | ||
4165 | break; | ||
4166 | |||
4167 | case wordbeg: | ||
4168 | DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); | ||
4169 | if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) | ||
4170 | break; | ||
4171 | goto fail; | ||
4172 | |||
4173 | case wordend: | ||
4174 | DEBUG_PRINT1 ("EXECUTING wordend.\n"); | ||
4175 | if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) | ||
4176 | && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) | ||
4177 | break; | ||
4178 | goto fail; | ||
4179 | |||
4180 | #ifdef emacs | ||
4181 | #ifdef emacs19 | ||
4182 | case before_dot: | ||
4183 | DEBUG_PRINT1 ("EXECUTING before_dot.\n"); | ||
4184 | if (PTR_CHAR_POS ((unsigned char *) d) >= point) | ||
4185 | goto fail; | ||
4186 | break; | ||
4187 | |||
4188 | case at_dot: | ||
4189 | DEBUG_PRINT1 ("EXECUTING at_dot.\n"); | ||
4190 | if (PTR_CHAR_POS ((unsigned char *) d) != point) | ||
4191 | goto fail; | ||
4192 | break; | ||
4193 | |||
4194 | case after_dot: | ||
4195 | DEBUG_PRINT1 ("EXECUTING after_dot.\n"); | ||
4196 | if (PTR_CHAR_POS ((unsigned char *) d) <= point) | ||
4197 | goto fail; | ||
4198 | break; | ||
4199 | #else /* not emacs19 */ | ||
4200 | case at_dot: | ||
4201 | DEBUG_PRINT1 ("EXECUTING at_dot.\n"); | ||
4202 | if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point) | ||
4203 | goto fail; | ||
4204 | break; | ||
4205 | #endif /* not emacs19 */ | ||
4206 | |||
4207 | case syntaxspec: | ||
4208 | DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); | ||
4209 | mcnt = *p++; | ||
4210 | goto matchsyntax; | ||
4211 | |||
4212 | case wordchar: | ||
4213 | DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); | ||
4214 | mcnt = (int) Sword; | ||
4215 | matchsyntax: | ||
4216 | PREFETCH (); | ||
4217 | if (SYNTAX (*d++) != (enum syntaxcode) mcnt) | ||
4218 | goto fail; | ||
4219 | SET_REGS_MATCHED (); | ||
4220 | break; | ||
4221 | |||
4222 | case notsyntaxspec: | ||
4223 | DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); | ||
4224 | mcnt = *p++; | ||
4225 | goto matchnotsyntax; | ||
4226 | |||
4227 | case notwordchar: | ||
4228 | DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); | ||
4229 | mcnt = (int) Sword; | ||
4230 | matchnotsyntax: | ||
4231 | PREFETCH (); | ||
4232 | if (SYNTAX (*d++) == (enum syntaxcode) mcnt) | ||
4233 | goto fail; | ||
4234 | SET_REGS_MATCHED (); | ||
4235 | break; | ||
4236 | |||
4237 | #else /* not emacs */ | ||
4238 | case wordchar: | ||
4239 | DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); | ||
4240 | PREFETCH (); | ||
4241 | if (!WORDCHAR_P (d)) | ||
4242 | goto fail; | ||
4243 | SET_REGS_MATCHED (); | ||
4244 | d++; | ||
4245 | break; | ||
4246 | |||
4247 | case notwordchar: | ||
4248 | DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); | ||
4249 | PREFETCH (); | ||
4250 | if (WORDCHAR_P (d)) | ||
4251 | goto fail; | ||
4252 | SET_REGS_MATCHED (); | ||
4253 | d++; | ||
4254 | break; | ||
4255 | #endif /* not emacs */ | ||
4256 | |||
4257 | default: | ||
4258 | abort (); | ||
4259 | } | ||
4260 | continue; /* Successfully executed one pattern command; keep going. */ | ||
4261 | |||
4262 | |||
4263 | /* We goto here if a matching operation fails. */ | ||
4264 | fail: | ||
4265 | if (!FAIL_STACK_EMPTY ()) | ||
4266 | { /* A restart point is known. Restore to that state. */ | ||
4267 | DEBUG_PRINT1 ("\nFAIL:\n"); | ||
4268 | POP_FAILURE_POINT (d, p, | ||
4269 | lowest_active_reg, highest_active_reg, | ||
4270 | regstart, regend, reg_info); | ||
4271 | |||
4272 | /* If this failure point is a dummy, try the next one. */ | ||
4273 | if (!p) | ||
4274 | goto fail; | ||
4275 | |||
4276 | /* If we failed to the end of the pattern, don't examine *p. */ | ||
4277 | assert (p <= pend); | ||
4278 | if (p < pend) | ||
4279 | { | ||
4280 | boolean is_a_jump_n = false; | ||
4281 | |||
4282 | /* If failed to a backwards jump that's part of a repetition | ||
4283 | loop, need to pop this failure point and use the next one. */ | ||
4284 | switch ((re_opcode_t) *p) | ||
4285 | { | ||
4286 | case jump_n: | ||
4287 | is_a_jump_n = true; | ||
4288 | case maybe_pop_jump: | ||
4289 | case pop_failure_jump: | ||
4290 | case jump: | ||
4291 | p1 = p + 1; | ||
4292 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
4293 | p1 += mcnt; | ||
4294 | |||
4295 | if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) | ||
4296 | || (!is_a_jump_n | ||
4297 | && (re_opcode_t) *p1 == on_failure_jump)) | ||
4298 | goto fail; | ||
4299 | break; | ||
4300 | default: | ||
4301 | /* do nothing */ ; | ||
4302 | } | ||
4303 | } | ||
4304 | |||
4305 | if (d >= string1 && d <= end1) | ||
4306 | dend = end_match_1; | ||
4307 | } | ||
4308 | else | ||
4309 | break; /* Matching at this starting point really fails. */ | ||
4310 | } /* for (;;) */ | ||
4311 | |||
4312 | if (best_regs_set) | ||
4313 | goto restore_best_regs; | ||
4314 | |||
4315 | FREE_VARIABLES (); | ||
4316 | |||
4317 | return -1; /* Failure to match. */ | ||
4318 | } /* re_match_2 */ | ||
4319 | |||
4320 | /* Subroutine definitions for re_match_2. */ | ||
4321 | |||
4322 | |||
4323 | /* We are passed P pointing to a register number after a start_memory. | ||
4324 | |||
4325 | Return true if the pattern up to the corresponding stop_memory can | ||
4326 | match the empty string, and false otherwise. | ||
4327 | |||
4328 | If we find the matching stop_memory, sets P to point to one past its number. | ||
4329 | Otherwise, sets P to an undefined byte less than or equal to END. | ||
4330 | |||
4331 | We don't handle duplicates properly (yet). */ | ||
4332 | |||
4333 | static boolean | ||
4334 | group_match_null_string_p (p, end, reg_info) | ||
4335 | unsigned char **p, *end; | ||
4336 | register_info_type *reg_info; | ||
4337 | { | ||
4338 | int mcnt; | ||
4339 | /* Point to after the args to the start_memory. */ | ||
4340 | unsigned char *p1 = *p + 2; | ||
4341 | |||
4342 | while (p1 < end) | ||
4343 | { | ||
4344 | /* Skip over opcodes that can match nothing, and return true or | ||
4345 | false, as appropriate, when we get to one that can't, or to the | ||
4346 | matching stop_memory. */ | ||
4347 | |||
4348 | switch ((re_opcode_t) *p1) | ||
4349 | { | ||
4350 | /* Could be either a loop or a series of alternatives. */ | ||
4351 | case on_failure_jump: | ||
4352 | p1++; | ||
4353 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
4354 | |||
4355 | /* If the next operation is not a jump backwards in the | ||
4356 | pattern. */ | ||
4357 | |||
4358 | if (mcnt >= 0) | ||
4359 | { | ||
4360 | /* Go through the on_failure_jumps of the alternatives, | ||
4361 | seeing if any of the alternatives cannot match nothing. | ||
4362 | The last alternative starts with only a jump, | ||
4363 | whereas the rest start with on_failure_jump and end | ||
4364 | with a jump, e.g., here is the pattern for `a|b|c': | ||
4365 | |||
4366 | /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 | ||
4367 | /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 | ||
4368 | /exactn/1/c | ||
4369 | |||
4370 | So, we have to first go through the first (n-1) | ||
4371 | alternatives and then deal with the last one separately. */ | ||
4372 | |||
4373 | |||
4374 | /* Deal with the first (n-1) alternatives, which start | ||
4375 | with an on_failure_jump (see above) that jumps to right | ||
4376 | past a jump_past_alt. */ | ||
4377 | |||
4378 | while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) | ||
4379 | { | ||
4380 | /* `mcnt' holds how many bytes long the alternative | ||
4381 | is, including the ending `jump_past_alt' and | ||
4382 | its number. */ | ||
4383 | |||
4384 | if (!alt_match_null_string_p (p1, p1 + mcnt - 3, | ||
4385 | reg_info)) | ||
4386 | return false; | ||
4387 | |||
4388 | /* Move to right after this alternative, including the | ||
4389 | jump_past_alt. */ | ||
4390 | p1 += mcnt; | ||
4391 | |||
4392 | /* Break if it's the beginning of an n-th alternative | ||
4393 | that doesn't begin with an on_failure_jump. */ | ||
4394 | if ((re_opcode_t) *p1 != on_failure_jump) | ||
4395 | break; | ||
4396 | |||
4397 | /* Still have to check that it's not an n-th | ||
4398 | alternative that starts with an on_failure_jump. */ | ||
4399 | p1++; | ||
4400 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
4401 | if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) | ||
4402 | { | ||
4403 | /* Get to the beginning of the n-th alternative. */ | ||
4404 | p1 -= 3; | ||
4405 | break; | ||
4406 | } | ||
4407 | } | ||
4408 | |||
4409 | /* Deal with the last alternative: go back and get number | ||
4410 | of the `jump_past_alt' just before it. `mcnt' contains | ||
4411 | the length of the alternative. */ | ||
4412 | EXTRACT_NUMBER (mcnt, p1 - 2); | ||
4413 | |||
4414 | if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) | ||
4415 | return false; | ||
4416 | |||
4417 | p1 += mcnt; /* Get past the n-th alternative. */ | ||
4418 | } /* if mcnt > 0 */ | ||
4419 | break; | ||
4420 | |||
4421 | |||
4422 | case stop_memory: | ||
4423 | assert (p1[1] == **p); | ||
4424 | *p = p1 + 2; | ||
4425 | return true; | ||
4426 | |||
4427 | |||
4428 | default: | ||
4429 | if (!common_op_match_null_string_p (&p1, end, reg_info)) | ||
4430 | return false; | ||
4431 | } | ||
4432 | } /* while p1 < end */ | ||
4433 | |||
4434 | return false; | ||
4435 | } /* group_match_null_string_p */ | ||
4436 | |||
4437 | |||
4438 | /* Similar to group_match_null_string_p, but doesn't deal with alternatives: | ||
4439 | It expects P to be the first byte of a single alternative and END one | ||
4440 | byte past the last. The alternative can contain groups. */ | ||
4441 | |||
4442 | static boolean | ||
4443 | alt_match_null_string_p (p, end, reg_info) | ||
4444 | unsigned char *p, *end; | ||
4445 | register_info_type *reg_info; | ||
4446 | { | ||
4447 | int mcnt; | ||
4448 | unsigned char *p1 = p; | ||
4449 | |||
4450 | while (p1 < end) | ||
4451 | { | ||
4452 | /* Skip over opcodes that can match nothing, and break when we get | ||
4453 | to one that can't. */ | ||
4454 | |||
4455 | switch ((re_opcode_t) *p1) | ||
4456 | { | ||
4457 | /* It's a loop. */ | ||
4458 | case on_failure_jump: | ||
4459 | p1++; | ||
4460 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
4461 | p1 += mcnt; | ||
4462 | break; | ||
4463 | |||
4464 | default: | ||
4465 | if (!common_op_match_null_string_p (&p1, end, reg_info)) | ||
4466 | return false; | ||
4467 | } | ||
4468 | } /* while p1 < end */ | ||
4469 | |||
4470 | return true; | ||
4471 | } /* alt_match_null_string_p */ | ||
4472 | |||
4473 | |||
4474 | /* Deals with the ops common to group_match_null_string_p and | ||
4475 | alt_match_null_string_p. | ||
4476 | |||
4477 | Sets P to one after the op and its arguments, if any. */ | ||
4478 | |||
4479 | static boolean | ||
4480 | common_op_match_null_string_p (p, end, reg_info) | ||
4481 | unsigned char **p, *end; | ||
4482 | register_info_type *reg_info; | ||
4483 | { | ||
4484 | int mcnt; | ||
4485 | boolean ret; | ||
4486 | int reg_no; | ||
4487 | unsigned char *p1 = *p; | ||
4488 | |||
4489 | switch ((re_opcode_t) *p1++) | ||
4490 | { | ||
4491 | case no_op: | ||
4492 | case begline: | ||
4493 | case endline: | ||
4494 | case begbuf: | ||
4495 | case endbuf: | ||
4496 | case wordbeg: | ||
4497 | case wordend: | ||
4498 | case wordbound: | ||
4499 | case notwordbound: | ||
4500 | #ifdef emacs | ||
4501 | case before_dot: | ||
4502 | case at_dot: | ||
4503 | case after_dot: | ||
4504 | #endif | ||
4505 | break; | ||
4506 | |||
4507 | case start_memory: | ||
4508 | reg_no = *p1; | ||
4509 | assert (reg_no > 0 && reg_no <= MAX_REGNUM); | ||
4510 | ret = group_match_null_string_p (&p1, end, reg_info); | ||
4511 | |||
4512 | /* Have to set this here in case we're checking a group which | ||
4513 | contains a group and a back reference to it. */ | ||
4514 | |||
4515 | if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) | ||
4516 | REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; | ||
4517 | |||
4518 | if (!ret) | ||
4519 | return false; | ||
4520 | break; | ||
4521 | |||
4522 | /* If this is an optimized succeed_n for zero times, make the jump. */ | ||
4523 | case jump: | ||
4524 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
4525 | if (mcnt >= 0) | ||
4526 | p1 += mcnt; | ||
4527 | else | ||
4528 | return false; | ||
4529 | break; | ||
4530 | |||
4531 | case succeed_n: | ||
4532 | /* Get to the number of times to succeed. */ | ||
4533 | p1 += 2; | ||
4534 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
4535 | |||
4536 | if (mcnt == 0) | ||
4537 | { | ||
4538 | p1 -= 4; | ||
4539 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
4540 | p1 += mcnt; | ||
4541 | } | ||
4542 | else | ||
4543 | return false; | ||
4544 | break; | ||
4545 | |||
4546 | case duplicate: | ||
4547 | if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) | ||
4548 | return false; | ||
4549 | break; | ||
4550 | |||
4551 | case set_number_at: | ||
4552 | p1 += 4; | ||
4553 | |||
4554 | default: | ||
4555 | /* All other opcodes mean we cannot match the empty string. */ | ||
4556 | return false; | ||
4557 | } | ||
4558 | |||
4559 | *p = p1; | ||
4560 | return true; | ||
4561 | } /* common_op_match_null_string_p */ | ||
4562 | |||
4563 | |||
/* Compare LEN bytes of S1 and S2 after mapping every byte through
   TRANSLATE.  Returns 0 when all translated pairs are equal and 1 at
   the first pair that differs.  */

static int
bcmp_translate(
     unsigned char *s1,
     unsigned char *s2,
     int len,
     char *translate
)
{
  register unsigned char *left = s1, *right = s2;

  for (; len; len--)
    if (translate[*left++] != translate[*right++])
      return 1;

  return 0;
}
4583 | |||
4584 | /* Entry points for GNU code. */ | ||
4585 | |||
4586 | /* re_compile_pattern is the GNU regular expression compiler: it | ||
4587 | compiles PATTERN (of length SIZE) and puts the result in BUFP. | ||
4588 | Returns 0 if the pattern was valid, otherwise an error string. | ||
4589 | |||
4590 | Assumes the `allocated' (and perhaps `buffer') and `translate' fields | ||
4591 | are set in BUFP on entry. | ||
4592 | |||
4593 | We call regex_compile to do the actual compilation. */ | ||
4594 | |||
4595 | const char * | ||
4596 | re_compile_pattern (pattern, length, bufp) | ||
4597 | const char *pattern; | ||
4598 | int length; | ||
4599 | struct re_pattern_buffer *bufp; | ||
4600 | { | ||
4601 | reg_errcode_t ret; | ||
4602 | |||
4603 | /* GNU code is written to assume at least RE_NREGS registers will be set | ||
4604 | (and at least one extra will be -1). */ | ||
4605 | bufp->regs_allocated = REGS_UNALLOCATED; | ||
4606 | |||
4607 | /* And GNU code determines whether or not to get register information | ||
4608 | by passing null for the REGS argument to re_match, etc., not by | ||
4609 | setting no_sub. */ | ||
4610 | bufp->no_sub = 0; | ||
4611 | |||
4612 | /* Match anchors at newline. */ | ||
4613 | bufp->newline_anchor = 1; | ||
4614 | |||
4615 | ret = regex_compile (pattern, length, re_syntax_options, bufp); | ||
4616 | |||
4617 | return re_error_msg[(int) ret]; | ||
4618 | } | ||
4619 | |||
4620 | /* Entry points compatible with 4.2 BSD regex library. We don't define | ||
4621 | them if this is an Emacs or POSIX compilation. */ | ||
4622 | |||
4623 | #if !defined (emacs) && !defined (_POSIX_SOURCE) | ||
4624 | |||
4625 | /* BSD has one and only one pattern buffer. */ | ||
4626 | static struct re_pattern_buffer re_comp_buf; | ||
4627 | |||
4628 | char * | ||
4629 | re_comp (s) | ||
4630 | const char *s; | ||
4631 | { | ||
4632 | reg_errcode_t ret; | ||
4633 | |||
4634 | if (!s) | ||
4635 | { | ||
4636 | if (!re_comp_buf.buffer) | ||
4637 | return "No previous regular expression"; | ||
4638 | return 0; | ||
4639 | } | ||
4640 | |||
4641 | if (!re_comp_buf.buffer) | ||
4642 | { | ||
4643 | re_comp_buf.buffer = (unsigned char *) malloc (200); | ||
4644 | if (re_comp_buf.buffer == NULL) | ||
4645 | return "Memory exhausted"; | ||
4646 | re_comp_buf.allocated = 200; | ||
4647 | |||
4648 | re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); | ||
4649 | if (re_comp_buf.fastmap == NULL) | ||
4650 | return "Memory exhausted"; | ||
4651 | } | ||
4652 | |||
4653 | /* Since `re_exec' always passes NULL for the `regs' argument, we | ||
4654 | don't need to initialize the pattern buffer fields which affect it. */ | ||
4655 | |||
4656 | /* Match anchors at newlines. */ | ||
4657 | re_comp_buf.newline_anchor = 1; | ||
4658 | |||
4659 | ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); | ||
4660 | |||
4661 | /* Yes, we're discarding `const' here. */ | ||
4662 | return (char *) re_error_msg[(int) ret]; | ||
4663 | } | ||
4664 | |||
4665 | |||
4666 | int | ||
4667 | re_exec (s) | ||
4668 | const char *s; | ||
4669 | { | ||
4670 | const int len = strlen (s); | ||
4671 | return | ||
4672 | 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); | ||
4673 | } | ||
4674 | #endif /* not emacs and not _POSIX_SOURCE */ | ||
4675 | |||
4676 | /* POSIX.2 functions. Don't define these for Emacs. */ | ||
4677 | |||
4678 | #ifndef emacs | ||
4679 | |||
4680 | /* regcomp takes a regular expression as a string and compiles it. | ||
4681 | |||
4682 | PREG is a regex_t *. We do not expect any fields to be initialized, | ||
4683 | since POSIX says we shouldn't. Thus, we set | ||
4684 | |||
4685 | `buffer' to the compiled pattern; | ||
4686 | `used' to the length of the compiled pattern; | ||
4687 | `syntax' to RE_SYNTAX_POSIX_EXTENDED if the | ||
4688 | REG_EXTENDED bit in CFLAGS is set; otherwise, to | ||
4689 | RE_SYNTAX_POSIX_BASIC; | ||
4690 | `newline_anchor' to REG_NEWLINE being set in CFLAGS; | ||
4691 | `fastmap' and `fastmap_accurate' to zero; | ||
4692 | `re_nsub' to the number of subexpressions in PATTERN. | ||
4693 | |||
4694 | PATTERN is the address of the pattern string. | ||
4695 | |||
4696 | CFLAGS is a series of bits which affect compilation. | ||
4697 | |||
4698 | If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we | ||
4699 | use POSIX basic syntax. | ||
4700 | |||
4701 | If REG_NEWLINE is set, then . and [^...] don't match newline. | ||
4702 | Also, regexec will try a match beginning after every newline. | ||
4703 | |||
4704 | If REG_ICASE is set, then we considers upper- and lowercase | ||
4705 | versions of letters to be equivalent when matching. | ||
4706 | |||
4707 | If REG_NOSUB is set, then when PREG is passed to regexec, that | ||
4708 | routine will report only success or failure, and nothing about the | ||
4709 | registers. | ||
4710 | |||
4711 | It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for | ||
4712 | the return codes and their meanings.) */ | ||
4713 | |||
4714 | int | ||
4715 | regcomp (preg, pattern, cflags) | ||
4716 | regex_t *preg; | ||
4717 | const char *pattern; | ||
4718 | int cflags; | ||
4719 | { | ||
4720 | reg_errcode_t ret; | ||
4721 | unsigned syntax | ||
4722 | = (cflags & REG_EXTENDED) ? | ||
4723 | RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; | ||
4724 | |||
4725 | /* regex_compile will allocate the space for the compiled pattern. */ | ||
4726 | preg->buffer = 0; | ||
4727 | preg->allocated = 0; | ||
4728 | |||
4729 | /* Don't bother to use a fastmap when searching. This simplifies the | ||
4730 | REG_NEWLINE case: if we used a fastmap, we'd have to put all the | ||
4731 | characters after newlines into the fastmap. This way, we just try | ||
4732 | every character. */ | ||
4733 | preg->fastmap = 0; | ||
4734 | |||
4735 | if (cflags & REG_ICASE) | ||
4736 | { | ||
4737 | unsigned i; | ||
4738 | |||
4739 | preg->translate = (char *) malloc (CHAR_SET_SIZE); | ||
4740 | if (preg->translate == NULL) | ||
4741 | return (int) REG_ESPACE; | ||
4742 | |||
4743 | /* Map uppercase characters to corresponding lowercase ones. */ | ||
4744 | for (i = 0; i < CHAR_SET_SIZE; i++) | ||
4745 | preg->translate[i] = ISUPPER (i) ? tolower (i) : i; | ||
4746 | } | ||
4747 | else | ||
4748 | preg->translate = NULL; | ||
4749 | |||
4750 | /* If REG_NEWLINE is set, newlines are treated differently. */ | ||
4751 | if (cflags & REG_NEWLINE) | ||
4752 | { /* REG_NEWLINE implies neither . nor [^...] match newline. */ | ||
4753 | syntax &= ~RE_DOT_NEWLINE; | ||
4754 | syntax |= RE_HAT_LISTS_NOT_NEWLINE; | ||
4755 | /* It also changes the matching behavior. */ | ||
4756 | preg->newline_anchor = 1; | ||
4757 | } | ||
4758 | else | ||
4759 | preg->newline_anchor = 0; | ||
4760 | |||
4761 | preg->no_sub = !!(cflags & REG_NOSUB); | ||
4762 | |||
4763 | /* POSIX says a null character in the pattern terminates it, so we | ||
4764 | can use strlen here in compiling the pattern. */ | ||
4765 | ret = regex_compile (pattern, strlen (pattern), syntax, preg); | ||
4766 | |||
4767 | /* POSIX doesn't distinguish between an unmatched open-group and an | ||
4768 | unmatched close-group: both are REG_EPAREN. */ | ||
4769 | if (ret == REG_ERPAREN) ret = REG_EPAREN; | ||
4770 | |||
4771 | return (int) ret; | ||
4772 | } | ||
4773 | |||
4774 | |||
4775 | /* regexec searches for a given pattern, specified by PREG, in the | ||
4776 | string STRING. | ||
4777 | |||
4778 | If NMATCH is zero or REG_NOSUB was set in the cflags argument to | ||
4779 | `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at | ||
4780 | least NMATCH elements, and we set them to the offsets of the | ||
4781 | corresponding matched substrings. | ||
4782 | |||
4783 | EFLAGS specifies `execution flags' which affect matching: if | ||
4784 | REG_NOTBOL is set, then ^ does not match at the beginning of the | ||
4785 | string; if REG_NOTEOL is set, then $ does not match at the end. | ||
4786 | |||
4787 | We return 0 if we find a match and REG_NOMATCH if not. */ | ||
4788 | |||
4789 | int | ||
4790 | regexec (preg, string, nmatch, pmatch, eflags) | ||
4791 | const regex_t *preg; | ||
4792 | const char *string; | ||
4793 | size_t nmatch; | ||
4794 | regmatch_t pmatch[]; | ||
4795 | int eflags; | ||
4796 | { | ||
4797 | int ret; | ||
4798 | struct re_registers regs; | ||
4799 | regex_t private_preg; | ||
4800 | int len = strlen (string); | ||
4801 | boolean want_reg_info = !preg->no_sub && nmatch > 0; | ||
4802 | |||
4803 | private_preg = *preg; | ||
4804 | |||
4805 | private_preg.not_bol = !!(eflags & REG_NOTBOL); | ||
4806 | private_preg.not_eol = !!(eflags & REG_NOTEOL); | ||
4807 | |||
4808 | /* The user has told us exactly how many registers to return | ||
4809 | information about, via `nmatch'. We have to pass that on to the | ||
4810 | matching routines. */ | ||
4811 | private_preg.regs_allocated = REGS_FIXED; | ||
4812 | |||
4813 | if (want_reg_info) | ||
4814 | { | ||
4815 | regs.num_regs = nmatch; | ||
4816 | regs.start = TALLOC (nmatch, regoff_t); | ||
4817 | regs.end = TALLOC (nmatch, regoff_t); | ||
4818 | if (regs.start == NULL || regs.end == NULL) | ||
4819 | return (int) REG_NOMATCH; | ||
4820 | } | ||
4821 | |||
4822 | /* Perform the searching operation. */ | ||
4823 | ret = re_search (&private_preg, string, len, | ||
4824 | /* start: */ 0, /* range: */ len, | ||
4825 | want_reg_info ? ®s : (struct re_registers *) 0); | ||
4826 | |||
4827 | /* Copy the register information to the POSIX structure. */ | ||
4828 | if (want_reg_info) | ||
4829 | { | ||
4830 | if (ret >= 0) | ||
4831 | { | ||
4832 | unsigned r; | ||
4833 | |||
4834 | for (r = 0; r < nmatch; r++) | ||
4835 | { | ||
4836 | pmatch[r].rm_so = regs.start[r]; | ||
4837 | pmatch[r].rm_eo = regs.end[r]; | ||
4838 | } | ||
4839 | } | ||
4840 | |||
4841 | /* If we needed the temporary register info, free the space now. */ | ||
4842 | free (regs.start); | ||
4843 | free (regs.end); | ||
4844 | } | ||
4845 | |||
4846 | /* We want zero return to mean success, unlike `re_search'. */ | ||
4847 | return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; | ||
4848 | } | ||
4849 | |||
4850 | |||
4851 | /* Returns a message corresponding to an error code, ERRCODE, returned | ||
4852 | from either regcomp or regexec. We don't use PREG here. */ | ||
4853 | |||
4854 | size_t | ||
4855 | regerror (errcode, preg, errbuf, errbuf_size) | ||
4856 | int errcode; | ||
4857 | const regex_t *preg; | ||
4858 | char *errbuf; | ||
4859 | size_t errbuf_size; | ||
4860 | { | ||
4861 | const char *msg; | ||
4862 | size_t msg_size; | ||
4863 | |||
4864 | if (errcode < 0 | ||
4865 | || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) | ||
4866 | /* Only error codes returned by the rest of the code should be passed | ||
4867 | to this routine. If we are given anything else, or if other regex | ||
4868 | code generates an invalid error code, then the program has a bug. | ||
4869 | Dump core so we can fix it. */ | ||
4870 | abort (); | ||
4871 | |||
4872 | msg = re_error_msg[errcode]; | ||
4873 | |||
4874 | /* POSIX doesn't require that we do anything in this case, but why | ||
4875 | not be nice. */ | ||
4876 | if (! msg) | ||
4877 | msg = "Success"; | ||
4878 | |||
4879 | msg_size = strlen (msg) + 1; /* Includes the null. */ | ||
4880 | |||
4881 | if (errbuf_size != 0) | ||
4882 | { | ||
4883 | if (msg_size > errbuf_size) | ||
4884 | { | ||
4885 | strncpy (errbuf, msg, errbuf_size - 1); | ||
4886 | errbuf[errbuf_size - 1] = 0; | ||
4887 | } | ||
4888 | else | ||
4889 | strcpy (errbuf, msg); | ||
4890 | } | ||
4891 | |||
4892 | return msg_size; | ||
4893 | } | ||
4894 | |||
4895 | |||
4896 | /* Free dynamically allocated space used by PREG. */ | ||
4897 | |||
4898 | void | ||
4899 | regfree (preg) | ||
4900 | regex_t *preg; | ||
4901 | { | ||
4902 | if (preg->buffer != NULL) | ||
4903 | free (preg->buffer); | ||
4904 | preg->buffer = NULL; | ||
4905 | |||
4906 | preg->allocated = 0; | ||
4907 | preg->used = 0; | ||
4908 | |||
4909 | if (preg->fastmap != NULL) | ||
4910 | free (preg->fastmap); | ||
4911 | preg->fastmap = NULL; | ||
4912 | preg->fastmap_accurate = 0; | ||
4913 | |||
4914 | if (preg->translate != NULL) | ||
4915 | free (preg->translate); | ||
4916 | preg->translate = NULL; | ||
4917 | } | ||
4918 | |||
4919 | #endif /* not emacs */ | ||
4920 | |||
4921 | /* | ||
4922 | Local variables: | ||
4923 | make-backup-files: t | ||
4924 | version-control: t | ||
4925 | trim-versions-without-asking: nil | ||
4926 | End: | ||
4927 | */ | ||
diff --git a/libbb/run-command.c b/libbb/run-command.c new file mode 100644 index 000000000..b05c734d0 --- /dev/null +++ b/libbb/run-command.c | |||
@@ -0,0 +1,399 @@ | |||
1 | #include "cache.h" | ||
2 | #include "run-command.h" | ||
3 | #include "exec_cmd.h" | ||
4 | |||
/* Close both descriptors stored in fd: fd[0] first, then fd[1]. */
static inline void close_pair(int fd[2])
{
	int i;

	for (i = 0; i < 2; i++)
		close(fd[i]);
}
10 | |||
/*
 * Make descriptor `to` refer to /dev/null (opened read/write).
 *
 * If /dev/null cannot be opened, `to` is left as it was.  If `to` was
 * closed on entry, open() may hand back `to` itself; in that case the
 * descriptor already is what we want and must not be closed again.
 */
static inline void dup_devnull(int to)
{
	int fd = open("/dev/null", O_RDWR);

	if (fd < 0)
		return;
	if (fd == to)
		return;
	dup2(fd, to);
	close(fd);
}
17 | |||
18 | int start_command(struct child_process *cmd) | ||
19 | { | ||
20 | int need_in, need_out, need_err; | ||
21 | int fdin[2], fdout[2], fderr[2]; | ||
22 | |||
23 | /* | ||
24 | * In case of errors we must keep the promise to close FDs | ||
25 | * that have been passed in via ->in and ->out. | ||
26 | */ | ||
27 | |||
28 | need_in = !cmd->no_stdin && cmd->in < 0; | ||
29 | if (need_in) { | ||
30 | if (pipe(fdin) < 0) { | ||
31 | if (cmd->out > 0) | ||
32 | close(cmd->out); | ||
33 | return -ERR_RUN_COMMAND_PIPE; | ||
34 | } | ||
35 | cmd->in = fdin[1]; | ||
36 | } | ||
37 | |||
38 | need_out = !cmd->no_stdout | ||
39 | && !cmd->stdout_to_stderr | ||
40 | && cmd->out < 0; | ||
41 | if (need_out) { | ||
42 | if (pipe(fdout) < 0) { | ||
43 | if (need_in) | ||
44 | close_pair(fdin); | ||
45 | else if (cmd->in) | ||
46 | close(cmd->in); | ||
47 | return -ERR_RUN_COMMAND_PIPE; | ||
48 | } | ||
49 | cmd->out = fdout[0]; | ||
50 | } | ||
51 | |||
52 | need_err = !cmd->no_stderr && cmd->err < 0; | ||
53 | if (need_err) { | ||
54 | if (pipe(fderr) < 0) { | ||
55 | if (need_in) | ||
56 | close_pair(fdin); | ||
57 | else if (cmd->in) | ||
58 | close(cmd->in); | ||
59 | if (need_out) | ||
60 | close_pair(fdout); | ||
61 | else if (cmd->out) | ||
62 | close(cmd->out); | ||
63 | return -ERR_RUN_COMMAND_PIPE; | ||
64 | } | ||
65 | cmd->err = fderr[0]; | ||
66 | } | ||
67 | |||
68 | trace_argv_printf(cmd->argv, "trace: run_command:"); | ||
69 | |||
70 | #ifndef __MINGW32__ | ||
71 | fflush(NULL); | ||
72 | cmd->pid = fork(); | ||
73 | if (!cmd->pid) { | ||
74 | if (cmd->no_stdin) | ||
75 | dup_devnull(0); | ||
76 | else if (need_in) { | ||
77 | dup2(fdin[0], 0); | ||
78 | close_pair(fdin); | ||
79 | } else if (cmd->in) { | ||
80 | dup2(cmd->in, 0); | ||
81 | close(cmd->in); | ||
82 | } | ||
83 | |||
84 | if (cmd->no_stderr) | ||
85 | dup_devnull(2); | ||
86 | else if (need_err) { | ||
87 | dup2(fderr[1], 2); | ||
88 | close_pair(fderr); | ||
89 | } | ||
90 | |||
91 | if (cmd->no_stdout) | ||
92 | dup_devnull(1); | ||
93 | else if (cmd->stdout_to_stderr) | ||
94 | dup2(2, 1); | ||
95 | else if (need_out) { | ||
96 | dup2(fdout[1], 1); | ||
97 | close_pair(fdout); | ||
98 | } else if (cmd->out > 1) { | ||
99 | dup2(cmd->out, 1); | ||
100 | close(cmd->out); | ||
101 | } | ||
102 | |||
103 | if (cmd->dir && chdir(cmd->dir)) | ||
104 | die("exec %s: cd to %s failed (%s)", cmd->argv[0], | ||
105 | cmd->dir, strerror(errno)); | ||
106 | if (cmd->env) { | ||
107 | for (; *cmd->env; cmd->env++) { | ||
108 | if (strchr(*cmd->env, '=')) | ||
109 | putenv((char*)*cmd->env); | ||
110 | else | ||
111 | unsetenv(*cmd->env); | ||
112 | } | ||
113 | } | ||
114 | if (cmd->preexec_cb) | ||
115 | cmd->preexec_cb(); | ||
116 | if (cmd->git_cmd) { | ||
117 | execv_git_cmd(cmd->argv); | ||
118 | } else { | ||
119 | execvp(cmd->argv[0], (char *const*) cmd->argv); | ||
120 | } | ||
121 | trace_printf("trace: exec '%s' failed: %s\n", cmd->argv[0], | ||
122 | strerror(errno)); | ||
123 | exit(127); | ||
124 | } | ||
125 | #else | ||
126 | int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ | ||
127 | const char **sargv = cmd->argv; | ||
128 | char **env = environ; | ||
129 | |||
130 | if (cmd->no_stdin) { | ||
131 | s0 = dup(0); | ||
132 | dup_devnull(0); | ||
133 | } else if (need_in) { | ||
134 | s0 = dup(0); | ||
135 | dup2(fdin[0], 0); | ||
136 | } else if (cmd->in) { | ||
137 | s0 = dup(0); | ||
138 | dup2(cmd->in, 0); | ||
139 | } | ||
140 | |||
141 | if (cmd->no_stderr) { | ||
142 | s2 = dup(2); | ||
143 | dup_devnull(2); | ||
144 | } else if (need_err) { | ||
145 | s2 = dup(2); | ||
146 | dup2(fderr[1], 2); | ||
147 | } | ||
148 | |||
149 | if (cmd->no_stdout) { | ||
150 | s1 = dup(1); | ||
151 | dup_devnull(1); | ||
152 | } else if (cmd->stdout_to_stderr) { | ||
153 | s1 = dup(1); | ||
154 | dup2(2, 1); | ||
155 | } else if (need_out) { | ||
156 | s1 = dup(1); | ||
157 | dup2(fdout[1], 1); | ||
158 | } else if (cmd->out > 1) { | ||
159 | s1 = dup(1); | ||
160 | dup2(cmd->out, 1); | ||
161 | } | ||
162 | |||
163 | if (cmd->dir) | ||
164 | die("chdir in start_command() not implemented"); | ||
165 | if (cmd->env) { | ||
166 | env = copy_environ(); | ||
167 | for (; *cmd->env; cmd->env++) | ||
168 | env = env_setenv(env, *cmd->env); | ||
169 | } | ||
170 | |||
171 | if (cmd->git_cmd) { | ||
172 | cmd->argv = prepare_git_cmd(cmd->argv); | ||
173 | } | ||
174 | |||
175 | cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); | ||
176 | |||
177 | if (cmd->env) | ||
178 | free_environ(env); | ||
179 | if (cmd->git_cmd) | ||
180 | free(cmd->argv); | ||
181 | |||
182 | cmd->argv = sargv; | ||
183 | if (s0 >= 0) | ||
184 | dup2(s0, 0), close(s0); | ||
185 | if (s1 >= 0) | ||
186 | dup2(s1, 1), close(s1); | ||
187 | if (s2 >= 0) | ||
188 | dup2(s2, 2), close(s2); | ||
189 | #endif | ||
190 | |||
191 | if (cmd->pid < 0) { | ||
192 | int err = errno; | ||
193 | if (need_in) | ||
194 | close_pair(fdin); | ||
195 | else if (cmd->in) | ||
196 | close(cmd->in); | ||
197 | if (need_out) | ||
198 | close_pair(fdout); | ||
199 | else if (cmd->out) | ||
200 | close(cmd->out); | ||
201 | if (need_err) | ||
202 | close_pair(fderr); | ||
203 | return err == ENOENT ? | ||
204 | -ERR_RUN_COMMAND_EXEC : | ||
205 | -ERR_RUN_COMMAND_FORK; | ||
206 | } | ||
207 | |||
208 | if (need_in) | ||
209 | close(fdin[0]); | ||
210 | else if (cmd->in) | ||
211 | close(cmd->in); | ||
212 | |||
213 | if (need_out) | ||
214 | close(fdout[1]); | ||
215 | else if (cmd->out) | ||
216 | close(cmd->out); | ||
217 | |||
218 | if (need_err) | ||
219 | close(fderr[1]); | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | static int wait_or_whine(pid_t pid) | ||
225 | { | ||
226 | for (;;) { | ||
227 | int status, code; | ||
228 | pid_t waiting = waitpid(pid, &status, 0); | ||
229 | |||
230 | if (waiting < 0) { | ||
231 | if (errno == EINTR) | ||
232 | continue; | ||
233 | error("waitpid failed (%s)", strerror(errno)); | ||
234 | return -ERR_RUN_COMMAND_WAITPID; | ||
235 | } | ||
236 | if (waiting != pid) | ||
237 | return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; | ||
238 | if (WIFSIGNALED(status)) | ||
239 | return -ERR_RUN_COMMAND_WAITPID_SIGNAL; | ||
240 | |||
241 | if (!WIFEXITED(status)) | ||
242 | return -ERR_RUN_COMMAND_WAITPID_NOEXIT; | ||
243 | code = WEXITSTATUS(status); | ||
244 | switch (code) { | ||
245 | case 127: | ||
246 | return -ERR_RUN_COMMAND_EXEC; | ||
247 | case 0: | ||
248 | return 0; | ||
249 | default: | ||
250 | return -code; | ||
251 | } | ||
252 | } | ||
253 | } | ||
254 | |||
255 | int finish_command(struct child_process *cmd) | ||
256 | { | ||
257 | return wait_or_whine(cmd->pid); | ||
258 | } | ||
259 | |||
/*
 * Start cmd and wait for it.  Returns start_command()'s code if it is
 * non-zero, otherwise whatever finish_command() returns.
 */
int run_command(struct child_process *cmd)
{
	int started = start_command(cmd);

	return started ? started : finish_command(cmd);
}
267 | |||
268 | static void prepare_run_command_v_opt(struct child_process *cmd, | ||
269 | const char **argv, | ||
270 | int opt) | ||
271 | { | ||
272 | memset(cmd, 0, sizeof(*cmd)); | ||
273 | cmd->argv = argv; | ||
274 | cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0; | ||
275 | cmd->git_cmd = opt & RUN_GIT_CMD ? 1 : 0; | ||
276 | cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0; | ||
277 | } | ||
278 | |||
279 | int run_command_v_opt(const char **argv, int opt) | ||
280 | { | ||
281 | struct child_process cmd; | ||
282 | prepare_run_command_v_opt(&cmd, argv, opt); | ||
283 | return run_command(&cmd); | ||
284 | } | ||
285 | |||
286 | int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env) | ||
287 | { | ||
288 | struct child_process cmd; | ||
289 | prepare_run_command_v_opt(&cmd, argv, opt); | ||
290 | cmd.dir = dir; | ||
291 | cmd.env = env; | ||
292 | return run_command(&cmd); | ||
293 | } | ||
294 | |||
#ifdef __MINGW32__
/*
 * Thread entry point handed to _beginthreadex() by start_async():
 * calls the async procedure with its descriptor and user data and
 * returns the procedure's result.
 */
static __stdcall unsigned run_thread(void *data)
{
	struct async *job = data;

	return job->proc(job->fd_for_proc, job->data);
}
#endif
302 | |||
303 | int start_async(struct async *async) | ||
304 | { | ||
305 | int pipe_out[2]; | ||
306 | |||
307 | if (pipe(pipe_out) < 0) | ||
308 | return error("cannot create pipe: %s", strerror(errno)); | ||
309 | async->out = pipe_out[0]; | ||
310 | |||
311 | #ifndef __MINGW32__ | ||
312 | /* Flush stdio before fork() to avoid cloning buffers */ | ||
313 | fflush(NULL); | ||
314 | |||
315 | async->pid = fork(); | ||
316 | if (async->pid < 0) { | ||
317 | error("fork (async) failed: %s", strerror(errno)); | ||
318 | close_pair(pipe_out); | ||
319 | return -1; | ||
320 | } | ||
321 | if (!async->pid) { | ||
322 | close(pipe_out[0]); | ||
323 | exit(!!async->proc(pipe_out[1], async->data)); | ||
324 | } | ||
325 | close(pipe_out[1]); | ||
326 | #else | ||
327 | async->fd_for_proc = pipe_out[1]; | ||
328 | async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); | ||
329 | if (!async->tid) { | ||
330 | error("cannot create thread: %s", strerror(errno)); | ||
331 | close_pair(pipe_out); | ||
332 | return -1; | ||
333 | } | ||
334 | #endif | ||
335 | return 0; | ||
336 | } | ||
337 | |||
338 | int finish_async(struct async *async) | ||
339 | { | ||
340 | #ifndef __MINGW32__ | ||
341 | int ret = 0; | ||
342 | |||
343 | if (wait_or_whine(async->pid)) | ||
344 | ret = error("waitpid (async) failed"); | ||
345 | #else | ||
346 | DWORD ret = 0; | ||
347 | if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) | ||
348 | ret = error("waiting for thread failed: %lu", GetLastError()); | ||
349 | else if (!GetExitCodeThread(async->tid, &ret)) | ||
350 | ret = error("cannot get thread exit code: %lu", GetLastError()); | ||
351 | CloseHandle(async->tid); | ||
352 | #endif | ||
353 | return ret; | ||
354 | } | ||
355 | |||
356 | int run_hook(const char *index_file, const char *name, ...) | ||
357 | { | ||
358 | struct child_process hook; | ||
359 | const char **argv = NULL, *env[2]; | ||
360 | char index[PATH_MAX]; | ||
361 | va_list args; | ||
362 | int ret; | ||
363 | size_t i = 0, alloc = 0; | ||
364 | |||
365 | if (access(git_path("hooks/%s", name), X_OK) < 0) | ||
366 | return 0; | ||
367 | |||
368 | va_start(args, name); | ||
369 | ALLOC_GROW(argv, i + 1, alloc); | ||
370 | argv[i++] = git_path("hooks/%s", name); | ||
371 | while (argv[i-1]) { | ||
372 | ALLOC_GROW(argv, i + 1, alloc); | ||
373 | argv[i++] = va_arg(args, const char *); | ||
374 | } | ||
375 | va_end(args); | ||
376 | |||
377 | memset(&hook, 0, sizeof(hook)); | ||
378 | hook.argv = argv; | ||
379 | hook.no_stdin = 1; | ||
380 | hook.stdout_to_stderr = 1; | ||
381 | if (index_file) { | ||
382 | snprintf(index, sizeof(index), "GIT_INDEX_FILE=%s", index_file); | ||
383 | env[0] = index; | ||
384 | env[1] = NULL; | ||
385 | hook.env = env; | ||
386 | } | ||
387 | |||
388 | ret = start_command(&hook); | ||
389 | free(argv); | ||
390 | if (ret) { | ||
391 | warning("Could not spawn %s", argv[0]); | ||
392 | return ret; | ||
393 | } | ||
394 | ret = finish_command(&hook); | ||
395 | if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) | ||
396 | warning("%s exited due to uncaught signal", argv[0]); | ||
397 | |||
398 | return ret; | ||
399 | } | ||
diff --git a/libbb/setenv.c b/libbb/setenv.c new file mode 100644 index 000000000..3a22ea7b7 --- /dev/null +++ b/libbb/setenv.c | |||
@@ -0,0 +1,34 @@ | |||
1 | #include "../git-compat-util.h" | ||
2 | |||
/*
 * Replacement for setenv(3) built on putenv(3).
 *
 * Sets the environment variable `name` to `value`.  If `replace` is
 * zero and the variable already exists, nothing is changed and 0 is
 * returned.
 *
 * Returns -1 if name or value is NULL, if name is empty or contains
 * '=', or if memory cannot be allocated; otherwise returns the result
 * of putenv().
 */
int gitsetenv(const char *name, const char *value, int replace)
{
	int out;
	size_t namelen, valuelen;
	char *envstr;

	if (!name || !value) return -1;
	/* A name that is empty or contains '=' cannot be set as NAME=value. */
	if (!*name || strchr(name, '=')) return -1;
	if (!replace && getenv(name)) return 0;

	namelen = strlen(name);
	valuelen = strlen(value);
	/* Room for "name" + '=' + "value" + terminating NUL. */
	envstr = malloc((namelen + valuelen + 2));
	if (!envstr) return -1;

	memcpy(envstr, name, namelen);
	envstr[namelen] = '=';
	memcpy(envstr + namelen + 1, value, valuelen);
	envstr[namelen + valuelen + 1] = 0;

	out = putenv(envstr);
	/* putenv(3) makes the argument string part of the environment,
	 * and changing that string modifies the environment --- which
	 * means we do not own that storage anymore.  Do not free
	 * envstr on success.  If putenv() failed, the string was not
	 * taken over, so release it instead of leaking it.
	 */
	if (out)
		free(envstr);

	return out;
}
diff --git a/libbb/strbuf.c b/libbb/strbuf.c new file mode 100644 index 000000000..a88496030 --- /dev/null +++ b/libbb/strbuf.c | |||
@@ -0,0 +1,376 @@ | |||
1 | #include "cache.h" | ||
2 | #include "refs.h" | ||
3 | |||
/*
 * Check whether `str` begins with `prefix`.
 *
 * Returns 0 when every byte of `prefix` matches the start of `str`.
 * Otherwise returns the difference, as unsigned bytes, between the first
 * mismatching byte of `prefix` and the corresponding byte of `str`
 * (prefix minus str).
 */
int prefixcmp(const char *str, const char *prefix)
{
	while (*prefix) {
		if (*str != *prefix)
			return (unsigned char)*prefix - (unsigned char)*str;
		str++;
		prefix++;
	}
	return 0;
}
12 | |||
13 | /* | ||
14 | * Used as the default ->buf value, so that people can always assume | ||
15 | * buf is non NULL and ->buf is NUL terminated even for a freshly | ||
16 | * initialized strbuf. | ||
17 | */ | ||
18 | char strbuf_slopbuf[1]; | ||
19 | |||
20 | void strbuf_init(struct strbuf *sb, size_t hint) | ||
21 | { | ||
22 | sb->alloc = sb->len = 0; | ||
23 | sb->buf = strbuf_slopbuf; | ||
24 | if (hint) | ||
25 | strbuf_grow(sb, hint); | ||
26 | } | ||
27 | |||
28 | void strbuf_release(struct strbuf *sb) | ||
29 | { | ||
30 | if (sb->alloc) { | ||
31 | free(sb->buf); | ||
32 | strbuf_init(sb, 0); | ||
33 | } | ||
34 | } | ||
35 | |||
36 | char *strbuf_detach(struct strbuf *sb, size_t *sz) | ||
37 | { | ||
38 | char *res = sb->alloc ? sb->buf : NULL; | ||
39 | if (sz) | ||
40 | *sz = sb->len; | ||
41 | strbuf_init(sb, 0); | ||
42 | return res; | ||
43 | } | ||
44 | |||
45 | void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc) | ||
46 | { | ||
47 | strbuf_release(sb); | ||
48 | sb->buf = buf; | ||
49 | sb->len = len; | ||
50 | sb->alloc = alloc; | ||
51 | strbuf_grow(sb, 0); | ||
52 | sb->buf[sb->len] = '\0'; | ||
53 | } | ||
54 | |||
55 | void strbuf_grow(struct strbuf *sb, size_t extra) | ||
56 | { | ||
57 | if (sb->len + extra + 1 <= sb->len) | ||
58 | die("you want to use way too much memory"); | ||
59 | if (!sb->alloc) | ||
60 | sb->buf = NULL; | ||
61 | ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); | ||
62 | } | ||
63 | |||
64 | void strbuf_trim(struct strbuf *sb) | ||
65 | { | ||
66 | char *b = sb->buf; | ||
67 | while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) | ||
68 | sb->len--; | ||
69 | while (sb->len > 0 && isspace(*b)) { | ||
70 | b++; | ||
71 | sb->len--; | ||
72 | } | ||
73 | memmove(sb->buf, b, sb->len); | ||
74 | sb->buf[sb->len] = '\0'; | ||
75 | } | ||
76 | void strbuf_rtrim(struct strbuf *sb) | ||
77 | { | ||
78 | while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) | ||
79 | sb->len--; | ||
80 | sb->buf[sb->len] = '\0'; | ||
81 | } | ||
82 | |||
83 | void strbuf_ltrim(struct strbuf *sb) | ||
84 | { | ||
85 | char *b = sb->buf; | ||
86 | while (sb->len > 0 && isspace(*b)) { | ||
87 | b++; | ||
88 | sb->len--; | ||
89 | } | ||
90 | memmove(sb->buf, b, sb->len); | ||
91 | sb->buf[sb->len] = '\0'; | ||
92 | } | ||
93 | |||
94 | void strbuf_tolower(struct strbuf *sb) | ||
95 | { | ||
96 | int i; | ||
97 | for (i = 0; i < sb->len; i++) | ||
98 | sb->buf[i] = tolower(sb->buf[i]); | ||
99 | } | ||
100 | |||
101 | struct strbuf **strbuf_split(const struct strbuf *sb, int delim) | ||
102 | { | ||
103 | int alloc = 2, pos = 0; | ||
104 | char *n, *p; | ||
105 | struct strbuf **ret; | ||
106 | struct strbuf *t; | ||
107 | |||
108 | ret = xcalloc(alloc, sizeof(struct strbuf *)); | ||
109 | p = n = sb->buf; | ||
110 | while (n < sb->buf + sb->len) { | ||
111 | int len; | ||
112 | n = memchr(n, delim, sb->len - (n - sb->buf)); | ||
113 | if (pos + 1 >= alloc) { | ||
114 | alloc = alloc * 2; | ||
115 | ret = xrealloc(ret, sizeof(struct strbuf *) * alloc); | ||
116 | } | ||
117 | if (!n) | ||
118 | n = sb->buf + sb->len - 1; | ||
119 | len = n - p + 1; | ||
120 | t = xmalloc(sizeof(struct strbuf)); | ||
121 | strbuf_init(t, len); | ||
122 | strbuf_add(t, p, len); | ||
123 | ret[pos] = t; | ||
124 | ret[++pos] = NULL; | ||
125 | p = ++n; | ||
126 | } | ||
127 | return ret; | ||
128 | } | ||
129 | |||
/*
 * Free a NULL-terminated array of heap-allocated strbufs: each strbuf's
 * buffer, each strbuf itself, and finally the array.
 */
void strbuf_list_free(struct strbuf **sbs)
{
	struct strbuf **cur;

	for (cur = sbs; *cur; cur++) {
		strbuf_release(*cur);
		free(*cur);
	}
	free(sbs);
}
140 | |||
141 | int strbuf_cmp(const struct strbuf *a, const struct strbuf *b) | ||
142 | { | ||
143 | int len = a->len < b->len ? a->len: b->len; | ||
144 | int cmp = memcmp(a->buf, b->buf, len); | ||
145 | if (cmp) | ||
146 | return cmp; | ||
147 | return a->len < b->len ? -1: a->len != b->len; | ||
148 | } | ||
149 | |||
150 | void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, | ||
151 | const void *data, size_t dlen) | ||
152 | { | ||
153 | if (pos + len < pos) | ||
154 | die("you want to use way too much memory"); | ||
155 | if (pos > sb->len) | ||
156 | die("`pos' is too far after the end of the buffer"); | ||
157 | if (pos + len > sb->len) | ||
158 | die("`pos + len' is too far after the end of the buffer"); | ||
159 | |||
160 | if (dlen >= len) | ||
161 | strbuf_grow(sb, dlen - len); | ||
162 | memmove(sb->buf + pos + dlen, | ||
163 | sb->buf + pos + len, | ||
164 | sb->len - pos - len); | ||
165 | memcpy(sb->buf + pos, data, dlen); | ||
166 | strbuf_setlen(sb, sb->len + dlen - len); | ||
167 | } | ||
168 | |||
/*
 * Insert `len` bytes from `data` at offset `pos` of `sb`; implemented as
 * a splice that removes nothing.
 */
void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
{
	const size_t nothing_removed = 0;

	strbuf_splice(sb, pos, nothing_removed, data, len);
}
173 | |||
/*
 * Delete `len` bytes starting at offset `pos` of `sb`; implemented as a
 * splice that inserts nothing.
 */
void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
{
	const size_t nothing_inserted = 0;

	strbuf_splice(sb, pos, len, NULL, nothing_inserted);
}
178 | |||
179 | void strbuf_add(struct strbuf *sb, const void *data, size_t len) | ||
180 | { | ||
181 | strbuf_grow(sb, len); | ||
182 | memcpy(sb->buf + sb->len, data, len); | ||
183 | strbuf_setlen(sb, sb->len + len); | ||
184 | } | ||
185 | |||
186 | void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len) | ||
187 | { | ||
188 | strbuf_grow(sb, len); | ||
189 | memcpy(sb->buf + sb->len, sb->buf + pos, len); | ||
190 | strbuf_setlen(sb, sb->len + len); | ||
191 | } | ||
192 | |||
193 | void strbuf_addf(struct strbuf *sb, const char *fmt, ...) | ||
194 | { | ||
195 | int len; | ||
196 | va_list ap; | ||
197 | |||
198 | if (!strbuf_avail(sb)) | ||
199 | strbuf_grow(sb, 64); | ||
200 | va_start(ap, fmt); | ||
201 | len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); | ||
202 | va_end(ap); | ||
203 | if (len < 0) | ||
204 | die("your vsnprintf is broken"); | ||
205 | if (len > strbuf_avail(sb)) { | ||
206 | strbuf_grow(sb, len); | ||
207 | va_start(ap, fmt); | ||
208 | len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); | ||
209 | va_end(ap); | ||
210 | if (len > strbuf_avail(sb)) { | ||
211 | die("this should not happen, your snprintf is broken"); | ||
212 | } | ||
213 | } | ||
214 | strbuf_setlen(sb, sb->len + len); | ||
215 | } | ||
216 | |||
217 | void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, | ||
218 | void *context) | ||
219 | { | ||
220 | for (;;) { | ||
221 | const char *percent; | ||
222 | size_t consumed; | ||
223 | |||
224 | percent = strchrnul(format, '%'); | ||
225 | strbuf_add(sb, format, percent - format); | ||
226 | if (!*percent) | ||
227 | break; | ||
228 | format = percent + 1; | ||
229 | |||
230 | consumed = fn(sb, format, context); | ||
231 | if (consumed) | ||
232 | format += consumed; | ||
233 | else | ||
234 | strbuf_addch(sb, '%'); | ||
235 | } | ||
236 | } | ||
237 | |||
238 | size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, | ||
239 | void *context) | ||
240 | { | ||
241 | struct strbuf_expand_dict_entry *e = context; | ||
242 | size_t len; | ||
243 | |||
244 | for (; e->placeholder && (len = strlen(e->placeholder)); e++) { | ||
245 | if (!strncmp(placeholder, e->placeholder, len)) { | ||
246 | if (e->value) | ||
247 | strbuf_addstr(sb, e->value); | ||
248 | return len; | ||
249 | } | ||
250 | } | ||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) | ||
255 | { | ||
256 | size_t res; | ||
257 | size_t oldalloc = sb->alloc; | ||
258 | |||
259 | strbuf_grow(sb, size); | ||
260 | res = fread(sb->buf + sb->len, 1, size, f); | ||
261 | if (res > 0) | ||
262 | strbuf_setlen(sb, sb->len + res); | ||
263 | else if (res < 0 && oldalloc == 0) | ||
264 | strbuf_release(sb); | ||
265 | return res; | ||
266 | } | ||
267 | |||
268 | ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) | ||
269 | { | ||
270 | size_t oldlen = sb->len; | ||
271 | size_t oldalloc = sb->alloc; | ||
272 | |||
273 | strbuf_grow(sb, hint ? hint : 8192); | ||
274 | for (;;) { | ||
275 | ssize_t cnt; | ||
276 | |||
277 | cnt = xread(fd, sb->buf + sb->len, sb->alloc - sb->len - 1); | ||
278 | if (cnt < 0) { | ||
279 | if (oldalloc == 0) | ||
280 | strbuf_release(sb); | ||
281 | else | ||
282 | strbuf_setlen(sb, oldlen); | ||
283 | return -1; | ||
284 | } | ||
285 | if (!cnt) | ||
286 | break; | ||
287 | sb->len += cnt; | ||
288 | strbuf_grow(sb, 8192); | ||
289 | } | ||
290 | |||
291 | sb->buf[sb->len] = '\0'; | ||
292 | return sb->len - oldlen; | ||
293 | } | ||
294 | |||
295 | #define STRBUF_MAXLINK (2*PATH_MAX) | ||
296 | |||
297 | int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) | ||
298 | { | ||
299 | size_t oldalloc = sb->alloc; | ||
300 | |||
301 | if (hint < 32) | ||
302 | hint = 32; | ||
303 | |||
304 | while (hint < STRBUF_MAXLINK) { | ||
305 | int len; | ||
306 | |||
307 | strbuf_grow(sb, hint); | ||
308 | len = readlink(path, sb->buf, hint); | ||
309 | if (len < 0) { | ||
310 | if (errno != ERANGE) | ||
311 | break; | ||
312 | } else if (len < hint) { | ||
313 | strbuf_setlen(sb, len); | ||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | /* .. the buffer was too small - try again */ | ||
318 | hint *= 2; | ||
319 | } | ||
320 | if (oldalloc == 0) | ||
321 | strbuf_release(sb); | ||
322 | return -1; | ||
323 | } | ||
324 | |||
325 | int strbuf_getline(struct strbuf *sb, FILE *fp, int term) | ||
326 | { | ||
327 | int ch; | ||
328 | |||
329 | strbuf_grow(sb, 0); | ||
330 | if (feof(fp)) | ||
331 | return EOF; | ||
332 | |||
333 | strbuf_reset(sb); | ||
334 | while ((ch = fgetc(fp)) != EOF) { | ||
335 | if (ch == term) | ||
336 | break; | ||
337 | strbuf_grow(sb, 1); | ||
338 | sb->buf[sb->len++] = ch; | ||
339 | } | ||
340 | if (ch == EOF && sb->len == 0) | ||
341 | return EOF; | ||
342 | |||
343 | sb->buf[sb->len] = '\0'; | ||
344 | return 0; | ||
345 | } | ||
346 | |||
/*
 * Append the whole content of the file at `path` to `sb`.
 *
 * `hint` is passed on to strbuf_read(). Returns the number of bytes
 * read, or -1 when the file cannot be opened or read.
 */
int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint)
{
	int len;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;

	len = strbuf_read(sb, fd, hint);
	close(fd);

	return len < 0 ? -1 : len;
}
361 | |||
/*
 * Let interpret_branch_name() process `name` into `sb`. Returns 0 when
 * it reports having handled the whole of `name`; otherwise `name` is
 * appended to `sb` verbatim and its length is returned.
 */
int strbuf_branchname(struct strbuf *sb, const char *name)
{
	int name_len = strlen(name);

	if (interpret_branch_name(name, sb) != name_len) {
		strbuf_add(sb, name, name_len);
		return name_len;
	}
	return 0;
}
370 | |||
371 | int strbuf_check_branch_ref(struct strbuf *sb, const char *name) | ||
372 | { | ||
373 | strbuf_branchname(sb, name); | ||
374 | strbuf_splice(sb, 0, 0, "refs/heads/", 11); | ||
375 | return check_ref_format(sb->buf); | ||
376 | } | ||
diff --git a/libbb/strlcpy.c b/libbb/strlcpy.c new file mode 100644 index 000000000..4024c3603 --- /dev/null +++ b/libbb/strlcpy.c | |||
@@ -0,0 +1,13 @@ | |||
1 | #include "../git-compat-util.h" | ||
2 | |||
/*
 * strlcpy() replacement: copy `src` into `dest`, which has room for
 * `size` bytes, truncating if necessary and always NUL-terminating
 * unless `size` is 0.
 *
 * Returns strlen(src), so a result >= `size` means the copy was
 * truncated.
 */
size_t gitstrlcpy(char *dest, const char *src, size_t size)
{
	const size_t src_len = strlen(src);
	size_t n;

	/* No room at all: leave dest untouched. */
	if (size == 0)
		return src_len;

	n = src_len < size ? src_len : size - 1;
	memcpy(dest, src, n);
	dest[n] = '\0';
	return src_len;
}
diff --git a/libbb/trace.c b/libbb/trace.c new file mode 100644 index 000000000..4229ae123 --- /dev/null +++ b/libbb/trace.c | |||
@@ -0,0 +1,127 @@ | |||
1 | /* | ||
2 | * GIT - The information manager from hell | ||
3 | * | ||
4 | * Copyright (C) 2000-2002 Michael R. Elkins <me@mutt.org> | ||
5 | * Copyright (C) 2002-2004 Oswald Buddenhagen <ossi@users.sf.net> | ||
6 | * Copyright (C) 2004 Theodore Y. Ts'o <tytso@mit.edu> | ||
7 | * Copyright (C) 2006 Mike McCormack | ||
8 | * Copyright (C) 2006 Christian Couder | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | #include "cache.h" | ||
26 | #include "quote.h" | ||
27 | |||
/*
 * Get a trace file descriptor from the GIT_TRACE environment variable.
 *
 * Returns 0 when tracing is off (unset, empty, "0" or "false"),
 * STDERR_FILENO for "1"/"true", the digit itself for any other
 * single-digit value, or a descriptor opened for appending when the
 * value is an absolute path. Only in the last case is *need_close set
 * to 1. Unusable values produce a message on stderr and fall back to
 * STDERR_FILENO.
 */
static int get_trace_fd(int *need_close)
{
	char *trace = getenv("GIT_TRACE");
	int fd;

	if (!trace)
		return 0;
	if (!strcmp(trace, "") || !strcmp(trace, "0") ||
	    !strcasecmp(trace, "false"))
		return 0;
	if (!strcmp(trace, "1") || !strcasecmp(trace, "true"))
		return STDERR_FILENO;
	if (strlen(trace) == 1 && isdigit(*trace))
		return atoi(trace);

	if (!is_absolute_path(trace)) {
		fprintf(stderr, "What does '%s' for GIT_TRACE mean?\n", trace);
		fprintf(stderr, "If you want to trace into a file, "
			"then please set GIT_TRACE to an absolute pathname "
			"(starting with /).\n");
		fprintf(stderr, "Defaulting to tracing on stderr...\n");
		return STDERR_FILENO;
	}

	fd = open(trace, O_WRONLY | O_APPEND | O_CREAT, 0666);
	if (fd == -1) {
		fprintf(stderr,
			"Could not open '%s' for tracing: %s\n"
			"Defaulting to tracing on stderr...\n",
			trace, strerror(errno));
		return STDERR_FILENO;
	}

	*need_close = 1;
	return fd;
}
61 | |||
62 | static const char err_msg[] = "Could not trace into fd given by " | ||
63 | "GIT_TRACE environment variable"; | ||
64 | |||
65 | void trace_printf(const char *fmt, ...) | ||
66 | { | ||
67 | struct strbuf buf; | ||
68 | va_list ap; | ||
69 | int fd, len, need_close = 0; | ||
70 | |||
71 | fd = get_trace_fd(&need_close); | ||
72 | if (!fd) | ||
73 | return; | ||
74 | |||
75 | strbuf_init(&buf, 64); | ||
76 | va_start(ap, fmt); | ||
77 | len = vsnprintf(buf.buf, strbuf_avail(&buf), fmt, ap); | ||
78 | va_end(ap); | ||
79 | if (len >= strbuf_avail(&buf)) { | ||
80 | strbuf_grow(&buf, len - strbuf_avail(&buf) + 128); | ||
81 | va_start(ap, fmt); | ||
82 | len = vsnprintf(buf.buf, strbuf_avail(&buf), fmt, ap); | ||
83 | va_end(ap); | ||
84 | if (len >= strbuf_avail(&buf)) | ||
85 | die("broken vsnprintf"); | ||
86 | } | ||
87 | strbuf_setlen(&buf, len); | ||
88 | |||
89 | write_or_whine_pipe(fd, buf.buf, buf.len, err_msg); | ||
90 | strbuf_release(&buf); | ||
91 | |||
92 | if (need_close) | ||
93 | close(fd); | ||
94 | } | ||
95 | |||
96 | void trace_argv_printf(const char **argv, const char *fmt, ...) | ||
97 | { | ||
98 | struct strbuf buf; | ||
99 | va_list ap; | ||
100 | int fd, len, need_close = 0; | ||
101 | |||
102 | fd = get_trace_fd(&need_close); | ||
103 | if (!fd) | ||
104 | return; | ||
105 | |||
106 | strbuf_init(&buf, 64); | ||
107 | va_start(ap, fmt); | ||
108 | len = vsnprintf(buf.buf, strbuf_avail(&buf), fmt, ap); | ||
109 | va_end(ap); | ||
110 | if (len >= strbuf_avail(&buf)) { | ||
111 | strbuf_grow(&buf, len - strbuf_avail(&buf) + 128); | ||
112 | va_start(ap, fmt); | ||
113 | len = vsnprintf(buf.buf, strbuf_avail(&buf), fmt, ap); | ||
114 | va_end(ap); | ||
115 | if (len >= strbuf_avail(&buf)) | ||
116 | die("broken vsnprintf"); | ||
117 | } | ||
118 | strbuf_setlen(&buf, len); | ||
119 | |||
120 | sq_quote_argv(&buf, argv, 0); | ||
121 | strbuf_addch(&buf, '\n'); | ||
122 | write_or_whine_pipe(fd, buf.buf, buf.len, err_msg); | ||
123 | strbuf_release(&buf); | ||
124 | |||
125 | if (need_close) | ||
126 | close(fd); | ||
127 | } | ||
diff --git a/libbb/usage.c b/libbb/usage.c new file mode 100644 index 000000000..820d09f92 --- /dev/null +++ b/libbb/usage.c | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * GIT - The information manager from hell | ||
3 | * | ||
4 | * Copyright (C) Linus Torvalds, 2005 | ||
5 | */ | ||
6 | #include "git-compat-util.h" | ||
7 | |||
/*
 * Format `err` with `params` into a fixed 1024-byte buffer (longer
 * messages are truncated) and print it on stderr, preceded by `prefix`
 * and followed by a newline.
 */
static void report(const char *prefix, const char *err, va_list params)
{
	char text[1024];

	vsnprintf(text, sizeof(text), err, params);
	fprintf(stderr, "%s%s\n", prefix, text);
}
14 | |||
15 | static NORETURN void usage_builtin(const char *err) | ||
16 | { | ||
17 | fprintf(stderr, "usage: %s\n", err); | ||
18 | exit(129); | ||
19 | } | ||
20 | |||
21 | static NORETURN void die_builtin(const char *err, va_list params) | ||
22 | { | ||
23 | report("fatal: ", err, params); | ||
24 | exit(128); | ||
25 | } | ||
26 | |||
/* Default error handler: report the message with an "error: " prefix. */
static void error_builtin(const char *err, va_list params)
{
	static const char prefix[] = "error: ";

	report(prefix, err, params);
}
31 | |||
/* Default warning handler: report the message with a "warning: " prefix. */
static void warn_builtin(const char *warn, va_list params)
{
	static const char prefix[] = "warning: ";

	report(prefix, warn, params);
}
36 | |||
37 | /* If we are in a dlopen()ed .so write to a global variable would segfault | ||
38 | * (ugh), so keep things static. */ | ||
39 | static void (*usage_routine)(const char *err) NORETURN = usage_builtin; | ||
40 | static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; | ||
41 | static void (*error_routine)(const char *err, va_list params) = error_builtin; | ||
42 | static void (*warn_routine)(const char *err, va_list params) = warn_builtin; | ||
43 | |||
44 | void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) | ||
45 | { | ||
46 | die_routine = routine; | ||
47 | } | ||
48 | |||
49 | void usage(const char *err) | ||
50 | { | ||
51 | usage_routine(err); | ||
52 | } | ||
53 | |||
54 | void die(const char *err, ...) | ||
55 | { | ||
56 | va_list params; | ||
57 | |||
58 | va_start(params, err); | ||
59 | die_routine(err, params); | ||
60 | va_end(params); | ||
61 | } | ||
62 | |||
63 | int error(const char *err, ...) | ||
64 | { | ||
65 | va_list params; | ||
66 | |||
67 | va_start(params, err); | ||
68 | error_routine(err, params); | ||
69 | va_end(params); | ||
70 | return -1; | ||
71 | } | ||
72 | |||
73 | void warning(const char *warn, ...) | ||
74 | { | ||
75 | va_list params; | ||
76 | |||
77 | va_start(params, warn); | ||
78 | warn_routine(warn, params); | ||
79 | va_end(params); | ||
80 | } | ||
diff --git a/libbb/win32.h b/libbb/win32.h new file mode 100644 index 000000000..c26384e59 --- /dev/null +++ b/libbb/win32.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /* common Win32 functions for MinGW and Cygwin */ | ||
2 | #include <windows.h> | ||
3 | |||
4 | static inline int file_attr_to_st_mode (DWORD attr) | ||
5 | { | ||
6 | int fMode = S_IREAD; | ||
7 | if (attr & FILE_ATTRIBUTE_DIRECTORY) | ||
8 | fMode |= S_IFDIR; | ||
9 | else | ||
10 | fMode |= S_IFREG; | ||
11 | if (!(attr & FILE_ATTRIBUTE_READONLY)) | ||
12 | fMode |= S_IWRITE; | ||
13 | return fMode; | ||
14 | } | ||
15 | |||
16 | static inline int get_file_attr(const char *fname, WIN32_FILE_ATTRIBUTE_DATA *fdata) | ||
17 | { | ||
18 | if (GetFileAttributesExA(fname, GetFileExInfoStandard, fdata)) | ||
19 | return 0; | ||
20 | |||
21 | switch (GetLastError()) { | ||
22 | case ERROR_ACCESS_DENIED: | ||
23 | case ERROR_SHARING_VIOLATION: | ||
24 | case ERROR_LOCK_VIOLATION: | ||
25 | case ERROR_SHARING_BUFFER_EXCEEDED: | ||
26 | return EACCES; | ||
27 | case ERROR_BUFFER_OVERFLOW: | ||
28 | return ENAMETOOLONG; | ||
29 | case ERROR_NOT_ENOUGH_MEMORY: | ||
30 | return ENOMEM; | ||
31 | default: | ||
32 | return ENOENT; | ||
33 | } | ||
34 | } | ||
diff --git a/libbb/winansi.c b/libbb/winansi.c new file mode 100644 index 000000000..44dc293ad --- /dev/null +++ b/libbb/winansi.c | |||
@@ -0,0 +1,357 @@ | |||
1 | /* | ||
2 | * Copyright 2008 Peter Harris <git@peter.is-a-geek.org> | ||
3 | */ | ||
4 | |||
5 | #include <windows.h> | ||
6 | #include "../git-compat-util.h" | ||
7 | |||
8 | /* | ||
9 | Functions to be wrapped: | ||
10 | */ | ||
11 | #undef printf | ||
12 | #undef fprintf | ||
13 | #undef fputs | ||
14 | /* TODO: write */ | ||
15 | |||
16 | /* | ||
17 | ANSI codes used by git: m, K | ||
18 | |||
19 | This file is git-specific. Therefore, this file does not attempt | ||
20 | to implement any codes that are not used by git. | ||
21 | */ | ||
22 | |||
23 | static HANDLE console; | ||
24 | static WORD plain_attr; | ||
25 | static WORD attr; | ||
26 | static int negative; | ||
27 | |||
28 | static void init(void) | ||
29 | { | ||
30 | CONSOLE_SCREEN_BUFFER_INFO sbi; | ||
31 | |||
32 | static int initialized = 0; | ||
33 | if (initialized) | ||
34 | return; | ||
35 | |||
36 | console = GetStdHandle(STD_OUTPUT_HANDLE); | ||
37 | if (console == INVALID_HANDLE_VALUE) | ||
38 | console = NULL; | ||
39 | |||
40 | if (!console) | ||
41 | return; | ||
42 | |||
43 | GetConsoleScreenBufferInfo(console, &sbi); | ||
44 | attr = plain_attr = sbi.wAttributes; | ||
45 | negative = 0; | ||
46 | |||
47 | initialized = 1; | ||
48 | } | ||
49 | |||
50 | |||
51 | #define FOREGROUND_ALL (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE) | ||
52 | #define BACKGROUND_ALL (BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE) | ||
53 | |||
54 | static void set_console_attr(void) | ||
55 | { | ||
56 | WORD attributes = attr; | ||
57 | if (negative) { | ||
58 | attributes &= ~FOREGROUND_ALL; | ||
59 | attributes &= ~BACKGROUND_ALL; | ||
60 | |||
61 | /* This could probably use a bitmask | ||
62 | instead of a series of ifs */ | ||
63 | if (attr & FOREGROUND_RED) | ||
64 | attributes |= BACKGROUND_RED; | ||
65 | if (attr & FOREGROUND_GREEN) | ||
66 | attributes |= BACKGROUND_GREEN; | ||
67 | if (attr & FOREGROUND_BLUE) | ||
68 | attributes |= BACKGROUND_BLUE; | ||
69 | |||
70 | if (attr & BACKGROUND_RED) | ||
71 | attributes |= FOREGROUND_RED; | ||
72 | if (attr & BACKGROUND_GREEN) | ||
73 | attributes |= FOREGROUND_GREEN; | ||
74 | if (attr & BACKGROUND_BLUE) | ||
75 | attributes |= FOREGROUND_BLUE; | ||
76 | } | ||
77 | SetConsoleTextAttribute(console, attributes); | ||
78 | } | ||
79 | |||
80 | static void erase_in_line(void) | ||
81 | { | ||
82 | CONSOLE_SCREEN_BUFFER_INFO sbi; | ||
83 | |||
84 | if (!console) | ||
85 | return; | ||
86 | |||
87 | GetConsoleScreenBufferInfo(console, &sbi); | ||
88 | FillConsoleOutputCharacterA(console, ' ', | ||
89 | sbi.dwSize.X - sbi.dwCursorPosition.X, sbi.dwCursorPosition, | ||
90 | NULL); | ||
91 | } | ||
92 | |||
93 | |||
94 | static const char *set_attr(const char *str) | ||
95 | { | ||
96 | const char *func; | ||
97 | size_t len = strspn(str, "0123456789;"); | ||
98 | func = str + len; | ||
99 | |||
100 | switch (*func) { | ||
101 | case 'm': | ||
102 | do { | ||
103 | long val = strtol(str, (char **)&str, 10); | ||
104 | switch (val) { | ||
105 | case 0: /* reset */ | ||
106 | attr = plain_attr; | ||
107 | negative = 0; | ||
108 | break; | ||
109 | case 1: /* bold */ | ||
110 | attr |= FOREGROUND_INTENSITY; | ||
111 | break; | ||
112 | case 2: /* faint */ | ||
113 | case 22: /* normal */ | ||
114 | attr &= ~FOREGROUND_INTENSITY; | ||
115 | break; | ||
116 | case 3: /* italic */ | ||
117 | /* Unsupported */ | ||
118 | break; | ||
119 | case 4: /* underline */ | ||
120 | case 21: /* double underline */ | ||
121 | /* Wikipedia says this flag does nothing */ | ||
122 | /* Furthermore, mingw doesn't define this flag | ||
123 | attr |= COMMON_LVB_UNDERSCORE; */ | ||
124 | break; | ||
125 | case 24: /* no underline */ | ||
126 | /* attr &= ~COMMON_LVB_UNDERSCORE; */ | ||
127 | break; | ||
128 | case 5: /* slow blink */ | ||
129 | case 6: /* fast blink */ | ||
130 | /* We don't have blink, but we do have | ||
131 | background intensity */ | ||
132 | attr |= BACKGROUND_INTENSITY; | ||
133 | break; | ||
134 | case 25: /* no blink */ | ||
135 | attr &= ~BACKGROUND_INTENSITY; | ||
136 | break; | ||
137 | case 7: /* negative */ | ||
138 | negative = 1; | ||
139 | break; | ||
140 | case 27: /* positive */ | ||
141 | negative = 0; | ||
142 | break; | ||
143 | case 8: /* conceal */ | ||
144 | case 28: /* reveal */ | ||
145 | /* Unsupported */ | ||
146 | break; | ||
147 | case 30: /* Black */ | ||
148 | attr &= ~FOREGROUND_ALL; | ||
149 | break; | ||
150 | case 31: /* Red */ | ||
151 | attr &= ~FOREGROUND_ALL; | ||
152 | attr |= FOREGROUND_RED; | ||
153 | break; | ||
154 | case 32: /* Green */ | ||
155 | attr &= ~FOREGROUND_ALL; | ||
156 | attr |= FOREGROUND_GREEN; | ||
157 | break; | ||
158 | case 33: /* Yellow */ | ||
159 | attr &= ~FOREGROUND_ALL; | ||
160 | attr |= FOREGROUND_RED | FOREGROUND_GREEN; | ||
161 | break; | ||
162 | case 34: /* Blue */ | ||
163 | attr &= ~FOREGROUND_ALL; | ||
164 | attr |= FOREGROUND_BLUE; | ||
165 | break; | ||
166 | case 35: /* Magenta */ | ||
167 | attr &= ~FOREGROUND_ALL; | ||
168 | attr |= FOREGROUND_RED | FOREGROUND_BLUE; | ||
169 | break; | ||
170 | case 36: /* Cyan */ | ||
171 | attr &= ~FOREGROUND_ALL; | ||
172 | attr |= FOREGROUND_GREEN | FOREGROUND_BLUE; | ||
173 | break; | ||
174 | case 37: /* White */ | ||
175 | attr |= FOREGROUND_RED | | ||
176 | FOREGROUND_GREEN | | ||
177 | FOREGROUND_BLUE; | ||
178 | break; | ||
179 | case 38: /* Unknown */ | ||
180 | break; | ||
181 | case 39: /* reset */ | ||
182 | attr &= ~FOREGROUND_ALL; | ||
183 | attr |= (plain_attr & FOREGROUND_ALL); | ||
184 | break; | ||
185 | case 40: /* Black */ | ||
186 | attr &= ~BACKGROUND_ALL; | ||
187 | break; | ||
188 | case 41: /* Red */ | ||
189 | attr &= ~BACKGROUND_ALL; | ||
190 | attr |= BACKGROUND_RED; | ||
191 | break; | ||
192 | case 42: /* Green */ | ||
193 | attr &= ~BACKGROUND_ALL; | ||
194 | attr |= BACKGROUND_GREEN; | ||
195 | break; | ||
196 | case 43: /* Yellow */ | ||
197 | attr &= ~BACKGROUND_ALL; | ||
198 | attr |= BACKGROUND_RED | BACKGROUND_GREEN; | ||
199 | break; | ||
200 | case 44: /* Blue */ | ||
201 | attr &= ~BACKGROUND_ALL; | ||
202 | attr |= BACKGROUND_BLUE; | ||
203 | break; | ||
204 | case 45: /* Magenta */ | ||
205 | attr &= ~BACKGROUND_ALL; | ||
206 | attr |= BACKGROUND_RED | BACKGROUND_BLUE; | ||
207 | break; | ||
208 | case 46: /* Cyan */ | ||
209 | attr &= ~BACKGROUND_ALL; | ||
210 | attr |= BACKGROUND_GREEN | BACKGROUND_BLUE; | ||
211 | break; | ||
212 | case 47: /* White */ | ||
213 | attr |= BACKGROUND_RED | | ||
214 | BACKGROUND_GREEN | | ||
215 | BACKGROUND_BLUE; | ||
216 | break; | ||
217 | case 48: /* Unknown */ | ||
218 | break; | ||
219 | case 49: /* reset */ | ||
220 | attr &= ~BACKGROUND_ALL; | ||
221 | attr |= (plain_attr & BACKGROUND_ALL); | ||
222 | break; | ||
223 | default: | ||
224 | /* Unsupported code */ | ||
225 | break; | ||
226 | } | ||
227 | str++; | ||
228 | } while (*(str-1) == ';'); | ||
229 | |||
230 | set_console_attr(); | ||
231 | break; | ||
232 | case 'K': | ||
233 | erase_in_line(); | ||
234 | break; | ||
235 | default: | ||
236 | /* Unsupported code */ | ||
237 | break; | ||
238 | } | ||
239 | |||
240 | return func + 1; | ||
241 | } | ||
242 | |||
/*
 * Write `str` to `stream`, handing every "\033[" sequence to set_attr()
 * instead of printing it.
 *
 * Returns the number of input bytes accounted for; stops early when
 * fwrite() writes less than requested.
 */
static int ansi_emulate(const char *str, FILE *stream)
{
	int rv = 0;
	const char *pos = str;

	while (*pos) {
		size_t len;

		pos = strstr(str, "\033[");
		if (!pos) {
			/* No further sequences: emit the rest in one go. */
			rv += strlen(str);
			fputs(str, stream);
			return rv;
		}

		/* Plain text in front of the escape sequence. */
		len = pos - str;
		if (len) {
			size_t out_len = fwrite(str, 1, len, stream);
			rv += out_len;
			if (out_len < len)
				return rv;
		}

		/* Skip the two-byte introducer. */
		str = pos + 2;
		rv += 2;

		/* Flush the text written so far before the attributes change. */
		fflush(stream);

		pos = set_attr(str);
		rv += pos - str;
		str = pos;
	}
	return rv;
}
276 | |||
277 | int winansi_fputs(const char *str, FILE *stream) | ||
278 | { | ||
279 | int rv; | ||
280 | |||
281 | if (!isatty(fileno(stream))) | ||
282 | return fputs(str, stream); | ||
283 | |||
284 | init(); | ||
285 | |||
286 | if (!console) | ||
287 | return fputs(str, stream); | ||
288 | |||
289 | rv = ansi_emulate(str, stream); | ||
290 | |||
291 | if (rv >= 0) | ||
292 | return 0; | ||
293 | else | ||
294 | return EOF; | ||
295 | } | ||
296 | |||
297 | static int winansi_vfprintf(FILE *stream, const char *format, va_list list) | ||
298 | { | ||
299 | int len, rv; | ||
300 | char small_buf[256]; | ||
301 | char *buf = small_buf; | ||
302 | va_list cp; | ||
303 | |||
304 | if (!isatty(fileno(stream))) | ||
305 | goto abort; | ||
306 | |||
307 | init(); | ||
308 | |||
309 | if (!console) | ||
310 | goto abort; | ||
311 | |||
312 | va_copy(cp, list); | ||
313 | len = vsnprintf(small_buf, sizeof(small_buf), format, cp); | ||
314 | va_end(cp); | ||
315 | |||
316 | if (len > sizeof(small_buf) - 1) { | ||
317 | buf = malloc(len + 1); | ||
318 | if (!buf) | ||
319 | goto abort; | ||
320 | |||
321 | len = vsnprintf(buf, len + 1, format, list); | ||
322 | } | ||
323 | |||
324 | rv = ansi_emulate(buf, stream); | ||
325 | |||
326 | if (buf != small_buf) | ||
327 | free(buf); | ||
328 | return rv; | ||
329 | |||
330 | abort: | ||
331 | rv = vfprintf(stream, format, list); | ||
332 | return rv; | ||
333 | } | ||
334 | |||
/*
 * fprintf() replacement: forwards to winansi_vfprintf() and returns its
 * result.
 */
int winansi_fprintf(FILE *stream, const char *format, ...)
{
	va_list args;
	int result;

	va_start(args, format);
	result = winansi_vfprintf(stream, format, args);
	va_end(args);

	return result;
}
346 | |||
/*
 * printf() replacement: forwards to winansi_vfprintf() on stdout and
 * returns its result.
 */
int winansi_printf(const char *format, ...)
{
	va_list args;
	int result;

	va_start(args, format);
	result = winansi_vfprintf(stdout, format, args);
	va_end(args);

	return result;
}
diff --git a/libbb/write_or_die.c b/libbb/write_or_die.c new file mode 100644 index 000000000..4c29255df --- /dev/null +++ b/libbb/write_or_die.c | |||
@@ -0,0 +1,86 @@ | |||
1 | #include "cache.h" | ||
2 | |||
/*
 * Flush a stdio stream after a write so that errors are noticed (and
 * interactive output is not buffered for too long).
 *
 * If stdio already flushed during the write itself, that error code is
 * gone and cannot be reported; that case is ignored in the hope that
 * the flush performed here reports the right error.
 *
 * For stdout the flush is skipped when it is not needed: either
 * GIT_FLUSH is set to a value that atoi() reads as 0, or GIT_FLUSH is
 * unset and stdout is a regular file.  The decision is made once and
 * cached.  A stream that already has its error flag set is always
 * flushed.
 *
 * `desc` names the stream in the message passed to die().
 */
void maybe_flush_or_die(FILE *f, const char *desc)
{
	/* -1: not decided yet, 0: always flush stdout, 1: skip the flush */
	static int skip_flush = -1;

	if (f == stdout) {
		if (skip_flush < 0) {
			const char *env = getenv("GIT_FLUSH");
			struct stat sb;

			if (env)
				skip_flush = !atoi(env);
			else
				skip_flush = !fstat(fileno(stdout), &sb) &&
					     S_ISREG(sb.st_mode);
		}
		if (skip_flush && !ferror(f))
			return;
	}

	if (!fflush(f))
		return;

	/*
	 * On Windows, EPIPE is returned only by the first write()
	 * after the reading end has closed its handle; subsequent
	 * write()s return EINVAL.
	 */
	if (errno == EPIPE || errno == EINVAL)
		exit(0);
	die("write failure on %s: %s", desc, strerror(errno));
}
47 | |||
/*
 * fsync() the descriptor `fd`; when fsync() reports failure, call
 * die() with `msg` and the strerror() text for errno.
 */
void fsync_or_die(int fd, const char *msg)
{
	if (fsync(fd) >= 0)
		return;
	die("%s: fsync error (%s)", msg, strerror(errno));
}
54 | |||
/*
 * Write `count` bytes from `buf` to `fd` via write_in_full().
 *
 * When write_in_full() reports failure, EPIPE ends the process with
 * exit status 0; any other errno is reported through die().
 */
void write_or_die(int fd, const void *buf, size_t count)
{
	if (write_in_full(fd, buf, count) >= 0)
		return;

	if (errno == EPIPE)
		exit(0);
	die("write error (%s)", strerror(errno));
}
63 | |||
/*
 * Write `count` bytes from `buf` to `fd` via write_in_full().
 *
 * Returns 1 when write_in_full() succeeds.  On failure, EPIPE ends the
 * process with exit status 0; for any other errno a message prefixed
 * with `msg` is printed to stderr and 0 is returned.
 */
int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg)
{
	if (write_in_full(fd, buf, count) >= 0)
		return 1;

	if (errno == EPIPE)
		exit(0);
	fprintf(stderr, "%s: write error (%s)\n", msg, strerror(errno));
	return 0;
}
76 | |||
/*
 * Write `count` bytes from `buf` to `fd` via write_in_full().
 *
 * Returns 1 when write_in_full() succeeds; on failure prints a message
 * prefixed with `msg` to stderr and returns 0.  Unlike
 * write_or_whine_pipe(), EPIPE gets no special treatment here.
 */
int write_or_whine(int fd, const void *buf, size_t count, const char *msg)
{
	if (write_in_full(fd, buf, count) >= 0)
		return 1;

	fprintf(stderr, "%s: write error (%s)\n", msg, strerror(errno));
	return 0;
}