diff options
author | Mark Whitley <markw@lineo.com> | 2000-07-10 22:55:51 +0000 |
---|---|---|
committer | Mark Whitley <markw@lineo.com> | 2000-07-10 22:55:51 +0000 |
commit | 6315ce603b3ac2f4998e926b6106da21bcc0066f (patch) | |
tree | 0a3d57a74893ee848378e7f8f94e7167d30a063a | |
parent | 85c552035e4c5199d1cbef6058eb365c6150f33d (diff) | |
download | busybox-w32-6315ce603b3ac2f4998e926b6106da21bcc0066f.tar.gz busybox-w32-6315ce603b3ac2f4998e926b6106da21bcc0066f.tar.bz2 busybox-w32-6315ce603b3ac2f4998e926b6106da21bcc0066f.zip |
Brand-new sed that uses libc regex routines.
There is some common code used by both sed & grep that should be put into
utility.c as per Mat Kraai's suggestions/patch on the mailing list.
Specifically, a common regex_compile() and a regex_subst() function need to be
made.
-rw-r--r-- | editors/sed.c | 763 | ||||
-rw-r--r-- | sed.c | 763 |
2 files changed, 966 insertions, 560 deletions
diff --git a/editors/sed.c b/editors/sed.c index d4b721e49..329f5ae8d 100644 --- a/editors/sed.c +++ b/editors/sed.c | |||
@@ -1,15 +1,8 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
2 | /* | 1 | /* |
3 | * Mini sed implementation for busybox | 2 | * sed.c - very minimalist version of sed |
4 | * | ||
5 | * | 3 | * |
6 | * Copyright (C) 1999,2000 by Lineo, inc. | 4 | * Copyright (C) 1999,2000 by Lineo, inc. |
7 | * Written by Erik Andersen <andersen@lineo.com>, <andersee@debian.org> | 5 | * Written by Mark Whitley <markw@lineo.com>, <markw@enol.com> |
8 | * | ||
9 | * Modifications for addresses and append command have been | ||
10 | * written by Marco Pantaleoni <panta@prosa.it>, <panta@elasticworld.org> | ||
11 | * and are: | ||
12 | * Copyright (C) 1999 Marco Pantaleoni. | ||
13 | * | 6 | * |
14 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
15 | * it under the terms of the GNU General Public License as published by | 8 | * it under the terms of the GNU General Public License as published by |
@@ -27,333 +20,543 @@ | |||
27 | * | 20 | * |
28 | */ | 21 | */ |
29 | 22 | ||
30 | #include "internal.h" | 23 | /* |
31 | #include "regexp.h" | 24 | Supported features and commands in this version of sed: |
25 | |||
26 | - comments ('#') | ||
27 | - Address matching: num|/matchstr/[,num|/matchstr/|$]command | ||
28 | - Commands: p, d, s/match/replace/[g] | ||
29 | |||
30 | (Note: Specifying an address (range) to match is *optional*; commands | ||
31 | default to the whole pattern space if no specific address match was | ||
32 | requested.) | ||
33 | |||
34 | Unsupported features: | ||
35 | |||
36 | - transliteration (y/source-chars/dest-chars/) (use 'tr') | ||
37 | - no support for characters other than the '/' character for regex matches | ||
38 | - no pattern space hold space storing / swapping (x, etc.) | ||
39 | - no labels / branching (: label, b, t, and friends) | ||
40 | - and lots, lots more. | ||
41 | |||
42 | */ | ||
43 | |||
32 | #include <stdio.h> | 44 | #include <stdio.h> |
33 | #include <dirent.h> | 45 | #include <stdlib.h> /* for realloc() */ |
46 | #include <unistd.h> /* for getopt() */ | ||
47 | #include <regex.h> | ||
48 | #include <string.h> /* for strdup() */ | ||
34 | #include <errno.h> | 49 | #include <errno.h> |
35 | #include <fcntl.h> | 50 | #include <ctype.h> /* for isspace() */ |
36 | #include <signal.h> | 51 | #include "internal.h" |
37 | #include <time.h> | 52 | |
38 | #include <ctype.h> | 53 | |
54 | /* externs */ | ||
55 | extern int optind; /* in unistd.h */ | ||
56 | extern char *optarg; /* ditto */ | ||
57 | |||
58 | /* options */ | ||
59 | static int be_quiet = 0; | ||
60 | |||
61 | struct sed_cmd { | ||
62 | |||
63 | /* address storage */ | ||
64 | int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */ | ||
65 | int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */ | ||
66 | regex_t *beg_match; /* sed -e '/match/cmd' */ | ||
67 | regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */ | ||
68 | |||
69 | /* the command */ | ||
70 | char cmd; /* p,d,s (add more at your leisure :-) */ | ||
71 | |||
72 | /* substitution command specific fields */ | ||
73 | regex_t *sub_match; /* sed -e 's/sub_match/replace/' */ | ||
74 | char *replace; /* sed -e 's/sub_match/replace/' XXX: who will hold the \1 \2 \3s? */ | ||
75 | unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */ | ||
76 | }; | ||
77 | |||
78 | /* globals */ | ||
79 | static struct sed_cmd *sed_cmds = NULL; /* growable arrary holding a sequence of sed cmds */ | ||
80 | static int ncmds = 0; /* number of sed commands */ | ||
81 | |||
82 | /*static char *cur_file = NULL;*/ /* file currently being processed XXX: do I need this? */ | ||
39 | 83 | ||
40 | static const char sed_usage[] = | 84 | static const char sed_usage[] = |
41 | "sed [-n] -e script [file...]\n" | 85 | "sed [-Vhnef] pattern [files...]\n" |
42 | #ifndef BB_FEATURE_TRIVIAL_HELP | 86 | #ifndef BB_FEATURE_TRIVIAL_HELP |
43 | "\nAllowed sed scripts come in the following form:\n" | 87 | "\n" |
44 | "\t'ADDR [!] COMMAND'\n\n" | 88 | "-n\tsuppress automatic printing of pattern space\n" |
45 | "\twhere address ADDR can be:\n" | 89 | "-e script\tadd the script to the commands to be executed\n" |
46 | "\t NUMBER Match specified line number\n" | 90 | "-f scriptfile\tadd the contents of script-file to the commands to be executed\n" |
47 | "\t $ Match last line\n" | 91 | "-h\tdisplay this help message\n" |
48 | "\t /REGEXP/ Match specified regexp\n" | 92 | "-V\toutput version information and exit\n" |
49 | "\t (! inverts the meaning of the match)\n\n" | 93 | "\n" |
50 | "\tand COMMAND can be:\n" | 94 | "If no -e or -f is given, the first non-option argument is taken as the\n" |
51 | "\t s/regexp/replacement/[igp]\n" | 95 | "sed script to interpret. All remaining arguments are names of input\n" |
52 | "\t which attempt to match regexp against the pattern space\n" | 96 | "files; if no input files are specified, then the standard input is read.\n" |
53 | "\t and if successful replaces the matched portion with replacement.\n\n" | ||
54 | "\t aTEXT\n" | ||
55 | "\t which appends TEXT after the pattern space\n" | ||
56 | "Options:\n" | ||
57 | "-e\tadd the script to the commands to be executed\n" | ||
58 | "-n\tsuppress automatic printing of pattern space\n\n" | ||
59 | #if defined BB_REGEXP | ||
60 | "This version of sed matches full regular expressions.\n"; | ||
61 | #else | ||
62 | "This version of sed matches strings (not full regular expressions).\n" | ||
63 | #endif | ||
64 | #endif | 97 | #endif |
65 | ; | 98 | ; |
66 | 99 | ||
67 | /* Flags & variables */ | 100 | #if 0 |
101 | /* Nuke from here { */ | ||
68 | 102 | ||
69 | typedef enum { f_none, f_replace, f_append } sed_function; | ||
70 | 103 | ||
71 | #define NO_LINE -2 | 104 | /* get_line_from_file() - This function reads an entire line from a text file |
72 | #define LAST_LINE -1 | 105 | * * up to a newline. It returns a malloc'ed char * which must be stored and |
73 | static int addr_line = NO_LINE; | 106 | * * free'ed by the caller. */ |
74 | static char *addr_pattern = NULL; | 107 | extern char *get_line_from_file(FILE *file) |
75 | static int negated = 0; | 108 | { |
109 | static const int GROWBY = 80; /* how large we will grow strings by */ | ||
76 | 110 | ||
77 | #define SKIPSPACES(p) do { while (isspace(*(p))) (p)++; } while (0) | 111 | int ch; |
112 | int idx = 0; | ||
113 | char *linebuf = NULL; | ||
114 | int linebufsz = 0; | ||
78 | 115 | ||
79 | #define BUFSIZE 1024 | 116 | while (1) { |
117 | ch = fgetc(file); | ||
118 | if (ch == EOF) | ||
119 | break; | ||
120 | /* grow the line buffer as necessary */ | ||
121 | if (idx > linebufsz-2) | ||
122 | linebuf = realloc(linebuf, linebufsz += GROWBY); | ||
123 | linebuf[idx++] = (char)ch; | ||
124 | if ((char)ch == '\n') | ||
125 | break; | ||
126 | } | ||
127 | |||
128 | if (idx == 0) | ||
129 | return NULL; | ||
130 | |||
131 | linebuf[idx] = 0; | ||
132 | return linebuf; | ||
133 | } | ||
80 | 134 | ||
81 | static inline int at_last(FILE * fp) | 135 | static void usage(const char *string) |
82 | { | 136 | { |
83 | int res = 0; | 137 | printf("usage: %s\n", string); |
138 | exit(0); | ||
139 | } | ||
84 | 140 | ||
85 | if (feof(fp)) | 141 | /* } to here when we integrate this into busybox */ |
86 | return 1; | 142 | #endif |
87 | else { | 143 | |
88 | int ch; | 144 | static void destroy_cmd_strs() |
145 | { | ||
146 | if (sed_cmds == NULL) | ||
147 | return; | ||
148 | |||
149 | /* destroy all the elements in the array */ | ||
150 | while (--ncmds >= 0) { | ||
89 | 151 | ||
90 | if ((ch = fgetc(fp)) == EOF) | 152 | if (sed_cmds[ncmds].beg_match) { |
91 | res++; | 153 | regfree(sed_cmds[ncmds].beg_match); |
92 | ungetc(ch, fp); | 154 | free(sed_cmds[ncmds].beg_match); |
155 | } | ||
156 | if (sed_cmds[ncmds].end_match) { | ||
157 | regfree(sed_cmds[ncmds].end_match); | ||
158 | free(sed_cmds[ncmds].end_match); | ||
159 | } | ||
160 | if (sed_cmds[ncmds].sub_match) { | ||
161 | regfree(sed_cmds[ncmds].sub_match); | ||
162 | free(sed_cmds[ncmds].sub_match); | ||
163 | } | ||
164 | if (sed_cmds[ncmds].replace) | ||
165 | free(sed_cmds[ncmds].replace); | ||
93 | } | 166 | } |
94 | return res; | 167 | |
168 | /* destroy the array */ | ||
169 | free(sed_cmds); | ||
170 | sed_cmds = NULL; | ||
171 | } | ||
172 | |||
173 | static void exit_sed(int retcode, const char *message) | ||
174 | { | ||
175 | destroy_cmd_strs(); | ||
176 | if (message) | ||
177 | fputs(message, stderr); | ||
178 | exit(retcode); | ||
95 | } | 179 | } |
96 | 180 | ||
97 | static void do_sed_repl(FILE * fp, char *needle, char *newNeedle, | 181 | /* |
98 | int ignoreCase, int printFlag, int quietFlag) | 182 | * trim_str - trims leading and trailing space from a string |
183 | * | ||
184 | * Note: This returns a malloc'ed string so you must store and free it | ||
185 | * XXX: This should be in the utility.c file. | ||
186 | */ | ||
187 | static char *trim_str(const char *str) | ||
188 | { | ||
189 | int i; | ||
190 | char *retstr = strdup(str); | ||
191 | |||
192 | /* trim leading whitespace */ | ||
193 | memmove(retstr, &retstr[strspn(retstr, " \n\t\v")], strlen(retstr)); | ||
194 | |||
195 | /* trim trailing whitespace */ | ||
196 | i = strlen(retstr) - 1; | ||
197 | while (isspace(retstr[i])) | ||
198 | i--; | ||
199 | retstr[++i] = 0; | ||
200 | |||
201 | /* Aside: | ||
202 | * | ||
203 | * you know, a strrspn() would really be nice cuz then we could say: | ||
204 | * | ||
205 | * retstr[strlen(retstr) - strrspn(retstr, " \n\t\v") + 1] = 0; | ||
206 | */ | ||
207 | |||
208 | return retstr; | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * index_of_unescaped_slash - walks left to right through a string beginning | ||
213 | * at a specified index and returns the index of the next unescaped slash. | ||
214 | */ | ||
215 | static int index_of_next_unescaped_slash(int idx, const char *str) | ||
216 | { | ||
217 | do { | ||
218 | idx++; | ||
219 | /* test if we've hit the end */ | ||
220 | if (str[idx] == 0) | ||
221 | return -1; | ||
222 | } while (str[idx] != '/' && str[idx - 1] != '\\'); | ||
223 | |||
224 | return idx; | ||
225 | } | ||
226 | |||
227 | /* | ||
228 | * returns the index in the string just past where the address ends. | ||
229 | */ | ||
230 | static int get_address(const char *str, int *line, regex_t **regex) | ||
99 | { | 231 | { |
100 | int foundOne = FALSE; | 232 | char *my_str = strdup(str); |
101 | char haystack[BUFSIZE]; | 233 | int idx = 0; |
102 | int line = 1, doit; | 234 | |
103 | 235 | if (isdigit(my_str[idx])) { | |
104 | while (fgets(haystack, BUFSIZE - 1, fp)) { | 236 | do { |
105 | doit = 0; | 237 | idx++; |
106 | if (addr_pattern) { | 238 | } while (isdigit(my_str[idx])); |
107 | doit = !find_match(haystack, addr_pattern, FALSE); | 239 | my_str[idx] = 0; |
108 | } else if (addr_line == NO_LINE) | 240 | *line = atoi(my_str); |
109 | doit = 1; | 241 | *regex = NULL; |
110 | else if (addr_line == LAST_LINE) { | 242 | } |
111 | if (at_last(fp)) | 243 | else if (my_str[idx] == '$') { |
112 | doit = 1; | 244 | *line = -1; |
113 | } else { | 245 | *regex = NULL; |
114 | if (line == addr_line) | 246 | idx++; |
115 | doit = 1; | 247 | } |
248 | else if (my_str[idx] == '/') { | ||
249 | int ret; | ||
250 | idx = index_of_next_unescaped_slash(idx, my_str); | ||
251 | if (idx == -1) { | ||
252 | free(my_str); | ||
253 | exit_sed(1, "sed: unterminated match expression\n"); | ||
116 | } | 254 | } |
117 | if (negated) | 255 | my_str[idx] = 0; /* shave off the trailing '/' */ |
118 | doit = 1 - doit; | 256 | my_str++; /* shave off the leading '/' */ |
119 | if (doit) { | 257 | *regex = (regex_t *)malloc(sizeof(regex_t)); |
120 | foundOne = | 258 | if ((ret = regcomp(*regex, my_str, 0)) != 0) { |
121 | replace_match(haystack, needle, newNeedle, ignoreCase); | 259 | /* error handling if regular expression couldn't be compiled */ |
122 | 260 | int errmsgsz = regerror(ret, *regex, NULL, 0); | |
123 | if (foundOne == TRUE && printFlag == TRUE) { | 261 | char *errmsg = malloc(errmsgsz); |
124 | fprintf(stdout, haystack); | 262 | if (errmsg == NULL) { |
263 | exit_sed(1, "sed: memory error\n"); | ||
125 | } | 264 | } |
265 | regerror(ret, *regex, errmsg, errmsgsz); | ||
266 | fprintf(stderr, "sed: %s\n", errmsg); | ||
267 | free(errmsg); | ||
268 | exit_sed(1, NULL); | ||
126 | } | 269 | } |
270 | my_str--; /* move my_str back so free() (below) won't barf */ | ||
271 | idx++; /* advance idx one past the end of the /match/ */ | ||
272 | } | ||
273 | else { | ||
274 | fprintf(stderr, "sed.c:get_address: no address found in string\n"); | ||
275 | fprintf(stderr, "\t(you probably didn't check the string you passed me)\n"); | ||
276 | idx = -1; | ||
277 | } | ||
127 | 278 | ||
128 | if (quietFlag == FALSE) { | 279 | free(my_str); |
129 | fprintf(stdout, haystack); | 280 | return idx; |
130 | } | 281 | } |
131 | 282 | ||
132 | line++; | 283 | static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr) |
284 | { | ||
285 | int idx = 0; | ||
286 | |||
287 | /* parse the command | ||
288 | * format is: [addr][,addr]cmd | ||
289 | * |----||-----||-| | ||
290 | * part1 part2 part3 | ||
291 | */ | ||
292 | |||
293 | /* first part (if present) is an address: either a number or a /regex/ */ | ||
294 | if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/') | ||
295 | idx = get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); | ||
296 | |||
297 | /* second part (if present) will begin with a comma */ | ||
298 | if (cmdstr[idx] == ',') | ||
299 | idx += get_address(&cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match); | ||
300 | |||
301 | /* last part (mandatory) will be a command */ | ||
302 | if (cmdstr[idx] == '\0') | ||
303 | exit_sed(1, "sed: missing command\n"); | ||
304 | if (!strchr("pds", cmdstr[idx])) /* <-- XXX add new commands here */ | ||
305 | exit_sed(1, "sed: invalid command\n"); | ||
306 | sed_cmd->cmd = cmdstr[idx]; | ||
307 | /* special-case handling for 's' */ | ||
308 | if (sed_cmd->cmd == 's') { | ||
309 | int oldidx; | ||
310 | /* format for substitution is: | ||
311 | * s/match/replace/g | ||
312 | * | | | ||
313 | * mandatory optional | ||
314 | */ | ||
315 | |||
316 | /* verify that we have an 's' followed by a 'slash' */ | ||
317 | if (cmdstr[++idx] != '/') | ||
318 | exit_sed(1, "sed: bad format in substitution expression\n"); | ||
319 | |||
320 | /* get the substitution part */ | ||
321 | idx += get_address(&cmdstr[idx], NULL, &sed_cmd->sub_match); | ||
322 | |||
323 | /* get the replacement part */ | ||
324 | oldidx = idx; | ||
325 | idx = index_of_next_unescaped_slash(idx, cmdstr); | ||
326 | sed_cmd->replace = (char *)malloc(idx - oldidx + 1); | ||
327 | strncpy(sed_cmd->replace, &cmdstr[oldidx], idx - oldidx); | ||
328 | sed_cmd->replace[idx - oldidx] = 0; | ||
329 | |||
330 | /* store the 'g' if present */ | ||
331 | if (cmdstr[++idx] == 'g') | ||
332 | sed_cmd->sub_g = 1; | ||
133 | } | 333 | } |
134 | } | 334 | } |
135 | 335 | ||
136 | static void do_sed_append(FILE * fp, char *appendline, int quietFlag) | 336 | static void add_cmd_str(const char *cmdstr) |
137 | { | 337 | { |
138 | char buffer[BUFSIZE]; | 338 | char *my_cmdstr = trim_str(cmdstr); |
139 | int line = 1, doit; | ||
140 | |||
141 | while (fgets(buffer, BUFSIZE - 1, fp)) { | ||
142 | doit = 0; | ||
143 | if (addr_pattern) { | ||
144 | doit = !find_match(buffer, addr_pattern, FALSE); | ||
145 | } else if (addr_line == NO_LINE) | ||
146 | doit = 1; | ||
147 | else if (addr_line == LAST_LINE) { | ||
148 | if (at_last(fp)) | ||
149 | doit = 1; | ||
150 | } else { | ||
151 | if (line == addr_line) | ||
152 | doit = 1; | ||
153 | } | ||
154 | if (negated) | ||
155 | doit = 1 - doit; | ||
156 | if (quietFlag == FALSE) { | ||
157 | fprintf(stdout, buffer); | ||
158 | } | ||
159 | if (doit) { | ||
160 | fputs(appendline, stdout); | ||
161 | fputc('\n', stdout); | ||
162 | } | ||
163 | 339 | ||
164 | line++; | 340 | /* if this is a comment, don't even bother */ |
341 | if (my_cmdstr[0] == '#') { | ||
342 | free(my_cmdstr); | ||
343 | return; | ||
165 | } | 344 | } |
345 | |||
346 | /* grow the array */ | ||
347 | sed_cmds = realloc(sed_cmds, sizeof(struct sed_cmd) * (++ncmds)); | ||
348 | /* zero new element */ | ||
349 | memset(&sed_cmds[ncmds-1], 0, sizeof(struct sed_cmd)); | ||
350 | /* load command string into new array element */ | ||
351 | parse_cmd_str(&sed_cmds[ncmds-1], my_cmdstr); | ||
166 | } | 352 | } |
167 | 353 | ||
168 | extern int sed_main(int argc, char **argv) | 354 | |
355 | static void load_cmd_file(char *filename) | ||
169 | { | 356 | { |
170 | FILE *fp; | 357 | FILE *cmdfile; |
171 | char *needle = NULL, *newNeedle = NULL; | 358 | char *line; |
172 | char *name; | 359 | |
173 | char *cp; | 360 | cmdfile = fopen(filename, "r"); |
174 | int ignoreCase = FALSE; | 361 | if (cmdfile == NULL) |
175 | int printFlag = FALSE; | 362 | exit_sed(1, strerror(errno)); |
176 | int quietFlag = FALSE; | 363 | |
177 | int stopNow; | 364 | while ((line = get_line_from_file(cmdfile)) != NULL) { |
178 | char *line_s = NULL, saved; | 365 | line[strlen(line)-1] = 0; /* eat newline */ |
179 | char *appendline = NULL; | 366 | add_cmd_str(line); |
180 | char *pos; | 367 | free(line); |
181 | sed_function sed_f = f_none; | ||
182 | |||
183 | argc--; | ||
184 | argv++; | ||
185 | if (argc < 1) { | ||
186 | usage(sed_usage); | ||
187 | } | 368 | } |
369 | } | ||
188 | 370 | ||
189 | while (argc > 1) { | ||
190 | if (**argv != '-') | ||
191 | usage(sed_usage); | ||
192 | argc--; | ||
193 | cp = *argv++; | ||
194 | stopNow = FALSE; | ||
195 | 371 | ||
196 | while (*++cp && stopNow == FALSE) { | 372 | static int do_sed_command(const struct sed_cmd *sed_cmd, const char *line) |
197 | switch (*cp) { | 373 | { |
198 | case 'n': | 374 | int altered = 0; |
199 | quietFlag = TRUE; | ||
200 | break; | ||
201 | case 'e': | ||
202 | if (*(cp + 1) == 0 && --argc < 0) { | ||
203 | usage(sed_usage); | ||
204 | } | ||
205 | if (*++cp != 's') | ||
206 | cp = *argv++; | ||
207 | |||
208 | /* Read address if present */ | ||
209 | SKIPSPACES(cp); | ||
210 | if (*cp == '$') { | ||
211 | addr_line = LAST_LINE; | ||
212 | cp++; | ||
213 | } else { | ||
214 | if (isdigit(*cp)) { /* LINE ADDRESS */ | ||
215 | line_s = cp; | ||
216 | while (isdigit(*cp)) | ||
217 | cp++; | ||
218 | if (cp > line_s) { | ||
219 | /* numeric line */ | ||
220 | saved = *cp; | ||
221 | *cp = '\0'; | ||
222 | addr_line = atoi(line_s); | ||
223 | *cp = saved; | ||
224 | } | ||
225 | } else if (*cp == '/') { /* PATTERN ADDRESS */ | ||
226 | pos = addr_pattern = cp + 1; | ||
227 | pos = strchr(pos, '/'); | ||
228 | if (!pos) | ||
229 | usage(sed_usage); | ||
230 | *pos = '\0'; | ||
231 | cp = pos + 1; | ||
232 | } | ||
233 | } | ||
234 | 375 | ||
235 | SKIPSPACES(cp); | 376 | switch (sed_cmd->cmd) { |
236 | if (*cp == '!') { | ||
237 | negated++; | ||
238 | cp++; | ||
239 | } | ||
240 | 377 | ||
241 | /* Read command */ | 378 | case 'p': |
379 | fputs(line, stdout); | ||
380 | break; | ||
242 | 381 | ||
243 | SKIPSPACES(cp); | 382 | case 'd': |
244 | switch (*cp) { | 383 | altered++; |
245 | case 's': /* REPLACE */ | 384 | break; |
246 | if (strlen(cp) <= 3 || *(cp + 1) != '/') | 385 | |
247 | break; | 386 | case 's': /* oo, a fun one :-) */ |
248 | sed_f = f_replace; | 387 | |
249 | 388 | /* we only substitute if the substitution 'search' expression matches */ | |
250 | pos = needle = cp + 2; | 389 | if (regexec(sed_cmd->sub_match, line, 0, NULL, 0) == 0) { |
251 | 390 | regmatch_t regmatch; | |
252 | for (;;) { | 391 | int i; |
253 | pos = strchr(pos, '/'); | 392 | char *ptr = (char *)line; |
254 | if (pos == NULL) { | 393 | |
255 | usage(sed_usage); | 394 | while (*ptr) { |
256 | } | 395 | /* if we can match the search string... */ |
257 | if (*(pos - 1) == '\\') { | 396 | if (regexec(sed_cmd->sub_match, ptr, 1, ®match, 0) == 0) { |
258 | pos++; | 397 | /* print everything before the match, */ |
259 | continue; | 398 | for (i = 0; i < regmatch.rm_so; i++) |
260 | } | 399 | fputc(ptr[i], stdout); |
261 | break; | 400 | /* then print the substitution in its place */ |
401 | fputs(sed_cmd->replace, stdout); | ||
402 | /* then advance past the match */ | ||
403 | ptr += regmatch.rm_eo; | ||
404 | /* and let the calling function know that something | ||
405 | * has been changed */ | ||
406 | altered++; | ||
407 | |||
408 | /* if we're not doing this globally... */ | ||
409 | if (!sed_cmd->sub_g) | ||
410 | break; | ||
262 | } | 411 | } |
263 | *pos = 0; | 412 | /* if we COULD NOT match the search string (meaning we've gone past |
264 | newNeedle = ++pos; | 413 | * all previous instances), get out */ |
265 | for (;;) { | 414 | else |
266 | pos = strchr(pos, '/'); | ||
267 | if (pos == NULL) { | ||
268 | usage(sed_usage); | ||
269 | } | ||
270 | if (*(pos - 1) == '\\') { | ||
271 | pos++; | ||
272 | continue; | ||
273 | } | ||
274 | break; | 415 | break; |
275 | } | 416 | } |
276 | *pos = 0; | ||
277 | if (pos + 2 != 0) { | ||
278 | while (*++pos) { | ||
279 | switch (*pos) { | ||
280 | case 'i': | ||
281 | ignoreCase = TRUE; | ||
282 | break; | ||
283 | case 'p': | ||
284 | printFlag = TRUE; | ||
285 | break; | ||
286 | case 'g': | ||
287 | break; | ||
288 | default: | ||
289 | usage(sed_usage); | ||
290 | } | ||
291 | } | ||
292 | } | ||
293 | cp = pos; | ||
294 | /* fprintf(stderr, "replace '%s' with '%s'\n", needle, newNeedle); */ | ||
295 | break; | ||
296 | 417 | ||
297 | case 'a': /* APPEND */ | 418 | /* is there anything left to print? */ |
298 | if (strlen(cp) < 2) | 419 | if (*ptr) |
299 | break; | 420 | fputs(ptr, stdout); |
300 | sed_f = f_append; | 421 | } |
301 | appendline = ++cp; | 422 | |
302 | /* fprintf(stderr, "append '%s'\n", appendline); */ | 423 | break; |
303 | break; | 424 | } |
425 | |||
426 | return altered; | ||
427 | } | ||
428 | |||
429 | static void process_file(FILE *file) | ||
430 | { | ||
431 | char *line = NULL; | ||
432 | static int linenum = 0; /* GNU sed does not restart counting lines at EOF */ | ||
433 | unsigned int still_in_range = 0; | ||
434 | int line_altered; | ||
435 | int i; | ||
436 | |||
437 | /* go through every line in the file */ | ||
438 | while ((line = get_line_from_file(file)) != NULL) { | ||
439 | |||
440 | linenum++; | ||
441 | line_altered = 0; | ||
442 | |||
443 | /* for every line, go through all the commands */ | ||
444 | for (i = 0; i < ncmds; i++) { | ||
445 | |||
446 | /* are we acting on a range of matched lines? */ | ||
447 | if (sed_cmds[i].beg_match && sed_cmds[i].end_match) { | ||
448 | if (still_in_range || regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) { | ||
449 | line_altered += do_sed_command(&sed_cmds[i], line); | ||
450 | still_in_range = 1; | ||
451 | if (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0) | ||
452 | still_in_range = 0; | ||
304 | } | 453 | } |
454 | } | ||
305 | 455 | ||
306 | stopNow = TRUE; | 456 | /* are we trying to match a single line? */ |
307 | break; | 457 | else if (sed_cmds[i].beg_match) { |
458 | if (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) | ||
459 | line_altered += do_sed_command(&sed_cmds[i], line); | ||
460 | } | ||
308 | 461 | ||
309 | default: | 462 | /* are we acting on a range of line numbers? */ |
310 | usage(sed_usage); | 463 | else if (sed_cmds[i].beg_line > 0 && sed_cmds[i].end_line > 0) { |
464 | if (linenum >= sed_cmds[i].beg_line && linenum <= sed_cmds[i].end_line) | ||
465 | line_altered += do_sed_command(&sed_cmds[i], line); | ||
311 | } | 466 | } |
312 | } | ||
313 | } | ||
314 | 467 | ||
315 | if (argc == 0) { | 468 | /* are we acting on a specified line number */ |
316 | switch (sed_f) { | 469 | else if (sed_cmds[i].beg_line > 0) { |
317 | case f_none: | 470 | if (linenum == sed_cmds[i].beg_line) |
318 | break; | 471 | line_altered += do_sed_command(&sed_cmds[i], line); |
319 | case f_replace: | ||
320 | do_sed_repl(stdin, needle, newNeedle, ignoreCase, printFlag, | ||
321 | quietFlag); | ||
322 | break; | ||
323 | case f_append: | ||
324 | do_sed_append(stdin, appendline, quietFlag); | ||
325 | break; | ||
326 | } | ||
327 | } else { | ||
328 | while (argc-- > 0) { | ||
329 | name = *argv++; | ||
330 | |||
331 | fp = fopen(name, "r"); | ||
332 | if (fp == NULL) { | ||
333 | perror(name); | ||
334 | continue; | ||
335 | } | 472 | } |
336 | 473 | ||
337 | switch (sed_f) { | 474 | /* not acting on matches or line numbers. act on every line */ |
338 | case f_none: | 475 | else |
476 | line_altered += do_sed_command(&sed_cmds[i], line); | ||
477 | |||
478 | } | ||
479 | |||
480 | /* we will print the line unless we were told to be quiet or if the | ||
481 | * line was altered (via a 'd'elete or 's'ubstitution) */ | ||
482 | if (!be_quiet && !line_altered) | ||
483 | fputs(line, stdout); | ||
484 | |||
485 | free(line); | ||
486 | } | ||
487 | } | ||
488 | |||
489 | extern int sed_main(int argc, char **argv) | ||
490 | { | ||
491 | int opt; | ||
492 | |||
493 | /* do special-case option parsing */ | ||
494 | if (argv[1] && (strcmp(argv[1], "--help") == 0)) | ||
495 | usage(sed_usage); | ||
496 | |||
497 | /* do normal option parsing */ | ||
498 | while ((opt = getopt(argc, argv, "Vhne:f:")) > 0) { | ||
499 | switch (opt) { | ||
500 | case 'V': | ||
501 | printf("Print Busybox version here\n"); | ||
502 | exit(0); | ||
339 | break; | 503 | break; |
340 | case f_replace: | 504 | case 'h': |
341 | do_sed_repl(fp, needle, newNeedle, ignoreCase, printFlag, | 505 | usage(sed_usage); |
342 | quietFlag); | ||
343 | break; | 506 | break; |
344 | case f_append: | 507 | case 'n': |
345 | do_sed_append(fp, appendline, quietFlag); | 508 | be_quiet++; |
346 | break; | 509 | break; |
347 | } | 510 | case 'e': |
511 | add_cmd_str(optarg); | ||
512 | break; | ||
513 | case 'f': | ||
514 | load_cmd_file(optarg); | ||
515 | break; | ||
516 | } | ||
517 | } | ||
518 | |||
519 | /* if we didn't get a pattern from a -e and no command file was specified, | ||
520 | * argv[optind] should be the pattern. no pattern, no worky */ | ||
521 | if (ncmds == 0) { | ||
522 | if (argv[optind] == NULL) | ||
523 | usage(sed_usage); | ||
524 | else { | ||
525 | add_cmd_str(argv[optind]); | ||
526 | optind++; | ||
527 | } | ||
528 | } | ||
348 | 529 | ||
349 | if (ferror(fp)) | ||
350 | perror(name); | ||
351 | 530 | ||
352 | fclose(fp); | 531 | /* argv[(optind)..(argc-1)] should be names of file to process. If no |
532 | * files were specified or '-' was specified, take input from stdin. | ||
533 | * Otherwise, we process all the files specified. */ | ||
534 | if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) { | ||
535 | process_file(stdin); | ||
536 | } | ||
537 | else { | ||
538 | int i; | ||
539 | FILE *file; | ||
540 | for (i = optind; i < argc; i++) { | ||
541 | file = fopen(argv[i], "r"); | ||
542 | if (file == NULL) { | ||
543 | fprintf(stderr, "sed: %s: %s\n", argv[i], strerror(errno)); | ||
544 | } else { | ||
545 | process_file(file); | ||
546 | fclose(file); | ||
547 | } | ||
353 | } | 548 | } |
354 | } | 549 | } |
355 | return(TRUE); | 550 | |
356 | } | 551 | exit_sed(0, NULL); |
357 | 552 | ||
553 | /* not reached */ | ||
554 | return 0; | ||
555 | } | ||
358 | 556 | ||
359 | /* END CODE */ | 557 | #ifdef TEST_SED |
558 | int main(int argc, char **argv) | ||
559 | { | ||
560 | return sed_main(argc, argv); | ||
561 | } | ||
562 | #endif | ||
@@ -1,15 +1,8 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
2 | /* | 1 | /* |
3 | * Mini sed implementation for busybox | 2 | * sed.c - very minimalist version of sed |
4 | * | ||
5 | * | 3 | * |
6 | * Copyright (C) 1999,2000 by Lineo, inc. | 4 | * Copyright (C) 1999,2000 by Lineo, inc. |
7 | * Written by Erik Andersen <andersen@lineo.com>, <andersee@debian.org> | 5 | * Written by Mark Whitley <markw@lineo.com>, <markw@enol.com> |
8 | * | ||
9 | * Modifications for addresses and append command have been | ||
10 | * written by Marco Pantaleoni <panta@prosa.it>, <panta@elasticworld.org> | ||
11 | * and are: | ||
12 | * Copyright (C) 1999 Marco Pantaleoni. | ||
13 | * | 6 | * |
14 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
15 | * it under the terms of the GNU General Public License as published by | 8 | * it under the terms of the GNU General Public License as published by |
@@ -27,333 +20,543 @@ | |||
27 | * | 20 | * |
28 | */ | 21 | */ |
29 | 22 | ||
30 | #include "internal.h" | 23 | /* |
31 | #include "regexp.h" | 24 | Supported features and commands in this version of sed: |
25 | |||
26 | - comments ('#') | ||
27 | - Address matching: num|/matchstr/[,num|/matchstr/|$]command | ||
28 | - Commands: p, d, s/match/replace/[g] | ||
29 | |||
30 | (Note: Specifying an address (range) to match is *optional*; commands | ||
31 | default to the whole pattern space if no specific address match was | ||
32 | requested.) | ||
33 | |||
34 | Unsupported features: | ||
35 | |||
36 | - transliteration (y/source-chars/dest-chars/) (use 'tr') | ||
37 | - no support for characters other than the '/' character for regex matches | ||
38 | - no pattern space hold space storing / swapping (x, etc.) | ||
39 | - no labels / branching (: label, b, t, and friends) | ||
40 | - and lots, lots more. | ||
41 | |||
42 | */ | ||
43 | |||
32 | #include <stdio.h> | 44 | #include <stdio.h> |
33 | #include <dirent.h> | 45 | #include <stdlib.h> /* for realloc() */ |
46 | #include <unistd.h> /* for getopt() */ | ||
47 | #include <regex.h> | ||
48 | #include <string.h> /* for strdup() */ | ||
34 | #include <errno.h> | 49 | #include <errno.h> |
35 | #include <fcntl.h> | 50 | #include <ctype.h> /* for isspace() */ |
36 | #include <signal.h> | 51 | #include "internal.h" |
37 | #include <time.h> | 52 | |
38 | #include <ctype.h> | 53 | |
54 | /* externs */ | ||
55 | extern int optind; /* in unistd.h */ | ||
56 | extern char *optarg; /* ditto */ | ||
57 | |||
58 | /* options */ | ||
59 | static int be_quiet = 0; | ||
60 | |||
61 | struct sed_cmd { | ||
62 | |||
63 | /* address storage */ | ||
64 | int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */ | ||
65 | int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */ | ||
66 | regex_t *beg_match; /* sed -e '/match/cmd' */ | ||
67 | regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */ | ||
68 | |||
69 | /* the command */ | ||
70 | char cmd; /* p,d,s (add more at your leisure :-) */ | ||
71 | |||
72 | /* substitution command specific fields */ | ||
73 | regex_t *sub_match; /* sed -e 's/sub_match/replace/' */ | ||
74 | char *replace; /* sed -e 's/sub_match/replace/' XXX: who will hold the \1 \2 \3s? */ | ||
75 | unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */ | ||
76 | }; | ||
77 | |||
78 | /* globals */ | ||
79 | static struct sed_cmd *sed_cmds = NULL; /* growable arrary holding a sequence of sed cmds */ | ||
80 | static int ncmds = 0; /* number of sed commands */ | ||
81 | |||
82 | /*static char *cur_file = NULL;*/ /* file currently being processed XXX: do I need this? */ | ||
39 | 83 | ||
40 | static const char sed_usage[] = | 84 | static const char sed_usage[] = |
41 | "sed [-n] -e script [file...]\n" | 85 | "sed [-Vhnef] pattern [files...]\n" |
42 | #ifndef BB_FEATURE_TRIVIAL_HELP | 86 | #ifndef BB_FEATURE_TRIVIAL_HELP |
43 | "\nAllowed sed scripts come in the following form:\n" | 87 | "\n" |
44 | "\t'ADDR [!] COMMAND'\n\n" | 88 | "-n\tsuppress automatic printing of pattern space\n" |
45 | "\twhere address ADDR can be:\n" | 89 | "-e script\tadd the script to the commands to be executed\n" |
46 | "\t NUMBER Match specified line number\n" | 90 | "-f scriptfile\tadd the contents of script-file to the commands to be executed\n" |
47 | "\t $ Match last line\n" | 91 | "-h\tdisplay this help message\n" |
48 | "\t /REGEXP/ Match specified regexp\n" | 92 | "-V\toutput version information and exit\n" |
49 | "\t (! inverts the meaning of the match)\n\n" | 93 | "\n" |
50 | "\tand COMMAND can be:\n" | 94 | "If no -e or -f is given, the first non-option argument is taken as the\n" |
51 | "\t s/regexp/replacement/[igp]\n" | 95 | "sed script to interpret. All remaining arguments are names of input\n" |
52 | "\t which attempt to match regexp against the pattern space\n" | 96 | "files; if no input files are specified, then the standard input is read.\n" |
53 | "\t and if successful replaces the matched portion with replacement.\n\n" | ||
54 | "\t aTEXT\n" | ||
55 | "\t which appends TEXT after the pattern space\n" | ||
56 | "Options:\n" | ||
57 | "-e\tadd the script to the commands to be executed\n" | ||
58 | "-n\tsuppress automatic printing of pattern space\n\n" | ||
59 | #if defined BB_REGEXP | ||
60 | "This version of sed matches full regular expressions.\n"; | ||
61 | #else | ||
62 | "This version of sed matches strings (not full regular expressions).\n" | ||
63 | #endif | ||
64 | #endif | 97 | #endif |
65 | ; | 98 | ; |
66 | 99 | ||
67 | /* Flags & variables */ | 100 | #if 0 |
101 | /* Nuke from here { */ | ||
68 | 102 | ||
69 | typedef enum { f_none, f_replace, f_append } sed_function; | ||
70 | 103 | ||
71 | #define NO_LINE -2 | 104 | /* get_line_from_file() - This function reads an entire line from a text file |
72 | #define LAST_LINE -1 | 105 | * * up to a newline. It returns a malloc'ed char * which must be stored and |
73 | static int addr_line = NO_LINE; | 106 | * * free'ed by the caller. */ |
74 | static char *addr_pattern = NULL; | 107 | extern char *get_line_from_file(FILE *file) |
75 | static int negated = 0; | 108 | { |
109 | static const int GROWBY = 80; /* how large we will grow strings by */ | ||
76 | 110 | ||
77 | #define SKIPSPACES(p) do { while (isspace(*(p))) (p)++; } while (0) | 111 | int ch; |
112 | int idx = 0; | ||
113 | char *linebuf = NULL; | ||
114 | int linebufsz = 0; | ||
78 | 115 | ||
79 | #define BUFSIZE 1024 | 116 | while (1) { |
117 | ch = fgetc(file); | ||
118 | if (ch == EOF) | ||
119 | break; | ||
120 | /* grow the line buffer as necessary */ | ||
121 | if (idx > linebufsz-2) | ||
122 | linebuf = realloc(linebuf, linebufsz += GROWBY); | ||
123 | linebuf[idx++] = (char)ch; | ||
124 | if ((char)ch == '\n') | ||
125 | break; | ||
126 | } | ||
127 | |||
128 | if (idx == 0) | ||
129 | return NULL; | ||
130 | |||
131 | linebuf[idx] = 0; | ||
132 | return linebuf; | ||
133 | } | ||
80 | 134 | ||
81 | static inline int at_last(FILE * fp) | 135 | static void usage(const char *string) |
82 | { | 136 | { |
83 | int res = 0; | 137 | printf("usage: %s\n", string); |
138 | exit(0); | ||
139 | } | ||
84 | 140 | ||
85 | if (feof(fp)) | 141 | /* } to here when we integrate this into busybox */ |
86 | return 1; | 142 | #endif |
87 | else { | 143 | |
88 | int ch; | 144 | static void destroy_cmd_strs() |
145 | { | ||
146 | if (sed_cmds == NULL) | ||
147 | return; | ||
148 | |||
149 | /* destroy all the elements in the array */ | ||
150 | while (--ncmds >= 0) { | ||
89 | 151 | ||
90 | if ((ch = fgetc(fp)) == EOF) | 152 | if (sed_cmds[ncmds].beg_match) { |
91 | res++; | 153 | regfree(sed_cmds[ncmds].beg_match); |
92 | ungetc(ch, fp); | 154 | free(sed_cmds[ncmds].beg_match); |
155 | } | ||
156 | if (sed_cmds[ncmds].end_match) { | ||
157 | regfree(sed_cmds[ncmds].end_match); | ||
158 | free(sed_cmds[ncmds].end_match); | ||
159 | } | ||
160 | if (sed_cmds[ncmds].sub_match) { | ||
161 | regfree(sed_cmds[ncmds].sub_match); | ||
162 | free(sed_cmds[ncmds].sub_match); | ||
163 | } | ||
164 | if (sed_cmds[ncmds].replace) | ||
165 | free(sed_cmds[ncmds].replace); | ||
93 | } | 166 | } |
94 | return res; | 167 | |
168 | /* destroy the array */ | ||
169 | free(sed_cmds); | ||
170 | sed_cmds = NULL; | ||
171 | } | ||
172 | |||
173 | static void exit_sed(int retcode, const char *message) | ||
174 | { | ||
175 | destroy_cmd_strs(); | ||
176 | if (message) | ||
177 | fputs(message, stderr); | ||
178 | exit(retcode); | ||
95 | } | 179 | } |
96 | 180 | ||
97 | static void do_sed_repl(FILE * fp, char *needle, char *newNeedle, | 181 | /* |
98 | int ignoreCase, int printFlag, int quietFlag) | 182 | * trim_str - trims leading and trailing space from a string |
183 | * | ||
184 | * Note: This returns a malloc'ed string so you must store and free it | ||
185 | * XXX: This should be in the utility.c file. | ||
186 | */ | ||
187 | static char *trim_str(const char *str) | ||
188 | { | ||
189 | int i; | ||
190 | char *retstr = strdup(str); | ||
191 | |||
192 | /* trim leading whitespace */ | ||
193 | memmove(retstr, &retstr[strspn(retstr, " \n\t\v")], strlen(retstr)); | ||
194 | |||
195 | /* trim trailing whitespace */ | ||
196 | i = strlen(retstr) - 1; | ||
197 | while (isspace(retstr[i])) | ||
198 | i--; | ||
199 | retstr[++i] = 0; | ||
200 | |||
201 | /* Aside: | ||
202 | * | ||
203 | * you know, a strrspn() would really be nice cuz then we could say: | ||
204 | * | ||
205 | * retstr[strlen(retstr) - strrspn(retstr, " \n\t\v") + 1] = 0; | ||
206 | */ | ||
207 | |||
208 | return retstr; | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * index_of_unescaped_slash - walks left to right through a string beginning | ||
213 | * at a specified index and returns the index of the next unescaped slash. | ||
214 | */ | ||
215 | static int index_of_next_unescaped_slash(int idx, const char *str) | ||
216 | { | ||
217 | do { | ||
218 | idx++; | ||
219 | /* test if we've hit the end */ | ||
220 | if (str[idx] == 0) | ||
221 | return -1; | ||
222 | } while (str[idx] != '/' && str[idx - 1] != '\\'); | ||
223 | |||
224 | return idx; | ||
225 | } | ||
226 | |||
227 | /* | ||
228 | * returns the index in the string just past where the address ends. | ||
229 | */ | ||
230 | static int get_address(const char *str, int *line, regex_t **regex) | ||
99 | { | 231 | { |
100 | int foundOne = FALSE; | 232 | char *my_str = strdup(str); |
101 | char haystack[BUFSIZE]; | 233 | int idx = 0; |
102 | int line = 1, doit; | 234 | |
103 | 235 | if (isdigit(my_str[idx])) { | |
104 | while (fgets(haystack, BUFSIZE - 1, fp)) { | 236 | do { |
105 | doit = 0; | 237 | idx++; |
106 | if (addr_pattern) { | 238 | } while (isdigit(my_str[idx])); |
107 | doit = !find_match(haystack, addr_pattern, FALSE); | 239 | my_str[idx] = 0; |
108 | } else if (addr_line == NO_LINE) | 240 | *line = atoi(my_str); |
109 | doit = 1; | 241 | *regex = NULL; |
110 | else if (addr_line == LAST_LINE) { | 242 | } |
111 | if (at_last(fp)) | 243 | else if (my_str[idx] == '$') { |
112 | doit = 1; | 244 | *line = -1; |
113 | } else { | 245 | *regex = NULL; |
114 | if (line == addr_line) | 246 | idx++; |
115 | doit = 1; | 247 | } |
248 | else if (my_str[idx] == '/') { | ||
249 | int ret; | ||
250 | idx = index_of_next_unescaped_slash(idx, my_str); | ||
251 | if (idx == -1) { | ||
252 | free(my_str); | ||
253 | exit_sed(1, "sed: unterminated match expression\n"); | ||
116 | } | 254 | } |
117 | if (negated) | 255 | my_str[idx] = 0; /* shave off the trailing '/' */ |
118 | doit = 1 - doit; | 256 | my_str++; /* shave off the leading '/' */ |
119 | if (doit) { | 257 | *regex = (regex_t *)malloc(sizeof(regex_t)); |
120 | foundOne = | 258 | if ((ret = regcomp(*regex, my_str, 0)) != 0) { |
121 | replace_match(haystack, needle, newNeedle, ignoreCase); | 259 | /* error handling if regular expression couldn't be compiled */ |
122 | 260 | int errmsgsz = regerror(ret, *regex, NULL, 0); | |
123 | if (foundOne == TRUE && printFlag == TRUE) { | 261 | char *errmsg = malloc(errmsgsz); |
124 | fprintf(stdout, haystack); | 262 | if (errmsg == NULL) { |
263 | exit_sed(1, "sed: memory error\n"); | ||
125 | } | 264 | } |
265 | regerror(ret, *regex, errmsg, errmsgsz); | ||
266 | fprintf(stderr, "sed: %s\n", errmsg); | ||
267 | free(errmsg); | ||
268 | exit_sed(1, NULL); | ||
126 | } | 269 | } |
270 | my_str--; /* move my_str back so free() (below) won't barf */ | ||
271 | idx++; /* advance idx one past the end of the /match/ */ | ||
272 | } | ||
273 | else { | ||
274 | fprintf(stderr, "sed.c:get_address: no address found in string\n"); | ||
275 | fprintf(stderr, "\t(you probably didn't check the string you passed me)\n"); | ||
276 | idx = -1; | ||
277 | } | ||
127 | 278 | ||
128 | if (quietFlag == FALSE) { | 279 | free(my_str); |
129 | fprintf(stdout, haystack); | 280 | return idx; |
130 | } | 281 | } |
131 | 282 | ||
132 | line++; | 283 | static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr) |
284 | { | ||
285 | int idx = 0; | ||
286 | |||
287 | /* parse the command | ||
288 | * format is: [addr][,addr]cmd | ||
289 | * |----||-----||-| | ||
290 | * part1 part2 part3 | ||
291 | */ | ||
292 | |||
293 | /* first part (if present) is an address: either a number or a /regex/ */ | ||
294 | if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/') | ||
295 | idx = get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); | ||
296 | |||
297 | /* second part (if present) will begin with a comma */ | ||
298 | if (cmdstr[idx] == ',') | ||
299 | idx += get_address(&cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match); | ||
300 | |||
301 | /* last part (mandatory) will be a command */ | ||
302 | if (cmdstr[idx] == '\0') | ||
303 | exit_sed(1, "sed: missing command\n"); | ||
304 | if (!strchr("pds", cmdstr[idx])) /* <-- XXX add new commands here */ | ||
305 | exit_sed(1, "sed: invalid command\n"); | ||
306 | sed_cmd->cmd = cmdstr[idx]; | ||
307 | /* special-case handling for 's' */ | ||
308 | if (sed_cmd->cmd == 's') { | ||
309 | int oldidx; | ||
310 | /* format for substitution is: | ||
311 | * s/match/replace/g | ||
312 | * | | | ||
313 | * mandatory optional | ||
314 | */ | ||
315 | |||
316 | /* verify that we have an 's' followed by a 'slash' */ | ||
317 | if (cmdstr[++idx] != '/') | ||
318 | exit_sed(1, "sed: bad format in substitution expression\n"); | ||
319 | |||
320 | /* get the substitution part */ | ||
321 | idx += get_address(&cmdstr[idx], NULL, &sed_cmd->sub_match); | ||
322 | |||
323 | /* get the replacement part */ | ||
324 | oldidx = idx; | ||
325 | idx = index_of_next_unescaped_slash(idx, cmdstr); | ||
326 | sed_cmd->replace = (char *)malloc(idx - oldidx + 1); | ||
327 | strncpy(sed_cmd->replace, &cmdstr[oldidx], idx - oldidx); | ||
328 | sed_cmd->replace[idx - oldidx] = 0; | ||
329 | |||
330 | /* store the 'g' if present */ | ||
331 | if (cmdstr[++idx] == 'g') | ||
332 | sed_cmd->sub_g = 1; | ||
133 | } | 333 | } |
134 | } | 334 | } |
135 | 335 | ||
136 | static void do_sed_append(FILE * fp, char *appendline, int quietFlag) | 336 | static void add_cmd_str(const char *cmdstr) |
137 | { | 337 | { |
138 | char buffer[BUFSIZE]; | 338 | char *my_cmdstr = trim_str(cmdstr); |
139 | int line = 1, doit; | ||
140 | |||
141 | while (fgets(buffer, BUFSIZE - 1, fp)) { | ||
142 | doit = 0; | ||
143 | if (addr_pattern) { | ||
144 | doit = !find_match(buffer, addr_pattern, FALSE); | ||
145 | } else if (addr_line == NO_LINE) | ||
146 | doit = 1; | ||
147 | else if (addr_line == LAST_LINE) { | ||
148 | if (at_last(fp)) | ||
149 | doit = 1; | ||
150 | } else { | ||
151 | if (line == addr_line) | ||
152 | doit = 1; | ||
153 | } | ||
154 | if (negated) | ||
155 | doit = 1 - doit; | ||
156 | if (quietFlag == FALSE) { | ||
157 | fprintf(stdout, buffer); | ||
158 | } | ||
159 | if (doit) { | ||
160 | fputs(appendline, stdout); | ||
161 | fputc('\n', stdout); | ||
162 | } | ||
163 | 339 | ||
164 | line++; | 340 | /* if this is a comment, don't even bother */ |
341 | if (my_cmdstr[0] == '#') { | ||
342 | free(my_cmdstr); | ||
343 | return; | ||
165 | } | 344 | } |
345 | |||
346 | /* grow the array */ | ||
347 | sed_cmds = realloc(sed_cmds, sizeof(struct sed_cmd) * (++ncmds)); | ||
348 | /* zero new element */ | ||
349 | memset(&sed_cmds[ncmds-1], 0, sizeof(struct sed_cmd)); | ||
350 | /* load command string into new array element */ | ||
351 | parse_cmd_str(&sed_cmds[ncmds-1], my_cmdstr); | ||
166 | } | 352 | } |
167 | 353 | ||
168 | extern int sed_main(int argc, char **argv) | 354 | |
355 | static void load_cmd_file(char *filename) | ||
169 | { | 356 | { |
170 | FILE *fp; | 357 | FILE *cmdfile; |
171 | char *needle = NULL, *newNeedle = NULL; | 358 | char *line; |
172 | char *name; | 359 | |
173 | char *cp; | 360 | cmdfile = fopen(filename, "r"); |
174 | int ignoreCase = FALSE; | 361 | if (cmdfile == NULL) |
175 | int printFlag = FALSE; | 362 | exit_sed(1, strerror(errno)); |
176 | int quietFlag = FALSE; | 363 | |
177 | int stopNow; | 364 | while ((line = get_line_from_file(cmdfile)) != NULL) { |
178 | char *line_s = NULL, saved; | 365 | line[strlen(line)-1] = 0; /* eat newline */ |
179 | char *appendline = NULL; | 366 | add_cmd_str(line); |
180 | char *pos; | 367 | free(line); |
181 | sed_function sed_f = f_none; | ||
182 | |||
183 | argc--; | ||
184 | argv++; | ||
185 | if (argc < 1) { | ||
186 | usage(sed_usage); | ||
187 | } | 368 | } |
369 | } | ||
188 | 370 | ||
189 | while (argc > 1) { | ||
190 | if (**argv != '-') | ||
191 | usage(sed_usage); | ||
192 | argc--; | ||
193 | cp = *argv++; | ||
194 | stopNow = FALSE; | ||
195 | 371 | ||
196 | while (*++cp && stopNow == FALSE) { | 372 | static int do_sed_command(const struct sed_cmd *sed_cmd, const char *line) |
197 | switch (*cp) { | 373 | { |
198 | case 'n': | 374 | int altered = 0; |
199 | quietFlag = TRUE; | ||
200 | break; | ||
201 | case 'e': | ||
202 | if (*(cp + 1) == 0 && --argc < 0) { | ||
203 | usage(sed_usage); | ||
204 | } | ||
205 | if (*++cp != 's') | ||
206 | cp = *argv++; | ||
207 | |||
208 | /* Read address if present */ | ||
209 | SKIPSPACES(cp); | ||
210 | if (*cp == '$') { | ||
211 | addr_line = LAST_LINE; | ||
212 | cp++; | ||
213 | } else { | ||
214 | if (isdigit(*cp)) { /* LINE ADDRESS */ | ||
215 | line_s = cp; | ||
216 | while (isdigit(*cp)) | ||
217 | cp++; | ||
218 | if (cp > line_s) { | ||
219 | /* numeric line */ | ||
220 | saved = *cp; | ||
221 | *cp = '\0'; | ||
222 | addr_line = atoi(line_s); | ||
223 | *cp = saved; | ||
224 | } | ||
225 | } else if (*cp == '/') { /* PATTERN ADDRESS */ | ||
226 | pos = addr_pattern = cp + 1; | ||
227 | pos = strchr(pos, '/'); | ||
228 | if (!pos) | ||
229 | usage(sed_usage); | ||
230 | *pos = '\0'; | ||
231 | cp = pos + 1; | ||
232 | } | ||
233 | } | ||
234 | 375 | ||
235 | SKIPSPACES(cp); | 376 | switch (sed_cmd->cmd) { |
236 | if (*cp == '!') { | ||
237 | negated++; | ||
238 | cp++; | ||
239 | } | ||
240 | 377 | ||
241 | /* Read command */ | 378 | case 'p': |
379 | fputs(line, stdout); | ||
380 | break; | ||
242 | 381 | ||
243 | SKIPSPACES(cp); | 382 | case 'd': |
244 | switch (*cp) { | 383 | altered++; |
245 | case 's': /* REPLACE */ | 384 | break; |
246 | if (strlen(cp) <= 3 || *(cp + 1) != '/') | 385 | |
247 | break; | 386 | case 's': /* oo, a fun one :-) */ |
248 | sed_f = f_replace; | 387 | |
249 | 388 | /* we only substitute if the substitution 'search' expression matches */ | |
250 | pos = needle = cp + 2; | 389 | if (regexec(sed_cmd->sub_match, line, 0, NULL, 0) == 0) { |
251 | 390 | regmatch_t regmatch; | |
252 | for (;;) { | 391 | int i; |
253 | pos = strchr(pos, '/'); | 392 | char *ptr = (char *)line; |
254 | if (pos == NULL) { | 393 | |
255 | usage(sed_usage); | 394 | while (*ptr) { |
256 | } | 395 | /* if we can match the search string... */ |
257 | if (*(pos - 1) == '\\') { | 396 | if (regexec(sed_cmd->sub_match, ptr, 1, &regmatch, 0) == 0) { |
258 | pos++; | 397 | /* print everything before the match, */ |
259 | continue; | 398 | for (i = 0; i < regmatch.rm_so; i++) |
260 | } | 399 | fputc(ptr[i], stdout); |
261 | break; | 400 | /* then print the substitution in its place */ |
401 | fputs(sed_cmd->replace, stdout); | ||
402 | /* then advance past the match */ | ||
403 | ptr += regmatch.rm_eo; | ||
404 | /* and let the calling function know that something | ||
405 | * has been changed */ | ||
406 | altered++; | ||
407 | |||
408 | /* if we're not doing this globally... */ | ||
409 | if (!sed_cmd->sub_g) | ||
410 | break; | ||
262 | } | 411 | } |
263 | *pos = 0; | 412 | /* if we COULD NOT match the search string (meaning we've gone past |
264 | newNeedle = ++pos; | 413 | * all previous instances), get out */ |
265 | for (;;) { | 414 | else |
266 | pos = strchr(pos, '/'); | ||
267 | if (pos == NULL) { | ||
268 | usage(sed_usage); | ||
269 | } | ||
270 | if (*(pos - 1) == '\\') { | ||
271 | pos++; | ||
272 | continue; | ||
273 | } | ||
274 | break; | 415 | break; |
275 | } | 416 | } |
276 | *pos = 0; | ||
277 | if (pos + 2 != 0) { | ||
278 | while (*++pos) { | ||
279 | switch (*pos) { | ||
280 | case 'i': | ||
281 | ignoreCase = TRUE; | ||
282 | break; | ||
283 | case 'p': | ||
284 | printFlag = TRUE; | ||
285 | break; | ||
286 | case 'g': | ||
287 | break; | ||
288 | default: | ||
289 | usage(sed_usage); | ||
290 | } | ||
291 | } | ||
292 | } | ||
293 | cp = pos; | ||
294 | /* fprintf(stderr, "replace '%s' with '%s'\n", needle, newNeedle); */ | ||
295 | break; | ||
296 | 417 | ||
297 | case 'a': /* APPEND */ | 418 | /* is there anything left to print? */ |
298 | if (strlen(cp) < 2) | 419 | if (*ptr) |
299 | break; | 420 | fputs(ptr, stdout); |
300 | sed_f = f_append; | 421 | } |
301 | appendline = ++cp; | 422 | |
302 | /* fprintf(stderr, "append '%s'\n", appendline); */ | 423 | break; |
303 | break; | 424 | } |
425 | |||
426 | return altered; | ||
427 | } | ||
428 | |||
429 | static void process_file(FILE *file) | ||
430 | { | ||
431 | char *line = NULL; | ||
432 | static int linenum = 0; /* GNU sed does not restart counting lines at EOF */ | ||
433 | unsigned int still_in_range = 0; | ||
434 | int line_altered; | ||
435 | int i; | ||
436 | |||
437 | /* go through every line in the file */ | ||
438 | while ((line = get_line_from_file(file)) != NULL) { | ||
439 | |||
440 | linenum++; | ||
441 | line_altered = 0; | ||
442 | |||
443 | /* for every line, go through all the commands */ | ||
444 | for (i = 0; i < ncmds; i++) { | ||
445 | |||
446 | /* are we acting on a range of matched lines? */ | ||
447 | if (sed_cmds[i].beg_match && sed_cmds[i].end_match) { | ||
448 | if (still_in_range || regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) { | ||
449 | line_altered += do_sed_command(&sed_cmds[i], line); | ||
450 | still_in_range = 1; | ||
451 | if (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0) | ||
452 | still_in_range = 0; | ||
304 | } | 453 | } |
454 | } | ||
305 | 455 | ||
306 | stopNow = TRUE; | 456 | /* are we trying to match a single line? */ |
307 | break; | 457 | else if (sed_cmds[i].beg_match) { |
458 | if (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) | ||
459 | line_altered += do_sed_command(&sed_cmds[i], line); | ||
460 | } | ||
308 | 461 | ||
309 | default: | 462 | /* are we acting on a range of line numbers? */ |
310 | usage(sed_usage); | 463 | else if (sed_cmds[i].beg_line > 0 && sed_cmds[i].end_line > 0) { |
464 | if (linenum >= sed_cmds[i].beg_line && linenum <= sed_cmds[i].end_line) | ||
465 | line_altered += do_sed_command(&sed_cmds[i], line); | ||
311 | } | 466 | } |
312 | } | ||
313 | } | ||
314 | 467 | ||
315 | if (argc == 0) { | 468 | /* are we acting on a specified line number */ |
316 | switch (sed_f) { | 469 | else if (sed_cmds[i].beg_line > 0) { |
317 | case f_none: | 470 | if (linenum == sed_cmds[i].beg_line) |
318 | break; | 471 | line_altered += do_sed_command(&sed_cmds[i], line); |
319 | case f_replace: | ||
320 | do_sed_repl(stdin, needle, newNeedle, ignoreCase, printFlag, | ||
321 | quietFlag); | ||
322 | break; | ||
323 | case f_append: | ||
324 | do_sed_append(stdin, appendline, quietFlag); | ||
325 | break; | ||
326 | } | ||
327 | } else { | ||
328 | while (argc-- > 0) { | ||
329 | name = *argv++; | ||
330 | |||
331 | fp = fopen(name, "r"); | ||
332 | if (fp == NULL) { | ||
333 | perror(name); | ||
334 | continue; | ||
335 | } | 472 | } |
336 | 473 | ||
337 | switch (sed_f) { | 474 | /* not acting on matches or line numbers. act on every line */ |
338 | case f_none: | 475 | else |
476 | line_altered += do_sed_command(&sed_cmds[i], line); | ||
477 | |||
478 | } | ||
479 | |||
480 | /* we will print the line unless we were told to be quiet or if the | ||
481 | * line was altered (via a 'd'elete or 's'ubstitution) */ | ||
482 | if (!be_quiet && !line_altered) | ||
483 | fputs(line, stdout); | ||
484 | |||
485 | free(line); | ||
486 | } | ||
487 | } | ||
488 | |||
489 | extern int sed_main(int argc, char **argv) | ||
490 | { | ||
491 | int opt; | ||
492 | |||
493 | /* do special-case option parsing */ | ||
494 | if (argv[1] && (strcmp(argv[1], "--help") == 0)) | ||
495 | usage(sed_usage); | ||
496 | |||
497 | /* do normal option parsing */ | ||
498 | while ((opt = getopt(argc, argv, "Vhne:f:")) > 0) { | ||
499 | switch (opt) { | ||
500 | case 'V': | ||
501 | printf("Print Busybox version here\n"); | ||
502 | exit(0); | ||
339 | break; | 503 | break; |
340 | case f_replace: | 504 | case 'h': |
341 | do_sed_repl(fp, needle, newNeedle, ignoreCase, printFlag, | 505 | usage(sed_usage); |
342 | quietFlag); | ||
343 | break; | 506 | break; |
344 | case f_append: | 507 | case 'n': |
345 | do_sed_append(fp, appendline, quietFlag); | 508 | be_quiet++; |
346 | break; | 509 | break; |
347 | } | 510 | case 'e': |
511 | add_cmd_str(optarg); | ||
512 | break; | ||
513 | case 'f': | ||
514 | load_cmd_file(optarg); | ||
515 | break; | ||
516 | } | ||
517 | } | ||
518 | |||
519 | /* if we didn't get a pattern from a -e and no command file was specified, | ||
520 | * argv[optind] should be the pattern. no pattern, no worky */ | ||
521 | if (ncmds == 0) { | ||
522 | if (argv[optind] == NULL) | ||
523 | usage(sed_usage); | ||
524 | else { | ||
525 | add_cmd_str(argv[optind]); | ||
526 | optind++; | ||
527 | } | ||
528 | } | ||
348 | 529 | ||
349 | if (ferror(fp)) | ||
350 | perror(name); | ||
351 | 530 | ||
352 | fclose(fp); | 531 | /* argv[(optind)..(argc-1)] should be names of file to process. If no |
532 | * files were specified or '-' was specified, take input from stdin. | ||
533 | * Otherwise, we process all the files specified. */ | ||
534 | if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) { | ||
535 | process_file(stdin); | ||
536 | } | ||
537 | else { | ||
538 | int i; | ||
539 | FILE *file; | ||
540 | for (i = optind; i < argc; i++) { | ||
541 | file = fopen(argv[i], "r"); | ||
542 | if (file == NULL) { | ||
543 | fprintf(stderr, "sed: %s: %s\n", argv[i], strerror(errno)); | ||
544 | } else { | ||
545 | process_file(file); | ||
546 | fclose(file); | ||
547 | } | ||
353 | } | 548 | } |
354 | } | 549 | } |
355 | return(TRUE); | 550 | |
356 | } | 551 | exit_sed(0, NULL); |
357 | 552 | ||
553 | /* not reached */ | ||
554 | return 0; | ||
555 | } | ||
358 | 556 | ||
359 | /* END CODE */ | 557 | #ifdef TEST_SED |
558 | int main(int argc, char **argv) | ||
559 | { | ||
560 | return sed_main(argc, argv); | ||
561 | } | ||
562 | #endif | ||