aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Whitley <markw@lineo.com>2000-07-10 22:55:51 +0000
committerMark Whitley <markw@lineo.com>2000-07-10 22:55:51 +0000
commit6315ce603b3ac2f4998e926b6106da21bcc0066f (patch)
tree0a3d57a74893ee848378e7f8f94e7167d30a063a
parent85c552035e4c5199d1cbef6058eb365c6150f33d (diff)
downloadbusybox-w32-6315ce603b3ac2f4998e926b6106da21bcc0066f.tar.gz
busybox-w32-6315ce603b3ac2f4998e926b6106da21bcc0066f.tar.bz2
busybox-w32-6315ce603b3ac2f4998e926b6106da21bcc0066f.zip
Brand new sed that uses libc regex routines.
There is some common code used by both sed & grep that should be put into utility.c as per Matt Kraai's suggestions/patch on the mailing list. Specifically, common regex_compile() and regex_subst() functions need to be made.
-rw-r--r--editors/sed.c763
-rw-r--r--sed.c763
2 files changed, 966 insertions, 560 deletions
diff --git a/editors/sed.c b/editors/sed.c
index d4b721e49..329f5ae8d 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -1,15 +1,8 @@
1/* vi: set sw=4 ts=4: */
2/* 1/*
3 * Mini sed implementation for busybox 2 * sed.c - very minimalist version of sed
4 *
5 * 3 *
6 * Copyright (C) 1999,2000 by Lineo, inc. 4 * Copyright (C) 1999,2000 by Lineo, inc.
7 * Written by Erik Andersen <andersen@lineo.com>, <andersee@debian.org> 5 * Written by Mark Whitley <markw@lineo.com>, <markw@enol.com>
8 *
9 * Modifications for addresses and append command have been
10 * written by Marco Pantaleoni <panta@prosa.it>, <panta@elasticworld.org>
11 * and are:
12 * Copyright (C) 1999 Marco Pantaleoni.
13 * 6 *
14 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
@@ -27,333 +20,543 @@
27 * 20 *
28 */ 21 */
29 22
30#include "internal.h" 23/*
31#include "regexp.h" 24 Supported features and commands in this version of sed:
25
26 - comments ('#')
27 - Address matching: num|/matchstr/[,num|/matchstr/|$]command
28 - Commands: p, d, s/match/replace/[g]
29
30 (Note: Specifying an address (range) to match is *optional*; commands
31 default to the whole pattern space if no specific address match was
32 requested.)
33
34 Unsupported features:
35
36 - transliteration (y/source-chars/dest-chars/) (use 'tr')
37 - no support for characters other than the '/' character for regex matches
38 - no pattern space hold space storing / swapping (x, etc.)
39 - no labels / branching (: label, b, t, and friends)
40 - and lots, lots more.
41
42*/
43
32#include <stdio.h> 44#include <stdio.h>
33#include <dirent.h> 45#include <stdlib.h> /* for realloc() */
46#include <unistd.h> /* for getopt() */
47#include <regex.h>
48#include <string.h> /* for strdup() */
34#include <errno.h> 49#include <errno.h>
35#include <fcntl.h> 50#include <ctype.h> /* for isspace() */
36#include <signal.h> 51#include "internal.h"
37#include <time.h> 52
38#include <ctype.h> 53
54/* externs */
55extern int optind; /* in unistd.h */
56extern char *optarg; /* ditto */
57
58/* options */
59static int be_quiet = 0;
60
61struct sed_cmd {
62
63 /* address storage */
64 int beg_line; /* 'sed 1p' 0 == no beginning line, apply commands to all lines */
65 int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
66 regex_t *beg_match; /* sed -e '/match/cmd' */
67 regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */
68
69 /* the command */
70 char cmd; /* p,d,s (add more at your leisure :-) */
71
72 /* substitution command specific fields */
73 regex_t *sub_match; /* sed -e 's/sub_match/replace/' */
74 char *replace; /* sed -e 's/sub_match/replace/' XXX: who will hold the \1 \2 \3s? */
75 unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */
76};
77
78/* globals */
79static struct sed_cmd *sed_cmds = NULL; /* growable array holding a sequence of sed cmds */
80static int ncmds = 0; /* number of sed commands */
81
82/*static char *cur_file = NULL;*/ /* file currently being processed XXX: do I need this? */
39 83
40static const char sed_usage[] = 84static const char sed_usage[] =
41 "sed [-n] -e script [file...]\n" 85 "sed [-Vhnef] pattern [files...]\n"
42#ifndef BB_FEATURE_TRIVIAL_HELP 86#ifndef BB_FEATURE_TRIVIAL_HELP
43 "\nAllowed sed scripts come in the following form:\n" 87 "\n"
44 "\t'ADDR [!] COMMAND'\n\n" 88 "-n\tsuppress automatic printing of pattern space\n"
45 "\twhere address ADDR can be:\n" 89 "-e script\tadd the script to the commands to be executed\n"
46 "\t NUMBER Match specified line number\n" 90 "-f scriptfile\tadd the contents of script-file to the commands to be executed\n"
47 "\t $ Match last line\n" 91 "-h\tdisplay this help message\n"
48 "\t /REGEXP/ Match specified regexp\n" 92 "-V\toutput version information and exit\n"
49 "\t (! inverts the meaning of the match)\n\n" 93 "\n"
50 "\tand COMMAND can be:\n" 94 "If no -e or -f is given, the first non-option argument is taken as the\n"
51 "\t s/regexp/replacement/[igp]\n" 95 "sed script to interpret. All remaining arguments are names of input\n"
52 "\t which attempt to match regexp against the pattern space\n" 96 "files; if no input files are specified, then the standard input is read.\n"
53 "\t and if successful replaces the matched portion with replacement.\n\n"
54 "\t aTEXT\n"
55 "\t which appends TEXT after the pattern space\n"
56 "Options:\n"
57 "-e\tadd the script to the commands to be executed\n"
58 "-n\tsuppress automatic printing of pattern space\n\n"
59#if defined BB_REGEXP
60 "This version of sed matches full regular expressions.\n";
61#else
62 "This version of sed matches strings (not full regular expressions).\n"
63#endif
64#endif 97#endif
65 ; 98 ;
66 99
67/* Flags & variables */ 100#if 0
101/* Nuke from here { */
68 102
69typedef enum { f_none, f_replace, f_append } sed_function;
70 103
71#define NO_LINE -2 104/* get_line_from_file() - This function reads an entire line from a text file
72#define LAST_LINE -1 105 * * up to a newline. It returns a malloc'ed char * which must be stored and
73static int addr_line = NO_LINE; 106 * * free'ed by the caller. */
74static char *addr_pattern = NULL; 107extern char *get_line_from_file(FILE *file)
75static int negated = 0; 108{
109 static const int GROWBY = 80; /* how large we will grow strings by */
76 110
77#define SKIPSPACES(p) do { while (isspace(*(p))) (p)++; } while (0) 111 int ch;
112 int idx = 0;
113 char *linebuf = NULL;
114 int linebufsz = 0;
78 115
79#define BUFSIZE 1024 116 while (1) {
117 ch = fgetc(file);
118 if (ch == EOF)
119 break;
120 /* grow the line buffer as necessary */
121 if (idx > linebufsz-2)
122 linebuf = realloc(linebuf, linebufsz += GROWBY);
123 linebuf[idx++] = (char)ch;
124 if ((char)ch == '\n')
125 break;
126 }
127
128 if (idx == 0)
129 return NULL;
130
131 linebuf[idx] = 0;
132 return linebuf;
133}
80 134
81static inline int at_last(FILE * fp) 135static void usage(const char *string)
82{ 136{
83 int res = 0; 137 printf("usage: %s\n", string);
138 exit(0);
139}
84 140
85 if (feof(fp)) 141/* } to here when we integrate this into busybox */
86 return 1; 142#endif
87 else { 143
88 int ch; 144static void destroy_cmd_strs()
145{
146 if (sed_cmds == NULL)
147 return;
148
149 /* destroy all the elements in the array */
150 while (--ncmds >= 0) {
89 151
90 if ((ch = fgetc(fp)) == EOF) 152 if (sed_cmds[ncmds].beg_match) {
91 res++; 153 regfree(sed_cmds[ncmds].beg_match);
92 ungetc(ch, fp); 154 free(sed_cmds[ncmds].beg_match);
155 }
156 if (sed_cmds[ncmds].end_match) {
157 regfree(sed_cmds[ncmds].end_match);
158 free(sed_cmds[ncmds].end_match);
159 }
160 if (sed_cmds[ncmds].sub_match) {
161 regfree(sed_cmds[ncmds].sub_match);
162 free(sed_cmds[ncmds].sub_match);
163 }
164 if (sed_cmds[ncmds].replace)
165 free(sed_cmds[ncmds].replace);
93 } 166 }
94 return res; 167
168 /* destroy the array */
169 free(sed_cmds);
170 sed_cmds = NULL;
171}
172
173static void exit_sed(int retcode, const char *message)
174{
175 destroy_cmd_strs();
176 if (message)
177 fputs(message, stderr);
178 exit(retcode);
95} 179}
96 180
97static void do_sed_repl(FILE * fp, char *needle, char *newNeedle, 181/*
98 int ignoreCase, int printFlag, int quietFlag) 182 * trim_str - trims leading and trailing space from a string
183 *
184 * Note: This returns a malloc'ed string so you must store and free it
185 * XXX: This should be in the utility.c file.
186 */
187static char *trim_str(const char *str)
188{
189 int i;
190 char *retstr = strdup(str);
191
192 /* trim leading whitespace */
193 memmove(retstr, &retstr[strspn(retstr, " \n\t\v")], strlen(retstr));
194
195 /* trim trailing whitespace */
196 i = strlen(retstr) - 1;
197 while (isspace(retstr[i]))
198 i--;
199 retstr[++i] = 0;
200
201 /* Aside:
202 *
203 * you know, a strrspn() would really be nice cuz then we could say:
204 *
205 * retstr[strlen(retstr) - strrspn(retstr, " \n\t\v") + 1] = 0;
206 */
207
208 return retstr;
209}
210
211/*
212 * index_of_next_unescaped_slash - walks left to right through a string beginning
213 * at a specified index and returns the index of the next unescaped slash.
214 */
215static int index_of_next_unescaped_slash(int idx, const char *str)
216{
217 do {
218 idx++;
219 /* test if we've hit the end */
220 if (str[idx] == 0)
221 return -1;
222 } while (str[idx] != '/' && str[idx - 1] != '\\');
223
224 return idx;
225}
226
227/*
228 * returns the index in the string just past where the address ends.
229 */
230static int get_address(const char *str, int *line, regex_t **regex)
99{ 231{
100 int foundOne = FALSE; 232 char *my_str = strdup(str);
101 char haystack[BUFSIZE]; 233 int idx = 0;
102 int line = 1, doit; 234
103 235 if (isdigit(my_str[idx])) {
104 while (fgets(haystack, BUFSIZE - 1, fp)) { 236 do {
105 doit = 0; 237 idx++;
106 if (addr_pattern) { 238 } while (isdigit(my_str[idx]));
107 doit = !find_match(haystack, addr_pattern, FALSE); 239 my_str[idx] = 0;
108 } else if (addr_line == NO_LINE) 240 *line = atoi(my_str);
109 doit = 1; 241 *regex = NULL;
110 else if (addr_line == LAST_LINE) { 242 }
111 if (at_last(fp)) 243 else if (my_str[idx] == '$') {
112 doit = 1; 244 *line = -1;
113 } else { 245 *regex = NULL;
114 if (line == addr_line) 246 idx++;
115 doit = 1; 247 }
248 else if (my_str[idx] == '/') {
249 int ret;
250 idx = index_of_next_unescaped_slash(idx, my_str);
251 if (idx == -1) {
252 free(my_str);
253 exit_sed(1, "sed: unterminated match expression\n");
116 } 254 }
117 if (negated) 255 my_str[idx] = 0; /* shave off the trailing '/' */
118 doit = 1 - doit; 256 my_str++; /* shave off the leading '/' */
119 if (doit) { 257 *regex = (regex_t *)malloc(sizeof(regex_t));
120 foundOne = 258 if ((ret = regcomp(*regex, my_str, 0)) != 0) {
121 replace_match(haystack, needle, newNeedle, ignoreCase); 259 /* error handling if regular expression couldn't be compiled */
122 260 int errmsgsz = regerror(ret, *regex, NULL, 0);
123 if (foundOne == TRUE && printFlag == TRUE) { 261 char *errmsg = malloc(errmsgsz);
124 fprintf(stdout, haystack); 262 if (errmsg == NULL) {
263 exit_sed(1, "sed: memory error\n");
125 } 264 }
265 regerror(ret, *regex, errmsg, errmsgsz);
266 fprintf(stderr, "sed: %s\n", errmsg);
267 free(errmsg);
268 exit_sed(1, NULL);
126 } 269 }
270 my_str--; /* move my_str back so free() (below) won't barf */
271 idx++; /* advance idx one past the end of the /match/ */
272 }
273 else {
274 fprintf(stderr, "sed.c:get_address: no address found in string\n");
275 fprintf(stderr, "\t(you probably didn't check the string you passed me)\n");
276 idx = -1;
277 }
127 278
128 if (quietFlag == FALSE) { 279 free(my_str);
129 fprintf(stdout, haystack); 280 return idx;
130 } 281}
131 282
132 line++; 283static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
284{
285 int idx = 0;
286
287 /* parse the command
288 * format is: [addr][,addr]cmd
289 * |----||-----||-|
290 * part1 part2 part3
291 */
292
293 /* first part (if present) is an address: either a number or a /regex/ */
294 if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/')
295 idx = get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
296
297 /* second part (if present) will begin with a comma */
298 if (cmdstr[idx] == ',')
299 idx += get_address(&cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match);
300
301 /* last part (mandatory) will be a command */
302 if (cmdstr[idx] == '\0')
303 exit_sed(1, "sed: missing command\n");
304 if (!strchr("pds", cmdstr[idx])) /* <-- XXX add new commands here */
305 exit_sed(1, "sed: invalid command\n");
306 sed_cmd->cmd = cmdstr[idx];
307 /* special-case handling for 's' */
308 if (sed_cmd->cmd == 's') {
309 int oldidx;
310 /* format for substitution is:
311 * s/match/replace/g
312 * | |
313 * mandatory optional
314 */
315
316 /* verify that we have an 's' followed by a 'slash' */
317 if (cmdstr[++idx] != '/')
318 exit_sed(1, "sed: bad format in substitution expression\n");
319
320 /* get the substitution part */
321 idx += get_address(&cmdstr[idx], NULL, &sed_cmd->sub_match);
322
323 /* get the replacement part */
324 oldidx = idx;
325 idx = index_of_next_unescaped_slash(idx, cmdstr);
326 sed_cmd->replace = (char *)malloc(idx - oldidx + 1);
327 strncpy(sed_cmd->replace, &cmdstr[oldidx], idx - oldidx);
328 sed_cmd->replace[idx - oldidx] = 0;
329
330 /* store the 'g' if present */
331 if (cmdstr[++idx] == 'g')
332 sed_cmd->sub_g = 1;
133 } 333 }
134} 334}
135 335
136static void do_sed_append(FILE * fp, char *appendline, int quietFlag) 336static void add_cmd_str(const char *cmdstr)
137{ 337{
138 char buffer[BUFSIZE]; 338 char *my_cmdstr = trim_str(cmdstr);
139 int line = 1, doit;
140
141 while (fgets(buffer, BUFSIZE - 1, fp)) {
142 doit = 0;
143 if (addr_pattern) {
144 doit = !find_match(buffer, addr_pattern, FALSE);
145 } else if (addr_line == NO_LINE)
146 doit = 1;
147 else if (addr_line == LAST_LINE) {
148 if (at_last(fp))
149 doit = 1;
150 } else {
151 if (line == addr_line)
152 doit = 1;
153 }
154 if (negated)
155 doit = 1 - doit;
156 if (quietFlag == FALSE) {
157 fprintf(stdout, buffer);
158 }
159 if (doit) {
160 fputs(appendline, stdout);
161 fputc('\n', stdout);
162 }
163 339
164 line++; 340 /* if this is a comment, don't even bother */
341 if (my_cmdstr[0] == '#') {
342 free(my_cmdstr);
343 return;
165 } 344 }
345
346 /* grow the array */
347 sed_cmds = realloc(sed_cmds, sizeof(struct sed_cmd) * (++ncmds));
348 /* zero new element */
349 memset(&sed_cmds[ncmds-1], 0, sizeof(struct sed_cmd));
350 /* load command string into new array element */
351 parse_cmd_str(&sed_cmds[ncmds-1], my_cmdstr);
166} 352}
167 353
168extern int sed_main(int argc, char **argv) 354
355static void load_cmd_file(char *filename)
169{ 356{
170 FILE *fp; 357 FILE *cmdfile;
171 char *needle = NULL, *newNeedle = NULL; 358 char *line;
172 char *name; 359
173 char *cp; 360 cmdfile = fopen(filename, "r");
174 int ignoreCase = FALSE; 361 if (cmdfile == NULL)
175 int printFlag = FALSE; 362 exit_sed(1, strerror(errno));
176 int quietFlag = FALSE; 363
177 int stopNow; 364 while ((line = get_line_from_file(cmdfile)) != NULL) {
178 char *line_s = NULL, saved; 365 line[strlen(line)-1] = 0; /* eat newline */
179 char *appendline = NULL; 366 add_cmd_str(line);
180 char *pos; 367 free(line);
181 sed_function sed_f = f_none;
182
183 argc--;
184 argv++;
185 if (argc < 1) {
186 usage(sed_usage);
187 } 368 }
369}
188 370
189 while (argc > 1) {
190 if (**argv != '-')
191 usage(sed_usage);
192 argc--;
193 cp = *argv++;
194 stopNow = FALSE;
195 371
196 while (*++cp && stopNow == FALSE) { 372static int do_sed_command(const struct sed_cmd *sed_cmd, const char *line)
197 switch (*cp) { 373{
198 case 'n': 374 int altered = 0;
199 quietFlag = TRUE;
200 break;
201 case 'e':
202 if (*(cp + 1) == 0 && --argc < 0) {
203 usage(sed_usage);
204 }
205 if (*++cp != 's')
206 cp = *argv++;
207
208 /* Read address if present */
209 SKIPSPACES(cp);
210 if (*cp == '$') {
211 addr_line = LAST_LINE;
212 cp++;
213 } else {
214 if (isdigit(*cp)) { /* LINE ADDRESS */
215 line_s = cp;
216 while (isdigit(*cp))
217 cp++;
218 if (cp > line_s) {
219 /* numeric line */
220 saved = *cp;
221 *cp = '\0';
222 addr_line = atoi(line_s);
223 *cp = saved;
224 }
225 } else if (*cp == '/') { /* PATTERN ADDRESS */
226 pos = addr_pattern = cp + 1;
227 pos = strchr(pos, '/');
228 if (!pos)
229 usage(sed_usage);
230 *pos = '\0';
231 cp = pos + 1;
232 }
233 }
234 375
235 SKIPSPACES(cp); 376 switch (sed_cmd->cmd) {
236 if (*cp == '!') {
237 negated++;
238 cp++;
239 }
240 377
241 /* Read command */ 378 case 'p':
379 fputs(line, stdout);
380 break;
242 381
243 SKIPSPACES(cp); 382 case 'd':
244 switch (*cp) { 383 altered++;
245 case 's': /* REPLACE */ 384 break;
246 if (strlen(cp) <= 3 || *(cp + 1) != '/') 385
247 break; 386 case 's': /* oo, a fun one :-) */
248 sed_f = f_replace; 387
249 388 /* we only substitute if the substitution 'search' expression matches */
250 pos = needle = cp + 2; 389 if (regexec(sed_cmd->sub_match, line, 0, NULL, 0) == 0) {
251 390 regmatch_t regmatch;
252 for (;;) { 391 int i;
253 pos = strchr(pos, '/'); 392 char *ptr = (char *)line;
254 if (pos == NULL) { 393
255 usage(sed_usage); 394 while (*ptr) {
256 } 395 /* if we can match the search string... */
257 if (*(pos - 1) == '\\') { 396 if (regexec(sed_cmd->sub_match, ptr, 1, &regmatch, 0) == 0) {
258 pos++; 397 /* print everything before the match, */
259 continue; 398 for (i = 0; i < regmatch.rm_so; i++)
260 } 399 fputc(ptr[i], stdout);
261 break; 400 /* then print the substitution in its place */
401 fputs(sed_cmd->replace, stdout);
402 /* then advance past the match */
403 ptr += regmatch.rm_eo;
404 /* and let the calling function know that something
405 * has been changed */
406 altered++;
407
408 /* if we're not doing this globally... */
409 if (!sed_cmd->sub_g)
410 break;
262 } 411 }
263 *pos = 0; 412 /* if we COULD NOT match the search string (meaning we've gone past
264 newNeedle = ++pos; 413 * all previous instances), get out */
265 for (;;) { 414 else
266 pos = strchr(pos, '/');
267 if (pos == NULL) {
268 usage(sed_usage);
269 }
270 if (*(pos - 1) == '\\') {
271 pos++;
272 continue;
273 }
274 break; 415 break;
275 } 416 }
276 *pos = 0;
277 if (pos + 2 != 0) {
278 while (*++pos) {
279 switch (*pos) {
280 case 'i':
281 ignoreCase = TRUE;
282 break;
283 case 'p':
284 printFlag = TRUE;
285 break;
286 case 'g':
287 break;
288 default:
289 usage(sed_usage);
290 }
291 }
292 }
293 cp = pos;
294 /* fprintf(stderr, "replace '%s' with '%s'\n", needle, newNeedle); */
295 break;
296 417
297 case 'a': /* APPEND */ 418 /* is there anything left to print? */
298 if (strlen(cp) < 2) 419 if (*ptr)
299 break; 420 fputs(ptr, stdout);
300 sed_f = f_append; 421 }
301 appendline = ++cp; 422
302 /* fprintf(stderr, "append '%s'\n", appendline); */ 423 break;
303 break; 424 }
425
426 return altered;
427}
428
429static void process_file(FILE *file)
430{
431 char *line = NULL;
432 static int linenum = 0; /* GNU sed does not restart counting lines at EOF */
433 unsigned int still_in_range = 0;
434 int line_altered;
435 int i;
436
437 /* go through every line in the file */
438 while ((line = get_line_from_file(file)) != NULL) {
439
440 linenum++;
441 line_altered = 0;
442
443 /* for every line, go through all the commands */
444 for (i = 0; i < ncmds; i++) {
445
446 /* are we acting on a range of matched lines? */
447 if (sed_cmds[i].beg_match && sed_cmds[i].end_match) {
448 if (still_in_range || regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) {
449 line_altered += do_sed_command(&sed_cmds[i], line);
450 still_in_range = 1;
451 if (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0)
452 still_in_range = 0;
304 } 453 }
454 }
305 455
306 stopNow = TRUE; 456 /* are we trying to match a single line? */
307 break; 457 else if (sed_cmds[i].beg_match) {
458 if (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0)
459 line_altered += do_sed_command(&sed_cmds[i], line);
460 }
308 461
309 default: 462 /* are we acting on a range of line numbers? */
310 usage(sed_usage); 463 else if (sed_cmds[i].beg_line > 0 && sed_cmds[i].end_line > 0) {
464 if (linenum >= sed_cmds[i].beg_line && linenum <= sed_cmds[i].end_line)
465 line_altered += do_sed_command(&sed_cmds[i], line);
311 } 466 }
312 }
313 }
314 467
315 if (argc == 0) { 468 /* are we acting on a specified line number */
316 switch (sed_f) { 469 else if (sed_cmds[i].beg_line > 0) {
317 case f_none: 470 if (linenum == sed_cmds[i].beg_line)
318 break; 471 line_altered += do_sed_command(&sed_cmds[i], line);
319 case f_replace:
320 do_sed_repl(stdin, needle, newNeedle, ignoreCase, printFlag,
321 quietFlag);
322 break;
323 case f_append:
324 do_sed_append(stdin, appendline, quietFlag);
325 break;
326 }
327 } else {
328 while (argc-- > 0) {
329 name = *argv++;
330
331 fp = fopen(name, "r");
332 if (fp == NULL) {
333 perror(name);
334 continue;
335 } 472 }
336 473
337 switch (sed_f) { 474 /* not acting on matches or line numbers. act on every line */
338 case f_none: 475 else
476 line_altered += do_sed_command(&sed_cmds[i], line);
477
478 }
479
480 /* we will print the line unless we were told to be quiet or if the
481 * line was altered (via a 'd'elete or 's'ubstitution) */
482 if (!be_quiet && !line_altered)
483 fputs(line, stdout);
484
485 free(line);
486 }
487}
488
489extern int sed_main(int argc, char **argv)
490{
491 int opt;
492
493 /* do special-case option parsing */
494 if (argv[1] && (strcmp(argv[1], "--help") == 0))
495 usage(sed_usage);
496
497 /* do normal option parsing */
498 while ((opt = getopt(argc, argv, "Vhne:f:")) > 0) {
499 switch (opt) {
500 case 'V':
501 printf("Print Busybox version here\n");
502 exit(0);
339 break; 503 break;
340 case f_replace: 504 case 'h':
341 do_sed_repl(fp, needle, newNeedle, ignoreCase, printFlag, 505 usage(sed_usage);
342 quietFlag);
343 break; 506 break;
344 case f_append: 507 case 'n':
345 do_sed_append(fp, appendline, quietFlag); 508 be_quiet++;
346 break; 509 break;
347 } 510 case 'e':
511 add_cmd_str(optarg);
512 break;
513 case 'f':
514 load_cmd_file(optarg);
515 break;
516 }
517 }
518
519 /* if we didn't get a pattern from a -e and no command file was specified,
520 * argv[optind] should be the pattern. no pattern, no worky */
521 if (ncmds == 0) {
522 if (argv[optind] == NULL)
523 usage(sed_usage);
524 else {
525 add_cmd_str(argv[optind]);
526 optind++;
527 }
528 }
348 529
349 if (ferror(fp))
350 perror(name);
351 530
352 fclose(fp); 531 /* argv[(optind)..(argc-1)] should be names of file to process. If no
532 * files were specified or '-' was specified, take input from stdin.
533 * Otherwise, we process all the files specified. */
534 if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) {
535 process_file(stdin);
536 }
537 else {
538 int i;
539 FILE *file;
540 for (i = optind; i < argc; i++) {
541 file = fopen(argv[i], "r");
542 if (file == NULL) {
543 fprintf(stderr, "sed: %s: %s\n", argv[i], strerror(errno));
544 } else {
545 process_file(file);
546 fclose(file);
547 }
353 } 548 }
354 } 549 }
355 return(TRUE); 550
356} 551 exit_sed(0, NULL);
357 552
553 /* not reached */
554 return 0;
555}
358 556
359/* END CODE */ 557#ifdef TEST_SED
558int main(int argc, char **argv)
559{
560 return sed_main(argc, argv);
561}
562#endif
diff --git a/sed.c b/sed.c
index d4b721e49..329f5ae8d 100644
--- a/sed.c
+++ b/sed.c
@@ -1,15 +1,8 @@
1/* vi: set sw=4 ts=4: */
2/* 1/*
3 * Mini sed implementation for busybox 2 * sed.c - very minimalist version of sed
4 *
5 * 3 *
6 * Copyright (C) 1999,2000 by Lineo, inc. 4 * Copyright (C) 1999,2000 by Lineo, inc.
7 * Written by Erik Andersen <andersen@lineo.com>, <andersee@debian.org> 5 * Written by Mark Whitley <markw@lineo.com>, <markw@enol.com>
8 *
9 * Modifications for addresses and append command have been
10 * written by Marco Pantaleoni <panta@prosa.it>, <panta@elasticworld.org>
11 * and are:
12 * Copyright (C) 1999 Marco Pantaleoni.
13 * 6 *
14 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
@@ -27,333 +20,543 @@
27 * 20 *
28 */ 21 */
29 22
30#include "internal.h" 23/*
31#include "regexp.h" 24 Supported features and commands in this version of sed:
25
26 - comments ('#')
27 - Address matching: num|/matchstr/[,num|/matchstr/|$]command
28 - Commands: p, d, s/match/replace/[g]
29
30 (Note: Specifying an address (range) to match is *optional*; commands
31 default to the whole pattern space if no specific address match was
32 requested.)
33
34 Unsupported features:
35
36 - transliteration (y/source-chars/dest-chars/) (use 'tr')
37 - no support for characters other than the '/' character for regex matches
38 - no pattern space hold space storing / swapping (x, etc.)
39 - no labels / branching (: label, b, t, and friends)
40 - and lots, lots more.
41
42*/
43
32#include <stdio.h> 44#include <stdio.h>
33#include <dirent.h> 45#include <stdlib.h> /* for realloc() */
46#include <unistd.h> /* for getopt() */
47#include <regex.h>
48#include <string.h> /* for strdup() */
34#include <errno.h> 49#include <errno.h>
35#include <fcntl.h> 50#include <ctype.h> /* for isspace() */
36#include <signal.h> 51#include "internal.h"
37#include <time.h> 52
38#include <ctype.h> 53
54/* externs */
55extern int optind; /* in unistd.h */
56extern char *optarg; /* ditto */
57
58/* options */
59static int be_quiet = 0;
60
61struct sed_cmd {
62
63 /* address storage */
64 int beg_line; /* 'sed 1p' 0 == no beginning line, apply commands to all lines */
65 int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
66 regex_t *beg_match; /* sed -e '/match/cmd' */
67 regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */
68
69 /* the command */
70 char cmd; /* p,d,s (add more at your leisure :-) */
71
72 /* substitution command specific fields */
73 regex_t *sub_match; /* sed -e 's/sub_match/replace/' */
74 char *replace; /* sed -e 's/sub_match/replace/' XXX: who will hold the \1 \2 \3s? */
75 unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */
76};
77
78/* globals */
79static struct sed_cmd *sed_cmds = NULL; /* growable array holding a sequence of sed cmds */
80static int ncmds = 0; /* number of sed commands */
81
82/*static char *cur_file = NULL;*/ /* file currently being processed XXX: do I need this? */
39 83
40static const char sed_usage[] = 84static const char sed_usage[] =
41 "sed [-n] -e script [file...]\n" 85 "sed [-Vhnef] pattern [files...]\n"
42#ifndef BB_FEATURE_TRIVIAL_HELP 86#ifndef BB_FEATURE_TRIVIAL_HELP
43 "\nAllowed sed scripts come in the following form:\n" 87 "\n"
44 "\t'ADDR [!] COMMAND'\n\n" 88 "-n\tsuppress automatic printing of pattern space\n"
45 "\twhere address ADDR can be:\n" 89 "-e script\tadd the script to the commands to be executed\n"
46 "\t NUMBER Match specified line number\n" 90 "-f scriptfile\tadd the contents of script-file to the commands to be executed\n"
47 "\t $ Match last line\n" 91 "-h\tdisplay this help message\n"
48 "\t /REGEXP/ Match specified regexp\n" 92 "-V\toutput version information and exit\n"
49 "\t (! inverts the meaning of the match)\n\n" 93 "\n"
50 "\tand COMMAND can be:\n" 94 "If no -e or -f is given, the first non-option argument is taken as the\n"
51 "\t s/regexp/replacement/[igp]\n" 95 "sed script to interpret. All remaining arguments are names of input\n"
52 "\t which attempt to match regexp against the pattern space\n" 96 "files; if no input files are specified, then the standard input is read.\n"
53 "\t and if successful replaces the matched portion with replacement.\n\n"
54 "\t aTEXT\n"
55 "\t which appends TEXT after the pattern space\n"
56 "Options:\n"
57 "-e\tadd the script to the commands to be executed\n"
58 "-n\tsuppress automatic printing of pattern space\n\n"
59#if defined BB_REGEXP
60 "This version of sed matches full regular expressions.\n";
61#else
62 "This version of sed matches strings (not full regular expressions).\n"
63#endif
64#endif 97#endif
65 ; 98 ;
66 99
67/* Flags & variables */ 100#if 0
101/* Nuke from here { */
68 102
69typedef enum { f_none, f_replace, f_append } sed_function;
70 103
71#define NO_LINE -2 104/* get_line_from_file() - This function reads an entire line from a text file
72#define LAST_LINE -1 105 * * up to a newline. It returns a malloc'ed char * which must be stored and
73static int addr_line = NO_LINE; 106 * * free'ed by the caller. */
74static char *addr_pattern = NULL; 107extern char *get_line_from_file(FILE *file)
75static int negated = 0; 108{
109 static const int GROWBY = 80; /* how large we will grow strings by */
76 110
77#define SKIPSPACES(p) do { while (isspace(*(p))) (p)++; } while (0) 111 int ch;
112 int idx = 0;
113 char *linebuf = NULL;
114 int linebufsz = 0;
78 115
79#define BUFSIZE 1024 116 while (1) {
117 ch = fgetc(file);
118 if (ch == EOF)
119 break;
120 /* grow the line buffer as necessary */
121 if (idx > linebufsz-2)
122 linebuf = realloc(linebuf, linebufsz += GROWBY);
123 linebuf[idx++] = (char)ch;
124 if ((char)ch == '\n')
125 break;
126 }
127
128 if (idx == 0)
129 return NULL;
130
131 linebuf[idx] = 0;
132 return linebuf;
133}
80 134
81static inline int at_last(FILE * fp) 135static void usage(const char *string)
82{ 136{
83 int res = 0; 137 printf("usage: %s\n", string);
138 exit(0);
139}
84 140
85 if (feof(fp)) 141/* } to here when we integrate this into busybox */
86 return 1; 142#endif
87 else { 143
88 int ch; 144static void destroy_cmd_strs()
145{
146 if (sed_cmds == NULL)
147 return;
148
149 /* destroy all the elements in the array */
150 while (--ncmds >= 0) {
89 151
90 if ((ch = fgetc(fp)) == EOF) 152 if (sed_cmds[ncmds].beg_match) {
91 res++; 153 regfree(sed_cmds[ncmds].beg_match);
92 ungetc(ch, fp); 154 free(sed_cmds[ncmds].beg_match);
155 }
156 if (sed_cmds[ncmds].end_match) {
157 regfree(sed_cmds[ncmds].end_match);
158 free(sed_cmds[ncmds].end_match);
159 }
160 if (sed_cmds[ncmds].sub_match) {
161 regfree(sed_cmds[ncmds].sub_match);
162 free(sed_cmds[ncmds].sub_match);
163 }
164 if (sed_cmds[ncmds].replace)
165 free(sed_cmds[ncmds].replace);
93 } 166 }
94 return res; 167
168 /* destroy the array */
169 free(sed_cmds);
170 sed_cmds = NULL;
171}
172
173static void exit_sed(int retcode, const char *message)
174{
175 destroy_cmd_strs();
176 if (message)
177 fputs(message, stderr);
178 exit(retcode);
95} 179}
96 180
97static void do_sed_repl(FILE * fp, char *needle, char *newNeedle, 181/*
98 int ignoreCase, int printFlag, int quietFlag) 182 * trim_str - trims leading and trailing space from a string
183 *
184 * Note: This returns a malloc'ed string so you must store and free it
185 * XXX: This should be in the utility.c file.
186 */
187static char *trim_str(const char *str)
188{
189 int i;
190 char *retstr = strdup(str);
191
192 /* trim leading whitespace */
193 memmove(retstr, &retstr[strspn(retstr, " \n\t\v")], strlen(retstr));
194
195 /* trim trailing whitespace */
196 i = strlen(retstr) - 1;
197 while (isspace(retstr[i]))
198 i--;
199 retstr[++i] = 0;
200
201 /* Aside:
202 *
203 * you know, a strrspn() would really be nice cuz then we could say:
204 *
205 * retstr[strlen(retstr) - strrspn(retstr, " \n\t\v") + 1] = 0;
206 */
207
208 return retstr;
209}
210
211/*
212 * index_of_next_unescaped_slash - walks left to right through a string beginning
213 * at a specified index and returns the index of the next unescaped slash.
214 */
215static int index_of_next_unescaped_slash(int idx, const char *str)
216{
217 do {
218 idx++;
219 /* test if we've hit the end */
220 if (str[idx] == 0)
221 return -1;
222 } while (str[idx] != '/' && str[idx - 1] != '\\');
223
224 return idx;
225}
226
/*
 * get_address - parses one address from the front of a string
 *
 * str:   text beginning with the address: a decimal line number, '$', or a
 *        /regex/ delimited by unescaped slashes
 * line:  receives the line number (-1 for '$'); left untouched for a
 *        /regex/ address. May be NULL if the caller only expects a /regex/.
 * regex: receives a malloc'ed, compiled regex for a /regex/ address (the
 *        caller owns it and must regfree() and free() it), or NULL for a
 *        numeric or '$' address. Left untouched when no address is found.
 *
 * An unterminated or uncompilable /regex/, or running out of memory, ends
 * the program through exit_sed().
 *
 * returns the index in the string just past where the address ends, or -1
 * (after printing a diagnostic) if the string does not start with an address.
 */
static int get_address(const char *str, int *line, regex_t **regex)
{
	int idx = 0;

	if (isdigit((unsigned char)str[idx])) {
		/* atoi() stops at the first non-digit, so it can read the number
		 * straight out of the caller's string */
		if (line)
			*line = atoi(str);
		do {
			idx++;
		} while (isdigit((unsigned char)str[idx]));
		*regex = NULL;
	}
	else if (str[idx] == '$') {
		if (line)
			*line = -1;
		*regex = NULL;
		idx++;
	}
	else if (str[idx] == '/') {
		char *pattern;
		regex_t *re;
		int ret;

		idx = index_of_next_unescaped_slash(idx, str);
		if (idx == -1)
			exit_sed(1, "sed: unterminated match expression\n");

		/* copy only the text between the two slashes: idx - 1 characters
		 * plus the terminator */
		pattern = malloc((size_t)idx);
		re = malloc(sizeof *re);
		if (pattern == NULL || re == NULL) {
			free(pattern);
			free(re);
			exit_sed(1, "sed: memory error\n");
		}
		memcpy(pattern, str + 1, (size_t)idx - 1);
		pattern[idx - 1] = '\0';

		if ((ret = regcomp(re, pattern, 0)) != 0) {
			/* error handling if regular expression couldn't be compiled */
			size_t errmsgsz = regerror(ret, re, NULL, 0);
			char *errmsg = malloc(errmsgsz);

			/* the regex is only handed to the caller once it compiled, so
			 * the cleanup run by exit_sed() never regfree()s a regex_t
			 * whose contents are undefined */
			if (errmsg == NULL) {
				free(pattern);
				free(re);
				exit_sed(1, "sed: memory error\n");
			}
			regerror(ret, re, errmsg, errmsgsz);
			fprintf(stderr, "sed: %s\n", errmsg);
			free(errmsg);
			free(pattern);
			free(re);
			exit_sed(1, NULL);
		}

		free(pattern);
		*regex = re;
		idx++; /* advance idx one past the end of the /match/ */
	}
	else {
		fprintf(stderr, "sed.c:get_address: no address found in string\n");
		fprintf(stderr, "\t(you probably didn't check the string you passed me)\n");
		idx = -1;
	}

	return idx;
}
131 282
132 line++; 283static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
284{
285 int idx = 0;
286
287 /* parse the command
288 * format is: [addr][,addr]cmd
289 * |----||-----||-|
290 * part1 part2 part3
291 */
292
293 /* first part (if present) is an address: either a number or a /regex/ */
294 if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/')
295 idx = get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
296
297 /* second part (if present) will begin with a comma */
298 if (cmdstr[idx] == ',')
299 idx += get_address(&cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match);
300
301 /* last part (mandatory) will be a command */
302 if (cmdstr[idx] == '\0')
303 exit_sed(1, "sed: missing command\n");
304 if (!strchr("pds", cmdstr[idx])) /* <-- XXX add new commands here */
305 exit_sed(1, "sed: invalid command\n");
306 sed_cmd->cmd = cmdstr[idx];
307 /* special-case handling for 's' */
308 if (sed_cmd->cmd == 's') {
309 int oldidx;
310 /* format for substitution is:
311 * s/match/replace/g
312 * | |
313 * mandatory optional
314 */
315
316 /* verify that we have an 's' followed by a 'slash' */
317 if (cmdstr[++idx] != '/')
318 exit_sed(1, "sed: bad format in substitution expression\n");
319
320 /* get the substitution part */
321 idx += get_address(&cmdstr[idx], NULL, &sed_cmd->sub_match);
322
323 /* get the replacement part */
324 oldidx = idx;
325 idx = index_of_next_unescaped_slash(idx, cmdstr);
326 sed_cmd->replace = (char *)malloc(idx - oldidx + 1);
327 strncpy(sed_cmd->replace, &cmdstr[oldidx], idx - oldidx);
328 sed_cmd->replace[idx - oldidx] = 0;
329
330 /* store the 'g' if present */
331 if (cmdstr[++idx] == 'g')
332 sed_cmd->sub_g = 1;
133 } 333 }
134} 334}
135 335
136static void do_sed_append(FILE * fp, char *appendline, int quietFlag) 336static void add_cmd_str(const char *cmdstr)
137{ 337{
138 char buffer[BUFSIZE]; 338 char *my_cmdstr = trim_str(cmdstr);
139 int line = 1, doit;
140
141 while (fgets(buffer, BUFSIZE - 1, fp)) {
142 doit = 0;
143 if (addr_pattern) {
144 doit = !find_match(buffer, addr_pattern, FALSE);
145 } else if (addr_line == NO_LINE)
146 doit = 1;
147 else if (addr_line == LAST_LINE) {
148 if (at_last(fp))
149 doit = 1;
150 } else {
151 if (line == addr_line)
152 doit = 1;
153 }
154 if (negated)
155 doit = 1 - doit;
156 if (quietFlag == FALSE) {
157 fprintf(stdout, buffer);
158 }
159 if (doit) {
160 fputs(appendline, stdout);
161 fputc('\n', stdout);
162 }
163 339
164 line++; 340 /* if this is a comment, don't even bother */
341 if (my_cmdstr[0] == '#') {
342 free(my_cmdstr);
343 return;
165 } 344 }
345
346 /* grow the array */
347 sed_cmds = realloc(sed_cmds, sizeof(struct sed_cmd) * (++ncmds));
348 /* zero new element */
349 memset(&sed_cmds[ncmds-1], 0, sizeof(struct sed_cmd));
350 /* load command string into new array element */
351 parse_cmd_str(&sed_cmds[ncmds-1], my_cmdstr);
166} 352}
167 353
168extern int sed_main(int argc, char **argv) 354
/*
 * load_cmd_file - reads sed commands, one per line, from a script file
 *
 * filename: path of the script to read
 *
 * Each line is handed to add_cmd_str(). If the file cannot be opened, a
 * "sed: <file>: <reason>" message is printed and the program exits with
 * status 1.
 */
static void load_cmd_file(char *filename)
{
	FILE *cmdfile;
	char *line;

	cmdfile = fopen(filename, "r");
	if (cmdfile == NULL) {
		fprintf(stderr, "sed: %s: %s\n", filename, strerror(errno));
		exit_sed(1, NULL);
	}

	while ((line = get_line_from_file(cmdfile)) != NULL) {
		size_t len = strlen(line);

		/* eat the newline, but only if there is one: the last line of a
		 * file may end without it and must not lose its final character */
		if (len > 0 && line[len - 1] == '\n')
			line[len - 1] = '\0';
		add_cmd_str(line);
		free(line);
	}

	fclose(cmdfile);
}
188 370
189 while (argc > 1) {
190 if (**argv != '-')
191 usage(sed_usage);
192 argc--;
193 cp = *argv++;
194 stopNow = FALSE;
195 371
196 while (*++cp && stopNow == FALSE) { 372static int do_sed_command(const struct sed_cmd *sed_cmd, const char *line)
197 switch (*cp) { 373{
198 case 'n': 374 int altered = 0;
199 quietFlag = TRUE;
200 break;
201 case 'e':
202 if (*(cp + 1) == 0 && --argc < 0) {
203 usage(sed_usage);
204 }
205 if (*++cp != 's')
206 cp = *argv++;
207
208 /* Read address if present */
209 SKIPSPACES(cp);
210 if (*cp == '$') {
211 addr_line = LAST_LINE;
212 cp++;
213 } else {
214 if (isdigit(*cp)) { /* LINE ADDRESS */
215 line_s = cp;
216 while (isdigit(*cp))
217 cp++;
218 if (cp > line_s) {
219 /* numeric line */
220 saved = *cp;
221 *cp = '\0';
222 addr_line = atoi(line_s);
223 *cp = saved;
224 }
225 } else if (*cp == '/') { /* PATTERN ADDRESS */
226 pos = addr_pattern = cp + 1;
227 pos = strchr(pos, '/');
228 if (!pos)
229 usage(sed_usage);
230 *pos = '\0';
231 cp = pos + 1;
232 }
233 }
234 375
235 SKIPSPACES(cp); 376 switch (sed_cmd->cmd) {
236 if (*cp == '!') {
237 negated++;
238 cp++;
239 }
240 377
241 /* Read command */ 378 case 'p':
379 fputs(line, stdout);
380 break;
242 381
243 SKIPSPACES(cp); 382 case 'd':
244 switch (*cp) { 383 altered++;
245 case 's': /* REPLACE */ 384 break;
246 if (strlen(cp) <= 3 || *(cp + 1) != '/') 385
247 break; 386 case 's': /* oo, a fun one :-) */
248 sed_f = f_replace; 387
249 388 /* we only substitute if the substitution 'search' expression matches */
250 pos = needle = cp + 2; 389 if (regexec(sed_cmd->sub_match, line, 0, NULL, 0) == 0) {
251 390 regmatch_t regmatch;
252 for (;;) { 391 int i;
253 pos = strchr(pos, '/'); 392 char *ptr = (char *)line;
254 if (pos == NULL) { 393
255 usage(sed_usage); 394 while (*ptr) {
256 } 395 /* if we can match the search string... */
257 if (*(pos - 1) == '\\') { 396 if (regexec(sed_cmd->sub_match, ptr, 1, &regmatch, 0) == 0) {
258 pos++; 397 /* print everything before the match, */
259 continue; 398 for (i = 0; i < regmatch.rm_so; i++)
260 } 399 fputc(ptr[i], stdout);
261 break; 400 /* then print the substitution in its place */
401 fputs(sed_cmd->replace, stdout);
402 /* then advance past the match */
403 ptr += regmatch.rm_eo;
404 /* and let the calling function know that something
405 * has been changed */
406 altered++;
407
408 /* if we're not doing this globally... */
409 if (!sed_cmd->sub_g)
410 break;
262 } 411 }
263 *pos = 0; 412 /* if we COULD NOT match the search string (meaning we've gone past
264 newNeedle = ++pos; 413 * all previous instances), get out */
265 for (;;) { 414 else
266 pos = strchr(pos, '/');
267 if (pos == NULL) {
268 usage(sed_usage);
269 }
270 if (*(pos - 1) == '\\') {
271 pos++;
272 continue;
273 }
274 break; 415 break;
275 } 416 }
276 *pos = 0;
277 if (pos + 2 != 0) {
278 while (*++pos) {
279 switch (*pos) {
280 case 'i':
281 ignoreCase = TRUE;
282 break;
283 case 'p':
284 printFlag = TRUE;
285 break;
286 case 'g':
287 break;
288 default:
289 usage(sed_usage);
290 }
291 }
292 }
293 cp = pos;
294 /* fprintf(stderr, "replace '%s' with '%s'\n", needle, newNeedle); */
295 break;
296 417
297 case 'a': /* APPEND */ 418 /* is there anything left to print? */
298 if (strlen(cp) < 2) 419 if (*ptr)
299 break; 420 fputs(ptr, stdout);
300 sed_f = f_append; 421 }
301 appendline = ++cp; 422
302 /* fprintf(stderr, "append '%s'\n", appendline); */ 423 break;
303 break; 424 }
425
426 return altered;
427}
428
429static void process_file(FILE *file)
430{
431 char *line = NULL;
432 static int linenum = 0; /* GNU sed does not restart counting lines at EOF */
433 unsigned int still_in_range = 0;
434 int line_altered;
435 int i;
436
437 /* go through every line in the file */
438 while ((line = get_line_from_file(file)) != NULL) {
439
440 linenum++;
441 line_altered = 0;
442
443 /* for every line, go through all the commands */
444 for (i = 0; i < ncmds; i++) {
445
446 /* are we acting on a range of matched lines? */
447 if (sed_cmds[i].beg_match && sed_cmds[i].end_match) {
448 if (still_in_range || regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) {
449 line_altered += do_sed_command(&sed_cmds[i], line);
450 still_in_range = 1;
451 if (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0)
452 still_in_range = 0;
304 } 453 }
454 }
305 455
306 stopNow = TRUE; 456 /* are we trying to match a single line? */
307 break; 457 else if (sed_cmds[i].beg_match) {
458 if (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0)
459 line_altered += do_sed_command(&sed_cmds[i], line);
460 }
308 461
309 default: 462 /* are we acting on a range of line numbers? */
310 usage(sed_usage); 463 else if (sed_cmds[i].beg_line > 0 && sed_cmds[i].end_line > 0) {
464 if (linenum >= sed_cmds[i].beg_line && linenum <= sed_cmds[i].end_line)
465 line_altered += do_sed_command(&sed_cmds[i], line);
311 } 466 }
312 }
313 }
314 467
315 if (argc == 0) { 468 /* are we acting on a specified line number */
316 switch (sed_f) { 469 else if (sed_cmds[i].beg_line > 0) {
317 case f_none: 470 if (linenum == sed_cmds[i].beg_line)
318 break; 471 line_altered += do_sed_command(&sed_cmds[i], line);
319 case f_replace:
320 do_sed_repl(stdin, needle, newNeedle, ignoreCase, printFlag,
321 quietFlag);
322 break;
323 case f_append:
324 do_sed_append(stdin, appendline, quietFlag);
325 break;
326 }
327 } else {
328 while (argc-- > 0) {
329 name = *argv++;
330
331 fp = fopen(name, "r");
332 if (fp == NULL) {
333 perror(name);
334 continue;
335 } 472 }
336 473
337 switch (sed_f) { 474 /* not acting on matches or line numbers. act on every line */
338 case f_none: 475 else
476 line_altered += do_sed_command(&sed_cmds[i], line);
477
478 }
479
480 /* we will print the line unless we were told to be quiet or if the
481 * line was altered (via a 'd'elete or 's'ubstitution) */
482 if (!be_quiet && !line_altered)
483 fputs(line, stdout);
484
485 free(line);
486 }
487}
488
489extern int sed_main(int argc, char **argv)
490{
491 int opt;
492
493 /* do special-case option parsing */
494 if (argv[1] && (strcmp(argv[1], "--help") == 0))
495 usage(sed_usage);
496
497 /* do normal option parsing */
498 while ((opt = getopt(argc, argv, "Vhne:f:")) > 0) {
499 switch (opt) {
500 case 'V':
501 printf("Print Busybox version here\n");
502 exit(0);
339 break; 503 break;
340 case f_replace: 504 case 'h':
341 do_sed_repl(fp, needle, newNeedle, ignoreCase, printFlag, 505 usage(sed_usage);
342 quietFlag);
343 break; 506 break;
344 case f_append: 507 case 'n':
345 do_sed_append(fp, appendline, quietFlag); 508 be_quiet++;
346 break; 509 break;
347 } 510 case 'e':
511 add_cmd_str(optarg);
512 break;
513 case 'f':
514 load_cmd_file(optarg);
515 break;
516 }
517 }
518
519 /* if we didn't get a pattern from a -e and no command file was specified,
520 * argv[optind] should be the pattern. no pattern, no worky */
521 if (ncmds == 0) {
522 if (argv[optind] == NULL)
523 usage(sed_usage);
524 else {
525 add_cmd_str(argv[optind]);
526 optind++;
527 }
528 }
348 529
349 if (ferror(fp))
350 perror(name);
351 530
352 fclose(fp); 531 /* argv[(optind)..(argc-1)] should be names of file to process. If no
532 * files were specified or '-' was specified, take input from stdin.
533 * Otherwise, we process all the files specified. */
534 if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) {
535 process_file(stdin);
536 }
537 else {
538 int i;
539 FILE *file;
540 for (i = optind; i < argc; i++) {
541 file = fopen(argv[i], "r");
542 if (file == NULL) {
543 fprintf(stderr, "sed: %s: %s\n", argv[i], strerror(errno));
544 } else {
545 process_file(file);
546 fclose(file);
547 }
353 } 548 }
354 } 549 }
355 return(TRUE); 550
356} 551 exit_sed(0, NULL);
357 552
553 /* not reached */
554 return 0;
555}
358 556
359/* END CODE */ 557#ifdef TEST_SED
/* stand-alone test driver: hands the command line straight to the applet */
int main(int argc, char **argv)
{
	const int status = sed_main(argc, argv);

	return status;
}
562#endif