aboutsummaryrefslogtreecommitdiff
path: root/editors/sed.c
diff options
context:
space:
mode:
authorGlenn L McGrath <bug1@ihug.co.nz>2003-10-01 03:06:16 +0000
committerGlenn L McGrath <bug1@ihug.co.nz>2003-10-01 03:06:16 +0000
commitaa5a602689265a4351c890efe5d8e7793e777e3c (patch)
treee253699e7d4abbccc47e387e09ef1c2c6d4716c4 /editors/sed.c
parente6ba16f830bf2524b39e3e1b7c8157808921f95d (diff)
downloadbusybox-w32-aa5a602689265a4351c890efe5d8e7793e777e3c.tar.gz
busybox-w32-aa5a602689265a4351c890efe5d8e7793e777e3c.tar.bz2
busybox-w32-aa5a602689265a4351c890efe5d8e7793e777e3c.zip
Patch by Rob Landley, work in progress update, fixes lots of bugs,
introduces a few others (but they are being worked on)
Diffstat (limited to 'editors/sed.c')
-rw-r--r--editors/sed.c1458
1 files changed, 678 insertions, 780 deletions
diff --git a/editors/sed.c b/editors/sed.c
index 1c016ac57..6452a321c 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -1,3 +1,4 @@
1/* vi: set sw=4 ts=4: */
1/* 2/*
2 * sed.c - very minimalist version of sed 3 * sed.c - very minimalist version of sed
3 * 4 *
@@ -22,6 +23,24 @@
22 * 23 *
23 */ 24 */
24 25
26/* Code overview.
27
28 Files are laid out to avoid unnecessary function declarations. So for
29 example, every function add_cmd calls occurs before add_cmd in this file.
30
31 add_cmd() is called on each line of sed command text (from a file or from
32 the command line). It calls get_address() and parse_cmd_args(). The
33 resulting sed_cmd_t structures are appended to a linked list
34 (sed_cmd_head/sed_cmd_tail).
35
36 process_file() does actual sedding, reading data lines from an input FILE *
37 (which could be stdin) and applying the sed command list (sed_cmd_head) to
38 each of the resulting lines.
39
40 sed_main() is where external code calls into this, with a command line.
41*/
42
43
25/* 44/*
26 Supported features and commands in this version of sed: 45 Supported features and commands in this version of sed:
27 46
@@ -64,84 +83,72 @@
64#include "busybox.h" 83#include "busybox.h"
65 84
/*
 * One parsed sed command: its address range, the command character and
 * whatever argument data that command needs.  Instances are allocated
 * zero-filled (xcalloc) by add_cmd() and chained through `next`.
 */
typedef struct sed_cmd_s {
	/* Ordered by alignment requirements (original author's note:
	 * 36 bytes on x86 -- not re-measured here). */

	/* address storage */
	regex_t *beg_match;	/* sed -e '/match/cmd' */
	regex_t *end_match;	/* sed -e '/match/,/end_match/cmd' */
	regex_t *sub_match;	/* For 's/sub_match/string/'; NULL means "reuse
				 * the previously used regex" at run time */
	int beg_line;		/* 'sed 1p'   0 == apply commands to all lines */
	int end_line;		/* 'sed 1,3p' 0 == one line only.  -1 = last line ($) */

	FILE *file;		/* Stream opened for the (w) command.  NULL when the
				 * command has no file: the cleanup code tests this
				 * pointer for non-NULL before closing it. */
	char *string;		/* Per-command data: replacement text (s), edit text
				 * (a, i, c), file name (r, w), label (:, b, t) or the
				 * interleaved from/to table (y). */

	unsigned short which_match;	/* (s) Which match to replace (0 for all) */

	/* Bitfields (gcc won't group them if we don't) */
	unsigned int invert:1;		/* the '!' after the address */
	unsigned int in_match:1;	/* Next line also included in match? */
	unsigned int no_newline:1;	/* Last line written to `file` had no '\n' */
	unsigned int sub_p:1;		/* (s) print option */


	/* GENERAL FIELDS */
	char cmd;			/* The command char: abcdDgGhHilnNpPqrstwxy:={} */
	struct sed_cmd_s *next;		/* Next command (linked list, NULL terminated) */
} sed_cmd_t;
116 111
117
118/* externs */
119extern void xregcomp(regex_t * preg, const char *regex, int cflags);
120extern int optind; /* in unistd.h */
121extern char *optarg; /* ditto */
122
123/* globals */ 112/* globals */
124/* options */ 113/* options */
125static int be_quiet = 0; 114static int be_quiet = 0;
115
126static const char bad_format_in_subst[] = 116static const char bad_format_in_subst[] =
127 "bad format in substitution expression"; 117 "bad format in substitution expression";
118const char *const semicolon_whitespace = "; \n\r\t\v";
119
120regmatch_t regmatch[10];
121static regex_t *previous_regex_ptr = NULL;
128 122
129/* linked list of sed commands */ 123/* linked list of sed commands */
130static sed_cmd_t sed_cmd_head; 124static sed_cmd_t sed_cmd_head;
131static sed_cmd_t *sed_cmd_tail = &sed_cmd_head; 125static sed_cmd_t *sed_cmd_tail = &sed_cmd_head;
132 126
133const char *const semicolon_whitespace = "; \n\r\t\v\0"; 127/* Linked list of append lines */
134static regex_t *previous_regex_ptr = NULL; 128struct append_list {
135 129 char *string;
130 struct append_list *next;
131};
132struct append_list *append_head=NULL, *append_tail=NULL;
136 133
137#ifdef CONFIG_FEATURE_CLEAN_UP 134#ifdef CONFIG_FEATURE_CLEAN_UP
138static void destroy_cmd_strs(void) 135static void free_and_close_stuff(void)
139{ 136{
140 sed_cmd_t *sed_cmd = sed_cmd_head.next; 137 sed_cmd_t *sed_cmd = sed_cmd_head.next;
141 138
139 while(append_head) {
140 append_tail=append_head->next;
141 free(append_head->string);
142 free(append_head);
143 append_head=append_tail;
144 }
145
142 while (sed_cmd) { 146 while (sed_cmd) {
143 sed_cmd_t *sed_cmd_next = sed_cmd->next; 147 sed_cmd_t *sed_cmd_next = sed_cmd->next;
144 148
149 if(sed_cmd->file)
150 bb_xprint_and_close_file(sed_cmd->file);
151
145 if (sed_cmd->beg_match) { 152 if (sed_cmd->beg_match) {
146 regfree(sed_cmd->beg_match); 153 regfree(sed_cmd->beg_match);
147 free(sed_cmd->beg_match); 154 free(sed_cmd->beg_match);
@@ -154,17 +161,41 @@ static void destroy_cmd_strs(void)
154 regfree(sed_cmd->sub_match); 161 regfree(sed_cmd->sub_match);
155 free(sed_cmd->sub_match); 162 free(sed_cmd->sub_match);
156 } 163 }
157 free(sed_cmd->replace); 164 free(sed_cmd->string);
158 free(sed_cmd->editline);
159 free(sed_cmd->filename);
160 free(sed_cmd->translate);
161 free(sed_cmd->label);
162 free(sed_cmd); 165 free(sed_cmd);
163 sed_cmd = sed_cmd_next; 166 sed_cmd = sed_cmd_next;
164 } 167 }
165} 168}
166#endif 169#endif
167 170
/*
 * Copy the first `len` characters of `string` into `dest`, rewriting the
 * two-character sequence backslash+`from` into the single character `to`.
 * Any other backslash pair is copied through unchanged, so an escaped
 * backslash followed by `from` is NOT rewritten.  `dest` is always
 * NUL-terminated and never receives more than `len` characters, so it
 * needs room for len+1 bytes.
 *
 * `dest` may be the same buffer as `string` (the output never runs ahead
 * of the input), which is how the 'y' command unescapes in place.
 *
 * A backslash that is the very last counted character is copied as-is;
 * the character after it lies outside the requested range and is neither
 * examined nor consumed.
 */
static void parse_escapes(char *dest, const char *string, int len, char from, char to)
{
	int i = 0;

	while (i < len) {
		/* Only treat the backslash as an escape when its partner is
		 * still inside the first `len` characters. */
		if (string[i] == '\\' && i + 1 < len) {
			if (string[i + 1] == from) {
				*dest++ = to;
				i += 2;
				continue;
			}
			/* Keep the backslash; the escaped character itself is
			 * copied by the statement below, so the pair is consumed
			 * as a unit. */
			*dest++ = string[i++];
		}
		*dest++ = string[i++];
	}
	*dest = '\0';
}
189
/*
 * Return a freshly allocated copy of the first `len` characters of
 * `string` in which every backslash+'n' pair has been turned into a real
 * newline.  The caller owns the result and must free() it.
 */
static char *copy_parsing_slashn(const char *string, int len)
{
	char *copy;

	copy = xmalloc(len + 1);	/* output is never longer than the input */
	parse_escapes(copy, string, len, 'n', '\n');

	return copy;
}
197
198
168/* 199/*
169 * index_of_next_unescaped_regexp_delim - walks left to right through a string 200 * index_of_next_unescaped_regexp_delim - walks left to right through a string
170 * beginning at a specified index and returns the index of the next regular 201 * beginning at a specified index and returns the index of the next regular
@@ -182,7 +213,7 @@ static int index_of_next_unescaped_regexp_delim(const char delimiter,
182 for (; (ch = str[idx]); idx++) { 213 for (; (ch = str[idx]); idx++) {
183 if (bracket != -1) { 214 if (bracket != -1) {
184 if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 215 if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2
185 && str[idx - 1] == '^'))) 216 && str[idx - 1] == '^')))
186 bracket = -1; 217 bracket = -1;
187 } else if (escaped) 218 } else if (escaped)
188 escaped = 0; 219 escaped = 0;
@@ -209,19 +240,15 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
209 240
210 /* verify that the 's' or 'y' is followed by something. That something 241 /* verify that the 's' or 'y' is followed by something. That something
211 * (typically a 'slash') is now our regexp delimiter... */ 242 * (typically a 'slash') is now our regexp delimiter... */
212 if (*cmdstr == '\0') 243 if (*cmdstr == '\0') bb_error_msg_and_die(bad_format_in_subst);
213 bb_error_msg_and_die(bad_format_in_subst); 244 delimiter = *(cmdstr_ptr++);
214 else
215 delimiter = *cmdstr_ptr;
216
217 cmdstr_ptr++;
218 245
219 /* save the match string */ 246 /* save the match string */
220 idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr); 247 idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
221 if (idx == -1) { 248 if (idx == -1) {
222 bb_error_msg_and_die(bad_format_in_subst); 249 bb_error_msg_and_die(bad_format_in_subst);
223 } 250 }
224 *match = bb_xstrndup(cmdstr_ptr, idx); 251 *match = copy_parsing_slashn(cmdstr_ptr, idx);
225 252
226 /* save the replacement string */ 253 /* save the replacement string */
227 cmdstr_ptr += idx + 1; 254 cmdstr_ptr += idx + 1;
@@ -229,7 +256,7 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
229 if (idx == -1) { 256 if (idx == -1) {
230 bb_error_msg_and_die(bad_format_in_subst); 257 bb_error_msg_and_die(bad_format_in_subst);
231 } 258 }
232 *replace = bb_xstrndup(cmdstr_ptr, idx); 259 *replace = copy_parsing_slashn(cmdstr_ptr, idx);
233 260
234 return ((cmdstr_ptr - cmdstr) + idx); 261 return ((cmdstr_ptr - cmdstr) + idx);
235} 262}
@@ -248,94 +275,109 @@ static int get_address(char *my_str, int *linenum, regex_t ** regex)
248 *linenum = -1; 275 *linenum = -1;
249 pos++; 276 pos++;
250 } else if (*my_str == '/' || *my_str == '\\') { 277 } else if (*my_str == '/' || *my_str == '\\') {
251 int next, idx_start = 1; 278 int next;
252 char delimiter; 279 char delimiter;
280 char *temp;
253 281
254 delimiter = '/'; 282 if (*my_str == '\\') delimiter = *(++pos);
255 if (*my_str == '\\') { 283 else delimiter = '/';
256 idx_start++;
257 delimiter = *(++pos);
258 }
259 next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); 284 next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
260 if (next == -1) { 285 if (next == -1)
261 bb_error_msg_and_die("unterminated match expression"); 286 bb_error_msg_and_die("unterminated match expression");
262 } 287
263 pos += next; 288 temp=copy_parsing_slashn(pos,next);
264 *pos = '\0';
265
266 *regex = (regex_t *) xmalloc(sizeof(regex_t)); 289 *regex = (regex_t *) xmalloc(sizeof(regex_t));
267 xregcomp(*regex, my_str + idx_start, REG_NEWLINE); 290 xregcomp(*regex, temp, REG_NEWLINE);
268 pos++; /* so it points to the next character after the last '/' */ 291 free(temp);
292 /* Move position to next character after last delimiter */
293 pos+=(next+1);
269 } 294 }
270 return pos - my_str; 295 return pos - my_str;
271} 296}
272 297
298/* Grab a filename. Whitespace at start is skipped, then goes to EOL. */
299static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr, char **retval)
300{
301 int start = 0, idx, hack=0;
302
303 /* Skip whitespace, then grab filename to end of line */
304 while (isspace(filecmdstr[start])) start++;
305 idx=start;
306 while(filecmdstr[idx] && filecmdstr[idx]!='\n') idx++;
307 /* If lines glued together, put backslash back. */
308 if(filecmdstr[idx]=='\n') hack=1;
309 if(idx==start) bb_error_msg_and_die("Empty filename");
310 *retval = bb_xstrndup(filecmdstr+start, idx-start+hack+1);
311 if(hack) *(idx+*retval)='\\';
312
313 return idx;
314}
315
273static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) 316static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr)
274{ 317{
275 int cflags = 0; 318 int cflags = 0;
276 char *match; 319 char *match;
277 int idx = 0; 320 int idx = 0;
278 int j;
279 321
280 /* 322 /*
281 * the string that gets passed to this function should look like this: 323 * A substitution command should look something like this:
282 * s/match/replace/gIp 324 * s/match/replace/ #gIpw
283 * || | ||| 325 * || | |||
284 * mandatory optional 326 * mandatory optional
285 *
286 * (all three of the '/' slashes are mandatory)
287 */ 327 */
288 idx = parse_regex_delim(substr, &match, &sed_cmd->replace); 328 idx = parse_regex_delim(substr, &match, &sed_cmd->string);
289 329
290 /* determine the number of back references in the match string */ 330 /* determine the number of back references in the match string */
291 /* Note: we compute this here rather than in the do_subst_command() 331 /* Note: we compute this here rather than in the do_subst_command()
292 * function to save processor time, at the expense of a little more memory 332 * function to save processor time, at the expense of a little more memory
293 * (4 bits) per sed_cmd */ 333 * (4 bits) per sed_cmd */
294 334
295 for (j = 0; match[j]; j++) {
296 /* GNU/POSIX sed does not save more than nine backrefs */
297 if (match[j] == '\\' && match[j + 1] == '('
298 && sed_cmd->num_backrefs <= 9)
299 sed_cmd->num_backrefs++;
300 }
301
302 /* process the flags */ 335 /* process the flags */
303#ifndef CONFIG_FEATURE_SED_GNU_COMPATABILITY 336
304 idx++; 337 sed_cmd->which_match=1;
305#else 338 while (substr[++idx]) {
306 /* GNU sed allows blanks before the flag, this can lead to an incosistent 339 /* Parse match number */
307 * interpretation of 's/a/b/ g' as being either 's/a/b/g' or 's/a/b/;g'. 340 if(isdigit(substr[idx])) {
308 * which results in very different behaviour. 341 if(match[0]!='^') {
309 */ 342 /* Match 0 treated as all, multiple matches we take the last one. */
310 while (substr[++idx]) 343 char *pos=substr+idx;
311#endif 344 sed_cmd->which_match=(unsigned short)strtol(substr+idx,&pos,10);
345 idx=pos-substr;
346 }
347 continue;
348 }
312 switch (substr[idx]) { 349 switch (substr[idx]) {
313 case 'g': 350 /* Replace all occurrences */
314 if (match[0] != '^') { 351 case 'g':
315 sed_cmd->sub_g = 1; 352 if (match[0] != '^') sed_cmd->which_match = 0;
353 break;
354 /* Print pattern space */
355 case 'p':
356 sed_cmd->sub_p = 1;
357 break;
358 case 'w':
359 {
360 char *temp;
361 idx+=parse_file_cmd(sed_cmd,substr+idx,&temp);
362
363 break;
316 } 364 }
317 break; 365 /* Ignore case (gnu exension) */
318 /* Hmm, i dont see the I option mentioned in the standard */ 366 case 'I':
319 case 'I': 367 cflags |= REG_ICASE;
320 cflags |= REG_ICASE; 368 break;
321 break; 369 /* Skip spaces */
322 case 'p': 370 case ' ':
323 sed_cmd->sub_p = 1; 371 case '\t':
324 break; 372 break;
325#ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY 373 case ';':
326 default: 374 case '}':
327 /* any whitespace or semicolon trailing after a s/// is ok */
328 if (strchr(semicolon_whitespace, substr[idx]))
329 goto out; 375 goto out;
330 bb_error_msg_and_die("bad option in substitution expression"); 376 default:
331#endif 377 bb_error_msg_and_die("bad option in substitution expression");
332 } 378 }
333 379 }
334#ifndef CONFIG_FEATURE_SED_GNU_COMPATABILITY
335 idx++;
336#else
337out: 380out:
338#endif
339 /* compile the match string into a regex */ 381 /* compile the match string into a regex */
340 if (*match != '\0') { 382 if (*match != '\0') {
341 /* If match is empty, we use last regex used at runtime */ 383 /* If match is empty, we use last regex used at runtime */
@@ -347,166 +389,61 @@ out:
347 return idx; 389 return idx;
348} 390}
349 391
350static void replace_slash_n(char *string)
351{
352 char *dest;
353
354 for (dest = string; *string; string++, dest++) {
355 if ((string[0] == '\\') && (string[1] == 'n')) {
356 *dest = '\n';
357 string++;
358 } else {
359 *dest = *string;
360 }
361 }
362 *dest=0;
363}
364
365static int parse_translate_cmd(sed_cmd_t * const sed_cmd, const char *cmdstr)
366{
367 char *match;
368 char *replace;
369 int idx;
370 int i;
371
372 idx = parse_regex_delim(cmdstr, &match, &replace);
373 replace_slash_n(match);
374 replace_slash_n(replace);
375 sed_cmd->translate = xcalloc(1, (strlen(match) + 1) * 2);
376 for (i = 0; (match[i] != 0) && (replace[i] != 0); i++) {
377 sed_cmd->translate[i * 2] = match[i];
378 sed_cmd->translate[(i * 2) + 1] = replace[i];
379 }
380 return (idx + 1);
381}
382
383static int parse_edit_cmd(sed_cmd_t * sed_cmd, const char *editstr)
384{
385 int i, j;
386
387 /*
388 * the string that gets passed to this function should look like this:
389 *
390 * need one of these
391 * |
392 * | this backslash (immediately following the edit command) is mandatory
393 * | |
394 * [aic]\
395 * TEXT1\
396 * TEXT2\
397 * TEXTN
398 *
399 * as soon as we hit a TEXT line that has no trailing '\', we're done.
400 * this means a command like:
401 *
402 * i\
403 * INSERTME
404 *
405 * is a-ok.
406 *
407 */
408 if ((*editstr != '\\') || ((editstr[1] != '\n') && (editstr[1] != '\r'))) {
409 bb_error_msg_and_die("bad format in edit expression");
410 }
411
412 /* store the edit line text */
413 sed_cmd->editline = xmalloc(strlen(&editstr[2]) + 2);
414 for (i = 2, j = 0;
415 editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; i++, j++) {
416 if ((editstr[i] == '\\') && strchr("\n\r", editstr[i + 1]) != NULL) {
417 sed_cmd->editline[j] = '\n';
418 i++;
419 } else
420 sed_cmd->editline[j] = editstr[i];
421 }
422
423 /* figure out if we need to add a newline */
424 if (sed_cmd->editline[j - 1] != '\n')
425 sed_cmd->editline[j++] = '\n';
426
427 /* terminate string */
428 sed_cmd->editline[j] = '\0';
429
430 return i;
431}
432
433
434static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr)
435{
436 int idx = 0;
437 int filenamelen = 0;
438
439 /*
440 * the string that gets passed to this function should look like this:
441 * '[ ]filename'
442 * | |
443 * | a filename
444 * |
445 * optional whitespace
446
447 * re: the file to be read, the GNU manual says the following: "Note that
448 * if filename cannot be read, it is treated as if it were an empty file,
449 * without any error indication." Thus, all of the following commands are
450 * perfectly legal:
451 *
452 * sed -e '1r noexist'
453 * sed -e '1r ;'
454 * sed -e '1r'
455 */
456
457 /* the file command may be followed by whitespace; move past it. */
458 while (isspace(filecmdstr[++idx])) {;
459 }
460
461 /* the first non-whitespace we get is a filename. the filename ends when we
462 * hit a normal sed command terminator or end of string */
463 filenamelen = strcspn(&filecmdstr[idx], semicolon_whitespace);
464 sed_cmd->filename = xmalloc(filenamelen + 1);
465 safe_strncpy(sed_cmd->filename, &filecmdstr[idx], filenamelen + 1);
466 return idx + filenamelen;
467}
468
469/* 392/*
470 * Process the commands arguments 393 * Process the commands arguments
471 */ 394 */
472static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr) 395static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr)
473{ 396{
474 /* handle (s)ubstitution command */ 397 /* handle (s)ubstitution command */
475 if (sed_cmd->cmd == 's') { 398 if (sed_cmd->cmd == 's') cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
476 cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
477 }
478 /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ 399 /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
479 else if (strchr("aic", sed_cmd->cmd)) { 400 else if (strchr("aic", sed_cmd->cmd)) {
480 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') 401 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
481 bb_error_msg_and_die 402 bb_error_msg_and_die
482 ("only a beginning address can be specified for edit commands"); 403 ("only a beginning address can be specified for edit commands");
483 cmdstr += parse_edit_cmd(sed_cmd, cmdstr); 404 while(isspace(*cmdstr)) cmdstr++;
484 } 405 sed_cmd->string = bb_xstrdup(cmdstr);
406 cmdstr += strlen(cmdstr);
485 /* handle file cmds: (r)ead */ 407 /* handle file cmds: (r)ead */
486 else if (sed_cmd->cmd == 'r') { 408 } else if(strchr("rw", sed_cmd->cmd)) {
487 if (sed_cmd->end_line || sed_cmd->end_match) 409 if (sed_cmd->end_line || sed_cmd->end_match)
488 bb_error_msg_and_die("Command only uses one address"); 410 bb_error_msg_and_die("Command only uses one address");
489 cmdstr += parse_file_cmd(sed_cmd, cmdstr); 411 cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string);
490 } 412 if(sed_cmd->cmd=='w')
413 sed_cmd->file=bb_xfopen(sed_cmd->string,"w");
491 /* handle branch commands */ 414 /* handle branch commands */
492 else if (strchr(":bt", sed_cmd->cmd)) { 415 } else if (strchr(":bt", sed_cmd->cmd)) {
493 int length; 416 int length;
494 417
495 cmdstr += strspn(cmdstr, " "); 418 while(isspace(*cmdstr)) cmdstr++;
496 length = strcspn(cmdstr, semicolon_whitespace); 419 length = strcspn(cmdstr, semicolon_whitespace);
497 if (length) { 420 if (length) {
498 sed_cmd->label = strndup(cmdstr, length); 421 sed_cmd->string = strndup(cmdstr, length);
499 cmdstr += length; 422 cmdstr += length;
500 } 423 }
501 } 424 }
502 /* translation command */ 425 /* translation command */
503 else if (sed_cmd->cmd == 'y') { 426 else if (sed_cmd->cmd == 'y') {
504 cmdstr += parse_translate_cmd(sed_cmd, cmdstr); 427 char *match, *replace;
428 int i=cmdstr[0];
429
430 cmdstr+=parse_regex_delim(cmdstr, &match, &replace)+1;
431 /* \n already parsed, but \delimiter needs unescaping. */
432 parse_escapes(match,match,strlen(match),i,i);
433 parse_escapes(replace,replace,strlen(replace),i,i);
434
435 sed_cmd->string = xcalloc(1, (strlen(match) + 1) * 2);
436 for (i = 0; match[i] && replace[i]; i++) {
437 sed_cmd->string[i * 2] = match[i];
438 sed_cmd->string[(i * 2) + 1] = replace[i];
439 }
440 free(match);
441 free(replace);
505 } 442 }
506 /* if it wasnt a single-letter command that takes no arguments 443 /* if it wasnt a single-letter command that takes no arguments
507 * then it must be an invalid command. 444 * then it must be an invalid command.
508 */ 445 */
509 else if (strchr("dgGhHnNpPqx={}", sed_cmd->cmd) == 0) { 446 else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) {
510 bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd); 447 bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd);
511 } 448 }
512 449
@@ -514,663 +451,595 @@ static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr)
514 return (cmdstr); 451 return (cmdstr);
515} 452}
516 453
517static char *add_cmd(char *cmdstr)
518{
519 sed_cmd_t *sed_cmd;
520 454
521 /* Skip over leading whitespace and semicolons */ 455/* Parse address+command sets, skipping comment lines. */
522 cmdstr += strspn(cmdstr, semicolon_whitespace);
523 456
524 /* if we ate the whole thing, that means there was just trailing 457void add_cmd(char *cmdstr)
525 * whitespace or a final / no-op semicolon. either way, get out */ 458{
526 if (*cmdstr == '\0') { 459 static char *add_cmd_line=NULL;
527 return (NULL); 460 sed_cmd_t *sed_cmd;
528 }
529 461
530 /* if this is a comment, jump past it and keep going */ 462 /* Append this line to any unfinished line from last time. */
531 if (*cmdstr == '#') { 463 if(add_cmd_line) {
532 /* "#n" is the same as using -n on the command line */ 464 int lastlen=strlen(add_cmd_line);
533 if (cmdstr[1] == 'n') { 465 char *temp=xmalloc(lastlen+strlen(cmdstr)+2);
534 be_quiet++; 466
535 } 467 memcpy(temp,add_cmd_line,lastlen);
536 return (strpbrk(cmdstr, "\n\r")); 468 temp[lastlen]='\n';
469 strcpy(temp+lastlen+1,cmdstr);
470 free(add_cmd_line);
471 cmdstr=add_cmd_line=temp;
472 } else add_cmd_line=NULL;
473
474 /* If this line ends with backslash, request next line. */
475 int temp=strlen(cmdstr);
476 if(temp && cmdstr[temp-1]=='\\') {
477 if(!add_cmd_line) add_cmd_line=strdup(cmdstr);
478 add_cmd_line[temp-1]=0;
479 return;
537 } 480 }
538 481
539 /* parse the command 482 /* Loop parsing all commands in this line. */
540 * format is: [addr][,addr]cmd 483 while(*cmdstr) {
541 * |----||-----||-| 484 /* Skip leading whitespace and semicolons */
542 * part1 part2 part3 485 cmdstr += strspn(cmdstr, semicolon_whitespace);
543 */
544 486
545 sed_cmd = xcalloc(1, sizeof(sed_cmd_t)); 487 /* If no more commands, exit. */
488 if(!*cmdstr) break;
546 489
547 /* first part (if present) is an address: either a '$', a number or a /regex/ */ 490 /* if this is a comment, jump past it and keep going */
548 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); 491 if (*cmdstr == '#') {
492 /* "#n" is the same as using -n on the command line */
493 if (cmdstr[1] == 'n') be_quiet++;
494 if(!(cmdstr=strpbrk(cmdstr, "\n\r"))) break;
495 continue;
496 }
549 497
550 /* second part (if present) will begin with a comma */ 498 /* parse the command
551 if (*cmdstr == ',') { 499 * format is: [addr][,addr][!]cmd
552 int idx; 500 * |----||-----||-|
501 * part1 part2 part3
502 */
553 503
554 cmdstr++; 504 sed_cmd = xcalloc(1, sizeof(sed_cmd_t));
555 idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
556 if (idx == 0) {
557 bb_error_msg_and_die("get_address: no address found in string\n"
558 "\t(you probably didn't check the string you passed me)");
559 }
560 cmdstr += idx;
561 }
562 505
563 /* skip whitespace before the command */ 506 /* first part (if present) is an address: either a '$', a number or a /regex/ */
564 while (isspace(*cmdstr)) { 507 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
565 cmdstr++;
566 }
567 508
568 /* there my be the inversion flag between part2 and part3 */ 509 /* second part (if present) will begin with a comma */
569 if (*cmdstr == '!') { 510 if (*cmdstr == ',') {
570 sed_cmd->invert = 1; 511 int idx;
571 cmdstr++;
572 512
573#ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY
574 /* According to the spec
575 * It is unspecified whether <blank>s can follow a '!' character,
576 * and conforming applications shall not follow a '!' character
577 * with <blank>s.
578 */
579 /* skip whitespace before the command */
580 while (isspace(*cmdstr)) {
581 cmdstr++; 513 cmdstr++;
514 idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
515 if (!idx) bb_error_msg_and_die("get_address: no address found in string\n");
516 cmdstr += idx;
582 } 517 }
583#endif
584 }
585
586 /* last part (mandatory) will be a command */
587 if (*cmdstr == '\0')
588 bb_error_msg_and_die("missing command");
589 518
590 sed_cmd->cmd = *cmdstr; 519 /* skip whitespace before the command */
591 cmdstr++; 520 while (isspace(*cmdstr)) cmdstr++;
592 521
593 cmdstr = parse_cmd_str(sed_cmd, cmdstr); 522 /* Check for inversion flag */
523 if (*cmdstr == '!') {
524 sed_cmd->invert = 1;
525 cmdstr++;
594 526
595 /* Add the command to the command array */ 527 /* skip whitespace before the command */
596 sed_cmd_tail->next = sed_cmd; 528 while (isspace(*cmdstr)) cmdstr++;
597 sed_cmd_tail = sed_cmd_tail->next; 529 }
598 530
599 return (cmdstr); 531 /* last part (mandatory) will be a command */
600} 532 if (!*cmdstr) bb_error_msg_and_die("missing command");
533 sed_cmd->cmd = *(cmdstr++);
534 cmdstr = parse_cmd_args(sed_cmd, cmdstr);
601 535
602static void add_cmd_str(const char *cmdstr) 536 /* Add the command to the command array */
603{ 537 sed_cmd_tail->next = sed_cmd;
604 char *cmdstr_expanded = strdup(cmdstr); 538 sed_cmd_tail = sed_cmd_tail->next;
605 char *cmdstr_ptr;
606
607#ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE
608 cmdstr_ptr = cmdstr_expanded;
609 /* HACK: convert "\n" to match tranlated '\n' string */
610 while ((cmdstr_ptr = strstr(cmdstr_ptr, "\\n")) != NULL) {
611 int length = strlen(cmdstr) + 2;
612 cmdstr_expanded = realloc(cmdstr_expanded, length);
613 cmdstr_ptr = strstr(cmdstr_expanded, "\\n");
614 memmove(cmdstr_ptr + 1, cmdstr_ptr, strlen(cmdstr_ptr) + 1);
615 cmdstr_ptr[0] = '\\';
616 cmdstr_ptr += 3;
617 } 539 }
618#endif
619 cmdstr_ptr = cmdstr_expanded;
620 do {
621 cmdstr_ptr = add_cmd(cmdstr_ptr);
622 } while (cmdstr_ptr && strlen(cmdstr_ptr));
623
624 free(cmdstr_expanded);
625}
626
627 540
628static void load_cmd_file(const char *filename) 541 /* If we glued multiple lines together, free the memory. */
629{ 542 if(add_cmd_line) {
630 FILE *cmdfile; 543 free(add_cmd_line);
631 char *line; 544 add_cmd_line=NULL;
632 char *nextline;
633 char *e;
634
635 cmdfile = bb_xfopen(filename, "r");
636
637 while ((line = bb_get_line_from_file(cmdfile)) != NULL) {
638 /* if a line ends with '\' it needs the next line appended to it */
639 while (((e = last_char_is(line, '\n')) != NULL)
640 && (e > line) && (e[-1] == '\\')
641 && ((nextline = bb_get_line_from_file(cmdfile)) != NULL)) {
642 line = xrealloc(line, (e - line) + 1 + strlen(nextline) + 1);
643 strcat(line, nextline);
644 free(nextline);
645 }
646 /* eat trailing newline (if any) --if I don't do this, edit commands
647 * (aic) will print an extra newline */
648 chomp(line);
649 add_cmd_str(line);
650 free(line);
651 } 545 }
652} 546}
653 547
/* Growable output buffer that the substitution code appends to. */
struct pipeline {
	char *buf;	/* Space to hold string */
	int idx;	/* Space used */
	int len;	/* Space allocated */
} pipeline;

/* Number of bytes added each time the buffer fills up. */
#define PIPE_GROW 64

/*
 * Append one character to the global pipeline buffer, enlarging it by
 * PIPE_GROW bytes first when it is full.  No terminator is written here.
 */
void pipe_putc(char c)
{
	if (pipeline.len == pipeline.idx) {
		pipeline.buf = xrealloc(pipeline.buf, pipeline.len + PIPE_GROW);
		pipeline.len += PIPE_GROW;
	}

	pipeline.buf[pipeline.idx] = c;
	pipeline.idx++;
}
673 564
674#define pipeputc(c) pipe_putc(pipeline, c) 565static void do_subst_w_backrefs(const char *line, const char *replace)
675
676static void print_subst_w_backrefs(const char *line, const char *replace,
677 regmatch_t * regmatch, struct pipeline *const pipeline, int matches)
678{ 566{
679 int i; 567 int i,j;
680 568
681 /* go through the replacement string */ 569 /* go through the replacement string */
682 for (i = 0; replace[i]; i++) { 570 for (i = 0; replace[i]; i++) {
683 /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */ 571 /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */
684 if (replace[i] == '\\' && isdigit(replace[i + 1])) { 572 if (replace[i] == '\\' && replace[i+1]>0 && replace[i+1]<=9) {
685 int j; 573 int backref=replace[++i]-'0';
686 char tmpstr[2]; 574
687 int backref;
688
689 ++i; /* i now indexes the backref number, instead of the leading slash */
690 tmpstr[0] = replace[i];
691 tmpstr[1] = 0;
692 backref = atoi(tmpstr);
693 /* print out the text held in regmatch[backref] */ 575 /* print out the text held in regmatch[backref] */
694 if (backref <= matches && regmatch[backref].rm_so != -1) 576 if(regmatch[backref].rm_so != -1)
695 for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; 577 for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; j++)
696 j++) 578 pipe_putc(line[j]);
697 pipeputc(line[j]);
698 } 579 }
699 580
700 /* if we find a backslash escaped character, print the character */ 581 /* if we find a backslash escaped character, print the character */
701 else if (replace[i] == '\\') { 582 else if (replace[i] == '\\') pipe_putc(replace[++i]);
702 ++i;
703 pipeputc(replace[i]);
704 }
705
706 /* if we find an unescaped '&' print out the whole matched text.
707 * fortunately, regmatch[0] contains the indicies to the whole matched
708 * expression (kinda seems like it was designed for just such a
709 * purpose...) */
710 else if (replace[i] == '&') {
711 int j;
712 583
584 /* if we find an unescaped '&' print out the whole matched text. */
585 else if (replace[i] == '&')
713 for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++) 586 for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++)
714 pipeputc(line[j]); 587 pipe_putc(line[j]);
715 } 588 /* Otherwise just output the character. */
716 /* nothing special, just print this char of the replacement string to stdout */ 589 else pipe_putc(replace[i]);
717 else
718 pipeputc(replace[i]);
719 } 590 }
720} 591}
721 592
722static int do_subst_command(sed_cmd_t * sed_cmd, char **line) 593static int do_subst_command(sed_cmd_t * sed_cmd, char **line)
723{ 594{
724 char *hackline = *line; 595 char *oldline = *line;
725 struct pipeline thepipe = { NULL, 0, 0 };
726 struct pipeline *const pipeline = &thepipe;
727 int altered = 0; 596 int altered = 0;
728 int result; 597 int match_count=0;
729 regmatch_t *regmatch = NULL;
730 regex_t *current_regex; 598 regex_t *current_regex;
731 599
600 /* Handle empty regex. */
732 if (sed_cmd->sub_match == NULL) { 601 if (sed_cmd->sub_match == NULL) {
733 current_regex = previous_regex_ptr; 602 current_regex = previous_regex_ptr;
734 } else { 603 if(!current_regex)
735 previous_regex_ptr = current_regex = sed_cmd->sub_match; 604 bb_error_msg_and_die("No previous regexp.");
736 } 605 } else previous_regex_ptr = current_regex = sed_cmd->sub_match;
737 result = regexec(current_regex, hackline, 0, NULL, 0);
738 606
739 /* we only proceed if the substitution 'search' expression matches */ 607 /* Find the first match */
740 if (result == REG_NOMATCH) { 608 if(REG_NOMATCH==regexec(current_regex, oldline, 10, regmatch, 0))
741 return 0; 609 return 0;
742 }
743 610
744 /* whaddaya know, it matched. get the number of back references */ 611 /* Initialize temporary output buffer. */
745 regmatch = xmalloc(sizeof(regmatch_t) * (sed_cmd->num_backrefs + 1)); 612 pipeline.buf=xmalloc(PIPE_GROW);
746 613 pipeline.len=PIPE_GROW;
747 /* allocate more PIPE_GROW bytes 614 pipeline.idx=0;
748 if replaced string is larger than original */ 615
749 thepipe.len = strlen(hackline) + PIPE_GROW; 616 /* Now loop through, substituting for matches */
750 thepipe.buf = xcalloc(1, thepipe.len); 617 do {
751 /* buffer magic */
752 thepipe.buf[thepipe.len - 1] = PIPE_MAGIC;
753
754 /* and now, as long as we've got a line to try matching and if we can match
755 * the search string, we make substitutions */
756 while ((*hackline || !altered)
757 && (regexec(current_regex, hackline, sed_cmd->num_backrefs + 1,
758 regmatch, 0) != REG_NOMATCH)) {
759 int i; 618 int i;
760 619
620 match_count++;
621
622 /* If we aren't interested in this match, output old line to
623 end of match and continue */
624 if(sed_cmd->which_match && sed_cmd->which_match!=match_count) {
625 for(i=0;i<regmatch[0].rm_eo;i++)
626 pipe_putc(oldline[i]);
627 continue;
628 }
629
761 /* print everything before the match */ 630 /* print everything before the match */
762 for (i = 0; i < regmatch[0].rm_so; i++) 631 for (i = 0; i < regmatch[0].rm_so; i++) pipe_putc(oldline[i]);
763 pipeputc(hackline[i]);
764 632
765 /* then print the substitution string */ 633 /* then print the substitution string */
766 print_subst_w_backrefs(hackline, sed_cmd->replace, regmatch, pipeline, 634 do_subst_w_backrefs(oldline, sed_cmd->string);
767 sed_cmd->num_backrefs);
768 635
769 /* advance past the match */ 636 /* advance past the match */
770 hackline += regmatch[0].rm_eo; 637 oldline += regmatch[0].rm_eo;
771 /* flag that something has changed */ 638 /* flag that something has changed */
772 altered++; 639 altered++;
773 640
774 /* if we're not doing this globally, get out now */ 641 /* if we're not doing this globally, get out now */
775 if (!sed_cmd->sub_g) { 642 if (sed_cmd->which_match) break;
776 break; 643 } while (*oldline && (regexec(current_regex, oldline, 10, regmatch, 0) != REG_NOMATCH));
777 }
778 }
779 for (; *hackline; hackline++)
780 pipeputc(*hackline);
781 if (thepipe.buf[thepipe.idx] == PIPE_MAGIC)
782 thepipe.buf[thepipe.idx] = 0;
783 644
784 /* cleanup */ 645 /* Copy rest of string into output pipeline */
785 free(regmatch); 646
647 while(*oldline) pipe_putc(*(oldline++));
648 pipe_putc(0);
786 649
787 free(*line); 650 free(*line);
788 *line = thepipe.buf; 651 *line = pipeline.buf;
789 return altered; 652 return altered;
790} 653}
791 654
655/* Set command pointer to point to this label. (Does not handle null label.) */
792static sed_cmd_t *branch_to(const char *label) 656static sed_cmd_t *branch_to(const char *label)
793{ 657{
794 sed_cmd_t *sed_cmd; 658 sed_cmd_t *sed_cmd;
795 659
796 for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { 660 for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
797 if ((sed_cmd->cmd == ':') && (sed_cmd->label) && (strcmp(sed_cmd->label, label) == 0)) { 661 if ((sed_cmd->cmd == ':') && (sed_cmd->string) && (strcmp(sed_cmd->string, label) == 0)) {
798 return (sed_cmd); 662 return (sed_cmd);
799 } 663 }
800 } 664 }
801 bb_error_msg_and_die("Can't find label for jump to `%s'", label); 665 bb_error_msg_and_die("Can't find label for jump to `%s'", label);
802} 666}
803 667
804static void process_file(FILE * file) 668/* Append copy of string to append buffer */
669static void append(char *s)
805{ 670{
806 char *pattern_space; /* Posix requires it be able to hold at least 8192 bytes */ 671 struct append_list *temp=calloc(1,sizeof(struct append_list));
807 char *hold_space = NULL; /* Posix requires it be able to hold at least 8192 bytes */ 672
808 static int linenum = 0; /* GNU sed does not restart counting lines at EOF */ 673 if(append_head)
809 int altered; 674 append_tail=(append_tail->next=temp);
810 int force_print; 675 else append_head=append_tail=temp;
811 676 temp->string=strdup(s);
812 pattern_space = bb_get_chomped_line_from_file(file); 677}
813 if (pattern_space == NULL) { 678
814 return; 679static void flush_append(void)
680{
681 /* Output appended lines. */
682 while(append_head) {
683 puts(append_head->string);
684 append_tail=append_head->next;
685 free(append_head->string);
686 free(append_head);
687 append_head=append_tail;
688 }
689 append_head=append_tail=NULL;
690}
691
692/* Get next line of input, flushing append buffer and noting if we hit EOF
693 * without a newline on the last line.
694 */
695static char *get_next_line(FILE * file, int *no_newline)
696{
697 char *temp;
698 int len;
699
700 flush_append();
701 temp=bb_get_line_from_file(file);
702 if(temp) {
703 len=strlen(temp);
704 if(len && temp[len-1]=='\n') temp[len-1]=0;
705 else *no_newline=1;
815 } 706 }
816 707
708 return temp;
709}
710
711/* Output line of text. missing_newline means the last line output did not
712 end with a newline. no_newline means this line does not end with a
713 newline. */
714
715static int puts_maybe_newline(char *s, FILE *file, int missing_newline, int no_newline)
716{
717 if(missing_newline) fputc('\n',file);
718 fputs(s,file);
719 if(!no_newline) fputc('\n',file);
720
721 return no_newline;
722}
723
724#define sed_puts(s,n) missing_newline=puts_maybe_newline(s,stdout,missing_newline,n)
725
726static void process_file(FILE * file)
727{
728 char *pattern_space, *next_line, *hold_space=NULL;
729 static int linenum = 0, missing_newline=0;
730 int no_newline,next_no_newline=0;
731
732 next_line = get_next_line(file,&next_no_newline);
733
817 /* go through every line in the file */ 734 /* go through every line in the file */
818 do { 735 for(;;) {
819 char *next_line;
820 sed_cmd_t *sed_cmd; 736 sed_cmd_t *sed_cmd;
821 int substituted = 0; 737 int substituted=0;
822 /* This enables whole blocks of commands to be mask'ed out if the lead address doesnt match */ 738
823 int block_mask = 1; 739 /* Advance to next line. Stop if out of lines. */
740 if(!(pattern_space=next_line)) break;
741 no_newline=next_no_newline;
824 742
825 /* Read one line in advance so we can act on the last line, the '$' address */ 743 /* Read one line in advance so we can act on the last line, the '$' address */
826 next_line = bb_get_chomped_line_from_file(file); 744 next_line = get_next_line(file,&next_no_newline);
827 linenum++; 745 linenum++;
828 altered = 0; 746restart:
829 force_print = 0;
830
831 /* for every line, go through all the commands */ 747 /* for every line, go through all the commands */
832 for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { 748 for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
833 int deleted = 0; 749 int matched;
834 750
835 /* 751 /* Determine if this command matches this line: */
836 * entry point into sedding... 752
837 */ 753 /* Are we continuing a previous multi-line match? */
838 int matched = ( 754
839 /* no range necessary */ 755 sed_cmd->in_match = sed_cmd->in_match
840 (sed_cmd->beg_line == 0 && sed_cmd->end_line == 0 756
841 && sed_cmd->beg_match == NULL 757 /* Or is no range necessary? */
842 && sed_cmd->end_match == NULL) || 758 || (!sed_cmd->beg_line && !sed_cmd->end_line
843 /* this line number is the first address we're looking for */ 759 && !sed_cmd->beg_match && !sed_cmd->end_match)
844 (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum)) || 760
845 /* this line matches our first address regex */ 761 /* Or did we match the start of a numerical range? */
846 (sed_cmd->beg_match 762 || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum))
847 && (regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 763
848 0) == 0)) || 764 /* Or does this line match our begin address regex? */
849 /* we are currently within the beginning & ending address range */ 765 || (sed_cmd->beg_match &&
850 sed_cmd->still_in_range || ((sed_cmd->beg_line == -1) 766 !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0))
851 && (next_line == NULL)) 767
768 /* Or did we match last line of input? */
769 || (sed_cmd->beg_line == -1 && next_line == NULL);
770
771 /* Snapshot the value */
772
773 matched = sed_cmd->in_match;
774
775 /* Is this line the end of the current match? */
776
777 if(matched) {
778 sed_cmd->in_match = !(
779 /* has the ending line come, or is this a single address command? */
780 (sed_cmd->end_line ?
781 sed_cmd->end_line==-1 ?
782 !next_line
783 : sed_cmd->end_line<=linenum
784 : !sed_cmd->end_match)
785 /* or does this line matches our last address regex */
786 || (sed_cmd->end_match && (regexec(sed_cmd->end_match, pattern_space, 0, NULL, 0) == 0))
852 ); 787 );
788 }
789
790 /* Skip blocks of commands we didn't match. */
853 if (sed_cmd->cmd == '{') { 791 if (sed_cmd->cmd == '{') {
854 block_mask = block_mask & matched; 792 if(sed_cmd->invert ? matched : !matched)
793 while(sed_cmd && sed_cmd->cmd!='}') sed_cmd=sed_cmd->next;
794 if(!sed_cmd) bb_error_msg_and_die("Unterminated {");
795 continue;
855 } 796 }
856// matched &= block_mask;
857 797
858 if (sed_cmd->invert ^ (matched & block_mask)) { 798 /* Okay, so did this line match? */
859 /* Update last used regex incase a blank substitute BRE is found */ 799 if (sed_cmd->invert ? !matched : matched) {
800 /* Update last used regex in case a blank substitute BRE is found */
860 if (sed_cmd->beg_match) { 801 if (sed_cmd->beg_match) {
861 previous_regex_ptr = sed_cmd->beg_match; 802 previous_regex_ptr = sed_cmd->beg_match;
862 } 803 }
863 804
864 /* 805 /* actual sedding */
865 * actual sedding
866 */
867 switch (sed_cmd->cmd) { 806 switch (sed_cmd->cmd) {
868 case '=': 807
869 printf("%d\n", linenum); 808 /* Print line number */
870 break; 809 case '=':
871 case 'P':{ 810 printf("%d\n", linenum);
872 /* Write the current pattern space upto the first newline */
873 char *tmp = strchr(pattern_space, '\n');
874
875 if (tmp) {
876 *tmp = '\0';
877 puts(pattern_space);
878 *tmp = '\n';
879 break; 811 break;
880 } 812
881 /* Fall Through */ 813 /* Write the current pattern space up to the first newline */
882 } 814 case 'P':
883 case 'p': /* Write the current pattern space to output */
884 puts(pattern_space);
885 break;
886 case 'd':
887 altered++;
888 deleted = 1;
889 force_print = 0;
890 break;
891
892 case 's':
893
894 /*
895 * Some special cases for 's' printing to make it compliant with
896 * GNU sed printing behavior (aka "The -n | s///p Matrix"):
897 *
898 * -n ONLY = never print anything regardless of any successful
899 * substitution
900 *
901 * s///p ONLY = always print successful substitutions, even if
902 * the pattern_space is going to be printed anyway (pattern_space
903 * will be printed twice).
904 *
905 * -n AND s///p = print ONLY a successful substitution ONE TIME;
906 * no other lines are printed - this is the reason why the 'p'
907 * flag exists in the first place.
908 */
909
910#ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE
911 /* HACK: escape newlines twice so regex can match them */
912 {
913 int offset = 0;
914 char *tmp = strchr(pattern_space + offset, '\n');
915 while ((tmp = strchr(pattern_space + offset, '\n')) != NULL) {
916 offset = tmp - pattern_space;
917 pattern_space = xrealloc(pattern_space, strlen(pattern_space) + 2);
918 tmp = pattern_space + offset;
919 memmove(tmp + 1, tmp, strlen(tmp) + 1);
920 tmp[0] = '\\';
921 tmp[1] = 'n';
922 offset += 2;
923 }
924 }
925#endif
926 /* we print the pattern_space once, unless we were told to be quiet */
927 substituted |= do_subst_command(sed_cmd, &pattern_space);
928#ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE
929 /* undo HACK: escape newlines twice so regex can match them */
930 { 815 {
931 char *tmp = pattern_space; 816 char *tmp = strchr(pattern_space, '\n');
932 817
933 while ((tmp = strstr(tmp, "\\n")) != NULL) { 818 if (tmp) {
934 memmove(tmp, tmp + 1, strlen(tmp + 1) + 1); 819 *tmp = '\0';
935 tmp[0] = '\n'; 820 sed_puts(pattern_space,1);
821 *tmp = '\n';
822 break;
936 } 823 }
824 /* Fall Through */
937 } 825 }
938#endif 826
939 if (!be_quiet && substituted && ((sed_cmd->next == NULL) 827 /* Write the current pattern space to output */
940 || (sed_cmd->next->cmd != 's'))) { 828 case 'p':
941 force_print = 1; 829 sed_puts(pattern_space,no_newline);
942 } 830 break;
943 /* we also print the line if we were given the 'p' flag 831 /* Delete up through first newline */
944 * (this is quite possibly the second printing) */ 832 case 'D':
945 if ((sed_cmd->sub_p) && (altered || substituted)) { 833 {
946 puts(pattern_space); 834 char *tmp = strchr(pattern_space,'\n');
835
836 if(tmp) {
837 tmp=bb_xstrdup(tmp+1);
838 free(pattern_space);
839 pattern_space=tmp;
840 goto restart;
841 }
947 } 842 }
948 break; 843 /* discard this line. */
949 case 'a': 844 case 'd':
950 puts(pattern_space); 845 goto discard_line;
951 fputs(sed_cmd->editline, stdout); 846
952 altered++; 847 /* Substitute with regex */
953 break; 848 case 's':
954 849 if(do_subst_command(sed_cmd, &pattern_space)) {
955 case 'i': 850 substituted|=1;
956 fputs(sed_cmd->editline, stdout); 851
957 break; 852 /* handle p option */
958 853 if(sed_cmd->sub_p)
959 case 'c': 854 sed_puts(pattern_space,no_newline);
960 /* single-address case */ 855 /* handle w option */
961 if ((sed_cmd->end_match == NULL && sed_cmd->end_line == 0) 856 if(sed_cmd->file)
962 /* multi-address case */ 857 sed_cmd->no_newline=puts_maybe_newline(pattern_space, sed_cmd->file, sed_cmd->no_newline, no_newline);
963 /* - matching text */ 858
964 || (sed_cmd->end_match 859 }
965 && (regexec(sed_cmd->end_match, pattern_space, 0, 860 break;
966 NULL, 0) == 0)) 861
967 /* - matching line numbers */ 862 /* Append line to linked list to be printed later */
968 || (sed_cmd->end_line > 0 863 case 'a':
969 && sed_cmd->end_line == linenum)) { 864 {
970 fputs(sed_cmd->editline, stdout); 865 append(sed_cmd->string);
866 break;
971 } 867 }
972 altered++;
973 868
974 break; 869 /* Insert text before this line */
870 case 'i':
871 sed_puts(sed_cmd->string,1);
872 break;
873
874 /* Cut and paste text (replace) */
875 case 'c':
876 /* Only triggers on last line of a matching range. */
877 if (!sed_cmd->in_match) sed_puts(sed_cmd->string,1);
878 goto discard_line;
975 879
976 case 'r':{ 880 /* Read file, append contents to output */
977 FILE *outfile; 881 case 'r':
882 {
883 FILE *outfile;
978 884
979 outfile = fopen(sed_cmd->filename, "r"); 885 outfile = fopen(sed_cmd->string, "r");
980 if (outfile) { 886 if (outfile) {
981 char *line; 887 char *line;
982 888
983 while ((line = 889 while ((line = bb_get_chomped_line_from_file(outfile))
984 bb_get_chomped_line_from_file(outfile)) != 890 != NULL)
985 NULL) { 891 append(line);
986 pattern_space = 892 bb_xprint_and_close_file(outfile);
987 xrealloc(pattern_space,
988 strlen(line) + strlen(pattern_space) + 2);
989 strcat(pattern_space, "\n");
990 strcat(pattern_space, line);
991 } 893 }
992 bb_xprint_and_close_file(outfile);
993 }
994 894
995 }
996 break;
997 case 'q': /* Branch to end of script and quit */
998 deleted = 1;
999 /* Exit the outer while loop */
1000 free(next_line);
1001 next_line = NULL;
1002 break;
1003 case 'n': /* Read next line from input */
1004 if (!be_quiet) {
1005 puts(pattern_space);
1006 }
1007 if (next_line) {
1008 free(pattern_space);
1009 pattern_space = next_line;
1010 next_line = bb_get_chomped_line_from_file(file);
1011 linenum++;
1012 } else {
1013 /* Jump to end of script and exit */
1014 deleted = 1;
1015 next_line = NULL;
1016 }
1017 break;
1018 case 'N': /* Append the next line to the current line */
1019 if (next_line == NULL) {
1020 /* Jump to end of script and exit */
1021 deleted = 1;
1022#ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY
1023 /* GNU sed will add the newline character
1024 * The GNU sed info page labels this as a bug that wont be fixed
1025 */
1026 next_line = calloc(1,1);
1027#else
1028 next_line = NULL;
1029 break; 895 break;
1030#endif
1031 } 896 }
1032 pattern_space = realloc(pattern_space, strlen(pattern_space) + strlen(next_line) + 2); 897
1033 strcat(pattern_space, "\n"); 898 /* Write pattern space to file. */
1034 strcat(pattern_space, next_line); 899 case 'w':
1035 next_line = bb_get_chomped_line_from_file(file); 900 sed_cmd->no_newline=puts_maybe_newline(pattern_space,sed_cmd->file, sed_cmd->no_newline,no_newline);
1036 linenum++; 901 break;
1037 break; 902
1038 case 't': 903 /* Read next line from input */
1039 if (substituted) 904 case 'n':
1040 /* Fall through */ 905 if (!be_quiet)
1041 case 'b': 906 sed_puts(pattern_space,no_newline);
907 if (next_line) {
908 free(pattern_space);
909 pattern_space = next_line;
910 no_newline=next_no_newline;
911 next_line = get_next_line(file,&next_no_newline);
912 linenum++;
913 break;
914 }
915 /* fall through */
916
917 /* Quit. End of script, end of input. */
918 case 'q':
919 /* Exit the outer while loop */
920 free(next_line);
921 next_line = NULL;
922 goto discard_commands;
923
924 /* Append the next line to the current line */
925 case 'N':
1042 { 926 {
1043 if (sed_cmd->label == NULL) { 927 /* If no next line, jump to end of script and exit. */
1044 /* Jump to end of script */ 928 if (next_line == NULL) {
1045 deleted = 1; 929 /* Jump to end of script and exit */
930 free(next_line);
931 next_line = NULL;
932 goto discard_line;
933 /* append next_line, read new next_line. */
1046 } else { 934 } else {
1047 sed_cmd = branch_to(sed_cmd->label); 935 int len=strlen(pattern_space);
936
937 pattern_space = realloc(pattern_space, len + strlen(next_line) + 2);
938 pattern_space[len]='\n';
939 strcpy(pattern_space+len+1, next_line);
940 no_newline=next_no_newline;
941 next_line = get_next_line(file,&next_no_newline);
942 linenum++;
1048 } 943 }
1049 /* Reset the substitution flag */ 944 break;
1050 substituted = 0;
1051 } 945 }
1052 break;
1053 case 'y':{
1054 int i;
1055 946
1056 for (i = 0; pattern_space[i] != 0; i++) { 947 /* Test if substition worked, branch if so. */
1057 int j; 948 case 't':
949 if (!substituted) break;
950 substituted=0;
951 /* Fall through */
952 /* Branch to label */
953 case 'b':
954 if (!sed_cmd->string) goto discard_commands;
955 else sed_cmd = branch_to(sed_cmd->string);
956 break;
957 /* Transliterate characters */
958 case 'y':
959 {
960 int i;
961
962 for (i = 0; pattern_space[i]; i++) {
963 int j;
1058 964
1059 for (j = 0; sed_cmd->translate[j]; j += 2) { 965 for (j = 0; sed_cmd->string[j]; j += 2) {
1060 if (pattern_space[i] == sed_cmd->translate[j]) { 966 if (pattern_space[i] == sed_cmd->string[j]) {
1061 pattern_space[i] = sed_cmd->translate[j + 1]; 967 pattern_space[i] = sed_cmd->string[j + 1];
968 }
1062 } 969 }
1063 } 970 }
1064 }
1065 }
1066 break;
1067 case 'g': /* Replace pattern space with hold space */
1068 free(pattern_space);
1069 if (hold_space) {
1070 pattern_space = strdup(hold_space);
1071 }
1072 break;
1073 case 'G': { /* Append newline and hold space to pattern space */
1074 int pattern_space_size = 2;
1075 int hold_space_size = 0;
1076 971
1077 if (pattern_space) { 972 break;
1078 pattern_space_size += strlen(pattern_space);
1079 }
1080 if (hold_space) {
1081 hold_space_size = strlen(hold_space);
1082 } 973 }
1083 pattern_space = xrealloc(pattern_space, pattern_space_size + hold_space_size); 974 case 'g': /* Replace pattern space with hold space */
1084 if (pattern_space_size == 2) { 975 free(pattern_space);
1085 strcpy(pattern_space, "\n"); 976 if (hold_space) {
1086 } else { 977 pattern_space = strdup(hold_space);
978 no_newline=0;
979 }
980 break;
981 case 'G': /* Append newline and hold space to pattern space */
982 {
983 int pattern_space_size = 2;
984 int hold_space_size = 0;
985
986 if (pattern_space)
987 pattern_space_size += strlen(pattern_space);
988 if (hold_space) hold_space_size = strlen(hold_space);
989 pattern_space = xrealloc(pattern_space, pattern_space_size + hold_space_size);
990 if (pattern_space_size == 2) pattern_space[0]=0;
1087 strcat(pattern_space, "\n"); 991 strcat(pattern_space, "\n");
992 if (hold_space) strcat(pattern_space, hold_space);
993 no_newline=0;
994
995 break;
1088 } 996 }
1089 if (hold_space) { 997 case 'h': /* Replace hold space with pattern space */
1090 strcat(pattern_space, hold_space); 998 free(hold_space);
1091 } 999 hold_space = strdup(pattern_space);
1092 break; 1000 break;
1093 } 1001 case 'H': /* Append newline and pattern space to hold space */
1094 case 'h': /* Replace hold space with pattern space */ 1002 {
1095 free(hold_space); 1003 int hold_space_size = 2;
1096 hold_space = strdup(pattern_space); 1004 int pattern_space_size = 0;
1097 break;
1098 case 'H': { /* Append newline and pattern space to hold space */
1099 int hold_space_size = 2;
1100 int pattern_space_size = 0;
1101
1102 if (hold_space) {
1103 hold_space_size += strlen(hold_space);
1104 }
1105 if (pattern_space) {
1106 pattern_space_size = strlen(pattern_space);
1107 }
1108 hold_space = xrealloc(hold_space, hold_space_size + pattern_space_size);
1109 1005
1110 if (hold_space_size == 2) { 1006 if (hold_space) hold_space_size += strlen(hold_space);
1111 strcpy(hold_space, "\n"); 1007 if (pattern_space)
1112 } else { 1008 pattern_space_size = strlen(pattern_space);
1009 hold_space = xrealloc(hold_space,
1010 hold_space_size + pattern_space_size);
1011
1012 if (hold_space_size == 2) hold_space[0]=0;
1113 strcat(hold_space, "\n"); 1013 strcat(hold_space, "\n");
1014 if (pattern_space) strcat(hold_space, pattern_space);
1015
1016 break;
1114 } 1017 }
1115 if (pattern_space) { 1018 case 'x': /* Exchange hold and pattern space */
1116 strcat(hold_space, pattern_space); 1019 {
1020 char *tmp = pattern_space;
1021 pattern_space = hold_space;
1022 no_newline=0;
1023 hold_space = tmp;
1024 break;
1117 } 1025 }
1118 break;
1119 }
1120 case 'x':{
1121 /* Swap hold and pattern space */
1122 char *tmp = pattern_space;
1123 pattern_space = hold_space;
1124 hold_space = tmp;
1125 break;
1126 }
1127 } 1026 }
1128 } 1027 }
1129
1130 /*
1131 * exit point from sedding...
1132 */
1133 if (matched) {
1134 if (
1135 /* this is a single-address command or... */
1136 (sed_cmd->end_line == 0 && sed_cmd->end_match == NULL)
1137 /* If only one address */
1138 /* we were in the middle of our address range (this
1139 * isn't the first time through) and.. */
1140 || ((sed_cmd->still_in_range == 1)
1141 /* this line number is the last address we're looking for or... */
1142 && ((sed_cmd->end_line > 0
1143 && (sed_cmd->end_line == linenum))
1144 /* this line matches our last address regex */
1145 || (sed_cmd->end_match
1146 && (regexec(sed_cmd->end_match, pattern_space,
1147 0, NULL, 0) == 0))))) {
1148 /* we're out of our address range */
1149 sed_cmd->still_in_range = 0;
1150 } else {
1151 /* didn't hit the exit? then we're still in the middle of an address range */
1152 sed_cmd->still_in_range = 1;
1153 }
1154 }
1155
1156 if (sed_cmd->cmd == '}') {
1157 block_mask = 1;
1158 }
1159
1160 if (deleted)
1161 break;
1162
1163 } 1028 }
1164 1029
1165 /* we will print the line unless we were told to be quiet or if the 1030 /*
1166 * line was altered (via a 'd'elete or 's'ubstitution), in which case 1031 * exit point from sedding...
1167 * the altered line was already printed */ 1032 */
1168 if ((!be_quiet && !altered && !substituted) || force_print) { 1033discard_commands:
1169 puts(pattern_space); 1034 /* we will print the line unless we were told to be quiet ('-n')
1170 } 1035 or if the line was suppressed (ala 'd'elete) */
1036 if (!be_quiet) sed_puts(pattern_space,no_newline);
1037
1038 /* Delete and such jump here. */
1039discard_line:
1040 flush_append();
1171 free(pattern_space); 1041 free(pattern_space);
1172 pattern_space = next_line; 1042 }
1173 } while (pattern_space);
1174} 1043}
1175 1044
1176extern int sed_main(int argc, char **argv) 1045extern int sed_main(int argc, char **argv)
@@ -1179,7 +1048,7 @@ extern int sed_main(int argc, char **argv)
1179 1048
1180#ifdef CONFIG_FEATURE_CLEAN_UP 1049#ifdef CONFIG_FEATURE_CLEAN_UP
1181 /* destroy command strings on exit */ 1050 /* destroy command strings on exit */
1182 if (atexit(destroy_cmd_strs) == -1) 1051 if (atexit(free_and_close_stuff) == -1)
1183 bb_perror_msg_and_die("atexit"); 1052 bb_perror_msg_and_die("atexit");
1184#endif 1053#endif
1185 1054
@@ -1189,17 +1058,46 @@ extern int sed_main(int argc, char **argv)
1189 case 'n': 1058 case 'n':
1190 be_quiet++; 1059 be_quiet++;
1191 break; 1060 break;
1192 case 'e':{ 1061 case 'e':
1193 add_cmd_str(optarg); 1062 {
1063 int go=1;
1064 char *temp=bb_xstrdup(optarg),*temp2=temp;
1065
1066 /* It is possible to have a command line argument with embedded
1067 newlines. This counts as a multi-line argument. */
1068
1069 while(go) {
1070 int len=strcspn(temp2,"\n");
1071 if(!temp2[len]) go=0;
1072 else temp2[len]=0;
1073 add_cmd(temp2);
1074 temp2+=len+1;
1075 }
1076 free(temp);
1194 break; 1077 break;
1195 } 1078 }
1196 case 'f': 1079 case 'f':
1197 load_cmd_file(optarg); 1080 {
1081 FILE *cmdfile;
1082 char *line;
1083
1084 cmdfile = bb_xfopen(optarg, "r");
1085
1086 while ((line = bb_get_chomped_line_from_file(cmdfile))
1087 != NULL) {
1088 add_cmd(line);
1089 free(line);
1090 }
1091 bb_xprint_and_close_file(cmdfile);
1092
1198 break; 1093 break;
1094 }
1199 default: 1095 default:
1200 bb_show_usage(); 1096 bb_show_usage();
1201 } 1097 }
1202 } 1098 }
1099 /* Flush any unfinished commands. */
1100 add_cmd("");
1203 1101
1204 /* if we didn't get a pattern from a -e and no command file was specified, 1102 /* if we didn't get a pattern from a -e and no command file was specified,
1205 * argv[optind] should be the pattern. no pattern, no worky */ 1103 * argv[optind] should be the pattern. no pattern, no worky */
@@ -1207,7 +1105,7 @@ extern int sed_main(int argc, char **argv)
1207 if (argv[optind] == NULL) 1105 if (argv[optind] == NULL)
1208 bb_show_usage(); 1106 bb_show_usage();
1209 else 1107 else
1210 add_cmd_str(argv[optind++]); 1108 add_cmd(argv[optind++]);
1211 } 1109 }
1212 1110
1213 /* argv[(optind)..(argc-1)] should be names of file to process. If no 1111 /* argv[(optind)..(argc-1)] should be names of file to process. If no