about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Glenn L McGrath <bug1@ihug.co.nz> 2003-10-01 03:06:16 +0000
committer Glenn L McGrath <bug1@ihug.co.nz> 2003-10-01 03:06:16 +0000
commit aa5a602689265a4351c890efe5d8e7793e777e3c (patch)
tree e253699e7d4abbccc47e387e09ef1c2c6d4716c4
parent e6ba16f830bf2524b39e3e1b7c8157808921f95d (diff)
download busybox-w32-aa5a602689265a4351c890efe5d8e7793e777e3c.tar.gz
busybox-w32-aa5a602689265a4351c890efe5d8e7793e777e3c.tar.bz2
busybox-w32-aa5a602689265a4351c890efe5d8e7793e777e3c.zip
Patch by Rob Landley, work in progress update, fixes lots of bugs,
introduces a few others (but they are being worked on)
-rw-r--r-- editors/Config.in 22
-rw-r--r-- editors/sed.c 1458
-rw-r--r-- include/libbb.h 3
3 files changed, 680 insertions, 803 deletions
diff --git a/editors/Config.in b/editors/Config.in
index bced12cb1..b491c2416 100644
--- a/editors/Config.in
+++ b/editors/Config.in
@@ -33,28 +33,6 @@ config CONFIG_SED
33 sed is used to perform text transformations on a file 33 sed is used to perform text transformations on a file
34 or input from a pipeline. 34 or input from a pipeline.
35 35
36config CONFIG_FEATURE_SED_EMBEDED_NEWLINE
37 bool " Embeded newline (EXPERIMENTAL)"
38 default n
39 depends on CONFIG_SED
40 help
41 This is a hack to allow matching of '\n' in regular expressions.
42 It works by translating '\n' to "\n" and back.
43 It may introduce unexpected results if you use "\n" in your text.
44
45config CONFIG_FEATURE_SED_GNU_COMPATABILITY
46 bool " Behave consistent with GNU sed"
47 default y
48 depends on CONFIG_SED
49 help
50 Where GNU sed doesnt follow the posix standard, do as GNU sed does.
51 Current difference are in
52 - N command with odd number of lines (see GNU sed info page)
53 - Blanks before substitution flags eg.
54 GNU sed interprets 's/a/b/ g' as 's/a/b/g'
55 Standard says 's/a/b/ g' should be 's/a/b/;g'
56 - GNU sed allows blanks between a '!' and the function.
57
58config CONFIG_VI 36config CONFIG_VI
59 bool "vi" 37 bool "vi"
60 default n 38 default n
diff --git a/editors/sed.c b/editors/sed.c
index 1c016ac57..6452a321c 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -1,3 +1,4 @@
1/* vi: set sw=4 ts=4: */
1/* 2/*
2 * sed.c - very minimalist version of sed 3 * sed.c - very minimalist version of sed
3 * 4 *
@@ -22,6 +23,24 @@
22 * 23 *
23 */ 24 */
24 25
26/* Code overview.
27
28 Files are laid out to avoid unnecessary function declarations. So for
29 example, every function add_cmd calls occurs before add_cmd in this file.
30
31 add_cmd() is called on each line of sed command text (from a file or from
32 the command line). It calls get_address() and parse_cmd_args(). The
33 resulting sed_cmd_t structures are appended to a linked list
34 (sed_cmd_head/sed_cmd_tail).
35
36 process_file() does actual sedding, reading data lines from an input FILE *
37 (which could be stdin) and applying the sed command list (sed_cmd_head) to
38 each of the resulting lines.
39
40 sed_main() is where external code calls into this, with a command line.
41*/
42
43
25/* 44/*
26 Supported features and commands in this version of sed: 45 Supported features and commands in this version of sed:
27 46
@@ -64,84 +83,72 @@
64#include "busybox.h" 83#include "busybox.h"
65 84
66typedef struct sed_cmd_s { 85typedef struct sed_cmd_s {
67 /* Order by alignment requirements */ 86 /* Ordered by alignment requirements: currently 36 bytes on x86 */
68
69 /* address storage */
70 regex_t *beg_match; /* sed -e '/match/cmd' */
71 regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */
72
73 int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */
74 int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
75 87
76 /* inversion flag */ 88 /* address storage */
77 int invert; /* the '!' after the address */ 89 regex_t *beg_match; /* sed -e '/match/cmd' */
90 regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */
91 regex_t *sub_match; /* For 's/sub_match/string/' */
92 int beg_line; /* 'sed 1p' 0 == apply commands to all lines */
93 int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */
78 94
79 /* Runtime flag no not if the current command match's */ 95 FILE *file; /* File (sr) command writes to, -1 for none. */
80 int still_in_range; 96 char *string; /* Data string for (saicytb) commands. */
81 97
82 /* SUBSTITUTION COMMAND SPECIFIC FIELDS */ 98 unsigned short which_match; /* (s) Which match to replace (0 for all) */
83 99
84 /* sed -e 's/sub_match/replace/' */ 100 /* Bitfields (gcc won't group them if we don't) */
85 regex_t *sub_match; 101 unsigned int invert:1; /* the '!' after the address */
86 char *replace; 102 unsigned int in_match:1; /* Next line also included in match? */
103 unsigned int no_newline:1; /* Last line written by (sr) had no '\n' */
104 unsigned int sub_p:1; /* (s) print option */
87 105
88 /* EDIT COMMAND (a,i,c) SPECIFIC FIELDS */
89 char *editline;
90
91 /* FILE COMMAND (r) SPECIFIC FIELDS */
92 char *filename;
93
94 /* SUBSTITUTION COMMAND SPECIFIC FIELDS */
95
96 unsigned int num_backrefs:4; /* how many back references (\1..\9) */
97 /* Note: GNU/POSIX sed does not save more than nine backrefs, so
98 * we only use 4 bits to hold the number */
99 unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */
100 unsigned int sub_p:1; /* sed -e 's/foo/bar/p' (print substitution) */
101
102 /* TRANSLATE COMMAND */
103 char *translate;
104
105 /* GENERAL FIELDS */
106 /* the command */
107 char cmd; /* p,d,s (add more at your leisure :-) */
108
109 /* Branch commands */
110 char *label;
111
112 /* next command in list (sequential list of specified commands) */
113 struct sed_cmd_s *next;
114 106
107 /* GENERAL FIELDS */
108 char cmd; /* The command char: abcdDgGhHilnNpPqrstwxy:={} */
109 struct sed_cmd_s *next; /* Next command (linked list, NULL terminated) */
115} sed_cmd_t; 110} sed_cmd_t;
116 111
117
118/* externs */
119extern void xregcomp(regex_t * preg, const char *regex, int cflags);
120extern int optind; /* in unistd.h */
121extern char *optarg; /* ditto */
122
123/* globals */ 112/* globals */
124/* options */ 113/* options */
125static int be_quiet = 0; 114static int be_quiet = 0;
115
126static const char bad_format_in_subst[] = 116static const char bad_format_in_subst[] =
127 "bad format in substitution expression"; 117 "bad format in substitution expression";
118const char *const semicolon_whitespace = "; \n\r\t\v";
119
120regmatch_t regmatch[10];
121static regex_t *previous_regex_ptr = NULL;
128 122
129/* linked list of sed commands */ 123/* linked list of sed commands */
130static sed_cmd_t sed_cmd_head; 124static sed_cmd_t sed_cmd_head;
131static sed_cmd_t *sed_cmd_tail = &sed_cmd_head; 125static sed_cmd_t *sed_cmd_tail = &sed_cmd_head;
132 126
133const char *const semicolon_whitespace = "; \n\r\t\v\0"; 127/* Linked list of append lines */
134static regex_t *previous_regex_ptr = NULL; 128struct append_list {
135 129 char *string;
130 struct append_list *next;
131};
132struct append_list *append_head=NULL, *append_tail=NULL;
136 133
137#ifdef CONFIG_FEATURE_CLEAN_UP 134#ifdef CONFIG_FEATURE_CLEAN_UP
138static void destroy_cmd_strs(void) 135static void free_and_close_stuff(void)
139{ 136{
140 sed_cmd_t *sed_cmd = sed_cmd_head.next; 137 sed_cmd_t *sed_cmd = sed_cmd_head.next;
141 138
139 while(append_head) {
140 append_tail=append_head->next;
141 free(append_head->string);
142 free(append_head);
143 append_head=append_tail;
144 }
145
142 while (sed_cmd) { 146 while (sed_cmd) {
143 sed_cmd_t *sed_cmd_next = sed_cmd->next; 147 sed_cmd_t *sed_cmd_next = sed_cmd->next;
144 148
149 if(sed_cmd->file)
150 bb_xprint_and_close_file(sed_cmd->file);
151
145 if (sed_cmd->beg_match) { 152 if (sed_cmd->beg_match) {
146 regfree(sed_cmd->beg_match); 153 regfree(sed_cmd->beg_match);
147 free(sed_cmd->beg_match); 154 free(sed_cmd->beg_match);
@@ -154,17 +161,41 @@ static void destroy_cmd_strs(void)
154 regfree(sed_cmd->sub_match); 161 regfree(sed_cmd->sub_match);
155 free(sed_cmd->sub_match); 162 free(sed_cmd->sub_match);
156 } 163 }
157 free(sed_cmd->replace); 164 free(sed_cmd->string);
158 free(sed_cmd->editline);
159 free(sed_cmd->filename);
160 free(sed_cmd->translate);
161 free(sed_cmd->label);
162 free(sed_cmd); 165 free(sed_cmd);
163 sed_cmd = sed_cmd_next; 166 sed_cmd = sed_cmd_next;
164 } 167 }
165} 168}
166#endif 169#endif
167 170
171/* strdup, replacing "\n" with '\n', and "\delimiter" with 'delimiter' */
172
173static void parse_escapes(char *dest, const char *string, int len, char from, char to)
174{
175 int i=0;
176
177 while(i<len) {
178 if(string[i] == '\\') {
179 if(string[i+1] == from) {
180 *(dest++) = to;
181 i+=2;
182 continue;
183 } else *(dest++)=string[i++];
184 }
185 *(dest++) = string[i++];
186 }
187 *dest=0;
188}
189
190static char *copy_parsing_slashn(const char *string, int len)
191{
192 char *dest=xmalloc(len+1);
193
194 parse_escapes(dest,string,len,'n','\n');
195 return dest;
196}
197
198
168/* 199/*
169 * index_of_next_unescaped_regexp_delim - walks left to right through a string 200 * index_of_next_unescaped_regexp_delim - walks left to right through a string
170 * beginning at a specified index and returns the index of the next regular 201 * beginning at a specified index and returns the index of the next regular
@@ -182,7 +213,7 @@ static int index_of_next_unescaped_regexp_delim(const char delimiter,
182 for (; (ch = str[idx]); idx++) { 213 for (; (ch = str[idx]); idx++) {
183 if (bracket != -1) { 214 if (bracket != -1) {
184 if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 215 if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2
185 && str[idx - 1] == '^'))) 216 && str[idx - 1] == '^')))
186 bracket = -1; 217 bracket = -1;
187 } else if (escaped) 218 } else if (escaped)
188 escaped = 0; 219 escaped = 0;
@@ -209,19 +240,15 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
209 240
210 /* verify that the 's' or 'y' is followed by something. That something 241 /* verify that the 's' or 'y' is followed by something. That something
211 * (typically a 'slash') is now our regexp delimiter... */ 242 * (typically a 'slash') is now our regexp delimiter... */
212 if (*cmdstr == '\0') 243 if (*cmdstr == '\0') bb_error_msg_and_die(bad_format_in_subst);
213 bb_error_msg_and_die(bad_format_in_subst); 244 delimiter = *(cmdstr_ptr++);
214 else
215 delimiter = *cmdstr_ptr;
216
217 cmdstr_ptr++;
218 245
219 /* save the match string */ 246 /* save the match string */
220 idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr); 247 idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
221 if (idx == -1) { 248 if (idx == -1) {
222 bb_error_msg_and_die(bad_format_in_subst); 249 bb_error_msg_and_die(bad_format_in_subst);
223 } 250 }
224 *match = bb_xstrndup(cmdstr_ptr, idx); 251 *match = copy_parsing_slashn(cmdstr_ptr, idx);
225 252
226 /* save the replacement string */ 253 /* save the replacement string */
227 cmdstr_ptr += idx + 1; 254 cmdstr_ptr += idx + 1;
@@ -229,7 +256,7 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
229 if (idx == -1) { 256 if (idx == -1) {
230 bb_error_msg_and_die(bad_format_in_subst); 257 bb_error_msg_and_die(bad_format_in_subst);
231 } 258 }
232 *replace = bb_xstrndup(cmdstr_ptr, idx); 259 *replace = copy_parsing_slashn(cmdstr_ptr, idx);
233 260
234 return ((cmdstr_ptr - cmdstr) + idx); 261 return ((cmdstr_ptr - cmdstr) + idx);
235} 262}
@@ -248,94 +275,109 @@ static int get_address(char *my_str, int *linenum, regex_t ** regex)
248 *linenum = -1; 275 *linenum = -1;
249 pos++; 276 pos++;
250 } else if (*my_str == '/' || *my_str == '\\') { 277 } else if (*my_str == '/' || *my_str == '\\') {
251 int next, idx_start = 1; 278 int next;
252 char delimiter; 279 char delimiter;
280 char *temp;
253 281
254 delimiter = '/'; 282 if (*my_str == '\\') delimiter = *(++pos);
255 if (*my_str == '\\') { 283 else delimiter = '/';
256 idx_start++;
257 delimiter = *(++pos);
258 }
259 next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); 284 next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
260 if (next == -1) { 285 if (next == -1)
261 bb_error_msg_and_die("unterminated match expression"); 286 bb_error_msg_and_die("unterminated match expression");
262 } 287
263 pos += next; 288 temp=copy_parsing_slashn(pos,next);
264 *pos = '\0';
265
266 *regex = (regex_t *) xmalloc(sizeof(regex_t)); 289 *regex = (regex_t *) xmalloc(sizeof(regex_t));
267 xregcomp(*regex, my_str + idx_start, REG_NEWLINE); 290 xregcomp(*regex, temp, REG_NEWLINE);
268 pos++; /* so it points to the next character after the last '/' */ 291 free(temp);
292 /* Move position to next character after last delimiter */
293 pos+=(next+1);
269 } 294 }
270 return pos - my_str; 295 return pos - my_str;
271} 296}
272 297
298/* Grab a filename. Whitespace at start is skipped, then goes to EOL. */
299static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr, char **retval)
300{
301 int start = 0, idx, hack=0;
302
303 /* Skip whitespace, then grab filename to end of line */
304 while (isspace(filecmdstr[start])) start++;
305 idx=start;
306 while(filecmdstr[idx] && filecmdstr[idx]!='\n') idx++;
307 /* If lines glued together, put backslash back. */
308 if(filecmdstr[idx]=='\n') hack=1;
309 if(idx==start) bb_error_msg_and_die("Empty filename");
310 *retval = bb_xstrndup(filecmdstr+start, idx-start+hack+1);
311 if(hack) *(idx+*retval)='\\';
312
313 return idx;
314}
315
273static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) 316static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr)
274{ 317{
275 int cflags = 0; 318 int cflags = 0;
276 char *match; 319 char *match;
277 int idx = 0; 320 int idx = 0;
278 int j;
279 321
280 /* 322 /*
281 * the string that gets passed to this function should look like this: 323 * A substitution command should look something like this:
282 * s/match/replace/gIp 324 * s/match/replace/ #gIpw
283 * || | ||| 325 * || | |||
284 * mandatory optional 326 * mandatory optional
285 *
286 * (all three of the '/' slashes are mandatory)
287 */ 327 */
288 idx = parse_regex_delim(substr, &match, &sed_cmd->replace); 328 idx = parse_regex_delim(substr, &match, &sed_cmd->string);
289 329
290 /* determine the number of back references in the match string */ 330 /* determine the number of back references in the match string */
291 /* Note: we compute this here rather than in the do_subst_command() 331 /* Note: we compute this here rather than in the do_subst_command()
292 * function to save processor time, at the expense of a little more memory 332 * function to save processor time, at the expense of a little more memory
293 * (4 bits) per sed_cmd */ 333 * (4 bits) per sed_cmd */
294 334
295 for (j = 0; match[j]; j++) {
296 /* GNU/POSIX sed does not save more than nine backrefs */
297 if (match[j] == '\\' && match[j + 1] == '('
298 && sed_cmd->num_backrefs <= 9)
299 sed_cmd->num_backrefs++;
300 }
301
302 /* process the flags */ 335 /* process the flags */
303#ifndef CONFIG_FEATURE_SED_GNU_COMPATABILITY 336
304 idx++; 337 sed_cmd->which_match=1;
305#else 338 while (substr[++idx]) {
306 /* GNU sed allows blanks before the flag, this can lead to an incosistent 339 /* Parse match number */
307 * interpretation of 's/a/b/ g' as being either 's/a/b/g' or 's/a/b/;g'. 340 if(isdigit(substr[idx])) {
308 * which results in very different behaviour. 341 if(match[0]!='^') {
309 */ 342 /* Match 0 treated as all, multiple matches we take the last one. */
310 while (substr[++idx]) 343 char *pos=substr+idx;
311#endif 344 sed_cmd->which_match=(unsigned short)strtol(substr+idx,&pos,10);
345 idx=pos-substr;
346 }
347 continue;
348 }
312 switch (substr[idx]) { 349 switch (substr[idx]) {
313 case 'g': 350 /* Replace all occurrences */
314 if (match[0] != '^') { 351 case 'g':
315 sed_cmd->sub_g = 1; 352 if (match[0] != '^') sed_cmd->which_match = 0;
353 break;
354 /* Print pattern space */
355 case 'p':
356 sed_cmd->sub_p = 1;
357 break;
358 case 'w':
359 {
360 char *temp;
361 idx+=parse_file_cmd(sed_cmd,substr+idx,&temp);
362
363 break;
316 } 364 }
317 break; 365 /* Ignore case (gnu exension) */
318 /* Hmm, i dont see the I option mentioned in the standard */ 366 case 'I':
319 case 'I': 367 cflags |= REG_ICASE;
320 cflags |= REG_ICASE; 368 break;
321 break; 369 /* Skip spaces */
322 case 'p': 370 case ' ':
323 sed_cmd->sub_p = 1; 371 case '\t':
324 break; 372 break;
325#ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY 373 case ';':
326 default: 374 case '}':
327 /* any whitespace or semicolon trailing after a s/// is ok */
328 if (strchr(semicolon_whitespace, substr[idx]))
329 goto out; 375 goto out;
330 bb_error_msg_and_die("bad option in substitution expression"); 376 default:
331#endif 377 bb_error_msg_and_die("bad option in substitution expression");
332 } 378 }
333 379 }
334#ifndef CONFIG_FEATURE_SED_GNU_COMPATABILITY
335 idx++;
336#else
337out: 380out:
338#endif
339 /* compile the match string into a regex */ 381 /* compile the match string into a regex */
340 if (*match != '\0') { 382 if (*match != '\0') {
341 /* If match is empty, we use last regex used at runtime */ 383 /* If match is empty, we use last regex used at runtime */
@@ -347,166 +389,61 @@ out:
347 return idx; 389 return idx;
348} 390}
349 391
350static void replace_slash_n(char *string)
351{
352 char *dest;
353
354 for (dest = string; *string; string++, dest++) {
355 if ((string[0] == '\\') && (string[1] == 'n')) {
356 *dest = '\n';
357 string++;
358 } else {
359 *dest = *string;
360 }
361 }
362 *dest=0;
363}
364
365static int parse_translate_cmd(sed_cmd_t * const sed_cmd, const char *cmdstr)
366{
367 char *match;
368 char *replace;
369 int idx;
370 int i;
371
372 idx = parse_regex_delim(cmdstr, &match, &replace);
373 replace_slash_n(match);
374 replace_slash_n(replace);
375 sed_cmd->translate = xcalloc(1, (strlen(match) + 1) * 2);
376 for (i = 0; (match[i] != 0) && (replace[i] != 0); i++) {
377 sed_cmd->translate[i * 2] = match[i];
378 sed_cmd->translate[(i * 2) + 1] = replace[i];
379 }
380 return (idx + 1);
381}
382
383static int parse_edit_cmd(sed_cmd_t * sed_cmd, const char *editstr)
384{
385 int i, j;
386
387 /*
388 * the string that gets passed to this function should look like this:
389 *
390 * need one of these
391 * |
392 * | this backslash (immediately following the edit command) is mandatory
393 * | |
394 * [aic]\
395 * TEXT1\
396 * TEXT2\
397 * TEXTN
398 *
399 * as soon as we hit a TEXT line that has no trailing '\', we're done.
400 * this means a command like:
401 *
402 * i\
403 * INSERTME
404 *
405 * is a-ok.
406 *
407 */
408 if ((*editstr != '\\') || ((editstr[1] != '\n') && (editstr[1] != '\r'))) {
409 bb_error_msg_and_die("bad format in edit expression");
410 }
411
412 /* store the edit line text */
413 sed_cmd->editline = xmalloc(strlen(&editstr[2]) + 2);
414 for (i = 2, j = 0;
415 editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; i++, j++) {
416 if ((editstr[i] == '\\') && strchr("\n\r", editstr[i + 1]) != NULL) {
417 sed_cmd->editline[j] = '\n';
418 i++;
419 } else
420 sed_cmd->editline[j] = editstr[i];
421 }
422
423 /* figure out if we need to add a newline */
424 if (sed_cmd->editline[j - 1] != '\n')
425 sed_cmd->editline[j++] = '\n';
426
427 /* terminate string */
428 sed_cmd->editline[j] = '\0';
429
430 return i;
431}
432
433
434static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr)
435{
436 int idx = 0;
437 int filenamelen = 0;
438
439 /*
440 * the string that gets passed to this function should look like this:
441 * '[ ]filename'
442 * | |
443 * | a filename
444 * |
445 * optional whitespace
446
447 * re: the file to be read, the GNU manual says the following: "Note that
448 * if filename cannot be read, it is treated as if it were an empty file,
449 * without any error indication." Thus, all of the following commands are
450 * perfectly legal:
451 *
452 * sed -e '1r noexist'
453 * sed -e '1r ;'
454 * sed -e '1r'
455 */
456
457 /* the file command may be followed by whitespace; move past it. */
458 while (isspace(filecmdstr[++idx])) {;
459 }
460
461 /* the first non-whitespace we get is a filename. the filename ends when we
462 * hit a normal sed command terminator or end of string */
463 filenamelen = strcspn(&filecmdstr[idx], semicolon_whitespace);
464 sed_cmd->filename = xmalloc(filenamelen + 1);
465 safe_strncpy(sed_cmd->filename, &filecmdstr[idx], filenamelen + 1);
466 return idx + filenamelen;
467}
468
469/* 392/*
470 * Process the commands arguments 393 * Process the commands arguments
471 */ 394 */
472static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr) 395static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr)
473{ 396{
474 /* handle (s)ubstitution command */ 397 /* handle (s)ubstitution command */
475 if (sed_cmd->cmd == 's') { 398 if (sed_cmd->cmd == 's') cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
476 cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
477 }
478 /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ 399 /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
479 else if (strchr("aic", sed_cmd->cmd)) { 400 else if (strchr("aic", sed_cmd->cmd)) {
480 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') 401 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
481 bb_error_msg_and_die 402 bb_error_msg_and_die
482 ("only a beginning address can be specified for edit commands"); 403 ("only a beginning address can be specified for edit commands");
483 cmdstr += parse_edit_cmd(sed_cmd, cmdstr); 404 while(isspace(*cmdstr)) cmdstr++;
484 } 405 sed_cmd->string = bb_xstrdup(cmdstr);
406 cmdstr += strlen(cmdstr);
485 /* handle file cmds: (r)ead */ 407 /* handle file cmds: (r)ead */
486 else if (sed_cmd->cmd == 'r') { 408 } else if(strchr("rw", sed_cmd->cmd)) {
487 if (sed_cmd->end_line || sed_cmd->end_match) 409 if (sed_cmd->end_line || sed_cmd->end_match)
488 bb_error_msg_and_die("Command only uses one address"); 410 bb_error_msg_and_die("Command only uses one address");
489 cmdstr += parse_file_cmd(sed_cmd, cmdstr); 411 cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string);
490 } 412 if(sed_cmd->cmd=='w')
413 sed_cmd->file=bb_xfopen(sed_cmd->string,"w");
491 /* handle branch commands */ 414 /* handle branch commands */
492 else if (strchr(":bt", sed_cmd->cmd)) { 415 } else if (strchr(":bt", sed_cmd->cmd)) {
493 int length; 416 int length;
494 417
495 cmdstr += strspn(cmdstr, " "); 418 while(isspace(*cmdstr)) cmdstr++;
496 length = strcspn(cmdstr, semicolon_whitespace); 419 length = strcspn(cmdstr, semicolon_whitespace);
497 if (length) { 420 if (length) {
498 sed_cmd->label = strndup(cmdstr, length); 421 sed_cmd->string = strndup(cmdstr, length);
499 cmdstr += length; 422 cmdstr += length;
500 } 423 }
501 } 424 }
502 /* translation command */ 425 /* translation command */
503 else if (sed_cmd->cmd == 'y') { 426 else if (sed_cmd->cmd == 'y') {
504 cmdstr += parse_translate_cmd(sed_cmd, cmdstr); 427 char *match, *replace;
428 int i=cmdstr[0];
429
430 cmdstr+=parse_regex_delim(cmdstr, &match, &replace)+1;
431 /* \n already parsed, but \delimiter needs unescaping. */
432 parse_escapes(match,match,strlen(match),i,i);
433 parse_escapes(replace,replace,strlen(replace),i,i);
434
435 sed_cmd->string = xcalloc(1, (strlen(match) + 1) * 2);
436 for (i = 0; match[i] && replace[i]; i++) {
437 sed_cmd->string[i * 2] = match[i];
438 sed_cmd->string[(i * 2) + 1] = replace[i];
439 }
440 free(match);
441 free(replace);
505 } 442 }
506 /* if it wasnt a single-letter command that takes no arguments 443 /* if it wasnt a single-letter command that takes no arguments
507 * then it must be an invalid command. 444 * then it must be an invalid command.
508 */ 445 */
509 else if (strchr("dgGhHnNpPqx={}", sed_cmd->cmd) == 0) { 446 else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) {
510 bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd); 447 bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd);
511 } 448 }
512 449
@@ -514,663 +451,595 @@ static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr)
514 return (cmdstr); 451 return (cmdstr);
515} 452}
516 453
517static char *add_cmd(char *cmdstr)
518{
519 sed_cmd_t *sed_cmd;
520 454
521 /* Skip over leading whitespace and semicolons */ 455/* Parse address+command sets, skipping comment lines. */
522 cmdstr += strspn(cmdstr, semicolon_whitespace);
523 456
524 /* if we ate the whole thing, that means there was just trailing 457void add_cmd(char *cmdstr)
525 * whitespace or a final / no-op semicolon. either way, get out */ 458{
526 if (*cmdstr == '\0') { 459 static char *add_cmd_line=NULL;
527 return (NULL); 460 sed_cmd_t *sed_cmd;
528 }
529 461
530 /* if this is a comment, jump past it and keep going */ 462 /* Append this line to any unfinished line from last time. */
531 if (*cmdstr == '#') { 463 if(add_cmd_line) {
532 /* "#n" is the same as using -n on the command line */ 464 int lastlen=strlen(add_cmd_line);
533 if (cmdstr[1] == 'n') { 465 char *temp=xmalloc(lastlen+strlen(cmdstr)+2);
534 be_quiet++; 466
535 } 467 memcpy(temp,add_cmd_line,lastlen);
536 return (strpbrk(cmdstr, "\n\r")); 468 temp[lastlen]='\n';
469 strcpy(temp+lastlen+1,cmdstr);
470 free(add_cmd_line);
471 cmdstr=add_cmd_line=temp;
472 } else add_cmd_line=NULL;
473
474 /* If this line ends with backslash, request next line. */
475 int temp=strlen(cmdstr);
476 if(temp && cmdstr[temp-1]=='\\') {
477 if(!add_cmd_line) add_cmd_line=strdup(cmdstr);
478 add_cmd_line[temp-1]=0;
479 return;
537 } 480 }
538 481
539 /* parse the command 482 /* Loop parsing all commands in this line. */
540 * format is: [addr][,addr]cmd 483 while(*cmdstr) {
541 * |----||-----||-| 484 /* Skip leading whitespace and semicolons */
542 * part1 part2 part3 485 cmdstr += strspn(cmdstr, semicolon_whitespace);
543 */
544 486
545 sed_cmd = xcalloc(1, sizeof(sed_cmd_t)); 487 /* If no more commands, exit. */
488 if(!*cmdstr) break;
546 489
547 /* first part (if present) is an address: either a '$', a number or a /regex/ */ 490 /* if this is a comment, jump past it and keep going */
548 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); 491 if (*cmdstr == '#') {
492 /* "#n" is the same as using -n on the command line */
493 if (cmdstr[1] == 'n') be_quiet++;
494 if(!(cmdstr=strpbrk(cmdstr, "\n\r"))) break;
495 continue;
496 }
549 497
550 /* second part (if present) will begin with a comma */ 498 /* parse the command
551 if (*cmdstr == ',') { 499 * format is: [addr][,addr][!]cmd
552 int idx; 500 * |----||-----||-|
501 * part1 part2 part3
502 */
553 503
554 cmdstr++; 504 sed_cmd = xcalloc(1, sizeof(sed_cmd_t));
555 idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
556 if (idx == 0) {
557 bb_error_msg_and_die("get_address: no address found in string\n"
558 "\t(you probably didn't check the string you passed me)");
559 }
560 cmdstr += idx;
561 }
562 505
563 /* skip whitespace before the command */ 506 /* first part (if present) is an address: either a '$', a number or a /regex/ */
564 while (isspace(*cmdstr)) { 507 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
565 cmdstr++;
566 }
567 508
568 /* there my be the inversion flag between part2 and part3 */ 509 /* second part (if present) will begin with a comma */
569 if (*cmdstr == '!') { 510 if (*cmdstr == ',') {
570 sed_cmd->invert = 1; 511 int idx;
571 cmdstr++;
572 512
573#ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY
574 /* According to the spec
575 * It is unspecified whether <blank>s can follow a '!' character,
576 * and conforming applications shall not follow a '!' character
577 * with <blank>s.
578 */
579 /* skip whitespace before the command */
580 while (isspace(*cmdstr)) {
581 cmdstr++; 513 cmdstr++;
514 idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
515 if (!idx) bb_error_msg_and_die("get_address: no address found in string\n");
516 cmdstr += idx;
582 } 517 }
583#endif
584 }
585
586 /* last part (mandatory) will be a command */
587 if (*cmdstr == '\0')
588 bb_error_msg_and_die("missing command");
589 518
590 sed_cmd->cmd = *cmdstr; 519 /* skip whitespace before the command */
591 cmdstr++; 520 while (isspace(*cmdstr)) cmdstr++;
592 521
593 cmdstr = parse_cmd_str(sed_cmd, cmdstr); 522 /* Check for inversion flag */
523 if (*cmdstr == '!') {
524 sed_cmd->invert = 1;
525 cmdstr++;
594 526
595 /* Add the command to the command array */ 527 /* skip whitespace before the command */
596 sed_cmd_tail->next = sed_cmd; 528 while (isspace(*cmdstr)) cmdstr++;
597 sed_cmd_tail = sed_cmd_tail->next; 529 }
598 530
599 return (cmdstr); 531 /* last part (mandatory) will be a command */
600} 532 if (!*cmdstr) bb_error_msg_and_die("missing command");
533 sed_cmd->cmd = *(cmdstr++);
534 cmdstr = parse_cmd_args(sed_cmd, cmdstr);
601 535
602static void add_cmd_str(const char *cmdstr) 536 /* Add the command to the command array */
603{ 537 sed_cmd_tail->next = sed_cmd;
604 char *cmdstr_expanded = strdup(cmdstr); 538 sed_cmd_tail = sed_cmd_tail->next;
605 char *cmdstr_ptr;
606
607#ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE
608 cmdstr_ptr = cmdstr_expanded;
609 /* HACK: convert "\n" to match tranlated '\n' string */
610 while ((cmdstr_ptr = strstr(cmdstr_ptr, "\\n")) != NULL) {
611 int length = strlen(cmdstr) + 2;
612 cmdstr_expanded = realloc(cmdstr_expanded, length);
613 cmdstr_ptr = strstr(cmdstr_expanded, "\\n");
614 memmove(cmdstr_ptr + 1, cmdstr_ptr, strlen(cmdstr_ptr) + 1);
615 cmdstr_ptr[0] = '\\';
616 cmdstr_ptr += 3;
617 } 539 }
618#endif
619 cmdstr_ptr = cmdstr_expanded;
620 do {
621 cmdstr_ptr = add_cmd(cmdstr_ptr);
622 } while (cmdstr_ptr && strlen(cmdstr_ptr));
623
624 free(cmdstr_expanded);
625}
626
627 540
628static void load_cmd_file(const char *filename) 541 /* If we glued multiple lines together, free the memory. */
629{ 542 if(add_cmd_line) {
630 FILE *cmdfile; 543 free(add_cmd_line);
631 char *line; 544 add_cmd_line=NULL;
632 char *nextline;
633 char *e;
634
635 cmdfile = bb_xfopen(filename, "r");
636
637 while ((line = bb_get_line_from_file(cmdfile)) != NULL) {
638 /* if a line ends with '\' it needs the next line appended to it */
639 while (((e = last_char_is(line, '\n')) != NULL)
640 && (e > line) && (e[-1] == '\\')
641 && ((nextline = bb_get_line_from_file(cmdfile)) != NULL)) {
642 line = xrealloc(line, (e - line) + 1 + strlen(nextline) + 1);
643 strcat(line, nextline);
644 free(nextline);
645 }
646 /* eat trailing newline (if any) --if I don't do this, edit commands
647 * (aic) will print an extra newline */
648 chomp(line);
649 add_cmd_str(line);
650 free(line);
651 } 545 }
652} 546}
653 547
654struct pipeline { 548struct pipeline {
655 char *buf; 549 char *buf; /* Space to hold string */
656 int idx; 550 int idx; /* Space used */
657 int len; 551 int len; /* Space allocated */
658}; 552} pipeline;
659 553
660#define PIPE_MAGIC 0x7f
661#define PIPE_GROW 64 554#define PIPE_GROW 64
662 555
663void pipe_putc(struct pipeline *const pipeline, char c) 556void pipe_putc(char c)
664{ 557{
665 if (pipeline->buf[pipeline->idx] == PIPE_MAGIC) { 558 if(pipeline.idx==pipeline.len) {
666 pipeline->buf = xrealloc(pipeline->buf, pipeline->len + PIPE_GROW); 559 pipeline.buf = xrealloc(pipeline.buf, pipeline.len + PIPE_GROW);
667 memset(pipeline->buf + pipeline->len, 0, PIPE_GROW); 560 pipeline.len+=PIPE_GROW;
668 pipeline->len += PIPE_GROW;
669 pipeline->buf[pipeline->len - 1] = PIPE_MAGIC;
670 } 561 }
671 pipeline->buf[pipeline->idx++] = (c); 562 pipeline.buf[pipeline.idx++] = (c);
672} 563}
673 564
674#define pipeputc(c) pipe_putc(pipeline, c) 565static void do_subst_w_backrefs(const char *line, const char *replace)
675
676static void print_subst_w_backrefs(const char *line, const char *replace,
677 regmatch_t * regmatch, struct pipeline *const pipeline, int matches)
678{ 566{
679 int i; 567 int i,j;
680 568
681 /* go through the replacement string */ 569 /* go through the replacement string */
682 for (i = 0; replace[i]; i++) { 570 for (i = 0; replace[i]; i++) {
683 /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */ 571 /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */
684 if (replace[i] == '\\' && isdigit(replace[i + 1])) { 572 if (replace[i] == '\\' && replace[i+1]>0 && replace[i+1]<=9) {
685 int j; 573 int backref=replace[++i]-'0';
686 char tmpstr[2]; 574
687 int backref;
688
689 ++i; /* i now indexes the backref number, instead of the leading slash */
690 tmpstr[0] = replace[i];
691 tmpstr[1] = 0;
692 backref = atoi(tmpstr);
693 /* print out the text held in regmatch[backref] */ 575 /* print out the text held in regmatch[backref] */
694 if (backref <= matches && regmatch[backref].rm_so != -1) 576 if(regmatch[backref].rm_so != -1)
695 for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; 577 for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; j++)
696 j++) 578 pipe_putc(line[j]);
697 pipeputc(line[j]);
698 } 579 }
699 580
700 /* if we find a backslash escaped character, print the character */ 581 /* if we find a backslash escaped character, print the character */
701 else if (replace[i] == '\\') { 582 else if (replace[i] == '\\') pipe_putc(replace[++i]);
702 ++i;
703 pipeputc(replace[i]);
704 }
705
706 /* if we find an unescaped '&' print out the whole matched text.
707 * fortunately, regmatch[0] contains the indicies to the whole matched
708 * expression (kinda seems like it was designed for just such a
709 * purpose...) */
710 else if (replace[i] == '&') {
711 int j;
712 583
584 /* if we find an unescaped '&' print out the whole matched text. */
585 else if (replace[i] == '&')
713 for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++) 586 for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++)
714 pipeputc(line[j]); 587 pipe_putc(line[j]);
715 } 588 /* Otherwise just output the character. */
716 /* nothing special, just print this char of the replacement string to stdout */ 589 else pipe_putc(replace[i]);
717 else
718 pipeputc(replace[i]);
719 } 590 }
720} 591}
721 592
722static int do_subst_command(sed_cmd_t * sed_cmd, char **line) 593static int do_subst_command(sed_cmd_t * sed_cmd, char **line)
723{ 594{
724 char *hackline = *line; 595 char *oldline = *line;
725 struct pipeline thepipe = { NULL, 0, 0 };
726 struct pipeline *const pipeline = &thepipe;
727 int altered = 0; 596 int altered = 0;
728 int result; 597 int match_count=0;
729 regmatch_t *regmatch = NULL;
730 regex_t *current_regex; 598 regex_t *current_regex;
731 599
600 /* Handle empty regex. */
732 if (sed_cmd->sub_match == NULL) { 601 if (sed_cmd->sub_match == NULL) {
733 current_regex = previous_regex_ptr; 602 current_regex = previous_regex_ptr;
734 } else { 603 if(!current_regex)
735 previous_regex_ptr = current_regex = sed_cmd->sub_match; 604 bb_error_msg_and_die("No previous regexp.");
736 } 605 } else previous_regex_ptr = current_regex = sed_cmd->sub_match;
737 result = regexec(current_regex, hackline, 0, NULL, 0);
738 606
739 /* we only proceed if the substitution 'search' expression matches */ 607 /* Find the first match */
740 if (result == REG_NOMATCH) { 608 if(REG_NOMATCH==regexec(current_regex, oldline, 10, regmatch, 0))
741 return 0; 609 return 0;
742 }
743 610
744 /* whaddaya know, it matched. get the number of back references */ 611 /* Initialize temporary output buffer. */
745 regmatch = xmalloc(sizeof(regmatch_t) * (sed_cmd->num_backrefs + 1)); 612 pipeline.buf=xmalloc(PIPE_GROW);
746 613 pipeline.len=PIPE_GROW;
747 /* allocate more PIPE_GROW bytes 614 pipeline.idx=0;
748 if replaced string is larger than original */ 615
749 thepipe.len = strlen(hackline) + PIPE_GROW; 616 /* Now loop through, substituting for matches */
750 thepipe.buf = xcalloc(1, thepipe.len); 617 do {
751 /* buffer magic */
752 thepipe.buf[thepipe.len - 1] = PIPE_MAGIC;
753
754 /* and now, as long as we've got a line to try matching and if we can match
755 * the search string, we make substitutions */
756 while ((*hackline || !altered)
757 && (regexec(current_regex, hackline, sed_cmd->num_backrefs + 1,
758 regmatch, 0) != REG_NOMATCH)) {
759 int i; 618 int i;
760 619
620 match_count++;
621
622 /* If we aren't interested in this match, output old line to
623 end of match and continue */
624 if(sed_cmd->which_match && sed_cmd->which_match!=match_count) {
625 for(i=0;i<regmatch[0].rm_eo;i++)
626 pipe_putc(oldline[i]);
627 continue;
628 }
629
761 /* print everything before the match */ 630 /* print everything before the match */
762 for (i = 0; i < regmatch[0].rm_so; i++) 631 for (i = 0; i < regmatch[0].rm_so; i++) pipe_putc(oldline[i]);
763 pipeputc(hackline[i]);
764 632
765 /* then print the substitution string */ 633 /* then print the substitution string */
766 print_subst_w_backrefs(hackline, sed_cmd->replace, regmatch, pipeline, 634 do_subst_w_backrefs(oldline, sed_cmd->string);
767 sed_cmd->num_backrefs);
768 635
769 /* advance past the match */ 636 /* advance past the match */
770 hackline += regmatch[0].rm_eo; 637 oldline += regmatch[0].rm_eo;
771 /* flag that something has changed */ 638 /* flag that something has changed */
772 altered++; 639 altered++;
773 640
774 /* if we're not doing this globally, get out now */ 641 /* if we're not doing this globally, get out now */
775 if (!sed_cmd->sub_g) { 642 if (sed_cmd->which_match) break;
776 break; 643 } while (*oldline && (regexec(current_regex, oldline, 10, regmatch, 0) != REG_NOMATCH));
777 }
778 }
779 for (; *hackline; hackline++)
780 pipeputc(*hackline);
781 if (thepipe.buf[thepipe.idx] == PIPE_MAGIC)
782 thepipe.buf[thepipe.idx] = 0;
783 644
784 /* cleanup */ 645 /* Copy rest of string into output pipeline */
785 free(regmatch); 646
647 while(*oldline) pipe_putc(*(oldline++));
648 pipe_putc(0);
786 649
787 free(*line); 650 free(*line);
788 *line = thepipe.buf; 651 *line = pipeline.buf;
789 return altered; 652 return altered;
790} 653}
791 654
655/* Set command pointer to point to this label. (Does not handle null label.) */
792static sed_cmd_t *branch_to(const char *label) 656static sed_cmd_t *branch_to(const char *label)
793{ 657{
794 sed_cmd_t *sed_cmd; 658 sed_cmd_t *sed_cmd;
795 659
796 for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { 660 for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
797 if ((sed_cmd->cmd == ':') && (sed_cmd->label) && (strcmp(sed_cmd->label, label) == 0)) { 661 if ((sed_cmd->cmd == ':') && (sed_cmd->string) && (strcmp(sed_cmd->string, label) == 0)) {
798 return (sed_cmd); 662 return (sed_cmd);
799 } 663 }
800 } 664 }
801 bb_error_msg_and_die("Can't find label for jump to `%s'", label); 665 bb_error_msg_and_die("Can't find label for jump to `%s'", label);
802} 666}
803 667
804static void process_file(FILE * file) 668/* Append copy of string to append buffer */
669static void append(char *s)
805{ 670{
806 char *pattern_space; /* Posix requires it be able to hold at least 8192 bytes */ 671 struct append_list *temp=calloc(1,sizeof(struct append_list));
807 char *hold_space = NULL; /* Posix requires it be able to hold at least 8192 bytes */ 672
808 static int linenum = 0; /* GNU sed does not restart counting lines at EOF */ 673 if(append_head)
809 int altered; 674 append_tail=(append_tail->next=temp);
810 int force_print; 675 else append_head=append_tail=temp;
811 676 temp->string=strdup(s);
812 pattern_space = bb_get_chomped_line_from_file(file); 677}
813 if (pattern_space == NULL) { 678
814 return; 679static void flush_append(void)
680{
681 /* Output appended lines. */
682 while(append_head) {
683 puts(append_head->string);
684 append_tail=append_head->next;
685 free(append_head->string);
686 free(append_head);
687 append_head=append_tail;
688 }
689 append_head=append_tail=NULL;
690}
691
692/* Get next line of input, flushing append buffer and noting if we hit EOF
693 * without a newline on the last line.
694 */
695static char *get_next_line(FILE * file, int *no_newline)
696{
697 char *temp;
698 int len;
699
700 flush_append();
701 temp=bb_get_line_from_file(file);
702 if(temp) {
703 len=strlen(temp);
704 if(len && temp[len-1]=='\n') temp[len-1]=0;
705 else *no_newline=1;
815 } 706 }
816 707
708 return temp;
709}
710
711/* Output line of text. missing_newline means the last line output did not
712 end with a newline. no_newline means this line does not end with a
713 newline. */
714
715static int puts_maybe_newline(char *s, FILE *file, int missing_newline, int no_newline)
716{
717 if(missing_newline) fputc('\n',file);
718 fputs(s,file);
719 if(!no_newline) fputc('\n',file);
720
721 return no_newline;
722}
723
724#define sed_puts(s,n) missing_newline=puts_maybe_newline(s,stdout,missing_newline,n)
725
726static void process_file(FILE * file)
727{
728 char *pattern_space, *next_line, *hold_space=NULL;
729 static int linenum = 0, missing_newline=0;
730 int no_newline,next_no_newline=0;
731
732 next_line = get_next_line(file,&next_no_newline);
733
817 /* go through every line in the file */ 734 /* go through every line in the file */
818 do { 735 for(;;) {
819 char *next_line;
820 sed_cmd_t *sed_cmd; 736 sed_cmd_t *sed_cmd;
821 int substituted = 0; 737 int substituted=0;
822 /* This enables whole blocks of commands to be mask'ed out if the lead address doesnt match */ 738
823 int block_mask = 1; 739 /* Advance to next line. Stop if out of lines. */
740 if(!(pattern_space=next_line)) break;
741 no_newline=next_no_newline;
824 742
825 /* Read one line in advance so we can act on the last line, the '$' address */ 743 /* Read one line in advance so we can act on the last line, the '$' address */
826 next_line = bb_get_chomped_line_from_file(file); 744 next_line = get_next_line(file,&next_no_newline);
827 linenum++; 745 linenum++;
828 altered = 0; 746restart:
829 force_print = 0;
830
831 /* for every line, go through all the commands */ 747 /* for every line, go through all the commands */
832 for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { 748 for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
833 int deleted = 0; 749 int matched;
834 750
835 /* 751 /* Determine if this command matches this line: */
836 * entry point into sedding... 752
837 */ 753 /* Are we continuing a previous multi-line match? */
838 int matched = ( 754
839 /* no range necessary */ 755 sed_cmd->in_match = sed_cmd->in_match
840 (sed_cmd->beg_line == 0 && sed_cmd->end_line == 0 756
841 && sed_cmd->beg_match == NULL 757 /* Or is no range necessary? */
842 && sed_cmd->end_match == NULL) || 758 || (!sed_cmd->beg_line && !sed_cmd->end_line
843 /* this line number is the first address we're looking for */ 759 && !sed_cmd->beg_match && !sed_cmd->end_match)
844 (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum)) || 760
845 /* this line matches our first address regex */ 761 /* Or did we match the start of a numerical range? */
846 (sed_cmd->beg_match 762 || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum))
847 && (regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 763
848 0) == 0)) || 764 /* Or does this line match our begin address regex? */
849 /* we are currently within the beginning & ending address range */ 765 || (sed_cmd->beg_match &&
850 sed_cmd->still_in_range || ((sed_cmd->beg_line == -1) 766 !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0))
851 && (next_line == NULL)) 767
768 /* Or did we match last line of input? */
769 || (sed_cmd->beg_line == -1 && next_line == NULL);
770
771 /* Snapshot the value */
772
773 matched = sed_cmd->in_match;
774
775 /* Is this line the end of the current match? */
776
777 if(matched) {
778 sed_cmd->in_match = !(
779 /* has the ending line come, or is this a single address command? */
780 (sed_cmd->end_line ?
781 sed_cmd->end_line==-1 ?
782 !next_line
783 : sed_cmd->end_line<=linenum
784 : !sed_cmd->end_match)
785 /* or does this line matches our last address regex */
786 || (sed_cmd->end_match && (regexec(sed_cmd->end_match, pattern_space, 0, NULL, 0) == 0))
852 ); 787 );
788 }
789
790 /* Skip blocks of commands we didn't match. */
853 if (sed_cmd->cmd == '{') { 791 if (sed_cmd->cmd == '{') {
854 block_mask = block_mask & matched; 792 if(sed_cmd->invert ? matched : !matched)
793 while(sed_cmd && sed_cmd->cmd!='}') sed_cmd=sed_cmd->next;
794 if(!sed_cmd) bb_error_msg_and_die("Unterminated {");
795 continue;
855 } 796 }
856// matched &= block_mask;
857 797
858 if (sed_cmd->invert ^ (matched & block_mask)) { 798 /* Okay, so did this line match? */
859 /* Update last used regex incase a blank substitute BRE is found */ 799 if (sed_cmd->invert ? !matched : matched) {
800 /* Update last used regex in case a blank substitute BRE is found */
860 if (sed_cmd->beg_match) { 801 if (sed_cmd->beg_match) {
861 previous_regex_ptr = sed_cmd->beg_match; 802 previous_regex_ptr = sed_cmd->beg_match;
862 } 803 }
863 804
864 /* 805 /* actual sedding */
865 * actual sedding
866 */
867 switch (sed_cmd->cmd) { 806 switch (sed_cmd->cmd) {
868 case '=': 807
869 printf("%d\n", linenum); 808 /* Print line number */
870 break; 809 case '=':
871 case 'P':{ 810 printf("%d\n", linenum);
872 /* Write the current pattern space upto the first newline */
873 char *tmp = strchr(pattern_space, '\n');
874
875 if (tmp) {
876 *tmp = '\0';
877 puts(pattern_space);
878 *tmp = '\n';
879 break; 811 break;
880 } 812
881 /* Fall Through */ 813 /* Write the current pattern space up to the first newline */
882 } 814 case 'P':
883 case 'p': /* Write the current pattern space to output */
884 puts(pattern_space);
885 break;
886 case 'd':
887 altered++;
888 deleted = 1;
889 force_print = 0;
890 break;
891
892 case 's':
893
894 /*
895 * Some special cases for 's' printing to make it compliant with
896 * GNU sed printing behavior (aka "The -n | s///p Matrix"):
897 *
898 * -n ONLY = never print anything regardless of any successful
899 * substitution
900 *
901 * s///p ONLY = always print successful substitutions, even if
902 * the pattern_space is going to be printed anyway (pattern_space
903 * will be printed twice).
904 *
905 * -n AND s///p = print ONLY a successful substitution ONE TIME;
906 * no other lines are printed - this is the reason why the 'p'
907 * flag exists in the first place.
908 */
909
910#ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE
911 /* HACK: escape newlines twice so regex can match them */
912 {
913 int offset = 0;
914 char *tmp = strchr(pattern_space + offset, '\n');
915 while ((tmp = strchr(pattern_space + offset, '\n')) != NULL) {
916 offset = tmp - pattern_space;
917 pattern_space = xrealloc(pattern_space, strlen(pattern_space) + 2);
918 tmp = pattern_space + offset;
919 memmove(tmp + 1, tmp, strlen(tmp) + 1);
920 tmp[0] = '\\';
921 tmp[1] = 'n';
922 offset += 2;
923 }
924 }
925#endif
926 /* we print the pattern_space once, unless we were told to be quiet */
927 substituted |= do_subst_command(sed_cmd, &pattern_space);
928#ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE
929 /* undo HACK: escape newlines twice so regex can match them */
930 { 815 {
931 char *tmp = pattern_space; 816 char *tmp = strchr(pattern_space, '\n');
932 817
933 while ((tmp = strstr(tmp, "\\n")) != NULL) { 818 if (tmp) {
934 memmove(tmp, tmp + 1, strlen(tmp + 1) + 1); 819 *tmp = '\0';
935 tmp[0] = '\n'; 820 sed_puts(pattern_space,1);
821 *tmp = '\n';
822 break;
936 } 823 }
824 /* Fall Through */
937 } 825 }
938#endif 826
939 if (!be_quiet && substituted && ((sed_cmd->next == NULL) 827 /* Write the current pattern space to output */
940 || (sed_cmd->next->cmd != 's'))) { 828 case 'p':
941 force_print = 1; 829 sed_puts(pattern_space,no_newline);
942 } 830 break;
943 /* we also print the line if we were given the 'p' flag 831 /* Delete up through first newline */
944 * (this is quite possibly the second printing) */ 832 case 'D':
945 if ((sed_cmd->sub_p) && (altered || substituted)) { 833 {
946 puts(pattern_space); 834 char *tmp = strchr(pattern_space,'\n');
835
836 if(tmp) {
837 tmp=bb_xstrdup(tmp+1);
838 free(pattern_space);
839 pattern_space=tmp;
840 goto restart;
841 }
947 } 842 }
948 break; 843 /* discard this line. */
949 case 'a': 844 case 'd':
950 puts(pattern_space); 845 goto discard_line;
951 fputs(sed_cmd->editline, stdout); 846
952 altered++; 847 /* Substitute with regex */
953 break; 848 case 's':
954 849 if(do_subst_command(sed_cmd, &pattern_space)) {
955 case 'i': 850 substituted|=1;
956 fputs(sed_cmd->editline, stdout); 851
957 break; 852 /* handle p option */
958 853 if(sed_cmd->sub_p)
959 case 'c': 854 sed_puts(pattern_space,no_newline);
960 /* single-address case */ 855 /* handle w option */
961 if ((sed_cmd->end_match == NULL && sed_cmd->end_line == 0) 856 if(sed_cmd->file)
962 /* multi-address case */ 857 sed_cmd->no_newline=puts_maybe_newline(pattern_space, sed_cmd->file, sed_cmd->no_newline, no_newline);
963 /* - matching text */ 858
964 || (sed_cmd->end_match 859 }
965 && (regexec(sed_cmd->end_match, pattern_space, 0, 860 break;
966 NULL, 0) == 0)) 861
967 /* - matching line numbers */ 862 /* Append line to linked list to be printed later */
968 || (sed_cmd->end_line > 0 863 case 'a':
969 && sed_cmd->end_line == linenum)) { 864 {
970 fputs(sed_cmd->editline, stdout); 865 append(sed_cmd->string);
866 break;
971 } 867 }
972 altered++;
973 868
974 break; 869 /* Insert text before this line */
870 case 'i':
871 sed_puts(sed_cmd->string,1);
872 break;
873
874 /* Cut and paste text (replace) */
875 case 'c':
876 /* Only triggers on last line of a matching range. */
877 if (!sed_cmd->in_match) sed_puts(sed_cmd->string,1);
878 goto discard_line;
975 879
976 case 'r':{ 880 /* Read file, append contents to output */
977 FILE *outfile; 881 case 'r':
882 {
883 FILE *outfile;
978 884
979 outfile = fopen(sed_cmd->filename, "r"); 885 outfile = fopen(sed_cmd->string, "r");
980 if (outfile) { 886 if (outfile) {
981 char *line; 887 char *line;
982 888
983 while ((line = 889 while ((line = bb_get_chomped_line_from_file(outfile))
984 bb_get_chomped_line_from_file(outfile)) != 890 != NULL)
985 NULL) { 891 append(line);
986 pattern_space = 892 bb_xprint_and_close_file(outfile);
987 xrealloc(pattern_space,
988 strlen(line) + strlen(pattern_space) + 2);
989 strcat(pattern_space, "\n");
990 strcat(pattern_space, line);
991 } 893 }
992 bb_xprint_and_close_file(outfile);
993 }
994 894
995 }
996 break;
997 case 'q': /* Branch to end of script and quit */
998 deleted = 1;
999 /* Exit the outer while loop */
1000 free(next_line);
1001 next_line = NULL;
1002 break;
1003 case 'n': /* Read next line from input */
1004 if (!be_quiet) {
1005 puts(pattern_space);
1006 }
1007 if (next_line) {
1008 free(pattern_space);
1009 pattern_space = next_line;
1010 next_line = bb_get_chomped_line_from_file(file);
1011 linenum++;
1012 } else {
1013 /* Jump to end of script and exit */
1014 deleted = 1;
1015 next_line = NULL;
1016 }
1017 break;
1018 case 'N': /* Append the next line to the current line */
1019 if (next_line == NULL) {
1020 /* Jump to end of script and exit */
1021 deleted = 1;
1022#ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY
1023 /* GNU sed will add the newline character
1024 * The GNU sed info page labels this as a bug that wont be fixed
1025 */
1026 next_line = calloc(1,1);
1027#else
1028 next_line = NULL;
1029 break; 895 break;
1030#endif
1031 } 896 }
1032 pattern_space = realloc(pattern_space, strlen(pattern_space) + strlen(next_line) + 2); 897
1033 strcat(pattern_space, "\n"); 898 /* Write pattern space to file. */
1034 strcat(pattern_space, next_line); 899 case 'w':
1035 next_line = bb_get_chomped_line_from_file(file); 900 sed_cmd->no_newline=puts_maybe_newline(pattern_space,sed_cmd->file, sed_cmd->no_newline,no_newline);
1036 linenum++; 901 break;
1037 break; 902
1038 case 't': 903 /* Read next line from input */
1039 if (substituted) 904 case 'n':
1040 /* Fall through */ 905 if (!be_quiet)
1041 case 'b': 906 sed_puts(pattern_space,no_newline);
907 if (next_line) {
908 free(pattern_space);
909 pattern_space = next_line;
910 no_newline=next_no_newline;
911 next_line = get_next_line(file,&next_no_newline);
912 linenum++;
913 break;
914 }
915 /* fall through */
916
917 /* Quit. End of script, end of input. */
918 case 'q':
919 /* Exit the outer while loop */
920 free(next_line);
921 next_line = NULL;
922 goto discard_commands;
923
924 /* Append the next line to the current line */
925 case 'N':
1042 { 926 {
1043 if (sed_cmd->label == NULL) { 927 /* If no next line, jump to end of script and exit. */
1044 /* Jump to end of script */ 928 if (next_line == NULL) {
1045 deleted = 1; 929 /* Jump to end of script and exit */
930 free(next_line);
931 next_line = NULL;
932 goto discard_line;
933 /* append next_line, read new next_line. */
1046 } else { 934 } else {
1047 sed_cmd = branch_to(sed_cmd->label); 935 int len=strlen(pattern_space);
936
937 pattern_space = realloc(pattern_space, len + strlen(next_line) + 2);
938 pattern_space[len]='\n';
939 strcpy(pattern_space+len+1, next_line);
940 no_newline=next_no_newline;
941 next_line = get_next_line(file,&next_no_newline);
942 linenum++;
1048 } 943 }
1049 /* Reset the substitution flag */ 944 break;
1050 substituted = 0;
1051 } 945 }
1052 break;
1053 case 'y':{
1054 int i;
1055 946
1056 for (i = 0; pattern_space[i] != 0; i++) { 947 /* Test if substition worked, branch if so. */
1057 int j; 948 case 't':
949 if (!substituted) break;
950 substituted=0;
951 /* Fall through */
952 /* Branch to label */
953 case 'b':
954 if (!sed_cmd->string) goto discard_commands;
955 else sed_cmd = branch_to(sed_cmd->string);
956 break;
957 /* Transliterate characters */
958 case 'y':
959 {
960 int i;
961
962 for (i = 0; pattern_space[i]; i++) {
963 int j;
1058 964
1059 for (j = 0; sed_cmd->translate[j]; j += 2) { 965 for (j = 0; sed_cmd->string[j]; j += 2) {
1060 if (pattern_space[i] == sed_cmd->translate[j]) { 966 if (pattern_space[i] == sed_cmd->string[j]) {
1061 pattern_space[i] = sed_cmd->translate[j + 1]; 967 pattern_space[i] = sed_cmd->string[j + 1];
968 }
1062 } 969 }
1063 } 970 }
1064 }
1065 }
1066 break;
1067 case 'g': /* Replace pattern space with hold space */
1068 free(pattern_space);
1069 if (hold_space) {
1070 pattern_space = strdup(hold_space);
1071 }
1072 break;
1073 case 'G': { /* Append newline and hold space to pattern space */
1074 int pattern_space_size = 2;
1075 int hold_space_size = 0;
1076 971
1077 if (pattern_space) { 972 break;
1078 pattern_space_size += strlen(pattern_space);
1079 }
1080 if (hold_space) {
1081 hold_space_size = strlen(hold_space);
1082 } 973 }
1083 pattern_space = xrealloc(pattern_space, pattern_space_size + hold_space_size); 974 case 'g': /* Replace pattern space with hold space */
1084 if (pattern_space_size == 2) { 975 free(pattern_space);
1085 strcpy(pattern_space, "\n"); 976 if (hold_space) {
1086 } else { 977 pattern_space = strdup(hold_space);
978 no_newline=0;
979 }
980 break;
981 case 'G': /* Append newline and hold space to pattern space */
982 {
983 int pattern_space_size = 2;
984 int hold_space_size = 0;
985
986 if (pattern_space)
987 pattern_space_size += strlen(pattern_space);
988 if (hold_space) hold_space_size = strlen(hold_space);
989 pattern_space = xrealloc(pattern_space, pattern_space_size + hold_space_size);
990 if (pattern_space_size == 2) pattern_space[0]=0;
1087 strcat(pattern_space, "\n"); 991 strcat(pattern_space, "\n");
992 if (hold_space) strcat(pattern_space, hold_space);
993 no_newline=0;
994
995 break;
1088 } 996 }
1089 if (hold_space) { 997 case 'h': /* Replace hold space with pattern space */
1090 strcat(pattern_space, hold_space); 998 free(hold_space);
1091 } 999 hold_space = strdup(pattern_space);
1092 break; 1000 break;
1093 } 1001 case 'H': /* Append newline and pattern space to hold space */
1094 case 'h': /* Replace hold space with pattern space */ 1002 {
1095 free(hold_space); 1003 int hold_space_size = 2;
1096 hold_space = strdup(pattern_space); 1004 int pattern_space_size = 0;
1097 break;
1098 case 'H': { /* Append newline and pattern space to hold space */
1099 int hold_space_size = 2;
1100 int pattern_space_size = 0;
1101
1102 if (hold_space) {
1103 hold_space_size += strlen(hold_space);
1104 }
1105 if (pattern_space) {
1106 pattern_space_size = strlen(pattern_space);
1107 }
1108 hold_space = xrealloc(hold_space, hold_space_size + pattern_space_size);
1109 1005
1110 if (hold_space_size == 2) { 1006 if (hold_space) hold_space_size += strlen(hold_space);
1111 strcpy(hold_space, "\n"); 1007 if (pattern_space)
1112 } else { 1008 pattern_space_size = strlen(pattern_space);
1009 hold_space = xrealloc(hold_space,
1010 hold_space_size + pattern_space_size);
1011
1012 if (hold_space_size == 2) hold_space[0]=0;
1113 strcat(hold_space, "\n"); 1013 strcat(hold_space, "\n");
1014 if (pattern_space) strcat(hold_space, pattern_space);
1015
1016 break;
1114 } 1017 }
1115 if (pattern_space) { 1018 case 'x': /* Exchange hold and pattern space */
1116 strcat(hold_space, pattern_space); 1019 {
1020 char *tmp = pattern_space;
1021 pattern_space = hold_space;
1022 no_newline=0;
1023 hold_space = tmp;
1024 break;
1117 } 1025 }
1118 break;
1119 }
1120 case 'x':{
1121 /* Swap hold and pattern space */
1122 char *tmp = pattern_space;
1123 pattern_space = hold_space;
1124 hold_space = tmp;
1125 break;
1126 }
1127 } 1026 }
1128 } 1027 }
1129
1130 /*
1131 * exit point from sedding...
1132 */
1133 if (matched) {
1134 if (
1135 /* this is a single-address command or... */
1136 (sed_cmd->end_line == 0 && sed_cmd->end_match == NULL)
1137 /* If only one address */
1138 /* we were in the middle of our address range (this
1139 * isn't the first time through) and.. */
1140 || ((sed_cmd->still_in_range == 1)
1141 /* this line number is the last address we're looking for or... */
1142 && ((sed_cmd->end_line > 0
1143 && (sed_cmd->end_line == linenum))
1144 /* this line matches our last address regex */
1145 || (sed_cmd->end_match
1146 && (regexec(sed_cmd->end_match, pattern_space,
1147 0, NULL, 0) == 0))))) {
1148 /* we're out of our address range */
1149 sed_cmd->still_in_range = 0;
1150 } else {
1151 /* didn't hit the exit? then we're still in the middle of an address range */
1152 sed_cmd->still_in_range = 1;
1153 }
1154 }
1155
1156 if (sed_cmd->cmd == '}') {
1157 block_mask = 1;
1158 }
1159
1160 if (deleted)
1161 break;
1162
1163 } 1028 }
1164 1029
1165 /* we will print the line unless we were told to be quiet or if the 1030 /*
1166 * line was altered (via a 'd'elete or 's'ubstitution), in which case 1031 * exit point from sedding...
1167 * the altered line was already printed */ 1032 */
1168 if ((!be_quiet && !altered && !substituted) || force_print) { 1033discard_commands:
1169 puts(pattern_space); 1034 /* we will print the line unless we were told to be quiet ('-n')
1170 } 1035 or if the line was suppressed (ala 'd'elete) */
1036 if (!be_quiet) sed_puts(pattern_space,no_newline);
1037
1038 /* Delete and such jump here. */
1039discard_line:
1040 flush_append();
1171 free(pattern_space); 1041 free(pattern_space);
1172 pattern_space = next_line; 1042 }
1173 } while (pattern_space);
1174} 1043}
1175 1044
1176extern int sed_main(int argc, char **argv) 1045extern int sed_main(int argc, char **argv)
@@ -1179,7 +1048,7 @@ extern int sed_main(int argc, char **argv)
1179 1048
1180#ifdef CONFIG_FEATURE_CLEAN_UP 1049#ifdef CONFIG_FEATURE_CLEAN_UP
1181 /* destroy command strings on exit */ 1050 /* destroy command strings on exit */
1182 if (atexit(destroy_cmd_strs) == -1) 1051 if (atexit(free_and_close_stuff) == -1)
1183 bb_perror_msg_and_die("atexit"); 1052 bb_perror_msg_and_die("atexit");
1184#endif 1053#endif
1185 1054
@@ -1189,17 +1058,46 @@ extern int sed_main(int argc, char **argv)
1189 case 'n': 1058 case 'n':
1190 be_quiet++; 1059 be_quiet++;
1191 break; 1060 break;
1192 case 'e':{ 1061 case 'e':
1193 add_cmd_str(optarg); 1062 {
1063 int go=1;
1064 char *temp=bb_xstrdup(optarg),*temp2=temp;
1065
1066 /* It is possible to have a command line argument with embedded
1067 newlines. This counts as a multi-line argument. */
1068
1069 while(go) {
1070 int len=strcspn(temp2,"\n");
1071 if(!temp2[len]) go=0;
1072 else temp2[len]=0;
1073 add_cmd(temp2);
1074 temp2+=len+1;
1075 }
1076 free(temp);
1194 break; 1077 break;
1195 } 1078 }
1196 case 'f': 1079 case 'f':
1197 load_cmd_file(optarg); 1080 {
1081 FILE *cmdfile;
1082 char *line;
1083
1084 cmdfile = bb_xfopen(optarg, "r");
1085
1086 while ((line = bb_get_chomped_line_from_file(cmdfile))
1087 != NULL) {
1088 add_cmd(line);
1089 free(line);
1090 }
1091 bb_xprint_and_close_file(cmdfile);
1092
1198 break; 1093 break;
1094 }
1199 default: 1095 default:
1200 bb_show_usage(); 1096 bb_show_usage();
1201 } 1097 }
1202 } 1098 }
1099 /* Flush any unfinished commands. */
1100 add_cmd("");
1203 1101
1204 /* if we didn't get a pattern from a -e and no command file was specified, 1102 /* if we didn't get a pattern from a -e and no command file was specified,
1205 * argv[optind] should be the pattern. no pattern, no worky */ 1103 * argv[optind] should be the pattern. no pattern, no worky */
@@ -1207,7 +1105,7 @@ extern int sed_main(int argc, char **argv)
1207 if (argv[optind] == NULL) 1105 if (argv[optind] == NULL)
1208 bb_show_usage(); 1106 bb_show_usage();
1209 else 1107 else
1210 add_cmd_str(argv[optind++]); 1108 add_cmd(argv[optind++]);
1211 } 1109 }
1212 1110
1213 /* argv[(optind)..(argc-1)] should be names of file to process. If no 1111 /* argv[(optind)..(argc-1)] should be names of file to process. If no
diff --git a/include/libbb.h b/include/libbb.h
index 2bb5ce02d..eb6841d33 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -28,6 +28,7 @@
28#include <stdarg.h> 28#include <stdarg.h>
29#include <sys/stat.h> 29#include <sys/stat.h>
30#include <sys/types.h> 30#include <sys/types.h>
31#include <regex.h>
31#include <termios.h> 32#include <termios.h>
32 33
33#include <netdb.h> 34#include <netdb.h>
@@ -468,5 +469,5 @@ extern void print_login_prompt(void);
468extern void vfork_daemon_rexec(int argc, char **argv, char *foreground_opt); 469extern void vfork_daemon_rexec(int argc, char **argv, char *foreground_opt);
469extern void get_terminal_width_height(int fd, int *width, int *height); 470extern void get_terminal_width_height(int fd, int *width, int *height);
470extern unsigned long get_ug_id(const char *s, long (*my_getxxnam)(const char *)); 471extern unsigned long get_ug_id(const char *s, long (*my_getxxnam)(const char *));
471 472extern void xregcomp(regex_t *preg, const char *regex, int cflags);
472#endif /* __LIBCONFIG_H__ */ 473#endif /* __LIBCONFIG_H__ */