diff options
author | Glenn L McGrath <bug1@ihug.co.nz> | 2003-10-01 03:06:16 +0000 |
---|---|---|
committer | Glenn L McGrath <bug1@ihug.co.nz> | 2003-10-01 03:06:16 +0000 |
commit | aa5a602689265a4351c890efe5d8e7793e777e3c (patch) | |
tree | e253699e7d4abbccc47e387e09ef1c2c6d4716c4 | |
parent | e6ba16f830bf2524b39e3e1b7c8157808921f95d (diff) | |
download | busybox-w32-aa5a602689265a4351c890efe5d8e7793e777e3c.tar.gz busybox-w32-aa5a602689265a4351c890efe5d8e7793e777e3c.tar.bz2 busybox-w32-aa5a602689265a4351c890efe5d8e7793e777e3c.zip |
Patch by Rob Landley, work in progress update, fixes lots of bugs,
introduces a few others (but they are being worked on)
-rw-r--r-- | editors/Config.in | 22 | ||||
-rw-r--r-- | editors/sed.c | 1458 | ||||
-rw-r--r-- | include/libbb.h | 3 |
3 files changed, 680 insertions, 803 deletions
diff --git a/editors/Config.in b/editors/Config.in index bced12cb1..b491c2416 100644 --- a/editors/Config.in +++ b/editors/Config.in | |||
@@ -33,28 +33,6 @@ config CONFIG_SED | |||
33 | sed is used to perform text transformations on a file | 33 | sed is used to perform text transformations on a file |
34 | or input from a pipeline. | 34 | or input from a pipeline. |
35 | 35 | ||
36 | config CONFIG_FEATURE_SED_EMBEDED_NEWLINE | ||
37 | bool " Embeded newline (EXPERIMENTAL)" | ||
38 | default n | ||
39 | depends on CONFIG_SED | ||
40 | help | ||
41 | This is a hack to allow matching of '\n' in regular expressions. | ||
42 | It works by translating '\n' to "\n" and back. | ||
43 | It may introduce unexpected results if you use "\n" in your text. | ||
44 | |||
45 | config CONFIG_FEATURE_SED_GNU_COMPATABILITY | ||
46 | bool " Behave consistent with GNU sed" | ||
47 | default y | ||
48 | depends on CONFIG_SED | ||
49 | help | ||
50 | Where GNU sed doesnt follow the posix standard, do as GNU sed does. | ||
51 | Current difference are in | ||
52 | - N command with odd number of lines (see GNU sed info page) | ||
53 | - Blanks before substitution flags eg. | ||
54 | GNU sed interprets 's/a/b/ g' as 's/a/b/g' | ||
55 | Standard says 's/a/b/ g' should be 's/a/b/;g' | ||
56 | - GNU sed allows blanks between a '!' and the function. | ||
57 | |||
58 | config CONFIG_VI | 36 | config CONFIG_VI |
59 | bool "vi" | 37 | bool "vi" |
60 | default n | 38 | default n |
diff --git a/editors/sed.c b/editors/sed.c index 1c016ac57..6452a321c 100644 --- a/editors/sed.c +++ b/editors/sed.c | |||
@@ -1,3 +1,4 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
1 | /* | 2 | /* |
2 | * sed.c - very minimalist version of sed | 3 | * sed.c - very minimalist version of sed |
3 | * | 4 | * |
@@ -22,6 +23,24 @@ | |||
22 | * | 23 | * |
23 | */ | 24 | */ |
24 | 25 | ||
26 | /* Code overview. | ||
27 | |||
28 | Files are laid out to avoid unnecessary function declarations. So for | ||
29 | example, every function add_cmd calls occurs before add_cmd in this file. | ||
30 | |||
31 | add_cmd() is called on each line of sed command text (from a file or from | ||
32 | the command line). It calls get_address() and parse_cmd_args(). The | ||
33 | resulting sed_cmd_t structures are appended to a linked list | ||
34 | (sed_cmd_head/sed_cmd_tail). | ||
35 | |||
36 | process_file() does actual sedding, reading data lines from an input FILE * | ||
37 | (which could be stdin) and applying the sed command list (sed_cmd_head) to | ||
38 | each of the resulting lines. | ||
39 | |||
40 | sed_main() is where external code calls into this, with a command line. | ||
41 | */ | ||
42 | |||
43 | |||
25 | /* | 44 | /* |
26 | Supported features and commands in this version of sed: | 45 | Supported features and commands in this version of sed: |
27 | 46 | ||
@@ -64,84 +83,72 @@ | |||
64 | #include "busybox.h" | 83 | #include "busybox.h" |
65 | 84 | ||
66 | typedef struct sed_cmd_s { | 85 | typedef struct sed_cmd_s { |
67 | /* Order by alignment requirements */ | 86 | /* Ordered by alignment requirements: currently 36 bytes on x86 */ |
68 | |||
69 | /* address storage */ | ||
70 | regex_t *beg_match; /* sed -e '/match/cmd' */ | ||
71 | regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */ | ||
72 | |||
73 | int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */ | ||
74 | int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */ | ||
75 | 87 | ||
76 | /* inversion flag */ | 88 | /* address storage */ |
77 | int invert; /* the '!' after the address */ | 89 | regex_t *beg_match; /* sed -e '/match/cmd' */ |
90 | regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */ | ||
91 | regex_t *sub_match; /* For 's/sub_match/string/' */ | ||
92 | int beg_line; /* 'sed 1p' 0 == apply commands to all lines */ | ||
93 | int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */ | ||
78 | 94 | ||
79 | /* Runtime flag no not if the current command match's */ | 95 | FILE *file; /* File (sr) command writes to, -1 for none. */ |
80 | int still_in_range; | 96 | char *string; /* Data string for (saicytb) commands. */ |
81 | 97 | ||
82 | /* SUBSTITUTION COMMAND SPECIFIC FIELDS */ | 98 | unsigned short which_match; /* (s) Which match to replace (0 for all) */ |
83 | 99 | ||
84 | /* sed -e 's/sub_match/replace/' */ | 100 | /* Bitfields (gcc won't group them if we don't) */ |
85 | regex_t *sub_match; | 101 | unsigned int invert:1; /* the '!' after the address */ |
86 | char *replace; | 102 | unsigned int in_match:1; /* Next line also included in match? */ |
103 | unsigned int no_newline:1; /* Last line written by (sr) had no '\n' */ | ||
104 | unsigned int sub_p:1; /* (s) print option */ | ||
87 | 105 | ||
88 | /* EDIT COMMAND (a,i,c) SPECIFIC FIELDS */ | ||
89 | char *editline; | ||
90 | |||
91 | /* FILE COMMAND (r) SPECIFIC FIELDS */ | ||
92 | char *filename; | ||
93 | |||
94 | /* SUBSTITUTION COMMAND SPECIFIC FIELDS */ | ||
95 | |||
96 | unsigned int num_backrefs:4; /* how many back references (\1..\9) */ | ||
97 | /* Note: GNU/POSIX sed does not save more than nine backrefs, so | ||
98 | * we only use 4 bits to hold the number */ | ||
99 | unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */ | ||
100 | unsigned int sub_p:1; /* sed -e 's/foo/bar/p' (print substitution) */ | ||
101 | |||
102 | /* TRANSLATE COMMAND */ | ||
103 | char *translate; | ||
104 | |||
105 | /* GENERAL FIELDS */ | ||
106 | /* the command */ | ||
107 | char cmd; /* p,d,s (add more at your leisure :-) */ | ||
108 | |||
109 | /* Branch commands */ | ||
110 | char *label; | ||
111 | |||
112 | /* next command in list (sequential list of specified commands) */ | ||
113 | struct sed_cmd_s *next; | ||
114 | 106 | ||
107 | /* GENERAL FIELDS */ | ||
108 | char cmd; /* The command char: abcdDgGhHilnNpPqrstwxy:={} */ | ||
109 | struct sed_cmd_s *next; /* Next command (linked list, NULL terminated) */ | ||
115 | } sed_cmd_t; | 110 | } sed_cmd_t; |
116 | 111 | ||
117 | |||
118 | /* externs */ | ||
119 | extern void xregcomp(regex_t * preg, const char *regex, int cflags); | ||
120 | extern int optind; /* in unistd.h */ | ||
121 | extern char *optarg; /* ditto */ | ||
122 | |||
123 | /* globals */ | 112 | /* globals */ |
124 | /* options */ | 113 | /* options */ |
125 | static int be_quiet = 0; | 114 | static int be_quiet = 0; |
115 | |||
126 | static const char bad_format_in_subst[] = | 116 | static const char bad_format_in_subst[] = |
127 | "bad format in substitution expression"; | 117 | "bad format in substitution expression"; |
118 | const char *const semicolon_whitespace = "; \n\r\t\v"; | ||
119 | |||
120 | regmatch_t regmatch[10]; | ||
121 | static regex_t *previous_regex_ptr = NULL; | ||
128 | 122 | ||
129 | /* linked list of sed commands */ | 123 | /* linked list of sed commands */ |
130 | static sed_cmd_t sed_cmd_head; | 124 | static sed_cmd_t sed_cmd_head; |
131 | static sed_cmd_t *sed_cmd_tail = &sed_cmd_head; | 125 | static sed_cmd_t *sed_cmd_tail = &sed_cmd_head; |
132 | 126 | ||
133 | const char *const semicolon_whitespace = "; \n\r\t\v\0"; | 127 | /* Linked list of append lines */ |
134 | static regex_t *previous_regex_ptr = NULL; | 128 | struct append_list { |
135 | 129 | char *string; | |
130 | struct append_list *next; | ||
131 | }; | ||
132 | struct append_list *append_head=NULL, *append_tail=NULL; | ||
136 | 133 | ||
137 | #ifdef CONFIG_FEATURE_CLEAN_UP | 134 | #ifdef CONFIG_FEATURE_CLEAN_UP |
138 | static void destroy_cmd_strs(void) | 135 | static void free_and_close_stuff(void) |
139 | { | 136 | { |
140 | sed_cmd_t *sed_cmd = sed_cmd_head.next; | 137 | sed_cmd_t *sed_cmd = sed_cmd_head.next; |
141 | 138 | ||
139 | while(append_head) { | ||
140 | append_tail=append_head->next; | ||
141 | free(append_head->string); | ||
142 | free(append_head); | ||
143 | append_head=append_tail; | ||
144 | } | ||
145 | |||
142 | while (sed_cmd) { | 146 | while (sed_cmd) { |
143 | sed_cmd_t *sed_cmd_next = sed_cmd->next; | 147 | sed_cmd_t *sed_cmd_next = sed_cmd->next; |
144 | 148 | ||
149 | if(sed_cmd->file) | ||
150 | bb_xprint_and_close_file(sed_cmd->file); | ||
151 | |||
145 | if (sed_cmd->beg_match) { | 152 | if (sed_cmd->beg_match) { |
146 | regfree(sed_cmd->beg_match); | 153 | regfree(sed_cmd->beg_match); |
147 | free(sed_cmd->beg_match); | 154 | free(sed_cmd->beg_match); |
@@ -154,17 +161,41 @@ static void destroy_cmd_strs(void) | |||
154 | regfree(sed_cmd->sub_match); | 161 | regfree(sed_cmd->sub_match); |
155 | free(sed_cmd->sub_match); | 162 | free(sed_cmd->sub_match); |
156 | } | 163 | } |
157 | free(sed_cmd->replace); | 164 | free(sed_cmd->string); |
158 | free(sed_cmd->editline); | ||
159 | free(sed_cmd->filename); | ||
160 | free(sed_cmd->translate); | ||
161 | free(sed_cmd->label); | ||
162 | free(sed_cmd); | 165 | free(sed_cmd); |
163 | sed_cmd = sed_cmd_next; | 166 | sed_cmd = sed_cmd_next; |
164 | } | 167 | } |
165 | } | 168 | } |
166 | #endif | 169 | #endif |
167 | 170 | ||
171 | /* strdup, replacing "\n" with '\n', and "\delimiter" with 'delimiter' */ | ||
172 | |||
173 | static void parse_escapes(char *dest, const char *string, int len, char from, char to) | ||
174 | { | ||
175 | int i=0; | ||
176 | |||
177 | while(i<len) { | ||
178 | if(string[i] == '\\') { | ||
179 | if(string[i+1] == from) { | ||
180 | *(dest++) = to; | ||
181 | i+=2; | ||
182 | continue; | ||
183 | } else *(dest++)=string[i++]; | ||
184 | } | ||
185 | *(dest++) = string[i++]; | ||
186 | } | ||
187 | *dest=0; | ||
188 | } | ||
189 | |||
190 | static char *copy_parsing_slashn(const char *string, int len) | ||
191 | { | ||
192 | char *dest=xmalloc(len+1); | ||
193 | |||
194 | parse_escapes(dest,string,len,'n','\n'); | ||
195 | return dest; | ||
196 | } | ||
197 | |||
198 | |||
168 | /* | 199 | /* |
169 | * index_of_next_unescaped_regexp_delim - walks left to right through a string | 200 | * index_of_next_unescaped_regexp_delim - walks left to right through a string |
170 | * beginning at a specified index and returns the index of the next regular | 201 | * beginning at a specified index and returns the index of the next regular |
@@ -182,7 +213,7 @@ static int index_of_next_unescaped_regexp_delim(const char delimiter, | |||
182 | for (; (ch = str[idx]); idx++) { | 213 | for (; (ch = str[idx]); idx++) { |
183 | if (bracket != -1) { | 214 | if (bracket != -1) { |
184 | if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 | 215 | if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 |
185 | && str[idx - 1] == '^'))) | 216 | && str[idx - 1] == '^'))) |
186 | bracket = -1; | 217 | bracket = -1; |
187 | } else if (escaped) | 218 | } else if (escaped) |
188 | escaped = 0; | 219 | escaped = 0; |
@@ -209,19 +240,15 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace) | |||
209 | 240 | ||
210 | /* verify that the 's' or 'y' is followed by something. That something | 241 | /* verify that the 's' or 'y' is followed by something. That something |
211 | * (typically a 'slash') is now our regexp delimiter... */ | 242 | * (typically a 'slash') is now our regexp delimiter... */ |
212 | if (*cmdstr == '\0') | 243 | if (*cmdstr == '\0') bb_error_msg_and_die(bad_format_in_subst); |
213 | bb_error_msg_and_die(bad_format_in_subst); | 244 | delimiter = *(cmdstr_ptr++); |
214 | else | ||
215 | delimiter = *cmdstr_ptr; | ||
216 | |||
217 | cmdstr_ptr++; | ||
218 | 245 | ||
219 | /* save the match string */ | 246 | /* save the match string */ |
220 | idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr); | 247 | idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr); |
221 | if (idx == -1) { | 248 | if (idx == -1) { |
222 | bb_error_msg_and_die(bad_format_in_subst); | 249 | bb_error_msg_and_die(bad_format_in_subst); |
223 | } | 250 | } |
224 | *match = bb_xstrndup(cmdstr_ptr, idx); | 251 | *match = copy_parsing_slashn(cmdstr_ptr, idx); |
225 | 252 | ||
226 | /* save the replacement string */ | 253 | /* save the replacement string */ |
227 | cmdstr_ptr += idx + 1; | 254 | cmdstr_ptr += idx + 1; |
@@ -229,7 +256,7 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace) | |||
229 | if (idx == -1) { | 256 | if (idx == -1) { |
230 | bb_error_msg_and_die(bad_format_in_subst); | 257 | bb_error_msg_and_die(bad_format_in_subst); |
231 | } | 258 | } |
232 | *replace = bb_xstrndup(cmdstr_ptr, idx); | 259 | *replace = copy_parsing_slashn(cmdstr_ptr, idx); |
233 | 260 | ||
234 | return ((cmdstr_ptr - cmdstr) + idx); | 261 | return ((cmdstr_ptr - cmdstr) + idx); |
235 | } | 262 | } |
@@ -248,94 +275,109 @@ static int get_address(char *my_str, int *linenum, regex_t ** regex) | |||
248 | *linenum = -1; | 275 | *linenum = -1; |
249 | pos++; | 276 | pos++; |
250 | } else if (*my_str == '/' || *my_str == '\\') { | 277 | } else if (*my_str == '/' || *my_str == '\\') { |
251 | int next, idx_start = 1; | 278 | int next; |
252 | char delimiter; | 279 | char delimiter; |
280 | char *temp; | ||
253 | 281 | ||
254 | delimiter = '/'; | 282 | if (*my_str == '\\') delimiter = *(++pos); |
255 | if (*my_str == '\\') { | 283 | else delimiter = '/'; |
256 | idx_start++; | ||
257 | delimiter = *(++pos); | ||
258 | } | ||
259 | next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); | 284 | next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); |
260 | if (next == -1) { | 285 | if (next == -1) |
261 | bb_error_msg_and_die("unterminated match expression"); | 286 | bb_error_msg_and_die("unterminated match expression"); |
262 | } | 287 | |
263 | pos += next; | 288 | temp=copy_parsing_slashn(pos,next); |
264 | *pos = '\0'; | ||
265 | |||
266 | *regex = (regex_t *) xmalloc(sizeof(regex_t)); | 289 | *regex = (regex_t *) xmalloc(sizeof(regex_t)); |
267 | xregcomp(*regex, my_str + idx_start, REG_NEWLINE); | 290 | xregcomp(*regex, temp, REG_NEWLINE); |
268 | pos++; /* so it points to the next character after the last '/' */ | 291 | free(temp); |
292 | /* Move position to next character after last delimiter */ | ||
293 | pos+=(next+1); | ||
269 | } | 294 | } |
270 | return pos - my_str; | 295 | return pos - my_str; |
271 | } | 296 | } |
272 | 297 | ||
298 | /* Grab a filename. Whitespace at start is skipped, then goes to EOL. */ | ||
299 | static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr, char **retval) | ||
300 | { | ||
301 | int start = 0, idx, hack=0; | ||
302 | |||
303 | /* Skip whitespace, then grab filename to end of line */ | ||
304 | while (isspace(filecmdstr[start])) start++; | ||
305 | idx=start; | ||
306 | while(filecmdstr[idx] && filecmdstr[idx]!='\n') idx++; | ||
307 | /* If lines glued together, put backslash back. */ | ||
308 | if(filecmdstr[idx]=='\n') hack=1; | ||
309 | if(idx==start) bb_error_msg_and_die("Empty filename"); | ||
310 | *retval = bb_xstrndup(filecmdstr+start, idx-start+hack+1); | ||
311 | if(hack) *(idx+*retval)='\\'; | ||
312 | |||
313 | return idx; | ||
314 | } | ||
315 | |||
273 | static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) | 316 | static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) |
274 | { | 317 | { |
275 | int cflags = 0; | 318 | int cflags = 0; |
276 | char *match; | 319 | char *match; |
277 | int idx = 0; | 320 | int idx = 0; |
278 | int j; | ||
279 | 321 | ||
280 | /* | 322 | /* |
281 | * the string that gets passed to this function should look like this: | 323 | * A substitution command should look something like this: |
282 | * s/match/replace/gIp | 324 | * s/match/replace/ #gIpw |
283 | * || | ||| | 325 | * || | ||| |
284 | * mandatory optional | 326 | * mandatory optional |
285 | * | ||
286 | * (all three of the '/' slashes are mandatory) | ||
287 | */ | 327 | */ |
288 | idx = parse_regex_delim(substr, &match, &sed_cmd->replace); | 328 | idx = parse_regex_delim(substr, &match, &sed_cmd->string); |
289 | 329 | ||
290 | /* determine the number of back references in the match string */ | 330 | /* determine the number of back references in the match string */ |
291 | /* Note: we compute this here rather than in the do_subst_command() | 331 | /* Note: we compute this here rather than in the do_subst_command() |
292 | * function to save processor time, at the expense of a little more memory | 332 | * function to save processor time, at the expense of a little more memory |
293 | * (4 bits) per sed_cmd */ | 333 | * (4 bits) per sed_cmd */ |
294 | 334 | ||
295 | for (j = 0; match[j]; j++) { | ||
296 | /* GNU/POSIX sed does not save more than nine backrefs */ | ||
297 | if (match[j] == '\\' && match[j + 1] == '(' | ||
298 | && sed_cmd->num_backrefs <= 9) | ||
299 | sed_cmd->num_backrefs++; | ||
300 | } | ||
301 | |||
302 | /* process the flags */ | 335 | /* process the flags */ |
303 | #ifndef CONFIG_FEATURE_SED_GNU_COMPATABILITY | 336 | |
304 | idx++; | 337 | sed_cmd->which_match=1; |
305 | #else | 338 | while (substr[++idx]) { |
306 | /* GNU sed allows blanks before the flag, this can lead to an incosistent | 339 | /* Parse match number */ |
307 | * interpretation of 's/a/b/ g' as being either 's/a/b/g' or 's/a/b/;g'. | 340 | if(isdigit(substr[idx])) { |
308 | * which results in very different behaviour. | 341 | if(match[0]!='^') { |
309 | */ | 342 | /* Match 0 treated as all, multiple matches we take the last one. */ |
310 | while (substr[++idx]) | 343 | char *pos=substr+idx; |
311 | #endif | 344 | sed_cmd->which_match=(unsigned short)strtol(substr+idx,&pos,10); |
345 | idx=pos-substr; | ||
346 | } | ||
347 | continue; | ||
348 | } | ||
312 | switch (substr[idx]) { | 349 | switch (substr[idx]) { |
313 | case 'g': | 350 | /* Replace all occurrences */ |
314 | if (match[0] != '^') { | 351 | case 'g': |
315 | sed_cmd->sub_g = 1; | 352 | if (match[0] != '^') sed_cmd->which_match = 0; |
353 | break; | ||
354 | /* Print pattern space */ | ||
355 | case 'p': | ||
356 | sed_cmd->sub_p = 1; | ||
357 | break; | ||
358 | case 'w': | ||
359 | { | ||
360 | char *temp; | ||
361 | idx+=parse_file_cmd(sed_cmd,substr+idx,&temp); | ||
362 | |||
363 | break; | ||
316 | } | 364 | } |
317 | break; | 365 | /* Ignore case (gnu exension) */ |
318 | /* Hmm, i dont see the I option mentioned in the standard */ | 366 | case 'I': |
319 | case 'I': | 367 | cflags |= REG_ICASE; |
320 | cflags |= REG_ICASE; | 368 | break; |
321 | break; | 369 | /* Skip spaces */ |
322 | case 'p': | 370 | case ' ': |
323 | sed_cmd->sub_p = 1; | 371 | case '\t': |
324 | break; | 372 | break; |
325 | #ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY | 373 | case ';': |
326 | default: | 374 | case '}': |
327 | /* any whitespace or semicolon trailing after a s/// is ok */ | ||
328 | if (strchr(semicolon_whitespace, substr[idx])) | ||
329 | goto out; | 375 | goto out; |
330 | bb_error_msg_and_die("bad option in substitution expression"); | 376 | default: |
331 | #endif | 377 | bb_error_msg_and_die("bad option in substitution expression"); |
332 | } | 378 | } |
333 | 379 | } | |
334 | #ifndef CONFIG_FEATURE_SED_GNU_COMPATABILITY | ||
335 | idx++; | ||
336 | #else | ||
337 | out: | 380 | out: |
338 | #endif | ||
339 | /* compile the match string into a regex */ | 381 | /* compile the match string into a regex */ |
340 | if (*match != '\0') { | 382 | if (*match != '\0') { |
341 | /* If match is empty, we use last regex used at runtime */ | 383 | /* If match is empty, we use last regex used at runtime */ |
@@ -347,166 +389,61 @@ out: | |||
347 | return idx; | 389 | return idx; |
348 | } | 390 | } |
349 | 391 | ||
350 | static void replace_slash_n(char *string) | ||
351 | { | ||
352 | char *dest; | ||
353 | |||
354 | for (dest = string; *string; string++, dest++) { | ||
355 | if ((string[0] == '\\') && (string[1] == 'n')) { | ||
356 | *dest = '\n'; | ||
357 | string++; | ||
358 | } else { | ||
359 | *dest = *string; | ||
360 | } | ||
361 | } | ||
362 | *dest=0; | ||
363 | } | ||
364 | |||
365 | static int parse_translate_cmd(sed_cmd_t * const sed_cmd, const char *cmdstr) | ||
366 | { | ||
367 | char *match; | ||
368 | char *replace; | ||
369 | int idx; | ||
370 | int i; | ||
371 | |||
372 | idx = parse_regex_delim(cmdstr, &match, &replace); | ||
373 | replace_slash_n(match); | ||
374 | replace_slash_n(replace); | ||
375 | sed_cmd->translate = xcalloc(1, (strlen(match) + 1) * 2); | ||
376 | for (i = 0; (match[i] != 0) && (replace[i] != 0); i++) { | ||
377 | sed_cmd->translate[i * 2] = match[i]; | ||
378 | sed_cmd->translate[(i * 2) + 1] = replace[i]; | ||
379 | } | ||
380 | return (idx + 1); | ||
381 | } | ||
382 | |||
383 | static int parse_edit_cmd(sed_cmd_t * sed_cmd, const char *editstr) | ||
384 | { | ||
385 | int i, j; | ||
386 | |||
387 | /* | ||
388 | * the string that gets passed to this function should look like this: | ||
389 | * | ||
390 | * need one of these | ||
391 | * | | ||
392 | * | this backslash (immediately following the edit command) is mandatory | ||
393 | * | | | ||
394 | * [aic]\ | ||
395 | * TEXT1\ | ||
396 | * TEXT2\ | ||
397 | * TEXTN | ||
398 | * | ||
399 | * as soon as we hit a TEXT line that has no trailing '\', we're done. | ||
400 | * this means a command like: | ||
401 | * | ||
402 | * i\ | ||
403 | * INSERTME | ||
404 | * | ||
405 | * is a-ok. | ||
406 | * | ||
407 | */ | ||
408 | if ((*editstr != '\\') || ((editstr[1] != '\n') && (editstr[1] != '\r'))) { | ||
409 | bb_error_msg_and_die("bad format in edit expression"); | ||
410 | } | ||
411 | |||
412 | /* store the edit line text */ | ||
413 | sed_cmd->editline = xmalloc(strlen(&editstr[2]) + 2); | ||
414 | for (i = 2, j = 0; | ||
415 | editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; i++, j++) { | ||
416 | if ((editstr[i] == '\\') && strchr("\n\r", editstr[i + 1]) != NULL) { | ||
417 | sed_cmd->editline[j] = '\n'; | ||
418 | i++; | ||
419 | } else | ||
420 | sed_cmd->editline[j] = editstr[i]; | ||
421 | } | ||
422 | |||
423 | /* figure out if we need to add a newline */ | ||
424 | if (sed_cmd->editline[j - 1] != '\n') | ||
425 | sed_cmd->editline[j++] = '\n'; | ||
426 | |||
427 | /* terminate string */ | ||
428 | sed_cmd->editline[j] = '\0'; | ||
429 | |||
430 | return i; | ||
431 | } | ||
432 | |||
433 | |||
434 | static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr) | ||
435 | { | ||
436 | int idx = 0; | ||
437 | int filenamelen = 0; | ||
438 | |||
439 | /* | ||
440 | * the string that gets passed to this function should look like this: | ||
441 | * '[ ]filename' | ||
442 | * | | | ||
443 | * | a filename | ||
444 | * | | ||
445 | * optional whitespace | ||
446 | |||
447 | * re: the file to be read, the GNU manual says the following: "Note that | ||
448 | * if filename cannot be read, it is treated as if it were an empty file, | ||
449 | * without any error indication." Thus, all of the following commands are | ||
450 | * perfectly legal: | ||
451 | * | ||
452 | * sed -e '1r noexist' | ||
453 | * sed -e '1r ;' | ||
454 | * sed -e '1r' | ||
455 | */ | ||
456 | |||
457 | /* the file command may be followed by whitespace; move past it. */ | ||
458 | while (isspace(filecmdstr[++idx])) {; | ||
459 | } | ||
460 | |||
461 | /* the first non-whitespace we get is a filename. the filename ends when we | ||
462 | * hit a normal sed command terminator or end of string */ | ||
463 | filenamelen = strcspn(&filecmdstr[idx], semicolon_whitespace); | ||
464 | sed_cmd->filename = xmalloc(filenamelen + 1); | ||
465 | safe_strncpy(sed_cmd->filename, &filecmdstr[idx], filenamelen + 1); | ||
466 | return idx + filenamelen; | ||
467 | } | ||
468 | |||
469 | /* | 392 | /* |
470 | * Process the commands arguments | 393 | * Process the commands arguments |
471 | */ | 394 | */ |
472 | static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr) | 395 | static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr) |
473 | { | 396 | { |
474 | /* handle (s)ubstitution command */ | 397 | /* handle (s)ubstitution command */ |
475 | if (sed_cmd->cmd == 's') { | 398 | if (sed_cmd->cmd == 's') cmdstr += parse_subst_cmd(sed_cmd, cmdstr); |
476 | cmdstr += parse_subst_cmd(sed_cmd, cmdstr); | ||
477 | } | ||
478 | /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ | 399 | /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ |
479 | else if (strchr("aic", sed_cmd->cmd)) { | 400 | else if (strchr("aic", sed_cmd->cmd)) { |
480 | if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') | 401 | if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') |
481 | bb_error_msg_and_die | 402 | bb_error_msg_and_die |
482 | ("only a beginning address can be specified for edit commands"); | 403 | ("only a beginning address can be specified for edit commands"); |
483 | cmdstr += parse_edit_cmd(sed_cmd, cmdstr); | 404 | while(isspace(*cmdstr)) cmdstr++; |
484 | } | 405 | sed_cmd->string = bb_xstrdup(cmdstr); |
406 | cmdstr += strlen(cmdstr); | ||
485 | /* handle file cmds: (r)ead */ | 407 | /* handle file cmds: (r)ead */ |
486 | else if (sed_cmd->cmd == 'r') { | 408 | } else if(strchr("rw", sed_cmd->cmd)) { |
487 | if (sed_cmd->end_line || sed_cmd->end_match) | 409 | if (sed_cmd->end_line || sed_cmd->end_match) |
488 | bb_error_msg_and_die("Command only uses one address"); | 410 | bb_error_msg_and_die("Command only uses one address"); |
489 | cmdstr += parse_file_cmd(sed_cmd, cmdstr); | 411 | cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string); |
490 | } | 412 | if(sed_cmd->cmd=='w') |
413 | sed_cmd->file=bb_xfopen(sed_cmd->string,"w"); | ||
491 | /* handle branch commands */ | 414 | /* handle branch commands */ |
492 | else if (strchr(":bt", sed_cmd->cmd)) { | 415 | } else if (strchr(":bt", sed_cmd->cmd)) { |
493 | int length; | 416 | int length; |
494 | 417 | ||
495 | cmdstr += strspn(cmdstr, " "); | 418 | while(isspace(*cmdstr)) cmdstr++; |
496 | length = strcspn(cmdstr, semicolon_whitespace); | 419 | length = strcspn(cmdstr, semicolon_whitespace); |
497 | if (length) { | 420 | if (length) { |
498 | sed_cmd->label = strndup(cmdstr, length); | 421 | sed_cmd->string = strndup(cmdstr, length); |
499 | cmdstr += length; | 422 | cmdstr += length; |
500 | } | 423 | } |
501 | } | 424 | } |
502 | /* translation command */ | 425 | /* translation command */ |
503 | else if (sed_cmd->cmd == 'y') { | 426 | else if (sed_cmd->cmd == 'y') { |
504 | cmdstr += parse_translate_cmd(sed_cmd, cmdstr); | 427 | char *match, *replace; |
428 | int i=cmdstr[0]; | ||
429 | |||
430 | cmdstr+=parse_regex_delim(cmdstr, &match, &replace)+1; | ||
431 | /* \n already parsed, but \delimiter needs unescaping. */ | ||
432 | parse_escapes(match,match,strlen(match),i,i); | ||
433 | parse_escapes(replace,replace,strlen(replace),i,i); | ||
434 | |||
435 | sed_cmd->string = xcalloc(1, (strlen(match) + 1) * 2); | ||
436 | for (i = 0; match[i] && replace[i]; i++) { | ||
437 | sed_cmd->string[i * 2] = match[i]; | ||
438 | sed_cmd->string[(i * 2) + 1] = replace[i]; | ||
439 | } | ||
440 | free(match); | ||
441 | free(replace); | ||
505 | } | 442 | } |
506 | /* if it wasnt a single-letter command that takes no arguments | 443 | /* if it wasnt a single-letter command that takes no arguments |
507 | * then it must be an invalid command. | 444 | * then it must be an invalid command. |
508 | */ | 445 | */ |
509 | else if (strchr("dgGhHnNpPqx={}", sed_cmd->cmd) == 0) { | 446 | else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) { |
510 | bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd); | 447 | bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd); |
511 | } | 448 | } |
512 | 449 | ||
@@ -514,663 +451,595 @@ static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr) | |||
514 | return (cmdstr); | 451 | return (cmdstr); |
515 | } | 452 | } |
516 | 453 | ||
517 | static char *add_cmd(char *cmdstr) | ||
518 | { | ||
519 | sed_cmd_t *sed_cmd; | ||
520 | 454 | ||
521 | /* Skip over leading whitespace and semicolons */ | 455 | /* Parse address+command sets, skipping comment lines. */ |
522 | cmdstr += strspn(cmdstr, semicolon_whitespace); | ||
523 | 456 | ||
524 | /* if we ate the whole thing, that means there was just trailing | 457 | void add_cmd(char *cmdstr) |
525 | * whitespace or a final / no-op semicolon. either way, get out */ | 458 | { |
526 | if (*cmdstr == '\0') { | 459 | static char *add_cmd_line=NULL; |
527 | return (NULL); | 460 | sed_cmd_t *sed_cmd; |
528 | } | ||
529 | 461 | ||
530 | /* if this is a comment, jump past it and keep going */ | 462 | /* Append this line to any unfinished line from last time. */ |
531 | if (*cmdstr == '#') { | 463 | if(add_cmd_line) { |
532 | /* "#n" is the same as using -n on the command line */ | 464 | int lastlen=strlen(add_cmd_line); |
533 | if (cmdstr[1] == 'n') { | 465 | char *temp=xmalloc(lastlen+strlen(cmdstr)+2); |
534 | be_quiet++; | 466 | |
535 | } | 467 | memcpy(temp,add_cmd_line,lastlen); |
536 | return (strpbrk(cmdstr, "\n\r")); | 468 | temp[lastlen]='\n'; |
469 | strcpy(temp+lastlen+1,cmdstr); | ||
470 | free(add_cmd_line); | ||
471 | cmdstr=add_cmd_line=temp; | ||
472 | } else add_cmd_line=NULL; | ||
473 | |||
474 | /* If this line ends with backslash, request next line. */ | ||
475 | int temp=strlen(cmdstr); | ||
476 | if(temp && cmdstr[temp-1]=='\\') { | ||
477 | if(!add_cmd_line) add_cmd_line=strdup(cmdstr); | ||
478 | add_cmd_line[temp-1]=0; | ||
479 | return; | ||
537 | } | 480 | } |
538 | 481 | ||
539 | /* parse the command | 482 | /* Loop parsing all commands in this line. */ |
540 | * format is: [addr][,addr]cmd | 483 | while(*cmdstr) { |
541 | * |----||-----||-| | 484 | /* Skip leading whitespace and semicolons */ |
542 | * part1 part2 part3 | 485 | cmdstr += strspn(cmdstr, semicolon_whitespace); |
543 | */ | ||
544 | 486 | ||
545 | sed_cmd = xcalloc(1, sizeof(sed_cmd_t)); | 487 | /* If no more commands, exit. */ |
488 | if(!*cmdstr) break; | ||
546 | 489 | ||
547 | /* first part (if present) is an address: either a '$', a number or a /regex/ */ | 490 | /* if this is a comment, jump past it and keep going */ |
548 | cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); | 491 | if (*cmdstr == '#') { |
492 | /* "#n" is the same as using -n on the command line */ | ||
493 | if (cmdstr[1] == 'n') be_quiet++; | ||
494 | if(!(cmdstr=strpbrk(cmdstr, "\n\r"))) break; | ||
495 | continue; | ||
496 | } | ||
549 | 497 | ||
550 | /* second part (if present) will begin with a comma */ | 498 | /* parse the command |
551 | if (*cmdstr == ',') { | 499 | * format is: [addr][,addr][!]cmd |
552 | int idx; | 500 | * |----||-----||-| |
501 | * part1 part2 part3 | ||
502 | */ | ||
553 | 503 | ||
554 | cmdstr++; | 504 | sed_cmd = xcalloc(1, sizeof(sed_cmd_t)); |
555 | idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); | ||
556 | if (idx == 0) { | ||
557 | bb_error_msg_and_die("get_address: no address found in string\n" | ||
558 | "\t(you probably didn't check the string you passed me)"); | ||
559 | } | ||
560 | cmdstr += idx; | ||
561 | } | ||
562 | 505 | ||
563 | /* skip whitespace before the command */ | 506 | /* first part (if present) is an address: either a '$', a number or a /regex/ */ |
564 | while (isspace(*cmdstr)) { | 507 | cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); |
565 | cmdstr++; | ||
566 | } | ||
567 | 508 | ||
568 | /* there my be the inversion flag between part2 and part3 */ | 509 | /* second part (if present) will begin with a comma */ |
569 | if (*cmdstr == '!') { | 510 | if (*cmdstr == ',') { |
570 | sed_cmd->invert = 1; | 511 | int idx; |
571 | cmdstr++; | ||
572 | 512 | ||
573 | #ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY | ||
574 | /* According to the spec | ||
575 | * It is unspecified whether <blank>s can follow a '!' character, | ||
576 | * and conforming applications shall not follow a '!' character | ||
577 | * with <blank>s. | ||
578 | */ | ||
579 | /* skip whitespace before the command */ | ||
580 | while (isspace(*cmdstr)) { | ||
581 | cmdstr++; | 513 | cmdstr++; |
514 | idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); | ||
515 | if (!idx) bb_error_msg_and_die("get_address: no address found in string\n"); | ||
516 | cmdstr += idx; | ||
582 | } | 517 | } |
583 | #endif | ||
584 | } | ||
585 | |||
586 | /* last part (mandatory) will be a command */ | ||
587 | if (*cmdstr == '\0') | ||
588 | bb_error_msg_and_die("missing command"); | ||
589 | 518 | ||
590 | sed_cmd->cmd = *cmdstr; | 519 | /* skip whitespace before the command */ |
591 | cmdstr++; | 520 | while (isspace(*cmdstr)) cmdstr++; |
592 | 521 | ||
593 | cmdstr = parse_cmd_str(sed_cmd, cmdstr); | 522 | /* Check for inversion flag */ |
523 | if (*cmdstr == '!') { | ||
524 | sed_cmd->invert = 1; | ||
525 | cmdstr++; | ||
594 | 526 | ||
595 | /* Add the command to the command array */ | 527 | /* skip whitespace before the command */ |
596 | sed_cmd_tail->next = sed_cmd; | 528 | while (isspace(*cmdstr)) cmdstr++; |
597 | sed_cmd_tail = sed_cmd_tail->next; | 529 | } |
598 | 530 | ||
599 | return (cmdstr); | 531 | /* last part (mandatory) will be a command */ |
600 | } | 532 | if (!*cmdstr) bb_error_msg_and_die("missing command"); |
533 | sed_cmd->cmd = *(cmdstr++); | ||
534 | cmdstr = parse_cmd_args(sed_cmd, cmdstr); | ||
601 | 535 | ||
602 | static void add_cmd_str(const char *cmdstr) | 536 | /* Add the command to the command array */ |
603 | { | 537 | sed_cmd_tail->next = sed_cmd; |
604 | char *cmdstr_expanded = strdup(cmdstr); | 538 | sed_cmd_tail = sed_cmd_tail->next; |
605 | char *cmdstr_ptr; | ||
606 | |||
607 | #ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE | ||
608 | cmdstr_ptr = cmdstr_expanded; | ||
609 | /* HACK: convert "\n" to match tranlated '\n' string */ | ||
610 | while ((cmdstr_ptr = strstr(cmdstr_ptr, "\\n")) != NULL) { | ||
611 | int length = strlen(cmdstr) + 2; | ||
612 | cmdstr_expanded = realloc(cmdstr_expanded, length); | ||
613 | cmdstr_ptr = strstr(cmdstr_expanded, "\\n"); | ||
614 | memmove(cmdstr_ptr + 1, cmdstr_ptr, strlen(cmdstr_ptr) + 1); | ||
615 | cmdstr_ptr[0] = '\\'; | ||
616 | cmdstr_ptr += 3; | ||
617 | } | 539 | } |
618 | #endif | ||
619 | cmdstr_ptr = cmdstr_expanded; | ||
620 | do { | ||
621 | cmdstr_ptr = add_cmd(cmdstr_ptr); | ||
622 | } while (cmdstr_ptr && strlen(cmdstr_ptr)); | ||
623 | |||
624 | free(cmdstr_expanded); | ||
625 | } | ||
626 | |||
627 | 540 | ||
628 | static void load_cmd_file(const char *filename) | 541 | /* If we glued multiple lines together, free the memory. */ |
629 | { | 542 | if(add_cmd_line) { |
630 | FILE *cmdfile; | 543 | free(add_cmd_line); |
631 | char *line; | 544 | add_cmd_line=NULL; |
632 | char *nextline; | ||
633 | char *e; | ||
634 | |||
635 | cmdfile = bb_xfopen(filename, "r"); | ||
636 | |||
637 | while ((line = bb_get_line_from_file(cmdfile)) != NULL) { | ||
638 | /* if a line ends with '\' it needs the next line appended to it */ | ||
639 | while (((e = last_char_is(line, '\n')) != NULL) | ||
640 | && (e > line) && (e[-1] == '\\') | ||
641 | && ((nextline = bb_get_line_from_file(cmdfile)) != NULL)) { | ||
642 | line = xrealloc(line, (e - line) + 1 + strlen(nextline) + 1); | ||
643 | strcat(line, nextline); | ||
644 | free(nextline); | ||
645 | } | ||
646 | /* eat trailing newline (if any) --if I don't do this, edit commands | ||
647 | * (aic) will print an extra newline */ | ||
648 | chomp(line); | ||
649 | add_cmd_str(line); | ||
650 | free(line); | ||
651 | } | 545 | } |
652 | } | 546 | } |
653 | 547 | ||
654 | struct pipeline { | 548 | struct pipeline { |
655 | char *buf; | 549 | char *buf; /* Space to hold string */ |
656 | int idx; | 550 | int idx; /* Space used */ |
657 | int len; | 551 | int len; /* Space allocated */ |
658 | }; | 552 | } pipeline; |
659 | 553 | ||
660 | #define PIPE_MAGIC 0x7f | ||
661 | #define PIPE_GROW 64 | 554 | #define PIPE_GROW 64 |
662 | 555 | ||
663 | void pipe_putc(struct pipeline *const pipeline, char c) | 556 | void pipe_putc(char c) |
664 | { | 557 | { |
665 | if (pipeline->buf[pipeline->idx] == PIPE_MAGIC) { | 558 | if(pipeline.idx==pipeline.len) { |
666 | pipeline->buf = xrealloc(pipeline->buf, pipeline->len + PIPE_GROW); | 559 | pipeline.buf = xrealloc(pipeline.buf, pipeline.len + PIPE_GROW); |
667 | memset(pipeline->buf + pipeline->len, 0, PIPE_GROW); | 560 | pipeline.len+=PIPE_GROW; |
668 | pipeline->len += PIPE_GROW; | ||
669 | pipeline->buf[pipeline->len - 1] = PIPE_MAGIC; | ||
670 | } | 561 | } |
671 | pipeline->buf[pipeline->idx++] = (c); | 562 | pipeline.buf[pipeline.idx++] = (c); |
672 | } | 563 | } |
673 | 564 | ||
674 | #define pipeputc(c) pipe_putc(pipeline, c) | 565 | static void do_subst_w_backrefs(const char *line, const char *replace) |
675 | |||
676 | static void print_subst_w_backrefs(const char *line, const char *replace, | ||
677 | regmatch_t * regmatch, struct pipeline *const pipeline, int matches) | ||
678 | { | 566 | { |
679 | int i; | 567 | int i,j; |
680 | 568 | ||
681 | /* go through the replacement string */ | 569 | /* go through the replacement string */ |
682 | for (i = 0; replace[i]; i++) { | 570 | for (i = 0; replace[i]; i++) { |
683 | /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */ | 571 | /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */ |
684 | if (replace[i] == '\\' && isdigit(replace[i + 1])) { | 572 | if (replace[i] == '\\' && replace[i+1]>0 && replace[i+1]<=9) { |
685 | int j; | 573 | int backref=replace[++i]-'0'; |
686 | char tmpstr[2]; | 574 | |
687 | int backref; | ||
688 | |||
689 | ++i; /* i now indexes the backref number, instead of the leading slash */ | ||
690 | tmpstr[0] = replace[i]; | ||
691 | tmpstr[1] = 0; | ||
692 | backref = atoi(tmpstr); | ||
693 | /* print out the text held in regmatch[backref] */ | 575 | /* print out the text held in regmatch[backref] */ |
694 | if (backref <= matches && regmatch[backref].rm_so != -1) | 576 | if(regmatch[backref].rm_so != -1) |
695 | for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; | 577 | for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; j++) |
696 | j++) | 578 | pipe_putc(line[j]); |
697 | pipeputc(line[j]); | ||
698 | } | 579 | } |
699 | 580 | ||
700 | /* if we find a backslash escaped character, print the character */ | 581 | /* if we find a backslash escaped character, print the character */ |
701 | else if (replace[i] == '\\') { | 582 | else if (replace[i] == '\\') pipe_putc(replace[++i]); |
702 | ++i; | ||
703 | pipeputc(replace[i]); | ||
704 | } | ||
705 | |||
706 | /* if we find an unescaped '&' print out the whole matched text. | ||
707 | * fortunately, regmatch[0] contains the indicies to the whole matched | ||
708 | * expression (kinda seems like it was designed for just such a | ||
709 | * purpose...) */ | ||
710 | else if (replace[i] == '&') { | ||
711 | int j; | ||
712 | 583 | ||
584 | /* if we find an unescaped '&' print out the whole matched text. */ | ||
585 | else if (replace[i] == '&') | ||
713 | for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++) | 586 | for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++) |
714 | pipeputc(line[j]); | 587 | pipe_putc(line[j]); |
715 | } | 588 | /* Otherwise just output the character. */ |
716 | /* nothing special, just print this char of the replacement string to stdout */ | 589 | else pipe_putc(replace[i]); |
717 | else | ||
718 | pipeputc(replace[i]); | ||
719 | } | 590 | } |
720 | } | 591 | } |
721 | 592 | ||
722 | static int do_subst_command(sed_cmd_t * sed_cmd, char **line) | 593 | static int do_subst_command(sed_cmd_t * sed_cmd, char **line) |
723 | { | 594 | { |
724 | char *hackline = *line; | 595 | char *oldline = *line; |
725 | struct pipeline thepipe = { NULL, 0, 0 }; | ||
726 | struct pipeline *const pipeline = &thepipe; | ||
727 | int altered = 0; | 596 | int altered = 0; |
728 | int result; | 597 | int match_count=0; |
729 | regmatch_t *regmatch = NULL; | ||
730 | regex_t *current_regex; | 598 | regex_t *current_regex; |
731 | 599 | ||
600 | /* Handle empty regex. */ | ||
732 | if (sed_cmd->sub_match == NULL) { | 601 | if (sed_cmd->sub_match == NULL) { |
733 | current_regex = previous_regex_ptr; | 602 | current_regex = previous_regex_ptr; |
734 | } else { | 603 | if(!current_regex) |
735 | previous_regex_ptr = current_regex = sed_cmd->sub_match; | 604 | bb_error_msg_and_die("No previous regexp."); |
736 | } | 605 | } else previous_regex_ptr = current_regex = sed_cmd->sub_match; |
737 | result = regexec(current_regex, hackline, 0, NULL, 0); | ||
738 | 606 | ||
739 | /* we only proceed if the substitution 'search' expression matches */ | 607 | /* Find the first match */ |
740 | if (result == REG_NOMATCH) { | 608 | if(REG_NOMATCH==regexec(current_regex, oldline, 10, regmatch, 0)) |
741 | return 0; | 609 | return 0; |
742 | } | ||
743 | 610 | ||
744 | /* whaddaya know, it matched. get the number of back references */ | 611 | /* Initialize temporary output buffer. */ |
745 | regmatch = xmalloc(sizeof(regmatch_t) * (sed_cmd->num_backrefs + 1)); | 612 | pipeline.buf=xmalloc(PIPE_GROW); |
746 | 613 | pipeline.len=PIPE_GROW; | |
747 | /* allocate more PIPE_GROW bytes | 614 | pipeline.idx=0; |
748 | if replaced string is larger than original */ | 615 | |
749 | thepipe.len = strlen(hackline) + PIPE_GROW; | 616 | /* Now loop through, substituting for matches */ |
750 | thepipe.buf = xcalloc(1, thepipe.len); | 617 | do { |
751 | /* buffer magic */ | ||
752 | thepipe.buf[thepipe.len - 1] = PIPE_MAGIC; | ||
753 | |||
754 | /* and now, as long as we've got a line to try matching and if we can match | ||
755 | * the search string, we make substitutions */ | ||
756 | while ((*hackline || !altered) | ||
757 | && (regexec(current_regex, hackline, sed_cmd->num_backrefs + 1, | ||
758 | regmatch, 0) != REG_NOMATCH)) { | ||
759 | int i; | 618 | int i; |
760 | 619 | ||
620 | match_count++; | ||
621 | |||
622 | /* If we aren't interested in this match, output old line to | ||
623 | end of match and continue */ | ||
624 | if(sed_cmd->which_match && sed_cmd->which_match!=match_count) { | ||
625 | for(i=0;i<regmatch[0].rm_eo;i++) | ||
626 | pipe_putc(oldline[i]); | ||
627 | continue; | ||
628 | } | ||
629 | |||
761 | /* print everything before the match */ | 630 | /* print everything before the match */ |
762 | for (i = 0; i < regmatch[0].rm_so; i++) | 631 | for (i = 0; i < regmatch[0].rm_so; i++) pipe_putc(oldline[i]); |
763 | pipeputc(hackline[i]); | ||
764 | 632 | ||
765 | /* then print the substitution string */ | 633 | /* then print the substitution string */ |
766 | print_subst_w_backrefs(hackline, sed_cmd->replace, regmatch, pipeline, | 634 | do_subst_w_backrefs(oldline, sed_cmd->string); |
767 | sed_cmd->num_backrefs); | ||
768 | 635 | ||
769 | /* advance past the match */ | 636 | /* advance past the match */ |
770 | hackline += regmatch[0].rm_eo; | 637 | oldline += regmatch[0].rm_eo; |
771 | /* flag that something has changed */ | 638 | /* flag that something has changed */ |
772 | altered++; | 639 | altered++; |
773 | 640 | ||
774 | /* if we're not doing this globally, get out now */ | 641 | /* if we're not doing this globally, get out now */ |
775 | if (!sed_cmd->sub_g) { | 642 | if (sed_cmd->which_match) break; |
776 | break; | 643 | } while (*oldline && (regexec(current_regex, oldline, 10, regmatch, 0) != REG_NOMATCH)); |
777 | } | ||
778 | } | ||
779 | for (; *hackline; hackline++) | ||
780 | pipeputc(*hackline); | ||
781 | if (thepipe.buf[thepipe.idx] == PIPE_MAGIC) | ||
782 | thepipe.buf[thepipe.idx] = 0; | ||
783 | 644 | ||
784 | /* cleanup */ | 645 | /* Copy rest of string into output pipeline */ |
785 | free(regmatch); | 646 | |
647 | while(*oldline) pipe_putc(*(oldline++)); | ||
648 | pipe_putc(0); | ||
786 | 649 | ||
787 | free(*line); | 650 | free(*line); |
788 | *line = thepipe.buf; | 651 | *line = pipeline.buf; |
789 | return altered; | 652 | return altered; |
790 | } | 653 | } |
791 | 654 | ||
655 | /* Set command pointer to point to this label. (Does not handle null label.) */ | ||
792 | static sed_cmd_t *branch_to(const char *label) | 656 | static sed_cmd_t *branch_to(const char *label) |
793 | { | 657 | { |
794 | sed_cmd_t *sed_cmd; | 658 | sed_cmd_t *sed_cmd; |
795 | 659 | ||
796 | for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { | 660 | for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { |
797 | if ((sed_cmd->cmd == ':') && (sed_cmd->label) && (strcmp(sed_cmd->label, label) == 0)) { | 661 | if ((sed_cmd->cmd == ':') && (sed_cmd->string) && (strcmp(sed_cmd->string, label) == 0)) { |
798 | return (sed_cmd); | 662 | return (sed_cmd); |
799 | } | 663 | } |
800 | } | 664 | } |
801 | bb_error_msg_and_die("Can't find label for jump to `%s'", label); | 665 | bb_error_msg_and_die("Can't find label for jump to `%s'", label); |
802 | } | 666 | } |
803 | 667 | ||
804 | static void process_file(FILE * file) | 668 | /* Append copy of string to append buffer */ |
669 | static void append(char *s) | ||
805 | { | 670 | { |
806 | char *pattern_space; /* Posix requires it be able to hold at least 8192 bytes */ | 671 | struct append_list *temp=calloc(1,sizeof(struct append_list)); |
807 | char *hold_space = NULL; /* Posix requires it be able to hold at least 8192 bytes */ | 672 | |
808 | static int linenum = 0; /* GNU sed does not restart counting lines at EOF */ | 673 | if(append_head) |
809 | int altered; | 674 | append_tail=(append_tail->next=temp); |
810 | int force_print; | 675 | else append_head=append_tail=temp; |
811 | 676 | temp->string=strdup(s); | |
812 | pattern_space = bb_get_chomped_line_from_file(file); | 677 | } |
813 | if (pattern_space == NULL) { | 678 | |
814 | return; | 679 | static void flush_append(void) |
680 | { | ||
681 | /* Output appended lines. */ | ||
682 | while(append_head) { | ||
683 | puts(append_head->string); | ||
684 | append_tail=append_head->next; | ||
685 | free(append_head->string); | ||
686 | free(append_head); | ||
687 | append_head=append_tail; | ||
688 | } | ||
689 | append_head=append_tail=NULL; | ||
690 | } | ||
691 | |||
692 | /* Get next line of input, flushing append buffer and noting if we hit EOF | ||
693 | * without a newline on the last line. | ||
694 | */ | ||
695 | static char *get_next_line(FILE * file, int *no_newline) | ||
696 | { | ||
697 | char *temp; | ||
698 | int len; | ||
699 | |||
700 | flush_append(); | ||
701 | temp=bb_get_line_from_file(file); | ||
702 | if(temp) { | ||
703 | len=strlen(temp); | ||
704 | if(len && temp[len-1]=='\n') temp[len-1]=0; | ||
705 | else *no_newline=1; | ||
815 | } | 706 | } |
816 | 707 | ||
708 | return temp; | ||
709 | } | ||
710 | |||
711 | /* Output line of text. missing_newline means the last line output did not | ||
712 | end with a newline. no_newline means this line does not end with a | ||
713 | newline. */ | ||
714 | |||
715 | static int puts_maybe_newline(char *s, FILE *file, int missing_newline, int no_newline) | ||
716 | { | ||
717 | if(missing_newline) fputc('\n',file); | ||
718 | fputs(s,file); | ||
719 | if(!no_newline) fputc('\n',file); | ||
720 | |||
721 | return no_newline; | ||
722 | } | ||
723 | |||
724 | #define sed_puts(s,n) missing_newline=puts_maybe_newline(s,stdout,missing_newline,n) | ||
725 | |||
726 | static void process_file(FILE * file) | ||
727 | { | ||
728 | char *pattern_space, *next_line, *hold_space=NULL; | ||
729 | static int linenum = 0, missing_newline=0; | ||
730 | int no_newline,next_no_newline=0; | ||
731 | |||
732 | next_line = get_next_line(file,&next_no_newline); | ||
733 | |||
817 | /* go through every line in the file */ | 734 | /* go through every line in the file */ |
818 | do { | 735 | for(;;) { |
819 | char *next_line; | ||
820 | sed_cmd_t *sed_cmd; | 736 | sed_cmd_t *sed_cmd; |
821 | int substituted = 0; | 737 | int substituted=0; |
822 | /* This enables whole blocks of commands to be mask'ed out if the lead address doesnt match */ | 738 | |
823 | int block_mask = 1; | 739 | /* Advance to next line. Stop if out of lines. */ |
740 | if(!(pattern_space=next_line)) break; | ||
741 | no_newline=next_no_newline; | ||
824 | 742 | ||
825 | /* Read one line in advance so we can act on the last line, the '$' address */ | 743 | /* Read one line in advance so we can act on the last line, the '$' address */ |
826 | next_line = bb_get_chomped_line_from_file(file); | 744 | next_line = get_next_line(file,&next_no_newline); |
827 | linenum++; | 745 | linenum++; |
828 | altered = 0; | 746 | restart: |
829 | force_print = 0; | ||
830 | |||
831 | /* for every line, go through all the commands */ | 747 | /* for every line, go through all the commands */ |
832 | for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { | 748 | for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { |
833 | int deleted = 0; | 749 | int matched; |
834 | 750 | ||
835 | /* | 751 | /* Determine if this command matches this line: */ |
836 | * entry point into sedding... | 752 | |
837 | */ | 753 | /* Are we continuing a previous multi-line match? */ |
838 | int matched = ( | 754 | |
839 | /* no range necessary */ | 755 | sed_cmd->in_match = sed_cmd->in_match |
840 | (sed_cmd->beg_line == 0 && sed_cmd->end_line == 0 | 756 | |
841 | && sed_cmd->beg_match == NULL | 757 | /* Or is no range necessary? */ |
842 | && sed_cmd->end_match == NULL) || | 758 | || (!sed_cmd->beg_line && !sed_cmd->end_line |
843 | /* this line number is the first address we're looking for */ | 759 | && !sed_cmd->beg_match && !sed_cmd->end_match) |
844 | (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum)) || | 760 | |
845 | /* this line matches our first address regex */ | 761 | /* Or did we match the start of a numerical range? */ |
846 | (sed_cmd->beg_match | 762 | || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum)) |
847 | && (regexec(sed_cmd->beg_match, pattern_space, 0, NULL, | 763 | |
848 | 0) == 0)) || | 764 | /* Or does this line match our begin address regex? */ |
849 | /* we are currently within the beginning & ending address range */ | 765 | || (sed_cmd->beg_match && |
850 | sed_cmd->still_in_range || ((sed_cmd->beg_line == -1) | 766 | !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0)) |
851 | && (next_line == NULL)) | 767 | |
768 | /* Or did we match last line of input? */ | ||
769 | || (sed_cmd->beg_line == -1 && next_line == NULL); | ||
770 | |||
771 | /* Snapshot the value */ | ||
772 | |||
773 | matched = sed_cmd->in_match; | ||
774 | |||
775 | /* Is this line the end of the current match? */ | ||
776 | |||
777 | if(matched) { | ||
778 | sed_cmd->in_match = !( | ||
779 | /* has the ending line come, or is this a single address command? */ | ||
780 | (sed_cmd->end_line ? | ||
781 | sed_cmd->end_line==-1 ? | ||
782 | !next_line | ||
783 | : sed_cmd->end_line<=linenum | ||
784 | : !sed_cmd->end_match) | ||
785 | /* or does this line matches our last address regex */ | ||
786 | || (sed_cmd->end_match && (regexec(sed_cmd->end_match, pattern_space, 0, NULL, 0) == 0)) | ||
852 | ); | 787 | ); |
788 | } | ||
789 | |||
790 | /* Skip blocks of commands we didn't match. */ | ||
853 | if (sed_cmd->cmd == '{') { | 791 | if (sed_cmd->cmd == '{') { |
854 | block_mask = block_mask & matched; | 792 | if(sed_cmd->invert ? matched : !matched) |
793 | while(sed_cmd && sed_cmd->cmd!='}') sed_cmd=sed_cmd->next; | ||
794 | if(!sed_cmd) bb_error_msg_and_die("Unterminated {"); | ||
795 | continue; | ||
855 | } | 796 | } |
856 | // matched &= block_mask; | ||
857 | 797 | ||
858 | if (sed_cmd->invert ^ (matched & block_mask)) { | 798 | /* Okay, so did this line match? */ |
859 | /* Update last used regex incase a blank substitute BRE is found */ | 799 | if (sed_cmd->invert ? !matched : matched) { |
800 | /* Update last used regex in case a blank substitute BRE is found */ | ||
860 | if (sed_cmd->beg_match) { | 801 | if (sed_cmd->beg_match) { |
861 | previous_regex_ptr = sed_cmd->beg_match; | 802 | previous_regex_ptr = sed_cmd->beg_match; |
862 | } | 803 | } |
863 | 804 | ||
864 | /* | 805 | /* actual sedding */ |
865 | * actual sedding | ||
866 | */ | ||
867 | switch (sed_cmd->cmd) { | 806 | switch (sed_cmd->cmd) { |
868 | case '=': | 807 | |
869 | printf("%d\n", linenum); | 808 | /* Print line number */ |
870 | break; | 809 | case '=': |
871 | case 'P':{ | 810 | printf("%d\n", linenum); |
872 | /* Write the current pattern space upto the first newline */ | ||
873 | char *tmp = strchr(pattern_space, '\n'); | ||
874 | |||
875 | if (tmp) { | ||
876 | *tmp = '\0'; | ||
877 | puts(pattern_space); | ||
878 | *tmp = '\n'; | ||
879 | break; | 811 | break; |
880 | } | 812 | |
881 | /* Fall Through */ | 813 | /* Write the current pattern space up to the first newline */ |
882 | } | 814 | case 'P': |
883 | case 'p': /* Write the current pattern space to output */ | ||
884 | puts(pattern_space); | ||
885 | break; | ||
886 | case 'd': | ||
887 | altered++; | ||
888 | deleted = 1; | ||
889 | force_print = 0; | ||
890 | break; | ||
891 | |||
892 | case 's': | ||
893 | |||
894 | /* | ||
895 | * Some special cases for 's' printing to make it compliant with | ||
896 | * GNU sed printing behavior (aka "The -n | s///p Matrix"): | ||
897 | * | ||
898 | * -n ONLY = never print anything regardless of any successful | ||
899 | * substitution | ||
900 | * | ||
901 | * s///p ONLY = always print successful substitutions, even if | ||
902 | * the pattern_space is going to be printed anyway (pattern_space | ||
903 | * will be printed twice). | ||
904 | * | ||
905 | * -n AND s///p = print ONLY a successful substitution ONE TIME; | ||
906 | * no other lines are printed - this is the reason why the 'p' | ||
907 | * flag exists in the first place. | ||
908 | */ | ||
909 | |||
910 | #ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE | ||
911 | /* HACK: escape newlines twice so regex can match them */ | ||
912 | { | ||
913 | int offset = 0; | ||
914 | char *tmp = strchr(pattern_space + offset, '\n'); | ||
915 | while ((tmp = strchr(pattern_space + offset, '\n')) != NULL) { | ||
916 | offset = tmp - pattern_space; | ||
917 | pattern_space = xrealloc(pattern_space, strlen(pattern_space) + 2); | ||
918 | tmp = pattern_space + offset; | ||
919 | memmove(tmp + 1, tmp, strlen(tmp) + 1); | ||
920 | tmp[0] = '\\'; | ||
921 | tmp[1] = 'n'; | ||
922 | offset += 2; | ||
923 | } | ||
924 | } | ||
925 | #endif | ||
926 | /* we print the pattern_space once, unless we were told to be quiet */ | ||
927 | substituted |= do_subst_command(sed_cmd, &pattern_space); | ||
928 | #ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE | ||
929 | /* undo HACK: escape newlines twice so regex can match them */ | ||
930 | { | 815 | { |
931 | char *tmp = pattern_space; | 816 | char *tmp = strchr(pattern_space, '\n'); |
932 | 817 | ||
933 | while ((tmp = strstr(tmp, "\\n")) != NULL) { | 818 | if (tmp) { |
934 | memmove(tmp, tmp + 1, strlen(tmp + 1) + 1); | 819 | *tmp = '\0'; |
935 | tmp[0] = '\n'; | 820 | sed_puts(pattern_space,1); |
821 | *tmp = '\n'; | ||
822 | break; | ||
936 | } | 823 | } |
824 | /* Fall Through */ | ||
937 | } | 825 | } |
938 | #endif | 826 | |
939 | if (!be_quiet && substituted && ((sed_cmd->next == NULL) | 827 | /* Write the current pattern space to output */ |
940 | || (sed_cmd->next->cmd != 's'))) { | 828 | case 'p': |
941 | force_print = 1; | 829 | sed_puts(pattern_space,no_newline); |
942 | } | 830 | break; |
943 | /* we also print the line if we were given the 'p' flag | 831 | /* Delete up through first newline */ |
944 | * (this is quite possibly the second printing) */ | 832 | case 'D': |
945 | if ((sed_cmd->sub_p) && (altered || substituted)) { | 833 | { |
946 | puts(pattern_space); | 834 | char *tmp = strchr(pattern_space,'\n'); |
835 | |||
836 | if(tmp) { | ||
837 | tmp=bb_xstrdup(tmp+1); | ||
838 | free(pattern_space); | ||
839 | pattern_space=tmp; | ||
840 | goto restart; | ||
841 | } | ||
947 | } | 842 | } |
948 | break; | 843 | /* discard this line. */ |
949 | case 'a': | 844 | case 'd': |
950 | puts(pattern_space); | 845 | goto discard_line; |
951 | fputs(sed_cmd->editline, stdout); | 846 | |
952 | altered++; | 847 | /* Substitute with regex */ |
953 | break; | 848 | case 's': |
954 | 849 | if(do_subst_command(sed_cmd, &pattern_space)) { | |
955 | case 'i': | 850 | substituted|=1; |
956 | fputs(sed_cmd->editline, stdout); | 851 | |
957 | break; | 852 | /* handle p option */ |
958 | 853 | if(sed_cmd->sub_p) | |
959 | case 'c': | 854 | sed_puts(pattern_space,no_newline); |
960 | /* single-address case */ | 855 | /* handle w option */ |
961 | if ((sed_cmd->end_match == NULL && sed_cmd->end_line == 0) | 856 | if(sed_cmd->file) |
962 | /* multi-address case */ | 857 | sed_cmd->no_newline=puts_maybe_newline(pattern_space, sed_cmd->file, sed_cmd->no_newline, no_newline); |
963 | /* - matching text */ | 858 | |
964 | || (sed_cmd->end_match | 859 | } |
965 | && (regexec(sed_cmd->end_match, pattern_space, 0, | 860 | break; |
966 | NULL, 0) == 0)) | 861 | |
967 | /* - matching line numbers */ | 862 | /* Append line to linked list to be printed later */ |
968 | || (sed_cmd->end_line > 0 | 863 | case 'a': |
969 | && sed_cmd->end_line == linenum)) { | 864 | { |
970 | fputs(sed_cmd->editline, stdout); | 865 | append(sed_cmd->string); |
866 | break; | ||
971 | } | 867 | } |
972 | altered++; | ||
973 | 868 | ||
974 | break; | 869 | /* Insert text before this line */ |
870 | case 'i': | ||
871 | sed_puts(sed_cmd->string,1); | ||
872 | break; | ||
873 | |||
874 | /* Cut and paste text (replace) */ | ||
875 | case 'c': | ||
876 | /* Only triggers on last line of a matching range. */ | ||
877 | if (!sed_cmd->in_match) sed_puts(sed_cmd->string,1); | ||
878 | goto discard_line; | ||
975 | 879 | ||
976 | case 'r':{ | 880 | /* Read file, append contents to output */ |
977 | FILE *outfile; | 881 | case 'r': |
882 | { | ||
883 | FILE *outfile; | ||
978 | 884 | ||
979 | outfile = fopen(sed_cmd->filename, "r"); | 885 | outfile = fopen(sed_cmd->string, "r"); |
980 | if (outfile) { | 886 | if (outfile) { |
981 | char *line; | 887 | char *line; |
982 | 888 | ||
983 | while ((line = | 889 | while ((line = bb_get_chomped_line_from_file(outfile)) |
984 | bb_get_chomped_line_from_file(outfile)) != | 890 | != NULL) |
985 | NULL) { | 891 | append(line); |
986 | pattern_space = | 892 | bb_xprint_and_close_file(outfile); |
987 | xrealloc(pattern_space, | ||
988 | strlen(line) + strlen(pattern_space) + 2); | ||
989 | strcat(pattern_space, "\n"); | ||
990 | strcat(pattern_space, line); | ||
991 | } | 893 | } |
992 | bb_xprint_and_close_file(outfile); | ||
993 | } | ||
994 | 894 | ||
995 | } | ||
996 | break; | ||
997 | case 'q': /* Branch to end of script and quit */ | ||
998 | deleted = 1; | ||
999 | /* Exit the outer while loop */ | ||
1000 | free(next_line); | ||
1001 | next_line = NULL; | ||
1002 | break; | ||
1003 | case 'n': /* Read next line from input */ | ||
1004 | if (!be_quiet) { | ||
1005 | puts(pattern_space); | ||
1006 | } | ||
1007 | if (next_line) { | ||
1008 | free(pattern_space); | ||
1009 | pattern_space = next_line; | ||
1010 | next_line = bb_get_chomped_line_from_file(file); | ||
1011 | linenum++; | ||
1012 | } else { | ||
1013 | /* Jump to end of script and exit */ | ||
1014 | deleted = 1; | ||
1015 | next_line = NULL; | ||
1016 | } | ||
1017 | break; | ||
1018 | case 'N': /* Append the next line to the current line */ | ||
1019 | if (next_line == NULL) { | ||
1020 | /* Jump to end of script and exit */ | ||
1021 | deleted = 1; | ||
1022 | #ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY | ||
1023 | /* GNU sed will add the newline character | ||
1024 | * The GNU sed info page labels this as a bug that wont be fixed | ||
1025 | */ | ||
1026 | next_line = calloc(1,1); | ||
1027 | #else | ||
1028 | next_line = NULL; | ||
1029 | break; | 895 | break; |
1030 | #endif | ||
1031 | } | 896 | } |
1032 | pattern_space = realloc(pattern_space, strlen(pattern_space) + strlen(next_line) + 2); | 897 | |
1033 | strcat(pattern_space, "\n"); | 898 | /* Write pattern space to file. */ |
1034 | strcat(pattern_space, next_line); | 899 | case 'w': |
1035 | next_line = bb_get_chomped_line_from_file(file); | 900 | sed_cmd->no_newline=puts_maybe_newline(pattern_space,sed_cmd->file, sed_cmd->no_newline,no_newline); |
1036 | linenum++; | 901 | break; |
1037 | break; | 902 | |
1038 | case 't': | 903 | /* Read next line from input */ |
1039 | if (substituted) | 904 | case 'n': |
1040 | /* Fall through */ | 905 | if (!be_quiet) |
1041 | case 'b': | 906 | sed_puts(pattern_space,no_newline); |
907 | if (next_line) { | ||
908 | free(pattern_space); | ||
909 | pattern_space = next_line; | ||
910 | no_newline=next_no_newline; | ||
911 | next_line = get_next_line(file,&next_no_newline); | ||
912 | linenum++; | ||
913 | break; | ||
914 | } | ||
915 | /* fall through */ | ||
916 | |||
917 | /* Quit. End of script, end of input. */ | ||
918 | case 'q': | ||
919 | /* Exit the outer while loop */ | ||
920 | free(next_line); | ||
921 | next_line = NULL; | ||
922 | goto discard_commands; | ||
923 | |||
924 | /* Append the next line to the current line */ | ||
925 | case 'N': | ||
1042 | { | 926 | { |
1043 | if (sed_cmd->label == NULL) { | 927 | /* If no next line, jump to end of script and exit. */ |
1044 | /* Jump to end of script */ | 928 | if (next_line == NULL) { |
1045 | deleted = 1; | 929 | /* Jump to end of script and exit */ |
930 | free(next_line); | ||
931 | next_line = NULL; | ||
932 | goto discard_line; | ||
933 | /* append next_line, read new next_line. */ | ||
1046 | } else { | 934 | } else { |
1047 | sed_cmd = branch_to(sed_cmd->label); | 935 | int len=strlen(pattern_space); |
936 | |||
937 | pattern_space = realloc(pattern_space, len + strlen(next_line) + 2); | ||
938 | pattern_space[len]='\n'; | ||
939 | strcpy(pattern_space+len+1, next_line); | ||
940 | no_newline=next_no_newline; | ||
941 | next_line = get_next_line(file,&next_no_newline); | ||
942 | linenum++; | ||
1048 | } | 943 | } |
1049 | /* Reset the substitution flag */ | 944 | break; |
1050 | substituted = 0; | ||
1051 | } | 945 | } |
1052 | break; | ||
1053 | case 'y':{ | ||
1054 | int i; | ||
1055 | 946 | ||
1056 | for (i = 0; pattern_space[i] != 0; i++) { | 947 | /* Test if substition worked, branch if so. */ |
1057 | int j; | 948 | case 't': |
949 | if (!substituted) break; | ||
950 | substituted=0; | ||
951 | /* Fall through */ | ||
952 | /* Branch to label */ | ||
953 | case 'b': | ||
954 | if (!sed_cmd->string) goto discard_commands; | ||
955 | else sed_cmd = branch_to(sed_cmd->string); | ||
956 | break; | ||
957 | /* Transliterate characters */ | ||
958 | case 'y': | ||
959 | { | ||
960 | int i; | ||
961 | |||
962 | for (i = 0; pattern_space[i]; i++) { | ||
963 | int j; | ||
1058 | 964 | ||
1059 | for (j = 0; sed_cmd->translate[j]; j += 2) { | 965 | for (j = 0; sed_cmd->string[j]; j += 2) { |
1060 | if (pattern_space[i] == sed_cmd->translate[j]) { | 966 | if (pattern_space[i] == sed_cmd->string[j]) { |
1061 | pattern_space[i] = sed_cmd->translate[j + 1]; | 967 | pattern_space[i] = sed_cmd->string[j + 1]; |
968 | } | ||
1062 | } | 969 | } |
1063 | } | 970 | } |
1064 | } | ||
1065 | } | ||
1066 | break; | ||
1067 | case 'g': /* Replace pattern space with hold space */ | ||
1068 | free(pattern_space); | ||
1069 | if (hold_space) { | ||
1070 | pattern_space = strdup(hold_space); | ||
1071 | } | ||
1072 | break; | ||
1073 | case 'G': { /* Append newline and hold space to pattern space */ | ||
1074 | int pattern_space_size = 2; | ||
1075 | int hold_space_size = 0; | ||
1076 | 971 | ||
1077 | if (pattern_space) { | 972 | break; |
1078 | pattern_space_size += strlen(pattern_space); | ||
1079 | } | ||
1080 | if (hold_space) { | ||
1081 | hold_space_size = strlen(hold_space); | ||
1082 | } | 973 | } |
1083 | pattern_space = xrealloc(pattern_space, pattern_space_size + hold_space_size); | 974 | case 'g': /* Replace pattern space with hold space */ |
1084 | if (pattern_space_size == 2) { | 975 | free(pattern_space); |
1085 | strcpy(pattern_space, "\n"); | 976 | if (hold_space) { |
1086 | } else { | 977 | pattern_space = strdup(hold_space); |
978 | no_newline=0; | ||
979 | } | ||
980 | break; | ||
981 | case 'G': /* Append newline and hold space to pattern space */ | ||
982 | { | ||
983 | int pattern_space_size = 2; | ||
984 | int hold_space_size = 0; | ||
985 | |||
986 | if (pattern_space) | ||
987 | pattern_space_size += strlen(pattern_space); | ||
988 | if (hold_space) hold_space_size = strlen(hold_space); | ||
989 | pattern_space = xrealloc(pattern_space, pattern_space_size + hold_space_size); | ||
990 | if (pattern_space_size == 2) pattern_space[0]=0; | ||
1087 | strcat(pattern_space, "\n"); | 991 | strcat(pattern_space, "\n"); |
992 | if (hold_space) strcat(pattern_space, hold_space); | ||
993 | no_newline=0; | ||
994 | |||
995 | break; | ||
1088 | } | 996 | } |
1089 | if (hold_space) { | 997 | case 'h': /* Replace hold space with pattern space */ |
1090 | strcat(pattern_space, hold_space); | 998 | free(hold_space); |
1091 | } | 999 | hold_space = strdup(pattern_space); |
1092 | break; | 1000 | break; |
1093 | } | 1001 | case 'H': /* Append newline and pattern space to hold space */ |
1094 | case 'h': /* Replace hold space with pattern space */ | 1002 | { |
1095 | free(hold_space); | 1003 | int hold_space_size = 2; |
1096 | hold_space = strdup(pattern_space); | 1004 | int pattern_space_size = 0; |
1097 | break; | ||
1098 | case 'H': { /* Append newline and pattern space to hold space */ | ||
1099 | int hold_space_size = 2; | ||
1100 | int pattern_space_size = 0; | ||
1101 | |||
1102 | if (hold_space) { | ||
1103 | hold_space_size += strlen(hold_space); | ||
1104 | } | ||
1105 | if (pattern_space) { | ||
1106 | pattern_space_size = strlen(pattern_space); | ||
1107 | } | ||
1108 | hold_space = xrealloc(hold_space, hold_space_size + pattern_space_size); | ||
1109 | 1005 | ||
1110 | if (hold_space_size == 2) { | 1006 | if (hold_space) hold_space_size += strlen(hold_space); |
1111 | strcpy(hold_space, "\n"); | 1007 | if (pattern_space) |
1112 | } else { | 1008 | pattern_space_size = strlen(pattern_space); |
1009 | hold_space = xrealloc(hold_space, | ||
1010 | hold_space_size + pattern_space_size); | ||
1011 | |||
1012 | if (hold_space_size == 2) hold_space[0]=0; | ||
1113 | strcat(hold_space, "\n"); | 1013 | strcat(hold_space, "\n"); |
1014 | if (pattern_space) strcat(hold_space, pattern_space); | ||
1015 | |||
1016 | break; | ||
1114 | } | 1017 | } |
1115 | if (pattern_space) { | 1018 | case 'x': /* Exchange hold and pattern space */ |
1116 | strcat(hold_space, pattern_space); | 1019 | { |
1020 | char *tmp = pattern_space; | ||
1021 | pattern_space = hold_space; | ||
1022 | no_newline=0; | ||
1023 | hold_space = tmp; | ||
1024 | break; | ||
1117 | } | 1025 | } |
1118 | break; | ||
1119 | } | ||
1120 | case 'x':{ | ||
1121 | /* Swap hold and pattern space */ | ||
1122 | char *tmp = pattern_space; | ||
1123 | pattern_space = hold_space; | ||
1124 | hold_space = tmp; | ||
1125 | break; | ||
1126 | } | ||
1127 | } | 1026 | } |
1128 | } | 1027 | } |
1129 | |||
1130 | /* | ||
1131 | * exit point from sedding... | ||
1132 | */ | ||
1133 | if (matched) { | ||
1134 | if ( | ||
1135 | /* this is a single-address command or... */ | ||
1136 | (sed_cmd->end_line == 0 && sed_cmd->end_match == NULL) | ||
1137 | /* If only one address */ | ||
1138 | /* we were in the middle of our address range (this | ||
1139 | * isn't the first time through) and.. */ | ||
1140 | || ((sed_cmd->still_in_range == 1) | ||
1141 | /* this line number is the last address we're looking for or... */ | ||
1142 | && ((sed_cmd->end_line > 0 | ||
1143 | && (sed_cmd->end_line == linenum)) | ||
1144 | /* this line matches our last address regex */ | ||
1145 | || (sed_cmd->end_match | ||
1146 | && (regexec(sed_cmd->end_match, pattern_space, | ||
1147 | 0, NULL, 0) == 0))))) { | ||
1148 | /* we're out of our address range */ | ||
1149 | sed_cmd->still_in_range = 0; | ||
1150 | } else { | ||
1151 | /* didn't hit the exit? then we're still in the middle of an address range */ | ||
1152 | sed_cmd->still_in_range = 1; | ||
1153 | } | ||
1154 | } | ||
1155 | |||
1156 | if (sed_cmd->cmd == '}') { | ||
1157 | block_mask = 1; | ||
1158 | } | ||
1159 | |||
1160 | if (deleted) | ||
1161 | break; | ||
1162 | |||
1163 | } | 1028 | } |
1164 | 1029 | ||
1165 | /* we will print the line unless we were told to be quiet or if the | 1030 | /* |
1166 | * line was altered (via a 'd'elete or 's'ubstitution), in which case | 1031 | * exit point from sedding... |
1167 | * the altered line was already printed */ | 1032 | */ |
1168 | if ((!be_quiet && !altered && !substituted) || force_print) { | 1033 | discard_commands: |
1169 | puts(pattern_space); | 1034 | /* we will print the line unless we were told to be quiet ('-n') |
1170 | } | 1035 | or if the line was suppressed (ala 'd'elete) */ |
1036 | if (!be_quiet) sed_puts(pattern_space,no_newline); | ||
1037 | |||
1038 | /* Delete and such jump here. */ | ||
1039 | discard_line: | ||
1040 | flush_append(); | ||
1171 | free(pattern_space); | 1041 | free(pattern_space); |
1172 | pattern_space = next_line; | 1042 | } |
1173 | } while (pattern_space); | ||
1174 | } | 1043 | } |
1175 | 1044 | ||
1176 | extern int sed_main(int argc, char **argv) | 1045 | extern int sed_main(int argc, char **argv) |
@@ -1179,7 +1048,7 @@ extern int sed_main(int argc, char **argv) | |||
1179 | 1048 | ||
1180 | #ifdef CONFIG_FEATURE_CLEAN_UP | 1049 | #ifdef CONFIG_FEATURE_CLEAN_UP |
1181 | /* destroy command strings on exit */ | 1050 | /* destroy command strings on exit */ |
1182 | if (atexit(destroy_cmd_strs) == -1) | 1051 | if (atexit(free_and_close_stuff) == -1) |
1183 | bb_perror_msg_and_die("atexit"); | 1052 | bb_perror_msg_and_die("atexit"); |
1184 | #endif | 1053 | #endif |
1185 | 1054 | ||
@@ -1189,17 +1058,46 @@ extern int sed_main(int argc, char **argv) | |||
1189 | case 'n': | 1058 | case 'n': |
1190 | be_quiet++; | 1059 | be_quiet++; |
1191 | break; | 1060 | break; |
1192 | case 'e':{ | 1061 | case 'e': |
1193 | add_cmd_str(optarg); | 1062 | { |
1063 | int go=1; | ||
1064 | char *temp=bb_xstrdup(optarg),*temp2=temp; | ||
1065 | |||
1066 | /* It is possible to have a command line argument with embedded | ||
1067 | newlines. This counts as a multi-line argument. */ | ||
1068 | |||
1069 | while(go) { | ||
1070 | int len=strcspn(temp2,"\n"); | ||
1071 | if(!temp2[len]) go=0; | ||
1072 | else temp2[len]=0; | ||
1073 | add_cmd(temp2); | ||
1074 | temp2+=len+1; | ||
1075 | } | ||
1076 | free(temp); | ||
1194 | break; | 1077 | break; |
1195 | } | 1078 | } |
1196 | case 'f': | 1079 | case 'f': |
1197 | load_cmd_file(optarg); | 1080 | { |
1081 | FILE *cmdfile; | ||
1082 | char *line; | ||
1083 | |||
1084 | cmdfile = bb_xfopen(optarg, "r"); | ||
1085 | |||
1086 | while ((line = bb_get_chomped_line_from_file(cmdfile)) | ||
1087 | != NULL) { | ||
1088 | add_cmd(line); | ||
1089 | free(line); | ||
1090 | } | ||
1091 | bb_xprint_and_close_file(cmdfile); | ||
1092 | |||
1198 | break; | 1093 | break; |
1094 | } | ||
1199 | default: | 1095 | default: |
1200 | bb_show_usage(); | 1096 | bb_show_usage(); |
1201 | } | 1097 | } |
1202 | } | 1098 | } |
1099 | /* Flush any unfinished commands. */ | ||
1100 | add_cmd(""); | ||
1203 | 1101 | ||
1204 | /* if we didn't get a pattern from a -e and no command file was specified, | 1102 | /* if we didn't get a pattern from a -e and no command file was specified, |
1205 | * argv[optind] should be the pattern. no pattern, no worky */ | 1103 | * argv[optind] should be the pattern. no pattern, no worky */ |
@@ -1207,7 +1105,7 @@ extern int sed_main(int argc, char **argv) | |||
1207 | if (argv[optind] == NULL) | 1105 | if (argv[optind] == NULL) |
1208 | bb_show_usage(); | 1106 | bb_show_usage(); |
1209 | else | 1107 | else |
1210 | add_cmd_str(argv[optind++]); | 1108 | add_cmd(argv[optind++]); |
1211 | } | 1109 | } |
1212 | 1110 | ||
1213 | /* argv[(optind)..(argc-1)] should be names of file to process. If no | 1111 | /* argv[(optind)..(argc-1)] should be names of file to process. If no |
diff --git a/include/libbb.h b/include/libbb.h index 2bb5ce02d..eb6841d33 100644 --- a/include/libbb.h +++ b/include/libbb.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <stdarg.h> | 28 | #include <stdarg.h> |
29 | #include <sys/stat.h> | 29 | #include <sys/stat.h> |
30 | #include <sys/types.h> | 30 | #include <sys/types.h> |
31 | #include <regex.h> | ||
31 | #include <termios.h> | 32 | #include <termios.h> |
32 | 33 | ||
33 | #include <netdb.h> | 34 | #include <netdb.h> |
@@ -468,5 +469,5 @@ extern void print_login_prompt(void); | |||
468 | extern void vfork_daemon_rexec(int argc, char **argv, char *foreground_opt); | 469 | extern void vfork_daemon_rexec(int argc, char **argv, char *foreground_opt); |
469 | extern void get_terminal_width_height(int fd, int *width, int *height); | 470 | extern void get_terminal_width_height(int fd, int *width, int *height); |
470 | extern unsigned long get_ug_id(const char *s, long (*my_getxxnam)(const char *)); | 471 | extern unsigned long get_ug_id(const char *s, long (*my_getxxnam)(const char *)); |
471 | 472 | extern void xregcomp(regex_t *preg, const char *regex, int cflags); | |
472 | #endif /* __LIBCONFIG_H__ */ | 473 | #endif /* __LIBCONFIG_H__ */ |