diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2016-04-24 16:18:03 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2016-04-24 16:18:03 +0200 |
commit | cbdff15bb78ba9d83be7f6b5087ee665715999b0 (patch) | |
tree | 6c6728f5b8c3bda7186cbea88a614de2ca26a52b | |
parent | 4c8fa34417fd2ccdda6a8ea508a3f1e7fb1d4ceb (diff) | |
download | busybox-w32-cbdff15bb78ba9d83be7f6b5087ee665715999b0.tar.gz busybox-w32-cbdff15bb78ba9d83be7f6b5087ee665715999b0.tar.bz2 busybox-w32-cbdff15bb78ba9d83be7f6b5087ee665715999b0.zip |
sed: understand \n,\r and \t in i and a commands. Closes 8871
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/sed.c | 38 | ||||
-rwxr-xr-x | testsuite/sed.tests | 18 |
2 files changed, 43 insertions, 13 deletions
diff --git a/editors/sed.c b/editors/sed.c<br>index 6bce25b2c..7f18fd0c4 100644<br>--- a/editors/sed.c<br>+++ b/editors/sed.c | |||
@@ -218,23 +218,33 @@ static void cleanup_outname(void) | |||
218 | 218 | ||
219 | /* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */ | 219 | /* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */ |
220 | 220 | ||
221 | static void parse_escapes(char *dest, const char *string, int len, char from, char to) | 221 | static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to) |
222 | { | 222 | { |
223 | char *d = dest; | ||
223 | int i = 0; | 224 | int i = 0; |
224 | 225 | ||
226 | if (len == -1) | ||
227 | len = strlen(string); | ||
228 | |||
225 | while (i < len) { | 229 | while (i < len) { |
226 | if (string[i] == '\\') { | 230 | if (string[i] == '\\') { |
227 | if (!to || string[i+1] == from) { | 231 | if (!to || string[i+1] == from) { |
228 | *dest++ = to ? to : string[i+1]; | 232 | if ((*d = to ? to : string[i+1]) == '\0') |
233 | return d - dest; | ||
229 | i += 2; | 234 | i += 2; |
235 | d++; | ||
230 | continue; | 236 | continue; |
231 | } | 237 | } |
232 | *dest++ = string[i++]; | 238 | i++; /* skip backslash in string[] */ |
239 | *d++ = '\\'; | ||
240 | /* fall through: copy next char verbatim */ | ||
233 | } | 241 | } |
234 | /* TODO: is it safe wrt a string with trailing '\\' ? */ | 242 | if ((*d = string[i++]) == '\0') |
235 | *dest++ = string[i++]; | 243 | return d - dest; |
244 | d++; | ||
236 | } | 245 | } |
237 | *dest = '\0'; | 246 | *d = '\0'; |
247 | return d - dest; | ||
238 | } | 248 | } |
239 | 249 | ||
240 | static char *copy_parsing_escapes(const char *string, int len) | 250 | static char *copy_parsing_escapes(const char *string, int len) |
@@ -245,9 +255,8 @@ static char *copy_parsing_escapes(const char *string, int len) | |||
245 | /* sed recognizes \n */ | 255 | /* sed recognizes \n */ |
246 | /* GNU sed also recognizes \t and \r */ | 256 | /* GNU sed also recognizes \t and \r */ |
247 | for (s = "\nn\tt\rr"; *s; s += 2) { | 257 | for (s = "\nn\tt\rr"; *s; s += 2) { |
248 | parse_escapes(dest, string, len, s[1], s[0]); | 258 | len = parse_escapes(dest, string, len, s[1], s[0]); |
249 | string = dest; | 259 | string = dest; |
250 | len = strlen(dest); | ||
251 | } | 260 | } |
252 | return dest; | 261 | return dest; |
253 | } | 262 | } |
@@ -516,6 +525,8 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
516 | } | 525 | } |
517 | /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ | 526 | /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ |
518 | else if (idx <= IDX_c) { /* a,i,c */ | 527 | else if (idx <= IDX_c) { /* a,i,c */ |
528 | unsigned len; | ||
529 | |||
519 | if (idx < IDX_c) { /* a,i */ | 530 | if (idx < IDX_c) { /* a,i */ |
520 | if (sed_cmd->end_line || sed_cmd->end_match) | 531 | if (sed_cmd->end_line || sed_cmd->end_match) |
521 | bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd); | 532 | bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd); |
@@ -529,10 +540,11 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
529 | break; | 540 | break; |
530 | cmdstr++; | 541 | cmdstr++; |
531 | } | 542 | } |
532 | sed_cmd->string = xstrdup(cmdstr); | 543 | len = strlen(cmdstr); |
544 | sed_cmd->string = copy_parsing_escapes(cmdstr, len); | ||
545 | cmdstr += len; | ||
533 | /* "\anychar" -> "anychar" */ | 546 | /* "\anychar" -> "anychar" */ |
534 | parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0'); | 547 | parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0'); |
535 | cmdstr += strlen(cmdstr); | ||
536 | } | 548 | } |
537 | /* handle file cmds: (r)ead */ | 549 | /* handle file cmds: (r)ead */ |
538 | else if (idx <= IDX_w) { /* r,w */ | 550 | else if (idx <= IDX_w) { /* r,w */ |
@@ -564,8 +576,8 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
564 | 576 | ||
565 | cmdstr += parse_regex_delim(cmdstr, &match, &replace)+1; | 577 | cmdstr += parse_regex_delim(cmdstr, &match, &replace)+1; |
566 | /* \n already parsed, but \delimiter needs unescaping. */ | 578 | /* \n already parsed, but \delimiter needs unescaping. */ |
567 | parse_escapes(match, match, strlen(match), i, i); | 579 | parse_escapes(match, match, -1, i, i); |
568 | parse_escapes(replace, replace, strlen(replace), i, i); | 580 | parse_escapes(replace, replace, -1, i, i); |
569 | 581 | ||
570 | sed_cmd->string = xzalloc((strlen(match) + 1) * 2); | 582 | sed_cmd->string = xzalloc((strlen(match) + 1) * 2); |
571 | for (i = 0; match[i] && replace[i]; i++) { | 583 | for (i = 0; match[i] && replace[i]; i++) { |
diff --git a/testsuite/sed.tests b/testsuite/sed.tests<br>index 5d2356b64..c4b6fa278 100755<br>--- a/testsuite/sed.tests<br>+++ b/testsuite/sed.tests | |||
@@ -275,6 +275,24 @@ testing "sed a cmd ended by double backslash" \ | |||
275 | | two \\ | 275 | | two \\ |
276 | ' | 276 | ' |
277 | 277 | ||
278 | testing "sed a cmd understands \\n,\\t,\\r" \ | ||
279 | "sed '/1/a\\\\t\\rzero\\none\\\\ntwo\\\\\\nthree'" \ | ||
280 | "\ | ||
281 | line1 | ||
282 | \t\rzero | ||
283 | one\\\\ntwo\\ | ||
284 | three | ||
285 | " "" "line1\n" | ||
286 | |||
287 | testing "sed i cmd understands \\n,\\t,\\r" \ | ||
288 | "sed '/1/i\\\\t\\rzero\\none\\\\ntwo\\\\\\nthree'" \ | ||
289 | "\ | ||
290 | \t\rzero | ||
291 | one\\\\ntwo\\ | ||
292 | three | ||
293 | line1 | ||
294 | " "" "line1\n" | ||
295 | |||
278 | # first three lines are deleted; 4th line is matched and printed by "2,3" and by "4" ranges | 296 | # first three lines are deleted; 4th line is matched and printed by "2,3" and by "4" ranges |
279 | testing "sed with N skipping lines past ranges on next cmds" \ | 297 | testing "sed with N skipping lines past ranges on next cmds" \ |
280 | "sed -n '1{N;N;d};1p;2,3p;3p;4p'" \ | 298 | "sed -n '1{N;N;d};1p;2,3p;3p;4p'" \ |