diff options
Diffstat (limited to 'editors/sed.c')
-rw-r--r-- | editors/sed.c | 157 |
1 files changed, 100 insertions, 57 deletions
diff --git a/editors/sed.c b/editors/sed.c index 9ab758bd7..99e56ff52 100644 --- a/editors/sed.c +++ b/editors/sed.c | |||
@@ -14,49 +14,47 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | /* Code overview. | 16 | /* Code overview. |
17 | * | ||
18 | * Files are laid out to avoid unnecessary function declarations. So for | ||
19 | * example, every function add_cmd calls occurs before add_cmd in this file. | ||
20 | * | ||
21 | * add_cmd() is called on each line of sed command text (from a file or from | ||
22 | * the command line). It calls get_address() and parse_cmd_args(). The | ||
23 | * resulting sed_cmd_t structures are appended to a linked list | ||
24 | * (G.sed_cmd_head/G.sed_cmd_tail). | ||
25 | * | ||
26 | * add_input_file() adds a FILE* to the list of input files. We need to | ||
27 | * know all input sources ahead of time to find the last line for the $ match. | ||
28 | * | ||
29 | * process_files() does actual sedding, reading data lines from each input FILE * | ||
30 | * (which could be stdin) and applying the sed command list (sed_cmd_head) to | ||
31 | * each of the resulting lines. | ||
32 | * | ||
33 | * sed_main() is where external code calls into this, with a command line. | ||
34 | */ | ||
17 | 35 | ||
18 | Files are laid out to avoid unnecessary function declarations. So for | 36 | /* Supported features and commands in this version of sed: |
19 | example, every function add_cmd calls occurs before add_cmd in this file. | 37 | * |
20 | 38 | * - comments ('#') | |
21 | add_cmd() is called on each line of sed command text (from a file or from | 39 | * - address matching: num|/matchstr/[,num|/matchstr/|$]command |
22 | the command line). It calls get_address() and parse_cmd_args(). The | 40 | * - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags) |
23 | resulting sed_cmd_t structures are appended to a linked list | 41 | * - edit commands: (a)ppend, (i)nsert, (c)hange |
24 | (G.sed_cmd_head/G.sed_cmd_tail). | 42 | * - file commands: (r)ead |
25 | 43 | * - backreferences in substitution expressions (\0, \1, \2...\9) | |
26 | add_input_file() adds a FILE* to the list of input files. We need to | 44 | * - grouped commands: {cmd1;cmd2} |
27 | know all input sources ahead of time to find the last line for the $ match. | 45 | * - transliteration (y/source-chars/dest-chars/) |
28 | 46 | * - pattern space hold space storing / swapping (g, h, x) | |
29 | process_files() does actual sedding, reading data lines from each input FILE * | 47 | * - labels / branching (: label, b, t, T) |
30 | (which could be stdin) and applying the sed command list (sed_cmd_head) to | 48 | * |
31 | each of the resulting lines. | 49 | * (Note: Specifying an address (range) to match is *optional*; commands |
32 | 50 | * default to the whole pattern space if no specific address match was | |
33 | sed_main() is where external code calls into this, with a command line. | 51 | * requested.) |
34 | */ | 52 | * |
35 | 53 | * Todo: | |
36 | 54 | * - Create a wrapper around regex to make libc's regex conform with sed | |
37 | /* | 55 | * |
38 | Supported features and commands in this version of sed: | 56 | * Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html |
39 | 57 | */ | |
40 | - comments ('#') | ||
41 | - address matching: num|/matchstr/[,num|/matchstr/|$]command | ||
42 | - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags) | ||
43 | - edit commands: (a)ppend, (i)nsert, (c)hange | ||
44 | - file commands: (r)ead | ||
45 | - backreferences in substitution expressions (\0, \1, \2...\9) | ||
46 | - grouped commands: {cmd1;cmd2} | ||
47 | - transliteration (y/source-chars/dest-chars/) | ||
48 | - pattern space hold space storing / swapping (g, h, x) | ||
49 | - labels / branching (: label, b, t, T) | ||
50 | |||
51 | (Note: Specifying an address (range) to match is *optional*; commands | ||
52 | default to the whole pattern space if no specific address match was | ||
53 | requested.) | ||
54 | |||
55 | Todo: | ||
56 | - Create a wrapper around regex to make libc's regex conform with sed | ||
57 | |||
58 | Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html | ||
59 | */ | ||
60 | 58 | ||
61 | //usage:#define sed_trivial_usage | 59 | //usage:#define sed_trivial_usage |
62 | //usage: "[-efinr] SED_CMD [FILE]..." | 60 | //usage: "[-efinr] SED_CMD [FILE]..." |
@@ -217,11 +215,16 @@ static void parse_escapes(char *dest, const char *string, int len, char from, ch | |||
217 | 215 | ||
218 | static char *copy_parsing_escapes(const char *string, int len) | 216 | static char *copy_parsing_escapes(const char *string, int len) |
219 | { | 217 | { |
218 | const char *s; | ||
220 | char *dest = xmalloc(len + 1); | 219 | char *dest = xmalloc(len + 1); |
221 | 220 | ||
222 | parse_escapes(dest, string, len, 'n', '\n'); | 221 | /* sed recognizes \n */ |
223 | /* GNU sed also recognizes \t */ | 222 | /* GNU sed also recognizes \t and \r */ |
224 | parse_escapes(dest, dest, strlen(dest), 't', '\t'); | 223 | for (s = "\nn\tt\rr"; *s; s += 2) { |
224 | parse_escapes(dest, string, len, s[1], s[0]); | ||
225 | string = dest; | ||
226 | len = strlen(dest); | ||
227 | } | ||
225 | return dest; | 228 | return dest; |
226 | } | 229 | } |
227 | 230 | ||
@@ -244,11 +247,13 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str) | |||
244 | delimiter = -delimiter; | 247 | delimiter = -delimiter; |
245 | } | 248 | } |
246 | 249 | ||
247 | for (; (ch = str[idx]); idx++) { | 250 | for (; (ch = str[idx]) != '\0'; idx++) { |
248 | if (bracket >= 0) { | 251 | if (bracket >= 0) { |
249 | if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 | 252 | if (ch == ']' |
250 | && str[idx - 1] == '^'))) | 253 | && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^')) |
254 | ) { | ||
251 | bracket = -1; | 255 | bracket = -1; |
256 | } | ||
252 | } else if (escaped) | 257 | } else if (escaped) |
253 | escaped = 0; | 258 | escaped = 0; |
254 | else if (ch == '\\') | 259 | else if (ch == '\\') |
@@ -434,11 +439,47 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) | |||
434 | */ | 439 | */ |
435 | static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | 440 | static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) |
436 | { | 441 | { |
442 | static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}"; | ||
443 | enum { | ||
444 | IDX_s = 0, | ||
445 | IDX_a, | ||
446 | IDX_i, | ||
447 | IDX_c, | ||
448 | IDX_r, | ||
449 | IDX_w, | ||
450 | IDX_colon, | ||
451 | IDX_b, | ||
452 | IDX_t, | ||
453 | IDX_T, | ||
454 | IDX_y, | ||
455 | IDX_d, | ||
456 | IDX_D, | ||
457 | IDX_g, | ||
458 | IDX_G, | ||
459 | IDX_h, | ||
460 | IDX_H, | ||
461 | IDX_l, | ||
462 | IDX_n, | ||
463 | IDX_N, | ||
464 | IDX_p, | ||
465 | IDX_P, | ||
466 | IDX_q, | ||
467 | IDX_x, | ||
468 | IDX_equal, | ||
469 | IDX_lbrace, | ||
470 | IDX_rbrace, | ||
471 | IDX_nul | ||
472 | }; | ||
473 | struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 1 : -1]; }; | ||
474 | |||
475 | unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters; | ||
476 | |||
437 | /* handle (s)ubstitution command */ | 477 | /* handle (s)ubstitution command */ |
438 | if (sed_cmd->cmd == 's') | 478 | if (idx == IDX_s) { |
439 | cmdstr += parse_subst_cmd(sed_cmd, cmdstr); | 479 | cmdstr += parse_subst_cmd(sed_cmd, cmdstr); |
480 | } | ||
440 | /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ | 481 | /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ |
441 | else if (strchr("aic", sed_cmd->cmd)) { | 482 | else if (idx <= IDX_c) { /* a,i,c */ |
442 | if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') | 483 | if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') |
443 | bb_error_msg_and_die("only a beginning address can be specified for edit commands"); | 484 | bb_error_msg_and_die("only a beginning address can be specified for edit commands"); |
444 | for (;;) { | 485 | for (;;) { |
@@ -454,8 +495,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
454 | /* "\anychar" -> "anychar" */ | 495 | /* "\anychar" -> "anychar" */ |
455 | parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0'); | 496 | parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0'); |
456 | cmdstr += strlen(cmdstr); | 497 | cmdstr += strlen(cmdstr); |
498 | } | ||
457 | /* handle file cmds: (r)ead, (w)rite */ | 499 | /* handle file cmds: (r)ead, (w)rite */ |
458 | } else if (strchr("rw", sed_cmd->cmd)) { | 500 | else if (idx <= IDX_w) { /* r,w */ |
459 | if (sed_cmd->end_line || sed_cmd->end_match) | 501 | if (sed_cmd->end_line || sed_cmd->end_match) |
460 | bb_error_msg_and_die("command only uses one address"); | 502 | bb_error_msg_and_die("command only uses one address"); |
461 | cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string); | 503 | cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string); |
@@ -463,8 +505,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
463 | sed_cmd->sw_file = xfopen_for_write(sed_cmd->string); | 505 | sed_cmd->sw_file = xfopen_for_write(sed_cmd->string); |
464 | sed_cmd->sw_last_char = '\n'; | 506 | sed_cmd->sw_last_char = '\n'; |
465 | } | 507 | } |
508 | } | ||
466 | /* handle branch commands */ | 509 | /* handle branch commands */ |
467 | } else if (strchr(":btT", sed_cmd->cmd)) { | 510 | else if (idx <= IDX_T) { /* :,b,t,T */ |
468 | int length; | 511 | int length; |
469 | 512 | ||
470 | cmdstr = skip_whitespace(cmdstr); | 513 | cmdstr = skip_whitespace(cmdstr); |
@@ -475,7 +518,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
475 | } | 518 | } |
476 | } | 519 | } |
477 | /* translation command */ | 520 | /* translation command */ |
478 | else if (sed_cmd->cmd == 'y') { | 521 | else if (idx == IDX_y) { |
479 | char *match, *replace; | 522 | char *match, *replace; |
480 | int i = cmdstr[0]; | 523 | int i = cmdstr[0]; |
481 | 524 | ||
@@ -495,7 +538,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
495 | /* if it wasn't a single-letter command that takes no arguments | 538 | /* if it wasn't a single-letter command that takes no arguments |
496 | * then it must be an invalid command. | 539 | * then it must be an invalid command. |
497 | */ | 540 | */ |
498 | else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) { | 541 | else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */ |
499 | bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd); | 542 | bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd); |
500 | } | 543 | } |
501 | 544 | ||
@@ -966,9 +1009,9 @@ static void process_files(void) | |||
966 | } | 1009 | } |
967 | sed_cmd->in_match = !( | 1010 | sed_cmd->in_match = !( |
968 | /* has the ending line come, or is this a single address command? */ | 1011 | /* has the ending line come, or is this a single address command? */ |
969 | (sed_cmd->end_line ? | 1012 | (sed_cmd->end_line |
970 | sed_cmd->end_line == -1 ? | 1013 | ? sed_cmd->end_line == -1 |
971 | !next_line | 1014 | ? !next_line |
972 | : (sed_cmd->end_line <= linenum) | 1015 | : (sed_cmd->end_line <= linenum) |
973 | : !sed_cmd->end_match | 1016 | : !sed_cmd->end_match |
974 | ) | 1017 | ) |