diff options
-rw-r--r-- | editors/sed.c | 146 |
1 files changed, 92 insertions, 54 deletions
diff --git a/editors/sed.c b/editors/sed.c index 9ab758bd7..9e27e3e18 100644 --- a/editors/sed.c +++ b/editors/sed.c | |||
@@ -14,49 +14,47 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | /* Code overview. | 16 | /* Code overview. |
17 | * | ||
18 | * Files are laid out to avoid unnecessary function declarations. So for | ||
19 | * example, every function add_cmd calls occurs before add_cmd in this file. | ||
20 | * | ||
21 | * add_cmd() is called on each line of sed command text (from a file or from | ||
22 | * the command line). It calls get_address() and parse_cmd_args(). The | ||
23 | * resulting sed_cmd_t structures are appended to a linked list | ||
24 | * (G.sed_cmd_head/G.sed_cmd_tail). | ||
25 | * | ||
26 | * add_input_file() adds a FILE* to the list of input files. We need to | ||
27 | * know all input sources ahead of time to find the last line for the $ match. | ||
28 | * | ||
29 | * process_files() does actual sedding, reading data lines from each input FILE * | ||
30 | * (which could be stdin) and applying the sed command list (sed_cmd_head) to | ||
31 | * each of the resulting lines. | ||
32 | * | ||
33 | * sed_main() is where external code calls into this, with a command line. | ||
34 | */ | ||
17 | 35 | ||
18 | Files are laid out to avoid unnecessary function declarations. So for | 36 | /* Supported features and commands in this version of sed: |
19 | example, every function add_cmd calls occurs before add_cmd in this file. | 37 | * |
20 | 38 | * - comments ('#') | |
21 | add_cmd() is called on each line of sed command text (from a file or from | 39 | * - address matching: num|/matchstr/[,num|/matchstr/|$]command |
22 | the command line). It calls get_address() and parse_cmd_args(). The | 40 | * - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags) |
23 | resulting sed_cmd_t structures are appended to a linked list | 41 | * - edit commands: (a)ppend, (i)nsert, (c)hange |
24 | (G.sed_cmd_head/G.sed_cmd_tail). | 42 | * - file commands: (r)ead |
25 | 43 | * - backreferences in substitution expressions (\0, \1, \2...\9) | |
26 | add_input_file() adds a FILE* to the list of input files. We need to | 44 | * - grouped commands: {cmd1;cmd2} |
27 | know all input sources ahead of time to find the last line for the $ match. | 45 | * - transliteration (y/source-chars/dest-chars/) |
28 | 46 | * - pattern space hold space storing / swapping (g, h, x) | |
29 | process_files() does actual sedding, reading data lines from each input FILE * | 47 | * - labels / branching (: label, b, t, T) |
30 | (which could be stdin) and applying the sed command list (sed_cmd_head) to | 48 | * |
31 | each of the resulting lines. | 49 | * (Note: Specifying an address (range) to match is *optional*; commands |
32 | 50 | * default to the whole pattern space if no specific address match was | |
33 | sed_main() is where external code calls into this, with a command line. | 51 | * requested.) |
34 | */ | 52 | * |
35 | 53 | * Todo: | |
36 | 54 | * - Create a wrapper around regex to make libc's regex conform with sed | |
37 | /* | 55 | * |
38 | Supported features and commands in this version of sed: | 56 | * Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html |
39 | 57 | */ | |
40 | - comments ('#') | ||
41 | - address matching: num|/matchstr/[,num|/matchstr/|$]command | ||
42 | - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags) | ||
43 | - edit commands: (a)ppend, (i)nsert, (c)hange | ||
44 | - file commands: (r)ead | ||
45 | - backreferences in substitution expressions (\0, \1, \2...\9) | ||
46 | - grouped commands: {cmd1;cmd2} | ||
47 | - transliteration (y/source-chars/dest-chars/) | ||
48 | - pattern space hold space storing / swapping (g, h, x) | ||
49 | - labels / branching (: label, b, t, T) | ||
50 | |||
51 | (Note: Specifying an address (range) to match is *optional*; commands | ||
52 | default to the whole pattern space if no specific address match was | ||
53 | requested.) | ||
54 | |||
55 | Todo: | ||
56 | - Create a wrapper around regex to make libc's regex conform with sed | ||
57 | |||
58 | Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html | ||
59 | */ | ||
60 | 58 | ||
61 | //usage:#define sed_trivial_usage | 59 | //usage:#define sed_trivial_usage |
62 | //usage: "[-efinr] SED_CMD [FILE]..." | 60 | //usage: "[-efinr] SED_CMD [FILE]..." |
@@ -244,11 +242,13 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str) | |||
244 | delimiter = -delimiter; | 242 | delimiter = -delimiter; |
245 | } | 243 | } |
246 | 244 | ||
247 | for (; (ch = str[idx]); idx++) { | 245 | for (; (ch = str[idx]) != '\0'; idx++) { |
248 | if (bracket >= 0) { | 246 | if (bracket >= 0) { |
249 | if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 | 247 | if (ch == ']' |
250 | && str[idx - 1] == '^'))) | 248 | && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^')) |
249 | ) { | ||
251 | bracket = -1; | 250 | bracket = -1; |
251 | } | ||
252 | } else if (escaped) | 252 | } else if (escaped) |
253 | escaped = 0; | 253 | escaped = 0; |
254 | else if (ch == '\\') | 254 | else if (ch == '\\') |
@@ -434,11 +434,47 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) | |||
434 | */ | 434 | */ |
435 | static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | 435 | static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) |
436 | { | 436 | { |
437 | static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}"; | ||
438 | enum { | ||
439 | IDX_s = 0, | ||
440 | IDX_a, | ||
441 | IDX_i, | ||
442 | IDX_c, | ||
443 | IDX_r, | ||
444 | IDX_w, | ||
445 | IDX_colon, | ||
446 | IDX_b, | ||
447 | IDX_t, | ||
448 | IDX_T, | ||
449 | IDX_y, | ||
450 | IDX_d, | ||
451 | IDX_D, | ||
452 | IDX_g, | ||
453 | IDX_G, | ||
454 | IDX_h, | ||
455 | IDX_H, | ||
456 | IDX_l, | ||
457 | IDX_n, | ||
458 | IDX_N, | ||
459 | IDX_p, | ||
460 | IDX_P, | ||
461 | IDX_q, | ||
462 | IDX_x, | ||
463 | IDX_equal, | ||
464 | IDX_lbrace, | ||
465 | IDX_rbrace, | ||
466 | IDX_nul | ||
467 | }; | ||
468 | struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 1 : -1]; }; | ||
469 | |||
470 | unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters; | ||
471 | |||
437 | /* handle (s)ubstitution command */ | 472 | /* handle (s)ubstitution command */ |
438 | if (sed_cmd->cmd == 's') | 473 | if (idx == IDX_s) { |
439 | cmdstr += parse_subst_cmd(sed_cmd, cmdstr); | 474 | cmdstr += parse_subst_cmd(sed_cmd, cmdstr); |
475 | } | ||
440 | /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ | 476 | /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ |
441 | else if (strchr("aic", sed_cmd->cmd)) { | 477 | else if (idx <= IDX_c) { /* a,i,c */ |
442 | if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') | 478 | if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') |
443 | bb_error_msg_and_die("only a beginning address can be specified for edit commands"); | 479 | bb_error_msg_and_die("only a beginning address can be specified for edit commands"); |
444 | for (;;) { | 480 | for (;;) { |
@@ -454,8 +490,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
454 | /* "\anychar" -> "anychar" */ | 490 | /* "\anychar" -> "anychar" */ |
455 | parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0'); | 491 | parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0'); |
456 | cmdstr += strlen(cmdstr); | 492 | cmdstr += strlen(cmdstr); |
493 | } | ||
457 | /* handle file cmds: (r)ead */ | 494 | /* handle file cmds: (r)ead */ |
458 | } else if (strchr("rw", sed_cmd->cmd)) { | 495 | else if (idx <= IDX_w) { /* r,w */ |
459 | if (sed_cmd->end_line || sed_cmd->end_match) | 496 | if (sed_cmd->end_line || sed_cmd->end_match) |
460 | bb_error_msg_and_die("command only uses one address"); | 497 | bb_error_msg_and_die("command only uses one address"); |
461 | cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string); | 498 | cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string); |
@@ -463,8 +500,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
463 | sed_cmd->sw_file = xfopen_for_write(sed_cmd->string); | 500 | sed_cmd->sw_file = xfopen_for_write(sed_cmd->string); |
464 | sed_cmd->sw_last_char = '\n'; | 501 | sed_cmd->sw_last_char = '\n'; |
465 | } | 502 | } |
503 | } | ||
466 | /* handle branch commands */ | 504 | /* handle branch commands */ |
467 | } else if (strchr(":btT", sed_cmd->cmd)) { | 505 | else if (idx <= IDX_T) { /* :,b,t,T */ |
468 | int length; | 506 | int length; |
469 | 507 | ||
470 | cmdstr = skip_whitespace(cmdstr); | 508 | cmdstr = skip_whitespace(cmdstr); |
@@ -475,7 +513,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
475 | } | 513 | } |
476 | } | 514 | } |
477 | /* translation command */ | 515 | /* translation command */ |
478 | else if (sed_cmd->cmd == 'y') { | 516 | else if (idx == IDX_y) { |
479 | char *match, *replace; | 517 | char *match, *replace; |
480 | int i = cmdstr[0]; | 518 | int i = cmdstr[0]; |
481 | 519 | ||
@@ -495,7 +533,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) | |||
495 | /* if it wasn't a single-letter command that takes no arguments | 533 | /* if it wasn't a single-letter command that takes no arguments |
496 | * then it must be an invalid command. | 534 | * then it must be an invalid command. |
497 | */ | 535 | */ |
498 | else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) { | 536 | else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */ |
499 | bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd); | 537 | bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd); |
500 | } | 538 | } |
501 | 539 | ||
@@ -966,9 +1004,9 @@ static void process_files(void) | |||
966 | } | 1004 | } |
967 | sed_cmd->in_match = !( | 1005 | sed_cmd->in_match = !( |
968 | /* has the ending line come, or is this a single address command? */ | 1006 | /* has the ending line come, or is this a single address command? */ |
969 | (sed_cmd->end_line ? | 1007 | (sed_cmd->end_line |
970 | sed_cmd->end_line == -1 ? | 1008 | ? sed_cmd->end_line == -1 |
971 | !next_line | 1009 | ? !next_line |
972 | : (sed_cmd->end_line <= linenum) | 1010 | : (sed_cmd->end_line <= linenum) |
973 | : !sed_cmd->end_match | 1011 | : !sed_cmd->end_match |
974 | ) | 1012 | ) |