aboutsummaryrefslogtreecommitdiff
path: root/editors/sed.c
diff options
context:
space:
mode:
Diffstat (limited to 'editors/sed.c')
-rw-r--r--editors/sed.c157
1 files changed, 100 insertions, 57 deletions
diff --git a/editors/sed.c b/editors/sed.c
index 9ab758bd7..99e56ff52 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -14,49 +14,47 @@
14 */ 14 */
15 15
16/* Code overview. 16/* Code overview.
17 *
18 * Files are laid out to avoid unnecessary function declarations. So for
19 * example, every function add_cmd calls occurs before add_cmd in this file.
20 *
21 * add_cmd() is called on each line of sed command text (from a file or from
22 * the command line). It calls get_address() and parse_cmd_args(). The
23 * resulting sed_cmd_t structures are appended to a linked list
24 * (G.sed_cmd_head/G.sed_cmd_tail).
25 *
26 * add_input_file() adds a FILE* to the list of input files. We need to
27 * know all input sources ahead of time to find the last line for the $ match.
28 *
29 * process_files() does actual sedding, reading data lines from each input FILE *
30 * (which could be stdin) and applying the sed command list (sed_cmd_head) to
31 * each of the resulting lines.
32 *
33 * sed_main() is where external code calls into this, with a command line.
34 */
17 35
18 Files are laid out to avoid unnecessary function declarations. So for 36/* Supported features and commands in this version of sed:
19 example, every function add_cmd calls occurs before add_cmd in this file. 37 *
20 38 * - comments ('#')
21 add_cmd() is called on each line of sed command text (from a file or from 39 * - address matching: num|/matchstr/[,num|/matchstr/|$]command
22 the command line). It calls get_address() and parse_cmd_args(). The 40 * - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
23 resulting sed_cmd_t structures are appended to a linked list 41 * - edit commands: (a)ppend, (i)nsert, (c)hange
24 (G.sed_cmd_head/G.sed_cmd_tail). 42 * - file commands: (r)ead
25 43 * - backreferences in substitution expressions (\0, \1, \2...\9)
26 add_input_file() adds a FILE* to the list of input files. We need to 44 * - grouped commands: {cmd1;cmd2}
27 know all input sources ahead of time to find the last line for the $ match. 45 * - transliteration (y/source-chars/dest-chars/)
28 46 * - pattern space hold space storing / swapping (g, h, x)
29 process_files() does actual sedding, reading data lines from each input FILE * 47 * - labels / branching (: label, b, t, T)
30 (which could be stdin) and applying the sed command list (sed_cmd_head) to 48 *
31 each of the resulting lines. 49 * (Note: Specifying an address (range) to match is *optional*; commands
32 50 * default to the whole pattern space if no specific address match was
33 sed_main() is where external code calls into this, with a command line. 51 * requested.)
34*/ 52 *
35 53 * Todo:
36 54 * - Create a wrapper around regex to make libc's regex conform with sed
37/* 55 *
38 Supported features and commands in this version of sed: 56 * Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
39 57 */
40 - comments ('#')
41 - address matching: num|/matchstr/[,num|/matchstr/|$]command
42 - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
43 - edit commands: (a)ppend, (i)nsert, (c)hange
44 - file commands: (r)ead
45 - backreferences in substitution expressions (\0, \1, \2...\9)
46 - grouped commands: {cmd1;cmd2}
47 - transliteration (y/source-chars/dest-chars/)
48 - pattern space hold space storing / swapping (g, h, x)
49 - labels / branching (: label, b, t, T)
50
51 (Note: Specifying an address (range) to match is *optional*; commands
52 default to the whole pattern space if no specific address match was
53 requested.)
54
55 Todo:
56 - Create a wrapper around regex to make libc's regex conform with sed
57
58 Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
59*/
60 58
61//usage:#define sed_trivial_usage 59//usage:#define sed_trivial_usage
62//usage: "[-efinr] SED_CMD [FILE]..." 60//usage: "[-efinr] SED_CMD [FILE]..."
@@ -217,11 +215,16 @@ static void parse_escapes(char *dest, const char *string, int len, char from, ch
217 215
218static char *copy_parsing_escapes(const char *string, int len) 216static char *copy_parsing_escapes(const char *string, int len)
219{ 217{
218 const char *s;
220 char *dest = xmalloc(len + 1); 219 char *dest = xmalloc(len + 1);
221 220
222 parse_escapes(dest, string, len, 'n', '\n'); 221 /* sed recognizes \n */
223 /* GNU sed also recognizes \t */ 222 /* GNU sed also recognizes \t and \r */
224 parse_escapes(dest, dest, strlen(dest), 't', '\t'); 223 for (s = "\nn\tt\rr"; *s; s += 2) {
224 parse_escapes(dest, string, len, s[1], s[0]);
225 string = dest;
226 len = strlen(dest);
227 }
225 return dest; 228 return dest;
226} 229}
227 230
@@ -244,11 +247,13 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
244 delimiter = -delimiter; 247 delimiter = -delimiter;
245 } 248 }
246 249
247 for (; (ch = str[idx]); idx++) { 250 for (; (ch = str[idx]) != '\0'; idx++) {
248 if (bracket >= 0) { 251 if (bracket >= 0) {
249 if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 252 if (ch == ']'
250 && str[idx - 1] == '^'))) 253 && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^'))
254 ) {
251 bracket = -1; 255 bracket = -1;
256 }
252 } else if (escaped) 257 } else if (escaped)
253 escaped = 0; 258 escaped = 0;
254 else if (ch == '\\') 259 else if (ch == '\\')
@@ -434,11 +439,47 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
434 */ 439 */
435static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) 440static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
436{ 441{
442 static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}";
443 enum {
444 IDX_s = 0,
445 IDX_a,
446 IDX_i,
447 IDX_c,
448 IDX_r,
449 IDX_w,
450 IDX_colon,
451 IDX_b,
452 IDX_t,
453 IDX_T,
454 IDX_y,
455 IDX_d,
456 IDX_D,
457 IDX_g,
458 IDX_G,
459 IDX_h,
460 IDX_H,
461 IDX_l,
462 IDX_n,
463 IDX_N,
464 IDX_p,
465 IDX_P,
466 IDX_q,
467 IDX_x,
468 IDX_equal,
469 IDX_lbrace,
470 IDX_rbrace,
471 IDX_nul
472 };
473 struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 1 : -1]; };
474
475 unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters;
476
437 /* handle (s)ubstitution command */ 477 /* handle (s)ubstitution command */
438 if (sed_cmd->cmd == 's') 478 if (idx == IDX_s) {
439 cmdstr += parse_subst_cmd(sed_cmd, cmdstr); 479 cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
480 }
440 /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ 481 /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
441 else if (strchr("aic", sed_cmd->cmd)) { 482 else if (idx <= IDX_c) { /* a,i,c */
442 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') 483 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
443 bb_error_msg_and_die("only a beginning address can be specified for edit commands"); 484 bb_error_msg_and_die("only a beginning address can be specified for edit commands");
444 for (;;) { 485 for (;;) {
@@ -454,8 +495,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
454 /* "\anychar" -> "anychar" */ 495 /* "\anychar" -> "anychar" */
455 parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0'); 496 parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0');
456 cmdstr += strlen(cmdstr); 497 cmdstr += strlen(cmdstr);
498 }
457 /* handle file cmds: (r)ead */ 499 /* handle file cmds: (r)ead */
458 } else if (strchr("rw", sed_cmd->cmd)) { 500 else if (idx <= IDX_w) { /* r,w */
459 if (sed_cmd->end_line || sed_cmd->end_match) 501 if (sed_cmd->end_line || sed_cmd->end_match)
460 bb_error_msg_and_die("command only uses one address"); 502 bb_error_msg_and_die("command only uses one address");
461 cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string); 503 cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string);
@@ -463,8 +505,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
463 sed_cmd->sw_file = xfopen_for_write(sed_cmd->string); 505 sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
464 sed_cmd->sw_last_char = '\n'; 506 sed_cmd->sw_last_char = '\n';
465 } 507 }
508 }
466 /* handle branch commands */ 509 /* handle branch commands */
467 } else if (strchr(":btT", sed_cmd->cmd)) { 510 else if (idx <= IDX_T) { /* :,b,t,T */
468 int length; 511 int length;
469 512
470 cmdstr = skip_whitespace(cmdstr); 513 cmdstr = skip_whitespace(cmdstr);
@@ -475,7 +518,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
475 } 518 }
476 } 519 }
477 /* translation command */ 520 /* translation command */
478 else if (sed_cmd->cmd == 'y') { 521 else if (idx == IDX_y) {
479 char *match, *replace; 522 char *match, *replace;
480 int i = cmdstr[0]; 523 int i = cmdstr[0];
481 524
@@ -495,7 +538,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
495 /* if it wasn't a single-letter command that takes no arguments 538 /* if it wasn't a single-letter command that takes no arguments
496 * then it must be an invalid command. 539 * then it must be an invalid command.
497 */ 540 */
498 else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) { 541 else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */
499 bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd); 542 bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd);
500 } 543 }
501 544
@@ -966,9 +1009,9 @@ static void process_files(void)
966 } 1009 }
967 sed_cmd->in_match = !( 1010 sed_cmd->in_match = !(
968 /* has the ending line come, or is this a single address command? */ 1011 /* has the ending line come, or is this a single address command? */
969 (sed_cmd->end_line ? 1012 (sed_cmd->end_line
970 sed_cmd->end_line == -1 ? 1013 ? sed_cmd->end_line == -1
971 !next_line 1014 ? !next_line
972 : (sed_cmd->end_line <= linenum) 1015 : (sed_cmd->end_line <= linenum)
973 : !sed_cmd->end_match 1016 : !sed_cmd->end_match
974 ) 1017 )