diff options
-rw-r--r-- | editors/sed.c | 159 |
1 files changed, 88 insertions, 71 deletions
diff --git a/editors/sed.c b/editors/sed.c index 96e0dd88b..ac765c83f 100644 --- a/editors/sed.c +++ b/editors/sed.c | |||
@@ -30,7 +30,8 @@ | |||
30 | - edit commands: (a)ppend, (i)nsert, (c)hange | 30 | - edit commands: (a)ppend, (i)nsert, (c)hange |
31 | - file commands: (r)ead | 31 | - file commands: (r)ead |
32 | - backreferences in substitution expressions (\1, \2...\9) | 32 | - backreferences in substitution expressions (\1, \2...\9) |
33 | 33 | - grouped commands: {cmd1;cmd2} | |
34 | |||
34 | (Note: Specifying an address (range) to match is *optional*; commands | 35 | (Note: Specifying an address (range) to match is *optional*; commands |
35 | default to the whole pattern space if no specific address match was | 36 | default to the whole pattern space if no specific address match was |
36 | requested.) | 37 | requested.) |
@@ -226,7 +227,7 @@ static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) | |||
226 | 227 | ||
227 | /* verify that the 's' is followed by something. That something | 228 | /* verify that the 's' is followed by something. That something |
228 | * (typically a 'slash') is now our regexp delimiter... */ | 229 | * (typically a 'slash') is now our regexp delimiter... */ |
229 | if (!substr[++idx]) | 230 | if (substr[idx] == '\0') |
230 | error_msg_and_die("bad format in substitution expression"); | 231 | error_msg_and_die("bad format in substitution expression"); |
231 | else | 232 | else |
232 | sed_cmd->delimiter=substr[idx]; | 233 | sed_cmd->delimiter=substr[idx]; |
@@ -287,11 +288,6 @@ out: | |||
287 | return idx; | 288 | return idx; |
288 | } | 289 | } |
289 | 290 | ||
290 | static void move_back(char *str, int offset) | ||
291 | { | ||
292 | memmove(str, str + offset, strlen(str + offset) + 1); | ||
293 | } | ||
294 | |||
295 | static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr) | 291 | static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr) |
296 | { | 292 | { |
297 | int i, j; | 293 | int i, j; |
@@ -317,15 +313,15 @@ static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr) | |||
317 | * is a-ok. | 313 | * is a-ok. |
318 | * | 314 | * |
319 | */ | 315 | */ |
320 | 316 | if ((*editstr != '\\') || ((editstr[1] != '\n') && (editstr[1] != '\r'))) { | |
321 | if (editstr[1] != '\\' || (editstr[2] != '\n' && editstr[2] != '\r')) | ||
322 | error_msg_and_die("bad format in edit expression"); | 317 | error_msg_and_die("bad format in edit expression"); |
318 | } | ||
323 | 319 | ||
324 | /* store the edit line text */ | 320 | /* store the edit line text */ |
325 | sed_cmd->editline = xmalloc(strlen(&editstr[3]) + 2); | 321 | sed_cmd->editline = xmalloc(strlen(&editstr[2]) + 2); |
326 | for (i = 3, j = 0; editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; | 322 | for (i = 2, j = 0; editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; |
327 | i++, j++) { | 323 | i++, j++) { |
328 | if (editstr[i] == '\\' && strchr("\n\r", editstr[i+1]) != NULL) { | 324 | if ((editstr[i] == '\\') && strchr("\n\r", editstr[i+1]) != NULL) { |
329 | sed_cmd->editline[j] = '\n'; | 325 | sed_cmd->editline[j] = '\n'; |
330 | i++; | 326 | i++; |
331 | } else | 327 | } else |
@@ -382,6 +378,53 @@ static int parse_file_cmd(sed_cmd_t *sed_cmd, const char *filecmdstr) | |||
382 | 378 | ||
383 | static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) | 379 | static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) |
384 | { | 380 | { |
381 | /* if it was a single-letter command that takes no arguments (such as 'p' | ||
382 | * or 'd') all we need to do is increment the index past that command */ | ||
383 | if (strchr("pd=", sed_cmd->cmd)) { | ||
384 | cmdstr++; | ||
385 | } | ||
386 | /* handle (s)ubstitution command */ | ||
387 | else if (sed_cmd->cmd == 's') { | ||
388 | cmdstr += parse_subst_cmd(sed_cmd, cmdstr); | ||
389 | } | ||
390 | /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ | ||
391 | else if (strchr("aic", sed_cmd->cmd)) { | ||
392 | if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') | ||
393 | error_msg_and_die("only a beginning address can be specified for edit commands"); | ||
394 | cmdstr += parse_edit_cmd(sed_cmd, cmdstr); | ||
395 | } | ||
396 | /* handle file cmds: (r)ead */ | ||
397 | else if (sed_cmd->cmd == 'r') { | ||
398 | if (sed_cmd->end_line || sed_cmd->end_match) | ||
399 | error_msg_and_die("Command only uses one address"); | ||
400 | cmdstr += parse_file_cmd(sed_cmd, cmdstr); | ||
401 | } | ||
402 | /* handle grouped commands */ | ||
403 | else { | ||
404 | error_msg_and_die("Unsupported command %c", sed_cmd->cmd); | ||
405 | } | ||
406 | |||
407 | /* give back whatever's left over */ | ||
408 | return(cmdstr); | ||
409 | } | ||
410 | |||
411 | static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr) | ||
412 | { | ||
413 | |||
414 | /* Skip over leading whitespace and semicolons */ | ||
415 | cmdstr += strspn(cmdstr, semicolon_whitespace); | ||
416 | |||
417 | /* if we ate the whole thing, that means there was just trailing | ||
418 | * whitespace or a final / no-op semicolon. either way, get out */ | ||
419 | if (*cmdstr == '\0') { | ||
420 | return(NULL); | ||
421 | } | ||
422 | |||
423 | /* if this is a comment, jump past it and keep going */ | ||
424 | if (*cmdstr == '#') { | ||
425 | return(strpbrk(cmdstr, "\n\r")); | ||
426 | } | ||
427 | |||
385 | /* parse the command | 428 | /* parse the command |
386 | * format is: [addr][,addr]cmd | 429 | * format is: [addr][,addr]cmd |
387 | * |----||-----||-| | 430 | * |----||-----||-| |
@@ -389,26 +432,26 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) | |||
389 | */ | 432 | */ |
390 | 433 | ||
391 | /* first part (if present) is an address: either a '$', a number or a /regex/ */ | 434 | /* first part (if present) is an address: either a '$', a number or a /regex/ */ |
392 | cmdstr += get_address(&sed_cmd->delimiter, cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); | 435 | cmdstr += get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); |
393 | 436 | ||
394 | /* second part (if present) will begin with a comma */ | 437 | /* second part (if present) will begin with a comma */ |
395 | if (*cmdstr == ',') { | 438 | if (*cmdstr == ',') { |
396 | int tmp_idx; | 439 | int idx; |
397 | cmdstr++; | 440 | cmdstr++; |
398 | tmp_idx = get_address(&sed_cmd->delimiter, cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); | 441 | idx = get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); |
399 | if (tmp_idx == 0) { | 442 | if (idx == 0) { |
400 | error_msg_and_die("get_address: no address found in string\n" | 443 | error_msg_and_die("get_address: no address found in string\n" |
401 | "\t(you probably didn't check the string you passed me)"); | 444 | "\t(you probably didn't check the string you passed me)"); |
402 | } | 445 | } |
403 | cmdstr += tmp_idx; | 446 | cmdstr += idx; |
404 | } | 447 | } |
405 | 448 | ||
406 | /* skip whitespace before the command */ | 449 | /* skip whitespace before the command */ |
407 | while (isspace(*cmdstr)) | 450 | while (isspace(*cmdstr)) { |
408 | cmdstr++; | 451 | cmdstr++; |
452 | } | ||
409 | 453 | ||
410 | /* there may be the inversion flag between part2 and part3 */ | 454 | /* there may be the inversion flag between part2 and part3 */ |
411 | sed_cmd->invert = 0; | ||
412 | if (*cmdstr == '!') { | 455 | if (*cmdstr == '!') { |
413 | sed_cmd->invert = 1; | 456 | sed_cmd->invert = 1; |
414 | cmdstr++; | 457 | cmdstr++; |
@@ -419,14 +462,16 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) | |||
419 | * and conforming applications shall not follow a '!' character | 462 | * and conforming applications shall not follow a '!' character |
420 | * with <blank>s. | 463 | * with <blank>s. |
421 | */ | 464 | */ |
422 | if (isblank(*cmdstr) { | 465 | if (isblank(cmdstr[idx]) { |
423 | error_msg_and_die("blank follows '!'"); | 466 | error_msg_and_die("blank follows '!'"); |
424 | } | 467 | } |
425 | #else | 468 | #else |
426 | /* skip whitespace before the command */ | 469 | /* skip whitespace before the command */ |
427 | while (isspace(*cmdstr)) | 470 | while (isspace(*cmdstr)) { |
428 | cmdstr++; | 471 | cmdstr++; |
472 | } | ||
429 | #endif | 473 | #endif |
474 | |||
430 | } | 475 | } |
431 | 476 | ||
432 | /* last part (mandatory) will be a command */ | 477 | /* last part (mandatory) will be a command */ |
@@ -434,61 +479,34 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) | |||
434 | error_msg_and_die("missing command"); | 479 | error_msg_and_die("missing command"); |
435 | 480 | ||
436 | sed_cmd->cmd = *cmdstr; | 481 | sed_cmd->cmd = *cmdstr; |
437 | 482 | cmdstr++; | |
438 | /* if it was a single-letter command that takes no arguments (such as 'p' | 483 | |
439 | * or 'd') all we need to do is increment the index past that command */ | 484 | if (sed_cmd->cmd == '{') { |
440 | if (strchr("pd=", sed_cmd->cmd)) { | 485 | do { |
441 | cmdstr++; | 486 | char *end_ptr = strpbrk(cmdstr, ";}"); |
487 | *end_ptr = '\0'; | ||
488 | add_cmd(sed_cmd, cmdstr); | ||
489 | cmdstr = end_ptr + 1; | ||
490 | } while (*cmdstr != '\0'); | ||
491 | } else { | ||
492 | |||
493 | cmdstr = parse_cmd_str(sed_cmd, cmdstr); | ||
494 | |||
495 | /* Add the command to the command array */ | ||
496 | sed_cmds = xrealloc(sed_cmds, sizeof(sed_cmd_t) * (++ncmds)); | ||
497 | sed_cmds[ncmds-1] = xmalloc(sizeof(sed_cmd_t)); | ||
498 | memcpy(sed_cmds[ncmds-1], sed_cmd, sizeof(sed_cmd_t)); | ||
442 | } | 499 | } |
443 | /* handle (s)ubstitution command */ | ||
444 | else if (sed_cmd->cmd == 's') { | ||
445 | cmdstr += parse_subst_cmd(sed_cmd, cmdstr); | ||
446 | } | ||
447 | /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ | ||
448 | else if (strchr("aic", sed_cmd->cmd)) { | ||
449 | if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') | ||
450 | error_msg_and_die("only a beginning address can be specified for edit commands"); | ||
451 | cmdstr += parse_edit_cmd(sed_cmd, cmdstr); | ||
452 | } | ||
453 | /* handle file cmds: (r)ead */ | ||
454 | else if (sed_cmd->cmd == 'r') { | ||
455 | if (sed_cmd->end_line || sed_cmd->end_match) | ||
456 | error_msg_and_die("Command only uses one address"); | ||
457 | cmdstr += parse_file_cmd(sed_cmd, cmdstr); | ||
458 | } | ||
459 | else { | ||
460 | error_msg_and_die("Unsupported command %c", sed_cmd->cmd); | ||
461 | } | ||
462 | |||
463 | /* give back whatever's left over */ | ||
464 | return(cmdstr); | 500 | return(cmdstr); |
465 | } | 501 | } |
466 | 502 | ||
467 | static void add_cmd_str(const char * const cmdstr) | 503 | static void add_cmd_str(char *cmdstr) |
468 | { | 504 | { |
469 | char *mystr = (char *)cmdstr; | ||
470 | |||
471 | do { | 505 | do { |
472 | 506 | sed_cmd_t *sed_cmd; | |
473 | /* trim leading whitespace and semicolons */ | 507 | sed_cmd = xcalloc(1, sizeof(sed_cmd_t)); |
474 | move_back(mystr, strspn(mystr, semicolon_whitespace)); | 508 | cmdstr = add_cmd(sed_cmd, cmdstr); |
475 | /* if we ate the whole thing, that means there was just trailing | 509 | } while (cmdstr && strlen(cmdstr)); |
476 | * whitespace or a final / no-op semicolon. either way, get out */ | ||
477 | if (strlen(mystr) == 0) | ||
478 | return; | ||
479 | /* if this is a comment, jump past it and keep going */ | ||
480 | if (mystr[0] == '#') { | ||
481 | mystr = strpbrk(mystr, "\n\r"); | ||
482 | continue; | ||
483 | } | ||
484 | /* grow the array */ | ||
485 | sed_cmds = xrealloc(sed_cmds, sizeof(sed_cmd_t *) * (++ncmds)); | ||
486 | /* zero new element */ | ||
487 | sed_cmds[ncmds-1] = xcalloc(1, sizeof(sed_cmd_t)); | ||
488 | /* load command string into new array element, get remainder */ | ||
489 | mystr = parse_cmd_str(sed_cmds[ncmds-1], mystr); | ||
490 | |||
491 | } while (mystr && strlen(mystr)); | ||
492 | } | 510 | } |
493 | 511 | ||
494 | 512 | ||
@@ -868,7 +886,6 @@ extern int sed_main(int argc, char **argv) | |||
868 | } | 886 | } |
869 | } | 887 | } |
870 | 888 | ||
871 | |||
872 | /* argv[(optind)..(argc-1)] should be names of file to process. If no | 889 | /* argv[(optind)..(argc-1)] should be names of file to process. If no |
873 | * files were specified or '-' was specified, take input from stdin. | 890 | * files were specified or '-' was specified, take input from stdin. |
874 | * Otherwise, we process all the files specified. */ | 891 | * Otherwise, we process all the files specified. */ |