aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--editors/sed.c159
1 files changed, 88 insertions, 71 deletions
diff --git a/editors/sed.c b/editors/sed.c
index 96e0dd88b..ac765c83f 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -30,7 +30,8 @@
30 - edit commands: (a)ppend, (i)nsert, (c)hange 30 - edit commands: (a)ppend, (i)nsert, (c)hange
31 - file commands: (r)ead 31 - file commands: (r)ead
32 - backreferences in substitution expressions (\1, \2...\9) 32 - backreferences in substitution expressions (\1, \2...\9)
33 33 - grouped commands: {cmd1;cmd2}
34
34 (Note: Specifying an address (range) to match is *optional*; commands 35 (Note: Specifying an address (range) to match is *optional*; commands
35 default to the whole pattern space if no specific address match was 36 default to the whole pattern space if no specific address match was
36 requested.) 37 requested.)
@@ -226,7 +227,7 @@ static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr)
226 227
227 /* verify that the 's' is followed by something. That something 228 /* verify that the 's' is followed by something. That something
228 * (typically a 'slash') is now our regexp delimiter... */ 229 * (typically a 'slash') is now our regexp delimiter... */
229 if (!substr[++idx]) 230 if (substr[idx] == '\0')
230 error_msg_and_die("bad format in substitution expression"); 231 error_msg_and_die("bad format in substitution expression");
231 else 232 else
232 sed_cmd->delimiter=substr[idx]; 233 sed_cmd->delimiter=substr[idx];
@@ -287,11 +288,6 @@ out:
287 return idx; 288 return idx;
288} 289}
289 290
290static void move_back(char *str, int offset)
291{
292 memmove(str, str + offset, strlen(str + offset) + 1);
293}
294
295static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr) 291static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr)
296{ 292{
297 int i, j; 293 int i, j;
@@ -317,15 +313,15 @@ static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr)
317 * is a-ok. 313 * is a-ok.
318 * 314 *
319 */ 315 */
320 316 if ((*editstr != '\\') || ((editstr[1] != '\n') && (editstr[1] != '\r'))) {
321 if (editstr[1] != '\\' || (editstr[2] != '\n' && editstr[2] != '\r'))
322 error_msg_and_die("bad format in edit expression"); 317 error_msg_and_die("bad format in edit expression");
318 }
323 319
324 /* store the edit line text */ 320 /* store the edit line text */
325 sed_cmd->editline = xmalloc(strlen(&editstr[3]) + 2); 321 sed_cmd->editline = xmalloc(strlen(&editstr[2]) + 2);
326 for (i = 3, j = 0; editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; 322 for (i = 2, j = 0; editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL;
327 i++, j++) { 323 i++, j++) {
328 if (editstr[i] == '\\' && strchr("\n\r", editstr[i+1]) != NULL) { 324 if ((editstr[i] == '\\') && strchr("\n\r", editstr[i+1]) != NULL) {
329 sed_cmd->editline[j] = '\n'; 325 sed_cmd->editline[j] = '\n';
330 i++; 326 i++;
331 } else 327 } else
@@ -382,6 +378,53 @@ static int parse_file_cmd(sed_cmd_t *sed_cmd, const char *filecmdstr)
382 378
383static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) 379static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
384{ 380{
381 /* if it was a single-letter command that takes no arguments (such as 'p'
382 * or 'd') all we need to do is increment the index past that command */
383 if (strchr("pd=", sed_cmd->cmd)) {
384 cmdstr++;
385 }
386 /* handle (s)ubstitution command */
387 else if (sed_cmd->cmd == 's') {
388 cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
389 }
390 /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
391 else if (strchr("aic", sed_cmd->cmd)) {
392 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
393 error_msg_and_die("only a beginning address can be specified for edit commands");
394 cmdstr += parse_edit_cmd(sed_cmd, cmdstr);
395 }
396 /* handle file cmds: (r)ead */
397 else if (sed_cmd->cmd == 'r') {
398 if (sed_cmd->end_line || sed_cmd->end_match)
399 error_msg_and_die("Command only uses one address");
400 cmdstr += parse_file_cmd(sed_cmd, cmdstr);
401 }
402 /* handle grouped commands */
403 else {
404 error_msg_and_die("Unsupported command %c", sed_cmd->cmd);
405 }
406
407 /* give back whatever's left over */
408 return(cmdstr);
409}
410
411static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr)
412{
413
414 /* Skip over leading whitespace and semicolons */
415 cmdstr += strspn(cmdstr, semicolon_whitespace);
416
417 /* if we ate the whole thing, that means there was just trailing
418 * whitespace or a final / no-op semicolon. either way, get out */
419 if (*cmdstr == '\0') {
420 return(NULL);
421 }
422
423 /* if this is a comment, jump past it and keep going */
424 if (*cmdstr == '#') {
425 return(strpbrk(cmdstr, "\n\r"));
426 }
427
385 /* parse the command 428 /* parse the command
386 * format is: [addr][,addr]cmd 429 * format is: [addr][,addr]cmd
387 * |----||-----||-| 430 * |----||-----||-|
@@ -389,26 +432,26 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
389 */ 432 */
390 433
391 /* first part (if present) is an address: either a '$', a number or a /regex/ */ 434 /* first part (if present) is an address: either a '$', a number or a /regex/ */
392 cmdstr += get_address(&sed_cmd->delimiter, cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); 435 cmdstr += get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
393 436
394 /* second part (if present) will begin with a comma */ 437 /* second part (if present) will begin with a comma */
395 if (*cmdstr == ',') { 438 if (*cmdstr == ',') {
396 int tmp_idx; 439 int idx;
397 cmdstr++; 440 cmdstr++;
398 tmp_idx = get_address(&sed_cmd->delimiter, cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); 441 idx = get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
399 if (tmp_idx == 0) { 442 if (idx == 0) {
400 error_msg_and_die("get_address: no address found in string\n" 443 error_msg_and_die("get_address: no address found in string\n"
401 "\t(you probably didn't check the string you passed me)"); 444 "\t(you probably didn't check the string you passed me)");
402 } 445 }
403 cmdstr += tmp_idx; 446 cmdstr += idx;
404 } 447 }
405 448
406 /* skip whitespace before the command */ 449 /* skip whitespace before the command */
407 while (isspace(*cmdstr)) 450 while (isspace(*cmdstr)) {
408 cmdstr++; 451 cmdstr++;
452 }
409 453
410 /* there my be the inversion flag between part2 and part3 */ 454 /* there my be the inversion flag between part2 and part3 */
411 sed_cmd->invert = 0;
412 if (*cmdstr == '!') { 455 if (*cmdstr == '!') {
413 sed_cmd->invert = 1; 456 sed_cmd->invert = 1;
414 cmdstr++; 457 cmdstr++;
@@ -419,14 +462,16 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
419 * and conforming applications shall not follow a '!' character 462 * and conforming applications shall not follow a '!' character
420 * with <blank>s. 463 * with <blank>s.
421 */ 464 */
422 if (isblank(*cmdstr) { 465 if (isblank(cmdstr[idx]) {
423 error_msg_and_die("blank follows '!'"); 466 error_msg_and_die("blank follows '!'");
424 } 467 }
425#else 468#else
426 /* skip whitespace before the command */ 469 /* skip whitespace before the command */
427 while (isspace(*cmdstr)) 470 while (isspace(*cmdstr)) {
428 cmdstr++; 471 cmdstr++;
472 }
429#endif 473#endif
474
430 } 475 }
431 476
432 /* last part (mandatory) will be a command */ 477 /* last part (mandatory) will be a command */
@@ -434,61 +479,34 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
434 error_msg_and_die("missing command"); 479 error_msg_and_die("missing command");
435 480
436 sed_cmd->cmd = *cmdstr; 481 sed_cmd->cmd = *cmdstr;
437 482 cmdstr++;
438 /* if it was a single-letter command that takes no arguments (such as 'p' 483
439 * or 'd') all we need to do is increment the index past that command */ 484 if (sed_cmd->cmd == '{') {
440 if (strchr("pd=", sed_cmd->cmd)) { 485 do {
441 cmdstr++; 486 char *end_ptr = strpbrk(cmdstr, ";}");
487 *end_ptr = '\0';
488 add_cmd(sed_cmd, cmdstr);
489 cmdstr = end_ptr + 1;
490 } while (*cmdstr != '\0');
491 } else {
492
493 cmdstr = parse_cmd_str(sed_cmd, cmdstr);
494
495 /* Add the command to the command array */
496 sed_cmds = xrealloc(sed_cmds, sizeof(sed_cmd_t) * (++ncmds));
497 sed_cmds[ncmds-1] = xmalloc(sizeof(sed_cmd_t));
498 memcpy(sed_cmds[ncmds-1], sed_cmd, sizeof(sed_cmd_t));
442 } 499 }
443 /* handle (s)ubstitution command */
444 else if (sed_cmd->cmd == 's') {
445 cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
446 }
447 /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
448 else if (strchr("aic", sed_cmd->cmd)) {
449 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
450 error_msg_and_die("only a beginning address can be specified for edit commands");
451 cmdstr += parse_edit_cmd(sed_cmd, cmdstr);
452 }
453 /* handle file cmds: (r)ead */
454 else if (sed_cmd->cmd == 'r') {
455 if (sed_cmd->end_line || sed_cmd->end_match)
456 error_msg_and_die("Command only uses one address");
457 cmdstr += parse_file_cmd(sed_cmd, cmdstr);
458 }
459 else {
460 error_msg_and_die("Unsupported command %c", sed_cmd->cmd);
461 }
462
463 /* give back whatever's left over */
464 return(cmdstr); 500 return(cmdstr);
465} 501}
466 502
467static void add_cmd_str(const char * const cmdstr) 503static void add_cmd_str(char *cmdstr)
468{ 504{
469 char *mystr = (char *)cmdstr;
470
471 do { 505 do {
472 506 sed_cmd_t *sed_cmd;
473 /* trim leading whitespace and semicolons */ 507 sed_cmd = xcalloc(1, sizeof(sed_cmd_t));
474 move_back(mystr, strspn(mystr, semicolon_whitespace)); 508 cmdstr = add_cmd(sed_cmd, cmdstr);
475 /* if we ate the whole thing, that means there was just trailing 509 } while (cmdstr && strlen(cmdstr));
476 * whitespace or a final / no-op semicolon. either way, get out */
477 if (strlen(mystr) == 0)
478 return;
479 /* if this is a comment, jump past it and keep going */
480 if (mystr[0] == '#') {
481 mystr = strpbrk(mystr, "\n\r");
482 continue;
483 }
484 /* grow the array */
485 sed_cmds = xrealloc(sed_cmds, sizeof(sed_cmd_t *) * (++ncmds));
486 /* zero new element */
487 sed_cmds[ncmds-1] = xcalloc(1, sizeof(sed_cmd_t));
488 /* load command string into new array element, get remainder */
489 mystr = parse_cmd_str(sed_cmds[ncmds-1], mystr);
490
491 } while (mystr && strlen(mystr));
492} 510}
493 511
494 512
@@ -868,7 +886,6 @@ extern int sed_main(int argc, char **argv)
868 } 886 }
869 } 887 }
870 888
871
872 /* argv[(optind)..(argc-1)] should be names of file to process. If no 889 /* argv[(optind)..(argc-1)] should be names of file to process. If no
873 * files were specified or '-' was specified, take input from stdin. 890 * files were specified or '-' was specified, take input from stdin.
874 * Otherwise, we process all the files specified. */ 891 * Otherwise, we process all the files specified. */