aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Whitley <markw@lineo.com>2000-07-17 20:06:42 +0000
committerMark Whitley <markw@lineo.com>2000-07-17 20:06:42 +0000
commit97562bd9d7fe18bdc4f63e6e80bdce980416a915 (patch)
tree4108d5819248431f3e2c04e1b506c3a92ab1a5df
parent21ddb38fcf0633ced1e047ac090c3fbf7d636ce5 (diff)
downloadbusybox-w32-97562bd9d7fe18bdc4f63e6e80bdce980416a915.tar.gz
busybox-w32-97562bd9d7fe18bdc4f63e6e80bdce980416a915.tar.bz2
busybox-w32-97562bd9d7fe18bdc4f63e6e80bdce980416a915.zip
- Added support for backreferences in substitution expressions up to nine
(\1, \2...\9). This touched a lot of places in this file, and I added a new function 'print_subst_w_backrefs' in order to keep 'do_subst_command' a little more tidy. * I tested this good 'n hard, but will always appreciate more testing from other, willing folks. - Noticed that the index_of_next_unescaped_slash function was subtly wrong, so I changed both the functionality and behavior (it used to skip over the first char in the string you passed it, assuming it was a leading '/'--this assumption is no longer made). This necessitated changing the lines that call this function just slightly.
-rw-r--r--editors/sed.c94
-rw-r--r--sed.c94
2 files changed, 152 insertions, 36 deletions
diff --git a/editors/sed.c b/editors/sed.c
index 4d4886e19..195175e88 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -27,6 +27,7 @@
27 - address matching: num|/matchstr/[,num|/matchstr/|$]command 27 - address matching: num|/matchstr/[,num|/matchstr/|$]command
28 - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags) 28 - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags)
29 - edit commands: (a)ppend, (i)nsert, (c)hange 29 - edit commands: (a)ppend, (i)nsert, (c)hange
30 - backreferences in substitution expressions (\1, \2...\9)
30 31
31 (Note: Specifying an address (range) to match is *optional*; commands 32 (Note: Specifying an address (range) to match is *optional*; commands
32 default to the whole pattern space if no specific address match was 33 default to the whole pattern space if no specific address match was
@@ -73,6 +74,9 @@ struct sed_cmd {
73 /* substitution command specific fields */ 74 /* substitution command specific fields */
74 regex_t *sub_match; /* sed -e 's/sub_match/replace/' */ 75 regex_t *sub_match; /* sed -e 's/sub_match/replace/' */
75 char *replace; /* sed -e 's/sub_match/replace/' XXX: who will hold the \1 \2 \3s? */ 76 char *replace; /* sed -e 's/sub_match/replace/' XXX: who will hold the \1 \2 \3s? */
77 unsigned int num_backrefs:4; /* how many back references (\1..\9) */
78 /* Note: GNU/POSIX sed does not save more than nine backrefs, so
79 * we only use 4 bits to hold the number */
76 unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */ 80 unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */
77 81
78 /* edit command (a,i,c) speicific field */ 82 /* edit command (a,i,c) speicific field */
@@ -166,19 +170,19 @@ static size_t strrspn(const char *s, const char *accept)
166#endif 170#endif
167 171
168/* 172/*
169 * index_of_unescaped_slash - walks left to right through a string beginning 173 * index_of_next_unescaped_slash - walks left to right through a string
170 * at a specified index and returns the index of the next unescaped slash. 174 * beginning at a specified index and returns the index of the next forward
175 * slash ('/') not preceeded by a backslash ('\').
171 */ 176 */
172static int index_of_next_unescaped_slash(const char *str, int idx) 177static int index_of_next_unescaped_slash(const char *str, int idx)
173{ 178{
174 do { 179 for ( ; str[idx]; idx++) {
175 idx++; 180 if (str[idx] == '/' && str[idx-1] != '\\')
176 /* test if we've hit the end */ 181 return idx;
177 if (str[idx] == 0) 182 }
178 return -1;
179 } while (str[idx] != '/' && str[idx - 1] != '\\');
180 183
181 return idx; 184 /* if we make it to here, we've hit the end of the string */
185 return -1;
182} 186}
183 187
184/* 188/*
@@ -201,7 +205,7 @@ static int get_address(const char *str, int *line, regex_t **regex)
201 idx++; 205 idx++;
202 } 206 }
203 else if (my_str[idx] == '/') { 207 else if (my_str[idx] == '/') {
204 idx = index_of_next_unescaped_slash(my_str, idx); 208 idx = index_of_next_unescaped_slash(my_str, ++idx);
205 if (idx == -1) 209 if (idx == -1)
206 fatalError("unterminated match expression\n"); 210 fatalError("unterminated match expression\n");
207 my_str[idx] = '\0'; 211 my_str[idx] = '\0';
@@ -233,6 +237,7 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
233 int oldidx, cflags = REG_NEWLINE; 237 int oldidx, cflags = REG_NEWLINE;
234 char *match; 238 char *match;
235 int idx = 0; 239 int idx = 0;
240 int j;
236 241
237 /* 242 /*
238 * the string that gets passed to this function should look like this: 243 * the string that gets passed to this function should look like this:
@@ -249,14 +254,26 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
249 254
250 /* save the match string */ 255 /* save the match string */
251 oldidx = idx+1; 256 oldidx = idx+1;
252 idx = index_of_next_unescaped_slash(substr, idx); 257 idx = index_of_next_unescaped_slash(substr, ++idx);
253 if (idx == -1) 258 if (idx == -1)
254 fatalError("bad format in substitution expression\n"); 259 fatalError("bad format in substitution expression\n");
255 match = strdup_substr(substr, oldidx, idx); 260 match = strdup_substr(substr, oldidx, idx);
256 261
262 /* determine the number of back references in the match string */
263 /* Note: we compute this here rather than in the do_subst_command()
264 * function to save processor time, at the expense of a little more memory
265 * (4 bits) per sed_cmd */
266
267 /* sed_cmd->num_backrefs = 0; */ /* XXX: not needed? --apparently not */
268 for (j = 0; match[j]; j++) {
269 /* GNU/POSIX sed does not save more than nine backrefs */
270 if (match[j] == '\\' && match[j+1] == '(' && sed_cmd->num_backrefs < 9)
271 sed_cmd->num_backrefs++;
272 }
273
257 /* save the replacement string */ 274 /* save the replacement string */
258 oldidx = idx+1; 275 oldidx = idx+1;
259 idx = index_of_next_unescaped_slash(substr, idx); 276 idx = index_of_next_unescaped_slash(substr, ++idx);
260 if (idx == -1) 277 if (idx == -1)
261 fatalError("bad format in substitution expression\n"); 278 fatalError("bad format in substitution expression\n");
262 sed_cmd->replace = strdup_substr(substr, oldidx, idx); 279 sed_cmd->replace = strdup_substr(substr, oldidx, idx);
@@ -280,7 +297,7 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
280 } 297 }
281 298
282out: 299out:
283 /* compile the regex */ 300 /* compile the match string into a regex */
284 sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t)); 301 sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t));
285 xregcomp(sed_cmd->sub_match, match, cflags); 302 xregcomp(sed_cmd->sub_match, match, cflags);
286 free(match); 303 free(match);
@@ -460,26 +477,64 @@ static void load_cmd_file(char *filename)
460 } 477 }
461} 478}
462 479
480static void print_subst_w_backrefs(const char *line, const char *replace, regmatch_t *regmatch)
481{
482 int i;
483
484 /* go through the replacement string */
485 for (i = 0; replace[i]; i++) {
486 /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */
487 if (replace[i] == '\\' && isdigit(replace[i+1])) {
488 int j;
489 char tmpstr[2];
490 int backref;
491 ++i; /* i now indexes the backref number, instead of the leading slash */
492 tmpstr[0] = replace[i];
493 tmpstr[1] = 0;
494 backref = atoi(tmpstr);
495 /* print out the text held in regmatch[backref] */
496 for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; j++)
497 fputc(line[j], stdout);
498 }
499
500 /* if we find an unescaped '&' print out the whole matched text.
501 * fortunately, regmatch[0] contains the indicies to the whole matched
502 * expression (kinda seems like it was designed for just such a
503 * purpose...) */
504 else if (replace[i] == '&' && replace[i-1] != '\\') {
505 int j;
506 for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++)
507 fputc(line[j], stdout);
508 }
509 /* nothing special, just print this char of the replacement string to stdout */
510 else
511 fputc(replace[i], stdout);
512 }
513}
514
463static int do_subst_command(const struct sed_cmd *sed_cmd, const char *line) 515static int do_subst_command(const struct sed_cmd *sed_cmd, const char *line)
464{ 516{
465 int altered = 0; 517 int altered = 0;
466 518
467 /* we only substitute if the substitution 'search' expression matches */ 519 /* we only substitute if the substitution 'search' expression matches */
468 if (regexec(sed_cmd->sub_match, line, 0, NULL, 0) == 0) { 520 if (regexec(sed_cmd->sub_match, line, 0, NULL, 0) == 0) {
469 regmatch_t regmatch; 521 regmatch_t *regmatch = xmalloc(sizeof(regmatch_t) * (sed_cmd->num_backrefs+1));
470 int i; 522 int i;
471 char *ptr = (char *)line; 523 char *ptr = (char *)line;
472 524
473 while (*ptr) { 525 while (*ptr) {
474 /* if we can match the search string... */ 526 /* if we can match the search string... */
475 if (regexec(sed_cmd->sub_match, ptr, 1, &regmatch, 0) == 0) { 527 if (regexec(sed_cmd->sub_match, ptr, sed_cmd->num_backrefs+1, regmatch, 0) == 0) {
476 /* print everything before the match, */ 528 /* print everything before the match, */
477 for (i = 0; i < regmatch.rm_so; i++) 529 for (i = 0; i < regmatch[0].rm_so; i++)
478 fputc(ptr[i], stdout); 530 fputc(ptr[i], stdout);
531
479 /* then print the substitution in its place */ 532 /* then print the substitution in its place */
480 fputs(sed_cmd->replace, stdout); 533 print_subst_w_backrefs(ptr, sed_cmd->replace, regmatch);
534
481 /* then advance past the match */ 535 /* then advance past the match */
482 ptr += regmatch.rm_eo; 536 ptr += regmatch[0].rm_eo;
537
483 /* and flag that something has changed */ 538 /* and flag that something has changed */
484 altered++; 539 altered++;
485 540
@@ -496,6 +551,9 @@ static int do_subst_command(const struct sed_cmd *sed_cmd, const char *line)
496 /* is there anything left to print? */ 551 /* is there anything left to print? */
497 if (*ptr) 552 if (*ptr)
498 fputs(ptr, stdout); 553 fputs(ptr, stdout);
554
555 /* cleanup */
556 free(regmatch);
499 } 557 }
500 558
501 return altered; 559 return altered;
diff --git a/sed.c b/sed.c
index 4d4886e19..195175e88 100644
--- a/sed.c
+++ b/sed.c
@@ -27,6 +27,7 @@
27 - address matching: num|/matchstr/[,num|/matchstr/|$]command 27 - address matching: num|/matchstr/[,num|/matchstr/|$]command
28 - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags) 28 - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags)
29 - edit commands: (a)ppend, (i)nsert, (c)hange 29 - edit commands: (a)ppend, (i)nsert, (c)hange
30 - backreferences in substitution expressions (\1, \2...\9)
30 31
31 (Note: Specifying an address (range) to match is *optional*; commands 32 (Note: Specifying an address (range) to match is *optional*; commands
32 default to the whole pattern space if no specific address match was 33 default to the whole pattern space if no specific address match was
@@ -73,6 +74,9 @@ struct sed_cmd {
73 /* substitution command specific fields */ 74 /* substitution command specific fields */
74 regex_t *sub_match; /* sed -e 's/sub_match/replace/' */ 75 regex_t *sub_match; /* sed -e 's/sub_match/replace/' */
75 char *replace; /* sed -e 's/sub_match/replace/' XXX: who will hold the \1 \2 \3s? */ 76 char *replace; /* sed -e 's/sub_match/replace/' XXX: who will hold the \1 \2 \3s? */
77 unsigned int num_backrefs:4; /* how many back references (\1..\9) */
78 /* Note: GNU/POSIX sed does not save more than nine backrefs, so
79 * we only use 4 bits to hold the number */
76 unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */ 80 unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */
77 81
78 /* edit command (a,i,c) speicific field */ 82 /* edit command (a,i,c) speicific field */
@@ -166,19 +170,19 @@ static size_t strrspn(const char *s, const char *accept)
166#endif 170#endif
167 171
168/* 172/*
169 * index_of_unescaped_slash - walks left to right through a string beginning 173 * index_of_next_unescaped_slash - walks left to right through a string
170 * at a specified index and returns the index of the next unescaped slash. 174 * beginning at a specified index and returns the index of the next forward
175 * slash ('/') not preceeded by a backslash ('\').
171 */ 176 */
172static int index_of_next_unescaped_slash(const char *str, int idx) 177static int index_of_next_unescaped_slash(const char *str, int idx)
173{ 178{
174 do { 179 for ( ; str[idx]; idx++) {
175 idx++; 180 if (str[idx] == '/' && str[idx-1] != '\\')
176 /* test if we've hit the end */ 181 return idx;
177 if (str[idx] == 0) 182 }
178 return -1;
179 } while (str[idx] != '/' && str[idx - 1] != '\\');
180 183
181 return idx; 184 /* if we make it to here, we've hit the end of the string */
185 return -1;
182} 186}
183 187
184/* 188/*
@@ -201,7 +205,7 @@ static int get_address(const char *str, int *line, regex_t **regex)
201 idx++; 205 idx++;
202 } 206 }
203 else if (my_str[idx] == '/') { 207 else if (my_str[idx] == '/') {
204 idx = index_of_next_unescaped_slash(my_str, idx); 208 idx = index_of_next_unescaped_slash(my_str, ++idx);
205 if (idx == -1) 209 if (idx == -1)
206 fatalError("unterminated match expression\n"); 210 fatalError("unterminated match expression\n");
207 my_str[idx] = '\0'; 211 my_str[idx] = '\0';
@@ -233,6 +237,7 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
233 int oldidx, cflags = REG_NEWLINE; 237 int oldidx, cflags = REG_NEWLINE;
234 char *match; 238 char *match;
235 int idx = 0; 239 int idx = 0;
240 int j;
236 241
237 /* 242 /*
238 * the string that gets passed to this function should look like this: 243 * the string that gets passed to this function should look like this:
@@ -249,14 +254,26 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
249 254
250 /* save the match string */ 255 /* save the match string */
251 oldidx = idx+1; 256 oldidx = idx+1;
252 idx = index_of_next_unescaped_slash(substr, idx); 257 idx = index_of_next_unescaped_slash(substr, ++idx);
253 if (idx == -1) 258 if (idx == -1)
254 fatalError("bad format in substitution expression\n"); 259 fatalError("bad format in substitution expression\n");
255 match = strdup_substr(substr, oldidx, idx); 260 match = strdup_substr(substr, oldidx, idx);
256 261
262 /* determine the number of back references in the match string */
263 /* Note: we compute this here rather than in the do_subst_command()
264 * function to save processor time, at the expense of a little more memory
265 * (4 bits) per sed_cmd */
266
267 /* sed_cmd->num_backrefs = 0; */ /* XXX: not needed? --apparently not */
268 for (j = 0; match[j]; j++) {
269 /* GNU/POSIX sed does not save more than nine backrefs */
270 if (match[j] == '\\' && match[j+1] == '(' && sed_cmd->num_backrefs < 9)
271 sed_cmd->num_backrefs++;
272 }
273
257 /* save the replacement string */ 274 /* save the replacement string */
258 oldidx = idx+1; 275 oldidx = idx+1;
259 idx = index_of_next_unescaped_slash(substr, idx); 276 idx = index_of_next_unescaped_slash(substr, ++idx);
260 if (idx == -1) 277 if (idx == -1)
261 fatalError("bad format in substitution expression\n"); 278 fatalError("bad format in substitution expression\n");
262 sed_cmd->replace = strdup_substr(substr, oldidx, idx); 279 sed_cmd->replace = strdup_substr(substr, oldidx, idx);
@@ -280,7 +297,7 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
280 } 297 }
281 298
282out: 299out:
283 /* compile the regex */ 300 /* compile the match string into a regex */
284 sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t)); 301 sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t));
285 xregcomp(sed_cmd->sub_match, match, cflags); 302 xregcomp(sed_cmd->sub_match, match, cflags);
286 free(match); 303 free(match);
@@ -460,26 +477,64 @@ static void load_cmd_file(char *filename)
460 } 477 }
461} 478}
462 479
480static void print_subst_w_backrefs(const char *line, const char *replace, regmatch_t *regmatch)
481{
482 int i;
483
484 /* go through the replacement string */
485 for (i = 0; replace[i]; i++) {
486 /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */
487 if (replace[i] == '\\' && isdigit(replace[i+1])) {
488 int j;
489 char tmpstr[2];
490 int backref;
491 ++i; /* i now indexes the backref number, instead of the leading slash */
492 tmpstr[0] = replace[i];
493 tmpstr[1] = 0;
494 backref = atoi(tmpstr);
495 /* print out the text held in regmatch[backref] */
496 for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; j++)
497 fputc(line[j], stdout);
498 }
499
500 /* if we find an unescaped '&' print out the whole matched text.
501 * fortunately, regmatch[0] contains the indicies to the whole matched
502 * expression (kinda seems like it was designed for just such a
503 * purpose...) */
504 else if (replace[i] == '&' && replace[i-1] != '\\') {
505 int j;
506 for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++)
507 fputc(line[j], stdout);
508 }
509 /* nothing special, just print this char of the replacement string to stdout */
510 else
511 fputc(replace[i], stdout);
512 }
513}
514
463static int do_subst_command(const struct sed_cmd *sed_cmd, const char *line) 515static int do_subst_command(const struct sed_cmd *sed_cmd, const char *line)
464{ 516{
465 int altered = 0; 517 int altered = 0;
466 518
467 /* we only substitute if the substitution 'search' expression matches */ 519 /* we only substitute if the substitution 'search' expression matches */
468 if (regexec(sed_cmd->sub_match, line, 0, NULL, 0) == 0) { 520 if (regexec(sed_cmd->sub_match, line, 0, NULL, 0) == 0) {
469 regmatch_t regmatch; 521 regmatch_t *regmatch = xmalloc(sizeof(regmatch_t) * (sed_cmd->num_backrefs+1));
470 int i; 522 int i;
471 char *ptr = (char *)line; 523 char *ptr = (char *)line;
472 524
473 while (*ptr) { 525 while (*ptr) {
474 /* if we can match the search string... */ 526 /* if we can match the search string... */
475 if (regexec(sed_cmd->sub_match, ptr, 1, &regmatch, 0) == 0) { 527 if (regexec(sed_cmd->sub_match, ptr, sed_cmd->num_backrefs+1, regmatch, 0) == 0) {
476 /* print everything before the match, */ 528 /* print everything before the match, */
477 for (i = 0; i < regmatch.rm_so; i++) 529 for (i = 0; i < regmatch[0].rm_so; i++)
478 fputc(ptr[i], stdout); 530 fputc(ptr[i], stdout);
531
479 /* then print the substitution in its place */ 532 /* then print the substitution in its place */
480 fputs(sed_cmd->replace, stdout); 533 print_subst_w_backrefs(ptr, sed_cmd->replace, regmatch);
534
481 /* then advance past the match */ 535 /* then advance past the match */
482 ptr += regmatch.rm_eo; 536 ptr += regmatch[0].rm_eo;
537
483 /* and flag that something has changed */ 538 /* and flag that something has changed */
484 altered++; 539 altered++;
485 540
@@ -496,6 +551,9 @@ static int do_subst_command(const struct sed_cmd *sed_cmd, const char *line)
496 /* is there anything left to print? */ 551 /* is there anything left to print? */
497 if (*ptr) 552 if (*ptr)
498 fputs(ptr, stdout); 553 fputs(ptr, stdout);
554
555 /* cleanup */
556 free(regmatch);
499 } 557 }
500 558
501 return altered; 559 return altered;