diff options
author | Eric Andersen <andersen@codepoet.org> | 2001-01-02 11:01:31 +0000 |
---|---|---|
committer | Eric Andersen <andersen@codepoet.org> | 2001-01-02 11:01:31 +0000 |
commit | 28b3c53f0de370b26aa5ec7ea35176fd4316f7e3 (patch) | |
tree | 961469fe9dd30ab4ca34836bc2b881688f79bbaa | |
parent | 01bda5de6b983cf331c42cb64fa2ccd4308b423d (diff) | |
download | busybox-w32-28b3c53f0de370b26aa5ec7ea35176fd4316f7e3.tar.gz busybox-w32-28b3c53f0de370b26aa5ec7ea35176fd4316f7e3.tar.bz2 busybox-w32-28b3c53f0de370b26aa5ec7ea35176fd4316f7e3.zip |
I just whipped up support for arbitrary regex delimiters, so
now things such as
$ echo foo | sed 'sxfooxb\arx'
bar
will work as expected (and so doogie can stop complaining).
-Erik
-rw-r--r-- | editors/sed.c | 81 | ||||
-rw-r--r-- | sed.c | 81 |
2 files changed, 36 insertions, 126 deletions
diff --git a/editors/sed.c b/editors/sed.c
index 341924d6b..7fb906a15 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -36,11 +36,9 @@ | |||
36 | Unsupported features: | 36 | Unsupported features: |
37 | 37 | ||
38 | - transliteration (y/source-chars/dest-chars/) (use 'tr') | 38 | - transliteration (y/source-chars/dest-chars/) (use 'tr') |
39 | - no support for characters other than the '/' character for regex matches | ||
40 | - no pattern space hold space storing / swapping (x, etc.) | 39 | - no pattern space hold space storing / swapping (x, etc.) |
41 | - no labels / branching (: label, b, t, and friends) | 40 | - no labels / branching (: label, b, t, and friends) |
42 | - and lots, lots more. | 41 | - and lots, lots more. |
43 | |||
44 | */ | 42 | */ |
45 | 43 | ||
46 | #include <stdio.h> | 44 | #include <stdio.h> |
@@ -63,6 +61,7 @@ struct sed_cmd { | |||
63 | 61 | ||
64 | 62 | ||
65 | /* GENERAL FIELDS */ | 63 | /* GENERAL FIELDS */ |
64 | char delimiter; /* The delimiter used to separate regexps */ | ||
66 | 65 | ||
67 | /* address storage */ | 66 | /* address storage */ |
68 | int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */ | 67 | int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */ |
@@ -128,64 +127,17 @@ static void destroy_cmd_strs() | |||
128 | } | 127 | } |
129 | #endif | 128 | #endif |
130 | 129 | ||
131 | #if 0 | ||
132 | /* | ||
133 | * trim_str - trims leading and trailing space from a string | ||
134 | * | ||
135 | * Note: This returns a malloc'ed string so you must store and free it | ||
136 | * XXX: This should be in the utility.c file. | ||
137 | * XXX: This is now obsolete. Maybe it belongs nowhere. | ||
138 | */ | ||
139 | static char *trim_str(const char *str) | ||
140 | { | ||
141 | int i; | ||
142 | char *retstr = strdup(str); | ||
143 | |||
144 | /* trim leading whitespace */ | ||
145 | memmove(retstr, &retstr[strspn(retstr, " \n\t\v")], strlen(retstr)); | ||
146 | |||
147 | /* trim trailing whitespace */ | ||
148 | i = strlen(retstr) - 1; | ||
149 | while (isspace(retstr[i])) | ||
150 | i--; | ||
151 | retstr[++i] = 0; | ||
152 | |||
153 | /* Aside: | ||
154 | * | ||
155 | * you know, a strrspn() would really be nice cuz then we could say: | ||
156 | * | ||
157 | * retstr[strrspn(retstr, " \n\t\v") + 1] = 0; | ||
158 | */ | ||
159 | |||
160 | return retstr; | ||
161 | } | ||
162 | #endif | ||
163 | |||
164 | #if 0 | ||
165 | /* | ||
166 | * strrspn - works just like strspn() but goes from right to left instead of | ||
167 | * left to right | ||
168 | */ | ||
169 | static size_t strrspn(const char *s, const char *accept) | ||
170 | { | ||
171 | size_t i = strlen(s); | ||
172 | |||
173 | while (strchr(accept, s[--i])) | ||
174 | ; | ||
175 | |||
176 | return i; | ||
177 | } | ||
178 | #endif | ||
179 | 130 | ||
180 | /* | 131 | /* |
181 | * index_of_next_unescaped_slash - walks left to right through a string | 132 | * index_of_next_unescaped_regexp_delim - walks left to right through a string |
182 | * beginning at a specified index and returns the index of the next forward | 133 | * beginning at a specified index and returns the index of the next regular |
183 | * slash ('/') not preceeded by a backslash ('\'). | 134 | * expression delimiter (typically a forward * slash ('/')) not preceeded by |
135 | * a backslash ('\'). | ||
184 | */ | 136 | */ |
185 | static int index_of_next_unescaped_slash(const char *str, int idx) | 137 | static int index_of_next_unescaped_regexp_delim(struct sed_cmd *sed_cmd, const char *str, int idx) |
186 | { | 138 | { |
187 | for ( ; str[idx]; idx++) { | 139 | for ( ; str[idx]; idx++) { |
188 | if (str[idx] == '/' && str[idx-1] != '\\') | 140 | if (str[idx] == sed_cmd->delimiter && str[idx-1] != '\\') |
189 | return idx; | 141 | return idx; |
190 | } | 142 | } |
191 | 143 | ||
@@ -196,7 +148,7 @@ static int index_of_next_unescaped_slash(const char *str, int idx) | |||
196 | /* | 148 | /* |
197 | * returns the index in the string just past where the address ends. | 149 | * returns the index in the string just past where the address ends. |
198 | */ | 150 | */ |
199 | static int get_address(const char *str, int *line, regex_t **regex) | 151 | static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, regex_t **regex) |
200 | { | 152 | { |
201 | char *my_str = strdup(str); | 153 | char *my_str = strdup(str); |
202 | int idx = 0; | 154 | int idx = 0; |
@@ -213,7 +165,7 @@ static int get_address(const char *str, int *line, regex_t **regex) | |||
213 | idx++; | 165 | idx++; |
214 | } | 166 | } |
215 | else if (my_str[idx] == '/') { | 167 | else if (my_str[idx] == '/') { |
216 | idx = index_of_next_unescaped_slash(my_str, ++idx); | 168 | idx = index_of_next_unescaped_regexp_delim(sed_cmd, my_str, ++idx); |
217 | if (idx == -1) | 169 | if (idx == -1) |
218 | error_msg_and_die("unterminated match expression\n"); | 170 | error_msg_and_die("unterminated match expression\n"); |
219 | my_str[idx] = '\0'; | 171 | my_str[idx] = '\0'; |
@@ -256,13 +208,16 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr) | |||
256 | * (all three of the '/' slashes are mandatory) | 208 | * (all three of the '/' slashes are mandatory) |
257 | */ | 209 | */ |
258 | 210 | ||
259 | /* verify that the 's' is followed by a 'slash' */ | 211 | /* verify that the 's' is followed by something. That something |
260 | if (substr[++idx] != '/') | 212 | * (typically a 'slash') is now our regexp delimiter... */ |
213 | if (!substr[++idx]) | ||
261 | error_msg_and_die("bad format in substitution expression\n"); | 214 | error_msg_and_die("bad format in substitution expression\n"); |
215 | else | ||
216 | sed_cmd->delimiter=substr[idx]; | ||
262 | 217 | ||
263 | /* save the match string */ | 218 | /* save the match string */ |
264 | oldidx = idx+1; | 219 | oldidx = idx+1; |
265 | idx = index_of_next_unescaped_slash(substr, ++idx); | 220 | idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx); |
266 | if (idx == -1) | 221 | if (idx == -1) |
267 | error_msg_and_die("bad format in substitution expression\n"); | 222 | error_msg_and_die("bad format in substitution expression\n"); |
268 | match = strdup_substr(substr, oldidx, idx); | 223 | match = strdup_substr(substr, oldidx, idx); |
@@ -281,7 +236,7 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr) | |||
281 | 236 | ||
282 | /* save the replacement string */ | 237 | /* save the replacement string */ |
283 | oldidx = idx+1; | 238 | oldidx = idx+1; |
284 | idx = index_of_next_unescaped_slash(substr, ++idx); | 239 | idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx); |
285 | if (idx == -1) | 240 | if (idx == -1) |
286 | error_msg_and_die("bad format in substitution expression\n"); | 241 | error_msg_and_die("bad format in substitution expression\n"); |
287 | sed_cmd->replace = strdup_substr(substr, oldidx, idx); | 242 | sed_cmd->replace = strdup_substr(substr, oldidx, idx); |
@@ -401,11 +356,11 @@ static char *parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr) | |||
401 | 356 | ||
402 | /* first part (if present) is an address: either a number or a /regex/ */ | 357 | /* first part (if present) is an address: either a number or a /regex/ */ |
403 | if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/') | 358 | if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/') |
404 | idx = get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); | 359 | idx = get_address(sed_cmd, cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); |
405 | 360 | ||
406 | /* second part (if present) will begin with a comma */ | 361 | /* second part (if present) will begin with a comma */ |
407 | if (cmdstr[idx] == ',') | 362 | if (cmdstr[idx] == ',') |
408 | idx += get_address(&cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match); | 363 | idx += get_address(sed_cmd, &cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match); |
409 | 364 | ||
410 | /* last part (mandatory) will be a command */ | 365 | /* last part (mandatory) will be a command */ |
411 | if (cmdstr[idx] == '\0') | 366 | if (cmdstr[idx] == '\0') |
@@ -36,11 +36,9 @@ | |||
36 | Unsupported features: | 36 | Unsupported features: |
37 | 37 | ||
38 | - transliteration (y/source-chars/dest-chars/) (use 'tr') | 38 | - transliteration (y/source-chars/dest-chars/) (use 'tr') |
39 | - no support for characters other than the '/' character for regex matches | ||
40 | - no pattern space hold space storing / swapping (x, etc.) | 39 | - no pattern space hold space storing / swapping (x, etc.) |
41 | - no labels / branching (: label, b, t, and friends) | 40 | - no labels / branching (: label, b, t, and friends) |
42 | - and lots, lots more. | 41 | - and lots, lots more. |
43 | |||
44 | */ | 42 | */ |
45 | 43 | ||
46 | #include <stdio.h> | 44 | #include <stdio.h> |
@@ -63,6 +61,7 @@ struct sed_cmd { | |||
63 | 61 | ||
64 | 62 | ||
65 | /* GENERAL FIELDS */ | 63 | /* GENERAL FIELDS */ |
64 | char delimiter; /* The delimiter used to separate regexps */ | ||
66 | 65 | ||
67 | /* address storage */ | 66 | /* address storage */ |
68 | int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */ | 67 | int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */ |
@@ -128,64 +127,17 @@ static void destroy_cmd_strs() | |||
128 | } | 127 | } |
129 | #endif | 128 | #endif |
130 | 129 | ||
131 | #if 0 | ||
132 | /* | ||
133 | * trim_str - trims leading and trailing space from a string | ||
134 | * | ||
135 | * Note: This returns a malloc'ed string so you must store and free it | ||
136 | * XXX: This should be in the utility.c file. | ||
137 | * XXX: This is now obsolete. Maybe it belongs nowhere. | ||
138 | */ | ||
139 | static char *trim_str(const char *str) | ||
140 | { | ||
141 | int i; | ||
142 | char *retstr = strdup(str); | ||
143 | |||
144 | /* trim leading whitespace */ | ||
145 | memmove(retstr, &retstr[strspn(retstr, " \n\t\v")], strlen(retstr)); | ||
146 | |||
147 | /* trim trailing whitespace */ | ||
148 | i = strlen(retstr) - 1; | ||
149 | while (isspace(retstr[i])) | ||
150 | i--; | ||
151 | retstr[++i] = 0; | ||
152 | |||
153 | /* Aside: | ||
154 | * | ||
155 | * you know, a strrspn() would really be nice cuz then we could say: | ||
156 | * | ||
157 | * retstr[strrspn(retstr, " \n\t\v") + 1] = 0; | ||
158 | */ | ||
159 | |||
160 | return retstr; | ||
161 | } | ||
162 | #endif | ||
163 | |||
164 | #if 0 | ||
165 | /* | ||
166 | * strrspn - works just like strspn() but goes from right to left instead of | ||
167 | * left to right | ||
168 | */ | ||
169 | static size_t strrspn(const char *s, const char *accept) | ||
170 | { | ||
171 | size_t i = strlen(s); | ||
172 | |||
173 | while (strchr(accept, s[--i])) | ||
174 | ; | ||
175 | |||
176 | return i; | ||
177 | } | ||
178 | #endif | ||
179 | 130 | ||
180 | /* | 131 | /* |
181 | * index_of_next_unescaped_slash - walks left to right through a string | 132 | * index_of_next_unescaped_regexp_delim - walks left to right through a string |
182 | * beginning at a specified index and returns the index of the next forward | 133 | * beginning at a specified index and returns the index of the next regular |
183 | * slash ('/') not preceeded by a backslash ('\'). | 134 | * expression delimiter (typically a forward * slash ('/')) not preceeded by |
135 | * a backslash ('\'). | ||
184 | */ | 136 | */ |
185 | static int index_of_next_unescaped_slash(const char *str, int idx) | 137 | static int index_of_next_unescaped_regexp_delim(struct sed_cmd *sed_cmd, const char *str, int idx) |
186 | { | 138 | { |
187 | for ( ; str[idx]; idx++) { | 139 | for ( ; str[idx]; idx++) { |
188 | if (str[idx] == '/' && str[idx-1] != '\\') | 140 | if (str[idx] == sed_cmd->delimiter && str[idx-1] != '\\') |
189 | return idx; | 141 | return idx; |
190 | } | 142 | } |
191 | 143 | ||
@@ -196,7 +148,7 @@ static int index_of_next_unescaped_slash(const char *str, int idx) | |||
196 | /* | 148 | /* |
197 | * returns the index in the string just past where the address ends. | 149 | * returns the index in the string just past where the address ends. |
198 | */ | 150 | */ |
199 | static int get_address(const char *str, int *line, regex_t **regex) | 151 | static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, regex_t **regex) |
200 | { | 152 | { |
201 | char *my_str = strdup(str); | 153 | char *my_str = strdup(str); |
202 | int idx = 0; | 154 | int idx = 0; |
@@ -213,7 +165,7 @@ static int get_address(const char *str, int *line, regex_t **regex) | |||
213 | idx++; | 165 | idx++; |
214 | } | 166 | } |
215 | else if (my_str[idx] == '/') { | 167 | else if (my_str[idx] == '/') { |
216 | idx = index_of_next_unescaped_slash(my_str, ++idx); | 168 | idx = index_of_next_unescaped_regexp_delim(sed_cmd, my_str, ++idx); |
217 | if (idx == -1) | 169 | if (idx == -1) |
218 | error_msg_and_die("unterminated match expression\n"); | 170 | error_msg_and_die("unterminated match expression\n"); |
219 | my_str[idx] = '\0'; | 171 | my_str[idx] = '\0'; |
@@ -256,13 +208,16 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr) | |||
256 | * (all three of the '/' slashes are mandatory) | 208 | * (all three of the '/' slashes are mandatory) |
257 | */ | 209 | */ |
258 | 210 | ||
259 | /* verify that the 's' is followed by a 'slash' */ | 211 | /* verify that the 's' is followed by something. That something |
260 | if (substr[++idx] != '/') | 212 | * (typically a 'slash') is now our regexp delimiter... */ |
213 | if (!substr[++idx]) | ||
261 | error_msg_and_die("bad format in substitution expression\n"); | 214 | error_msg_and_die("bad format in substitution expression\n"); |
215 | else | ||
216 | sed_cmd->delimiter=substr[idx]; | ||
262 | 217 | ||
263 | /* save the match string */ | 218 | /* save the match string */ |
264 | oldidx = idx+1; | 219 | oldidx = idx+1; |
265 | idx = index_of_next_unescaped_slash(substr, ++idx); | 220 | idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx); |
266 | if (idx == -1) | 221 | if (idx == -1) |
267 | error_msg_and_die("bad format in substitution expression\n"); | 222 | error_msg_and_die("bad format in substitution expression\n"); |
268 | match = strdup_substr(substr, oldidx, idx); | 223 | match = strdup_substr(substr, oldidx, idx); |
@@ -281,7 +236,7 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr) | |||
281 | 236 | ||
282 | /* save the replacement string */ | 237 | /* save the replacement string */ |
283 | oldidx = idx+1; | 238 | oldidx = idx+1; |
284 | idx = index_of_next_unescaped_slash(substr, ++idx); | 239 | idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx); |
285 | if (idx == -1) | 240 | if (idx == -1) |
286 | error_msg_and_die("bad format in substitution expression\n"); | 241 | error_msg_and_die("bad format in substitution expression\n"); |
287 | sed_cmd->replace = strdup_substr(substr, oldidx, idx); | 242 | sed_cmd->replace = strdup_substr(substr, oldidx, idx); |
@@ -401,11 +356,11 @@ static char *parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr) | |||
401 | 356 | ||
402 | /* first part (if present) is an address: either a number or a /regex/ */ | 357 | /* first part (if present) is an address: either a number or a /regex/ */ |
403 | if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/') | 358 | if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/') |
404 | idx = get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); | 359 | idx = get_address(sed_cmd, cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); |
405 | 360 | ||
406 | /* second part (if present) will begin with a comma */ | 361 | /* second part (if present) will begin with a comma */ |
407 | if (cmdstr[idx] == ',') | 362 | if (cmdstr[idx] == ',') |
408 | idx += get_address(&cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match); | 363 | idx += get_address(sed_cmd, &cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match); |
409 | 364 | ||
410 | /* last part (mandatory) will be a command */ | 365 | /* last part (mandatory) will be a command */ |
411 | if (cmdstr[idx] == '\0') | 366 | if (cmdstr[idx] == '\0') |