diff options
author | bug1 <bug1@69ca8d6d-28ef-0310-b511-8ec308f3f277> | 2003-03-30 08:02:18 +0000 |
---|---|---|
committer | bug1 <bug1@69ca8d6d-28ef-0310-b511-8ec308f3f277> | 2003-03-30 08:02:18 +0000 |
commit | 1c2674583cb725d00a882d480edfe205d6d5c822 (patch) | |
tree | e7b44428c7d6ef455da9a6239f949f8e313a3830 | |
parent | 7fe9606469a2e0d087923f2a85f697d455073360 (diff) | |
download | busybox-w32-1c2674583cb725d00a882d480edfe205d6d5c822.tar.gz busybox-w32-1c2674583cb725d00a882d480edfe205d6d5c822.tar.bz2 busybox-w32-1c2674583cb725d00a882d480edfe205d6d5c822.zip |
sed 'y' command, simplify some other code
git-svn-id: svn://busybox.net/trunk/busybox@6769 69ca8d6d-28ef-0310-b511-8ec308f3f277
-rw-r--r-- | editors/sed.c | 130 |
1 files changed, 97 insertions, 33 deletions
diff --git a/editors/sed.c b/editors/sed.c index 292bc8662..4c535b1e3 100644 --- a/editors/sed.c +++ b/editors/sed.c | |||
@@ -97,9 +97,10 @@ typedef struct sed_cmd_s { | |||
97 | unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */ | 97 | unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */ |
98 | unsigned int sub_p:2; /* sed -e 's/foo/bar/p' (print substitution) */ | 98 | unsigned int sub_p:2; /* sed -e 's/foo/bar/p' (print substitution) */ |
99 | 99 | ||
100 | /* GENERAL FIELDS */ | 100 | /* TRANSLATE COMMAND */ |
101 | char delimiter; /* The delimiter used to separate regexps */ | 101 | char *translate; |
102 | 102 | ||
103 | /* GENERAL FIELDS */ | ||
103 | /* the command */ | 104 | /* the command */ |
104 | char cmd; /* p,d,s (add more at your leisure :-) */ | 105 | char cmd; /* p,d,s (add more at your leisure :-) */ |
105 | 106 | ||
@@ -148,23 +149,23 @@ static void destroy_cmd_strs(void) | |||
148 | } | 149 | } |
149 | #endif | 150 | #endif |
150 | 151 | ||
151 | |||
152 | /* | 152 | /* |
153 | * index_of_next_unescaped_regexp_delim - walks left to right through a string | 153 | * index_of_next_unescaped_regexp_delim - walks left to right through a string |
154 | * beginning at a specified index and returns the index of the next regular | 154 | * beginning at a specified index and returns the index of the next regular |
155 | * expression delimiter (typically a forward * slash ('/')) not preceeded by | 155 | * expression delimiter (typically a forward * slash ('/')) not preceeded by |
156 | * a backslash ('\'). | 156 | * a backslash ('\'). |
157 | */ | 157 | */ |
158 | static int index_of_next_unescaped_regexp_delim(const char delimiter, const char *str, int idx) | 158 | static int index_of_next_unescaped_regexp_delim(const char delimiter, const char *str) |
159 | { | 159 | { |
160 | int bracket = -1; | 160 | int bracket = -1; |
161 | int escaped = 0; | 161 | int escaped = 0; |
162 | int idx = 0; | ||
162 | char ch; | 163 | char ch; |
163 | 164 | ||
164 | for ( ; (ch = str[idx]); idx++) { | 165 | for ( ; (ch = str[idx]); idx++) { |
165 | if (bracket != -1) { | 166 | if (bracket != -1) { |
166 | if (ch == ']' && !(bracket == idx - 1 || | 167 | if (ch == ']' && !(bracket == idx - 1 || |
167 | (bracket == idx - 2 && str[idx-1] == '^'))) | 168 | (bracket == idx - 2 && str[idx-1] == '^'))) |
168 | bracket = -1; | 169 | bracket = -1; |
169 | } else if (escaped) | 170 | } else if (escaped) |
170 | escaped = 0; | 171 | escaped = 0; |
@@ -180,10 +181,43 @@ static int index_of_next_unescaped_regexp_delim(const char delimiter, const char | |||
180 | return -1; | 181 | return -1; |
181 | } | 182 | } |
182 | 183 | ||
/*
 * parse_regex_delim - split "<delim>first<delim>second<delim>" into its two
 * delimited fields.
 *
 * cmdstr:  points at the delimiter character (the character right after the
 *          command letter).
 * match:   receives a bb_xstrndup() copy of the first field.
 * replace: receives a bb_xstrndup() copy of the second field.
 *
 * Field boundaries are located with index_of_next_unescaped_regexp_delim().
 * Dies with "bad format in substitution expression" when cmdstr is empty or
 * when either field has no terminating delimiter.
 *
 * Returns the index, relative to cmdstr, of the delimiter that closes the
 * second field.
 */
static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
{
	const char *field;
	char delim;
	int len;

	/* whatever follows the command letter (typically a slash) is the
	 * delimiter; an empty string means there is nothing to parse */
	delim = *cmdstr;
	if (delim == '\0') {
		bb_error_msg_and_die("bad format in substitution expression");
	}

	/* first field: the match string */
	field = cmdstr + 1;
	len = index_of_next_unescaped_regexp_delim(delim, field);
	if (len == -1) {
		bb_error_msg_and_die("bad format in substitution expression");
	}
	*match = bb_xstrndup(field, len);

	/* second field: the replacement string, starting just past the
	 * delimiter that ended the first field */
	field += len + 1;
	len = index_of_next_unescaped_regexp_delim(delim, field);
	if (len == -1) {
		bb_error_msg_and_die("bad format in substitution expression");
	}
	*replace = bb_xstrndup(field, len);

	return (field - cmdstr) + len;
}
216 | |||
183 | /* | 217 | /* |
184 | * returns the index in the string just past where the address ends. | 218 | * returns the index in the string just past where the address ends. |
185 | */ | 219 | */ |
186 | static int get_address(char *delimiter, char *my_str, int *linenum, regex_t **regex) | 220 | static int get_address(char *my_str, int *linenum, regex_t **regex) |
187 | { | 221 | { |
188 | int idx = 0; | 222 | int idx = 0; |
189 | if (isdigit(my_str[idx])) { | 223 | if (isdigit(my_str[idx])) { |
@@ -198,13 +232,15 @@ static int get_address(char *delimiter, char *my_str, int *linenum, regex_t **re | |||
198 | } | 232 | } |
199 | else if (my_str[idx] == '/' || my_str[idx] == '\\') { | 233 | else if (my_str[idx] == '/' || my_str[idx] == '\\') { |
200 | int idx_start = 1; | 234 | int idx_start = 1; |
235 | char delimiter; | ||
201 | 236 | ||
202 | *delimiter = '/'; | 237 | delimiter = '/'; |
203 | if (my_str[idx] == '\\') { | 238 | if (my_str[idx] == '\\') { |
204 | idx_start++; | 239 | idx_start++; |
205 | *delimiter = my_str[++idx]; | 240 | delimiter = my_str[++idx]; |
206 | } | 241 | } |
207 | idx = index_of_next_unescaped_regexp_delim(*delimiter, my_str, ++idx); | 242 | idx++; |
243 | idx += index_of_next_unescaped_regexp_delim(delimiter, my_str + idx); | ||
208 | if (idx == -1) { | 244 | if (idx == -1) { |
209 | bb_error_msg_and_die("unterminated match expression"); | 245 | bb_error_msg_and_die("unterminated match expression"); |
210 | } | 246 | } |
@@ -218,7 +254,6 @@ static int get_address(char *delimiter, char *my_str, int *linenum, regex_t **re | |||
218 | 254 | ||
219 | static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) | 255 | static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) |
220 | { | 256 | { |
221 | int oldidx; | ||
222 | int cflags = 0; | 257 | int cflags = 0; |
223 | char *match; | 258 | char *match; |
224 | int idx = 0; | 259 | int idx = 0; |
@@ -233,19 +268,7 @@ static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) | |||
233 | * (all three of the '/' slashes are mandatory) | 268 | * (all three of the '/' slashes are mandatory) |
234 | */ | 269 | */ |
235 | 270 | ||
236 | /* verify that the 's' is followed by something. That something | 271 | idx = parse_regex_delim(substr, &match, &sed_cmd->replace); |
237 | * (typically a 'slash') is now our regexp delimiter... */ | ||
238 | if (substr[idx] == '\0') | ||
239 | bb_error_msg_and_die("bad format in substitution expression"); | ||
240 | else | ||
241 | sed_cmd->delimiter=substr[idx]; | ||
242 | |||
243 | /* save the match string */ | ||
244 | oldidx = idx+1; | ||
245 | idx = index_of_next_unescaped_regexp_delim(sed_cmd->delimiter, substr, ++idx); | ||
246 | if (idx == -1) | ||
247 | bb_error_msg_and_die("bad format in substitution expression"); | ||
248 | match = bb_xstrndup(substr + oldidx, idx - oldidx); | ||
249 | 272 | ||
250 | /* determine the number of back references in the match string */ | 273 | /* determine the number of back references in the match string */ |
251 | /* Note: we compute this here rather than in the do_subst_command() | 274 | /* Note: we compute this here rather than in the do_subst_command() |
@@ -259,13 +282,6 @@ static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) | |||
259 | sed_cmd->num_backrefs++; | 282 | sed_cmd->num_backrefs++; |
260 | } | 283 | } |
261 | 284 | ||
262 | /* save the replacement string */ | ||
263 | oldidx = idx+1; | ||
264 | idx = index_of_next_unescaped_regexp_delim(sed_cmd->delimiter, substr, ++idx); | ||
265 | if (idx == -1) | ||
266 | bb_error_msg_and_die("bad format in substitution expression"); | ||
267 | sed_cmd->replace = bb_xstrndup(substr + oldidx, idx - oldidx); | ||
268 | |||
269 | /* process the flags */ | 285 | /* process the flags */ |
270 | while (substr[++idx]) { | 286 | while (substr[++idx]) { |
271 | switch (substr[idx]) { | 287 | switch (substr[idx]) { |
@@ -297,6 +313,39 @@ out: | |||
297 | return idx; | 313 | return idx; |
298 | } | 314 | } |
299 | 315 | ||
/*
 * replace_slash_n - convert every two-character sequence backslash + 'n' in
 * string into a single newline character, in place.
 *
 * string: NUL-terminated, writable buffer. The result is never longer than
 *         the input, so no extra space is needed.
 *
 * The string is scanned left to right with a read and a write cursor; each
 * "\n" pair consumes two input bytes and produces one output byte, so the
 * tail of the string is shifted down as the scan proceeds. A backslash that
 * is not followed by 'n' (including a trailing backslash) is copied through
 * unchanged.
 */
static void replace_slash_n(char *string)
{
	const char *src = string;
	char *dst = string;

	while (*src != '\0') {
		if (src[0] == '\\' && src[1] == 'n') {
			/* drop both bytes of the escape, emit one newline */
			*dst++ = '\n';
			src += 2;
		} else {
			*dst++ = *src++;
		}
	}
	*dst = '\0';
}
330 | |||
331 | static int parse_translate_cmd(sed_cmd_t * const sed_cmd, const char *cmdstr) | ||
332 | { | ||
333 | char *match; | ||
334 | char *replace; | ||
335 | int idx; | ||
336 | int i; | ||
337 | |||
338 | idx = parse_regex_delim(cmdstr, &match, &replace); | ||
339 | replace_slash_n(match); | ||
340 | replace_slash_n(replace); | ||
341 | sed_cmd->translate = xcalloc(1, (strlen(match) + 1) * 2); | ||
342 | for (i = 0; (match[i] != 0) && (replace[i] != 0); i++) { | ||
343 | sed_cmd->translate[i * 2] = match[i]; | ||
344 | sed_cmd->translate[(i * 2) + 1] = replace[i]; | ||
345 | } | ||
346 | return(idx); | ||
347 | } | ||
348 | |||
300 | static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr) | 349 | static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr) |
301 | { | 350 | { |
302 | int i, j; | 351 | int i, j; |
@@ -417,6 +466,10 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) | |||
417 | strncpy(sed_cmd->label, cmdstr, length); | 466 | strncpy(sed_cmd->label, cmdstr, length); |
418 | cmdstr += length; | 467 | cmdstr += length; |
419 | } | 468 | } |
469 | /* translation command */ | ||
470 | else if (sed_cmd->cmd == 'y') { | ||
471 | cmdstr += parse_translate_cmd(sed_cmd, cmdstr); | ||
472 | } | ||
420 | /* if it wasnt a single-letter command that takes no arguments | 473 | /* if it wasnt a single-letter command that takes no arguments |
421 | * then it must be an invalid command. | 474 | * then it must be an invalid command. |
422 | */ | 475 | */ |
@@ -430,7 +483,6 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) | |||
430 | 483 | ||
431 | static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr) | 484 | static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr) |
432 | { | 485 | { |
433 | |||
434 | /* Skip over leading whitespace and semicolons */ | 486 | /* Skip over leading whitespace and semicolons */ |
435 | cmdstr += strspn(cmdstr, semicolon_whitespace); | 487 | cmdstr += strspn(cmdstr, semicolon_whitespace); |
436 | 488 | ||
@@ -452,13 +504,13 @@ static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr) | |||
452 | */ | 504 | */ |
453 | 505 | ||
454 | /* first part (if present) is an address: either a '$', a number or a /regex/ */ | 506 | /* first part (if present) is an address: either a '$', a number or a /regex/ */ |
455 | cmdstr += get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); | 507 | cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); |
456 | 508 | ||
457 | /* second part (if present) will begin with a comma */ | 509 | /* second part (if present) will begin with a comma */ |
458 | if (*cmdstr == ',') { | 510 | if (*cmdstr == ',') { |
459 | int idx; | 511 | int idx; |
460 | cmdstr++; | 512 | cmdstr++; |
461 | idx = get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); | 513 | idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); |
462 | if (idx == 0) { | 514 | if (idx == 0) { |
463 | bb_error_msg_and_die("get_address: no address found in string\n" | 515 | bb_error_msg_and_die("get_address: no address found in string\n" |
464 | "\t(you probably didn't check the string you passed me)"); | 516 | "\t(you probably didn't check the string you passed me)"); |
@@ -911,6 +963,18 @@ static void process_file(FILE *file) | |||
911 | sed_cmd = branch_to(sed_cmd->label); | 963 | sed_cmd = branch_to(sed_cmd->label); |
912 | } | 964 | } |
913 | break; | 965 | break; |
966 | case 'y': { | ||
967 | int i; | ||
968 | for (i = 0; line[i] != 0; i++) { | ||
969 | int j; | ||
970 | for (j = 0; sed_cmd->translate[j] ;j += 2) { | ||
971 | if (line[i] == sed_cmd->translate[j]) { | ||
972 | line[i] = sed_cmd->translate[j + 1]; | ||
973 | } | ||
974 | } | ||
975 | } | ||
976 | } | ||
977 | break; | ||
914 | // case ':': | 978 | // case ':': |
915 | // break; | 979 | // break; |
916 | } | 980 | } |