aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbug1 <bug1@69ca8d6d-28ef-0310-b511-8ec308f3f277>2003-03-30 08:02:18 +0000
committerbug1 <bug1@69ca8d6d-28ef-0310-b511-8ec308f3f277>2003-03-30 08:02:18 +0000
commit1c2674583cb725d00a882d480edfe205d6d5c822 (patch)
treee7b44428c7d6ef455da9a6239f949f8e313a3830
parent7fe9606469a2e0d087923f2a85f697d455073360 (diff)
downloadbusybox-w32-1c2674583cb725d00a882d480edfe205d6d5c822.tar.gz
busybox-w32-1c2674583cb725d00a882d480edfe205d6d5c822.tar.bz2
busybox-w32-1c2674583cb725d00a882d480edfe205d6d5c822.zip
sed 'y' command, simplify some other code
git-svn-id: svn://busybox.net/trunk/busybox@6769 69ca8d6d-28ef-0310-b511-8ec308f3f277
-rw-r--r--editors/sed.c130
1 files changed, 97 insertions, 33 deletions
diff --git a/editors/sed.c b/editors/sed.c
index 292bc8662..4c535b1e3 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -97,9 +97,10 @@ typedef struct sed_cmd_s {
97 unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */ 97 unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */
98 unsigned int sub_p:2; /* sed -e 's/foo/bar/p' (print substitution) */ 98 unsigned int sub_p:2; /* sed -e 's/foo/bar/p' (print substitution) */
99 99
100 /* GENERAL FIELDS */ 100 /* TRANSLATE COMMAND */
101 char delimiter; /* The delimiter used to separate regexps */ 101 char *translate;
102 102
103 /* GENERAL FIELDS */
103 /* the command */ 104 /* the command */
104 char cmd; /* p,d,s (add more at your leisure :-) */ 105 char cmd; /* p,d,s (add more at your leisure :-) */
105 106
@@ -148,23 +149,23 @@ static void destroy_cmd_strs(void)
148} 149}
149#endif 150#endif
150 151
151
152/* 152/*
153 * index_of_next_unescaped_regexp_delim - walks left to right through a string 153 * index_of_next_unescaped_regexp_delim - walks left to right through a string
154 * beginning at a specified index and returns the index of the next regular 154 * beginning at a specified index and returns the index of the next regular
155 * expression delimiter (typically a forward slash ('/')) not preceded by 155 * expression delimiter (typically a forward slash ('/')) not preceded by
156 * a backslash ('\'). 156 * a backslash ('\').
157 */ 157 */
158static int index_of_next_unescaped_regexp_delim(const char delimiter, const char *str, int idx) 158static int index_of_next_unescaped_regexp_delim(const char delimiter, const char *str)
159{ 159{
160 int bracket = -1; 160 int bracket = -1;
161 int escaped = 0; 161 int escaped = 0;
162 int idx = 0;
162 char ch; 163 char ch;
163 164
164 for ( ; (ch = str[idx]); idx++) { 165 for ( ; (ch = str[idx]); idx++) {
165 if (bracket != -1) { 166 if (bracket != -1) {
166 if (ch == ']' && !(bracket == idx - 1 || 167 if (ch == ']' && !(bracket == idx - 1 ||
167 (bracket == idx - 2 && str[idx-1] == '^'))) 168 (bracket == idx - 2 && str[idx-1] == '^')))
168 bracket = -1; 169 bracket = -1;
169 } else if (escaped) 170 } else if (escaped)
170 escaped = 0; 171 escaped = 0;
@@ -180,10 +181,43 @@ static int index_of_next_unescaped_regexp_delim(const char delimiter, const char
180 return -1; 181 return -1;
181} 182}
182 183
/*
 * parse_regex_delim - split a "<d>match<d>replace<d>..." argument in two.
 *
 * The first character of cmdstr is taken as the delimiter <d>.  The text
 * up to the next unescaped delimiter is duplicated into *match, and the
 * text between that delimiter and the following unescaped one is
 * duplicated into *replace (both via bb_xstrndup).
 *
 * Returns the offset, relative to cmdstr, at which the search for the
 * closing delimiter stopped (the value reported by
 * index_of_next_unescaped_regexp_delim, rebased onto cmdstr).
 *
 * An empty cmdstr or a missing delimiter is reported through
 * bb_error_msg_and_die().
 */
static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
{
	const char *cursor;
	char delim;
	int len;

	/* whatever follows the command letter (typically a slash) is the
	 * delimiter for this expression; there has to be something there */
	delim = *cmdstr;
	if (delim == '\0')
		bb_error_msg_and_die("bad format in substitution expression");

	/* first field: the match string */
	cursor = cmdstr + 1;
	len = index_of_next_unescaped_regexp_delim(delim, cursor);
	if (len == -1)
		bb_error_msg_and_die("bad format in substitution expression");
	*match = bb_xstrndup(cursor, len);

	/* second field: the replacement string, starting just past the
	 * delimiter that ended the match string */
	cursor += len + 1;
	len = index_of_next_unescaped_regexp_delim(delim, cursor);
	if (len == -1)
		bb_error_msg_and_die("bad format in substitution expression");
	*replace = bb_xstrndup(cursor, len);

	return (cursor - cmdstr) + len;
}
216
183/* 217/*
184 * returns the index in the string just past where the address ends. 218 * returns the index in the string just past where the address ends.
185 */ 219 */
186static int get_address(char *delimiter, char *my_str, int *linenum, regex_t **regex) 220static int get_address(char *my_str, int *linenum, regex_t **regex)
187{ 221{
188 int idx = 0; 222 int idx = 0;
189 if (isdigit(my_str[idx])) { 223 if (isdigit(my_str[idx])) {
@@ -198,13 +232,15 @@ static int get_address(char *delimiter, char *my_str, int *linenum, regex_t **re
198 } 232 }
199 else if (my_str[idx] == '/' || my_str[idx] == '\\') { 233 else if (my_str[idx] == '/' || my_str[idx] == '\\') {
200 int idx_start = 1; 234 int idx_start = 1;
235 char delimiter;
201 236
202 *delimiter = '/'; 237 delimiter = '/';
203 if (my_str[idx] == '\\') { 238 if (my_str[idx] == '\\') {
204 idx_start++; 239 idx_start++;
205 *delimiter = my_str[++idx]; 240 delimiter = my_str[++idx];
206 } 241 }
207 idx = index_of_next_unescaped_regexp_delim(*delimiter, my_str, ++idx); 242 idx++;
243 idx += index_of_next_unescaped_regexp_delim(delimiter, my_str + idx);
208 if (idx == -1) { 244 if (idx == -1) {
209 bb_error_msg_and_die("unterminated match expression"); 245 bb_error_msg_and_die("unterminated match expression");
210 } 246 }
@@ -218,7 +254,6 @@ static int get_address(char *delimiter, char *my_str, int *linenum, regex_t **re
218 254
219static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) 255static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr)
220{ 256{
221 int oldidx;
222 int cflags = 0; 257 int cflags = 0;
223 char *match; 258 char *match;
224 int idx = 0; 259 int idx = 0;
@@ -233,19 +268,7 @@ static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr)
233 * (all three of the '/' slashes are mandatory) 268 * (all three of the '/' slashes are mandatory)
234 */ 269 */
235 270
236 /* verify that the 's' is followed by something. That something 271 idx = parse_regex_delim(substr, &match, &sed_cmd->replace);
237 * (typically a 'slash') is now our regexp delimiter... */
238 if (substr[idx] == '\0')
239 bb_error_msg_and_die("bad format in substitution expression");
240 else
241 sed_cmd->delimiter=substr[idx];
242
243 /* save the match string */
244 oldidx = idx+1;
245 idx = index_of_next_unescaped_regexp_delim(sed_cmd->delimiter, substr, ++idx);
246 if (idx == -1)
247 bb_error_msg_and_die("bad format in substitution expression");
248 match = bb_xstrndup(substr + oldidx, idx - oldidx);
249 272
250 /* determine the number of back references in the match string */ 273 /* determine the number of back references in the match string */
251 /* Note: we compute this here rather than in the do_subst_command() 274 /* Note: we compute this here rather than in the do_subst_command()
@@ -259,13 +282,6 @@ static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr)
259 sed_cmd->num_backrefs++; 282 sed_cmd->num_backrefs++;
260 } 283 }
261 284
262 /* save the replacement string */
263 oldidx = idx+1;
264 idx = index_of_next_unescaped_regexp_delim(sed_cmd->delimiter, substr, ++idx);
265 if (idx == -1)
266 bb_error_msg_and_die("bad format in substitution expression");
267 sed_cmd->replace = bb_xstrndup(substr + oldidx, idx - oldidx);
268
269 /* process the flags */ 285 /* process the flags */
270 while (substr[++idx]) { 286 while (substr[++idx]) {
271 switch (substr[idx]) { 287 switch (substr[idx]) {
@@ -297,6 +313,39 @@ out:
297 return idx; 313 return idx;
298} 314}
299 315
/*
 * replace_slash_n - collapse every two-character "\n" escape (a backslash
 * followed by the letter 'n') in string into a single newline character.
 *
 * The string is rewritten in place and stays NUL-terminated; it can only
 * get shorter.  All other characters, including a backslash that is not
 * followed by 'n', are copied through unchanged.
 */
static void replace_slash_n(char *string)
{
	const char *src = string;
	char *dst = string;

	while (*src) {
		if (src[0] == '\\' && src[1] == 'n') {
			/* two input characters become one output character */
			*dst++ = '\n';
			src += 2;
		} else {
			*dst++ = *src++;
		}
	}
	*dst = '\0';
}
330
331static int parse_translate_cmd(sed_cmd_t * const sed_cmd, const char *cmdstr)
332{
333 char *match;
334 char *replace;
335 int idx;
336 int i;
337
338 idx = parse_regex_delim(cmdstr, &match, &replace);
339 replace_slash_n(match);
340 replace_slash_n(replace);
341 sed_cmd->translate = xcalloc(1, (strlen(match) + 1) * 2);
342 for (i = 0; (match[i] != 0) && (replace[i] != 0); i++) {
343 sed_cmd->translate[i * 2] = match[i];
344 sed_cmd->translate[(i * 2) + 1] = replace[i];
345 }
346 return(idx);
347}
348
300static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr) 349static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr)
301{ 350{
302 int i, j; 351 int i, j;
@@ -417,6 +466,10 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
417 strncpy(sed_cmd->label, cmdstr, length); 466 strncpy(sed_cmd->label, cmdstr, length);
418 cmdstr += length; 467 cmdstr += length;
419 } 468 }
469 /* translation command */
470 else if (sed_cmd->cmd == 'y') {
471 cmdstr += parse_translate_cmd(sed_cmd, cmdstr);
472 }
420 /* if it wasnt a single-letter command that takes no arguments 473 /* if it wasnt a single-letter command that takes no arguments
421 * then it must be an invalid command. 474 * then it must be an invalid command.
422 */ 475 */
@@ -430,7 +483,6 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr)
430 483
431static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr) 484static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr)
432{ 485{
433
434 /* Skip over leading whitespace and semicolons */ 486 /* Skip over leading whitespace and semicolons */
435 cmdstr += strspn(cmdstr, semicolon_whitespace); 487 cmdstr += strspn(cmdstr, semicolon_whitespace);
436 488
@@ -452,13 +504,13 @@ static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr)
452 */ 504 */
453 505
454 /* first part (if present) is an address: either a '$', a number or a /regex/ */ 506 /* first part (if present) is an address: either a '$', a number or a /regex/ */
455 cmdstr += get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); 507 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
456 508
457 /* second part (if present) will begin with a comma */ 509 /* second part (if present) will begin with a comma */
458 if (*cmdstr == ',') { 510 if (*cmdstr == ',') {
459 int idx; 511 int idx;
460 cmdstr++; 512 cmdstr++;
461 idx = get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); 513 idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
462 if (idx == 0) { 514 if (idx == 0) {
463 bb_error_msg_and_die("get_address: no address found in string\n" 515 bb_error_msg_and_die("get_address: no address found in string\n"
464 "\t(you probably didn't check the string you passed me)"); 516 "\t(you probably didn't check the string you passed me)");
@@ -911,6 +963,18 @@ static void process_file(FILE *file)
911 sed_cmd = branch_to(sed_cmd->label); 963 sed_cmd = branch_to(sed_cmd->label);
912 } 964 }
913 break; 965 break;
966 case 'y': {
967 int i;
968 for (i = 0; line[i] != 0; i++) {
969 int j;
970 for (j = 0; sed_cmd->translate[j] ;j += 2) {
971 if (line[i] == sed_cmd->translate[j]) {
972 line[i] = sed_cmd->translate[j + 1];
973 }
974 }
975 }
976 }
977 break;
914// case ':': 978// case ':':
915// break; 979// break;
916 } 980 }