diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-07-13 14:38:20 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-07-13 14:38:20 +0200 |
commit | 95ac4a48f17c2fdd2a10524c0b399e3be72d8f42 (patch) | |
tree | 7877669d7835524e1152dfa890dc189faad61dc1 | |
parent | c76c78740a19ed3b1f9c5910313460221096536a (diff) | |
download | busybox-w32-95ac4a48f17c2fdd2a10524c0b399e3be72d8f42.tar.gz busybox-w32-95ac4a48f17c2fdd2a10524c0b399e3be72d8f42.tar.bz2 busybox-w32-95ac4a48f17c2fdd2a10524c0b399e3be72d8f42.zip |
vi: allow regular expressions in ':s' commands
BusyBox vi has never supported the use of regular expressions in
search/replace (':s') commands. Implement this using GNU regex
when VI_REGEX_SEARCH is enabled.
The implementation:
- uses basic regular expressions, to match those used in the search
  command;
- only supports substitution of back references ('\0' - '\9') in the
  replacement string. Any other character following a backslash is
  treated as that literal character.
VI_REGEX_SEARCH isn't enabled in the default build. In that case:
function old new delta
colon 4036 4033 -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3) Total: -3 bytes
When VI_REGEX_SEARCH is enabled:
function old new delta
colon 4036 4378 +342
.rodata 108207 108229 +22
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 364/0) Total: 364 bytes
v2: Rebase. Code shrink. Ensure empty replacement string is null terminated.
Signed-off-by: Andrey Dobrovolsky <andrey.dobrovolsky.odessa@gmail.com>
Signed-off-by: Ron Yorston <rmy@pobox.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/vi.c | 131 |
1 files changed, 116 insertions, 15 deletions
diff --git a/editors/vi.c b/editors/vi.c index 2941b8ae4..070e0f55a 100644 --- a/editors/vi.c +++ b/editors/vi.c | |||
@@ -2677,6 +2677,59 @@ static char *expand_args(char *args) | |||
2677 | # endif | 2677 | # endif |
2678 | #endif /* FEATURE_VI_COLON */ | 2678 | #endif /* FEATURE_VI_COLON */ |
2679 | 2679 | ||
2680 | #if ENABLE_FEATURE_VI_REGEX_SEARCH | ||
2681 | # define MAX_SUBPATTERN 10 // subpatterns \0 .. \9 | ||
2682 | |||
2683 | // If the return value is not NULL the caller should free R | ||
2684 | static char *regex_search(char *q, regex_t *preg, const char *Rorig, | ||
2685 | size_t *len_F, size_t *len_R, char **R) | ||
2686 | { | ||
2687 | regmatch_t regmatch[MAX_SUBPATTERN], *cur_match; | ||
2688 | char *found = NULL; | ||
2689 | const char *t; | ||
2690 | char *r; | ||
2691 | |||
2692 | regmatch[0].rm_so = 0; | ||
2693 | regmatch[0].rm_eo = end_line(q) - q; | ||
2694 | if (regexec(preg, q, MAX_SUBPATTERN, regmatch, REG_STARTEND) != 0) | ||
2695 | return found; | ||
2696 | |||
2697 | found = q + regmatch[0].rm_so; | ||
2698 | *len_F = regmatch[0].rm_eo - regmatch[0].rm_so; | ||
2699 | *R = NULL; | ||
2700 | |||
2701 | fill_result: | ||
2702 | // first pass calculates len_R, second fills R | ||
2703 | *len_R = 0; | ||
2704 | for (t = Rorig, r = *R; *t; t++) { | ||
2705 | size_t len = 1; // default is to copy one char from replace pattern | ||
2706 | const char *from = t; | ||
2707 | if (*t == '\\') { | ||
2708 | from = ++t; // skip backslash | ||
2709 | if (*t >= '0' && *t < '0' + MAX_SUBPATTERN) { | ||
2710 | cur_match = regmatch + (*t - '0'); | ||
2711 | if (cur_match->rm_so >= 0) { | ||
2712 | len = cur_match->rm_eo - cur_match->rm_so; | ||
2713 | from = q + cur_match->rm_so; | ||
2714 | } | ||
2715 | } | ||
2716 | } | ||
2717 | *len_R += len; | ||
2718 | if (*R) { | ||
2719 | memcpy(r, from, len); | ||
2720 | r += len; | ||
2721 | /* *r = '\0'; - xzalloc did it */ | ||
2722 | } | ||
2723 | } | ||
2724 | if (*R == NULL) { | ||
2725 | *R = xzalloc(*len_R + 1); | ||
2726 | goto fill_result; | ||
2727 | } | ||
2728 | |||
2729 | return found; | ||
2730 | } | ||
2731 | #endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */ | ||
2732 | |||
2680 | // buf must be no longer than MAX_INPUT_LEN! | 2733 | // buf must be no longer than MAX_INPUT_LEN! |
2681 | static void colon(char *buf) | 2734 | static void colon(char *buf) |
2682 | { | 2735 | { |
@@ -3084,6 +3137,14 @@ static void colon(char *buf) | |||
3084 | # if ENABLE_FEATURE_VI_VERBOSE_STATUS | 3137 | # if ENABLE_FEATURE_VI_VERBOSE_STATUS |
3085 | int last_line = 0, lines = 0; | 3138 | int last_line = 0, lines = 0; |
3086 | # endif | 3139 | # endif |
3140 | # if ENABLE_FEATURE_VI_REGEX_SEARCH | ||
3141 | regex_t preg; | ||
3142 | int cflags; | ||
3143 | char *Rorig; | ||
3144 | # if ENABLE_FEATURE_VI_UNDO | ||
3145 | int undo = 0; | ||
3146 | # endif | ||
3147 | # endif | ||
3087 | 3148 | ||
3088 | // F points to the "find" pattern | 3149 | // F points to the "find" pattern |
3089 | // R points to the "replace" pattern | 3150 | // R points to the "replace" pattern |
@@ -3100,7 +3161,6 @@ static void colon(char *buf) | |||
3100 | *flags++ = '\0'; // terminate "replace" | 3161 | *flags++ = '\0'; // terminate "replace" |
3101 | gflag = *flags; | 3162 | gflag = *flags; |
3102 | } | 3163 | } |
3103 | len_R = strlen(R); | ||
3104 | 3164 | ||
3105 | if (len_F) { // save "find" as last search pattern | 3165 | if (len_F) { // save "find" as last search pattern |
3106 | free(last_search_pattern); | 3166 | free(last_search_pattern); |
@@ -3122,31 +3182,68 @@ static void colon(char *buf) | |||
3122 | b = e; | 3182 | b = e; |
3123 | } | 3183 | } |
3124 | 3184 | ||
3185 | # if ENABLE_FEATURE_VI_REGEX_SEARCH | ||
3186 | Rorig = R; | ||
3187 | cflags = 0; | ||
3188 | if (ignorecase) | ||
3189 | cflags = REG_ICASE; | ||
3190 | memset(&preg, 0, sizeof(preg)); | ||
3191 | if (regcomp(&preg, F, cflags) != 0) { | ||
3192 | status_line(":s bad search pattern"); | ||
3193 | goto regex_search_end; | ||
3194 | } | ||
3195 | # else | ||
3196 | len_R = strlen(R); | ||
3197 | # endif | ||
3198 | |||
3125 | for (i = b; i <= e; i++) { // so, :20,23 s \0 find \0 replace \0 | 3199 | for (i = b; i <= e; i++) { // so, :20,23 s \0 find \0 replace \0 |
3126 | char *ls = q; // orig line start | 3200 | char *ls = q; // orig line start |
3127 | char *found; | 3201 | char *found; |
3128 | vc4: | 3202 | vc4: |
3203 | # if ENABLE_FEATURE_VI_REGEX_SEARCH | ||
3204 | found = regex_search(q, &preg, Rorig, &len_F, &len_R, &R); | ||
3205 | # else | ||
3129 | found = char_search(q, F, (FORWARD << 1) | LIMITED); // search cur line only for "find" | 3206 | found = char_search(q, F, (FORWARD << 1) | LIMITED); // search cur line only for "find" |
3207 | # endif | ||
3130 | if (found) { | 3208 | if (found) { |
3131 | uintptr_t bias; | 3209 | uintptr_t bias; |
3132 | // we found the "find" pattern - delete it | 3210 | // we found the "find" pattern - delete it |
3133 | // For undo support, the first item should not be chained | 3211 | // For undo support, the first item should not be chained |
3134 | text_hole_delete(found, found + len_F - 1, | 3212 | // This needs to be handled differently depending on |
3135 | subs ? ALLOW_UNDO_CHAIN: ALLOW_UNDO); | 3213 | // whether or not regex support is enabled. |
3136 | // can't do this above, no undo => no third argument | 3214 | # if ENABLE_FEATURE_VI_REGEX_SEARCH |
3137 | subs++; | 3215 | # define TEST_LEN_F len_F // len_F may be zero |
3138 | # if ENABLE_FEATURE_VI_VERBOSE_STATUS | 3216 | # define TEST_UNDO1 undo++ |
3139 | if (last_line != i) { | 3217 | # define TEST_UNDO2 undo++ |
3140 | last_line = i; | 3218 | # else |
3141 | ++lines; | 3219 | # define TEST_LEN_F 1 // len_F is never zero |
3220 | # define TEST_UNDO1 subs | ||
3221 | # define TEST_UNDO2 1 | ||
3222 | # endif | ||
3223 | if (TEST_LEN_F) // match can be empty, no delete needed | ||
3224 | text_hole_delete(found, found + len_F - 1, | ||
3225 | TEST_UNDO1 ? ALLOW_UNDO_CHAIN: ALLOW_UNDO); | ||
3226 | if (len_R) { // insert the "replace" pattern, if required | ||
3227 | bias = string_insert(found, R, | ||
3228 | TEST_UNDO2 ? ALLOW_UNDO_CHAIN: ALLOW_UNDO); | ||
3229 | found += bias; | ||
3230 | ls += bias; | ||
3231 | dot = ls; | ||
3232 | //q += bias; - recalculated anyway | ||
3142 | } | 3233 | } |
3234 | # if ENABLE_FEATURE_VI_REGEX_SEARCH | ||
3235 | free(R); | ||
3143 | # endif | 3236 | # endif |
3144 | // insert the "replace" patern | 3237 | if (TEST_LEN_F || len_R) { |
3145 | bias = string_insert(found, R, ALLOW_UNDO_CHAIN); | 3238 | dot = ls; |
3146 | found += bias; | 3239 | subs++; |
3147 | ls += bias; | 3240 | # if ENABLE_FEATURE_VI_VERBOSE_STATUS |
3148 | dot = ls; | 3241 | if (last_line != i) { |
3149 | //q += bias; - recalculated anyway | 3242 | last_line = i; |
3243 | ++lines; | ||
3244 | } | ||
3245 | # endif | ||
3246 | } | ||
3150 | // check for "global" :s/foo/bar/g | 3247 | // check for "global" :s/foo/bar/g |
3151 | if (gflag == 'g') { | 3248 | if (gflag == 'g') { |
3152 | if ((found + len_R) < end_line(ls)) { | 3249 | if ((found + len_R) < end_line(ls)) { |
@@ -3166,6 +3263,10 @@ static void colon(char *buf) | |||
3166 | status_line("%d substitutions on %d lines", subs, lines); | 3263 | status_line("%d substitutions on %d lines", subs, lines); |
3167 | # endif | 3264 | # endif |
3168 | } | 3265 | } |
3266 | # if ENABLE_FEATURE_VI_REGEX_SEARCH | ||
3267 | regex_search_end: | ||
3268 | regfree(&preg); | ||
3269 | # endif | ||
3169 | # endif /* FEATURE_VI_SEARCH */ | 3270 | # endif /* FEATURE_VI_SEARCH */ |
3170 | } else if (strncmp(cmd, "version", i) == 0) { // show software version | 3271 | } else if (strncmp(cmd, "version", i) == 0) { // show software version |
3171 | status_line(BB_VER); | 3272 | status_line(BB_VER); |