diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-06-29 03:27:07 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-06-29 03:27:07 +0200 |
commit | 4f27503a1ecab8dfe373a349df3d8fe3c22e2160 (patch) | |
tree | c10c3c9ad535638916be478323ce3b36ad41bda1 | |
parent | f414fb4411e65662b44f038ed3175789172edc20 (diff) | |
download | busybox-w32-4f27503a1ecab8dfe373a349df3d8fe3c22e2160.tar.gz busybox-w32-4f27503a1ecab8dfe373a349df3d8fe3c22e2160.tar.bz2 busybox-w32-4f27503a1ecab8dfe373a349df3d8fe3c22e2160.zip |
awk: get rid of "move name one char back" trick in next_token()
function old new delta
next_token 791 812 +21
awk_main 886 831 -55
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 21/-55) Total: -34 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/awk.c | 54 |
1 file changed, 27 insertions, 27 deletions
diff --git a/editors/awk.c b/editors/awk.c index 1a4468a53..fb1e5d59b 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -535,6 +535,7 @@ struct globals { | |||
535 | var *Fields; | 535 | var *Fields; |
536 | nvblock *g_cb; | 536 | nvblock *g_cb; |
537 | char *g_pos; | 537 | char *g_pos; |
538 | char g_saved_ch; | ||
538 | smallint icase; | 539 | smallint icase; |
539 | smallint exiting; | 540 | smallint exiting; |
540 | smallint nextrec; | 541 | smallint nextrec; |
@@ -599,6 +600,7 @@ struct globals2 { | |||
599 | #define Fields (G1.Fields ) | 600 | #define Fields (G1.Fields ) |
600 | #define g_cb (G1.g_cb ) | 601 | #define g_cb (G1.g_cb ) |
601 | #define g_pos (G1.g_pos ) | 602 | #define g_pos (G1.g_pos ) |
603 | #define g_saved_ch (G1.g_saved_ch ) | ||
602 | #define icase (G1.icase ) | 604 | #define icase (G1.icase ) |
603 | #define exiting (G1.exiting ) | 605 | #define exiting (G1.exiting ) |
604 | #define nextrec (G1.nextrec ) | 606 | #define nextrec (G1.nextrec ) |
@@ -1125,6 +1127,10 @@ static uint32_t next_token(uint32_t expected) | |||
1125 | t_info = save_info; | 1127 | t_info = save_info; |
1126 | } else { | 1128 | } else { |
1127 | p = g_pos; | 1129 | p = g_pos; |
1130 | if (g_saved_ch != '\0') { | ||
1131 | *p = g_saved_ch; | ||
1132 | g_saved_ch = '\0'; | ||
1133 | } | ||
1128 | readnext: | 1134 | readnext: |
1129 | p = skip_spaces(p); | 1135 | p = skip_spaces(p); |
1130 | g_lineno = t_lineno; | 1136 | g_lineno = t_lineno; |
@@ -1183,6 +1189,8 @@ static uint32_t next_token(uint32_t expected) | |||
1183 | tc = TC_NUMBER; | 1189 | tc = TC_NUMBER; |
1184 | debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); | 1190 | debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); |
1185 | } else { | 1191 | } else { |
1192 | char *end_of_name; | ||
1193 | |||
1186 | if (*p == '\n') | 1194 | if (*p == '\n') |
1187 | t_lineno++; | 1195 | t_lineno++; |
1188 | 1196 | ||
@@ -1219,16 +1227,14 @@ static uint32_t next_token(uint32_t expected) | |||
1219 | if (!isalnum_(*p)) | 1227 | if (!isalnum_(*p)) |
1220 | syntax_error(EMSG_UNEXP_TOKEN); /* no */ | 1228 | syntax_error(EMSG_UNEXP_TOKEN); /* no */ |
1221 | /* yes */ | 1229 | /* yes */ |
1222 | /* "move name one char back" trick: we need a byte for NUL terminator */ | 1230 | t_string = p; |
1223 | /* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */ | 1231 | while (isalnum_(*p)) |
1224 | t_string = --p; | 1232 | p++; |
1225 | while (isalnum_(*++p)) { | 1233 | end_of_name = p; |
1226 | p[-1] = *p; | ||
1227 | } | ||
1228 | p[-1] = '\0'; | ||
1229 | tc = TC_VARIABLE; | 1234 | tc = TC_VARIABLE; |
1230 | /* also consume whitespace between functionname and bracket */ | 1235 | /* also consume whitespace between functionname and bracket */ |
1231 | if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) | 1236 | if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) |
1237 | //TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters? | ||
1232 | p = skip_spaces(p); | 1238 | p = skip_spaces(p); |
1233 | if (*p == '(') { | 1239 | if (*p == '(') { |
1234 | p++; | 1240 | p++; |
@@ -1240,7 +1246,19 @@ static uint32_t next_token(uint32_t expected) | |||
1240 | debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); | 1246 | debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); |
1241 | } else { | 1247 | } else { |
1242 | debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); | 1248 | debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); |
1249 | if (end_of_name == p) { | ||
1250 | /* there is no space for trailing NUL in t_string! | ||
1251 | * We need to save the char we are going to NUL. | ||
1252 | * (we'll use it in future call to next_token()) | ||
1253 | */ | ||
1254 | g_saved_ch = *end_of_name; | ||
1255 | // especially pathological example is V="abc"; V.2 - it's V concatenated to .2 | ||
1256 | // (it evaluates to "abc0.2"). Because of this case, we can't simply cache | ||
1257 | // '.' and analyze it later: we also have to *store it back* in next | ||
1258 | // next_token(), in order to give my_strtod() the undamaged ".2" string. | ||
1259 | } | ||
1243 | } | 1260 | } |
1261 | *end_of_name = '\0'; /* terminate t_string */ | ||
1244 | } | 1262 | } |
1245 | token_found: | 1263 | token_found: |
1246 | g_pos = p; | 1264 | g_pos = p; |
@@ -3420,38 +3438,20 @@ int awk_main(int argc UNUSED_PARAM, char **argv) | |||
3420 | 3438 | ||
3421 | g_progname = llist_pop(&list_f); | 3439 | g_progname = llist_pop(&list_f); |
3422 | fd = xopen_stdin(g_progname); | 3440 | fd = xopen_stdin(g_progname); |
3423 | /* 1st byte is reserved for "move name one char back" trick in next_token */ | 3441 | s = xmalloc_read(fd, NULL); /* it's NUL-terminated */ |
3424 | i = 1; | ||
3425 | s = NULL; | ||
3426 | for (;;) { | ||
3427 | int sz; | ||
3428 | s = xrealloc(s, i + 1000); | ||
3429 | sz = safe_read(fd, s + i, 1000); | ||
3430 | if (sz <= 0) | ||
3431 | break; | ||
3432 | i += sz; | ||
3433 | } | ||
3434 | s = xrealloc(s, i + 1); /* trim unused 999 bytes */ | ||
3435 | s[i] = '\0'; | ||
3436 | close(fd); | 3442 | close(fd); |
3437 | parse_program(s + 1); | 3443 | parse_program(s); |
3438 | free(s); | 3444 | free(s); |
3439 | } | 3445 | } |
3440 | g_progname = "cmd. line"; | 3446 | g_progname = "cmd. line"; |
3441 | #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS | 3447 | #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS |
3442 | while (list_e) { | 3448 | while (list_e) { |
3443 | /* NB: "move name one char back" trick in next_token | ||
3444 | * can use argv[i][-1] here. | ||
3445 | */ | ||
3446 | parse_program(llist_pop(&list_e)); | 3449 | parse_program(llist_pop(&list_e)); |
3447 | } | 3450 | } |
3448 | #endif | 3451 | #endif |
3449 | if (!(opt & (OPT_f | OPT_e))) { | 3452 | if (!(opt & (OPT_f | OPT_e))) { |
3450 | if (!*argv) | 3453 | if (!*argv) |
3451 | bb_show_usage(); | 3454 | bb_show_usage(); |
3452 | /* NB: "move name one char back" trick in next_token | ||
3453 | * can use argv[i][-1] here. | ||
3454 | */ | ||
3455 | parse_program(*argv++); | 3455 | parse_program(*argv++); |
3456 | } | 3456 | } |
3457 | 3457 | ||