aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2021-06-29 03:27:07 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2021-06-29 03:27:07 +0200
commit 4f27503a1ecab8dfe373a349df3d8fe3c22e2160 (patch)
tree c10c3c9ad535638916be478323ce3b36ad41bda1
parent f414fb4411e65662b44f038ed3175789172edc20 (diff)
download busybox-w32-4f27503a1ecab8dfe373a349df3d8fe3c22e2160.tar.gz
busybox-w32-4f27503a1ecab8dfe373a349df3d8fe3c22e2160.tar.bz2
busybox-w32-4f27503a1ecab8dfe373a349df3d8fe3c22e2160.zip
awk: get rid of "move name one char back" trick in next_token()
function                                             old     new   delta
next_token                                           791     812     +21
awk_main                                             886     831     -55
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 21/-55)            Total: -34 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- editors/awk.c 54
1 file changed, 27 insertions, 27 deletions
diff --git a/editors/awk.c b/editors/awk.c
index 1a4468a53..fb1e5d59b 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -535,6 +535,7 @@ struct globals {
535 var *Fields; 535 var *Fields;
536 nvblock *g_cb; 536 nvblock *g_cb;
537 char *g_pos; 537 char *g_pos;
538 char g_saved_ch;
538 smallint icase; 539 smallint icase;
539 smallint exiting; 540 smallint exiting;
540 smallint nextrec; 541 smallint nextrec;
@@ -599,6 +600,7 @@ struct globals2 {
599#define Fields (G1.Fields ) 600#define Fields (G1.Fields )
600#define g_cb (G1.g_cb ) 601#define g_cb (G1.g_cb )
601#define g_pos (G1.g_pos ) 602#define g_pos (G1.g_pos )
603#define g_saved_ch (G1.g_saved_ch )
602#define icase (G1.icase ) 604#define icase (G1.icase )
603#define exiting (G1.exiting ) 605#define exiting (G1.exiting )
604#define nextrec (G1.nextrec ) 606#define nextrec (G1.nextrec )
@@ -1125,6 +1127,10 @@ static uint32_t next_token(uint32_t expected)
1125 t_info = save_info; 1127 t_info = save_info;
1126 } else { 1128 } else {
1127 p = g_pos; 1129 p = g_pos;
1130 if (g_saved_ch != '\0') {
1131 *p = g_saved_ch;
1132 g_saved_ch = '\0';
1133 }
1128 readnext: 1134 readnext:
1129 p = skip_spaces(p); 1135 p = skip_spaces(p);
1130 g_lineno = t_lineno; 1136 g_lineno = t_lineno;
@@ -1183,6 +1189,8 @@ static uint32_t next_token(uint32_t expected)
1183 tc = TC_NUMBER; 1189 tc = TC_NUMBER;
1184 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); 1190 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1185 } else { 1191 } else {
1192 char *end_of_name;
1193
1186 if (*p == '\n') 1194 if (*p == '\n')
1187 t_lineno++; 1195 t_lineno++;
1188 1196
@@ -1219,16 +1227,14 @@ static uint32_t next_token(uint32_t expected)
1219 if (!isalnum_(*p)) 1227 if (!isalnum_(*p))
1220 syntax_error(EMSG_UNEXP_TOKEN); /* no */ 1228 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1221 /* yes */ 1229 /* yes */
1222/* "move name one char back" trick: we need a byte for NUL terminator */ 1230 t_string = p;
1223/* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */ 1231 while (isalnum_(*p))
1224 t_string = --p; 1232 p++;
1225 while (isalnum_(*++p)) { 1233 end_of_name = p;
1226 p[-1] = *p;
1227 }
1228 p[-1] = '\0';
1229 tc = TC_VARIABLE; 1234 tc = TC_VARIABLE;
1230 /* also consume whitespace between functionname and bracket */ 1235 /* also consume whitespace between functionname and bracket */
1231 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) 1236 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1237//TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters?
1232 p = skip_spaces(p); 1238 p = skip_spaces(p);
1233 if (*p == '(') { 1239 if (*p == '(') {
1234 p++; 1240 p++;
@@ -1240,7 +1246,19 @@ static uint32_t next_token(uint32_t expected)
1240 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); 1246 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1241 } else { 1247 } else {
1242 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); 1248 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1249 if (end_of_name == p) {
1250 /* there is no space for trailing NUL in t_string!
1251 * We need to save the char we are going to NUL.
1252 * (we'll use it in future call to next_token())
1253 */
1254 g_saved_ch = *end_of_name;
1255// especially pathological example is V="abc"; V.2 - it's V concatenated to .2
1256// (it evaluates to "abc0.2"). Because of this case, we can't simply cache
1257// '.' and analyze it later: we also have to *store it back* in next
1258// next_token(), in order to give my_strtod() the undamaged ".2" string.
1259 }
1243 } 1260 }
1261 *end_of_name = '\0'; /* terminate t_string */
1244 } 1262 }
1245 token_found: 1263 token_found:
1246 g_pos = p; 1264 g_pos = p;
@@ -3420,38 +3438,20 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3420 3438
3421 g_progname = llist_pop(&list_f); 3439 g_progname = llist_pop(&list_f);
3422 fd = xopen_stdin(g_progname); 3440 fd = xopen_stdin(g_progname);
3423 /* 1st byte is reserved for "move name one char back" trick in next_token */ 3441 s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
3424 i = 1;
3425 s = NULL;
3426 for (;;) {
3427 int sz;
3428 s = xrealloc(s, i + 1000);
3429 sz = safe_read(fd, s + i, 1000);
3430 if (sz <= 0)
3431 break;
3432 i += sz;
3433 }
3434 s = xrealloc(s, i + 1); /* trim unused 999 bytes */
3435 s[i] = '\0';
3436 close(fd); 3442 close(fd);
3437 parse_program(s + 1); 3443 parse_program(s);
3438 free(s); 3444 free(s);
3439 } 3445 }
3440 g_progname = "cmd. line"; 3446 g_progname = "cmd. line";
3441#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS 3447#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3442 while (list_e) { 3448 while (list_e) {
3443 /* NB: "move name one char back" trick in next_token
3444 * can use argv[i][-1] here.
3445 */
3446 parse_program(llist_pop(&list_e)); 3449 parse_program(llist_pop(&list_e));
3447 } 3450 }
3448#endif 3451#endif
3449 if (!(opt & (OPT_f | OPT_e))) { 3452 if (!(opt & (OPT_f | OPT_e))) {
3450 if (!*argv) 3453 if (!*argv)
3451 bb_show_usage(); 3454 bb_show_usage();
3452 /* NB: "move name one char back" trick in next_token
3453 * can use argv[i][-1] here.
3454 */
3455 parse_program(*argv++); 3455 parse_program(*argv++);
3456 } 3456 }
3457 3457