aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2023-12-31 15:49:54 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2023-12-31 15:49:54 +0100
commit: 789ccac7d9d1a9e433570ac9628992a01f946643 (patch)
tree: 1208e688dae9191740b57b0d9dbdb36e008b0a6a
parent: 5e0e54827fb0fa80d2c894eb67e8696921095935 (diff)
download: busybox-w32-789ccac7d9d1a9e433570ac9628992a01f946643.tar.gz
          busybox-w32-789ccac7d9d1a9e433570ac9628992a01f946643.tar.bz2
          busybox-w32-789ccac7d9d1a9e433570ac9628992a01f946643.zip
awk: fix handling of empty fields
Patch by M Rubon <rubonmtz@gmail.com>:

Busybox awk handles references to empty (not provided in the input)
fields differently during the first line of input, as compared to
subsequent lines.

    $ (echo a ; echo b) | awk '$2 != 0'     #wrong
    b

No field $2 value is provided in the input. When awk references field
$2 for the "a" line, it is seen to have a different behaviour than
when it is referenced for the "b" line.

Problem in BusyBox v1.36.1 embedded in OpenWrt 23.05.0
Same problem also in 21.02 versions of OpenWrt
Same problem in BusyBox v1.37.0.git

I get the correct expected output from Ubuntu gawk and Debian mawk,
and from my fix.

    will@dev:~$ (echo a ; echo b) | awk '$2 != 0'     #correct
    a
    b
    will@dev:~/busybox$ (echo a ; echo b ) | ./busybox awk '$2 != 0'     #fixed
    a
    b

I built and poked into the source code at editors/awk.c. The function
fsrealloc(int size) is core to allocating, initializing, reallocating,
and reinitializing fields, both real input line fields and imaginary
fields that the script references but that do not exist in the input.

When fsrealloc() needs more field space than it has previously
allocated, it initializes those new fields differently than how they
are later reinitialized for the next input line. This works fine for
fields defined in the input, like $1, but does not work the first time
when there is no input for that field (e.g. field $99).

My one-line fix simply makes the initialization and clrvar()
reinitialization use the same value for .type. I am not sure if there
are regression tests to run, but I have not done those.

I'm not sure if I understand why clrvar() is not setting .type to a
default constant value, but in any case I have left that untouched.

    function                                             old     new   delta
    ------------------------------------------------------------------------------
    (add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0)                 Total: 0 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--  editors/awk.c        33
-rwxr-xr-x  testsuite/awk.tests   7
2 files changed, 24 insertions, 16 deletions
diff --git a/editors/awk.c b/editors/awk.c
index bc95c4155..aa485c782 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -555,8 +555,9 @@ struct globals {
555 //we are reusing ahash as fdhash, via define (see later) 555 //we are reusing ahash as fdhash, via define (see later)
556 const char *g_progname; 556 const char *g_progname;
557 int g_lineno; 557 int g_lineno;
558 int nfields; 558 int num_fields; /* number of existing $N's */
559 unsigned maxfields; 559 unsigned num_alloc_fields; /* current size of Fields[] */
560 /* NB: Fields[0] corresponds to $1, not to $0 */
560 var *Fields; 561 var *Fields;
561 char *g_pos; 562 char *g_pos;
562 char g_saved_ch; 563 char g_saved_ch;
@@ -631,8 +632,8 @@ struct globals2 {
631// for fdhash in execution stage. 632// for fdhash in execution stage.
632#define g_progname (G1.g_progname ) 633#define g_progname (G1.g_progname )
633#define g_lineno (G1.g_lineno ) 634#define g_lineno (G1.g_lineno )
634#define nfields (G1.nfields ) 635#define num_fields (G1.num_fields )
635#define maxfields (G1.maxfields ) 636#define num_alloc_fields (G1.num_alloc_fields)
636#define Fields (G1.Fields ) 637#define Fields (G1.Fields )
637#define g_pos (G1.g_pos ) 638#define g_pos (G1.g_pos )
638#define g_saved_ch (G1.g_saved_ch ) 639#define g_saved_ch (G1.g_saved_ch )
@@ -1966,30 +1967,30 @@ static void fsrealloc(int size)
1966{ 1967{
1967 int i, newsize; 1968 int i, newsize;
1968 1969
1969 if ((unsigned)size >= maxfields) { 1970 if ((unsigned)size >= num_alloc_fields) {
1970 /* Sanity cap, easier than catering for over/underflows */ 1971 /* Sanity cap, easier than catering for over/underflows */
1971 if ((unsigned)size > 0xffffff) 1972 if ((unsigned)size > 0xffffff)
1972 bb_die_memory_exhausted(); 1973 bb_die_memory_exhausted();
1973 1974
1974 i = maxfields; 1975 i = num_alloc_fields;
1975 maxfields = size + 16; 1976 num_alloc_fields = size + 16;
1976 1977
1977 newsize = maxfields * sizeof(Fields[0]); 1978 newsize = num_alloc_fields * sizeof(Fields[0]);
1978 debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize); 1979 debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
1979 Fields = xrealloc(Fields, newsize); 1980 Fields = xrealloc(Fields, newsize);
1980 debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1); 1981 debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
1981 /* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */ 1982 /* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
1982 1983
1983 for (; i < maxfields; i++) { 1984 for (; i < num_alloc_fields; i++) {
1984 Fields[i].type = VF_SPECIAL; 1985 Fields[i].type = VF_SPECIAL | VF_DIRTY;
1985 Fields[i].string = NULL; 1986 Fields[i].string = NULL;
1986 } 1987 }
1987 } 1988 }
1988 /* if size < nfields, clear extra field variables */ 1989 /* if size < num_fields, clear extra field variables */
1989 for (i = size; i < nfields; i++) { 1990 for (i = size; i < num_fields; i++) {
1990 clrvar(Fields + i); 1991 clrvar(Fields + i);
1991 } 1992 }
1992 nfields = size; 1993 num_fields = size;
1993} 1994}
1994 1995
1995static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[]) 1996static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[])
@@ -2126,7 +2127,7 @@ static void split_f0(void)
2126 /* set NF manually to avoid side effects */ 2127 /* set NF manually to avoid side effects */
2127 clrvar(intvar[NF]); 2128 clrvar(intvar[NF]);
2128 intvar[NF]->type = VF_NUMBER | VF_SPECIAL; 2129 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
2129 intvar[NF]->number = nfields; 2130 intvar[NF]->number = num_fields;
2130#undef fstrings 2131#undef fstrings
2131} 2132}
2132 2133
@@ -2976,7 +2977,7 @@ static var *evaluate(node *op, var *res)
2976 syntax_error(EMSG_TOO_FEW_ARGS); 2977 syntax_error(EMSG_TOO_FEW_ARGS);
2977 L.v = evaluate(op1, TMPVAR0); 2978 L.v = evaluate(op1, TMPVAR0);
2978 /* Does L.v point to $n variable? */ 2979 /* Does L.v point to $n variable? */
2979 if ((size_t)(L.v - Fields) < maxfields) { 2980 if ((size_t)(L.v - Fields) < num_alloc_fields) {
2980 /* yes, remember where Fields[] is */ 2981 /* yes, remember where Fields[] is */
2981 old_Fields_ptr = Fields; 2982 old_Fields_ptr = Fields;
2982 } 2983 }
@@ -3517,7 +3518,7 @@ static var *evaluate(node *op, var *res)
3517 res = intvar[F0]; 3518 res = intvar[F0];
3518 } else { 3519 } else {
3519 split_f0(); 3520 split_f0();
3520 if (i > nfields) 3521 if (i > num_fields)
3521 fsrealloc(i); 3522 fsrealloc(i);
3522 res = &Fields[i - 1]; 3523 res = &Fields[i - 1];
3523 } 3524 }
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 5a792c241..063084a1c 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -592,6 +592,13 @@ testing 'awk gensub backslashes \\0' \
592\\0|\\0 592\\0|\\0
593' '' '' 593' '' ''
594 594
595# References to empty (not provided in the input) fields in first versus subsequent lines
596testing 'awk references to empty fields' \
597 'awk '$sq'$2 != 0'$sq \
598 'a
599b
600' '' 'a\nb\n'
601
595# The "b" in "abc" should not match <b* pattern. 602# The "b" in "abc" should not match <b* pattern.
596# Currently we use REG_STARTEND ("This flag is a BSD extension, not present in POSIX") 603# Currently we use REG_STARTEND ("This flag is a BSD extension, not present in POSIX")
597# to implement the code to handle this correctly, but if your libc has no REG_STARTEND, 604# to implement the code to handle this correctly, but if your libc has no REG_STARTEND,