From 6824298ab4d3da40763af4d2d466a72745b8b593 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 25 May 2023 14:22:10 +0200 Subject: hush: fix ELIF cmd1;cmd2 THEN ... not executing cmd2, closes 15571 function old new delta run_list 1012 1024 +12 Signed-off-by: Denys Vlasenko --- shell/ash_test/ash-misc/elif1.right | 4 ++++ shell/ash_test/ash-misc/elif1.tests | 6 ++++++ shell/ash_test/ash-misc/elif2.right | 2 ++ shell/ash_test/ash-misc/elif2.tests | 8 ++++++++ shell/hush.c | 24 ++++++++++++++++-------- shell/hush_test/hush-misc/elif1.right | 4 ++++ shell/hush_test/hush-misc/elif1.tests | 6 ++++++ shell/hush_test/hush-misc/elif2.right | 2 ++ shell/hush_test/hush-misc/elif2.tests | 8 ++++++++ 9 files changed, 56 insertions(+), 8 deletions(-) create mode 100644 shell/ash_test/ash-misc/elif1.right create mode 100755 shell/ash_test/ash-misc/elif1.tests create mode 100644 shell/ash_test/ash-misc/elif2.right create mode 100755 shell/ash_test/ash-misc/elif2.tests create mode 100644 shell/hush_test/hush-misc/elif1.right create mode 100755 shell/hush_test/hush-misc/elif1.tests create mode 100644 shell/hush_test/hush-misc/elif2.right create mode 100755 shell/hush_test/hush-misc/elif2.tests diff --git a/shell/ash_test/ash-misc/elif1.right b/shell/ash_test/ash-misc/elif1.right new file mode 100644 index 000000000..36dc59fed --- /dev/null +++ b/shell/ash_test/ash-misc/elif1.right @@ -0,0 +1,4 @@ +ELIF1 +ELIF2 +ELIF THEN +Ok:0 diff --git a/shell/ash_test/ash-misc/elif1.tests b/shell/ash_test/ash-misc/elif1.tests new file mode 100755 index 000000000..77b8a25ea --- /dev/null +++ b/shell/ash_test/ash-misc/elif1.tests @@ -0,0 +1,6 @@ +if false; then + : +elif echo 'ELIF1'; echo 'ELIF2'; then + echo "ELIF THEN" +fi +echo "Ok:$?" 
diff --git a/shell/ash_test/ash-misc/elif2.right b/shell/ash_test/ash-misc/elif2.right new file mode 100644 index 000000000..8f2851f91 --- /dev/null +++ b/shell/ash_test/ash-misc/elif2.right @@ -0,0 +1,2 @@ +THEN +Ok:0 diff --git a/shell/ash_test/ash-misc/elif2.tests b/shell/ash_test/ash-misc/elif2.tests new file mode 100755 index 000000000..3e5876f05 --- /dev/null +++ b/shell/ash_test/ash-misc/elif2.tests @@ -0,0 +1,8 @@ +if true; then + echo "THEN" +elif echo "ELIF false"; false; then + echo "ELIF THEN" +else + echo "ELSE" +fi +echo "Ok:$?" diff --git a/shell/hush.c b/shell/hush.c index 810dafd35..1f7b58d4f 100644 --- a/shell/hush.c +++ b/shell/hush.c @@ -9758,7 +9758,7 @@ static int run_list(struct pipe *pi) smallint last_rword; /* ditto */ #endif - debug_printf_exec("run_list start lvl %d\n", G.run_list_level); + debug_printf_exec("run_list lvl %d start\n", G.run_list_level); debug_enter(); #if ENABLE_HUSH_LOOPS @@ -9817,7 +9817,7 @@ static int run_list(struct pipe *pi) break; IF_HAS_KEYWORDS(rword = pi->res_word;) - debug_printf_exec(": rword=%d cond_code=%d last_rword=%d\n", + debug_printf_exec(": rword:%d cond_code:%d last_rword:%d\n", rword, cond_code, last_rword); sv_errexit_depth = G.errexit_depth; @@ -9851,23 +9851,29 @@ static int run_list(struct pipe *pi) } } last_followup = pi->followup; - IF_HAS_KEYWORDS(last_rword = rword;) #if ENABLE_HUSH_IF - if (cond_code) { + if (cond_code != 0) { if (rword == RES_THEN) { /* if false; then ... fi has exitcode 0! */ G.last_exitcode = rcode = EXIT_SUCCESS; /* "if THEN cmd": skip cmd */ + debug_printf_exec("skipped THEN cmd because IF condition was false\n"); + last_rword = rword; continue; } } else { - if (rword == RES_ELSE || rword == RES_ELIF) { + if (rword == RES_ELSE + || (rword == RES_ELIF && last_rword != RES_ELIF) + ) { /* "if then ... 
ELSE/ELIF cmd": * skip cmd and all following ones */ + debug_printf_exec("skipped ELSE/ELIF branch because IF condition was true\n"); break; } + //if (rword == RES_THEN): "if THEN cmd", run cmd (fall through) } #endif + IF_HAS_KEYWORDS(last_rword = rword;) #if ENABLE_HUSH_LOOPS if (rword == RES_FOR) { /* && pi->num_cmds - always == 1 */ if (!for_lcur) { @@ -9943,7 +9949,7 @@ static int run_list(struct pipe *pi) ); /* TODO: which FNM_xxx flags to use? */ cond_code = (fnmatch(pattern, case_word, /*flags:*/ 0) != 0); - debug_printf_exec("fnmatch(pattern:'%s',str:'%s'):%d\n", + debug_printf_exec("cond_code=fnmatch(pattern:'%s',str:'%s'):%d\n", pattern, case_word, cond_code); free(pattern); if (cond_code == 0) { @@ -10069,8 +10075,10 @@ static int run_list(struct pipe *pi) /* Analyze how result affects subsequent commands */ #if ENABLE_HUSH_IF - if (rword == RES_IF || rword == RES_ELIF) + if (rword == RES_IF || rword == RES_ELIF) { + debug_printf_exec("cond_code=rcode:%d\n", rcode); cond_code = rcode; + } #endif check_jobs_and_continue: checkjobs(NULL, 0 /*(no pid to wait for)*/); @@ -10111,7 +10119,7 @@ static int run_list(struct pipe *pi) free(case_word); #endif debug_leave(); - debug_printf_exec("run_list lvl %d return %d\n", G.run_list_level + 1, rcode); + debug_printf_exec("run_list lvl %d return %d\n", G.run_list_level, rcode); return rcode; } diff --git a/shell/hush_test/hush-misc/elif1.right b/shell/hush_test/hush-misc/elif1.right new file mode 100644 index 000000000..36dc59fed --- /dev/null +++ b/shell/hush_test/hush-misc/elif1.right @@ -0,0 +1,4 @@ +ELIF1 +ELIF2 +ELIF THEN +Ok:0 diff --git a/shell/hush_test/hush-misc/elif1.tests b/shell/hush_test/hush-misc/elif1.tests new file mode 100755 index 000000000..77b8a25ea --- /dev/null +++ b/shell/hush_test/hush-misc/elif1.tests @@ -0,0 +1,6 @@ +if false; then + : +elif echo 'ELIF1'; echo 'ELIF2'; then + echo "ELIF THEN" +fi +echo "Ok:$?" 
diff --git a/shell/hush_test/hush-misc/elif2.right b/shell/hush_test/hush-misc/elif2.right new file mode 100644 index 000000000..8f2851f91 --- /dev/null +++ b/shell/hush_test/hush-misc/elif2.right @@ -0,0 +1,2 @@ +THEN +Ok:0 diff --git a/shell/hush_test/hush-misc/elif2.tests b/shell/hush_test/hush-misc/elif2.tests new file mode 100755 index 000000000..3e5876f05 --- /dev/null +++ b/shell/hush_test/hush-misc/elif2.tests @@ -0,0 +1,8 @@ +if true; then + echo "THEN" +elif echo "ELIF false"; false; then + echo "ELIF THEN" +else + echo "ELSE" +fi +echo "Ok:$?" -- cgit v1.2.3-55-g6feb From b5be8da350b59ddc1925c485f2acb9c8f5b79b7e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 25 May 2023 15:24:56 +0200 Subject: hush: make "false" built-in function old new delta bltins1 384 396 +12 builtin_false - 6 +6 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 1/0 up/down: 18/0) Total: 18 bytes Signed-off-by: Denys Vlasenko --- shell/hush.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/shell/hush.c b/shell/hush.c index 1f7b58d4f..cdaa67a3b 100644 --- a/shell/hush.c +++ b/shell/hush.c @@ -1061,6 +1061,7 @@ static int builtin_export(char **argv) FAST_FUNC; #if ENABLE_HUSH_READONLY static int builtin_readonly(char **argv) FAST_FUNC; #endif +static int builtin_false(char **argv) FAST_FUNC; #if ENABLE_HUSH_JOB static int builtin_fg_bg(char **argv) FAST_FUNC; static int builtin_jobs(char **argv) FAST_FUNC; @@ -1161,6 +1162,7 @@ static const struct built_in_command bltins1[] ALIGN_PTR = { #if ENABLE_HUSH_EXPORT BLTIN("export" , builtin_export , "Set environment variables"), #endif + BLTIN("false" , builtin_false , NULL), #if ENABLE_HUSH_JOB BLTIN("fg" , builtin_fg_bg , "Bring job to foreground"), #endif @@ -10831,6 +10833,11 @@ static int FAST_FUNC builtin_true(char **argv UNUSED_PARAM) return 0; } +static int FAST_FUNC builtin_false(char **argv UNUSED_PARAM) +{ + return 1; +} + #if ENABLE_HUSH_TEST || 
ENABLE_HUSH_ECHO || ENABLE_HUSH_PRINTF || ENABLE_HUSH_KILL static NOINLINE int run_applet_main(char **argv, int (*applet_main_func)(int argc, char **argv)) { -- cgit v1.2.3-55-g6feb From 2bda790fd14adb80820643198bb1e96e4be73571 Mon Sep 17 00:00:00 2001 From: David Leonard Date: Thu, 11 May 2023 23:59:04 +1000 Subject: od: fix -O od with option -O (4-byte octal) was incorrectly displaying 2-byte decimal when built without CONFIG_DESKTOP Signed-off-by: David Leonard Signed-off-by: Denys Vlasenko --- coreutils/od.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coreutils/od.c b/coreutils/od.c index 6f22331e0..dcf1bd6f6 100644 --- a/coreutils/od.c +++ b/coreutils/od.c @@ -166,7 +166,7 @@ static const char od_o2si[] ALIGN1 = { 0, 1, 2, 3, 5, 4, 6, 6, 7, 8, 9, 0xa, 0xb, 0xa, 0xa, - 0xb, 1, 8, 9, + 0xc, 1, 8, 9, }; int od_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; -- cgit v1.2.3-55-g6feb From 64bdd7566c21cb53cb4c384ed52845106529e55f Mon Sep 17 00:00:00 2001 From: David Leonard Date: Thu, 11 May 2023 23:49:58 +1000 Subject: od: add tests * Added tests for od (non-DESKTOP little-endian) * Allow 'optional' to invert meaning of a config option with '!' 
Signed-off-by: David Leonard Signed-off-by: Denys Vlasenko --- testsuite/od.tests | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++ testsuite/testing.sh | 10 +++ 2 files changed, 220 insertions(+) diff --git a/testsuite/od.tests b/testsuite/od.tests index 0880e0d2f..0b949d5f0 100755 --- a/testsuite/od.tests +++ b/testsuite/od.tests @@ -6,6 +6,216 @@ # testing "test name" "commands" "expected result" "file input" "stdin" +input="$(printf '\001\002\003\nABC\xfe')" + +le=false +{ printf '\0\1' | od -i | grep -q 256; } && le=true +readonly le + +optional !DESKTOP +testing "od -a (!DESKTOP)" \ + "od -a" \ +"\ +0000000 soh stx etx lf A B C fe +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +testing "od -B (!DESKTOP)" \ + "od -B" \ +"\ +0000000 001001 005003 041101 177103 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -o (!DESKTOP little-endian)" \ + "od -o" \ +"\ +0000000 001001 005003 041101 177103 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +testing "od -b (!DESKTOP)" \ + "od -b" \ +"\ +0000000 001 002 003 012 101 102 103 376 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +testing "od -c (!DESKTOP)" \ + "od -c" \ +"\ +0000000 001 002 003 \\\\n A B C 376 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -d (!DESKTOP little-endian)" \ + "od -d" \ +"\ +0000000 00513 02563 16961 65091 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -D (!DESKTOP little-endian)" \ + "od -D" \ +"\ +0000000 0167969281 4265820737 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -e (!DESKTOP little-endian)" \ + "od -e" \ +"\ +0000000 -1.61218556514036e+300 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -F (!DESKTOP little-endian)" \ + "od -F" \ +"\ +0000000 -1.61218556514036e+300 +0000010 +" \ + "" "$input" + +optional !DESKTOP +$le || SKIP=1 +testing "od -f (!DESKTOP little-endian)" 
\ + "od -f" \ +"\ +0000000 6.3077975e-33 -6.4885867e+37 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -H (!DESKTOP little-endian)" \ + "od -H" \ +"\ +0000000 0a030201 fe434241 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -X (!DESKTOP little-endian)" \ + "od -X" \ +"\ +0000000 0a030201 fe434241 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -h (!DESKTOP little-endian)" \ + "od -h" \ +"\ +0000000 0201 0a03 4241 fe43 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -x (!DESKTOP little-endian)" \ + "od -x" \ +"\ +0000000 0201 0a03 4241 fe43 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -I (!DESKTOP little-endian)" \ + "od -I" \ +"\ +0000000 167969281 -29146559 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -L (!DESKTOP little-endian)" \ + "od -L" \ +"\ +0000000 167969281 -29146559 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -i (!DESKTOP little-endian)" \ + "od -i" \ +"\ +0000000 513 2563 16961 -445 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -O (!DESKTOP little-endian)" \ + "od -O" \ +"\ +0000000 01200601001 37620641101 +0000010 +" \ + "" "$input" +SKIP= + +optional !DESKTOP +$le || SKIP=1 +testing "od -l (!DESKTOP little-endian)" \ + "od -l" \ +"\ +0000000 167969281 -29146559 +0000010 +" \ + "" "$input" +SKIP= + optional DESKTOP testing "od -b" \ "od -b" \ diff --git a/testsuite/testing.sh b/testsuite/testing.sh index f5b756947..95bb46dda 100644 --- a/testsuite/testing.sh +++ b/testsuite/testing.sh @@ -56,11 +56,21 @@ optional() { SKIP= while test "$1"; do + case $1 in + "!"*) + case "${OPTIONFLAGS}" in + *:${1#!}:*) SKIP=1; return;; + esac + shift + ;; + *) case "${OPTIONFLAGS}" in *:$1:*) ;; *) SKIP=1; return ;; esac shift + ;; + esac done } -- cgit v1.2.3-55-g6feb 
From e2287f99fe6f21fd6435ad04340170ad4ba5f6b3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 25 May 2023 17:39:28 +0200 Subject: od: for !DESKTOP, match output more closely to GNU coreutils 9.1, implement -s Signed-off-by: Denys Vlasenko --- coreutils/od.c | 55 ++++++++++++++++++++++-------------- libbb/dump.c | 30 ++++++++++++++------ testsuite/od.tests | 83 +++++++++++++++++++++++++++++------------------------- 3 files changed, 100 insertions(+), 68 deletions(-) diff --git a/coreutils/od.c b/coreutils/od.c index dcf1bd6f6..6d562ea91 100644 --- a/coreutils/od.c +++ b/coreutils/od.c @@ -22,7 +22,7 @@ //usage:#if !ENABLE_DESKTOP //usage:#define od_trivial_usage -//usage: "[-aBbcDdeFfHhIiLlOovXx] [FILE]" +//usage: "[-aBbcDdeFfHhIiLlOoXxsv] [FILE]" //usage:#define od_full_usage "\n\n" //usage: "Print FILE (or stdin) unambiguously, as octal bytes by default" //usage:#endif @@ -144,29 +144,42 @@ odoffset(dumper_t *dumper, int argc, char ***argvp) } } +// A format string contains format units separated by whitespace. +// A format unit contains up to three items: an iteration count, a byte count, +// and a format. +// The iteration count is an optional integer (default 1) +// Each format is applied iteration count times. +// The byte count is an optional integer. It defines the number +// of bytes to be interpreted by each iteration of the format. +// If an iteration count and/or a byte count is specified, a slash must be +// placed after the iteration count and/or before the byte count +// to disambiguate them. +// The format is required and must be surrounded by " "s. +// It is a printf-style format. 
static const char *const add_strings[] ALIGN_PTR = { - "16/1 \"%3_u \" \"\\n\"", /* a */ - "8/2 \" %06o \" \"\\n\"", /* B, o */ - "16/1 \"%03o \" \"\\n\"", /* b */ - "16/1 \"%3_c \" \"\\n\"", /* c */ - "8/2 \" %05u \" \"\\n\"", /* d */ - "4/4 \" %010u \" \"\\n\"", /* D */ - "2/8 \" %21.14e \" \"\\n\"", /* e (undocumented in od), F */ - "4/4 \" %14.7e \" \"\\n\"", /* f */ - "4/4 \" %08x \" \"\\n\"", /* H, X */ - "8/2 \" %04x \" \"\\n\"", /* h, x */ - "4/4 \" %11d \" \"\\n\"", /* I, L, l */ - "8/2 \" %6d \" \"\\n\"", /* i */ - "4/4 \" %011o \" \"\\n\"", /* O */ + "16/1 \"%3_u \" \"\\n\"", /* 0: a */ + "8/2 \"%06o \" \"\\n\"", /* 1: B (undocumented in od), o */ + "16/1 \"%03o \" \"\\n\"", /* 2: b */ + "16/1 \"%3_c \" \"\\n\"", /* 3: c */ + "8/2 \"%5u \" \"\\n\"", /* 4: d */ + "4/4 \"%10u \" \"\\n\"", /* 5: D */ + "2/8 \"%24.14e \" \"\\n\"", /* 6: e (undocumented in od), F */ + "4/4 \"%15.7e \" \"\\n\"", /* 7: f */ + "4/4 \"%08x \" \"\\n\"", /* 8: H, X */ + "8/2 \"%04x \" \"\\n\"", /* 9: h, x */ + "2/8 \"%20lld \" \"\\n\"", /* 10: I, L, l */ + "4/4 \"%11d \" \"\\n\"", /* 11: i */ + "4/4 \"%011o \" \"\\n\"", /* 12: O */ + "8/2 \"%6d \" \"\\n\"", /* 13: s */ }; -static const char od_opts[] ALIGN1 = "aBbcDdeFfHhIiLlOoXxv"; +static const char od_opts[] ALIGN1 = "aBbcDdeFfHhIiLlOoXxsv"; static const char od_o2si[] ALIGN1 = { - 0, 1, 2, 3, 5, - 4, 6, 6, 7, 8, - 9, 0xa, 0xb, 0xa, 0xa, - 0xc, 1, 8, 9, + 0, 1, 2, 3, 5, /* aBbcD */ + 4, 6, 6, 7, 8, /* deFfH */ + 9, 10, 11, 10, 10, /* hIiLl */ + 12, 1, 8, 9, 13 /* OoXxs */ }; int od_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; @@ -184,9 +197,9 @@ int od_main(int argc, char **argv) if (first) { first = 0; bb_dump_add(dumper, "\"%07.7_Ao\n\""); - bb_dump_add(dumper, "\"%07.7_ao \""); + bb_dump_add(dumper, "\"%07.7_ao \""); } else { - bb_dump_add(dumper, "\" \""); + bb_dump_add(dumper, "\" \""); } bb_dump_add(dumper, add_strings[(int)od_o2si[(p - od_opts)]]); } else { /* P, p, s, w, or other unhandled */ diff --git 
a/libbb/dump.c b/libbb/dump.c index fcdee8343..cfb9d94f9 100644 --- a/libbb/dump.c +++ b/libbb/dump.c @@ -187,6 +187,10 @@ static NOINLINE void rewrite(priv_dumper_t *dumper, FS *fs) ++p2; ++p1; + if (*p1 == 'l') { /* %lld etc */ + ++p2; + ++p1; + } DO_INT_CONV: e = strchr(int_convs, *p1); /* "diouxX"? */ if (!e) @@ -194,7 +198,7 @@ static NOINLINE void rewrite(priv_dumper_t *dumper, FS *fs) pr->flags = F_INT; if (e > int_convs + 1) /* not d or i? */ pr->flags = F_UINT; - byte_count_str = "\004\002\001"; + byte_count_str = "\010\004\002\001"; goto DO_BYTE_COUNT; } else if (strchr(int_convs, *p1)) { /* %d etc */ @@ -601,22 +605,32 @@ static NOINLINE void display(priv_dumper_t* dumper) break; } case F_INT: { - int ival; - short sval; + union { + uint16_t val16; + uint32_t val32; + uint64_t val64; + } u; + int value = *bp; switch (pr->bcnt) { case 1: - printf(pr->fmt, (int) *bp); break; case 2: - memcpy(&sval, bp, sizeof(sval)); - printf(pr->fmt, (int) sval); + memcpy(&u.val16, bp, 2); + value = u.val16; break; case 4: - memcpy(&ival, bp, sizeof(ival)); - printf(pr->fmt, ival); + memcpy(&u.val32, bp, 4); + value = u.val32; break; + case 8: + memcpy(&u.val64, bp, 8); +//A hack. Users _must_ use %llX formats to not truncate high bits + printf(pr->fmt, (long long) u.val64); + goto skip; } + printf(pr->fmt, value); + skip: break; } case F_P: diff --git a/testsuite/od.tests b/testsuite/od.tests index 0b949d5f0..500e0e638 100755 --- a/testsuite/od.tests +++ b/testsuite/od.tests @@ -9,14 +9,19 @@ input="$(printf '\001\002\003\nABC\xfe')" le=false -{ printf '\0\1' | od -i | grep -q 256; } && le=true +{ printf '\0\1' | od -s | grep -q 256; } && le=true readonly le +# NB: +# sed 's/ *$//' truncates trailing spaces. +# This needs to be fixed properly (not output them). +# For now, the tests ignore them (does not require a match). 
+ optional !DESKTOP testing "od -a (!DESKTOP)" \ - "od -a" \ + "od -a | sed 's/ *$//'" \ "\ -0000000 soh stx etx lf A B C fe +0000000 soh stx etx lf A B C fe 0000010 " \ "" "$input" @@ -24,9 +29,9 @@ SKIP= optional !DESKTOP testing "od -B (!DESKTOP)" \ - "od -B" \ + "od -B | sed 's/ *$//'" \ "\ -0000000 001001 005003 041101 177103 +0000000 001001 005003 041101 177103 0000010 " \ "" "$input" @@ -35,9 +40,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -o (!DESKTOP little-endian)" \ - "od -o" \ + "od -o | sed 's/ *$//'" \ "\ -0000000 001001 005003 041101 177103 +0000000 001001 005003 041101 177103 0000010 " \ "" "$input" @@ -45,9 +50,9 @@ SKIP= optional !DESKTOP testing "od -b (!DESKTOP)" \ - "od -b" \ + "od -b | sed 's/ *$//'" \ "\ -0000000 001 002 003 012 101 102 103 376 +0000000 001 002 003 012 101 102 103 376 0000010 " \ "" "$input" @@ -55,9 +60,9 @@ SKIP= optional !DESKTOP testing "od -c (!DESKTOP)" \ - "od -c" \ + "od -c | sed 's/ *$//'" \ "\ -0000000 001 002 003 \\\\n A B C 376 +0000000 001 002 003 \\\\n A B C 376 0000010 " \ "" "$input" @@ -66,9 +71,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -d (!DESKTOP little-endian)" \ - "od -d" \ + "od -d | sed 's/ *$//'" \ "\ -0000000 00513 02563 16961 65091 +0000000 513 2563 16961 65091 0000010 " \ "" "$input" @@ -77,9 +82,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -D (!DESKTOP little-endian)" \ - "od -D" \ + "od -D | sed 's/ *$//'" \ "\ -0000000 0167969281 4265820737 +0000000 167969281 4265820737 0000010 " \ "" "$input" @@ -88,9 +93,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -e (!DESKTOP little-endian)" \ - "od -e" \ + "od -e | sed 's/ *$//'" \ "\ -0000000 -1.61218556514036e+300 +0000000 -1.61218556514036e+300 0000010 " \ "" "$input" @@ -99,9 +104,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -F (!DESKTOP little-endian)" \ - "od -F" \ + "od -F | sed 's/ *$//'" \ "\ -0000000 -1.61218556514036e+300 +0000000 -1.61218556514036e+300 0000010 " \ "" "$input" @@ -109,9 +114,9 
@@ testing "od -F (!DESKTOP little-endian)" \ optional !DESKTOP $le || SKIP=1 testing "od -f (!DESKTOP little-endian)" \ - "od -f" \ + "od -f | sed 's/ *$//'" \ "\ -0000000 6.3077975e-33 -6.4885867e+37 +0000000 6.3077975e-33 -6.4885867e+37 0000010 " \ "" "$input" @@ -120,9 +125,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -H (!DESKTOP little-endian)" \ - "od -H" \ + "od -H | sed 's/ *$//'" \ "\ -0000000 0a030201 fe434241 +0000000 0a030201 fe434241 0000010 " \ "" "$input" @@ -131,9 +136,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -X (!DESKTOP little-endian)" \ - "od -X" \ + "od -X | sed 's/ *$//'" \ "\ -0000000 0a030201 fe434241 +0000000 0a030201 fe434241 0000010 " \ "" "$input" @@ -142,9 +147,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -h (!DESKTOP little-endian)" \ - "od -h" \ + "od -h | sed 's/ *$//'" \ "\ -0000000 0201 0a03 4241 fe43 +0000000 0201 0a03 4241 fe43 0000010 " \ "" "$input" @@ -153,9 +158,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -x (!DESKTOP little-endian)" \ - "od -x" \ + "od -x | sed 's/ *$//'" \ "\ -0000000 0201 0a03 4241 fe43 +0000000 0201 0a03 4241 fe43 0000010 " \ "" "$input" @@ -164,9 +169,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -I (!DESKTOP little-endian)" \ - "od -I" \ + "od -I | sed 's/ *$//'" \ "\ -0000000 167969281 -29146559 +0000000 -125183517527965183 0000010 " \ "" "$input" @@ -175,9 +180,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -L (!DESKTOP little-endian)" \ - "od -L" \ + "od -L | sed 's/ *$//'" \ "\ -0000000 167969281 -29146559 +0000000 -125183517527965183 0000010 " \ "" "$input" @@ -186,9 +191,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -i (!DESKTOP little-endian)" \ - "od -i" \ + "od -i | sed 's/ *$//'" \ "\ -0000000 513 2563 16961 -445 +0000000 167969281 -29146559 0000010 " \ "" "$input" @@ -197,9 +202,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -O (!DESKTOP little-endian)" \ - "od -O" \ + "od -O | sed 's/ *$//'" \ "\ -0000000 
01200601001 37620641101 +0000000 01200601001 37620641101 0000010 " \ "" "$input" @@ -208,9 +213,9 @@ SKIP= optional !DESKTOP $le || SKIP=1 testing "od -l (!DESKTOP little-endian)" \ - "od -l" \ + "od -l | sed 's/ *$//'" \ "\ -0000000 167969281 -29146559 +0000000 -125183517527965183 0000010 " \ "" "$input" -- cgit v1.2.3-55-g6feb From ce4cfc33cade63513963f9d5e701f305cbdfe693 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 25 May 2023 19:51:37 +0200 Subject: od: correct -i, enable tests which pass for DESKTOP too function old new delta .rodata 105302 105305 +3 Signed-off-by: Denys Vlasenko --- coreutils/od.c | 2 ++ coreutils/od_bloaty.c | 6 +++-- testsuite/od.tests | 62 +++++++++++++++++++++++++++------------------------ 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/coreutils/od.c b/coreutils/od.c index 6d562ea91..bd82487e5 100644 --- a/coreutils/od.c +++ b/coreutils/od.c @@ -167,6 +167,8 @@ static const char *const add_strings[] ALIGN_PTR = { "4/4 \"%15.7e \" \"\\n\"", /* 7: f */ "4/4 \"%08x \" \"\\n\"", /* 8: H, X */ "8/2 \"%04x \" \"\\n\"", /* 9: h, x */ + /* This probably also depends on word width of the arch (what is "long"?) */ + /* should be "2/8" or "4/4" depending on sizeof(long)? 
*/ "2/8 \"%20lld \" \"\\n\"", /* 10: I, L, l */ "4/4 \"%11d \" \"\\n\"", /* 11: i */ "4/4 \"%011o \" \"\\n\"", /* 12: O */ diff --git a/coreutils/od_bloaty.c b/coreutils/od_bloaty.c index 5b5e56a21..51fff436b 100644 --- a/coreutils/od_bloaty.c +++ b/coreutils/od_bloaty.c @@ -1245,9 +1245,11 @@ int od_main(int argc UNUSED_PARAM, char **argv) if (opt & OPT_d) decode_format_string("u2"); if (opt & OPT_f) decode_format_string("fF"); if (opt & OPT_h) decode_format_string("x2"); - if (opt & OPT_i) decode_format_string("d2"); + if (opt & OPT_i) decode_format_string("dI"); if (opt & OPT_j) n_bytes_to_skip = xstrtooff_sfx(str_j, 0, bkm_suffixes); - if (opt & OPT_l) decode_format_string("d4"); + /* This probably also depends on word width of the arch (what is "long"?) */ + /* should be "d4" or "d8" depending on sizeof(long)? */ + if (opt & OPT_l) decode_format_string("d8"); if (opt & OPT_o) decode_format_string("o2"); while (lst_t) { decode_format_string(llist_pop(&lst_t)); diff --git a/testsuite/od.tests b/testsuite/od.tests index 500e0e638..fa6745b78 100755 --- a/testsuite/od.tests +++ b/testsuite/od.tests @@ -12,10 +12,10 @@ le=false { printf '\0\1' | od -s | grep -q 256; } && le=true readonly le -# NB: +# NB: for !DESKTOP, # sed 's/ *$//' truncates trailing spaces. -# This needs to be fixed properly (not output them). -# For now, the tests ignore them (does not require a match). +# This needs to be fixed properly (by not outputting them). +# For now, the tests ignore them (do not require a match). optional !DESKTOP testing "od -a (!DESKTOP)" \ @@ -26,8 +26,19 @@ testing "od -a (!DESKTOP)" \ " \ "" "$input" SKIP= +# ^^^ a bit incorrect handling of ctrl/high bytes. +# vvv this output is correct. 
+optional DESKTOP +testing "od -a (DESKTOP)" \ + "od -a" \ +"\ +0000000 soh stx etx nl A B C ~ +0000010 +" \ + "" "$input" +SKIP= -optional !DESKTOP +optional !DESKTOP #DESKTOP: unrecognized option: B testing "od -B (!DESKTOP)" \ "od -B | sed 's/ *$//'" \ "\ @@ -37,9 +48,8 @@ testing "od -B (!DESKTOP)" \ "" "$input" SKIP= -optional !DESKTOP $le || SKIP=1 -testing "od -o (!DESKTOP little-endian)" \ +testing "od -o (little-endian)" \ "od -o | sed 's/ *$//'" \ "\ 0000000 001001 005003 041101 177103 @@ -48,8 +58,7 @@ testing "od -o (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP -testing "od -b (!DESKTOP)" \ +testing "od -b" \ "od -b | sed 's/ *$//'" \ "\ 0000000 001 002 003 012 101 102 103 376 @@ -58,8 +67,7 @@ testing "od -b (!DESKTOP)" \ "" "$input" SKIP= -optional !DESKTOP -testing "od -c (!DESKTOP)" \ +testing "od -c" \ "od -c | sed 's/ *$//'" \ "\ 0000000 001 002 003 \\\\n A B C 376 @@ -68,9 +76,8 @@ testing "od -c (!DESKTOP)" \ "" "$input" SKIP= -optional !DESKTOP $le || SKIP=1 -testing "od -d (!DESKTOP little-endian)" \ +testing "od -d (little-endian)" \ "od -d | sed 's/ *$//'" \ "\ 0000000 513 2563 16961 65091 @@ -79,7 +86,7 @@ testing "od -d (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP +optional !DESKTOP #DESKTOP: unrecognized option: D $le || SKIP=1 testing "od -D (!DESKTOP little-endian)" \ "od -D | sed 's/ *$//'" \ @@ -90,7 +97,7 @@ testing "od -D (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP +optional !DESKTOP #DESKTOP: unrecognized option: e $le || SKIP=1 testing "od -e (!DESKTOP little-endian)" \ "od -e | sed 's/ *$//'" \ @@ -101,7 +108,7 @@ testing "od -e (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP +optional !DESKTOP #DESKTOP: unrecognized option: F $le || SKIP=1 testing "od -F (!DESKTOP little-endian)" \ "od -F | sed 's/ *$//'" \ @@ -111,7 +118,7 @@ testing "od -F (!DESKTOP little-endian)" \ " \ "" "$input" -optional !DESKTOP +#optional !DESKTOP $le || SKIP=1 testing "od -f 
(!DESKTOP little-endian)" \ "od -f | sed 's/ *$//'" \ @@ -122,7 +129,7 @@ testing "od -f (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP +optional !DESKTOP #DESKTOP: unrecognized option: H $le || SKIP=1 testing "od -H (!DESKTOP little-endian)" \ "od -H | sed 's/ *$//'" \ @@ -133,7 +140,7 @@ testing "od -H (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP +optional !DESKTOP #DESKTOP: unrecognized option: X $le || SKIP=1 testing "od -X (!DESKTOP little-endian)" \ "od -X | sed 's/ *$//'" \ @@ -144,9 +151,8 @@ testing "od -X (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP $le || SKIP=1 -testing "od -h (!DESKTOP little-endian)" \ +testing "od -h (little-endian)" \ "od -h | sed 's/ *$//'" \ "\ 0000000 0201 0a03 4241 fe43 @@ -155,9 +161,8 @@ testing "od -h (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP $le || SKIP=1 -testing "od -x (!DESKTOP little-endian)" \ +testing "od -x (little-endian)" \ "od -x | sed 's/ *$//'" \ "\ 0000000 0201 0a03 4241 fe43 @@ -166,7 +171,7 @@ testing "od -x (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP +optional !DESKTOP #DESKTOP: unrecognized option: I $le || SKIP=1 testing "od -I (!DESKTOP little-endian)" \ "od -I | sed 's/ *$//'" \ @@ -177,7 +182,7 @@ testing "od -I (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP +optional !DESKTOP #DESKTOP: unrecognized option: L $le || SKIP=1 testing "od -L (!DESKTOP little-endian)" \ "od -L | sed 's/ *$//'" \ @@ -188,9 +193,8 @@ testing "od -L (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP $le || SKIP=1 -testing "od -i (!DESKTOP little-endian)" \ +testing "od -i (little-endian)" \ "od -i | sed 's/ *$//'" \ "\ 0000000 167969281 -29146559 @@ -199,7 +203,7 @@ testing "od -i (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP +optional !DESKTOP #DESKTOP: unrecognized option: O $le || SKIP=1 testing "od -O (!DESKTOP little-endian)" \ "od -O | sed 's/ *$//'" \ @@ -210,9 +214,9 @@ 
testing "od -O (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP +# This probably also depends on word width of the arch (what is "long"?) $le || SKIP=1 -testing "od -l (!DESKTOP little-endian)" \ +testing "od -l (little-endian)" \ "od -l | sed 's/ *$//'" \ "\ 0000000 -125183517527965183 -- cgit v1.2.3-55-g6feb From 6882a933cf078be35f4eb93963365549d43cb497 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 25 May 2023 22:17:18 +0200 Subject: od: implement -B function old new delta .rodata 105305 105306 +1 od_main 1880 1866 -14 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 1/-14) Total: -13 bytes Signed-off-by: Denys Vlasenko --- coreutils/od_bloaty.c | 23 ++++++++++++----------- testsuite/od.tests | 7 +++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/coreutils/od_bloaty.c b/coreutils/od_bloaty.c index 51fff436b..57a4fe163 100644 --- a/coreutils/od_bloaty.c +++ b/coreutils/od_bloaty.c @@ -49,20 +49,21 @@ enum { OPT_j = 1 << 9, OPT_l = 1 << 10, OPT_o = 1 << 11, - OPT_t = 1 << 12, + OPT_B = 1 << 12, /* undocumented synonym to -o */ + OPT_t = 1 << 13, /* When zero and two or more consecutive blocks are equal, format only the first block and output an asterisk alone on the following line to indicate that identical blocks have been elided: */ - OPT_v = 1 << 13, - OPT_x = 1 << 14, - OPT_s = 1 << 15, - OPT_S = 1 << 16, - OPT_w = 1 << 17, - OPT_traditional = (1 << 18) * ENABLE_LONG_OPTS, + OPT_v = 1 << 14, + OPT_x = 1 << 15, + OPT_s = 1 << 16, + OPT_S = 1 << 17, + OPT_w = 1 << 18, + OPT_traditional = (1 << 19) * ENABLE_LONG_OPTS, }; #define OD_GETOPT32() getopt32long(argv, \ - "A:N:abcdfhij:lot:*vxsS:w:+:", od_longopts, \ + "A:N:abcdfhij:loBt:*vxsS:w:+:", od_longopts, \ /* -w with optional param */ \ /* -S was -s and also had optional parameter */ \ /* but in coreutils 6.3 it was renamed and now has */ \ @@ -1239,22 +1240,22 @@ int od_main(int argc 
UNUSED_PARAM, char **argv) if (opt & OPT_N) { max_bytes_to_format = xstrtooff_sfx(str_N, 0, bkm_suffixes); } + if (opt & OPT_a) decode_format_string("a"); if (opt & OPT_b) decode_format_string("oC"); if (opt & OPT_c) decode_format_string("c"); if (opt & OPT_d) decode_format_string("u2"); if (opt & OPT_f) decode_format_string("fF"); - if (opt & OPT_h) decode_format_string("x2"); + if (opt & (OPT_h|OPT_x)) decode_format_string("x2"); if (opt & OPT_i) decode_format_string("dI"); if (opt & OPT_j) n_bytes_to_skip = xstrtooff_sfx(str_j, 0, bkm_suffixes); /* This probably also depends on word width of the arch (what is "long"?) */ /* should be "d4" or "d8" depending on sizeof(long)? */ if (opt & OPT_l) decode_format_string("d8"); - if (opt & OPT_o) decode_format_string("o2"); + if (opt & (OPT_o|OPT_B)) decode_format_string("o2"); while (lst_t) { decode_format_string(llist_pop(&lst_t)); } - if (opt & OPT_x) decode_format_string("x2"); if (opt & OPT_s) decode_format_string("d2"); if (opt & OPT_S) { G.string_min = xstrtou_sfx(str_S, 0, bkm_suffixes); diff --git a/testsuite/od.tests b/testsuite/od.tests index fa6745b78..29ca829d5 100755 --- a/testsuite/od.tests +++ b/testsuite/od.tests @@ -38,8 +38,7 @@ testing "od -a (DESKTOP)" \ "" "$input" SKIP= -optional !DESKTOP #DESKTOP: unrecognized option: B -testing "od -B (!DESKTOP)" \ +testing "od -B" \ "od -B | sed 's/ *$//'" \ "\ 0000000 001001 005003 041101 177103 @@ -117,10 +116,10 @@ testing "od -F (!DESKTOP little-endian)" \ 0000010 " \ "" "$input" +SKIP= -#optional !DESKTOP $le || SKIP=1 -testing "od -f (!DESKTOP little-endian)" \ +testing "od -f (little-endian)" \ "od -f | sed 's/ *$//'" \ "\ 0000000 6.3077975e-33 -6.4885867e+37 -- cgit v1.2.3-55-g6feb From 0c3270f7e55e0ca7b8d664851468511d2c8995cf Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 25 May 2023 23:48:13 +0200 Subject: od: stop printing extra trailing spaces function old new delta .rodata 104598 104613 +15 display 1475 1485 +10 od_main 549 556 +7 rewrite 
971 967 -4 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/1 up/down: 32/-4) Total: 28 bytes Signed-off-by: Denys Vlasenko --- coreutils/od.c | 33 +++++++++++++++++---------------- include/dump.h | 1 + libbb/dump.c | 21 ++++++++++++++------- testsuite/od.tests | 43 +++++++++++++++++++------------------------ 4 files changed, 51 insertions(+), 47 deletions(-) diff --git a/coreutils/od.c b/coreutils/od.c index bd82487e5..abedb40a7 100644 --- a/coreutils/od.c +++ b/coreutils/od.c @@ -157,22 +157,22 @@ odoffset(dumper_t *dumper, int argc, char ***argvp) // The format is required and must be surrounded by " "s. // It is a printf-style format. static const char *const add_strings[] ALIGN_PTR = { - "16/1 \"%3_u \" \"\\n\"", /* 0: a */ - "8/2 \"%06o \" \"\\n\"", /* 1: B (undocumented in od), o */ - "16/1 \"%03o \" \"\\n\"", /* 2: b */ - "16/1 \"%3_c \" \"\\n\"", /* 3: c */ - "8/2 \"%5u \" \"\\n\"", /* 4: d */ - "4/4 \"%10u \" \"\\n\"", /* 5: D */ - "2/8 \"%24.14e \" \"\\n\"", /* 6: e (undocumented in od), F */ - "4/4 \"%15.7e \" \"\\n\"", /* 7: f */ - "4/4 \"%08x \" \"\\n\"", /* 8: H, X */ - "8/2 \"%04x \" \"\\n\"", /* 9: h, x */ + "16/1 \" %3_u\" \"\\n\"", /* 0: a */ + "8/2 \" %06o\" \"\\n\"", /* 1: B (undocumented in od), o */ + "16/1 \" %03o\" \"\\n\"", /* 2: b */ + "16/1 \" %3_c\" \"\\n\"", /* 3: c */ + "8/2 \" %5u\" \"\\n\"", /* 4: d */ + "4/4 \" %10u\" \"\\n\"", /* 5: D */ + "2/8 \" %24.14e\" \"\\n\"", /* 6: e (undocumented in od), F */ + "4/4 \" %15.7e\" \"\\n\"", /* 7: f */ + "4/4 \" %08x\" \"\\n\"", /* 8: H, X */ + "8/2 \" %04x\" \"\\n\"", /* 9: h, x */ /* This probably also depends on word width of the arch (what is "long"?) */ /* should be "2/8" or "4/4" depending on sizeof(long)? 
*/ - "2/8 \"%20lld \" \"\\n\"", /* 10: I, L, l */ - "4/4 \"%11d \" \"\\n\"", /* 11: i */ - "4/4 \"%011o \" \"\\n\"", /* 12: O */ - "8/2 \"%6d \" \"\\n\"", /* 13: s */ + "2/8 \" %20lld\" \"\\n\"", /* 10: I, L, l */ + "4/4 \" %11d\" \"\\n\"", /* 11: i */ + "4/4 \" %011o\" \"\\n\"", /* 12: O */ + "8/2 \" %6d\" \"\\n\"", /* 13: s */ }; static const char od_opts[] ALIGN1 = "aBbcDdeFfHhIiLlOoXxsv"; @@ -199,9 +199,9 @@ int od_main(int argc, char **argv) if (first) { first = 0; bb_dump_add(dumper, "\"%07.7_Ao\n\""); - bb_dump_add(dumper, "\"%07.7_ao \""); + bb_dump_add(dumper, "\"%07.7_ao\""); } else { - bb_dump_add(dumper, "\" \""); + bb_dump_add(dumper, "\" \""); } bb_dump_add(dumper, add_strings[(int)od_o2si[(p - od_opts)]]); } else { /* P, p, s, w, or other unhandled */ @@ -212,6 +212,7 @@ int od_main(int argc, char **argv) bb_dump_add(dumper, "\"%07.7_Ao\n\""); bb_dump_add(dumper, "\"%07.7_ao \" 8/2 \"%06o \" \"\\n\""); } + dumper->od_eofstring = "\n"; argc -= optind; argv += optind; diff --git a/include/dump.h b/include/dump.h index 10fc5d900..11dcf4523 100644 --- a/include/dump.h +++ b/include/dump.h @@ -34,6 +34,7 @@ typedef struct dumper_t { smallint dump_vflag; /*enum dump_vflag_t*/ FS *fshead; const char *xxd_eofstring; + const char *od_eofstring; off_t address; /* address/offset in stream */ long long xxd_displayoff; } dumper_t; diff --git a/libbb/dump.c b/libbb/dump.c index cfb9d94f9..77d76611b 100644 --- a/libbb/dump.c +++ b/libbb/dump.c @@ -242,7 +242,7 @@ static NOINLINE void rewrite(priv_dumper_t *dumper, FS *fs) pr->flags = F_P; *p1 = 'c'; goto DO_BYTE_COUNT_1; - case 'u': /* %_p: chars, 'nul', 'esc' etc for nonprintable */ + case 'u': /* %_u: chars, 'nul', 'esc' etc for nonprintable */ pr->flags = F_U; /* *p1 = 'c'; set in conv_u */ goto DO_BYTE_COUNT_1; @@ -322,8 +322,7 @@ static NOINLINE void rewrite(priv_dumper_t *dumper, FS *fs) p2 = NULL; for (p1 = pr->fmt; *p1; ++p1) p2 = isspace(*p1) ? 
p1 : NULL; - if (p2) - pr->nospace = p2; + pr->nospace = p2; } } } @@ -477,7 +476,7 @@ static void bpad(PR *pr) static const char conv_str[] ALIGN1 = "\0" "\\""0""\0" - "\007""\\""a""\0" /* \a */ + "\007""\\""a""\0" "\b" "\\""b""\0" "\f" "\\""f""\0" "\n" "\\""n""\0" @@ -539,7 +538,6 @@ static void conv_u(PR *pr, unsigned char *p) static NOINLINE void display(priv_dumper_t* dumper) { unsigned char *bp; - unsigned char savech = '\0'; while ((bp = get(dumper)) != NULL) { FS *fs; @@ -560,6 +558,8 @@ static NOINLINE void display(priv_dumper_t* dumper) PR *pr; for (pr = fu->nextpr; pr; dumper->pub.address += pr->bcnt, bp += pr->bcnt, pr = pr->nextpr) { + unsigned char savech; + if (dumper->eaddress && dumper->pub.address >= dumper->eaddress ) { @@ -568,9 +568,16 @@ static NOINLINE void display(priv_dumper_t* dumper) fputs_stdout(dumper->pub.xxd_eofstring); return; } + if (dumper->pub.od_eofstring) { + /* od support: requested to not pad incomplete blocks */ + /* ... but do print final offset */ + fputs_stdout(dumper->pub.od_eofstring); + goto endfu; + } if (!(pr->flags & (F_TEXT | F_BPAD))) bpad(pr); } + savech = '\0'; if (cnt == 1 && pr->nospace) { savech = *pr->nospace; *pr->nospace = '\0'; @@ -665,7 +672,7 @@ static NOINLINE void display(priv_dumper_t* dumper) break; } } - if (cnt == 1 && pr->nospace) { + if (savech) { *pr->nospace = savech; } } @@ -673,7 +680,7 @@ static NOINLINE void display(priv_dumper_t* dumper) } } } - + endfu: if (dumper->endfu) { PR *pr; /* diff --git a/testsuite/od.tests b/testsuite/od.tests index 29ca829d5..4b1525620 100755 --- a/testsuite/od.tests +++ b/testsuite/od.tests @@ -12,14 +12,9 @@ le=false { printf '\0\1' | od -s | grep -q 256; } && le=true readonly le -# NB: for !DESKTOP, -# sed 's/ *$//' truncates trailing spaces. -# This needs to be fixed properly (by not outputting them). -# For now, the tests ignore them (do not require a match). 
- optional !DESKTOP testing "od -a (!DESKTOP)" \ - "od -a | sed 's/ *$//'" \ + "od -a" \ "\ 0000000 soh stx etx lf A B C fe 0000010 @@ -39,7 +34,7 @@ testing "od -a (DESKTOP)" \ SKIP= testing "od -B" \ - "od -B | sed 's/ *$//'" \ + "od -B" \ "\ 0000000 001001 005003 041101 177103 0000010 @@ -49,7 +44,7 @@ SKIP= $le || SKIP=1 testing "od -o (little-endian)" \ - "od -o | sed 's/ *$//'" \ + "od -o" \ "\ 0000000 001001 005003 041101 177103 0000010 @@ -58,7 +53,7 @@ testing "od -o (little-endian)" \ SKIP= testing "od -b" \ - "od -b | sed 's/ *$//'" \ + "od -b" \ "\ 0000000 001 002 003 012 101 102 103 376 0000010 @@ -67,7 +62,7 @@ testing "od -b" \ SKIP= testing "od -c" \ - "od -c | sed 's/ *$//'" \ + "od -c" \ "\ 0000000 001 002 003 \\\\n A B C 376 0000010 @@ -77,7 +72,7 @@ SKIP= $le || SKIP=1 testing "od -d (little-endian)" \ - "od -d | sed 's/ *$//'" \ + "od -d" \ "\ 0000000 513 2563 16961 65091 0000010 @@ -88,7 +83,7 @@ SKIP= optional !DESKTOP #DESKTOP: unrecognized option: D $le || SKIP=1 testing "od -D (!DESKTOP little-endian)" \ - "od -D | sed 's/ *$//'" \ + "od -D" \ "\ 0000000 167969281 4265820737 0000010 @@ -99,7 +94,7 @@ SKIP= optional !DESKTOP #DESKTOP: unrecognized option: e $le || SKIP=1 testing "od -e (!DESKTOP little-endian)" \ - "od -e | sed 's/ *$//'" \ + "od -e" \ "\ 0000000 -1.61218556514036e+300 0000010 @@ -110,7 +105,7 @@ SKIP= optional !DESKTOP #DESKTOP: unrecognized option: F $le || SKIP=1 testing "od -F (!DESKTOP little-endian)" \ - "od -F | sed 's/ *$//'" \ + "od -F" \ "\ 0000000 -1.61218556514036e+300 0000010 @@ -120,7 +115,7 @@ SKIP= $le || SKIP=1 testing "od -f (little-endian)" \ - "od -f | sed 's/ *$//'" \ + "od -f" \ "\ 0000000 6.3077975e-33 -6.4885867e+37 0000010 @@ -131,7 +126,7 @@ SKIP= optional !DESKTOP #DESKTOP: unrecognized option: H $le || SKIP=1 testing "od -H (!DESKTOP little-endian)" \ - "od -H | sed 's/ *$//'" \ + "od -H" \ "\ 0000000 0a030201 fe434241 0000010 @@ -142,7 +137,7 @@ SKIP= optional !DESKTOP #DESKTOP: unrecognized 
option: X $le || SKIP=1 testing "od -X (!DESKTOP little-endian)" \ - "od -X | sed 's/ *$//'" \ + "od -X" \ "\ 0000000 0a030201 fe434241 0000010 @@ -152,7 +147,7 @@ SKIP= $le || SKIP=1 testing "od -h (little-endian)" \ - "od -h | sed 's/ *$//'" \ + "od -h" \ "\ 0000000 0201 0a03 4241 fe43 0000010 @@ -162,7 +157,7 @@ SKIP= $le || SKIP=1 testing "od -x (little-endian)" \ - "od -x | sed 's/ *$//'" \ + "od -x" \ "\ 0000000 0201 0a03 4241 fe43 0000010 @@ -173,7 +168,7 @@ SKIP= optional !DESKTOP #DESKTOP: unrecognized option: I $le || SKIP=1 testing "od -I (!DESKTOP little-endian)" \ - "od -I | sed 's/ *$//'" \ + "od -I" \ "\ 0000000 -125183517527965183 0000010 @@ -184,7 +179,7 @@ SKIP= optional !DESKTOP #DESKTOP: unrecognized option: L $le || SKIP=1 testing "od -L (!DESKTOP little-endian)" \ - "od -L | sed 's/ *$//'" \ + "od -L" \ "\ 0000000 -125183517527965183 0000010 @@ -194,7 +189,7 @@ SKIP= $le || SKIP=1 testing "od -i (little-endian)" \ - "od -i | sed 's/ *$//'" \ + "od -i" \ "\ 0000000 167969281 -29146559 0000010 @@ -205,7 +200,7 @@ SKIP= optional !DESKTOP #DESKTOP: unrecognized option: O $le || SKIP=1 testing "od -O (!DESKTOP little-endian)" \ - "od -O | sed 's/ *$//'" \ + "od -O" \ "\ 0000000 01200601001 37620641101 0000010 @@ -216,7 +211,7 @@ SKIP= # This probably also depends on word width of the arch (what is "long"?) 
$le || SKIP=1 testing "od -l (little-endian)" \ - "od -l | sed 's/ *$//'" \ + "od -l" \ "\ 0000000 -125183517527965183 0000010 -- cgit v1.2.3-55-g6feb From aa4d303a3139107919f73cece4eaf85a7dc75db6 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 04:27:43 +0200 Subject: od: fix default format, shrink function old new delta od_main 556 568 +12 .rodata 104613 104555 -58 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 12/-58) Total: -46 bytes Signed-off-by: Denys Vlasenko --- coreutils/od.c | 42 ++++++++++++++++++++++-------------------- testsuite/od.tests | 10 ++++++++++ 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/coreutils/od.c b/coreutils/od.c index abedb40a7..3fd241d51 100644 --- a/coreutils/od.c +++ b/coreutils/od.c @@ -144,35 +144,36 @@ odoffset(dumper_t *dumper, int argc, char ***argvp) } } -// A format string contains format units separated by whitespace. +// bb_dump_add(): +// A format string contains format units separated by [optional] whitespace. // A format unit contains up to three items: an iteration count, a byte count, // and a format. -// The iteration count is an optional integer (default 1) +// The iteration count is an optional integer (default 1). // Each format is applied iteration count times. // The byte count is an optional integer. It defines the number // of bytes to be interpreted by each iteration of the format. // If an iteration count and/or a byte count is specified, a slash must be // placed after the iteration count and/or before the byte count // to disambiguate them. -// The format is required and must be surrounded by " "s. -// It is a printf-style format. +// The printf-style format is required and must be surrounded by " "s. 
+// (Below, each string contains two format units) static const char *const add_strings[] ALIGN_PTR = { - "16/1 \" %3_u\" \"\\n\"", /* 0: a */ - "8/2 \" %06o\" \"\\n\"", /* 1: B (undocumented in od), o */ - "16/1 \" %03o\" \"\\n\"", /* 2: b */ - "16/1 \" %3_c\" \"\\n\"", /* 3: c */ - "8/2 \" %5u\" \"\\n\"", /* 4: d */ - "4/4 \" %10u\" \"\\n\"", /* 5: D */ - "2/8 \" %24.14e\" \"\\n\"", /* 6: e (undocumented in od), F */ - "4/4 \" %15.7e\" \"\\n\"", /* 7: f */ - "4/4 \" %08x\" \"\\n\"", /* 8: H, X */ - "8/2 \" %04x\" \"\\n\"", /* 9: h, x */ + "16/1 \" %3_u\"" "\"\n\"", /* 0: a */ + "8/2 \" %06o\"" "\"\n\"", /* 1: B (undocumented in od), o */ + "16/1 \" %03o\"" "\"\n\"", /* 2: b */ + "16/1 \" %3_c\"" "\"\n\"", /* 3: c */ + "8/2 \" %5u\"" "\"\n\"", /* 4: d */ + "4/4 \" %10u\"" "\"\n\"", /* 5: D */ + "2/8 \" %24.14e\"" "\"\n\"", /* 6: e (undocumented in od), F */ + "4/4 \" %15.7e\"" "\"\n\"", /* 7: f */ + "4/4 \" %08x\"" "\"\n\"", /* 8: H, X */ + "8/2 \" %04x\"" "\"\n\"", /* 9: h, x */ /* This probably also depends on word width of the arch (what is "long"?) */ /* should be "2/8" or "4/4" depending on sizeof(long)? 
*/ - "2/8 \" %20lld\" \"\\n\"", /* 10: I, L, l */ - "4/4 \" %11d\" \"\\n\"", /* 11: i */ - "4/4 \" %011o\" \"\\n\"", /* 12: O */ - "8/2 \" %6d\" \"\\n\"", /* 13: s */ + "2/8 \" %20lld\"" "\"\n\"", /* 10: I, L, l */ + "4/4 \" %11d\"" "\"\n\"", /* 11: i */ + "4/4 \" %011o\"" "\"\n\"", /* 12: O */ + "8/2 \" %6d\"" "\"\n\"", /* 13: s */ }; static const char od_opts[] ALIGN1 = "aBbcDdeFfHhIiLlOoXxsv"; @@ -204,13 +205,14 @@ int od_main(int argc, char **argv) bb_dump_add(dumper, "\" \""); } bb_dump_add(dumper, add_strings[(int)od_o2si[(p - od_opts)]]); - } else { /* P, p, s, w, or other unhandled */ + } else { /* P, p, w, or other unhandled */ bb_show_usage(); } } if (!dumper->fshead) { bb_dump_add(dumper, "\"%07.7_Ao\n\""); - bb_dump_add(dumper, "\"%07.7_ao \" 8/2 \"%06o \" \"\\n\""); + bb_dump_add(dumper, "\"%07.7_ao\""); + bb_dump_add(dumper, add_strings[1]); /* -o format is default */ } dumper->od_eofstring = "\n"; diff --git a/testsuite/od.tests b/testsuite/od.tests index 4b1525620..d6f50a206 100755 --- a/testsuite/od.tests +++ b/testsuite/od.tests @@ -12,6 +12,16 @@ le=false { printf '\0\1' | od -s | grep -q 256; } && le=true readonly le +$le || SKIP=1 +testing "od (little-endian)" \ + "od" \ +"\ +0000000 001001 005003 041101 177103 +0000010 +" \ + "" "$input" +SKIP= + optional !DESKTOP testing "od -a (!DESKTOP)" \ "od -a" \ -- cgit v1.2.3-55-g6feb From 3c6f6382eef14b880550cbf28ac5a517d0a075fc Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 12:34:11 +0200 Subject: libbb/dump: conditionalize code used only by xxd and od Signed-off-by: Denys Vlasenko --- coreutils/od.c | 2 +- include/dump.h | 4 ++++ libbb/dump.c | 6 +++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/coreutils/od.c b/coreutils/od.c index 3fd241d51..46aba5a7c 100644 --- a/coreutils/od.c +++ b/coreutils/od.c @@ -223,7 +223,7 @@ int od_main(int argc, char **argv) return bb_dump_dump(dumper, argv); } -#endif /* ENABLE_DESKTOP */ +#endif /* !ENABLE_DESKTOP */ /*- * 
Copyright (c) 1990 The Regents of the University of California. diff --git a/include/dump.h b/include/dump.h index 11dcf4523..8fb92f07b 100644 --- a/include/dump.h +++ b/include/dump.h @@ -33,8 +33,12 @@ typedef struct dumper_t { int dump_length; /* max bytes to read */ smallint dump_vflag; /*enum dump_vflag_t*/ FS *fshead; +#if ENABLE_XXD const char *xxd_eofstring; +#endif +#if ENABLE_OD const char *od_eofstring; +#endif off_t address; /* address/offset in stream */ long long xxd_displayoff; } dumper_t; diff --git a/libbb/dump.c b/libbb/dump.c index 77d76611b..21c6c7083 100644 --- a/libbb/dump.c +++ b/libbb/dump.c @@ -563,17 +563,21 @@ static NOINLINE void display(priv_dumper_t* dumper) if (dumper->eaddress && dumper->pub.address >= dumper->eaddress ) { +#if ENABLE_XXD if (dumper->pub.xxd_eofstring) { /* xxd support: requested to not pad incomplete blocks */ fputs_stdout(dumper->pub.xxd_eofstring); return; } +#endif +#if ENABLE_OD if (dumper->pub.od_eofstring) { /* od support: requested to not pad incomplete blocks */ /* ... 
but do print final offset */ fputs_stdout(dumper->pub.od_eofstring); goto endfu; } +#endif if (!(pr->flags & (F_TEXT | F_BPAD))) bpad(pr); } @@ -637,7 +641,7 @@ static NOINLINE void display(priv_dumper_t* dumper) goto skip; } printf(pr->fmt, value); - skip: + IF_OD(skip:) break; } case F_P: -- cgit v1.2.3-55-g6feb From 60d4d55b870757089cdae96920cf6c416ba2de37 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 12:56:17 +0200 Subject: od: support -DOHXIL function old new delta od_main 1866 1917 +51 .rodata 105306 105321 +15 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/0 up/down: 66/0) Total: 66 bytes Signed-off-by: Denys Vlasenko --- coreutils/od.c | 4 +++- coreutils/od_bloaty.c | 43 +++++++++++++++++++++++++++---------------- testsuite/od.tests | 44 ++++++++++++++++---------------------------- 3 files changed, 46 insertions(+), 45 deletions(-) diff --git a/coreutils/od.c b/coreutils/od.c index 46aba5a7c..98ae06ba7 100644 --- a/coreutils/od.c +++ b/coreutils/od.c @@ -22,7 +22,9 @@ //usage:#if !ENABLE_DESKTOP //usage:#define od_trivial_usage -//usage: "[-aBbcDdeFfHhIiLlOoXxsv] [FILE]" +//usage: "[-abcdeFfhIiLloxsv] [FILE]" +// We also support -BDOHXIL, but they are not documented in coreutils 9.1 +// manpage/help, so don't show them either. //usage:#define od_full_usage "\n\n" //usage: "Print FILE (or stdin) unambiguously, as octal bytes by default" //usage:#endif diff --git a/coreutils/od_bloaty.c b/coreutils/od_bloaty.c index 57a4fe163..2782adbf6 100644 --- a/coreutils/od_bloaty.c +++ b/coreutils/od_bloaty.c @@ -27,6 +27,8 @@ //usage:#if ENABLE_DESKTOP //usage:#define od_trivial_usage //usage: "[-abcdfhilovxs] [-t TYPE] [-A RADIX] [-N SIZE] [-j SKIP] [-S MINSTR] [-w WIDTH] [FILE]..." +// We also support -BDOHXIL, but they are not documented in coreutils 9.1 +// manpage/help, so don't show them either. // We don't support: // ... 
[FILE] [[+]OFFSET[.][b]] // Support is buggy for: @@ -43,27 +45,33 @@ enum { OPT_b = 1 << 3, OPT_c = 1 << 4, OPT_d = 1 << 5, - OPT_f = 1 << 6, - OPT_h = 1 << 7, - OPT_i = 1 << 8, - OPT_j = 1 << 9, - OPT_l = 1 << 10, - OPT_o = 1 << 11, - OPT_B = 1 << 12, /* undocumented synonym to -o */ - OPT_t = 1 << 13, + OPT_D = 1 << 6, /* undocumented in coreutils 9.1 */ + OPT_f = 1 << 7, + OPT_h = 1 << 8, + OPT_H = 1 << 9, /* undocumented in coreutils 9.1 */ + OPT_i = 1 << 10, + OPT_I = 1 << 11, /* undocumented in coreutils 9.1 */ + OPT_j = 1 << 12, + OPT_l = 1 << 13, + OPT_L = 1 << 14, /* undocumented in coreutils 9.1 */ + OPT_o = 1 << 15, + OPT_O = 1 << 16, /* undocumented in coreutils 9.1 */ + OPT_B = 1 << 17, /* undocumented synonym to -o */ + OPT_t = 1 << 18, /* When zero and two or more consecutive blocks are equal, format only the first block and output an asterisk alone on the following line to indicate that identical blocks have been elided: */ - OPT_v = 1 << 14, - OPT_x = 1 << 15, - OPT_s = 1 << 16, - OPT_S = 1 << 17, - OPT_w = 1 << 18, - OPT_traditional = (1 << 19) * ENABLE_LONG_OPTS, + OPT_v = 1 << 19, + OPT_x = 1 << 20, + OPT_X = 1 << 21, /* undocumented in coreutils 9.1 */ + OPT_s = 1 << 22, + OPT_S = 1 << 23, + OPT_w = 1 << 24, + OPT_traditional = (1 << 25) * ENABLE_LONG_OPTS, }; #define OD_GETOPT32() getopt32long(argv, \ - "A:N:abcdfhij:loBt:*vxsS:w:+:", od_longopts, \ + "A:N:abcdDfhHiIj:lLoOBt:*vxXsS:w:+:", od_longopts, \ /* -w with optional param */ \ /* -S was -s and also had optional parameter */ \ /* but in coreutils 6.3 it was renamed and now has */ \ @@ -1245,14 +1253,17 @@ int od_main(int argc UNUSED_PARAM, char **argv) if (opt & OPT_b) decode_format_string("oC"); if (opt & OPT_c) decode_format_string("c"); if (opt & OPT_d) decode_format_string("u2"); + if (opt & OPT_D) decode_format_string("uI"); if (opt & OPT_f) decode_format_string("fF"); if (opt & (OPT_h|OPT_x)) decode_format_string("x2"); + if (opt & (OPT_H|OPT_X)) decode_format_string("xI"); if 
(opt & OPT_i) decode_format_string("dI"); if (opt & OPT_j) n_bytes_to_skip = xstrtooff_sfx(str_j, 0, bkm_suffixes); /* This probably also depends on word width of the arch (what is "long"?) */ /* should be "d4" or "d8" depending on sizeof(long)? */ - if (opt & OPT_l) decode_format_string("d8"); + if (opt & (OPT_I|OPT_l|OPT_L)) decode_format_string("d8"); if (opt & (OPT_o|OPT_B)) decode_format_string("o2"); + if (opt & OPT_O) decode_format_string("oI"); while (lst_t) { decode_format_string(llist_pop(&lst_t)); } diff --git a/testsuite/od.tests b/testsuite/od.tests index d6f50a206..677968967 100755 --- a/testsuite/od.tests +++ b/testsuite/od.tests @@ -90,9 +90,8 @@ testing "od -d (little-endian)" \ "" "$input" SKIP= -optional !DESKTOP #DESKTOP: unrecognized option: D $le || SKIP=1 -testing "od -D (!DESKTOP little-endian)" \ +testing "od -D (little-endian)" \ "od -D" \ "\ 0000000 167969281 4265820737 @@ -133,9 +132,8 @@ testing "od -f (little-endian)" \ "" "$input" SKIP= -optional !DESKTOP #DESKTOP: unrecognized option: H $le || SKIP=1 -testing "od -H (!DESKTOP little-endian)" \ +testing "od -H (little-endian)" \ "od -H" \ "\ 0000000 0a030201 fe434241 @@ -144,9 +142,8 @@ testing "od -H (!DESKTOP little-endian)" \ "" "$input" SKIP= -optional !DESKTOP #DESKTOP: unrecognized option: X $le || SKIP=1 -testing "od -X (!DESKTOP little-endian)" \ +testing "od -X (little-endian)" \ "od -X" \ "\ 0000000 0a030201 fe434241 @@ -175,51 +172,42 @@ testing "od -x (little-endian)" \ "" "$input" SKIP= -optional !DESKTOP #DESKTOP: unrecognized option: I $le || SKIP=1 -testing "od -I (!DESKTOP little-endian)" \ - "od -I" \ +testing "od -i (little-endian)" \ + "od -i" \ "\ -0000000 -125183517527965183 +0000000 167969281 -29146559 0000010 " \ "" "$input" SKIP= -optional !DESKTOP #DESKTOP: unrecognized option: L $le || SKIP=1 -testing "od -L (!DESKTOP little-endian)" \ - "od -L" \ +testing "od -O (little-endian)" \ + "od -O" \ "\ -0000000 -125183517527965183 +0000000 01200601001 37620641101 
0000010 " \ "" "$input" SKIP= +# This probably also depends on word width of the arch (what is "long"?) $le || SKIP=1 -testing "od -i (little-endian)" \ - "od -i" \ +testing "od -I (little-endian)" \ + "od -I" \ "\ -0000000 167969281 -29146559 +0000000 -125183517527965183 0000010 " \ "" "$input" -SKIP= - -optional !DESKTOP #DESKTOP: unrecognized option: O -$le || SKIP=1 -testing "od -O (!DESKTOP little-endian)" \ - "od -O" \ +testing "od -L (little-endian)" \ + "od -L" \ "\ -0000000 01200601001 37620641101 +0000000 -125183517527965183 0000010 " \ "" "$input" -SKIP= - -# This probably also depends on word width of the arch (what is "long"?) -$le || SKIP=1 testing "od -l (little-endian)" \ "od -l" \ "\ -- cgit v1.2.3-55-g6feb From 25a10ffe1fcec1adc40d91d1f4ddfdf7fe12cc74 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 13:01:41 +0200 Subject: od: actually remove -IL from --help, as comment says Signed-off-by: Denys Vlasenko --- coreutils/od.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coreutils/od.c b/coreutils/od.c index 98ae06ba7..3684e4ed3 100644 --- a/coreutils/od.c +++ b/coreutils/od.c @@ -22,7 +22,7 @@ //usage:#if !ENABLE_DESKTOP //usage:#define od_trivial_usage -//usage: "[-abcdeFfhIiLloxsv] [FILE]" +//usage: "[-abcdeFfhiloxsv] [FILE]" // We also support -BDOHXIL, but they are not documented in coreutils 9.1 // manpage/help, so don't show them either. 
//usage:#define od_full_usage "\n\n" -- cgit v1.2.3-55-g6feb From de851bc9b2dcf3f5fad424172d08e850c8af7d62 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 13:33:08 +0200 Subject: od, hexdump: byte 0x11 is "dc1" not "dcl" Signed-off-by: Denys Vlasenko --- libbb/dump.c | 6 ++++-- testsuite/hexdump.tests | 16 ++++++++++++++++ testsuite/od.tests | 32 +++++++++++++++++++++++++------- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/libbb/dump.c b/libbb/dump.c index 21c6c7083..fc145edf9 100644 --- a/libbb/dump.c +++ b/libbb/dump.c @@ -516,10 +516,12 @@ static void conv_u(PR *pr, unsigned char *p) static const char list[] ALIGN1 = "nul\0soh\0stx\0etx\0eot\0enq\0ack\0bel\0" "bs\0_ht\0_lf\0_vt\0_ff\0_cr\0_so\0_si\0_" - "dle\0dcl\0dc2\0dc3\0dc4\0nak\0syn\0etb\0" + "dle\0dc1\0dc2\0dc3\0dc4\0nak\0syn\0etb\0" "can\0em\0_sub\0esc\0fs\0_gs\0_rs\0_us"; + /* NB: bug: od uses %_u to implement -a, + * but it should use "nl", not "lf", for char #10. + */ - /* od used nl, not lf */ if (*p <= 0x1f) { *pr->cchar = 's'; printf(pr->fmt, list + (4 * (int)*p)); diff --git a/testsuite/hexdump.tests b/testsuite/hexdump.tests index cfb20187e..084156af4 100755 --- a/testsuite/hexdump.tests +++ b/testsuite/hexdump.tests @@ -34,4 +34,20 @@ testing "hexdump thinks last full block can match" \ '' \ '\0\0\0\0\0\0\0\0\0\0\0\0' +testing "hexdump e %3_u" \ + "hexdump -e '16/1 \" %3_u\" \"\n\"'" \ + "\ + nul soh stx etx eot enq ack bel bs ht lf vt ff cr so si + dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us + p q r s t u v w x y z { | } ~ del + 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f + f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff +" \ + "" \ +"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"\ +"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"\ +"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"\ +"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"\ 
+"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"\ + exit $FAILCOUNT diff --git a/testsuite/od.tests b/testsuite/od.tests index 677968967..fce66efbb 100755 --- a/testsuite/od.tests +++ b/testsuite/od.tests @@ -26,21 +26,39 @@ optional !DESKTOP testing "od -a (!DESKTOP)" \ "od -a" \ "\ -0000000 soh stx etx lf A B C fe -0000010 +0000000 nul soh stx etx eot enq ack bel bs ht lf vt ff cr so si +0000020 dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us +0000040 p q r s t u v w x y z { | } ~ del +0000060 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f +0000100 f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff +0000120 " \ - "" "$input" + "" \ +"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"\ +"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"\ +"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"\ +"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"\ +"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" SKIP= -# ^^^ a bit incorrect handling of ctrl/high bytes. +# ^^^ a bit incorrect handling of ctrl ("lf" should be "nl") and high bytes. # vvv this output is correct. 
optional DESKTOP testing "od -a (DESKTOP)" \ "od -a" \ "\ -0000000 soh stx etx nl A B C ~ -0000010 +0000000 nul soh stx etx eot enq ack bel bs ht nl vt ff cr so si +0000020 dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us +0000040 p q r s t u v w x y z { | } ~ del +0000060 nul soh stx etx eot enq ack bel bs ht nl vt ff cr so si +0000100 p q r s t u v w x y z { | } ~ del +0000120 " \ - "" "$input" + "" \ +"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"\ +"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"\ +"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"\ +"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"\ +"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" SKIP= testing "od -B" \ -- cgit v1.2.3-55-g6feb From 8fab21114122c9abf54a750523a12bf263d5b0b2 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 13:39:33 +0200 Subject: libbb/dump: use fputs_stdout where appropriate function old new delta display 1485 1483 -2 Signed-off-by: Denys Vlasenko --- libbb/dump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libbb/dump.c b/libbb/dump.c index fc145edf9..dace481c2 100644 --- a/libbb/dump.c +++ b/libbb/dump.c @@ -653,7 +653,7 @@ static NOINLINE void display(priv_dumper_t* dumper) printf(pr->fmt, (char *) bp); break; case F_TEXT: - printf(pr->fmt); + fputs_stdout(pr->fmt); break; case F_U: conv_u(pr, bp); @@ -705,7 +705,7 @@ static NOINLINE void display(priv_dumper_t* dumper) printf(pr->fmt, (unsigned long long) dumper->eaddress + dumper->pub.xxd_displayoff); break; case F_TEXT: - printf(pr->fmt); + fputs_stdout(pr->fmt); break; } } -- cgit v1.2.3-55-g6feb From 34751d8bf921a2c07cf9a7ce9074756a3d936013 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 14:10:38 +0200 Subject: libbb/dump: correct handling of 1-byte signed int format Signed-off-by: Denys Vlasenko --- libbb/dump.c | 37 +++++++++++++++++-------------------- 
testsuite/hexdump.tests | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/libbb/dump.c b/libbb/dump.c index dace481c2..49340b5a5 100644 --- a/libbb/dump.c +++ b/libbb/dump.c @@ -619,31 +619,31 @@ static NOINLINE void display(priv_dumper_t* dumper) } case F_INT: { union { - uint16_t val16; - uint32_t val32; - uint64_t val64; + int16_t ival16; + int32_t ival32; + int64_t ival64; } u; - int value = *bp; + int value = (signed char)*bp; switch (pr->bcnt) { case 1: break; case 2: - memcpy(&u.val16, bp, 2); - value = u.val16; + move_from_unaligned16(u.ival16, bp); + value = u.ival16; break; case 4: - memcpy(&u.val32, bp, 4); - value = u.val32; + move_from_unaligned32(u.ival32, bp); + value = u.ival32; break; case 8: - memcpy(&u.val64, bp, 8); + move_from_unaligned64(u.ival64, bp); //A hack. Users _must_ use %llX formats to not truncate high bits - printf(pr->fmt, (long long) u.val64); + printf(pr->fmt, (long long)u.ival64); goto skip; } printf(pr->fmt, value); - IF_OD(skip:) + skip: break; } case F_P: @@ -659,22 +659,19 @@ static NOINLINE void display(priv_dumper_t* dumper) conv_u(pr, bp); break; case F_UINT: { - unsigned ival; - unsigned short sval; - + unsigned value = (unsigned char)*bp; switch (pr->bcnt) { case 1: - printf(pr->fmt, (unsigned) *bp); break; case 2: - memcpy(&sval, bp, sizeof(sval)); - printf(pr->fmt, (unsigned) sval); + move_from_unaligned16(value, bp); break; case 4: - memcpy(&ival, bp, sizeof(ival)); - printf(pr->fmt, ival); + move_from_unaligned32(value, bp); break; + /* case 8: no users yet */ } + printf(pr->fmt, value); break; } } @@ -686,7 +683,7 @@ static NOINLINE void display(priv_dumper_t* dumper) } } } - endfu: + IF_OD(endfu:) if (dumper->endfu) { PR *pr; /* diff --git a/testsuite/hexdump.tests b/testsuite/hexdump.tests index 084156af4..be0379cfc 100755 --- a/testsuite/hexdump.tests +++ b/testsuite/hexdump.tests @@ -34,7 +34,7 @@ testing "hexdump thinks last full block can match" \ '' \ 
'\0\0\0\0\0\0\0\0\0\0\0\0' -testing "hexdump e %3_u" \ +testing "hexdump -e %3_u" \ "hexdump -e '16/1 \" %3_u\" \"\n\"'" \ "\ nul soh stx etx eot enq ack bel bs ht lf vt ff cr so si @@ -50,4 +50,36 @@ testing "hexdump e %3_u" \ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"\ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"\ +testing "hexdump -e /1 %d" \ + "hexdump -e '16/1 \" %4d\" \"\n\"'" \ + "\ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 + -128 -127 -126 -125 -124 -123 -122 -121 -120 -119 -118 -117 -116 -115 -114 -113 + -16 -15 -14 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +" \ + "" \ +"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"\ +"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"\ +"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"\ +"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"\ +"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"\ + +testing "hexdump -e /2 %d" \ + "hexdump -e '8/2 \" %6d\" \"\n\"'" \ + "\ + 256 770 1284 1798 2312 2826 3340 3854 + 4368 4882 5396 5910 6424 6938 7452 7966 + 29040 29554 30068 30582 31096 31610 32124 32638 + -32384 -31870 -31356 -30842 -30328 -29814 -29300 -28786 + -3600 -3086 -2572 -2058 -1544 -1030 -516 -2 +" \ + "" \ +"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"\ +"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"\ +"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"\ +"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"\ +"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"\ + exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From 283cba78f2c9ea8478ef58ba616197df31640353 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 14:46:29 +0200 Subject: hexdump, xxd: shrink strings function old new delta 
add_first 12 10 -2 .rodata 105321 105306 -15 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-17) Total: -17 bytes Signed-off-by: Denys Vlasenko --- util-linux/hexdump.c | 20 ++++++++++---------- util-linux/hexdump_xxd.c | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/util-linux/hexdump.c b/util-linux/hexdump.c index 307a84803..421fe025d 100644 --- a/util-linux/hexdump.c +++ b/util-linux/hexdump.c @@ -72,14 +72,14 @@ static void bb_dump_addfile(dumper_t *dumper, char *name) } static const char *const add_strings[] ALIGN_PTR = { - "\"%07.7_ax \"16/1 \"%03o \"\"\n\"", /* b */ - "\"%07.7_ax \"16/1 \"%3_c \"\"\n\"", /* c */ - "\"%07.7_ax \"8/2 \" %05u \"\"\n\"", /* d */ - "\"%07.7_ax \"8/2 \" %06o \"\"\n\"", /* o */ - "\"%07.7_ax \"8/2 \" %04x \"\"\n\"", /* x */ + "\"%07_ax\"16/1 \" %03o\"" "\"\n\"", /* b */ + "\"%07_ax\"16/1 \" %3_c\"" "\"\n\"", /* c */ + "\"%07_ax\"8/2 \" %05u\"" "\"\n\"", /* d */ + "\"%07_ax\"8/2 \" %06o\"" "\"\n\"", /* o */ + "\"%07_ax\"8/2 \" %04x\"" "\"\n\"", /* x */ }; -static const char add_first[] ALIGN1 = "\"%07.7_Ax\n\""; +static const char add_first[] ALIGN1 = "\"%07_Ax\n\""; static const char hexdump_opts[] ALIGN1 = "bcdoxCe:f:n:s:v"; @@ -110,9 +110,9 @@ int hexdump_main(int argc, char **argv) /* Save a little bit of space below by omitting the 'else's. 
*/ if (ch == 'C') { hd_applet: - bb_dump_add(dumper, "\"%08.8_Ax\n\""); // final address line after dump - //------------------- "address " 8 * "xx " " " 8 * "xx " - bb_dump_add(dumper, "\"%08.8_ax \"8/1 \"%02x \"\" \"8/1 \"%02x \""); + bb_dump_add(dumper, "\"%08_Ax\n\""); // final address line after dump + //------------------- "address " 8 * " xx" " " 8 * " xx" + bb_dump_add(dumper, "\"%08_ax \"8/1 \" %02x\"\" \"8/1 \" %02x\""); //------------------- " |ASCII...........|\n" bb_dump_add(dumper, "\" |\"16/1 \"%_p\"\"|\n\""); } @@ -140,7 +140,7 @@ int hexdump_main(int argc, char **argv) if (!dumper->fshead) { bb_dump_add(dumper, add_first); - bb_dump_add(dumper, "\"%07.7_ax \"8/2 \"%04x \"\"\n\""); + bb_dump_add(dumper, "\"%07_ax\"8/2 \" %04x\"\"\n\""); } argv += optind; diff --git a/util-linux/hexdump_xxd.c b/util-linux/hexdump_xxd.c index 9738a76ad..636cbfeec 100644 --- a/util-linux/hexdump_xxd.c +++ b/util-linux/hexdump_xxd.c @@ -285,7 +285,7 @@ int xxd_main(int argc UNUSED_PARAM, char **argv) // output is " 0xXX, 0xXX, 0xXX...", add leading space bb_dump_add(dumper, "\" \""); } else - bb_dump_add(dumper, "\"%08.8_ax: \""); // "address: " + bb_dump_add(dumper, "\"%08_ax: \""); // "address: " } if (bytes < 1 || bytes >= cols) { -- cgit v1.2.3-55-g6feb From d7814f572725f224fcef8870a75c2483043d3681 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 16:43:40 +0200 Subject: hexdump: code shrink function old new delta add_format - 50 +50 add_first 10 - -10 hexdump_main 401 366 -35 .rodata 105306 105255 -51 ------------------------------------------------------------------------------ (add/remove: 1/1 grow/shrink: 0/2 up/down: 50/-96) Total: -46 bytes Signed-off-by: Denys Vlasenko --- util-linux/hexdump.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/util-linux/hexdump.c b/util-linux/hexdump.c index 421fe025d..be4c1964f 100644 --- a/util-linux/hexdump.c +++ b/util-linux/hexdump.c @@ -72,14 +72,20 @@ static 
void bb_dump_addfile(dumper_t *dumper, char *name) } static const char *const add_strings[] ALIGN_PTR = { - "\"%07_ax\"16/1 \" %03o\"" "\"\n\"", /* b */ - "\"%07_ax\"16/1 \" %3_c\"" "\"\n\"", /* c */ - "\"%07_ax\"8/2 \" %05u\"" "\"\n\"", /* d */ - "\"%07_ax\"8/2 \" %06o\"" "\"\n\"", /* o */ - "\"%07_ax\"8/2 \" %04x\"" "\"\n\"", /* x */ + "16/1 \" %03o" , /* b */ + "16/1 \" %3_c" , /* c */ + "8/2 \" %05u" , /* d */ + "8/2 \" %06o" , /* o */ + "8/2 \" %04x", /* x */ }; -static const char add_first[] ALIGN1 = "\"%07_Ax\n\""; +static void add_format(dumper_t *dumper, const char *fmt) +{ + char fmtbuf[sizeof("\"%07_ax\"" "%s\"" "\"\n\"") + 16]; + sprintf(fmtbuf, "\"%%07_ax\"" "%s\"" "\"\n\"", fmt); + bb_dump_add(dumper, "\"%07_Ax\n\""); + bb_dump_add(dumper, fmtbuf); +} static const char hexdump_opts[] ALIGN1 = "bcdoxCe:f:n:s:v"; @@ -104,8 +110,7 @@ int hexdump_main(int argc, char **argv) if (!p) bb_show_usage(); if ((p - hexdump_opts) < 5) { - bb_dump_add(dumper, add_first); - bb_dump_add(dumper, add_strings[(int)(p - hexdump_opts)]); + add_format(dumper, add_strings[(int)(p - hexdump_opts)]); } /* Save a little bit of space below by omitting the 'else's. 
*/ if (ch == 'C') { @@ -139,8 +144,7 @@ int hexdump_main(int argc, char **argv) } if (!dumper->fshead) { - bb_dump_add(dumper, add_first); - bb_dump_add(dumper, "\"%07_ax\"8/2 \" %04x\"\"\n\""); + add_format(dumper, "8/2 \" %04x"); } argv += optind; -- cgit v1.2.3-55-g6feb From 5dcc443dba039b305a510c01883e9f34e42656ae Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 19:36:58 +0200 Subject: awk: fix use-after-realloc (CVE-2021-42380), closes 15601 Signed-off-by: Denys Vlasenko --- editors/awk.c | 26 +++++++++++++++++++------ testsuite/awk.tests | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 6 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 728ee8685..2af823808 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -555,7 +555,7 @@ struct globals { const char *g_progname; int g_lineno; int nfields; - int maxfields; /* used in fsrealloc() only */ + unsigned maxfields; var *Fields; char *g_pos; char g_saved_ch; @@ -1931,9 +1931,9 @@ static void fsrealloc(int size) { int i, newsize; - if (size >= maxfields) { - /* Sanity cap, easier than catering for overflows */ - if (size > 0xffffff) + if ((unsigned)size >= maxfields) { + /* Sanity cap, easier than catering for over/underflows */ + if ((unsigned)size > 0xffffff) bb_die_memory_exhausted(); i = maxfields; @@ -2891,6 +2891,7 @@ static var *evaluate(node *op, var *res) uint32_t opinfo; int opn; node *op1; + var *old_Fields_ptr; opinfo = op->info; opn = (opinfo & OPNMASK); @@ -2899,10 +2900,16 @@ static var *evaluate(node *op, var *res) debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); /* execute inevitable things */ + old_Fields_ptr = NULL; if (opinfo & OF_RES1) { if ((opinfo & OF_REQUIRED) && !op1) syntax_error(EMSG_TOO_FEW_ARGS); L.v = evaluate(op1, TMPVAR0); + /* Does L.v point to $n variable? 
*/ + if ((size_t)(L.v - Fields) < maxfields) { + /* yes, remember where Fields[] is */ + old_Fields_ptr = Fields; + } if (opinfo & OF_STR1) { L.s = getvar_s(L.v); debug_printf_eval("L.s:'%s'\n", L.s); @@ -2921,8 +2928,15 @@ static var *evaluate(node *op, var *res) */ if (opinfo & OF_RES2) { R.v = evaluate(op->r.n, TMPVAR1); - //TODO: L.v may be invalid now, set L.v to NULL to catch bugs? - //L.v = NULL; + /* Seen in $5=$$5=$0: + * Evaluation of R.v ($$5=$0 expression) + * made L.v ($5) invalid. It's detected here. + */ + if (old_Fields_ptr) { + //if (old_Fields_ptr != Fields) + // debug_printf_eval("L.v moved\n"); + L.v += Fields - old_Fields_ptr; + } if (opinfo & OF_STR2) { R.s = getvar_s(R.v); debug_printf_eval("R.s:'%s'\n", R.s); diff --git a/testsuite/awk.tests b/testsuite/awk.tests index bbf0fbff1..ddc51047b 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -485,4 +485,59 @@ testing 'awk assign while test' \ "" \ "foo" +# User-supplied bug (SEGV) example, was causing use-after-realloc +testing 'awk assign while assign' \ + "awk '\$5=\$\$5=\$0'; echo \$?" 
\ + "\ +─ process timing ────────────────────────────────────┬─ ─ process timing ────────────────────────────────────┬─ overall results ────┐ results ────┐ +β”‚ run time : β”‚ run time : 0 days, 0 hrs, 0 min, 56 sec β”‚ cycles done : 0 β”‚ days, 0 hrs, 0 min, 56 sec β”‚ cycles done : 0 β”‚ +β”‚ last new find β”‚ last new find : 0 days, 0 hrs, 0 min, 1 sec β”‚ corpus count : 208 β”‚ 0 days, 0 hrs, 0 min, 1 sec β”‚ corpus count : 208 β”‚ +β”‚last saved crash : β”‚last saved crash : none seen yet β”‚saved crashes : 0 β”‚ seen yet β”‚saved crashes : 0 β”‚ +β”‚ last saved hang β”‚ last saved hang : none seen yet β”‚ saved hangs : 0 β”‚ none seen yet β”‚ saved hangs : 0 β”‚ +β”œβ”€ cycle progress ─────────────────────┬─ β”œβ”€ cycle progress ─────────────────────┬─ map coverage┴─────────────────────── coverage┴─────────────────────── +β”‚ now processing : β”‚ now processing : 184.1 (88.5%) β”‚ map density : 0.30% / 0.52% β”‚ (88.5%) β”‚ map density : 0.30% / 0.52% β”‚ β”‚ now processing : 184.1 (88.5%) β”‚ map density : 0.30% / 0.52% β”‚ +β”‚ runs timed out β”‚ runs timed out : 0 (0.00%) β”‚ count coverage : 2.18 bits/tuple β”‚ 0 (0.00%) β”‚ count coverage : 2.18 bits/tuple β”‚ +β”œβ”€ stage progress ─────────────────────┼─ β”œβ”€ stage progress ─────────────────────┼─ findings in depth ────────────────── in depth ────────────────── +β”‚ now trying : β”‚ now trying : havoc β”‚ favored items : 43 (20.67%) β”‚ β”‚ favored items : 43 (20.67%) β”‚ +β”‚ stage execs : β”‚ stage execs : 11.2k/131k (8.51%) β”‚ new edges on : 52 (25.00%) β”‚ (8.51%) β”‚ new edges on β”‚ stage execs : 11.2k/131k (8.51%) β”‚ new edges on : 52 (25.00%) β”‚ 52 (25.00%) β”‚ +β”‚ total execs : β”‚ total execs : 179k β”‚ total crashes : 0 (0 saved) β”‚ β”‚ total crashes : 0 (0 saved) β”‚ β”‚ total execs : 179k β”‚ total crashes : 0 (0 saved) β”‚ +β”‚ exec speed : β”‚ exec speed : 3143/sec β”‚ total tmouts : 0 (0 saved) β”‚ β”‚ total tmouts : 0 (0 saved) β”‚ β”‚ exec speed : 3143/sec β”‚ total tmouts : 0 
(0 saved) β”‚ +β”œβ”€ fuzzing strategy yields β”œβ”€ fuzzing strategy yields ────────────┴─────────────┬─ item geometry ──────── item geometry ──────── +β”‚ bit flips : β”‚ bit flips : 11/648, 4/638, 5/618 β”‚ levels : 4 β”‚ 4/638, 5/618 β”‚ levels : β”‚ bit flips : 11/648, 4/638, 5/618 β”‚ levels : 4 β”‚ β”‚ +β”‚ byte flips : β”‚ byte flips : 0/81, 0/71, 0/52 β”‚ pending : 199 β”‚ 0/71, 0/52 β”‚ pending : 199 β”‚ +β”‚ arithmetics : 11/4494, β”‚ arithmetics : 11/4494, 0/1153, 0/0 β”‚ pend fav : 35 β”‚ 0/0 β”‚ pend fav : 35 β”‚ +β”‚ known ints : 1/448, 0/1986, 0/2288 β”‚ own finds : 207 β”‚ known ints : β”‚ known ints : 1/448, 0/1986, 0/2288 β”‚ own finds : 207 β”‚ 0/1986, 0/2288 β”‚ own finds : 207 β”‚ +β”‚ dictionary : 0/0, β”‚ dictionary : 0/0, 0/0, 0/0, 0/0 β”‚ imported : 0 β”‚ 0/0, 0/0 β”‚ imported : 0 β”‚ +β”‚havoc/splice : 142/146k, 23/7616 β”‚havoc/splice : 142/146k, 23/7616 β”‚ stability : 100.00% β”‚ stability : 100.00% β”‚ +β”‚py/custom/rq : unused, unused, β”‚py/custom/rq : unused, unused, unused, unused β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ unused β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +β”‚ trim/eff : 57.02%/26, β”‚ trim/eff : 57.02%/26, 0.00% β”‚ [cpu000:100%] β”‚ [cpu000:100%] +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜^C β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜^C +0 +" \ + "" \ + "\ +─ process timing ────────────────────────────────────┬─ overall results ────┐ +β”‚ run time : 0 days, 0 hrs, 0 min, 56 sec β”‚ cycles done : 0 β”‚ +β”‚ last new find : 0 days, 0 hrs, 0 min, 1 sec β”‚ corpus count : 208 β”‚ +β”‚last saved crash : none seen yet β”‚saved crashes : 0 β”‚ +β”‚ last saved hang : none seen yet β”‚ saved hangs : 0 β”‚ +β”œβ”€ cycle 
progress ─────────────────────┬─ map coverage┴─────────────────────── +β”‚ now processing : 184.1 (88.5%) β”‚ map density : 0.30% / 0.52% β”‚ +β”‚ runs timed out : 0 (0.00%) β”‚ count coverage : 2.18 bits/tuple β”‚ +β”œβ”€ stage progress ─────────────────────┼─ findings in depth ────────────────── +β”‚ now trying : havoc β”‚ favored items : 43 (20.67%) β”‚ +β”‚ stage execs : 11.2k/131k (8.51%) β”‚ new edges on : 52 (25.00%) β”‚ +β”‚ total execs : 179k β”‚ total crashes : 0 (0 saved) β”‚ +β”‚ exec speed : 3143/sec β”‚ total tmouts : 0 (0 saved) β”‚ +β”œβ”€ fuzzing strategy yields ────────────┴─────────────┬─ item geometry ──────── +β”‚ bit flips : 11/648, 4/638, 5/618 β”‚ levels : 4 β”‚ +β”‚ byte flips : 0/81, 0/71, 0/52 β”‚ pending : 199 β”‚ +β”‚ arithmetics : 11/4494, 0/1153, 0/0 β”‚ pend fav : 35 β”‚ +β”‚ known ints : 1/448, 0/1986, 0/2288 β”‚ own finds : 207 β”‚ +β”‚ dictionary : 0/0, 0/0, 0/0, 0/0 β”‚ imported : 0 β”‚ +β”‚havoc/splice : 142/146k, 23/7616 β”‚ stability : 100.00% β”‚ +β”‚py/custom/rq : unused, unused, unused, unused β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +β”‚ trim/eff : 57.02%/26, 0.00% β”‚ [cpu000:100%] +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜^C" + exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From 6d9427420bab4ef756444fc8800dbf56d7dacf7d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 26 May 2023 20:17:04 +0200 Subject: od: -l,I,L indeed depend on sizeof(long), fix this function old new delta .rodata 105255 105252 -3 od_main 1917 1901 -16 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-19) Total: -19 bytes Signed-off-by: Denys Vlasenko --- coreutils/od.c | 21 +++++++++++++-------- coreutils/od_bloaty.c | 10 +++++++--- testsuite/od.tests | 37 +++++++++++++++++++------------------ 3 files changed, 39 
insertions(+), 29 deletions(-) diff --git a/coreutils/od.c b/coreutils/od.c index 3684e4ed3..a7b1ba444 100644 --- a/coreutils/od.c +++ b/coreutils/od.c @@ -170,12 +170,17 @@ static const char *const add_strings[] ALIGN_PTR = { "4/4 \" %15.7e\"" "\"\n\"", /* 7: f */ "4/4 \" %08x\"" "\"\n\"", /* 8: H, X */ "8/2 \" %04x\"" "\"\n\"", /* 9: h, x */ - /* This probably also depends on word width of the arch (what is "long"?) */ - /* should be "2/8" or "4/4" depending on sizeof(long)? */ - "2/8 \" %20lld\"" "\"\n\"", /* 10: I, L, l */ - "4/4 \" %11d\"" "\"\n\"", /* 11: i */ - "4/4 \" %011o\"" "\"\n\"", /* 12: O */ - "8/2 \" %6d\"" "\"\n\"", /* 13: s */ + "4/4 \" %11d\"" "\"\n\"", /* 10: i */ + "4/4 \" %011o\"" "\"\n\"", /* 11: O */ + "8/2 \" %6d\"" "\"\n\"", /* 12: s */ + /* -I,L,l: depend on word width of the arch (what is "long"?) */ +#if ULONG_MAX > 0xffffffff + "2/8 \" %20lld\"" "\"\n\"", /* 13: I, L, l */ +#define L_ 13 +#else + /* 32-bit arch: -I,L,l are the same as -i */ +#define L_ 10 +#endif }; static const char od_opts[] ALIGN1 = "aBbcDdeFfHhIiLlOoXxsv"; @@ -183,8 +188,8 @@ static const char od_opts[] ALIGN1 = "aBbcDdeFfHhIiLlOoXxsv"; static const char od_o2si[] ALIGN1 = { 0, 1, 2, 3, 5, /* aBbcD */ 4, 6, 6, 7, 8, /* deFfH */ - 9, 10, 11, 10, 10, /* hIiLl */ - 12, 1, 8, 9, 13 /* OoXxs */ + 9, L_, 10, L_, L_, /* hIiLl */ + 11, 1, 8, 9, 12 /* OoXxs */ }; int od_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; diff --git a/coreutils/od_bloaty.c b/coreutils/od_bloaty.c index 2782adbf6..e886a4ed2 100644 --- a/coreutils/od_bloaty.c +++ b/coreutils/od_bloaty.c @@ -1257,11 +1257,15 @@ int od_main(int argc UNUSED_PARAM, char **argv) if (opt & OPT_f) decode_format_string("fF"); if (opt & (OPT_h|OPT_x)) decode_format_string("x2"); if (opt & (OPT_H|OPT_X)) decode_format_string("xI"); + /* -I,L,l: depend on word width of the arch (what is "long"?) 
*/ +#if ULONG_MAX > 0xffffffff if (opt & OPT_i) decode_format_string("dI"); + if (opt & (OPT_I|OPT_l|OPT_L)) decode_format_string("dL"); +#else + /* 32-bit arch: -I,L,l are the same as -i */ + if (opt & (OPT_i|OPT_I|OPT_l|OPT_L)) decode_format_string("dI"); +#endif if (opt & OPT_j) n_bytes_to_skip = xstrtooff_sfx(str_j, 0, bkm_suffixes); - /* This probably also depends on word width of the arch (what is "long"?) */ - /* should be "d4" or "d8" depending on sizeof(long)? */ - if (opt & (OPT_I|OPT_l|OPT_L)) decode_format_string("d8"); if (opt & (OPT_o|OPT_B)) decode_format_string("o2"); if (opt & OPT_O) decode_format_string("oI"); while (lst_t) { diff --git a/testsuite/od.tests b/testsuite/od.tests index fce66efbb..4f245a7e8 100755 --- a/testsuite/od.tests +++ b/testsuite/od.tests @@ -8,11 +8,11 @@ input="$(printf '\001\002\003\nABC\xfe')" -le=false -{ printf '\0\1' | od -s | grep -q 256; } && le=true -readonly le +little_endian=false +{ printf '\0\1' | od -s | grep -q 256; } && little_endian=true +readonly little_endian -$le || SKIP=1 +$little_endian || SKIP=1 testing "od (little-endian)" \ "od" \ "\ @@ -70,7 +70,7 @@ testing "od -B" \ "" "$input" SKIP= -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -o (little-endian)" \ "od -o" \ "\ @@ -98,7 +98,7 @@ testing "od -c" \ "" "$input" SKIP= -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -d (little-endian)" \ "od -d" \ "\ @@ -108,7 +108,7 @@ testing "od -d (little-endian)" \ "" "$input" SKIP= -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -D (little-endian)" \ "od -D" \ "\ @@ -119,7 +119,7 @@ testing "od -D (little-endian)" \ SKIP= optional !DESKTOP #DESKTOP: unrecognized option: e -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -e (!DESKTOP little-endian)" \ "od -e" \ "\ @@ -130,7 +130,7 @@ testing "od -e (!DESKTOP little-endian)" \ SKIP= optional !DESKTOP #DESKTOP: unrecognized option: F -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -F (!DESKTOP little-endian)" \ "od -F" \ "\ @@ -140,7 
+140,7 @@ testing "od -F (!DESKTOP little-endian)" \ "" "$input" SKIP= -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -f (little-endian)" \ "od -f" \ "\ @@ -150,7 +150,7 @@ testing "od -f (little-endian)" \ "" "$input" SKIP= -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -H (little-endian)" \ "od -H" \ "\ @@ -160,7 +160,7 @@ testing "od -H (little-endian)" \ "" "$input" SKIP= -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -X (little-endian)" \ "od -X" \ "\ @@ -170,7 +170,7 @@ testing "od -X (little-endian)" \ "" "$input" SKIP= -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -h (little-endian)" \ "od -h" \ "\ @@ -180,7 +180,7 @@ testing "od -h (little-endian)" \ "" "$input" SKIP= -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -x (little-endian)" \ "od -x" \ "\ @@ -190,7 +190,7 @@ testing "od -x (little-endian)" \ "" "$input" SKIP= -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -i (little-endian)" \ "od -i" \ "\ @@ -200,7 +200,7 @@ testing "od -i (little-endian)" \ "" "$input" SKIP= -$le || SKIP=1 +$little_endian || SKIP=1 testing "od -O (little-endian)" \ "od -O" \ "\ @@ -210,8 +210,9 @@ testing "od -O (little-endian)" \ "" "$input" SKIP= -# This probably also depends on word width of the arch (what is "long"?) -$le || SKIP=1 +# 32-bit? 
+printf '00000000' | od -l | grep -q '808464432 *808464432' && SKIP=1 #yes, skip +$little_endian || SKIP=1 testing "od -I (little-endian)" \ "od -I" \ "\ -- cgit v1.2.3-55-g6feb From 9225f9684fedd5c997fc729fee29f8ac402b8db9 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 May 2023 14:51:46 +0200 Subject: libbb/dump: make xxd_displayoff member conditional on xxd With xxd not selected: function old new delta display 1459 1444 -15 Signed-off-by: Denys Vlasenko --- include/dump.h | 8 ++++---- libbb/dump.c | 12 ++++++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/include/dump.h b/include/dump.h index 8fb92f07b..c6763a64d 100644 --- a/include/dump.h +++ b/include/dump.h @@ -33,14 +33,14 @@ typedef struct dumper_t { int dump_length; /* max bytes to read */ smallint dump_vflag; /*enum dump_vflag_t*/ FS *fshead; -#if ENABLE_XXD - const char *xxd_eofstring; -#endif #if ENABLE_OD const char *od_eofstring; #endif - off_t address; /* address/offset in stream */ +#if ENABLE_XXD + const char *xxd_eofstring; long long xxd_displayoff; +#endif + off_t address; /* address/offset in stream */ } dumper_t; dumper_t* alloc_dumper(void) FAST_FUNC; diff --git a/libbb/dump.c b/libbb/dump.c index 49340b5a5..70f15c9bd 100644 --- a/libbb/dump.c +++ b/libbb/dump.c @@ -590,7 +590,11 @@ static NOINLINE void display(priv_dumper_t* dumper) } switch (pr->flags) { case F_ADDRESS: - printf(pr->fmt, (unsigned long long) dumper->pub.address + dumper->pub.xxd_displayoff); + printf(pr->fmt, (unsigned long long) dumper->pub.address +#if ENABLE_XXD + + dumper->pub.xxd_displayoff +#endif + ); break; case F_BPAD: printf(pr->fmt, ""); @@ -699,7 +703,11 @@ static NOINLINE void display(priv_dumper_t* dumper) for (pr = dumper->endfu->nextpr; pr; pr = pr->nextpr) { switch (pr->flags) { case F_ADDRESS: - printf(pr->fmt, (unsigned long long) dumper->eaddress + dumper->pub.xxd_displayoff); + printf(pr->fmt, (unsigned long long) dumper->eaddress +#if ENABLE_XXD + + 
dumper->pub.xxd_displayoff +#endif + ); break; case F_TEXT: fputs_stdout(pr->fmt); -- cgit v1.2.3-55-g6feb From 84ff1825dd82e8de45020e3def34d1430d8e5a99 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 May 2023 16:16:58 +0200 Subject: awk: fix splitting with default FS function old new delta awk_split 543 544 +1 Signed-off-by: Denys Vlasenko --- editors/awk.c | 13 ++++++++----- testsuite/awk.tests | 7 +++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 2af823808..b3748b502 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2049,13 +2049,17 @@ static int awk_split(const char *s, node *spl, char **slist) } return n; } - /* space split */ + /* space split: "In the special case that FS is a single space, + * fields are separated by runs of spaces and/or tabs and/or newlines" + */ while (*s) { - s = skip_whitespace(s); + /* s = skip_whitespace(s); -- WRONG (also skips \v \f \r) */ + while (*s == ' ' || *s == '\t' || *s == '\n') + s++; if (!*s) break; n++; - while (*s && !isspace(*s)) + while (*s && !(*s == ' ' || *s == '\t' || *s == '\n')) *s1++ = *s++; *s1++ = '\0'; } @@ -2304,7 +2308,6 @@ static int awk_getline(rstream *rsm, var *v) setvar_i(intvar[ERRNO], errno); } b[p] = '\0'; - } while (p > pp); if (p == 0) { @@ -3145,7 +3148,7 @@ static var *evaluate(node *op, var *res) /* make sure that we never return a temp var */ if (L.v == TMPVAR0) L.v = res; - /* if source is a temporary string, jusk relink it to dest */ + /* if source is a temporary string, just relink it to dest */ if (R.v == TMPVAR1 && !(R.v->type & VF_NUMBER) /* Why check !NUMBER? 
if R.v is a number but has cached R.v->string, diff --git a/testsuite/awk.tests b/testsuite/awk.tests index ddc51047b..8ab1c6891 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -540,4 +540,11 @@ testing 'awk assign while assign' \ β”‚ trim/eff : 57.02%/26, 0.00% β”‚ [cpu000:100%] β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜^C" +# If field separator FS=' ' (default), fields are split only on +# space or tab or linefeed, NOT other whitespace. +testing 'awk does not split on CR (char 13)' \ + "awk '{ \$1=\$0; print }'" \ + 'word1 word2 word3\r word2 word3\r\n' \ + '' 'word1 word2 word3\r' + exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From 528808bcd25f7d237874dc82fad2adcddf354b42 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 May 2023 18:05:42 +0200 Subject: awk: get rid of one indirection level for iF (input file structure) function old new delta try_to_assign - 91 +91 next_input_file 214 216 +2 awk_main 827 826 -1 evaluate 3403 3396 -7 is_assignment 91 - -91 ------------------------------------------------------------------------------ (add/remove: 1/1 grow/shrink: 1/2 up/down: 93/-99) Total: -6 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 78 +++++++++++++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index b3748b502..22f52417d 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -546,7 +546,6 @@ struct globals { chain beginseq, mainseq, endseq; chain *seq; node *break_ptr, *continue_ptr; - rstream *iF; xhash *ahash; /* argument names, used only while parsing function bodies */ xhash *fnhash; /* function names, used only in parsing stage */ xhash *vhash; /* variables and arrays */ @@ -579,11 +578,12 @@ struct globals2 { var *intvar[NUM_INTERNAL_VARS]; /* often used */ + rstream iF; + /* former statics from various functions */ 
char *split_f0__fstrings; - rstream next_input_file__rsm; - smallint next_input_file__files_happen; + smallint next_input_file__input_file_seen; smalluint exitcode; @@ -618,7 +618,6 @@ struct globals2 { #define seq (G1.seq ) #define break_ptr (G1.break_ptr ) #define continue_ptr (G1.continue_ptr) -#define iF (G1.iF ) #define ahash (G1.ahash ) #define fnhash (G1.fnhash ) #define vhash (G1.vhash ) @@ -644,6 +643,7 @@ struct globals2 { #define t_string (G.t_string ) #define t_lineno (G.t_lineno ) #define intvar (G.intvar ) +#define iF (G.iF ) #define fsplitter (G.fsplitter ) #define rsplitter (G.rsplitter ) #define g_buf (G.g_buf ) @@ -2799,7 +2799,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) /* if expr looks like "var=value", perform assignment and return 1, * otherwise return 0 */ -static int is_assignment(const char *expr) +static int try_to_assign(const char *expr) { char *exprc, *val; @@ -2819,39 +2819,44 @@ static int is_assignment(const char *expr) } /* switch to next input file */ -static rstream *next_input_file(void) +static int next_input_file(void) { -#define rsm (G.next_input_file__rsm) -#define files_happen (G.next_input_file__files_happen) - - const char *fname, *ind; +#define input_file_seen (G.next_input_file__input_file_seen) + const char *fname; - if (rsm.F) - fclose(rsm.F); - rsm.F = NULL; - rsm.pos = rsm.adv = 0; + if (iF.F) { + fclose(iF.F); + iF.F = NULL; + iF.pos = iF.adv = 0; + } for (;;) { + const char *ind; + if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { - if (files_happen) - return NULL; + if (input_file_seen) + return FALSE; fname = "-"; - rsm.F = stdin; + iF.F = stdin; break; } ind = getvar_s(incvar(intvar[ARGIND])); fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); - if (fname && *fname && !is_assignment(fname)) { - rsm.F = xfopen_stdin(fname); + if (fname && *fname) { + /* "If a filename on the command line has the form + * var=val it is treated as a variable assignment" + */ + if (try_to_assign(fname)) 
+ continue; + iF.F = xfopen_stdin(fname); break; } } - files_happen = TRUE; setvar_s(intvar[FILENAME], fname); - return &rsm; -#undef rsm -#undef files_happen + input_file_seen = TRUE; + return TRUE; +#undef input_file_seen } /* @@ -3231,12 +3236,12 @@ static var *evaluate(node *op, var *res) } } } else { - if (!iF) - iF = next_input_file(); - rsm = iF; + if (!iF.F) + next_input_file(); + rsm = &iF; } - if (!rsm || !rsm->F) { + if (!rsm->F) { setvar_i(intvar[ERRNO], errno); setvar_i(res, -1); break; @@ -3659,7 +3664,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) setvar_s(intvar[FS], opt_F); } while (list_v) { - if (!is_assignment(llist_pop(&list_v))) + if (!try_to_assign(llist_pop(&list_v))) bb_show_usage(); } @@ -3718,15 +3723,14 @@ int awk_main(int argc UNUSED_PARAM, char **argv) awk_exit(); /* input file could already be opened in BEGIN block */ - if (!iF) - iF = next_input_file(); - - /* passing through input files */ - while (iF) { + if (!iF.F) + goto next_file; /* no, it wasn't, go try opening */ + /* Iterate over input files */ + for (;;) { nextfile = FALSE; setvar_i(intvar[FNR], 0); - while ((i = awk_getline(iF, intvar[F0])) > 0) { + while ((i = awk_getline(&iF, intvar[F0])) > 0) { nextrec = FALSE; incvar(intvar[NR]); incvar(intvar[FNR]); @@ -3735,11 +3739,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv) if (nextfile) break; } - if (i < 0) syntax_error(strerror(errno)); - - iF = next_input_file(); + next_file: + if (!next_input_file()) + break; } awk_exit(); -- cgit v1.2.3-55-g6feb From 5c8a9dfd976493e4351abadf6686b621763b564c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 May 2023 18:21:38 +0200 Subject: awk: remove a local variable "caching" a struct member Since we take its address, the variable lives on stack (not a GPR). Thus, nothing is improved by caching it. 
function old new delta awk_getline 642 639 -3 Signed-off-by: Denys Vlasenko --- editors/awk.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 22f52417d..4a0eb9281 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2236,7 +2236,7 @@ static int awk_getline(rstream *rsm, var *v) { char *b; regmatch_t pmatch[1]; - int size, a, p, pp = 0; + int a, p, pp = 0; int fd, so, eo, r, rp; char c, *m, *s; @@ -2249,12 +2249,11 @@ static int awk_getline(rstream *rsm, var *v) m = rsm->buffer; a = rsm->adv; p = rsm->pos; - size = rsm->size; c = (char) rsplitter.n.info; rp = 0; if (!m) - m = qrealloc(m, 256, &size); + m = qrealloc(m, 256, &rsm->size); do { b = m + a; @@ -2298,10 +2297,10 @@ static int awk_getline(rstream *rsm, var *v) a = 0; } - m = qrealloc(m, a+p+128, &size); + m = qrealloc(m, a+p+128, &rsm->size); b = m + a; pp = p; - p += safe_read(fd, b+p, size-p-1); + p += safe_read(fd, b+p, rsm->size - p - 1); if (p < pp) { p = 0; r = 0; @@ -2325,7 +2324,6 @@ static int awk_getline(rstream *rsm, var *v) rsm->buffer = m; rsm->adv = a + eo; rsm->pos = p - eo; - rsm->size = size; debug_printf_eval("returning from %s(): %d\n", __func__, r); -- cgit v1.2.3-55-g6feb From 21dce1c3c3d74a60959b6d8b0c76f38d463b8187 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 May 2023 19:11:28 +0200 Subject: awk: do not read ARGIND, only set it (gawk compat) function old new delta next_input_file 216 243 +27 evaluate 3396 3402 +6 awk_main 826 829 +3 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/0 up/down: 36/0) Total: 36 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 4a0eb9281..77e0b0aab 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -583,6 +583,7 @@ struct globals2 { /* former statics from various functions */ char 
*split_f0__fstrings; + unsigned next_input_file__argind; smallint next_input_file__input_file_seen; smalluint exitcode; @@ -2820,6 +2821,7 @@ static int try_to_assign(const char *expr) static int next_input_file(void) { #define input_file_seen (G.next_input_file__input_file_seen) +#define argind (G.next_input_file__argind) const char *fname; if (iF.F) { @@ -2829,17 +2831,22 @@ static int next_input_file(void) } for (;;) { - const char *ind; - - if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { + /* GNU Awk 5.1.1 does not _read_ ARGIND (but does read ARGC). + * It only sets ARGIND to 1, 2, 3... for every command-line filename + * (VAR=VAL params cause a gap in numbering). + * If there are none and stdin is used, then ARGIND is not modified: + * if it is set by e.g. 'BEGIN { ARGIND="foo" }', that value will + * still be there. + */ + argind++; + if (argind >= getvar_i(intvar[ARGC])) { if (input_file_seen) return FALSE; fname = "-"; iF.F = stdin; break; } - ind = getvar_s(incvar(intvar[ARGIND])); - fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); + fname = getvar_s(findvar(iamarray(intvar[ARGV]), utoa(argind))); if (fname && *fname) { /* "If a filename on the command line has the form * var=val it is treated as a variable assignment" @@ -2847,6 +2854,7 @@ static int next_input_file(void) if (try_to_assign(fname)) continue; iF.F = xfopen_stdin(fname); + setvar_i(intvar[ARGIND], argind); break; } } @@ -2854,6 +2862,7 @@ static int next_input_file(void) setvar_s(intvar[FILENAME], fname); input_file_seen = TRUE; return TRUE; +#undef argind #undef input_file_seen } -- cgit v1.2.3-55-g6feb From 9790eb73c8b7ed1a1015f58e1fe85de0508c0526 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 28 May 2023 13:32:07 +0200 Subject: libbb/dump: code shrink function old new delta .rodata 105252 105246 -6 Signed-off-by: Denys Vlasenko --- libbb/dump.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/libbb/dump.c b/libbb/dump.c index 
70f15c9bd..b406a2428 100644 --- a/libbb/dump.c +++ b/libbb/dump.c @@ -47,8 +47,10 @@ typedef struct priv_dumper_t { static const char dot_flags_width_chars[] ALIGN1 = ".#-+ 0123456789"; static const char size_conv_str[] ALIGN1 = -"\x1\x4\x4\x4\x4\x4\x4\x8\x8\x8\x8\010cdiouxXeEfgG"; - +"\x1\x4\x4\x4\x4\x4\x4\x8\x8\x8\x8\x8""cdiouxXeEfgG"; +/* c d i o u x X e E f g G - bytes contain 'bcnt' for the type */ +#define SCS_OFS 12 +#define float_convs (size_conv_str + SCS_OFS + sizeof("cdiouxX")-1) static const char int_convs[] ALIGN1 = "diouxX"; dumper_t* FAST_FUNC alloc_dumper(void) @@ -88,7 +90,7 @@ static NOINLINE int bb_dump_size(FS *fs) while (isdigit(*++fmt)) continue; } - p = strchr(size_conv_str + 12, *fmt); + p = strchr(size_conv_str + SCS_OFS, *fmt); if (!p) { if (*fmt == 's') { bcnt += prec; @@ -100,7 +102,7 @@ static NOINLINE int bb_dump_size(FS *fs) } } } else { - bcnt += p[-12]; + bcnt += p[-SCS_OFS]; } } cur_size += bcnt * fu->reps; @@ -204,7 +206,7 @@ static NOINLINE void rewrite(priv_dumper_t *dumper, FS *fs) if (strchr(int_convs, *p1)) { /* %d etc */ goto DO_INT_CONV; } else - if (strchr("eEfgG", *p1)) { /* floating point */ + if (strchr(float_convs, *p1)) { /* floating point */ pr->flags = F_DBL; byte_count_str = "\010\004"; goto DO_BYTE_COUNT; -- cgit v1.2.3-55-g6feb From b76b420b5da1aadad823faf12327b610614f5951 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 28 May 2023 17:25:56 +0200 Subject: awk: fix closing of non-opened file function old new delta setvar_ERRNO - 53 +53 .rodata 105252 105246 -6 awk_getline 639 620 -19 evaluate 3402 3377 -25 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/3 up/down: 53/-50) Total: 3 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 77e0b0aab..83a08aa95 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1006,6 
+1006,11 @@ static var *setvar_i(var *v, double value) return v; } +static void setvar_ERRNO(void) +{ + setvar_i(intvar[ERRNO], errno); +} + static const char *getvar_s(var *v) { /* if v is numeric and has no cached string, convert it to string */ @@ -2305,7 +2310,7 @@ static int awk_getline(rstream *rsm, var *v) if (p < pp) { p = 0; r = 0; - setvar_i(intvar[ERRNO], errno); + setvar_ERRNO(); } b[p] = '\0'; } while (p > pp); @@ -3249,7 +3254,7 @@ static var *evaluate(node *op, var *res) } if (!rsm->F) { - setvar_i(intvar[ERRNO], errno); + setvar_ERRNO(); setvar_i(res, -1); break; } @@ -3388,16 +3393,18 @@ static var *evaluate(node *op, var *res) */ if (rsm->F) err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F); -//TODO: fix this case: -// $ awk 'BEGIN { print close(""); print ERRNO }' -// -1 -// close of redirection that was never opened -// (we print 0, 0) free(rsm->buffer); hash_remove(fdhash, L.s); + } else { + err = -1; + /* gawk 'BEGIN { print close(""); print ERRNO }' + * -1 + * close of redirection that was never opened + */ + errno = ENOENT; } if (err) - setvar_i(intvar[ERRNO], errno); + setvar_ERRNO(); R_d = (double)err; break; } -- cgit v1.2.3-55-g6feb From 05e60007d42b8e4005085a22e122ef70bf888fa5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 28 May 2023 17:51:59 +0200 Subject: awk: code shrink function old new delta awk_getline 620 591 -29 Signed-off-by: Denys Vlasenko --- editors/awk.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 83a08aa95..eb419e063 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2242,9 +2242,9 @@ static int awk_getline(rstream *rsm, var *v) { char *b; regmatch_t pmatch[1]; - int a, p, pp = 0; - int fd, so, eo, r, rp; - char c, *m, *s; + int p, pp; + int fd, so, eo, retval, rp; + char *m, *s; debug_printf_eval("entered %s()\n", __func__); @@ -2253,22 +2253,22 @@ static int awk_getline(rstream *rsm, var *v) */ fd = 
fileno(rsm->F); m = rsm->buffer; - a = rsm->adv; - p = rsm->pos; - c = (char) rsplitter.n.info; - rp = 0; - if (!m) m = qrealloc(m, 256, &rsm->size); + p = rsm->pos; + rp = 0; + pp = 0; do { - b = m + a; + b = m + rsm->adv; so = eo = p; - r = 1; + retval = 1; if (p > 0) { + char c = (char) rsplitter.n.info; if (rsplitter.n.info == TI_REGEXP) { if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re, - b, 1, pmatch, 0) == 0) { + b, 1, pmatch, 0) == 0 + ) { so = pmatch[0].rm_so; eo = pmatch[0].rm_eo; if (b[eo] != '\0') @@ -2297,43 +2297,44 @@ static int awk_getline(rstream *rsm, var *v) } } - if (a > 0) { - memmove(m, m+a, p+1); + if (rsm->adv > 0) { + memmove(m, m+rsm->adv, p+1); b = m; - a = 0; + rsm->adv = 0; } - m = qrealloc(m, a+p+128, &rsm->size); - b = m + a; + b = m = qrealloc(m, p+128, &rsm->size); pp = p; p += safe_read(fd, b+p, rsm->size - p - 1); if (p < pp) { p = 0; - r = 0; + retval = 0; setvar_ERRNO(); } b[p] = '\0'; } while (p > pp); if (p == 0) { - r--; + retval--; } else { - c = b[so]; b[so] = '\0'; + char c = b[so]; + b[so] = '\0'; setvar_s(v, b+rp); v->type |= VF_USER; b[so] = c; - c = b[eo]; b[eo] = '\0'; + c = b[eo]; + b[eo] = '\0'; setvar_s(intvar[RT], b+so); b[eo] = c; } rsm->buffer = m; - rsm->adv = a + eo; + rsm->adv += eo; rsm->pos = p - eo; - debug_printf_eval("returning from %s(): %d\n", __func__, r); + debug_printf_eval("returning from %s(): %d\n", __func__, retval); - return r; + return retval; } /* formatted output into an allocated buffer, return ptr to buffer */ -- cgit v1.2.3-55-g6feb From 4d7339204f9f823f592562d9903db3ae79a6c640 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 28 May 2023 18:00:51 +0200 Subject: awk: shrink - use setvar_sn() to set variables from non-NUL terminated strings function old new delta setvar_sn - 39 +39 exec_builtin 1145 1136 -9 awk_getline 591 559 -32 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/2 up/down: 39/-41) Total: -2 bytes 
Signed-off-by: Denys Vlasenko --- editors/awk.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index eb419e063..b5774a339 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -979,6 +979,11 @@ static var *setvar_s(var *v, const char *value) return setvar_p(v, (value && *value) ? xstrdup(value) : NULL); } +static var *setvar_sn(var *v, const char *value, int len) +{ + return setvar_p(v, (value && *value && len > 0) ? xstrndup(value, len) : NULL); +} + /* same as setvar_s but sets USER flag */ static var *setvar_u(var *v, const char *value) { @@ -2317,15 +2322,9 @@ static int awk_getline(rstream *rsm, var *v) if (p == 0) { retval--; } else { - char c = b[so]; - b[so] = '\0'; - setvar_s(v, b+rp); + setvar_sn(v, b+rp, so-rp); v->type |= VF_USER; - b[so] = c; - c = b[eo]; - b[eo] = '\0'; - setvar_s(intvar[RT], b+so); - b[eo] = c; + setvar_sn(intvar[RT], b+so, eo-so); } rsm->buffer = m; @@ -2677,8 +2676,6 @@ static NOINLINE var *exec_builtin(node *op, var *res) } case B_ss: { - char *s; - l = strlen(as[0]); i = getvar_i(av[1]) - 1; if (i > l) @@ -2688,8 +2685,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) n = (nargs > 2) ? getvar_i(av[2]) : l-i; if (n < 0) n = 0; - s = xstrndup(as[0]+i, n); - setvar_p(res, s); + setvar_sn(res, as[0]+i, n); break; } @@ -2766,8 +2762,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) i = strftime(g_buf, MAXVARFMT, ((nargs > 0) ? 
as[0] : "%a %b %d %H:%M:%S %Z %Y"), localtime(&tt)); - g_buf[i] = '\0'; - setvar_s(res, g_buf); + setvar_sn(res, g_buf, i); break; case B_mt: -- cgit v1.2.3-55-g6feb From 721bf6eaf4739a2865b071b38d3478f334234d26 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 29 May 2023 10:55:40 +0200 Subject: awk: printf(INVALID_FMT) prints it verbatim function old new delta awk_printf 628 640 +12 Signed-off-by: Denys Vlasenko --- editors/awk.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index b5774a339..c49ad6e02 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2389,7 +2389,7 @@ static char *awk_printf(node *n, size_t *len) while (1) { if (isalpha(c)) break; - if (c == '*') + if (c == '*') /* gawk supports %*d and %*.*f, we don't... */ syntax_error("%*x formats are not supported"); c = *++f; if (!c) { /* "....%...." and no letter found after % */ @@ -2422,12 +2422,18 @@ static char *awk_printf(node *n, size_t *len) double d = getvar_i(arg); if (strchr("diouxX", c)) { //TODO: make it wider here (%x -> %llx etc)? +//Can even print the value into a temp string with %.0f, +//then replace diouxX with s and print that string. 
+//This will correctly print even very large numbers, +//but some replacements are not equivalent: +//%09d -> %09s: breaks zero-padding; +//%+d -> %+s: won't prepend +; etc s = xasprintf(s, (int)d); } else if (strchr("eEfFgGaA", c)) { s = xasprintf(s, d); } else { -//TODO: GNU Awk 5.0.1: printf "%W" prints "%W", does not error out - syntax_error(EMSG_INV_FMT); + /* gawk 5.1.1 printf("%W") prints "%W", does not error out */ + s = xstrndup(s, f - s); } } slen = strlen(s); -- cgit v1.2.3-55-g6feb From fe0b7985483a93d3416e0e5c9e761b6ee1ba310b Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 29 May 2023 14:47:10 +0200 Subject: tunctl: code shrink function old new delta .rodata 105246 105243 -3 tunctl_main 349 344 -5 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-8) Total: -8 bytes Signed-off-by: Denys Vlasenko --- networking/tunctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/networking/tunctl.c b/networking/tunctl.c index 97e6917aa..28571ae7f 100644 --- a/networking/tunctl.c +++ b/networking/tunctl.c @@ -124,8 +124,7 @@ int tunctl_main(int argc UNUSED_PARAM, char **argv) if (opts & OPT_b) { puts(ifr.ifr_name); } else { - printf("Set '%s' %spersistent", ifr.ifr_name, ""); - printf(" and owned by uid %ld", user); + printf("Set '%s' persistent and owned by uid %ld", ifr.ifr_name, user); if (group != -1) printf(" gid %ld", group); bb_putchar('\n'); -- cgit v1.2.3-55-g6feb From 0256e00a9d077588bd3a39f5a1ef7e2eaa2911e4 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 30 May 2023 16:42:18 +0200 Subject: awk: fix precedence of = relative to == Discovered while adding code to disallow assignments to non-lvalues function old new delta parse_expr 936 991 +55 .rodata 105243 105247 +4 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/0 up/down: 59/0) Total: 59 bytes Signed-off-by: Denys Vlasenko --- 
editors/awk.c | 66 ++++++++++++++++++++++++++++++++++++----------------- testsuite/awk.tests | 5 ++++ 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index c49ad6e02..0f062dcdb 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -337,7 +337,9 @@ static void debug_parse_print_tc(uint32_t n) #undef P #undef PRIMASK #undef PRIMASK2 -#define P(x) (x << 24) +/* Smaller 'x' means _higher_ operator precedence */ +#define PRECEDENCE(x) (x << 24) +#define P(x) PRECEDENCE(x) #define PRIMASK 0x7F000000 #define PRIMASK2 0x7E000000 @@ -360,7 +362,7 @@ enum { OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100, OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400, OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700, - OC_DONE = 0x2800, + OC_CONST = 0x2800, OC_DONE = 0x2900, ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200, ST_WHILE = 0x3300 @@ -440,9 +442,9 @@ static const uint32_t tokeninfo[] ALIGN4 = { #define TI_PREINC (OC_UNARY|xV|P(9)|'P') #define TI_PREDEC (OC_UNARY|xV|P(9)|'M') TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), - OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', - OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', + OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-', + OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&', + OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, #define TI_LESS (OC_COMPARE|VV|P(39)|2) @@ -1301,7 +1303,7 @@ static uint32_t next_token(uint32_t expected) save_tclass = tc; save_info 
= t_info; tc = TC_BINOPX; - t_info = OC_CONCAT | SS | P(35); + t_info = OC_CONCAT | SS | PRECEDENCE(35); } t_tclass = tc; @@ -1361,9 +1363,8 @@ static node *parse_expr(uint32_t term_tc) { node sn; node *cn = &sn; - node *vn, *glptr; + node *glptr; uint32_t tc, expected_tc; - var *v; debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); debug_parse_print_tc(term_tc); @@ -1374,11 +1375,12 @@ static node *parse_expr(uint32_t term_tc) expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc; while (!((tc = next_token(expected_tc)) & term_tc)) { + node *vn; if (glptr && (t_info == TI_LESS)) { /* input redirection (<) attached to glptr node */ debug_printf_parse("%s: input redir\n", __func__); - cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); + cn = glptr->l.n = new_node(OC_CONCAT | SS | PRECEDENCE(37)); cn->a.n = glptr; expected_tc = TS_OPERAND | TS_UOPPRE; glptr = NULL; @@ -1390,24 +1392,42 @@ static node *parse_expr(uint32_t term_tc) * previous operators with higher priority */ vn = cn; while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) - || ((t_info == vn->info) && t_info == TI_COLON) + || (t_info == vn->info && t_info == TI_COLON) ) { vn = vn->a.n; if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); } if (t_info == TI_TERNARY) //TODO: why? - t_info += P(6); + t_info += PRECEDENCE(6); cn = vn->a.n->r.n = new_node(t_info); cn->a.n = vn->a.n; if (tc & TS_BINOP) { cn->l.n = vn; -//FIXME: this is the place to detect and reject assignments to non-lvalues. -//Currently we allow "assignments" to consts and temporaries, nonsense like this: -// awk 'BEGIN { "qwe" = 1 }' -// awk 'BEGIN { 7 *= 7 }' -// awk 'BEGIN { length("qwe") = 1 }' -// awk 'BEGIN { (1+1) += 3 }' + + /* Prevent: + * awk 'BEGIN { "qwe" = 1 }' + * awk 'BEGIN { 7 *= 7 }' + * awk 'BEGIN { length("qwe") = 1 }' + * awk 'BEGIN { (1+1) += 3 }' + */ + /* Assignment? 
(including *= and friends) */ + if (((t_info & OPCLSMASK) == OC_MOVE) + || ((t_info & OPCLSMASK) == OC_REPLACE) + ) { + debug_printf_parse("%s: MOVE/REPLACE vn->info:%08x\n", __func__, vn->info); + /* Left side is a (variable or array element) + * or function argument + * or $FIELD ? + */ + if ((vn->info & OPCLSMASK) != OC_VAR + && (vn->info & OPCLSMASK) != OC_FNARG + && (vn->info & OPCLSMASK) != OC_FIELD + ) { + syntax_error(EMSG_UNEXP_TOKEN); /* no. bad */ + } + } + expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; if (t_info == TI_PGETLINE) { /* it's a pipe */ @@ -1443,6 +1463,8 @@ static node *parse_expr(uint32_t term_tc) /* one should be very careful with switch on tclass - * only simple tclasses should be used (TC_xyz, not TS_xyz) */ switch (tc) { + var *v; + case TC_VARIABLE: case TC_ARRAY: debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); @@ -1463,14 +1485,14 @@ static node *parse_expr(uint32_t term_tc) case TC_NUMBER: case TC_STRING: debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); - cn->info = OC_VAR; + cn->info = OC_CONST; v = cn->l.v = xzalloc(sizeof(var)); - if (tc & TC_NUMBER) + if (tc & TC_NUMBER) { setvar_i(v, t_double); - else { + } else { setvar_s(v, t_string); - expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ } + expected_tc &= ~TC_UOPPOST; /* NUM++, "str"++ not allowed */ break; case TC_REGEXP: @@ -3124,6 +3146,8 @@ static var *evaluate(node *op, var *res) /* -- recursive node type -- */ + case XC( OC_CONST ): + debug_printf_eval("CONST "); case XC( OC_VAR ): debug_printf_eval("VAR\n"); L.v = op->l.v; diff --git a/testsuite/awk.tests b/testsuite/awk.tests index 8ab1c6891..cdab93d21 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -547,4 +547,9 @@ testing 'awk does not split on CR (char 13)' \ 'word1 word2 word3\r word2 word3\r\n' \ '' 'word1 word2 word3\r' +testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ + "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print 
v=3==3; print v}'" \ + '0\n1\n2\n1\n3\n' \ + '' '' + exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From 5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 3 Jun 2023 00:39:33 +0200 Subject: awk: fix backslash handling in sub() builtins function old new delta awk_sub 559 544 -15 Signed-off-by: Denys Vlasenko --- editors/awk.c | 41 +++++++++++++++++++---------------------- testsuite/awk.tests | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 22 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 0f062dcdb..f77573806 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2492,7 +2492,7 @@ static char *awk_printf(node *n, size_t *len) * store result into (dest), return number of substitutions. * If nm = 0, replace all matches. * If src or dst is NULL, use $0. - * If subexp != 0, enable subexpression matching (\1-\9). + * If subexp != 0, enable subexpression matching (\0-\9). */ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp) { @@ -2520,35 +2520,32 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int residx += eo; if (++match_no >= nm) { const char *s; - int nbs; + int bslash; /* replace */ residx -= (eo - so); - nbs = 0; + bslash = 0; for (s = repl; *s; s++) { - char c = resbuf[residx++] = *s; - if (c == '\\') { - nbs++; - continue; + char c = *s; + if (c == '\\' && s[1]) { + bslash ^= 1; + if (bslash) + continue; } - if (c == '&' || (subexp && c >= '0' && c <= '9')) { - int j; - residx -= ((nbs + 3) >> 1); - j = 0; + if ((!bslash && c == '&') + || (subexp && bslash && c >= '0' && c <= '9') + ) { + int n, j = 0; if (c != '&') { j = c - '0'; - nbs++; } - if (nbs % 2) { - resbuf[residx++] = c; - } else { - int n = pmatch[j].rm_eo - pmatch[j].rm_so; - resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); - memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); - residx += n; - } - } - nbs = 0; + n = pmatch[j].rm_eo - 
pmatch[j].rm_so; + resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); + memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); + residx += n; + } else + resbuf[residx++] = c; + bslash = 0; } } diff --git a/testsuite/awk.tests b/testsuite/awk.tests index cdab93d21..c61d32947 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -552,4 +552,51 @@ testing "awk = has higher precedence than == (despite what gawk manpage claims)" '0\n1\n2\n1\n3\n' \ '' '' +sq="'" +testing 'awk gensub backslashes \' \ + 'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ + 's=\\ +\\|\\ +' \ + '' '' +testing 'awk gensub backslashes \\' \ + 'awk '$sq'BEGIN { s="\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ + 's=\\\\ +\\|\\ +' \ + '' '' +# gawk 5.1.1 handles trailing unpaired \ inconsistently. +# If replace string is single \, it is used verbatim, +# but if it is \\\ (three slashes), gawk uses "\<NUL>" (!!!), not "\\" as you would expect. +testing 'awk gensub backslashes \\\' \ + 'awk '$sq'BEGIN { s="\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ + 's=\\\\\\ +\\\\|\\\\ +' \ + '' '' +testing 'awk gensub backslashes \\\\' \ + 'awk '$sq'BEGIN { s="\\\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ + 's=\\\\\\\\ +\\\\|\\\\ +' \ + '' '' +testing 'awk gensub backslashes \&' \ + 'awk '$sq'BEGIN { s="\\&"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ + 's=\\& +&|& +' \ + '' '' +testing 'awk gensub backslashes \0' \ + 'awk '$sq'BEGIN { s="\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ + 's=\\0 +a|a +' \ + '' '' +testing 'awk gensub backslashes \\0' \ + 'awk '$sq'BEGIN { s="\\\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ + 's=\\\\0 +\\0|\\0 +' \ + '' '' + exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From f4789164e0716a8b1f98cf4149a3eb2dad485b8b Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 6 Jun 2023 12:48:11 +0200 Subject: awk: code shrink function old new delta awk_sub
544 548 +4 exec_builtin 1136 1130 -6 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 4/-6) Total: -2 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index f77573806..b3871ffc5 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2494,7 +2494,7 @@ static char *awk_printf(node *n, size_t *len) * If src or dst is NULL, use $0. * If subexp != 0, enable subexpression matching (\0-\9). */ -static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp) +static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,int subexp*/) { char *resbuf; const char *sp; @@ -2502,6 +2502,8 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int int regexec_flags; regmatch_t pmatch[10]; regex_t sreg, *regex; + /* True only if called to implement gensub(): */ + int subexp = (src != dest); resbuf = NULL; residx = 0; @@ -2549,7 +2551,6 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int } } - regexec_flags = REG_NOTBOL; sp += eo; if (match_no == nm) break; @@ -2570,6 +2571,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int sp++; residx++; } + regexec_flags = REG_NOTBOL; } resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize); @@ -2798,16 +2800,16 @@ static NOINLINE var *exec_builtin(node *op, var *res) res = do_match(an[1], as[0]); break; - case B_ge: - awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE); + case B_ge: /* gensub(regex, repl, matchnum, string) */ + awk_sub(an[0], as[1], /*matchnum:*/getvar_i(av[2]), /*src:*/av[3], /*dst:*/res/*, TRUE*/); break; - case B_gs: - setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE)); + case B_gs: /* gsub(regex, repl, string) */ + setvar_i(res, awk_sub(an[0], as[1], /*matchnum:all*/0, /*src:*/av[2], 
/*dst:*/av[2]/*, FALSE*/)); break; - case B_su: - setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE)); + case B_su: /* sub(regex, repl, string) */ + setvar_i(res, awk_sub(an[0], as[1], /*matchnum:first*/1, /*src:*/av[2], /*dst:*/av[2]/*, FALSE*/)); break; } -- cgit v1.2.3-55-g6feb From 113685fbcd4c3432ec9b640583d50ba8da2102e8 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 7 Jun 2023 10:54:34 +0200 Subject: awk: fix SEGV on read error in -f PROGFILE function old new delta awk_main 829 843 +14 Signed-off-by: Denys Vlasenko --- editors/awk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index b3871ffc5..df9b7fdc9 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -3609,8 +3609,6 @@ static var *evaluate(node *op, var *res) #undef sreg } -/* -------- main & co. -------- */ - static int awk_exit(void) { unsigned i; @@ -3717,6 +3715,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv) g_progname = llist_pop(&list_f); fd = xopen_stdin(g_progname); s = xmalloc_read(fd, NULL); /* it's NUL-terminated */ + if (!s) + bb_perror_msg_and_die("read error from '%s'", g_progname); close(fd); parse_program(s); free(s); -- cgit v1.2.3-55-g6feb From 2ca39ffd447ca874fcea933194829717d5573247 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 8 Jun 2023 10:42:39 +0200 Subject: awk: fix subst code to handle "start of word" pattern correctly (needs REG_STARTEND) function old new delta awk_sub 637 714 +77 Signed-off-by: Denys Vlasenko --- editors/awk.c | 49 ++++++++++++++++++++++++++++++++++++------------- testsuite/awk.tests | 28 +++++++++++++++------------- 2 files changed, 51 insertions(+), 26 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index df9b7fdc9..171f0a7ea 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2504,17 +2504,46 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in regex_t sreg, *regex; /* True only if called to implement gensub(): */ int subexp = (src 
!= dest); - +#if defined(REG_STARTEND) + const char *src_string; + size_t src_strlen; + regexec_flags = REG_STARTEND; +#else + regexec_flags = 0; +#endif resbuf = NULL; residx = 0; match_no = 0; - regexec_flags = 0; regex = as_regex(rn, &sreg); sp = getvar_s(src ? src : intvar[F0]); +#if defined(REG_STARTEND) + src_string = sp; + src_strlen = strlen(src_string); +#endif replen = strlen(repl); - while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) { - int so = pmatch[0].rm_so; - int eo = pmatch[0].rm_eo; + for (;;) { + int so, eo; + +#if defined(REG_STARTEND) +// REG_STARTEND: "This flag is a BSD extension, not present in POSIX" + size_t start_ofs = sp - src_string; + pmatch[0].rm_so = start_ofs; + pmatch[0].rm_eo = src_strlen; + if (regexec(regex, src_string, 10, pmatch, regexec_flags) != 0) + break; + eo = pmatch[0].rm_eo - start_ofs; + so = pmatch[0].rm_so - start_ofs; +#else +// BUG: +// gsub(/\" (!!!), not "\\" as you would expect. @@ -572,31 +570,35 @@ testing 'awk gensub backslashes \\\' \ 'awk '$sq'BEGIN { s="\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ 's=\\\\\\ \\\\|\\\\ -' \ - '' '' +' '' '' testing 'awk gensub backslashes \\\\' \ 'awk '$sq'BEGIN { s="\\\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ 's=\\\\\\\\ \\\\|\\\\ -' \ - '' '' +' '' '' testing 'awk gensub backslashes \&' \ 'awk '$sq'BEGIN { s="\\&"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ 's=\\& &|& -' \ - '' '' +' '' '' testing 'awk gensub backslashes \0' \ 'awk '$sq'BEGIN { s="\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ 's=\\0 a|a -' \ - '' '' +' '' '' testing 'awk gensub backslashes \\0' \ 'awk '$sq'BEGIN { s="\\\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ 's=\\\\0 \\0|\\0 -' \ +' '' '' + +# The "b" in "abc" should not match