From 610c4c385b38280c7bde7a48d95ec019cbfe1ab4 Mon Sep 17 00:00:00 2001 From: Ron Yorston Date: Wed, 30 Mar 2016 00:42:05 +0200 Subject: applet_tables: save space by removing applet name offsets The array applet_nameofs consumes two bytes per applet. It encodes: nofork/noexec flags; suid flags; the offset of the applet name in the applet_names string. Change the applet_table build tool to store the flags in two separate arrays (applet_flags and applet_suid). Replace applet_nameofs[] with a smaller version that only stores a limited number of offsets. This requires changes to the macros APPLET_IS_NOFORK, APPLET_IS_NOEXEC and APPLET_SUID. According to Valgrind the original find_applet_by_name required 353 cycles per call, averaged over all names. Adjusting the number of known offsets allows space to be traded off against execution time: KNOWN_OFFSETS | cycles | bytes (wrt KNOWN_OFFSETS = 0): 0 | 9057 | -; 2 | 4604 | 32; 4 | 2407 | 75; 8 | 1342 | 98; 16 | 908 | 130; 32 | 884 | 194. This patch uses KNOWN_OFFSETS = 8. v2: Remove some dead code from the applet_table tool; Treat the applet in the middle of the table as a special case. v3: Use the middle applet to adjust the start of the linear search as well as the last applet found. v4: Use an augmented linear search in find_applet_by_name. Drop the special treatment of the middle name from get_applet_name: most of the advantage now derives from the last stored value. v5: Don't store index in applet_nameofs, it can be calculated. 
v6: Tweaks by Denys function old new delta find_applet_by_name 25 125 +100 applet_suid - 92 +92 run_applet_no_and_exit 452 460 +8 run_applet_and_exit 695 697 +2 applet_name_compare 31 - -31 applet_nameofs 734 14 -720 ------------------------------------------------------------------------------ (add/remove: 1/1 grow/shrink: 3/1 up/down: 202/-751) Total: -549 bytes text data bss dec hex filename 925464 906 17160 943530 e65aa busybox_old 924915 906 17160 942981 e6385 busybox_unstripped Signed-off-by: Ron Yorston Signed-off-by: Denys Vlasenko --- libbb/appletlib.c | 82 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 28 deletions(-) (limited to 'libbb/appletlib.c') diff --git a/libbb/appletlib.c b/libbb/appletlib.c index 95e589e74..aeaf238f1 100644 --- a/libbb/appletlib.c +++ b/libbb/appletlib.c @@ -139,36 +139,56 @@ void FAST_FUNC bb_show_usage(void) xfunc_die(); } -#if NUM_APPLETS > 8 -static int applet_name_compare(const void *name, const void *idx) -{ - int i = (int)(ptrdiff_t)idx - 1; - return strcmp(name, APPLET_NAME(i)); -} -#endif int FAST_FUNC find_applet_by_name(const char *name) { -#if NUM_APPLETS > 8 - /* Do a binary search to find the applet entry given the name. 
*/ + unsigned i, max; + int j; const char *p; - p = bsearch(name, (void*)(ptrdiff_t)1, ARRAY_SIZE(applet_main), 1, applet_name_compare); - /* - * if (!p) return -1; - * ^^^^^^^^^^^^^^^^^^ the code below will do this if p == NULL :) - */ - return (int)(ptrdiff_t)p - 1; + + p = applet_names; + i = 0; +#if KNOWN_APPNAME_OFFSETS <= 0 + max = NUM_APPLETS; #else - /* A version which does not pull in bsearch */ - int i = 0; - const char *p = applet_names; - while (i < NUM_APPLETS) { - if (strcmp(name, p) == 0) - return i; - p += strlen(p) + 1; + max = NUM_APPLETS * KNOWN_APPNAME_OFFSETS; + for (j = ARRAY_SIZE(applet_nameofs)-1; j >= 0; j--) { + const char *pp = applet_names + applet_nameofs[j]; + if (strcmp(name, pp) >= 0) { + //bb_error_msg("name:'%s' >= pp:'%s'", name, pp); + p = pp; + i = max - NUM_APPLETS; + break; + } + max -= NUM_APPLETS; + } + max /= (unsigned)KNOWN_APPNAME_OFFSETS; + i /= (unsigned)KNOWN_APPNAME_OFFSETS; + //bb_error_msg("name:'%s' starting from:'%s' i:%u max:%u", name, p, i, max); +#endif + + /* Open-coding without strcmp/strlen calls for speed */ + while (i < max) { + char ch; + j = 0; + /* Do we see "name\0" in applet_names[p] position? */ + while ((ch = *p) == name[j]) { + if (ch == '\0') { + //bb_error_msg("found:'%s' i:%u", name, i); + return i; /* yes */ + } + p++; + j++; + } + /* No. + * p => 1st non-matching char in applet_names[], + * skip to and including NUL. + */ + while (ch != '\0') + ch = *++p; + p++; i++; } return -1; -#endif } @@ -583,6 +603,7 @@ static void install_links(const char *busybox, int use_symbolic_links, * busybox.h::bb_install_loc_t, or else... */ int (*lf)(const char *, const char *); char *fpc; + const char *appname = applet_names; unsigned i; int rc; @@ -593,7 +614,7 @@ static void install_links(const char *busybox, int use_symbolic_links, for (i = 0; i < ARRAY_SIZE(applet_main); i++) { fpc = concat_path_file( custom_install_dir ? 
custom_install_dir : install_dir[APPLET_INSTALL_LOC(i)], - APPLET_NAME(i)); + appname); // debug: bb_error_msg("%slinking %s to busybox", // use_symbolic_links ? "sym" : "", fpc); rc = lf(busybox, fpc); @@ -601,6 +622,8 @@ static void install_links(const char *busybox, int use_symbolic_links, bb_simple_perror_msg(fpc); } free(fpc); + while (*appname++ != '\0') + continue; } } # else @@ -754,7 +777,7 @@ void FAST_FUNC run_applet_no_and_exit(int applet_no, char **argv) /* Reinit some shared global data */ xfunc_error_retval = EXIT_FAILURE; - applet_name = APPLET_NAME(applet_no); + applet_name = bb_get_last_path_component_nostrip(argv[0]); /* Special case. POSIX says "test --help" * should be no different from e.g. "test --foo". @@ -785,11 +808,14 @@ void FAST_FUNC run_applet_no_and_exit(int applet_no, char **argv) void FAST_FUNC run_applet_and_exit(const char *name, char **argv) { - int applet = find_applet_by_name(name); - if (applet >= 0) - run_applet_no_and_exit(applet, argv); + int applet; + if (is_prefixed_with(name, "busybox")) exit(busybox_main(argv)); + /* find_applet_by_name() search is more expensive, so goes second */ + applet = find_applet_by_name(name); + if (applet >= 0) + run_applet_no_and_exit(applet, argv); } #endif /* !defined(SINGLE_APPLET_MAIN) */ -- cgit v1.2.3-55-g6feb From a93e4fd376d990ead254657228e75715b74ca0ac Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 2 Apr 2016 22:54:23 +0200 Subject: find_applet_by_name: add an example of faster linear search code Signed-off-by: Denys Vlasenko --- libbb/appletlib.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 3 deletions(-) (limited to 'libbb/appletlib.c') diff --git a/libbb/appletlib.c b/libbb/appletlib.c index aeaf238f1..18583f91a 100644 --- a/libbb/appletlib.c +++ b/libbb/appletlib.c @@ -141,10 +141,28 @@ void FAST_FUNC bb_show_usage(void) int FAST_FUNC find_applet_by_name(const char *name) { - unsigned i, max; - int j; + unsigned i, j, 
max; const char *p; +/* The commented-out word-at-a-time code is ~40% faster, but +160 bytes. + * "Faster" here saves ~0.5 microsecond of real time - not worth it. + */ +#if 0 /*BB_UNALIGNED_MEMACCESS_OK && BB_LITTLE_ENDIAN*/ + uint32_t n32; + + /* Handle all names < 2 chars long early */ + if (name[0] == '\0') + return -1; /* "" is not a valid applet name */ + if (name[1] == '\0') { + if (!ENABLE_TEST) + return -1; /* 1-char name is not valid */ + if (name[0] != ']') + return -1; /* 1-char name which isn't "[" is not valid */ + /* applet "[" is always applet #0: */ + return 0; + } +#endif + p = applet_names; i = 0; #if KNOWN_APPNAME_OFFSETS <= 0 @@ -166,7 +184,62 @@ int FAST_FUNC find_applet_by_name(const char *name) //bb_error_msg("name:'%s' starting from:'%s' i:%u max:%u", name, p, i, max); #endif - /* Open-coding without strcmp/strlen calls for speed */ + /* Open-coded linear seatch without strcmp/strlen calls for speed */ + +#if 0 /*BB_UNALIGNED_MEMACCESS_OK && BB_LITTLE_ENDIAN*/ + /* skip "[\0" name, it's surely not it */ + if (ENABLE_TEST && LONE_CHAR(p, '[')) + i++, p += 2; + /* All remaining applet names in p[] are at least 2 chars long */ + /* name[] is also at least 2 chars long */ + + n32 = (name[0] << 0) | (name[1] << 8) | (name[2] << 16); + while (i < max) { + uint32_t p32; + char ch; + + /* Quickly check match of the first 3 bytes */ + move_from_unaligned32(p32, p); + p += 3; + if ((p32 & 0x00ffffff) != n32) { + /* Most likely case: 3 first bytes do not match */ + i++; + if ((p32 & 0x00ff0000) == '\0') + continue; // p[2] was NUL + p++; + if ((p32 & 0xff000000) == '\0') + continue; // p[3] was NUL + /* p[0..3] aren't matching and none is NUL, check the rest */ + while (*p++ != '\0') + continue; + continue; + } + + /* Unlikely branch: first 3 bytes ([0..2]) match */ + if ((p32 & 0x00ff0000) == '\0') { + /* name is 2-byte long, it is full match */ + //bb_error_msg("found:'%s' i:%u", name, i); + return i; + } + /* Check remaining bytes [3..NUL] */ + ch 
= (p32 >> 24); + j = 3; + while (ch == name[j]) { + if (ch == '\0') { + //bb_error_msg("found:'%s' i:%u", name, i); + return i; + } + ch = *++p; + j++; + } + /* Not a match. Skip it, including NUL */ + while (ch != '\0') + ch = *++p; + p++; + i++; + } + return -1; +#else while (i < max) { char ch; j = 0; @@ -189,6 +262,7 @@ int FAST_FUNC find_applet_by_name(const char *name) i++; } return -1; +#endif } -- cgit v1.2.3-55-g6feb From 1cf68e303328671f74dfd9f7d24e6c9f91d18969 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 2 Apr 2016 22:57:17 +0200 Subject: typo fix Signed-off-by: Denys Vlasenko --- libbb/appletlib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'libbb/appletlib.c') diff --git a/libbb/appletlib.c b/libbb/appletlib.c index 18583f91a..4b5b09f45 100644 --- a/libbb/appletlib.c +++ b/libbb/appletlib.c @@ -184,7 +184,7 @@ int FAST_FUNC find_applet_by_name(const char *name) //bb_error_msg("name:'%s' starting from:'%s' i:%u max:%u", name, p, i, max); #endif - /* Open-coded linear seatch without strcmp/strlen calls for speed */ + /* Open-coded linear search without strcmp/strlen calls for speed */ #if 0 /*BB_UNALIGNED_MEMACCESS_OK && BB_LITTLE_ENDIAN*/ /* skip "[\0" name, it's surely not it */ -- cgit v1.2.3-55-g6feb From bc14f4d13d3cf1d43ae809d641e29174662cd1e4 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 3 Apr 2016 16:06:42 +0200 Subject: main(): add a TODO about finding a use for _end[] area Signed-off-by: Denys Vlasenko --- libbb/appletlib.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'libbb/appletlib.c') diff --git a/libbb/appletlib.c b/libbb/appletlib.c index 4b5b09f45..d798a2eac 100644 --- a/libbb/appletlib.c +++ b/libbb/appletlib.c @@ -902,6 +902,19 @@ int lbb_main(char **argv) int main(int argc UNUSED_PARAM, char **argv) #endif { +#if 0 + /* TODO: find a use for a block of memory between end of .bss + * and end of page. 
For example, I'm getting "_end:0x812e698 2408 bytes" + * - more than 2k of wasted memory (in this particular build) + * *per each running process*! + * (If your linker does not generate "_end" name, weak attribute + * makes &_end == NULL, end_len == 0 here.) + */ + extern char _end[] __attribute__((weak)); + unsigned end_len = (-(int)_end) & 0xfff; + printf("_end:%p %u bytes\n", &_end, end_len); +#endif + /* Tweak malloc for reduced memory consumption */ #ifdef M_TRIM_THRESHOLD /* M_TRIM_THRESHOLD is the maximum amount of freed top-most memory -- cgit v1.2.3-55-g6feb From b22061718db0111f9e7474f9b60aef02456ac070 Mon Sep 17 00:00:00 2001 From: Ron Yorston Date: Sun, 3 Apr 2016 22:29:35 +0200 Subject: find_applet_by_name: loop index should be signed The loop for (j = ARRAY_SIZE(applet_nameofs)-1; j >= 0; j--) { was intended to terminate when j goes negative, so j needs to be signed. Signed-off-by: Ron Yorston Signed-off-by: Denys Vlasenko --- libbb/appletlib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'libbb/appletlib.c') diff --git a/libbb/appletlib.c b/libbb/appletlib.c index d798a2eac..de654f64c 100644 --- a/libbb/appletlib.c +++ b/libbb/appletlib.c @@ -141,7 +141,8 @@ void FAST_FUNC bb_show_usage(void) int FAST_FUNC find_applet_by_name(const char *name) { - unsigned i, j, max; + unsigned i, max; + int j; const char *p; /* The commented-out word-at-a-time code is ~40% faster, but +160 bytes. -- cgit v1.2.3-55-g6feb