aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2018-04-06 16:44:50 +0200
committerDenys Vlasenko <vda.linux@googlemail.com>2018-04-06 16:44:50 +0200
commitd1d6d9c5d8022bcd8f2e7fbd470d293f73adae58 (patch)
treee31548816f5b868a09e21e39a4beb03451a0d693
parent00bd76728d44901a260f2dcdbeed52b3c85d6b6b (diff)
downloadbusybox-w32-d1d6d9c5d8022bcd8f2e7fbd470d293f73adae58.tar.gz
busybox-w32-d1d6d9c5d8022bcd8f2e7fbd470d293f73adae58.tar.bz2
busybox-w32-d1d6d9c5d8022bcd8f2e7fbd470d293f73adae58.zip
sort: smaller and more agressive FEATURE_SORT_OPTIMIZE_MEMORY
function old new delta sort_main 1098 1037 -61 Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--coreutils/sort.c24
1 files changed, 12 insertions, 12 deletions
diff --git a/coreutils/sort.c b/coreutils/sort.c
index 9909a44a8..4d741e76d 100644
--- a/coreutils/sort.c
+++ b/coreutils/sort.c
@@ -18,11 +18,11 @@
18//config: sort is used to sort lines of text in specified files. 18//config: sort is used to sort lines of text in specified files.
19//config: 19//config:
20//config:config FEATURE_SORT_BIG 20//config:config FEATURE_SORT_BIG
21//config: bool "Full SuSv3 compliant sort (support -ktcbdfiozgM)" 21//config: bool "Full SuSv3 compliant sort (support -ktcbdfiogM)"
22//config: default y 22//config: default y
23//config: depends on SORT 23//config: depends on SORT
24//config: help 24//config: help
25//config: Without this, sort only supports -r, -u, -s, and an integer version 25//config: Without this, sort only supports -rusz, and an integer version
26//config: of -n. Selecting this adds sort keys, floating point support, and 26//config: of -n. Selecting this adds sort keys, floating point support, and
27//config: more. This adds a little over 3k to a nonstatic build on x86. 27//config: more. This adds a little over 3k to a nonstatic build on x86.
28//config: 28//config:
@@ -66,12 +66,10 @@
66//usage: "\n -r Reverse sort order" 66//usage: "\n -r Reverse sort order"
67//usage: "\n -s Stable (don't sort ties alphabetically)" 67//usage: "\n -s Stable (don't sort ties alphabetically)"
68//usage: "\n -u Suppress duplicate lines" 68//usage: "\n -u Suppress duplicate lines"
69//usage: IF_FEATURE_SORT_BIG(
70//usage: "\n -z Lines are terminated by NUL, not newline" 69//usage: "\n -z Lines are terminated by NUL, not newline"
71///////: "\n -m Ignored for GNU compatibility" 70///////: "\n -m Ignored for GNU compatibility"
72///////: "\n -S BUFSZ Ignored for GNU compatibility" 71///////: "\n -S BUFSZ Ignored for GNU compatibility"
73///////: "\n -T TMPDIR Ignored for GNU compatibility" 72///////: "\n -T TMPDIR Ignored for GNU compatibility"
74//usage: )
75//usage: 73//usage:
76//usage:#define sort_example_usage 74//usage:#define sort_example_usage
77//usage: "$ echo -e \"e\\nf\\nb\\nd\\nc\\na\" | sort\n" 75//usage: "$ echo -e \"e\\nf\\nb\\nd\\nc\\na\" | sort\n"
@@ -413,6 +411,7 @@ int sort_main(int argc UNUSED_PARAM, char **argv)
413#if ENABLE_FEATURE_SORT_OPTIMIZE_MEMORY 411#if ENABLE_FEATURE_SORT_OPTIMIZE_MEMORY
414 bool can_drop_dups; 412 bool can_drop_dups;
415 size_t prev_len = 0; 413 size_t prev_len = 0;
414 char *prev_line = (char*) "";
416 /* Postpone optimizing if the input is small, < 16k lines: 415 /* Postpone optimizing if the input is small, < 16k lines:
417 * even just free()ing duplicate lines takes time. 416 * even just free()ing duplicate lines takes time.
418 */ 417 */
@@ -533,32 +532,33 @@ int sort_main(int argc UNUSED_PARAM, char **argv)
533 if (count_to_optimize_dups == 0) { 532 if (count_to_optimize_dups == 0) {
534 size_t len; 533 size_t len;
535 char *new_line; 534 char *new_line;
536 char first = *line;
537 535
538 /* On kernel/linux/arch/ *.[ch] files, 536 /* On kernel/linux/arch/ *.[ch] files,
539 * this reduces memory usage by 6%. 537 * this reduces memory usage by 6%.
540 * yes | head -99999999 | sort 538 * yes | head -99999999 | sort
541 * goes down from 1900Mb to 380 Mb. 539 * goes down from 1900Mb to 380 Mb.
542 */ 540 */
543 if (first == '\0' || first == '\n') {
544 len = !(first == '\0');
545 new_line = (char*)"\n" + 1 - len;
546 goto replace;
547 }
548 len = strlen(line); 541 len = strlen(line);
549 if (len <= prev_len) { 542 if (len <= prev_len) {
550 new_line = lines[linecount-1] + (prev_len - len); 543 new_line = prev_line + (prev_len - len);
551 if (strcmp(line, new_line) == 0) { 544 if (strcmp(line, new_line) == 0) {
545 /* it's a tail of the prev line */
552 if (can_drop_dups && prev_len == len) { 546 if (can_drop_dups && prev_len == len) {
547 /* it's identical to prev line */
553 free(line); 548 free(line);
554 continue; 549 continue;
555 } 550 }
556 replace:
557 free(line); 551 free(line);
558 line = new_line; 552 line = new_line;
553 /* continue using longer prev_line
554 * for future tail tests.
555 */
556 goto skip;
559 } 557 }
560 } 558 }
561 prev_len = len; 559 prev_len = len;
560 prev_line = line;
561 skip: ;
562 } 562 }
563#else 563#else
564//TODO: lighter version which only drops total dups if can_drop_dups == true 564//TODO: lighter version which only drops total dups if can_drop_dups == true