diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2018-04-06 16:44:50 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2018-04-06 16:44:50 +0200 |
commit | d1d6d9c5d8022bcd8f2e7fbd470d293f73adae58 (patch) | |
tree | e31548816f5b868a09e21e39a4beb03451a0d693 | |
parent | 00bd76728d44901a260f2dcdbeed52b3c85d6b6b (diff) | |
download | busybox-w32-d1d6d9c5d8022bcd8f2e7fbd470d293f73adae58.tar.gz busybox-w32-d1d6d9c5d8022bcd8f2e7fbd470d293f73adae58.tar.bz2 busybox-w32-d1d6d9c5d8022bcd8f2e7fbd470d293f73adae58.zip |
sort: smaller and more agressive FEATURE_SORT_OPTIMIZE_MEMORY
function old new delta
sort_main 1098 1037 -61
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | coreutils/sort.c | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/coreutils/sort.c b/coreutils/sort.c index 9909a44a8..4d741e76d 100644 --- a/coreutils/sort.c +++ b/coreutils/sort.c | |||
@@ -18,11 +18,11 @@ | |||
18 | //config: sort is used to sort lines of text in specified files. | 18 | //config: sort is used to sort lines of text in specified files. |
19 | //config: | 19 | //config: |
20 | //config:config FEATURE_SORT_BIG | 20 | //config:config FEATURE_SORT_BIG |
21 | //config: bool "Full SuSv3 compliant sort (support -ktcbdfiozgM)" | 21 | //config: bool "Full SuSv3 compliant sort (support -ktcbdfiogM)" |
22 | //config: default y | 22 | //config: default y |
23 | //config: depends on SORT | 23 | //config: depends on SORT |
24 | //config: help | 24 | //config: help |
25 | //config: Without this, sort only supports -r, -u, -s, and an integer version | 25 | //config: Without this, sort only supports -rusz, and an integer version |
26 | //config: of -n. Selecting this adds sort keys, floating point support, and | 26 | //config: of -n. Selecting this adds sort keys, floating point support, and |
27 | //config: more. This adds a little over 3k to a nonstatic build on x86. | 27 | //config: more. This adds a little over 3k to a nonstatic build on x86. |
28 | //config: | 28 | //config: |
@@ -66,12 +66,10 @@ | |||
66 | //usage: "\n -r Reverse sort order" | 66 | //usage: "\n -r Reverse sort order" |
67 | //usage: "\n -s Stable (don't sort ties alphabetically)" | 67 | //usage: "\n -s Stable (don't sort ties alphabetically)" |
68 | //usage: "\n -u Suppress duplicate lines" | 68 | //usage: "\n -u Suppress duplicate lines" |
69 | //usage: IF_FEATURE_SORT_BIG( | ||
70 | //usage: "\n -z Lines are terminated by NUL, not newline" | 69 | //usage: "\n -z Lines are terminated by NUL, not newline" |
71 | ///////: "\n -m Ignored for GNU compatibility" | 70 | ///////: "\n -m Ignored for GNU compatibility" |
72 | ///////: "\n -S BUFSZ Ignored for GNU compatibility" | 71 | ///////: "\n -S BUFSZ Ignored for GNU compatibility" |
73 | ///////: "\n -T TMPDIR Ignored for GNU compatibility" | 72 | ///////: "\n -T TMPDIR Ignored for GNU compatibility" |
74 | //usage: ) | ||
75 | //usage: | 73 | //usage: |
76 | //usage:#define sort_example_usage | 74 | //usage:#define sort_example_usage |
77 | //usage: "$ echo -e \"e\\nf\\nb\\nd\\nc\\na\" | sort\n" | 75 | //usage: "$ echo -e \"e\\nf\\nb\\nd\\nc\\na\" | sort\n" |
@@ -413,6 +411,7 @@ int sort_main(int argc UNUSED_PARAM, char **argv) | |||
413 | #if ENABLE_FEATURE_SORT_OPTIMIZE_MEMORY | 411 | #if ENABLE_FEATURE_SORT_OPTIMIZE_MEMORY |
414 | bool can_drop_dups; | 412 | bool can_drop_dups; |
415 | size_t prev_len = 0; | 413 | size_t prev_len = 0; |
414 | char *prev_line = (char*) ""; | ||
416 | /* Postpone optimizing if the input is small, < 16k lines: | 415 | /* Postpone optimizing if the input is small, < 16k lines: |
417 | * even just free()ing duplicate lines takes time. | 416 | * even just free()ing duplicate lines takes time. |
418 | */ | 417 | */ |
@@ -533,32 +532,33 @@ int sort_main(int argc UNUSED_PARAM, char **argv) | |||
533 | if (count_to_optimize_dups == 0) { | 532 | if (count_to_optimize_dups == 0) { |
534 | size_t len; | 533 | size_t len; |
535 | char *new_line; | 534 | char *new_line; |
536 | char first = *line; | ||
537 | 535 | ||
538 | /* On kernel/linux/arch/ *.[ch] files, | 536 | /* On kernel/linux/arch/ *.[ch] files, |
539 | * this reduces memory usage by 6%. | 537 | * this reduces memory usage by 6%. |
540 | * yes | head -99999999 | sort | 538 | * yes | head -99999999 | sort |
541 | * goes down from 1900Mb to 380 Mb. | 539 | * goes down from 1900Mb to 380 Mb. |
542 | */ | 540 | */ |
543 | if (first == '\0' || first == '\n') { | ||
544 | len = !(first == '\0'); | ||
545 | new_line = (char*)"\n" + 1 - len; | ||
546 | goto replace; | ||
547 | } | ||
548 | len = strlen(line); | 541 | len = strlen(line); |
549 | if (len <= prev_len) { | 542 | if (len <= prev_len) { |
550 | new_line = lines[linecount-1] + (prev_len - len); | 543 | new_line = prev_line + (prev_len - len); |
551 | if (strcmp(line, new_line) == 0) { | 544 | if (strcmp(line, new_line) == 0) { |
545 | /* it's a tail of the prev line */ | ||
552 | if (can_drop_dups && prev_len == len) { | 546 | if (can_drop_dups && prev_len == len) { |
547 | /* it's identical to prev line */ | ||
553 | free(line); | 548 | free(line); |
554 | continue; | 549 | continue; |
555 | } | 550 | } |
556 | replace: | ||
557 | free(line); | 551 | free(line); |
558 | line = new_line; | 552 | line = new_line; |
553 | /* continue using longer prev_line | ||
554 | * for future tail tests. | ||
555 | */ | ||
556 | goto skip; | ||
559 | } | 557 | } |
560 | } | 558 | } |
561 | prev_len = len; | 559 | prev_len = len; |
560 | prev_line = line; | ||
561 | skip: ; | ||
562 | } | 562 | } |
563 | #else | 563 | #else |
564 | //TODO: lighter version which only drops total dups if can_drop_dups == true | 564 | //TODO: lighter version which only drops total dups if can_drop_dups == true |