aboutsummaryrefslogtreecommitdiff
path: root/coreutils
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2018-04-04 17:02:32 +0200
committerDenys Vlasenko <vda.linux@googlemail.com>2018-04-04 17:03:41 +0200
commitc29c2e60d8677cdec142a88609a3d040e4719439 (patch)
tree9d23487088699fb4f1b4aabd9361c4a7c7d21edf /coreutils
parentee1fd1246e7d25990531dbbb2d267605b8914d5d (diff)
downloadbusybox-w32-c29c2e60d8677cdec142a88609a3d040e4719439.tar.gz
busybox-w32-c29c2e60d8677cdec142a88609a3d040e4719439.tar.bz2
busybox-w32-c29c2e60d8677cdec142a88609a3d040e4719439.zip
sort: FEATURE_SORT_OPTIMIZE_MEMORY
On sorting all kernel/linux/arch/ *.[ch] files, this reduces memory usage by 6%. yes | head -99999999 | sort goes down from 1900Mb to 380 Mb. function old new delta sort_main 862 1098 +236 Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'coreutils')
-rw-r--r--coreutils/sort.c86
1 files changed, 80 insertions, 6 deletions
diff --git a/coreutils/sort.c b/coreutils/sort.c
index b39297a26..b8bb6871d 100644
--- a/coreutils/sort.c
+++ b/coreutils/sort.c
@@ -18,16 +18,24 @@
18//config: sort is used to sort lines of text in specified files. 18//config: sort is used to sort lines of text in specified files.
19//config: 19//config:
20//config:config FEATURE_SORT_BIG 20//config:config FEATURE_SORT_BIG
21//config: bool "Full SuSv3 compliant sort (support -ktcsbdfiozgM)" 21//config: bool "Full SuSv3 compliant sort (support -ktcbdfiozgM)"
22//config: default y 22//config: default y
23//config: depends on SORT 23//config: depends on SORT
24//config: help 24//config: help
25//config: Without this, sort only supports -r, -u, and an integer version 25//config: Without this, sort only supports -r, -u, -s, and an integer version
26//config: of -n. Selecting this adds sort keys, floating point support, and 26//config: of -n. Selecting this adds sort keys, floating point support, and
27//config: more. This adds a little over 3k to a nonstatic build on x86. 27//config: more. This adds a little over 3k to a nonstatic build on x86.
28//config: 28//config:
29//config: The SuSv3 sort standard is available at: 29//config: The SuSv3 sort standard is available at:
30//config: http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html 30//config: http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html
31//config:
32//config:config FEATURE_SORT_OPTIMIZE_MEMORY
33//config: bool "Use less memory (but might be slower)"
34//config: default n # defaults to N since we are size-paranoid tribe
35//config: depends on SORT
36//config: help
37//config: Attempt to use less memory (by storing only one copy
38//config: of duplicated lines, and such). Useful if you work on huge files.
31 39
32//applet:IF_SORT(APPLET_NOEXEC(sort, sort, BB_DIR_USR_BIN, BB_SUID_DROP, sort)) 40//applet:IF_SORT(APPLET_NOEXEC(sort, sort, BB_DIR_USR_BIN, BB_SUID_DROP, sort))
33 41
@@ -46,19 +54,17 @@
46//usage: "\n -f Ignore case" 54//usage: "\n -f Ignore case"
47//usage: "\n -i Ignore unprintable characters" 55//usage: "\n -i Ignore unprintable characters"
48//usage: "\n -d Dictionary order (blank or alphanumeric only)" 56//usage: "\n -d Dictionary order (blank or alphanumeric only)"
49//usage: "\n -g General numerical sort"
50//usage: "\n -M Sort month"
51//usage: ) 57//usage: )
52//-h, --human-numeric-sort: compare human readable numbers (e.g., 2K 1G) 58//-h, --human-numeric-sort: compare human readable numbers (e.g., 2K 1G)
53//usage: "\n -n Sort numbers" 59//usage: "\n -n Sort numbers"
54//usage: IF_FEATURE_SORT_BIG( 60//usage: IF_FEATURE_SORT_BIG(
61//usage: "\n -g General numerical sort"
62//usage: "\n -M Sort month"
55//usage: "\n -t CHAR Field separator" 63//usage: "\n -t CHAR Field separator"
56//usage: "\n -k N[,M] Sort by Nth field" 64//usage: "\n -k N[,M] Sort by Nth field"
57//usage: ) 65//usage: )
58//usage: "\n -r Reverse sort order" 66//usage: "\n -r Reverse sort order"
59//usage: IF_FEATURE_SORT_BIG(
60//usage: "\n -s Stable (don't sort ties alphabetically)" 67//usage: "\n -s Stable (don't sort ties alphabetically)"
61//usage: )
62//usage: "\n -u Suppress duplicate lines" 68//usage: "\n -u Suppress duplicate lines"
63//usage: IF_FEATURE_SORT_BIG( 69//usage: IF_FEATURE_SORT_BIG(
64//usage: "\n -z Lines are terminated by NUL, not newline" 70//usage: "\n -z Lines are terminated by NUL, not newline"
@@ -404,6 +410,14 @@ int sort_main(int argc UNUSED_PARAM, char **argv)
404 int i; 410 int i;
405 int linecount; 411 int linecount;
406 unsigned opts; 412 unsigned opts;
413#if ENABLE_FEATURE_SORT_OPTIMIZE_MEMORY
414 bool can_drop_dups;
415 size_t prev_len = 0;
416 /* Postpone optimizing if the input is small, < 16k lines:
417 * even just free()ing duplicate lines takes time.
418 */
419 size_t count_to_optimize_dups = 0x3fff;
420#endif
407 421
408 xfunc_error_retval = 2; 422 xfunc_error_retval = 2;
409 423
@@ -412,6 +426,29 @@ int sort_main(int argc UNUSED_PARAM, char **argv)
412 sort_opt_str, 426 sort_opt_str,
413 &str_ignored, &str_ignored, &str_o, &lst_k, &str_t 427 &str_ignored, &str_ignored, &str_o, &lst_k, &str_t
414 ); 428 );
429#if ENABLE_FEATURE_SORT_OPTIMIZE_MEMORY
430 /* Can drop dups only if -u but no "complicating" options,
431 * IOW: if we do a full line compares. Safe options:
432 * -o FILE Output to FILE
433 * -z Lines are terminated by NUL, not newline
434 * -r Reverse sort order
435 * -s Stable (don't sort ties alphabetically)
436 * Not sure about these:
437 * -b Ignore leading blanks
438 * -f Ignore case
439 * -i Ignore unprintable characters
440 * -d Dictionary order (blank or alphanumeric only)
441 * -n Sort numbers
442 * -g General numerical sort
443 * -M Sort month
444 */
445 can_drop_dups = ((opts & ~(FLAG_o|FLAG_z|FLAG_r|FLAG_s)) == FLAG_u);
446 /* Stable sort needs every line to be uniquely allocated,
447 * disable optimization to reuse strings:
448 */
449 if (opts & FLAG_s)
450 count_to_optimize_dups = (size_t)-1L;
451#endif
415 /* global b strips leading and trailing spaces */ 452 /* global b strips leading and trailing spaces */
416 if (opts & FLAG_b) 453 if (opts & FLAG_b)
417 option_mask32 |= FLAG_bb; 454 option_mask32 |= FLAG_bb;
@@ -489,6 +526,41 @@ int sort_main(int argc UNUSED_PARAM, char **argv)
489 char *line = GET_LINE(fp); 526 char *line = GET_LINE(fp);
490 if (!line) 527 if (!line)
491 break; 528 break;
529
530#if ENABLE_FEATURE_SORT_OPTIMIZE_MEMORY
531 if (count_to_optimize_dups != 0)
532 count_to_optimize_dups--;
533 if (count_to_optimize_dups == 0) {
534 size_t len;
535 char *new_line;
536 char first = *line;
537
538 /* On kernel/linux/arch/ *.[ch] files,
539 * this reduces memory usage by 6%.
540 * yes | head -99999999 | sort
541 * goes down from 1900Mb to 380 Mb.
542 */
543 if (first == '\0' || first == '\n') {
544 len = !(first == '\0');
545 new_line = (char*)"\n" + 1 - len;
546 goto replace;
547 }
548 len = strlen(line);
549 if (len <= prev_len) {
550 new_line = lines[linecount-1] + (prev_len - len);
551 if (strcmp(line, new_line) == 0) {
552 if (can_drop_dups && prev_len == len) {
553 free(line);
554 continue;
555 }
556 replace:
557 free(line);
558 line = new_line;
559 }
560 }
561 prev_len = len;
562 }
563#endif
492 lines = xrealloc_vector(lines, 6, linecount); 564 lines = xrealloc_vector(lines, 6, linecount);
493 lines[linecount++] = line; 565 lines[linecount++] = line;
494 } 566 }
@@ -510,6 +582,8 @@ int sort_main(int argc UNUSED_PARAM, char **argv)
510 } 582 }
511 return EXIT_SUCCESS; 583 return EXIT_SUCCESS;
512 } 584 }
585#else
586//TODO: lighter version which only drops total dups if can_drop_dups == true
513#endif 587#endif
514 588
515 /* For stable sort, store original line position beyond terminating NUL */ 589 /* For stable sort, store original line position beyond terminating NUL */