author    Ron Yorston <rmy@pobox.com>  2022-01-06 07:46:38 +0000
committer Ron Yorston <rmy@pobox.com>  2022-01-06 07:46:38 +0000
commit    b8751bbc9ac24e71fbe1e79c69074b4c87a134d8 (patch)
tree      336d653df8387b9b1d3c6e46caa373c00cb9b2b2 /libbb
parent    b15f68214da209b5b293039c09c00f490c0cc193 (diff)
parent    6062c0d19bc201cbeb61b8875598cdd7a14a5ae0 (diff)
Merge busybox into merge
Fix merge conflict in miscutils/less.c. Use exit_SUCCESS() where possible.
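
(For context, a minimal sketch of the helper the commit message refers to, assuming exit_SUCCESS() is upstream busybox's no-argument wrapper around exit(EXIT_SUCCESS). The point of such a wrapper is binary size: a no-argument call is shorter at every call site than loading EXIT_SUCCESS into an argument register first. This is an illustration, not the verbatim busybox source:)

	#include <stdlib.h>

	/* Sketch: every call site of exit(EXIT_SUCCESS) can shrink to a
	 * bare "call exit_SUCCESS" with no argument setup. */
	void exit_SUCCESS(void)
	{
		exit(EXIT_SUCCESS);
	}
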
Diffstat (limited to 'libbb')
-rw-r--r--  libbb/Config.src                 26
-rw-r--r--  libbb/Kbuild.src                  1
-rw-r--r--  libbb/fflush_stdout_and_exit.c    7
-rw-r--r--  libbb/hash_md5_sha.c            415
-rw-r--r--  libbb/hash_md5_sha_x86-64.S    1289
-rwxr-xr-x  libbb/hash_md5_sha_x86-64.S.sh  281
-rw-r--r--  libbb/vfork_daemon_rexec.c        4
-rw-r--r--  libbb/xfuncs.c                   10
-rw-r--r--  libbb/xfuncs_printf.c             7
9 files changed, 2014 insertions, 26 deletions
diff --git a/libbb/Config.src b/libbb/Config.src
index 24b31fad9..c80bee286 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -42,21 +42,33 @@ config MD5_SMALL
 	default 1 # all "fast or small" options default to small
 	range 0 3
 	help
-	Trade binary size versus speed for the md5sum algorithm.
+	Trade binary size versus speed for the md5 algorithm.
 	Approximate values running uClibc and hashing
 	linux-2.4.4.tar.bz2 were:
 	value user times (sec) text size (386)
 	0 (fastest) 1.1 6144
 	1 1.4 5392
 	2 3.0 5088
 	3 (smallest) 5.1 4912
+
+config SHA1_SMALL
+	int "SHA1: Trade bytes for speed (0:fast, 3:slow)"
+	default 3 # all "fast or small" options default to small
+	range 0 3
+	help
+	Trade binary size versus speed for the sha1 algorithm.
+	throughput MB/s size of sha1_process_block64
+	value 486 x86-64 486 x86-64
+	0 367 375 3657 3502
+	1 224 229 654 732
+	2,3 200 195 358 380
 
 config SHA3_SMALL
 	int "SHA3: Trade bytes for speed (0:fast, 1:slow)"
 	default 1 # all "fast or small" options default to small
 	range 0 1
 	help
-	Trade binary size versus speed for the sha3sum algorithm.
+	Trade binary size versus speed for the sha3 algorithm.
 	SHA3_SMALL=0 compared to SHA3_SMALL=1 (approximate):
 	64-bit x86: +270 bytes of code, 45% faster
 	32-bit x86: +450 bytes of code, 75% faster
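
(The new SHA1_SMALL knob is consumed at compile time, the same way as MD5_SMALL and SHA3_SMALL. A minimal compilable sketch of the selection pattern, mirroring the #if chain the hash_md5_sha.c hunk below adds; the real dispatch additionally tests __GNUC__, __i386__ and __x86_64__ to pick the assembly variants. Build with e.g. -DCONFIG_SHA1_SMALL=0:)

	#include <stdio.h>

	#ifndef CONFIG_SHA1_SMALL
	# define CONFIG_SHA1_SMALL 3   /* the Kconfig default is "small" */
	#endif

	int main(void)
	{
	#if CONFIG_SHA1_SMALL == 0
		puts("fully unrolled sha1_process_block64 (fastest, +3800 bytes on x86)");
	#elif CONFIG_SHA1_SMALL == 1
		puts("middle-sized version, +300 bytes of code on x86");
	#else
		puts("compact version, almost twice as slow as fully unrolled");
	#endif
		return 0;
	}
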
diff --git a/libbb/Kbuild.src b/libbb/Kbuild.src
index 9b37b174d..41bf54e75 100644
--- a/libbb/Kbuild.src
+++ b/libbb/Kbuild.src
@@ -45,6 +45,7 @@ lib-y += lineedit.o lineedit_ptr_hack.o
 lib-y += llist.o
 lib-y += make_directory.o
 lib-y += hash_md5_sha.o
+lib-y += hash_md5_sha_x86-64.o
 # Alternative (disabled) MD5 implementation
 #lib-y += hash_md5prime.o
 lib-y += messages.o
diff --git a/libbb/fflush_stdout_and_exit.c b/libbb/fflush_stdout_and_exit.c
index 5df74170e..33e28ae34 100644
--- a/libbb/fflush_stdout_and_exit.c
+++ b/libbb/fflush_stdout_and_exit.c
@@ -13,10 +13,15 @@
  */
 void FAST_FUNC fflush_stdout_and_exit(int retval)
 {
-	xfunc_error_retval = retval;
 	if (fflush(stdout))
 		bb_simple_perror_msg_and_die(bb_msg_standard_output);
+	xfunc_error_retval = retval;
 	/* In case we are in NOFORK applet. Do not exit() directly,
 	 * but use xfunc_die() */
 	xfunc_die();
 }
+
+void FAST_FUNC fflush_stdout_and_exit_SUCCESS(void)
+{
+	fflush_stdout_and_exit(EXIT_SUCCESS);
+}
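
(Two things happen in this hunk: moving the xfunc_error_retval assignment below the fflush() check means a failed flush now dies through bb_simple_perror_msg_and_die() with that path's own exit code rather than the caller-supplied retval, and the new wrapper covers the common EXIT_SUCCESS case. A hedged usage sketch with a hypothetical applet tail, assuming the libbb.h declarations:)

	#include "libbb.h"

	/* foo_main is hypothetical; the wrapper never returns,
	 * since fflush_stdout_and_exit() ends in xfunc_die(). */
	int foo_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
	{
		/* ... write the applet's output to stdout ... */
		fflush_stdout_and_exit_SUCCESS();
	}
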
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index e0db8ce67..ee19c1cb7 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -8,6 +8,9 @@
  */
 #include "libbb.h"
 
+#define STR1(s) #s
+#define STR(s) STR1(s)
+
 #define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA)
 
 /* gcc 4.2.1 optimizes rotr64 better with inline than with macro
@@ -390,7 +393,6 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
 	OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
 	OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
 	OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
-# undef OP
 # endif
 	/* Add checksum to the starting values */
 	ctx->hash[0] += A;
@@ -399,6 +401,7 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
 	ctx->hash[3] += D;
 #endif
 }
+#undef OP
 #undef FF
 #undef FG
 #undef FH
@@ -490,18 +493,410 @@ unsigned FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
  * then rebuild and compare "shaNNNsum bigfile" results.
  */
 
+#if CONFIG_SHA1_SMALL == 0
+# if defined(__GNUC__) && defined(__i386__)
+static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
+{
+	BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 76);
+	asm(
+"\n\
+	pushl %ebp # \n\
+	pushl %edi # \n\
+	pushl %esi # \n\
+	pushl %ebx # \n\
+	pushl %eax \n\
+	movl $15, %edi \n\
+1: \n\
+	movl (%eax,%edi,4), %esi \n\
+	bswap %esi \n\
+	pushl %esi \n\
+	decl %edi \n\
+	jns 1b \n\
+	movl 80(%eax), %ebx # b = ctx->hash[1] \n\
+	movl 84(%eax), %ecx # c = ctx->hash[2] \n\
+	movl 88(%eax), %edx # d = ctx->hash[3] \n\
+	movl 92(%eax), %ebp # e = ctx->hash[4] \n\
+	movl 76(%eax), %eax # a = ctx->hash[0] \n\
+#Register and stack use: \n\
+# eax..edx: a..d \n\
+# ebp: e \n\
+# esi,edi: temps \n\
+# 4*n(%esp): W[n] \n\
+"
+#define RD1As(a,b,c,d,e, n, RCONST) \
+"\n\
+	##movl 4*"n"(%esp), %esi # n=0, W[0] already in %esi \n\
+	movl "c", %edi # c \n\
+	xorl "d", %edi # ^d \n\
+	andl "b", %edi # &b \n\
+	xorl "d", %edi # (((c ^ d) & b) ^ d) \n\
+	leal "RCONST"("e",%esi), "e" # e += RCONST + W[n] \n\
+	addl %edi, "e" # e += (((c ^ d) & b) ^ d) \n\
+	movl "a", %esi # \n\
+	roll $5, %esi # rotl32(a,5) \n\
+	addl %esi, "e" # e += rotl32(a,5) \n\
+	rorl $2, "b" # b = rotl32(b,30) \n\
+"
+#define RD1Bs(a,b,c,d,e, n, RCONST) \
+"\n\
+	movl 4*"n"(%esp), %esi # W[n] \n\
+	movl "c", %edi # c \n\
+	xorl "d", %edi # ^d \n\
+	andl "b", %edi # &b \n\
+	xorl "d", %edi # (((c ^ d) & b) ^ d) \n\
+	leal "RCONST"("e",%esi), "e" # e += RCONST + W[n] \n\
+	addl %edi, "e" # e += (((c ^ d) & b) ^ d) \n\
+	movl "a", %esi # \n\
+	roll $5, %esi # rotl32(a,5) \n\
+	addl %esi, "e" # e += rotl32(a,5) \n\
+	rorl $2, "b" # b = rotl32(b,30) \n\
+"
+#define RD1Cs(a,b,c,d,e, n13,n8,n2,n, RCONST) \
+"\n\
+	movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\
+	xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\
+	xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\
+	xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\
+	roll %esi # \n\
+	movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\
+	movl "c", %edi # c \n\
+	xorl "d", %edi # ^d \n\
+	andl "b", %edi # &b \n\
+	xorl "d", %edi # (((c ^ d) & b) ^ d) \n\
+	leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\
+	addl %edi, "e" # e += (((c ^ d) & b) ^ d) \n\
+	movl "a", %esi # \n\
+	roll $5, %esi # rotl32(a,5) \n\
+	addl %esi, "e" # e += rotl32(a,5) \n\
+	rorl $2, "b" # b = rotl32(b,30) \n\
+"
+#define RD1A(a,b,c,d,e, n) RD1As("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR((n)), STR(RCONST))
+#define RD1B(a,b,c,d,e, n) RD1Bs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR((n)), STR(RCONST))
+#define RD1C(a,b,c,d,e, n) RD1Cs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST))
+#undef RCONST
+#define RCONST 0x5A827999
+	RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4)
+	RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1B(cx,dx,bp,ax,bx, 8) RD1B(bx,cx,dx,bp,ax, 9)
+	RD1B(ax,bx,cx,dx,bp,10) RD1B(bp,ax,bx,cx,dx,11) RD1B(dx,bp,ax,bx,cx,12) RD1B(cx,dx,bp,ax,bx,13) RD1B(bx,cx,dx,bp,ax,14)
+	RD1B(ax,bx,cx,dx,bp,15) RD1C(bp,ax,bx,cx,dx,16) RD1C(dx,bp,ax,bx,cx,17) RD1C(cx,dx,bp,ax,bx,18) RD1C(bx,cx,dx,bp,ax,19)
+#define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \
+"\n\
+	movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\
+	xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\
+	xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\
+	xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\
+	roll %esi # \n\
+	movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\
+	movl "c", %edi # c \n\
+	xorl "d", %edi # ^d \n\
+	xorl "b", %edi # ^b \n\
+	leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\
+	addl %edi, "e" # e += (c ^ d ^ b) \n\
+	movl "a", %esi # \n\
+	roll $5, %esi # rotl32(a,5) \n\
+	addl %esi, "e" # e += rotl32(a,5) \n\
+	rorl $2, "b" # b = rotl32(b,30) \n\
+"
+#define RD2(a,b,c,d,e, n) RD2s("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((20+n+13)&15)), STR(((20+n+8)&15)), STR(((20+n+2)&15)), STR(((20+n)&15)), STR(RCONST))
+#undef RCONST
+#define RCONST 0x6ED9EBA1
+	RD2(ax,bx,cx,dx,bp, 0) RD2(bp,ax,bx,cx,dx, 1) RD2(dx,bp,ax,bx,cx, 2) RD2(cx,dx,bp,ax,bx, 3) RD2(bx,cx,dx,bp,ax, 4)
+	RD2(ax,bx,cx,dx,bp, 5) RD2(bp,ax,bx,cx,dx, 6) RD2(dx,bp,ax,bx,cx, 7) RD2(cx,dx,bp,ax,bx, 8) RD2(bx,cx,dx,bp,ax, 9)
+	RD2(ax,bx,cx,dx,bp,10) RD2(bp,ax,bx,cx,dx,11) RD2(dx,bp,ax,bx,cx,12) RD2(cx,dx,bp,ax,bx,13) RD2(bx,cx,dx,bp,ax,14)
+	RD2(ax,bx,cx,dx,bp,15) RD2(bp,ax,bx,cx,dx,16) RD2(dx,bp,ax,bx,cx,17) RD2(cx,dx,bp,ax,bx,18) RD2(bx,cx,dx,bp,ax,19)
+
+#define RD3s(a,b,c,d,e, n13,n8,n2,n, RCONST) \
+"\n\
+	movl "b", %edi # di: b \n\
+	movl "b", %esi # si: b \n\
+	orl "c", %edi # di: b | c \n\
+	andl "c", %esi # si: b & c \n\
+	andl "d", %edi # di: (b | c) & d \n\
+	orl %esi, %edi # ((b | c) & d) | (b & c) \n\
+	movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\
+	xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\
+	xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\
+	xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\
+	roll %esi # \n\
+	movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\
+	addl %edi, "e" # += ((b | c) & d) | (b & c)\n\
+	leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\
+	movl "a", %esi # \n\
+	roll $5, %esi # rotl32(a,5) \n\
+	addl %esi, "e" # e += rotl32(a,5) \n\
+	rorl $2, "b" # b = rotl32(b,30) \n\
+"
+#define RD3(a,b,c,d,e, n) RD3s("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((40+n+13)&15)), STR(((40+n+8)&15)), STR(((40+n+2)&15)), STR(((40+n)&15)), STR(RCONST))
+#undef RCONST
+#define RCONST 0x8F1BBCDC
+	RD3(ax,bx,cx,dx,bp, 0) RD3(bp,ax,bx,cx,dx, 1) RD3(dx,bp,ax,bx,cx, 2) RD3(cx,dx,bp,ax,bx, 3) RD3(bx,cx,dx,bp,ax, 4)
+	RD3(ax,bx,cx,dx,bp, 5) RD3(bp,ax,bx,cx,dx, 6) RD3(dx,bp,ax,bx,cx, 7) RD3(cx,dx,bp,ax,bx, 8) RD3(bx,cx,dx,bp,ax, 9)
+	RD3(ax,bx,cx,dx,bp,10) RD3(bp,ax,bx,cx,dx,11) RD3(dx,bp,ax,bx,cx,12) RD3(cx,dx,bp,ax,bx,13) RD3(bx,cx,dx,bp,ax,14)
+	RD3(ax,bx,cx,dx,bp,15) RD3(bp,ax,bx,cx,dx,16) RD3(dx,bp,ax,bx,cx,17) RD3(cx,dx,bp,ax,bx,18) RD3(bx,cx,dx,bp,ax,19)
+
+#define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \
+"\n\
+	movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\
+	xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\
+	xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\
+	xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\
+	roll %esi # \n\
+	movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\
+	movl "c", %edi # c \n\
+	xorl "d", %edi # ^d \n\
+	xorl "b", %edi # ^b \n\
+	leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\
+	addl %edi, "e" # e += (c ^ d ^ b) \n\
+	movl "a", %esi # \n\
+	roll $5, %esi # rotl32(a,5) \n\
+	addl %esi, "e" # e += rotl32(a,5) \n\
+	rorl $2, "b" # b = rotl32(b,30) \n\
+"
+#define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \
+"\n\
+	movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\
+	xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\
+	xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\
+	xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\
+	roll %esi # \n\
+	##movl %esi, 4*"n"(%esp) # store to W[n & 15] elided \n\
+	movl "c", %edi # c \n\
+	xorl "d", %edi # ^d \n\
+	xorl "b", %edi # ^b \n\
+	leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\
+	addl %edi, "e" # e += (c ^ d ^ b) \n\
+	movl "a", %esi # \n\
+	roll $5, %esi # rotl32(a,5) \n\
+	addl %esi, "e" # e += rotl32(a,5) \n\
+	rorl $2, "b" # b = rotl32(b,30) \n\
+"
+#define RD4A(a,b,c,d,e, n) RD4As("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST))
+#define RD4B(a,b,c,d,e, n) RD4Bs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST))
+#undef RCONST
+#define RCONST 0xCA62C1D6
+	RD4A(ax,bx,cx,dx,bp, 0) RD4A(bp,ax,bx,cx,dx, 1) RD4A(dx,bp,ax,bx,cx, 2) RD4A(cx,dx,bp,ax,bx, 3) RD4A(bx,cx,dx,bp,ax, 4)
+	RD4A(ax,bx,cx,dx,bp, 5) RD4A(bp,ax,bx,cx,dx, 6) RD4A(dx,bp,ax,bx,cx, 7) RD4A(cx,dx,bp,ax,bx, 8) RD4A(bx,cx,dx,bp,ax, 9)
+	RD4A(ax,bx,cx,dx,bp,10) RD4A(bp,ax,bx,cx,dx,11) RD4A(dx,bp,ax,bx,cx,12) RD4A(cx,dx,bp,ax,bx,13) RD4A(bx,cx,dx,bp,ax,14)
+	RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19)
+
+"\n\
+	movl 4*16(%esp), %esi # \n\
+	addl $4*(16+1), %esp # \n\
+	addl %eax, 76(%esi) # ctx->hash[0] += a \n\
+	addl %ebx, 80(%esi) # ctx->hash[1] += b \n\
+	addl %ecx, 84(%esi) # ctx->hash[2] += c \n\
+	addl %edx, 88(%esi) # ctx->hash[3] += d \n\
+	addl %ebp, 92(%esi) # ctx->hash[4] += e \n\
+	popl %ebx # \n\
+	popl %esi # \n\
+	popl %edi # \n\
+	popl %ebp # \n\
+"
+	); /* asm */
+#undef RCONST
+}
+# elif defined(__GNUC__) && defined(__x86_64__)
+
+/* in hash_md5_sha_x86-64.S */
+struct ASM_expects_80 { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 80)]; };
+void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM);
+
+# else
+/* Fast, fully-unrolled SHA1. +3800 bytes of code on x86.
+ * It seems further speedup can be achieved by handling more than
+ * 64 bytes per one function call (coreutils does that).
+ */
+static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
+{
+	static const uint32_t rconsts[] ALIGN4 = {
+		0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
+	};
+	uint32_t W[16];
+	uint32_t a, b, c, d, e;
+
+	a = ctx->hash[0];
+	b = ctx->hash[1];
+	c = ctx->hash[2];
+	d = ctx->hash[3];
+	e = ctx->hash[4];
+
+/* From kernel source comments:
+ * """
+ * If you have 32 registers or more, the compiler can (and should)
+ * try to change the array[] accesses into registers. However, on
+ * machines with less than ~25 registers, that won't really work,
+ * and at least gcc will make an unholy mess of it.
+ *
+ * So to avoid that mess which just slows things down, we force
+ * the stores to memory to actually happen (we might be better off
+ * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
+ * suggested by Artur Skawina - that will also make gcc unable to
+ * try to do the silly "optimize away loads" part because it won't
+ * see what the value will be).
+ * """
+ */
+#if defined(__GNUC__) && defined(__i386__)
+# define DO_NOT_TRY_PROPAGATING(m) asm("":"+m"(m))
+#else
+# define DO_NOT_TRY_PROPAGATING(m) ((void)0)
+#endif
+
+#undef OP
+#define OP(A,B,C,D,E, n) \
+	do { \
+		uint32_t work = EXPR(B, C, D); \
+		if (n <= 15) \
+			work += W[n & 15] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \
+		if (n >= 16) \
+			work += W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1); \
+		DO_NOT_TRY_PROPAGATING(W[n & 15]); \
+		E += work + rotl32(A, 5) + rconsts[n / 20]; \
+		B = rotl32(B, 30); \
+	} while (0)
+#define OP20(n) \
+	OP(a,b,c,d,e, (n+ 0)); OP(e,a,b,c,d, (n+ 1)); OP(d,e,a,b,c, (n+ 2)); OP(c,d,e,a,b, (n+ 3)); OP(b,c,d,e,a, (n+ 4)); \
+	OP(a,b,c,d,e, (n+ 5)); OP(e,a,b,c,d, (n+ 6)); OP(d,e,a,b,c, (n+ 7)); OP(c,d,e,a,b, (n+ 8)); OP(b,c,d,e,a, (n+ 9)); \
+	OP(a,b,c,d,e, (n+10)); OP(e,a,b,c,d, (n+11)); OP(d,e,a,b,c, (n+12)); OP(c,d,e,a,b, (n+13)); OP(b,c,d,e,a, (n+14)); \
+	OP(a,b,c,d,e, (n+15)); OP(e,a,b,c,d, (n+16)); OP(d,e,a,b,c, (n+17)); OP(c,d,e,a,b, (n+18)); OP(b,c,d,e,a, (n+19))
+
+	/* 4 rounds of 20 operations each */
+#define EXPR(b,c,d) (((c ^ d) & b) ^ d)
+	OP20(0);
+#undef EXPR
+#define EXPR(b,c,d) (c ^ d ^ b)
+	OP20(20);
+#undef EXPR
+#define EXPR(b,c,d) (((b | c) & d) | (b & c))
+	OP20(40);
+#undef EXPR
+#define EXPR(b,c,d) (c ^ d ^ b)
+	OP20(60);
+
+#undef EXPR
+#undef OP
+#undef OP20
+
+	ctx->hash[0] += a;
+	ctx->hash[1] += b;
+	ctx->hash[2] += c;
+	ctx->hash[3] += d;
+	ctx->hash[4] += e;
+}
+# endif
+#elif CONFIG_SHA1_SMALL == 1
+/* Middle-sized version, +300 bytes of code on x86. */
+static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
+{
+	static const uint32_t rconsts[] ALIGN4 = {
+		0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
+	};
+	int j;
+	int n;
+	uint32_t W[16+16];
+	uint32_t a, b, c, d, e;
+
+	a = ctx->hash[0];
+	b = ctx->hash[1];
+	c = ctx->hash[2];
+	d = ctx->hash[3];
+	e = ctx->hash[4];
+
+	/* 1st round of 20 operations */
+	n = 0;
+	do {
+		uint32_t work = ((c ^ d) & b) ^ d;
+		W[n] = W[n+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]);
+		work += W[n];
+		work += e + rotl32(a, 5) + rconsts[0];
+		/* Rotate by one for next time */
+		e = d;
+		d = c;
+		c = rotl32(b, 30);
+		b = a;
+		a = work;
+		n = (n + 1) & 15;
+	} while (n != 0);
+	do {
+		uint32_t work = ((c ^ d) & b) ^ d;
+		W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
+		work += W[n];
+		work += e + rotl32(a, 5) + rconsts[0];
+		e = d;
+		d = c;
+		c = rotl32(b, 30);
+		b = a;
+		a = work;
+		n = (n + 1) /* & 15*/;
+	} while (n != 4);
+	/* 2nd round of 20 operations */
+	j = 19;
+	do {
+		uint32_t work = c ^ d ^ b;
+		W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
+		work += W[n];
+		work += e + rotl32(a, 5) + rconsts[1];
+		e = d;
+		d = c;
+		c = rotl32(b, 30);
+		b = a;
+		a = work;
+		n = (n + 1) & 15;
+	} while (--j >= 0);
+	/* 3rd round */
+	j = 19;
+	do {
+		uint32_t work = ((b | c) & d) | (b & c);
+		W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
+		work += W[n];
+		work += e + rotl32(a, 5) + rconsts[2];
+		e = d;
+		d = c;
+		c = rotl32(b, 30);
+		b = a;
+		a = work;
+		n = (n + 1) & 15;
+	} while (--j >= 0);
+	/* 4th round */
+	j = 19;
+	do {
+		uint32_t work = c ^ d ^ b;
+		W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
+		work += W[n];
+		work += e + rotl32(a, 5) + rconsts[3];
+		e = d;
+		d = c;
+		c = rotl32(b, 30);
+		b = a;
+		a = work;
+		n = (n + 1) & 15;
+	} while (--j >= 0);
+
+	ctx->hash[0] += a;
+	ctx->hash[1] += b;
+	ctx->hash[2] += c;
+	ctx->hash[3] += d;
+	ctx->hash[4] += e;
+}
+#else
+/* Compact version, almost twice as slow as fully unrolled */
 static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 {
 	static const uint32_t rconsts[] ALIGN4 = {
 		0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
 	};
 	int i, j;
-	int cnt;
+	int n;
 	uint32_t W[16+16];
 	uint32_t a, b, c, d, e;
 
 	/* On-stack work buffer frees up one register in the main loop
-	 * which otherwise will be needed to hold ctx pointer */
+	 * which otherwise will be needed to hold ctx pointer.
+	 *
+	 * The compiler is not smart enough to realize it, though. :(
+	 * If __attribute__((optimize("2"))) is added to the function,
+	 * only then gcc-9.3.1 spills "ctx" to stack and uses the freed
+	 * register (making code 6 bytes smaller, not just faster).
+	 */
 	for (i = 0; i < 16; i++)
 		W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
 
@@ -512,7 +907,7 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 	e = ctx->hash[4];
 
 	/* 4 rounds of 20 operations each */
-	cnt = 0;
+	n = 0;
 	for (i = 0; i < 4; i++) {
 		j = 19;
 		do {
@@ -523,27 +918,24 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 				work = (work & b) ^ d;
 				if (j <= 3)
 					goto ge16;
-				/* Used to do SWAP_BE32 here, but this
-				 * requires ctx (see comment above) */
-				work += W[cnt];
 			} else {
 				if (i == 2)
 					work = ((b | c) & d) | (b & c);
 				else /* i = 1 or 3 */
 					work ^= b;
 ge16:
-				W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
-				work += W[cnt];
+				W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
 			}
+			work += W[n];
 			work += e + rotl32(a, 5) + rconsts[i];
 
 			/* Rotate by one for next time */
 			e = d;
 			d = c;
-			c = /* b = */ rotl32(b, 30);
+			c = rotl32(b, 30);
 			b = a;
 			a = work;
-			cnt = (cnt + 1) & 15;
+			n = (n + 1) & 15;
 		} while (--j >= 0);
 	}
 
@@ -553,6 +945,7 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 	ctx->hash[3] += d;
 	ctx->hash[4] += e;
 }
+#endif
 
 /* Constants for SHA512 from FIPS 180-2:4.2.3.
  * SHA256 constants from FIPS 180-2:4.2.2
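
(A reading aid connecting the C and the assembly, both above and in the new file below: "b = rotl32(b, 30)" is implemented as "rorl $2", since for 32-bit values a left-rotate by 30 equals a right-rotate by 2. A minimal illustrative rotate helper, valid for 0 < n < 32; libbb defines its own rotl32, so this is a sketch, not the library source:)

	#include <stdint.h>

	/* rotl32(x, 30) == x rotated right by 2, hence "rorl $2" in the asm. */
	static inline uint32_t rotl32(uint32_t x, unsigned n)
	{
		return (x << n) | (x >> (32 - n));
	}
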
diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S
new file mode 100644
index 000000000..ff78fc049
--- /dev/null
+++ b/libbb/hash_md5_sha_x86-64.S
@@ -0,0 +1,1289 @@
+### Generated by hash_md5_sha_x86-64.S.sh ###
+
+#if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__)
+	.section .text.sha1_process_block64,"ax",@progbits
+	.globl sha1_process_block64
+	.hidden sha1_process_block64
+	.type sha1_process_block64, @function
+
+	.balign 8 # allow decoders to fetch at least 5 first insns
+sha1_process_block64:
+	pushq %rbp # 1 byte insn
+	pushq %rbx # 1 byte insn
+	pushq %r15 # 2 byte insn
+	pushq %r14 # 2 byte insn
+	pushq %r13 # 2 byte insn
+	pushq %r12 # 2 byte insn
+	pushq %rdi # we need ctx at the end
+
+#Register and stack use:
+# eax..edx: a..d
+# ebp: e
+# esi,edi: temps
+# -32+4*n(%rsp),r8...r15: W[0..7,8..15]
+# (TODO: actually W[0..7] are used a bit more often, put _them_ into r8..r15?)
+	movl $3, %eax
+1:
+	movq (%rdi,%rax,8), %rsi
+	bswapq %rsi
+	rolq $32, %rsi
+	movq %rsi, -32(%rsp,%rax,8)
+	decl %eax
+	jns 1b
+
+	movl 80(%rdi), %eax # a = ctx->hash[0]
+	movl 84(%rdi), %ebx # b = ctx->hash[1]
+	movl 88(%rdi), %ecx # c = ctx->hash[2]
+	movl 92(%rdi), %edx # d = ctx->hash[3]
+	movl 96(%rdi), %ebp # e = ctx->hash[4]
+
+	movq 4*8(%rdi), %r8
+	movq 4*10(%rdi), %r10
+	bswapq %r8
+	bswapq %r10
+	movq 4*12(%rdi), %r12
+	movq 4*14(%rdi), %r14
+	bswapq %r12
+	bswapq %r14
+	movl %r8d, %r9d
+	shrq $32, %r8
+	movl %r10d, %r11d
+	shrq $32, %r10
+	movl %r12d, %r13d
+	shrq $32, %r12
+	movl %r14d, %r15d
+	shrq $32, %r14
+
+# 0
+	# W[0], already in %esi
+	movl %ecx, %edi # c
+	xorl %edx, %edi # ^d
+	andl %ebx, %edi # &b
+	xorl %edx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rbp,%rsi), %ebp # e += RCONST + W[n]
+	addl %edi, %ebp # e += (((c ^ d) & b) ^ d)
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 1
+	movl -32+4*1(%rsp), %esi # W[n]
+	movl %ebx, %edi # c
+	xorl %ecx, %edi # ^d
+	andl %eax, %edi # &b
+	xorl %ecx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n]
+	addl %edi, %edx # e += (((c ^ d) & b) ^ d)
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 2
+	movl -32+4*2(%rsp), %esi # W[n]
+	movl %eax, %edi # c
+	xorl %ebx, %edi # ^d
+	andl %ebp, %edi # &b
+	xorl %ebx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rcx,%rsi), %ecx # e += RCONST + W[n]
+	addl %edi, %ecx # e += (((c ^ d) & b) ^ d)
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 3
+	movl -32+4*3(%rsp), %esi # W[n]
+	movl %ebp, %edi # c
+	xorl %eax, %edi # ^d
+	andl %edx, %edi # &b
+	xorl %eax, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rbx,%rsi), %ebx # e += RCONST + W[n]
+	addl %edi, %ebx # e += (((c ^ d) & b) ^ d)
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 4
+	movl -32+4*4(%rsp), %esi # W[n]
+	movl %edx, %edi # c
+	xorl %ebp, %edi # ^d
+	andl %ecx, %edi # &b
+	xorl %ebp, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rax,%rsi), %eax # e += RCONST + W[n]
+	addl %edi, %eax # e += (((c ^ d) & b) ^ d)
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 5
+	movl -32+4*5(%rsp), %esi # W[n]
+	movl %ecx, %edi # c
+	xorl %edx, %edi # ^d
+	andl %ebx, %edi # &b
+	xorl %edx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rbp,%rsi), %ebp # e += RCONST + W[n]
+	addl %edi, %ebp # e += (((c ^ d) & b) ^ d)
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 6
+	movl -32+4*6(%rsp), %esi # W[n]
+	movl %ebx, %edi # c
+	xorl %ecx, %edi # ^d
+	andl %eax, %edi # &b
+	xorl %ecx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n]
+	addl %edi, %edx # e += (((c ^ d) & b) ^ d)
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 7
+	movl -32+4*7(%rsp), %esi # W[n]
+	movl %eax, %edi # c
+	xorl %ebx, %edi # ^d
+	andl %ebp, %edi # &b
+	xorl %ebx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rcx,%rsi), %ecx # e += RCONST + W[n]
+	addl %edi, %ecx # e += (((c ^ d) & b) ^ d)
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 8
+	# W[n], in %r8
+	movl %ebp, %edi # c
+	xorl %eax, %edi # ^d
+	andl %edx, %edi # &b
+	xorl %eax, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rbx,%r8), %ebx # e += RCONST + W[n]
+	addl %edi, %ebx # e += (((c ^ d) & b) ^ d)
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 9
+	# W[n], in %r9
+	movl %edx, %edi # c
+	xorl %ebp, %edi # ^d
+	andl %ecx, %edi # &b
+	xorl %ebp, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rax,%r9), %eax # e += RCONST + W[n]
+	addl %edi, %eax # e += (((c ^ d) & b) ^ d)
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 10
+	# W[n], in %r10
+	movl %ecx, %edi # c
+	xorl %edx, %edi # ^d
+	andl %ebx, %edi # &b
+	xorl %edx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rbp,%r10), %ebp # e += RCONST + W[n]
+	addl %edi, %ebp # e += (((c ^ d) & b) ^ d)
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 11
+	# W[n], in %r11
+	movl %ebx, %edi # c
+	xorl %ecx, %edi # ^d
+	andl %eax, %edi # &b
+	xorl %ecx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rdx,%r11), %edx # e += RCONST + W[n]
+	addl %edi, %edx # e += (((c ^ d) & b) ^ d)
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 12
+	# W[n], in %r12
+	movl %eax, %edi # c
+	xorl %ebx, %edi # ^d
+	andl %ebp, %edi # &b
+	xorl %ebx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rcx,%r12), %ecx # e += RCONST + W[n]
+	addl %edi, %ecx # e += (((c ^ d) & b) ^ d)
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 13
+	# W[n], in %r13
+	movl %ebp, %edi # c
+	xorl %eax, %edi # ^d
+	andl %edx, %edi # &b
+	xorl %eax, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rbx,%r13), %ebx # e += RCONST + W[n]
+	addl %edi, %ebx # e += (((c ^ d) & b) ^ d)
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 14
+	# W[n], in %r14
+	movl %edx, %edi # c
+	xorl %ebp, %edi # ^d
+	andl %ecx, %edi # &b
+	xorl %ebp, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rax,%r14), %eax # e += RCONST + W[n]
+	addl %edi, %eax # e += (((c ^ d) & b) ^ d)
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 15
+	# W[n], in %r15
+	movl %ecx, %edi # c
+	xorl %edx, %edi # ^d
+	andl %ebx, %edi # &b
+	xorl %edx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rbp,%r15), %ebp # e += RCONST + W[n]
+	addl %edi, %ebp # e += (((c ^ d) & b) ^ d)
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 16
+	movl %r13d, %esi # W[(n+13) & 15]
+	xorl %r8d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*2(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*0(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*0(%rsp) # store to W[n & 15]
+	movl %ebx, %edi # c
+	xorl %ecx, %edi # ^d
+	andl %eax, %edi # &b
+	xorl %ecx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
+	addl %edi, %edx # e += (((c ^ d) & b) ^ d)
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 17
+	movl %r14d, %esi # W[(n+13) & 15]
+	xorl %r9d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*3(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*1(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*1(%rsp) # store to W[n & 15]
+	movl %eax, %edi # c
+	xorl %ebx, %edi # ^d
+	andl %ebp, %edi # &b
+	xorl %ebx, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
+	addl %edi, %ecx # e += (((c ^ d) & b) ^ d)
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 18
+	movl %r15d, %esi # W[(n+13) & 15]
+	xorl %r10d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*4(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*2(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*2(%rsp) # store to W[n & 15]
+	movl %ebp, %edi # c
+	xorl %eax, %edi # ^d
+	andl %edx, %edi # &b
+	xorl %eax, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
+	addl %edi, %ebx # e += (((c ^ d) & b) ^ d)
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 19
+	movl -32+4*0(%rsp), %esi # W[(n+13) & 15]
+	xorl %r11d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*5(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*3(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*3(%rsp) # store to W[n & 15]
+	movl %edx, %edi # c
+	xorl %ebp, %edi # ^d
+	andl %ecx, %edi # &b
+	xorl %ebp, %edi # (((c ^ d) & b) ^ d)
+	leal 0x5A827999(%rax,%rsi), %eax # e += RCONST + W[n & 15]
+	addl %edi, %eax # e += (((c ^ d) & b) ^ d)
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 20
+	movl -32+4*1(%rsp), %esi # W[(n+13) & 15]
+	xorl %r12d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*6(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*4(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*4(%rsp) # store to W[n & 15]
+	movl %ecx, %edi # c
+	xorl %edx, %edi # ^d
+	xorl %ebx, %edi # ^b
+	leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
+	addl %edi, %ebp # e += (c ^ d ^ b)
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 21
+	movl -32+4*2(%rsp), %esi # W[(n+13) & 15]
+	xorl %r13d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*7(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*5(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*5(%rsp) # store to W[n & 15]
+	movl %ebx, %edi # c
+	xorl %ecx, %edi # ^d
+	xorl %eax, %edi # ^b
+	leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
+	addl %edi, %edx # e += (c ^ d ^ b)
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 22
+	movl -32+4*3(%rsp), %esi # W[(n+13) & 15]
+	xorl %r14d, %esi # ^W[(n+8) & 15]
+	xorl %r8d, %esi # ^W[(n+2) & 15]
+	xorl -32+4*6(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*6(%rsp) # store to W[n & 15]
+	movl %eax, %edi # c
+	xorl %ebx, %edi # ^d
+	xorl %ebp, %edi # ^b
+	leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
+	addl %edi, %ecx # e += (c ^ d ^ b)
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 23
+	movl -32+4*4(%rsp), %esi # W[(n+13) & 15]
+	xorl %r15d, %esi # ^W[(n+8) & 15]
+	xorl %r9d, %esi # ^W[(n+2) & 15]
+	xorl -32+4*7(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*7(%rsp) # store to W[n & 15]
+	movl %ebp, %edi # c
+	xorl %eax, %edi # ^d
+	xorl %edx, %edi # ^b
+	leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
+	addl %edi, %ebx # e += (c ^ d ^ b)
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 24
+	xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15]
+	xorl %r10d, %r8d # ^W[(n+2) & 15]
+	roll %r8d #
+	movl %edx, %edi # c
+	xorl %ebp, %edi # ^d
+	xorl %ecx, %edi # ^b
+	leal 0x6ED9EBA1(%rax,%r8), %eax # e += RCONST + W[n & 15]
+	addl %edi, %eax # e += (c ^ d ^ b)
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 25
+	xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15]
+	xorl %r11d, %r9d # ^W[(n+2) & 15]
+	roll %r9d #
+	movl %ecx, %edi # c
+	xorl %edx, %edi # ^d
+	xorl %ebx, %edi # ^b
+	leal 0x6ED9EBA1(%rbp,%r9), %ebp # e += RCONST + W[n & 15]
+	addl %edi, %ebp # e += (c ^ d ^ b)
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 26
+	xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15]
+	xorl %r12d, %r10d # ^W[(n+2) & 15]
+	roll %r10d #
+	movl %ebx, %edi # c
+	xorl %ecx, %edi # ^d
+	xorl %eax, %edi # ^b
+	leal 0x6ED9EBA1(%rdx,%r10), %edx # e += RCONST + W[n & 15]
+	addl %edi, %edx # e += (c ^ d ^ b)
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 27
+	xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15]
+	xorl %r13d, %r11d # ^W[(n+2) & 15]
+	roll %r11d #
+	movl %eax, %edi # c
+	xorl %ebx, %edi # ^d
+	xorl %ebp, %edi # ^b
+	leal 0x6ED9EBA1(%rcx,%r11), %ecx # e += RCONST + W[n & 15]
+	addl %edi, %ecx # e += (c ^ d ^ b)
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 28
+	xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15]
+	xorl %r14d, %r12d # ^W[(n+2) & 15]
+	roll %r12d #
+	movl %ebp, %edi # c
+	xorl %eax, %edi # ^d
+	xorl %edx, %edi # ^b
+	leal 0x6ED9EBA1(%rbx,%r12), %ebx # e += RCONST + W[n & 15]
+	addl %edi, %ebx # e += (c ^ d ^ b)
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 29
+	xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15]
+	xorl %r15d, %r13d # ^W[(n+2) & 15]
+	roll %r13d #
+	movl %edx, %edi # c
+	xorl %ebp, %edi # ^d
+	xorl %ecx, %edi # ^b
+	leal 0x6ED9EBA1(%rax,%r13), %eax # e += RCONST + W[n & 15]
+	addl %edi, %eax # e += (c ^ d ^ b)
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 30
+	xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15]
+	xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15]
+	roll %r14d #
+	movl %ecx, %edi # c
+	xorl %edx, %edi # ^d
+	xorl %ebx, %edi # ^b
+	leal 0x6ED9EBA1(%rbp,%r14), %ebp # e += RCONST + W[n & 15]
+	addl %edi, %ebp # e += (c ^ d ^ b)
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 31
+	xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15]
+	xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15]
+	roll %r15d #
+	movl %ebx, %edi # c
+	xorl %ecx, %edi # ^d
+	xorl %eax, %edi # ^b
+	leal 0x6ED9EBA1(%rdx,%r15), %edx # e += RCONST + W[n & 15]
+	addl %edi, %edx # e += (c ^ d ^ b)
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 32
+	movl %r13d, %esi # W[(n+13) & 15]
+	xorl %r8d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*2(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*0(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*0(%rsp) # store to W[n & 15]
+	movl %eax, %edi # c
+	xorl %ebx, %edi # ^d
+	xorl %ebp, %edi # ^b
+	leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
+	addl %edi, %ecx # e += (c ^ d ^ b)
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 33
+	movl %r14d, %esi # W[(n+13) & 15]
+	xorl %r9d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*3(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*1(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*1(%rsp) # store to W[n & 15]
+	movl %ebp, %edi # c
+	xorl %eax, %edi # ^d
+	xorl %edx, %edi # ^b
+	leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
+	addl %edi, %ebx # e += (c ^ d ^ b)
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 34
+	movl %r15d, %esi # W[(n+13) & 15]
+	xorl %r10d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*4(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*2(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*2(%rsp) # store to W[n & 15]
+	movl %edx, %edi # c
+	xorl %ebp, %edi # ^d
+	xorl %ecx, %edi # ^b
+	leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15]
+	addl %edi, %eax # e += (c ^ d ^ b)
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 35
+	movl -32+4*0(%rsp), %esi # W[(n+13) & 15]
+	xorl %r11d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*5(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*3(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*3(%rsp) # store to W[n & 15]
+	movl %ecx, %edi # c
+	xorl %edx, %edi # ^d
+	xorl %ebx, %edi # ^b
+	leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
+	addl %edi, %ebp # e += (c ^ d ^ b)
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 36
+	movl -32+4*1(%rsp), %esi # W[(n+13) & 15]
+	xorl %r12d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*6(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*4(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*4(%rsp) # store to W[n & 15]
+	movl %ebx, %edi # c
+	xorl %ecx, %edi # ^d
+	xorl %eax, %edi # ^b
+	leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
+	addl %edi, %edx # e += (c ^ d ^ b)
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 37
+	movl -32+4*2(%rsp), %esi # W[(n+13) & 15]
+	xorl %r13d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*7(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*5(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*5(%rsp) # store to W[n & 15]
+	movl %eax, %edi # c
+	xorl %ebx, %edi # ^d
+	xorl %ebp, %edi # ^b
+	leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
+	addl %edi, %ecx # e += (c ^ d ^ b)
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 38
+	movl -32+4*3(%rsp), %esi # W[(n+13) & 15]
+	xorl %r14d, %esi # ^W[(n+8) & 15]
+	xorl %r8d, %esi # ^W[(n+2) & 15]
+	xorl -32+4*6(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*6(%rsp) # store to W[n & 15]
+	movl %ebp, %edi # c
+	xorl %eax, %edi # ^d
+	xorl %edx, %edi # ^b
+	leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
+	addl %edi, %ebx # e += (c ^ d ^ b)
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 39
+	movl -32+4*4(%rsp), %esi # W[(n+13) & 15]
+	xorl %r15d, %esi # ^W[(n+8) & 15]
+	xorl %r9d, %esi # ^W[(n+2) & 15]
+	xorl -32+4*7(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*7(%rsp) # store to W[n & 15]
+	movl %edx, %edi # c
+	xorl %ebp, %edi # ^d
+	xorl %ecx, %edi # ^b
+	leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15]
+	addl %edi, %eax # e += (c ^ d ^ b)
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 40
+	movl %ebx, %edi # di: b
+	movl %ebx, %esi # si: b
+	orl %ecx, %edi # di: b | c
+	andl %ecx, %esi # si: b & c
+	andl %edx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15]
+	xorl %r10d, %r8d # ^W[(n+2) & 15]
+	roll %r8d #
+	addl %edi, %ebp # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rbp,%r8), %ebp # e += RCONST + W[n & 15]
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 41
+	movl %eax, %edi # di: b
+	movl %eax, %esi # si: b
+	orl %ebx, %edi # di: b | c
+	andl %ebx, %esi # si: b & c
+	andl %ecx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15]
+	xorl %r11d, %r9d # ^W[(n+2) & 15]
+	roll %r9d #
+	addl %edi, %edx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rdx,%r9), %edx # e += RCONST + W[n & 15]
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 42
+	movl %ebp, %edi # di: b
+	movl %ebp, %esi # si: b
+	orl %eax, %edi # di: b | c
+	andl %eax, %esi # si: b & c
+	andl %ebx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15]
+	xorl %r12d, %r10d # ^W[(n+2) & 15]
+	roll %r10d #
+	addl %edi, %ecx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rcx,%r10), %ecx # e += RCONST + W[n & 15]
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 43
+	movl %edx, %edi # di: b
+	movl %edx, %esi # si: b
+	orl %ebp, %edi # di: b | c
+	andl %ebp, %esi # si: b & c
+	andl %eax, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15]
+	xorl %r13d, %r11d # ^W[(n+2) & 15]
+	roll %r11d #
+	addl %edi, %ebx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rbx,%r11), %ebx # e += RCONST + W[n & 15]
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 44
+	movl %ecx, %edi # di: b
+	movl %ecx, %esi # si: b
+	orl %edx, %edi # di: b | c
+	andl %edx, %esi # si: b & c
+	andl %ebp, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15]
+	xorl %r14d, %r12d # ^W[(n+2) & 15]
+	roll %r12d #
+	addl %edi, %eax # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rax,%r12), %eax # e += RCONST + W[n & 15]
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 45
+	movl %ebx, %edi # di: b
+	movl %ebx, %esi # si: b
+	orl %ecx, %edi # di: b | c
+	andl %ecx, %esi # si: b & c
+	andl %edx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15]
+	xorl %r15d, %r13d # ^W[(n+2) & 15]
+	roll %r13d #
+	addl %edi, %ebp # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rbp,%r13), %ebp # e += RCONST + W[n & 15]
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 46
+	movl %eax, %edi # di: b
+	movl %eax, %esi # si: b
+	orl %ebx, %edi # di: b | c
+	andl %ebx, %esi # si: b & c
+	andl %ecx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15]
+	xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15]
+	roll %r14d #
+	addl %edi, %edx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rdx,%r14), %edx # e += RCONST + W[n & 15]
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 47
+	movl %ebp, %edi # di: b
+	movl %ebp, %esi # si: b
+	orl %eax, %edi # di: b | c
+	andl %eax, %esi # si: b & c
+	andl %ebx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15]
+	xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15]
+	roll %r15d #
+	addl %edi, %ecx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rcx,%r15), %ecx # e += RCONST + W[n & 15]
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 48
+	movl %edx, %edi # di: b
+	movl %edx, %esi # si: b
+	orl %ebp, %edi # di: b | c
+	andl %ebp, %esi # si: b & c
+	andl %eax, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	movl %r13d, %esi # W[(n+13) & 15]
+	xorl %r8d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*2(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*0(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*0(%rsp) # store to W[n & 15]
+	addl %edi, %ebx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 49
+	movl %ecx, %edi # di: b
+	movl %ecx, %esi # si: b
+	orl %edx, %edi # di: b | c
+	andl %edx, %esi # si: b & c
+	andl %ebp, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	movl %r14d, %esi # W[(n+13) & 15]
+	xorl %r9d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*3(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*1(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*1(%rsp) # store to W[n & 15]
+	addl %edi, %eax # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15]
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 50
+	movl %ebx, %edi # di: b
+	movl %ebx, %esi # si: b
+	orl %ecx, %edi # di: b | c
+	andl %ecx, %esi # si: b & c
+	andl %edx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	movl %r15d, %esi # W[(n+13) & 15]
+	xorl %r10d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*4(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*2(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*2(%rsp) # store to W[n & 15]
+	addl %edi, %ebp # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 51
+	movl %eax, %edi # di: b
+	movl %eax, %esi # si: b
+	orl %ebx, %edi # di: b | c
+	andl %ebx, %esi # si: b & c
+	andl %ecx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	movl -32+4*0(%rsp), %esi # W[(n+13) & 15]
+	xorl %r11d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*5(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*3(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*3(%rsp) # store to W[n & 15]
+	addl %edi, %edx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 52
+	movl %ebp, %edi # di: b
+	movl %ebp, %esi # si: b
+	orl %eax, %edi # di: b | c
+	andl %eax, %esi # si: b & c
+	andl %ebx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	movl -32+4*1(%rsp), %esi # W[(n+13) & 15]
+	xorl %r12d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*6(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*4(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*4(%rsp) # store to W[n & 15]
+	addl %edi, %ecx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 53
+	movl %edx, %edi # di: b
+	movl %edx, %esi # si: b
+	orl %ebp, %edi # di: b | c
+	andl %ebp, %esi # si: b & c
+	andl %eax, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	movl -32+4*2(%rsp), %esi # W[(n+13) & 15]
+	xorl %r13d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*7(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*5(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*5(%rsp) # store to W[n & 15]
+	addl %edi, %ebx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 54
+	movl %ecx, %edi # di: b
+	movl %ecx, %esi # si: b
+	orl %edx, %edi # di: b | c
+	andl %edx, %esi # si: b & c
+	andl %ebp, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	movl -32+4*3(%rsp), %esi # W[(n+13) & 15]
+	xorl %r14d, %esi # ^W[(n+8) & 15]
+	xorl %r8d, %esi # ^W[(n+2) & 15]
+	xorl -32+4*6(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*6(%rsp) # store to W[n & 15]
+	addl %edi, %eax # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15]
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 55
+	movl %ebx, %edi # di: b
+	movl %ebx, %esi # si: b
+	orl %ecx, %edi # di: b | c
+	andl %ecx, %esi # si: b & c
+	andl %edx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	movl -32+4*4(%rsp), %esi # W[(n+13) & 15]
+	xorl %r15d, %esi # ^W[(n+8) & 15]
+	xorl %r9d, %esi # ^W[(n+2) & 15]
+	xorl -32+4*7(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*7(%rsp) # store to W[n & 15]
+	addl %edi, %ebp # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 56
+	movl %eax, %edi # di: b
+	movl %eax, %esi # si: b
+	orl %ebx, %edi # di: b | c
+	andl %ebx, %esi # si: b & c
+	andl %ecx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15]
+	xorl %r10d, %r8d # ^W[(n+2) & 15]
+	roll %r8d #
+	addl %edi, %edx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rdx,%r8), %edx # e += RCONST + W[n & 15]
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 57
+	movl %ebp, %edi # di: b
+	movl %ebp, %esi # si: b
+	orl %eax, %edi # di: b | c
+	andl %eax, %esi # si: b & c
+	andl %ebx, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15]
+	xorl %r11d, %r9d # ^W[(n+2) & 15]
+	roll %r9d #
+	addl %edi, %ecx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rcx,%r9), %ecx # e += RCONST + W[n & 15]
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 58
+	movl %edx, %edi # di: b
+	movl %edx, %esi # si: b
+	orl %ebp, %edi # di: b | c
+	andl %ebp, %esi # si: b & c
+	andl %eax, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15]
+	xorl %r12d, %r10d # ^W[(n+2) & 15]
+	roll %r10d #
+	addl %edi, %ebx # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rbx,%r10), %ebx # e += RCONST + W[n & 15]
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 59
+	movl %ecx, %edi # di: b
+	movl %ecx, %esi # si: b
+	orl %edx, %edi # di: b | c
+	andl %edx, %esi # si: b & c
+	andl %ebp, %edi # di: (b | c) & d
+	orl %esi, %edi # ((b | c) & d) | (b & c)
+	xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15]
+	xorl %r13d, %r11d # ^W[(n+2) & 15]
+	roll %r11d #
+	addl %edi, %eax # += ((b | c) & d) | (b & c)
+	leal -0x70E44324(%rax,%r11), %eax # e += RCONST + W[n & 15]
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 60
+	xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15]
+	xorl %r14d, %r12d # ^W[(n+2) & 15]
+	roll %r12d #
+	movl %ecx, %edi # c
+	xorl %edx, %edi # ^d
+	xorl %ebx, %edi # ^b
+	leal -0x359D3E2A(%rbp,%r12), %ebp # e += RCONST + W[n & 15]
+	addl %edi, %ebp # e += (c ^ d ^ b)
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 61
+	xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15]
+	xorl %r15d, %r13d # ^W[(n+2) & 15]
+	roll %r13d #
+	movl %ebx, %edi # c
+	xorl %ecx, %edi # ^d
+	xorl %eax, %edi # ^b
+	leal -0x359D3E2A(%rdx,%r13), %edx # e += RCONST + W[n & 15]
+	addl %edi, %edx # e += (c ^ d ^ b)
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 62
+	xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15]
+	xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15]
+	roll %r14d #
+	movl %eax, %edi # c
+	xorl %ebx, %edi # ^d
+	xorl %ebp, %edi # ^b
+	leal -0x359D3E2A(%rcx,%r14), %ecx # e += RCONST + W[n & 15]
+	addl %edi, %ecx # e += (c ^ d ^ b)
+	movl %edx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ecx # e += rotl32(a,5)
+	rorl $2, %ebp # b = rotl32(b,30)
+# 63
+	xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15]
+	xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15]
+	xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15]
+	roll %r15d #
+	movl %ebp, %edi # c
+	xorl %eax, %edi # ^d
+	xorl %edx, %edi # ^b
+	leal -0x359D3E2A(%rbx,%r15), %ebx # e += RCONST + W[n & 15]
+	addl %edi, %ebx # e += (c ^ d ^ b)
+	movl %ecx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebx # e += rotl32(a,5)
+	rorl $2, %edx # b = rotl32(b,30)
+# 64
+	movl %r13d, %esi # W[(n+13) & 15]
+	xorl %r8d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*2(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*0(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*0(%rsp) # store to W[n & 15]
+	movl %edx, %edi # c
+	xorl %ebp, %edi # ^d
+	xorl %ecx, %edi # ^b
+	leal -0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15]
+	addl %edi, %eax # e += (c ^ d ^ b)
+	movl %ebx, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %eax # e += rotl32(a,5)
+	rorl $2, %ecx # b = rotl32(b,30)
+# 65
+	movl %r14d, %esi # W[(n+13) & 15]
+	xorl %r9d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*3(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*1(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*1(%rsp) # store to W[n & 15]
+	movl %ecx, %edi # c
+	xorl %edx, %edi # ^d
+	xorl %ebx, %edi # ^b
+	leal -0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
+	addl %edi, %ebp # e += (c ^ d ^ b)
+	movl %eax, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %ebp # e += rotl32(a,5)
+	rorl $2, %ebx # b = rotl32(b,30)
+# 66
+	movl %r15d, %esi # W[(n+13) & 15]
+	xorl %r10d, %esi # ^W[(n+8) & 15]
+	xorl -32+4*4(%rsp), %esi # ^W[(n+2) & 15]
+	xorl -32+4*2(%rsp), %esi # ^W[n & 15]
+	roll %esi #
+	movl %esi, -32+4*2(%rsp) # store to W[n & 15]
+	movl %ebx, %edi # c
+	xorl %ecx, %edi # ^d
+	xorl %eax, %edi # ^b
+	leal -0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
+	addl %edi, %edx # e += (c ^ d ^ b)
+	movl %ebp, %esi #
+	roll $5, %esi # rotl32(a,5)
+	addl %esi, %edx # e += rotl32(a,5)
+	rorl $2, %eax # b = rotl32(b,30)
+# 67
+	movl -32+4*0(%rsp), %esi # W[(n+13) & 15]
+	xorl %r11d, %esi # ^W[(n+8) & 15]
1084 xorl -32+4*5(%rsp), %esi # ^W[(n+2) & 15]
1085 xorl -32+4*3(%rsp), %esi # ^W[n & 15]
1086 roll %esi #
1087 movl %esi, -32+4*3(%rsp) # store to W[n & 15]
1088 movl %eax, %edi # c
1089 xorl %ebx, %edi # ^d
1090 xorl %ebp, %edi # ^b
1091 leal -0x359D3E2A(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
1092 addl %edi, %ecx # e += (c ^ d ^ b)
1093 movl %edx, %esi #
1094 roll $5, %esi # rotl32(a,5)
1095 addl %esi, %ecx # e += rotl32(a,5)
1096 rorl $2, %ebp # b = rotl32(b,30)
1097# 68
1098 movl -32+4*1(%rsp), %esi # W[(n+13) & 15]
1099 xorl %r12d, %esi # ^W[(n+8) & 15]
1100 xorl -32+4*6(%rsp), %esi # ^W[(n+2) & 15]
1101 xorl -32+4*4(%rsp), %esi # ^W[n & 15]
1102 roll %esi #
1103 movl %esi, -32+4*4(%rsp) # store to W[n & 15]
1104 movl %ebp, %edi # c
1105 xorl %eax, %edi # ^d
1106 xorl %edx, %edi # ^b
1107 leal -0x359D3E2A(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
1108 addl %edi, %ebx # e += (c ^ d ^ b)
1109 movl %ecx, %esi #
1110 roll $5, %esi # rotl32(a,5)
1111 addl %esi, %ebx # e += rotl32(a,5)
1112 rorl $2, %edx # b = rotl32(b,30)
1113# 69
1114 movl -32+4*2(%rsp), %esi # W[(n+13) & 15]
1115 xorl %r13d, %esi # ^W[(n+8) & 15]
1116 xorl -32+4*7(%rsp), %esi # ^W[(n+2) & 15]
1117 xorl -32+4*5(%rsp), %esi # ^W[n & 15]
1118 roll %esi #
1119 movl %esi, -32+4*5(%rsp) # store to W[n & 15]
1120 movl %edx, %edi # c
1121 xorl %ebp, %edi # ^d
1122 xorl %ecx, %edi # ^b
1123 leal -0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15]
1124 addl %edi, %eax # e += (c ^ d ^ b)
1125 movl %ebx, %esi #
1126 roll $5, %esi # rotl32(a,5)
1127 addl %esi, %eax # e += rotl32(a,5)
1128 rorl $2, %ecx # b = rotl32(b,30)
1129# 70
1130 movl -32+4*3(%rsp), %esi # W[(n+13) & 15]
1131 xorl %r14d, %esi # ^W[(n+8) & 15]
1132 xorl %r8d, %esi # ^W[(n+2) & 15]
1133 xorl -32+4*6(%rsp), %esi # ^W[n & 15]
1134 roll %esi #
1135 movl %esi, -32+4*6(%rsp) # store to W[n & 15]
1136 movl %ecx, %edi # c
1137 xorl %edx, %edi # ^d
1138 xorl %ebx, %edi # ^b
1139 leal -0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
1140 addl %edi, %ebp # e += (c ^ d ^ b)
1141 movl %eax, %esi #
1142 roll $5, %esi # rotl32(a,5)
1143 addl %esi, %ebp # e += rotl32(a,5)
1144 rorl $2, %ebx # b = rotl32(b,30)
1145# 71
1146 movl -32+4*4(%rsp), %esi # W[(n+13) & 15]
1147 xorl %r15d, %esi # ^W[(n+8) & 15]
1148 xorl %r9d, %esi # ^W[(n+2) & 15]
1149 xorl -32+4*7(%rsp), %esi # ^W[n & 15]
1150 roll %esi #
1151 movl %esi, -32+4*7(%rsp) # store to W[n & 15]
1152 movl %ebx, %edi # c
1153 xorl %ecx, %edi # ^d
1154 xorl %eax, %edi # ^b
1155 leal -0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
1156 addl %edi, %edx # e += (c ^ d ^ b)
1157 movl %ebp, %esi #
1158 roll $5, %esi # rotl32(a,5)
1159 addl %esi, %edx # e += rotl32(a,5)
1160 rorl $2, %eax # b = rotl32(b,30)
1161# 72
1162 xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15]
1163 xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15]
1164 xorl %r10d, %r8d # ^W[(n+2) & 15]
1165 roll %r8d #
1166 movl %eax, %edi # c
1167 xorl %ebx, %edi # ^d
1168 xorl %ebp, %edi # ^b
1169 leal -0x359D3E2A(%rcx,%r8), %ecx # e += RCONST + W[n & 15]
1170 addl %edi, %ecx # e += (c ^ d ^ b)
1171 movl %edx, %esi #
1172 roll $5, %esi # rotl32(a,5)
1173 addl %esi, %ecx # e += rotl32(a,5)
1174 rorl $2, %ebp # b = rotl32(b,30)
1175# 73
1176 xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15]
1177 xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15]
1178 xorl %r11d, %r9d # ^W[(n+2) & 15]
1179 roll %r9d #
1180 movl %ebp, %edi # c
1181 xorl %eax, %edi # ^d
1182 xorl %edx, %edi # ^b
1183 leal -0x359D3E2A(%rbx,%r9), %ebx # e += RCONST + W[n & 15]
1184 addl %edi, %ebx # e += (c ^ d ^ b)
1185 movl %ecx, %esi #
1186 roll $5, %esi # rotl32(a,5)
1187 addl %esi, %ebx # e += rotl32(a,5)
1188 rorl $2, %edx # b = rotl32(b,30)
1189# 74
1190 xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15]
1191 xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15]
1192 xorl %r12d, %r10d # ^W[(n+2) & 15]
1193 roll %r10d #
1194 movl %edx, %edi # c
1195 xorl %ebp, %edi # ^d
1196 xorl %ecx, %edi # ^b
1197 leal -0x359D3E2A(%rax,%r10), %eax # e += RCONST + W[n & 15]
1198 addl %edi, %eax # e += (c ^ d ^ b)
1199 movl %ebx, %esi #
1200 roll $5, %esi # rotl32(a,5)
1201 addl %esi, %eax # e += rotl32(a,5)
1202 rorl $2, %ecx # b = rotl32(b,30)
1203# 75
1204 xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15]
1205 xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15]
1206 xorl %r13d, %r11d # ^W[(n+2) & 15]
1207 roll %r11d #
1208 movl %ecx, %edi # c
1209 xorl %edx, %edi # ^d
1210 xorl %ebx, %edi # ^b
1211 leal -0x359D3E2A(%rbp,%r11), %ebp # e += RCONST + W[n & 15]
1212 addl %edi, %ebp # e += (c ^ d ^ b)
1213 movl %eax, %esi #
1214 roll $5, %esi # rotl32(a,5)
1215 addl %esi, %ebp # e += rotl32(a,5)
1216 rorl $2, %ebx # b = rotl32(b,30)
1217# 76
1218 xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15]
1219 xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15]
1220 xorl %r14d, %r12d # ^W[(n+2) & 15]
1221 roll %r12d #
1222 movl %ebx, %edi # c
1223 xorl %ecx, %edi # ^d
1224 xorl %eax, %edi # ^b
1225 leal -0x359D3E2A(%rdx,%r12), %edx # e += RCONST + W[n & 15]
1226 addl %edi, %edx # e += (c ^ d ^ b)
1227 movl %ebp, %esi #
1228 roll $5, %esi # rotl32(a,5)
1229 addl %esi, %edx # e += rotl32(a,5)
1230 rorl $2, %eax # b = rotl32(b,30)
1231# 77
1232 xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15]
1233 xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15]
1234 xorl %r15d, %r13d # ^W[(n+2) & 15]
1235 roll %r13d #
1236 movl %eax, %edi # c
1237 xorl %ebx, %edi # ^d
1238 xorl %ebp, %edi # ^b
1239 leal -0x359D3E2A(%rcx,%r13), %ecx # e += RCONST + W[n & 15]
1240 addl %edi, %ecx # e += (c ^ d ^ b)
1241 movl %edx, %esi #
1242 roll $5, %esi # rotl32(a,5)
1243 addl %esi, %ecx # e += rotl32(a,5)
1244 rorl $2, %ebp # b = rotl32(b,30)
1245# 78
1246 xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15]
1247 xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15]
1248 xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15]
1249 roll %r14d #
1250 movl %ebp, %edi # c
1251 xorl %eax, %edi # ^d
1252 xorl %edx, %edi # ^b
1253 leal -0x359D3E2A(%rbx,%r14), %ebx # e += RCONST + W[n & 15]
1254 addl %edi, %ebx # e += (c ^ d ^ b)
1255 movl %ecx, %esi #
1256 roll $5, %esi # rotl32(a,5)
1257 addl %esi, %ebx # e += rotl32(a,5)
1258 rorl $2, %edx # b = rotl32(b,30)
1259# 79
1260 xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15]
1261 xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15]
1262 xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15]
1263 roll %r15d #
1264 movl %edx, %edi # c
1265 xorl %ebp, %edi # ^d
1266 xorl %ecx, %edi # ^b
1267 leal -0x359D3E2A(%rax,%r15), %eax # e += RCONST + W[n & 15]
1268 addl %edi, %eax # e += (c ^ d ^ b)
1269 movl %ebx, %esi #
1270 roll $5, %esi # rotl32(a,5)
1271 addl %esi, %eax # e += rotl32(a,5)
1272 rorl $2, %ecx # b = rotl32(b,30)
1273
1274 popq %rdi #
1275 popq %r12 #
1276 addl %eax, 80(%rdi) # ctx->hash[0] += a
1277 popq %r13 #
1278 addl %ebx, 84(%rdi) # ctx->hash[1] += b
1279 popq %r14 #
1280 addl %ecx, 88(%rdi) # ctx->hash[2] += c
1281 popq %r15 #
1282 addl %edx, 92(%rdi) # ctx->hash[3] += d
1283 popq %rbx #
1284 addl %ebp, 96(%rdi) # ctx->hash[4] += e
1285 popq %rbp #
1286
1287 ret
1288 .size sha1_process_block64, .-sha1_process_block64
1289#endif
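For orientation: each "# NN" block in the listing above is one fully unrolled SHA1 round. The schedule word W[n & 15] lives either in a stack slot (-32+4*n(%rsp), for n 0..7) or in r8..r15 (for n 8..15), and every round does e += RCONST + W + f(b,c,d) + rotl32(a,5) followed by b = rotl32(b,30). A minimal C sketch of that skeleton (illustrative only, with our own helper names; not the busybox C implementation):

    #include <stdint.h>

    static uint32_t rotl32(uint32_t x, unsigned n)
    {
        return (x << n) | (x >> (32 - n));
    }

    /* One round, matching a "# NN" block above. f and RCONST per group:
     * rounds  0..19: f = ((c ^ d) & b) ^ d         RCONST = 0x5A827999
     * rounds 20..39: f = c ^ d ^ b                 RCONST = 0x6ED9EBA1
     * rounds 40..59: f = ((b | c) & d) | (b & c)   RCONST = 0x8F1BBCDC
     * rounds 60..79: f = c ^ d ^ b                 RCONST = 0xCA62C1D6
     */
    static void sha1_round(uint32_t W[16], unsigned n,
            uint32_t a, uint32_t *b, uint32_t *e,
            uint32_t f, uint32_t rconst)
    {
        if (n >= 16) /* rounds 16..79 rebuild W[n & 15] in place */
            W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15]
                    ^ W[(n+2) & 15] ^ W[n & 15], 1);
        *e += rconst + W[n & 15] + f + rotl32(a, 5);
        *b = rotl32(*b, 30);
    }

Note that the unrolled code never moves values between registers to rotate a..e; instead the generator script below permutes which register plays which role (the rotating ax/bx/cx/dx/bp argument lists).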
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh
new file mode 100755
index 000000000..7e50b64fb
--- /dev/null
+++ b/libbb/hash_md5_sha_x86-64.S.sh
@@ -0,0 +1,281 @@
1#!/bin/sh
2
3# We don't regenerate it on every "make" invocation - only by hand.
4# The reason is that changes to the generated code are difficult
5# to visualize by looking only at this script; it helps when the commit
6# also contains the diff of the generated file.
7exec >hash_md5_sha_x86-64.S
8
9echo \
10'### Generated by hash_md5_sha_x86-64.S.sh ###
11
12#if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__)
13 .section .text.sha1_process_block64,"ax",@progbits
14 .globl sha1_process_block64
15 .hidden sha1_process_block64
16 .type sha1_process_block64, @function
17
18 .balign 8 # allow decoders to fetch at least the first 5 insns
19sha1_process_block64:
20 pushq %rbp # 1 byte insn
21 pushq %rbx # 1 byte insn
22 pushq %r15 # 2 byte insn
23 pushq %r14 # 2 byte insn
24 pushq %r13 # 2 byte insn
25 pushq %r12 # 2 byte insn
26 pushq %rdi # we need ctx at the end
27
28# Register and stack use:
29# eax..edx: a..d
30# ebp: e
31# esi,edi: temps
32# -32+4*n(%rsp),r8...r15: W[0..7,8..15]
33# (TODO: actually W[0..7] are used a bit more often, put _them_ into r8..r15?)
34 movl $3, %eax
351:
36 movq (%rdi,%rax,8), %rsi
37 bswapq %rsi
38 rolq $32, %rsi
39 movq %rsi, -32(%rsp,%rax,8)
40 decl %eax
41 jns 1b
42
43 movl 80(%rdi), %eax # a = ctx->hash[0]
44 movl 84(%rdi), %ebx # b = ctx->hash[1]
45 movl 88(%rdi), %ecx # c = ctx->hash[2]
46 movl 92(%rdi), %edx # d = ctx->hash[3]
47 movl 96(%rdi), %ebp # e = ctx->hash[4]
48
49 movq 4*8(%rdi), %r8
50 movq 4*10(%rdi), %r10
51 bswapq %r8
52 bswapq %r10
53 movq 4*12(%rdi), %r12
54 movq 4*14(%rdi), %r14
55 bswapq %r12
56 bswapq %r14
57 movl %r8d, %r9d
58 shrq $32, %r8
59 movl %r10d, %r11d
60 shrq $32, %r10
61 movl %r12d, %r13d
62 shrq $32, %r12
63 movl %r14d, %r15d
64 shrq $32, %r14
65'
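A note on the load loop above: each movq fetches two big-endian 32-bit message words at once, bswapq byte-reverses the whole qword, and rolq $32 then swaps the halves so each 32-bit word ends up individually byte-swapped. The same trick in C (a sketch assuming a little-endian host and GCC/Clang's __builtin_bswap64):

    #include <stdint.h>
    #include <string.h>

    /* Convert two consecutive big-endian 32-bit words with a single
     * 64-bit byteswap, as the movq/bswapq/rolq sequence above does.
     */
    static uint64_t load_two_be32(const void *p)
    {
        uint64_t x;
        memcpy(&x, p, 8);          /* movq   (%rdi,%rax,8), %rsi */
        x = __builtin_bswap64(x);  /* bswapq %rsi */
        x = (x << 32) | (x >> 32); /* rolq   $32, %rsi */
        return x; /* low half = first word, high half = second word */
    }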
66W32() {
67test "$1" || exit 1
68test "$1" -lt 0 && exit 1
69test "$1" -gt 15 && exit 1
70test "$1" -lt 8 && echo "-32+4*$1(%rsp)"
71test "$1" -ge 8 && echo "%r${1}d"
72}
73
74# It's possible to interleave insns in rounds to mostly eliminate
75# dependency chains, but this is likely to help only old Pentium-based
76# CPUs (ones without OOO execution, which can only execute a pair of
77# _adjacent_ insns simultaneously).
78# Testing on an old-ish Silvermont CPU (which has an OOO window of
79# only ~8 insns) shows a very small (~1%) speedup.
80
81RD1A() {
82local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
83local n=$(($6))
84local n0=$(((n+0) & 15))
85echo "
86# $n
87";test $n0 = 0 && echo "
88 # W[0], already in %esi
89";test $n0 != 0 && test $n0 -lt 8 && echo "
90 movl `W32 $n0`, %esi # W[n]
91";test $n0 -ge 8 && echo "
92 # W[n], in %r$n0
93";echo "
94 movl %e$c, %edi # c
95 xorl %e$d, %edi # ^d
96 andl %e$b, %edi # &b
97 xorl %e$d, %edi # (((c ^ d) & b) ^ d)
98";test $n0 -lt 8 && echo "
99 leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n]
100";test $n0 -ge 8 && echo "
101 leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n]
102";echo "
103 addl %edi, %e$e # e += (((c ^ d) & b) ^ d)
104 movl %e$a, %esi #
105 roll \$5, %esi # rotl32(a,5)
106 addl %esi, %e$e # e += rotl32(a,5)
107 rorl \$2, %e$b # b = rotl32(b,30)
108"
109}
110RD1B() {
111local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
112local n=$(($6))
113local n13=$(((n+13) & 15))
114local n8=$(((n+8) & 15))
115local n2=$(((n+2) & 15))
116local n0=$(((n+0) & 15))
117echo "
118# $n
119";test $n0 -lt 8 && echo "
120 movl `W32 $n13`, %esi # W[(n+13) & 15]
121 xorl `W32 $n8`, %esi # ^W[(n+8) & 15]
122 xorl `W32 $n2`, %esi # ^W[(n+2) & 15]
123 xorl `W32 $n0`, %esi # ^W[n & 15]
124 roll %esi #
125 movl %esi, `W32 $n0` # store to W[n & 15]
126";test $n0 -ge 8 && echo "
127 xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15]
128 xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15]
129 xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15]
130 roll `W32 $n0` #
131";echo "
132 movl %e$c, %edi # c
133 xorl %e$d, %edi # ^d
134 andl %e$b, %edi # &b
135 xorl %e$d, %edi # (((c ^ d) & b) ^ d)
136";test $n0 -lt 8 && echo "
137 leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15]
138";test $n0 -ge 8 && echo "
139 leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15]
140";echo "
141 addl %edi, %e$e # e += (((c ^ d) & b) ^ d)
142 movl %e$a, %esi #
143 roll \$5, %esi # rotl32(a,5)
144 addl %esi, %e$e # e += rotl32(a,5)
145 rorl \$2, %e$b # b = rotl32(b,30)
146"
147}
148{
149RCONST=0x5A827999
150RD1A ax bx cx dx bp 0; RD1A bp ax bx cx dx 1; RD1A dx bp ax bx cx 2; RD1A cx dx bp ax bx 3; RD1A bx cx dx bp ax 4
151RD1A ax bx cx dx bp 5; RD1A bp ax bx cx dx 6; RD1A dx bp ax bx cx 7; RD1A cx dx bp ax bx 8; RD1A bx cx dx bp ax 9
152RD1A ax bx cx dx bp 10; RD1A bp ax bx cx dx 11; RD1A dx bp ax bx cx 12; RD1A cx dx bp ax bx 13; RD1A bx cx dx bp ax 14
153RD1A ax bx cx dx bp 15; RD1B bp ax bx cx dx 16; RD1B dx bp ax bx cx 17; RD1B cx dx bp ax bx 18; RD1B bx cx dx bp ax 19
154} | grep -v '^$'
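RD1A/RD1B compute the round-1 "choice" function as (((c ^ d) & b) ^ d) rather than the textbook (b & c) | (~b & d): the two are identical bitwise, and the former needs one temporary and no NOT. A standalone spot check (plain C, not part of the build):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t b, c, d;
        /* checking the 1-bit cases covers every bit position */
        for (b = 0; b <= 1; b++)
        for (c = 0; c <= 1; c++)
        for (d = 0; d <= 1; d++)
            assert((((c ^ d) & b) ^ d)
                   == ((b & c) | (~b & d & 1))); /* &1 masks ~b's high bits */
        return 0;
    }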
155
156RD2() {
157local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
158local n=$(($6))
159local n13=$(((n+13) & 15))
160local n8=$(((n+8) & 15))
161local n2=$(((n+2) & 15))
162local n0=$(((n+0) & 15))
163echo "
164# $n
165";test $n0 -lt 8 && echo "
166 movl `W32 $n13`, %esi # W[(n+13) & 15]
167 xorl `W32 $n8`, %esi # ^W[(n+8) & 15]
168 xorl `W32 $n2`, %esi # ^W[(n+2) & 15]
169 xorl `W32 $n0`, %esi # ^W[n & 15]
170 roll %esi #
171 movl %esi, `W32 $n0` # store to W[n & 15]
172";test $n0 -ge 8 && echo "
173 xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15]
174 xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15]
175 xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15]
176 roll `W32 $n0` #
177";echo "
178 movl %e$c, %edi # c
179 xorl %e$d, %edi # ^d
180 xorl %e$b, %edi # ^b
181";test $n0 -lt 8 && echo "
182 leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15]
183";test $n0 -ge 8 && echo "
184 leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15]
185";echo "
186 addl %edi, %e$e # e += (c ^ d ^ b)
187 movl %e$a, %esi #
188 roll \$5, %esi # rotl32(a,5)
189 addl %esi, %e$e # e += rotl32(a,5)
190 rorl \$2, %e$b # b = rotl32(b,30)
191"
192}
193{
194RCONST=0x6ED9EBA1
195RD2 ax bx cx dx bp 20; RD2 bp ax bx cx dx 21; RD2 dx bp ax bx cx 22; RD2 cx dx bp ax bx 23; RD2 bx cx dx bp ax 24
196RD2 ax bx cx dx bp 25; RD2 bp ax bx cx dx 26; RD2 dx bp ax bx cx 27; RD2 cx dx bp ax bx 28; RD2 bx cx dx bp ax 29
197RD2 ax bx cx dx bp 30; RD2 bp ax bx cx dx 31; RD2 dx bp ax bx cx 32; RD2 cx dx bp ax bx 33; RD2 bx cx dx bp ax 34
198RD2 ax bx cx dx bp 35; RD2 bp ax bx cx dx 36; RD2 dx bp ax bx cx 37; RD2 cx dx bp ax bx 38; RD2 bx cx dx bp ax 39
199} | grep -v '^$'
200
201RD3() {
202local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
203local n=$(($6))
204local n13=$(((n+13) & 15))
205local n8=$(((n+8) & 15))
206local n2=$(((n+2) & 15))
207local n0=$(((n+0) & 15))
208echo "
209# $n
210 movl %e$b, %edi # di: b
211 movl %e$b, %esi # si: b
212 orl %e$c, %edi # di: b | c
213 andl %e$c, %esi # si: b & c
214 andl %e$d, %edi # di: (b | c) & d
215 orl %esi, %edi # ((b | c) & d) | (b & c)
216";test $n0 -lt 8 && echo "
217 movl `W32 $n13`, %esi # W[(n+13) & 15]
218 xorl `W32 $n8`, %esi # ^W[(n+8) & 15]
219 xorl `W32 $n2`, %esi # ^W[(n+2) & 15]
220 xorl `W32 $n0`, %esi # ^W[n & 15]
221 roll %esi #
222 movl %esi, `W32 $n0` # store to W[n & 15]
223";test $n0 -ge 8 && echo "
224 xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15]
225 xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15]
226 xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15]
227 roll `W32 $n0` #
228";echo "
229 addl %edi, %e$e # += ((b | c) & d) | (b & c)
230";test $n0 -lt 8 && echo "
231 leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15]
232";test $n0 -ge 8 && echo "
233 leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15]
234";echo "
235 movl %e$a, %esi #
236 roll \$5, %esi # rotl32(a,5)
237 addl %esi, %e$e # e += rotl32(a,5)
238 rorl \$2, %e$b # b = rotl32(b,30)
239"
240}
241{
242#RCONST=0x8F1BBCDC "out of range for signed 32bit displacement"
243RCONST=-0x70E44324
244RD3 ax bx cx dx bp 40; RD3 bp ax bx cx dx 41; RD3 dx bp ax bx cx 42; RD3 cx dx bp ax bx 43; RD3 bx cx dx bp ax 44
245RD3 ax bx cx dx bp 45; RD3 bp ax bx cx dx 46; RD3 dx bp ax bx cx 47; RD3 cx dx bp ax bx 48; RD3 bx cx dx bp ax 49
246RD3 ax bx cx dx bp 50; RD3 bp ax bx cx dx 51; RD3 dx bp ax bx cx 52; RD3 cx dx bp ax bx 53; RD3 bx cx dx bp ax 54
247RD3 ax bx cx dx bp 55; RD3 bp ax bx cx dx 56; RD3 dx bp ax bx cx 57; RD3 cx dx bp ax bx 58; RD3 bx cx dx bp ax 59
248} | grep -v '^$'
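Likewise, RD3 computes the round-3 majority function as ((b | c) & d) | (b & c) instead of the textbook (b & c) | (b & d) | (c & d), saving one boolean op per round. The same kind of check confirms the identity:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t b, c, d;
        for (b = 0; b <= 1; b++)
        for (c = 0; c <= 1; c++)
        for (d = 0; d <= 1; d++)
            assert((((b | c) & d) | (b & c))
                   == ((b & c) | (b & d) | (c & d)));
        return 0;
    }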
249
250# Round 4 has the same logic as round 2, only n and RCONST are different
251{
252#RCONST=0xCA62C1D6 "out of range for signed 32bit displacement"
253RCONST=-0x359D3E2A
254RD2 ax bx cx dx bp 60; RD2 bp ax bx cx dx 61; RD2 dx bp ax bx cx 62; RD2 cx dx bp ax bx 63; RD2 bx cx dx bp ax 64
255RD2 ax bx cx dx bp 65; RD2 bp ax bx cx dx 66; RD2 dx bp ax bx cx 67; RD2 cx dx bp ax bx 68; RD2 bx cx dx bp ax 69
256RD2 ax bx cx dx bp 70; RD2 bp ax bx cx dx 71; RD2 dx bp ax bx cx 72; RD2 cx dx bp ax bx 73; RD2 bx cx dx bp ax 74
257RD2 ax bx cx dx bp 75; RD2 bp ax bx cx dx 76; RD2 dx bp ax bx cx 77; RD2 cx dx bp ax bx 78; RD2 bx cx dx bp ax 79
258# Note: the new W[n&15] values generated in the last 3 iterations
259# (W[13,14,15]) are unused after each of these iterations.
260# Since we use r8..r15 for W[8..15], this does not matter.
261# If we switch to e.g. using r8..r15 for W[0..7], then the store to
262# W[13,14,15] (the "movl %esi, `W32 $n0`" insn) is dead and can be removed.
263} | grep -v '^$'
264
265echo "
266 popq %rdi #
267 popq %r12 #
268 addl %eax, 80(%rdi) # ctx->hash[0] += a
269 popq %r13 #
270 addl %ebx, 84(%rdi) # ctx->hash[1] += b
271 popq %r14 #
272 addl %ecx, 88(%rdi) # ctx->hash[2] += c
273 popq %r15 #
274 addl %edx, 92(%rdi) # ctx->hash[3] += d
275 popq %rbx #
276 addl %ebp, 96(%rdi) # ctx->hash[4] += e
277 popq %rbp #
278
279 ret
280 .size sha1_process_block64, .-sha1_process_block64
281#endif"
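The negated RCONST values in the script exist because leal's displacement is a sign-extended 32-bit immediate: 0x8F1BBCDC and 0xCA62C1D6 don't fit, but the addition keeps only the low 32 bits, so the two's-complement negatives behave identically. A quick standalone check:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* 32-bit addition is mod 2^32, so adding the sign-extended
         * negative displacement yields the same low 32 bits:
         */
        assert((uint32_t)-0x70E44324 == 0x8F1BBCDCu);
        assert((uint32_t)-0x359D3E2A == 0xCA62C1D6u);
        return 0;
    }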
diff --git a/libbb/vfork_daemon_rexec.c b/libbb/vfork_daemon_rexec.c
index 151739ae2..62beb6a5d 100644
--- a/libbb/vfork_daemon_rexec.c
+++ b/libbb/vfork_daemon_rexec.c
@@ -313,7 +313,7 @@ void FAST_FUNC bb_daemonize_or_rexec(int flags, char **argv)
313 /* fflush_all(); - add it in fork_or_rexec() if necessary */ 313 /* fflush_all(); - add it in fork_or_rexec() if necessary */
314 314
315 if (fork_or_rexec(argv)) 315 if (fork_or_rexec(argv))
316 _exit(EXIT_SUCCESS); /* parent */ 316 _exit_SUCCESS(); /* parent */
317 /* if daemonizing, detach from stdio & ctty */ 317 /* if daemonizing, detach from stdio & ctty */
318 setsid(); 318 setsid();
319 dup2(fd, 0); 319 dup2(fd, 0);
@@ -325,7 +325,7 @@ void FAST_FUNC bb_daemonize_or_rexec(int flags, char **argv)
325// * Prevent this: stop being a session leader. 325// * Prevent this: stop being a session leader.
326// */ 326// */
327// if (fork_or_rexec(argv)) 327// if (fork_or_rexec(argv))
328// _exit(EXIT_SUCCESS); /* parent */ 328// _exit_SUCCESS(); /* parent */
329// } 329// }
330 } 330 }
331 while (fd > 2) { 331 while (fd > 2) {
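The _exit_SUCCESS() substitutions above call the new helper added in xfuncs.c (next hunk); each former _exit(EXIT_SUCCESS) call site shrinks because no argument register has to be loaded. A simplified sketch of the parent/child split being edited (illustrative only; the real bb_daemonize_or_rexec() also handles re-exec and its flag argument):

    #include <unistd.h>

    void _exit_SUCCESS(void); /* libbb helper, defined in xfuncs.c below */

    static void daemonize_sketch(int fd)
    {
        if (fork() > 0)
            _exit_SUCCESS(); /* parent exits; _exit skips stdio flushing */
        setsid();            /* child detaches from the controlling tty */
        dup2(fd, 0);
        dup2(fd, 1);
        dup2(fd, 2);
    }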
diff --git a/libbb/xfuncs.c b/libbb/xfuncs.c
index 102b5a227..388b246ca 100644
--- a/libbb/xfuncs.c
+++ b/libbb/xfuncs.c
@@ -427,3 +427,13 @@ int FAST_FUNC wait4pid(pid_t pid)
427 return WTERMSIG(status) + 0x180; 427 return WTERMSIG(status) + 0x180;
428 return 0; 428 return 0;
429} 429}
430
431void FAST_FUNC exit_SUCCESS(void)
432{
433 exit(EXIT_SUCCESS);
434}
435
436void FAST_FUNC _exit_SUCCESS(void)
437{
438 _exit(EXIT_SUCCESS);
439}
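These two helpers exist purely for size: calling exit_SUCCESS() is a bare call (or tail jmp), while exit(EXIT_SUCCESS) needs the argument register zeroed at every call site. Hypothetical call sites for comparison (not busybox source; comments show typical x86-64 codegen):

    #include <stdlib.h>

    void exit_SUCCESS(void); /* the libbb helper added above */

    void done_old(void) { exit(EXIT_SUCCESS); } /* xor %edi,%edi; jmp exit */
    void done_new(void) { exit_SUCCESS(); }     /* jmp exit_SUCCESS */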
diff --git a/libbb/xfuncs_printf.c b/libbb/xfuncs_printf.c
index d7d8b1092..aae3b092d 100644
--- a/libbb/xfuncs_printf.c
+++ b/libbb/xfuncs_printf.c
@@ -91,13 +91,10 @@ char* FAST_FUNC xstrdup(const char *s)
91 91
92// Die if we can't allocate n+1 bytes (space for the null terminator) and copy 92// Die if we can't allocate n+1 bytes (space for the null terminator) and copy
93// the (possibly truncated to length n) string into it. 93// the (possibly truncated to length n) string into it.
94char* FAST_FUNC xstrndup(const char *s, int n) 94char* FAST_FUNC xstrndup(const char *s, size_t n)
95{ 95{
96 char *t; 96 char *t;
97 97
98 if (ENABLE_DEBUG && s == NULL)
99 bb_simple_error_msg_and_die("xstrndup bug");
100
101 t = strndup(s, n); 98 t = strndup(s, n);
102 99
103 if (t == NULL) 100 if (t == NULL)
@@ -106,7 +103,7 @@ char* FAST_FUNC xstrndup(const char *s, int n)
106 return t; 103 return t;
107} 104}
108 105
109void* FAST_FUNC xmemdup(const void *s, int n) 106void* FAST_FUNC xmemdup(const void *s, size_t n)
110{ 107{
111 return memcpy(xmalloc(n), s, n); 108 return memcpy(xmalloc(n), s, n);
112} 109}
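The int -> size_t change in xstrndup()/xmemdup() matches the strndup(3)/memcpy(3) prototypes and removes a sign-conversion hazard: a negative int length, implicitly converted, turns into an enormous size_t. A standalone illustration (not busybox source):

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        int n = -1;
        size_t len = n;       /* implicit conversion wraps to SIZE_MAX */
        printf("%zu\n", len); /* prints 18446744073709551615 on LP64 */
        return 0;
    }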