aboutsummaryrefslogtreecommitdiff
path: root/libbb/hash_md5_sha_x86-64.S.sh
diff options
context:
space:
mode:
Diffstat (limited to 'libbb/hash_md5_sha_x86-64.S.sh')
-rwxr-xr-xlibbb/hash_md5_sha_x86-64.S.sh267
1 files changed, 267 insertions, 0 deletions
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh
new file mode 100755
index 000000000..931c0f0fd
--- /dev/null
+++ b/libbb/hash_md5_sha_x86-64.S.sh
@@ -0,0 +1,267 @@
#!/bin/sh

# We don't regenerate it on every "make" invocation - only by hand.
# The reason is that the changes to generated code are difficult
# to visualize by looking only at this script, it helps when the commit
# also contains the diff of the generated file.
#
# From this point on, everything the script writes to stdout lands in
# hash_md5_sha_x86-64.S, which is created (or truncated) in the current
# working directory.
exec >hash_md5_sha_x86-64.S

# Emit the fixed head of sha1_process_block64. The single-quoted text below
# is copied to the output verbatim (no shell expansion happens inside it):
#  - pushes r15, r14, r13, r12, rbp, rbx, and finally rdi (the ctx pointer,
#    which is popped first by the epilogue);
#  - loads the 8-byte pairs at ctx offsets 32..56, byte-swaps them and splits
#    them into r8d..r15d, i.e. W[8..15];
#  - loops rax = 3..0 over the 8-byte pairs at ctx offsets 0..24, byte-swaps
#    them, swaps the two 32-bit halves and stores them at -32(%rsp,%rax,8),
#    i.e. W[0..7]; the last pass (rax = 0) leaves W[0] in %esi, which the
#    code generated for round 0 by RD1A depends on;
#  - loads a..e from ctx offsets 80..96 into eax, ebx, ecx, edx, ebp.
echo \
'### Generated by hash_md5_sha_x86-64.S.sh ###

#if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__)
 .section .text.sha1_process_block64,"ax",@progbits
 .globl sha1_process_block64
 .hidden sha1_process_block64
 .type sha1_process_block64, @function

 .balign 8 # allow decoders to fetch at least 4 first insns
sha1_process_block64:
 pushq %r15 #
 pushq %r14 #
 pushq %r13 #
 pushq %r12 #
 pushq %rbp #
 pushq %rbx #
 pushq %rdi # we need ctx at the end

#Register and stack use:
# eax..edx: a..d
# ebp: e
# esi,edi: temps
# -32+4*n(%rsp),r8...r15: W[0..7,8..15]
# (TODO: actually W[0..7] are used a bit more often, put _thme_ into r8..r15?)

 movq 4*8(%rdi), %r8
 bswapq %r8
 movl %r8d, %r9d
 shrq $32, %r8
 movq 4*10(%rdi), %r10
 bswapq %r10
 movl %r10d, %r11d
 shrq $32, %r10
 movq 4*12(%rdi), %r12
 bswapq %r12
 movl %r12d, %r13d
 shrq $32, %r12
 movq 4*14(%rdi), %r14
 bswapq %r14
 movl %r14d, %r15d
 shrq $32, %r14

 movl $3, %eax
1:
 movq (%rdi,%rax,8), %rsi
 bswapq %rsi
 rolq $32, %rsi
 movq %rsi, -32(%rsp,%rax,8)
 decl %eax
 jns 1b
 movl 80(%rdi), %eax # a = ctx->hash[0]
 movl 84(%rdi), %ebx # b = ctx->hash[1]
 movl 88(%rdi), %ecx # c = ctx->hash[2]
 movl 92(%rdi), %edx # d = ctx->hash[3]
 movl 96(%rdi), %ebp # e = ctx->hash[4]
'
# W32 N
# Print the assembler operand that holds message-schedule word W[N]:
#   N in 0..7   -> stack slot  "-32+4*N(%rsp)"
#   N in 8..15  -> register    "%rNd"
# Returns 0 after printing the operand.
# If N is missing, is not a plain non-negative decimal number, or is above
# 15, a diagnostic goes to stderr (stdout is the generated file) and the
# function calls "exit 1". Callers invoke W32 inside command substitution,
# where that exit ends only the substitution's subshell, so the message on
# stderr is the only visible sign that a bad operand was requested.
W32() {
	case "$1" in
	'' | *[!0-9]*)
		echo "W32: bad word index '$1' (want 0..15)" >&2
		exit 1
		;;
	esac
	if test "$1" -gt 15; then
		echo "W32: word index $1 out of range 0..15" >&2
		exit 1
	fi
	if test "$1" -lt 8; then
		# W[0..7] live just below the stack pointer.
		echo "-32+4*$1(%rsp)"
	else
		# W[8..15] live in the 32-bit halves of r8..r15.
		echo "%r${1}d"
	fi
}
73
# RD1A A B C D E N
# Print the assembly for one SHA-1 round that takes W[N] straight from the
# loaded input block (the script calls it for N = 0..15).
#   A..E  register name suffixes (ax, bx, cx, dx, bp); the emitted code
#         prefixes them with %e (32-bit) or %r (64-bit, inside leal).
#   N     round number; any shell arithmetic expression.
# Reads the global RCONST, which is pasted in as the leal displacement.
# Emitted computation, per its own asm comments:
#   e += RCONST + W[n] + (((c ^ d) & b) ^ d) + rotl32(a,5); b = rotl32(b,30)
# Each group of lines is surrounded by empty lines, which the callers strip.
RD1A() {
	local ra="$1" rb="$2" rc="$3" rd="$4" re="$5"
	local rnd=$(($6))
	local widx

	printf '%s\n' "# $rnd"

	# Get W[n] into a register usable as the leal index.
	if test "$rnd" = 0; then
		# The prologue's load loop leaves W[0] in %esi.
		printf '%s\n' "" " # W[0], already in %esi" ""
		widx=rsi
	elif test "$rnd" -lt 8; then
		printf '%s\n' "" " movl $(W32 "$rnd"), %esi # W[n]" ""
		widx=rsi
	else
		printf '%s\n' "" " # W[n], in %r$rnd" ""
		widx="r$rnd"
	fi

	printf '%s\n' \
		"" \
		" movl %e${rc}, %edi # c" \
		" xorl %e${rd}, %edi # ^d" \
		" andl %e${rb}, %edi # &b" \
		" xorl %e${rd}, %edi # (((c ^ d) & b) ^ d)" \
		""

	printf '%s\n' \
		"" \
		" leal $RCONST(%r${re},%${widx}),%e${re} # e += RCONST + W[n]" \
		""

	printf '%s\n' \
		"" \
		" addl %edi, %e${re} # e += (((c ^ d) & b) ^ d)" \
		" movl %e${ra}, %esi #" \
		" roll \$5, %esi # rotl32(a,5)" \
		" addl %esi, %e${re} # e += rotl32(a,5)" \
		" rorl \$2, %e${rb} # b = rotl32(b,30)" \
		""
}
# RD1B A B C D E N
# Print the assembly for one SHA-1 round that first recomputes the schedule
# word in slot N & 15 and then applies the same mixing as RD1A (the script
# calls it for N = 16..19).
#   A..E  register name suffixes (ax, bx, cx, dx, bp).
#   N     round number; any shell arithmetic expression.
# Reads the global RCONST (leal displacement) and calls W32 for operands.
# Emitted computation, per its own asm comments:
#   W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1)
#   e += RCONST + W[n & 15] + (((c ^ d) & b) ^ d) + rotl32(a,5); b = rotl32(b,30)
RD1B() {
	local ra="$1" rb="$2" rc="$3" rd="$4" re="$5"
	local rnd=$(($6))
	local slot=$((rnd & 15))
	local w13 w8 w2 w0 widx

	# Operands of the four schedule words involved.
	w13=$(W32 "$(( (rnd + 13) & 15 ))")
	w8=$(W32 "$(( (rnd + 8) & 15 ))")
	w2=$(W32 "$(( (rnd + 2) & 15 ))")
	w0=$(W32 "$slot")

	printf '%s\n' "" "# $rnd" ""

	if test "$slot" -lt 8; then
		# Slot is on the stack: build the new word in %esi, then store it.
		printf '%s\n' \
			"" \
			" movl $w13, %esi # W[(n+13) & 15]" \
			" xorl $w8, %esi # ^W[(n+8) & 15]" \
			" xorl $w2, %esi # ^W[(n+2) & 15]" \
			" xorl $w0, %esi # ^W[n & 15]" \
			" roll %esi #" \
			" movl %esi, $w0 # store to W[n & 15]" \
			""
		widx=rsi
	else
		# Slot is a register: update it in place.
		printf '%s\n' \
			"" \
			" xorl $w13, $w0 # W[n & 15] ^= W[(n+13) & 15]" \
			" xorl $w8, $w0 # ^W[(n+8) & 15]" \
			" xorl $w2, $w0 # ^W[(n+2) & 15]" \
			" roll $w0 #" \
			""
		widx="r$slot"
	fi

	printf '%s\n' \
		"" \
		" movl %e${rc}, %edi # c" \
		" xorl %e${rd}, %edi # ^d" \
		" andl %e${rb}, %edi # &b" \
		" xorl %e${rd}, %edi # (((c ^ d) & b) ^ d)" \
		""

	printf '%s\n' \
		"" \
		" leal $RCONST(%r${re},%${widx}), %e${re} # e += RCONST + W[n & 15]" \
		""

	printf '%s\n' \
		"" \
		" addl %edi, %e${re} # e += (((c ^ d) & b) ^ d)" \
		" movl %e${ra}, %esi #" \
		" roll \$5, %esi # rotl32(a,5)" \
		" addl %esi, %e${re} # e += rotl32(a,5)" \
		" rorl \$2, %e${rb} # b = rotl32(b,30)" \
		""
}
# Rounds 0..19, all with RCONST=0x5A827999: RD1A generates rounds 0..15 and
# RD1B rounds 16..19. The five register roles rotate by one position after
# every round (the register used as e becomes the next round's a, a becomes
# b, and so on), starting from "ax bx cx dx bp". The generators pad their
# output with empty lines; grep removes those.
{
	RCONST=0x5A827999
	set -- ax bx cx dx bp
	rnd=0
	while test "$rnd" -lt 20; do
		if test "$rnd" -lt 16; then
			RD1A "$@" "$rnd"
		else
			RD1B "$@" "$rnd"
		fi
		set -- "$5" "$1" "$2" "$3" "$4"
		rnd=$((rnd + 1))
	done
} | grep -v '^$'
146
# RD2 A B C D E N
# Print the assembly for one SHA-1 round that recomputes the schedule word
# in slot N & 15 and mixes it in with the three-way XOR function (the script
# calls it for N = 20..39 and again for N = 60..79).
#   A..E  register name suffixes (ax, bx, cx, dx, bp).
#   N     round number; any shell arithmetic expression.
# Reads the global RCONST (leal displacement) and calls W32 for operands.
# Emitted computation, per its own asm comments:
#   W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1)
#   e += RCONST + W[n & 15] + (c ^ d ^ b) + rotl32(a,5); b = rotl32(b,30)
RD2() {
	local ra="$1" rb="$2" rc="$3" rd="$4" re="$5"
	local rnd=$(($6))
	local slot=$((rnd & 15))
	local w13 w8 w2 w0 widx

	# Operands of the four schedule words involved.
	w13=$(W32 "$(( (rnd + 13) & 15 ))")
	w8=$(W32 "$(( (rnd + 8) & 15 ))")
	w2=$(W32 "$(( (rnd + 2) & 15 ))")
	w0=$(W32 "$slot")

	printf '%s\n' "" "# $rnd" ""

	if test "$slot" -lt 8; then
		# Slot is on the stack: build the new word in %esi, then store it.
		printf '%s\n' \
			"" \
			" movl $w13, %esi # W[(n+13) & 15]" \
			" xorl $w8, %esi # ^W[(n+8) & 15]" \
			" xorl $w2, %esi # ^W[(n+2) & 15]" \
			" xorl $w0, %esi # ^W[n & 15]" \
			" roll %esi #" \
			" movl %esi, $w0 # store to W[n & 15]" \
			""
		widx=rsi
	else
		# Slot is a register: update it in place.
		printf '%s\n' \
			"" \
			" xorl $w13, $w0 # W[n & 15] ^= W[(n+13) & 15]" \
			" xorl $w8, $w0 # ^W[(n+8) & 15]" \
			" xorl $w2, $w0 # ^W[(n+2) & 15]" \
			" roll $w0 #" \
			""
		widx="r$slot"
	fi

	printf '%s\n' \
		"" \
		" movl %e${rc}, %edi # c" \
		" xorl %e${rd}, %edi # ^d" \
		" xorl %e${rb}, %edi # ^b" \
		""

	printf '%s\n' \
		"" \
		" leal $RCONST(%r${re},%${widx}), %e${re} # e += RCONST + W[n & 15]" \
		""

	printf '%s\n' \
		"" \
		" addl %edi, %e${re} # e += (c ^ d ^ b)" \
		" movl %e${ra}, %esi #" \
		" roll \$5, %esi # rotl32(a,5)" \
		" addl %esi, %e${re} # e += rotl32(a,5)" \
		" rorl \$2, %e${rb} # b = rotl32(b,30)" \
		""
}
# Rounds 20..39: RD2 with RCONST=0x6ED9EBA1. The five register roles rotate
# by one position after every round, starting from "ax bx cx dx bp".
# grep removes the empty padding lines the generator prints.
{
	RCONST=0x6ED9EBA1
	set -- ax bx cx dx bp
	rnd=20
	while test "$rnd" -lt 40; do
		RD2 "$@" "$rnd"
		set -- "$5" "$1" "$2" "$3" "$4"
		rnd=$((rnd + 1))
	done
} | grep -v '^$'
191
# RD3 A B C D E N
# Print the assembly for one SHA-1 round that recomputes the schedule word
# in slot N & 15 and mixes it in with the majority-style function (the
# script calls it for N = 40..59).
#   A..E  register name suffixes (ax, bx, cx, dx, bp).
#   N     round number; any shell arithmetic expression.
# Reads the global RCONST (leal displacement) and calls W32 for operands.
# Emitted computation, per its own asm comments:
#   W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1)
#   e += (((b | c) & d) | (b & c)) + RCONST + W[n & 15] + rotl32(a,5)
#   b = rotl32(b,30)
# The function value is built first because it needs %esi as scratch, and
# the stack-slot variant of the schedule update reuses %esi afterwards.
RD3() {
	local ra="$1" rb="$2" rc="$3" rd="$4" re="$5"
	local rnd=$(($6))
	local slot=$((rnd & 15))
	local w13 w8 w2 w0 widx

	# Operands of the four schedule words involved.
	w13=$(W32 "$(( (rnd + 13) & 15 ))")
	w8=$(W32 "$(( (rnd + 8) & 15 ))")
	w2=$(W32 "$(( (rnd + 2) & 15 ))")
	w0=$(W32 "$slot")

	printf '%s\n' \
		"" \
		"# $rnd" \
		" movl %e${rb}, %edi # di: b" \
		" movl %e${rb}, %esi # si: b" \
		" orl %e${rc}, %edi # di: b | c" \
		" andl %e${rc}, %esi # si: b & c" \
		" andl %e${rd}, %edi # di: (b | c) & d" \
		" orl %esi, %edi # ((b | c) & d) | (b & c)" \
		""

	if test "$slot" -lt 8; then
		# Slot is on the stack: build the new word in %esi, then store it.
		printf '%s\n' \
			"" \
			" movl $w13, %esi # W[(n+13) & 15]" \
			" xorl $w8, %esi # ^W[(n+8) & 15]" \
			" xorl $w2, %esi # ^W[(n+2) & 15]" \
			" xorl $w0, %esi # ^W[n & 15]" \
			" roll %esi #" \
			" movl %esi, $w0 # store to W[n & 15]" \
			""
		widx=rsi
	else
		# Slot is a register: update it in place.
		printf '%s\n' \
			"" \
			" xorl $w13, $w0 # W[n & 15] ^= W[(n+13) & 15]" \
			" xorl $w8, $w0 # ^W[(n+8) & 15]" \
			" xorl $w2, $w0 # ^W[(n+2) & 15]" \
			" roll $w0 #" \
			""
		widx="r$slot"
	fi

	printf '%s\n' \
		"" \
		" addl %edi, %e${re} # += ((b | c) & d) | (b & c)" \
		""

	printf '%s\n' \
		"" \
		" leal $RCONST(%r${re},%${widx}), %e${re} # e += RCONST + W[n & 15]" \
		""

	printf '%s\n' \
		"" \
		" movl %e${ra}, %esi #" \
		" roll \$5, %esi # rotl32(a,5)" \
		" addl %esi, %e${re} # e += rotl32(a,5)" \
		" rorl \$2, %e${rb} # b = rotl32(b,30)" \
		""
}
# Rounds 40..59: RD3. The five register roles rotate by one position after
# every round, starting from "ax bx cx dx bp". grep removes the empty
# padding lines the generator prints.
{
	#RCONST=0x8F1BBCDC "out of range for signed 32bit displacement"
	# so the same 32-bit value is written as a negative number instead
	RCONST=-0x70E44324
	set -- ax bx cx dx bp
	rnd=40
	while test "$rnd" -lt 60; do
		RD3 "$@" "$rnd"
		set -- "$5" "$1" "$2" "$3" "$4"
		rnd=$((rnd + 1))
	done
} | grep -v '^$'
240
# Rounds 60..79 reuse the round 20..39 generator (RD2); only the round
# numbers and RCONST differ. The five register roles rotate by one position
# after every round, starting from "ax bx cx dx bp". grep removes the empty
# padding lines the generator prints.
{
	#RCONST=0xCA62C1D6 "out of range for signed 32bit displacement"
	# so the same 32-bit value is written as a negative number instead
	RCONST=-0x359D3E2A
	set -- ax bx cx dx bp
	rnd=60
	while test "$rnd" -lt 80; do
		RD2 "$@" "$rnd"
		set -- "$5" "$1" "$2" "$3" "$4"
		rnd=$((rnd + 1))
	done
} | grep -v '^$'
250
251echo "
252 popq %rdi #
253 addl %eax, 80(%rdi) # ctx->hash[0] += a
254 addl %ebx, 84(%rdi) # ctx->hash[1] += b
255 addl %ecx, 88(%rdi) # ctx->hash[2] += c
256 addl %edx, 92(%rdi) # ctx->hash[3] += d
257 addl %ebp, 96(%rdi) # ctx->hash[4] += e
258 popq %rbx #
259 popq %rbp #
260 popq %r12 #
261 popq %r13 #
262 popq %r14 #
263 popq %r15 #
264
265 ret
266 .size sha1_process_block64, .-sha1_process_block64
267#endif"