diff options
Diffstat (limited to 'libbb/hash_md5_sha_x86-64.S.sh')
-rwxr-xr-x | libbb/hash_md5_sha_x86-64.S.sh | 267 |
1 files changed, 267 insertions, 0 deletions
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh new file mode 100755 index 000000000..931c0f0fd --- /dev/null +++ b/libbb/hash_md5_sha_x86-64.S.sh | |||
@@ -0,0 +1,267 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | # We don't regenerate it on every "make" invocation - only by hand. | ||
4 | # The reason is that the changes to generated code are difficult | ||
5 | # to visualize by looking only at this script, it helps when the commit | ||
6 | # also contains the diff of the generated file. | ||
7 | exec >hash_md5_sha_x86-64.S | ||
8 | |||
# Emit the file banner, the section/symbol directives and the function
# prologue of sha1_process_block64. The emitted code:
#  - pushes r15, r14, r13, r12, rbp, rbx and finally rdi (ctx), which the
#    epilogue pops back in the opposite order;
#  - loads message words W[8..15] from ctx offsets 32..63, byte-swapped,
#    into %r8d..%r15d (each 64-bit load yields two words);
#  - loads W[0..7] in a 4-iteration loop and stores them, byte-swapped,
#    at -32(%rsp)..-4(%rsp), i.e. below the stack pointer. The last
#    iteration (index 0) leaves W[0] in %esi, which round 0 relies on;
#  - loads a..e from ctx offsets 80..96.
# The text is single-quoted, so nothing in it is expanded by the shell.
9 | echo \ | ||
10 | '### Generated by hash_md5_sha_x86-64.S.sh ### | ||
11 | |||
12 | #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) | ||
13 | .section .text.sha1_process_block64,"ax",@progbits | ||
14 | .globl sha1_process_block64 | ||
15 | .hidden sha1_process_block64 | ||
16 | .type sha1_process_block64, @function | ||
17 | |||
18 | .balign 8 # allow decoders to fetch at least 4 first insns | ||
19 | sha1_process_block64: | ||
20 | pushq %r15 # | ||
21 | pushq %r14 # | ||
22 | pushq %r13 # | ||
23 | pushq %r12 # | ||
24 | pushq %rbp # | ||
25 | pushq %rbx # | ||
26 | pushq %rdi # we need ctx at the end | ||
27 | |||
28 | #Register and stack use: | ||
29 | # eax..edx: a..d | ||
30 | # ebp: e | ||
31 | # esi,edi: temps | ||
32 | # -32+4*n(%rsp),r8...r15: W[0..7,8..15] | ||
33 | # (TODO: actually W[0..7] are used a bit more often, put _them_ into r8..r15?) | ||
34 | |||
35 | movq 4*8(%rdi), %r8 | ||
36 | bswapq %r8 | ||
37 | movl %r8d, %r9d | ||
38 | shrq $32, %r8 | ||
39 | movq 4*10(%rdi), %r10 | ||
40 | bswapq %r10 | ||
41 | movl %r10d, %r11d | ||
42 | shrq $32, %r10 | ||
43 | movq 4*12(%rdi), %r12 | ||
44 | bswapq %r12 | ||
45 | movl %r12d, %r13d | ||
46 | shrq $32, %r12 | ||
47 | movq 4*14(%rdi), %r14 | ||
48 | bswapq %r14 | ||
49 | movl %r14d, %r15d | ||
50 | shrq $32, %r14 | ||
51 | |||
52 | movl $3, %eax | ||
53 | 1: | ||
54 | movq (%rdi,%rax,8), %rsi | ||
55 | bswapq %rsi | ||
56 | rolq $32, %rsi | ||
57 | movq %rsi, -32(%rsp,%rax,8) | ||
58 | decl %eax | ||
59 | jns 1b | ||
60 | movl 80(%rdi), %eax # a = ctx->hash[0] | ||
61 | movl 84(%rdi), %ebx # b = ctx->hash[1] | ||
62 | movl 88(%rdi), %ecx # c = ctx->hash[2] | ||
63 | movl 92(%rdi), %edx # d = ctx->hash[3] | ||
64 | movl 96(%rdi), %ebp # e = ctx->hash[4] | ||
65 | ' | ||
# W32 n
# Print the assembler operand that holds the 32-bit schedule word W[n]:
#   n = 0..7   ->  stack slot  "-32+4*n(%rsp)"
#   n = 8..15  ->  register    "%r<n>d"
# Any other argument (missing, non-numeric, negative, above 15) is rejected
# with a message on stderr and "exit 1". Stdout is redirected into the
# generated .S file, so diagnostics must go to stderr. Callers invoke this
# function inside `...`, where the exit only ends the substitution subshell;
# the message is what makes such a failure visible.
# Returns status 0 whenever an operand was printed.
W32() {
	case "$1" in
	''|*[!0-9]*)
		# empty, negative ("-" is not a digit) or otherwise non-numeric
		echo "W32: bad word index '$1'" >&2
		exit 1
		;;
	esac
	if test "$1" -gt 15; then
		echo "W32: word index $1 is outside 0..15" >&2
		exit 1
	fi
	if test "$1" -lt 8; then
		echo "-32+4*$1(%rsp)"
	else
		echo "%r${1}d"
	fi
}
73 | |||
# RD1A a b c d e n
# Emit the assembly for one SHA-1 round that uses the message word W[n]
# as loaded, without a schedule update (the callers below use it for
# n = 0..15). The round function is ((c ^ d) & b) ^ d.
#   a..e  register name suffixes (ax, bx, cx, dx, bp); "%e$x" and "%r$x"
#         are built from them
#   n     round number; may be an arithmetic expression, see $(( )) below
# The round constant is taken from the global $RCONST.
# Source of W[n] in the generated code:
#   n = 0      nothing is loaded, W[0] is expected to be in %esi already
#   n = 1..7   loaded from its stack slot into %esi
#   n = 8..15  used directly from %r<n>
# The generated code overwrites %esi and %edi, adds into e and rotates b
# in place.
74 | RD1A() { | ||
75 | local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||
76 | local n=$(($6)) | ||
# Each echo below starts and ends its text with a newline; the resulting
# empty lines are removed by the "grep -v" at the call site.
77 | echo "# $n" | ||
78 | test $n = 0 && echo " | ||
79 | # W[0], already in %esi | ||
80 | ";test $n != 0 && test $n -lt 8 && echo " | ||
81 | movl `W32 $n`, %esi # W[n] | ||
82 | ";test $n -ge 8 && echo " | ||
83 | # W[n], in %r$n | ||
84 | ";echo " | ||
85 | movl %e$c, %edi # c | ||
86 | xorl %e$d, %edi # ^d | ||
87 | andl %e$b, %edi # &b | ||
88 | xorl %e$d, %edi # (((c ^ d) & b) ^ d) | ||
89 | ";test $n -lt 8 && echo " | ||
90 | leal $RCONST(%r$e,%rsi),%e$e # e += RCONST + W[n] | ||
91 | ";test $n -ge 8 && echo " | ||
92 | leal $RCONST(%r$e,%r$n),%e$e # e += RCONST + W[n] | ||
93 | ";echo " | ||
94 | addl %edi, %e$e # e += (((c ^ d) & b) ^ d) | ||
95 | movl %e$a, %esi # | ||
96 | roll \$5, %esi # rotl32(a,5) | ||
97 | addl %esi, %e$e # e += rotl32(a,5) | ||
98 | rorl \$2, %e$b # b = rotl32(b,30) | ||
99 | " | ||
100 | } | ||
# RD1B a b c d e n
# Emit the assembly for one SHA-1 round with the same round function as
# RD1A, ((c ^ d) & b) ^ d, but first recomputing the schedule word:
#   W[n & 15] = rotl1(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15])
# (the one-operand "roll" rotates left by one bit).
#   a..e  register name suffixes (ax, bx, cx, dx, bp)
#   n     round number; the callers below use 16..19
# The round constant is taken from the global $RCONST.
# When W[n & 15] lives in a stack slot (index < 8) the new value is built in
# %esi and stored back; when it lives in a register (index >= 8) it is
# updated in place. With the current callers (n = 16..19, index 0..3) only
# the stack-slot variant is generated.
# The generated code overwrites %esi and %edi, adds into e and rotates b
# in place.
101 | RD1B() { | ||
102 | local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||
103 | local n=$(($6)) | ||
# Indices of the schedule words involved, all reduced modulo 16.
104 | local n13=$(((n+13) & 15)) | ||
105 | local n8=$(((n+8) & 15)) | ||
106 | local n2=$(((n+2) & 15)) | ||
107 | local n0=$(((n+0) & 15)) | ||
108 | echo " | ||
109 | # $n | ||
110 | ";test $n0 -lt 8 && echo " | ||
111 | movl `W32 $n13`, %esi # W[(n+13) & 15] | ||
112 | xorl `W32 $n8`, %esi # ^W[(n+8) & 15] | ||
113 | xorl `W32 $n2`, %esi # ^W[(n+2) & 15] | ||
114 | xorl `W32 $n0`, %esi # ^W[n & 15] | ||
115 | roll %esi # | ||
116 | movl %esi, `W32 $n0` # store to W[n & 15] | ||
117 | ";test $n0 -ge 8 && echo " | ||
118 | xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15] | ||
119 | xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] | ||
120 | xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] | ||
121 | roll `W32 $n0` # | ||
122 | "; echo " | ||
123 | movl %e$c, %edi # c | ||
124 | xorl %e$d, %edi # ^d | ||
125 | andl %e$b, %edi # &b | ||
126 | xorl %e$d, %edi # (((c ^ d) & b) ^ d) | ||
127 | ";test $n0 -lt 8 && echo " | ||
128 | leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] | ||
129 | ";test $n0 -ge 8 && echo " | ||
130 | leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] | ||
131 | ";echo " | ||
132 | addl %edi, %e$e # e += (((c ^ d) & b) ^ d) | ||
133 | movl %e$a, %esi # | ||
134 | roll \$5, %esi # rotl32(a,5) | ||
135 | addl %esi, %e$e # e += rotl32(a,5) | ||
136 | rorl \$2, %e$b # b = rotl32(b,30) | ||
137 | " | ||
138 | } | ||
# Rounds 0..19, constant 0x5A827999: RD1A for rounds 0..15, RD1B for
# rounds 16..19. The five register arguments are shifted by one position
# from each call to the next, so the a..e roles move between registers
# without any generated register-to-register copies.
# "grep -v '^$'" drops the empty lines that the echo templates produce.
139 | { | ||
140 | RCONST=0x5A827999 | ||
141 | RD1A ax bx cx dx bp 0; RD1A bp ax bx cx dx 1; RD1A dx bp ax bx cx 2; RD1A cx dx bp ax bx 3; RD1A bx cx dx bp ax 4 | ||
142 | RD1A ax bx cx dx bp 5; RD1A bp ax bx cx dx 6; RD1A dx bp ax bx cx 7; RD1A cx dx bp ax bx 8; RD1A bx cx dx bp ax 9 | ||
143 | RD1A ax bx cx dx bp 10; RD1A bp ax bx cx dx 11; RD1A dx bp ax bx cx 12; RD1A cx dx bp ax bx 13; RD1A bx cx dx bp ax 14 | ||
144 | RD1A ax bx cx dx bp 15; RD1B bp ax bx cx dx 16; RD1B dx bp ax bx cx 17; RD1B cx dx bp ax bx 18; RD1B bx cx dx bp ax 19 | ||
145 | } | grep -v '^$' | ||
146 | |||
# RD2 a b c d e n
# Emit the assembly for one SHA-1 round whose round function is
# c ^ d ^ b, after recomputing the schedule word:
#   W[n & 15] = rotl1(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15])
# (the one-operand "roll" rotates left by one bit).
#   a..e  register name suffixes (ax, bx, cx, dx, bp)
#   n     round number; the callers below use 20..39 and 60..79
# The round constant is taken from the global $RCONST, which is the only
# thing besides n that differs between those two groups of rounds.
# When W[n & 15] lives in a stack slot (index < 8) the new value is built in
# %esi and stored back; when it lives in a register (index >= 8) it is
# updated in place and used from there.
# The generated code overwrites %esi and %edi, adds into e and rotates b
# in place.
147 | RD2() { | ||
148 | local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||
149 | local n=$(($6)) | ||
# Indices of the schedule words involved, all reduced modulo 16.
150 | local n13=$(((n+13) & 15)) | ||
151 | local n8=$(((n+8) & 15)) | ||
152 | local n2=$(((n+2) & 15)) | ||
153 | local n0=$(((n+0) & 15)) | ||
154 | echo " | ||
155 | # $n | ||
156 | ";test $n0 -lt 8 && echo " | ||
157 | movl `W32 $n13`, %esi # W[(n+13) & 15] | ||
158 | xorl `W32 $n8`, %esi # ^W[(n+8) & 15] | ||
159 | xorl `W32 $n2`, %esi # ^W[(n+2) & 15] | ||
160 | xorl `W32 $n0`, %esi # ^W[n & 15] | ||
161 | roll %esi # | ||
162 | movl %esi, `W32 $n0` # store to W[n & 15] | ||
163 | ";test $n0 -ge 8 && echo " | ||
164 | xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15] | ||
165 | xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] | ||
166 | xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] | ||
167 | roll `W32 $n0` # | ||
168 | "; echo " | ||
169 | movl %e$c, %edi # c | ||
170 | xorl %e$d, %edi # ^d | ||
171 | xorl %e$b, %edi # ^b | ||
172 | ";test $n0 -lt 8 && echo " | ||
173 | leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] | ||
174 | ";test $n0 -ge 8 && echo " | ||
175 | leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] | ||
176 | ";echo " | ||
177 | addl %edi, %e$e # e += (c ^ d ^ b) | ||
178 | movl %e$a, %esi # | ||
179 | roll \$5, %esi # rotl32(a,5) | ||
180 | addl %esi, %e$e # e += rotl32(a,5) | ||
181 | rorl \$2, %e$b # b = rotl32(b,30) | ||
182 | " | ||
183 | } | ||
# Rounds 20..39, constant 0x6ED9EBA1, all generated by RD2. The register
# arguments keep shifting by one position per round, continuing the
# sequence of the previous group.
# "grep -v '^$'" drops the empty lines that the echo templates produce.
184 | { | ||
185 | RCONST=0x6ED9EBA1 | ||
186 | RD2 ax bx cx dx bp 20; RD2 bp ax bx cx dx 21; RD2 dx bp ax bx cx 22; RD2 cx dx bp ax bx 23; RD2 bx cx dx bp ax 24 | ||
187 | RD2 ax bx cx dx bp 25; RD2 bp ax bx cx dx 26; RD2 dx bp ax bx cx 27; RD2 cx dx bp ax bx 28; RD2 bx cx dx bp ax 29 | ||
188 | RD2 ax bx cx dx bp 30; RD2 bp ax bx cx dx 31; RD2 dx bp ax bx cx 32; RD2 cx dx bp ax bx 33; RD2 bx cx dx bp ax 34 | ||
189 | RD2 ax bx cx dx bp 35; RD2 bp ax bx cx dx 36; RD2 dx bp ax bx cx 37; RD2 cx dx bp ax bx 38; RD2 bx cx dx bp ax 39 | ||
190 | } | grep -v '^$' | ||
191 | |||
# RD3 a b c d e n
# Emit the assembly for one SHA-1 round whose round function is
# ((b | c) & d) | (b & c), with the schedule word recomputed as
#   W[n & 15] = rotl1(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15])
# (the one-operand "roll" rotates left by one bit).
#   a..e  register name suffixes (ax, bx, cx, dx, bp)
#   n     round number; the callers below use 40..59
# The round constant is taken from the global $RCONST.
# Unlike RD1B/RD2, the round function is computed first: it needs both
# %edi and %esi, and %esi is reused afterwards for the schedule word
# (stack-slot case) and for rotl32(a,5).
# The generated code overwrites %esi and %edi, adds into e and rotates b
# in place.
192 | RD3() { | ||
193 | local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||
194 | local n=$(($6)) | ||
# Indices of the schedule words involved, all reduced modulo 16.
195 | local n13=$(((n+13) & 15)) | ||
196 | local n8=$(((n+8) & 15)) | ||
197 | local n2=$(((n+2) & 15)) | ||
198 | local n0=$(((n+0) & 15)) | ||
199 | echo " | ||
200 | # $n | ||
201 | movl %e$b, %edi # di: b | ||
202 | movl %e$b, %esi # si: b | ||
203 | orl %e$c, %edi # di: b | c | ||
204 | andl %e$c, %esi # si: b & c | ||
205 | andl %e$d, %edi # di: (b | c) & d | ||
206 | orl %esi, %edi # ((b | c) & d) | (b & c) | ||
207 | ";test $n0 -lt 8 && echo " | ||
208 | movl `W32 $n13`, %esi # W[(n+13) & 15] | ||
209 | xorl `W32 $n8`, %esi # ^W[(n+8) & 15] | ||
210 | xorl `W32 $n2`, %esi # ^W[(n+2) & 15] | ||
211 | xorl `W32 $n0`, %esi # ^W[n & 15] | ||
212 | roll %esi # | ||
213 | movl %esi, `W32 $n0` # store to W[n & 15] | ||
214 | ";test $n0 -ge 8 && echo " | ||
215 | xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15] | ||
216 | xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] | ||
217 | xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] | ||
218 | roll `W32 $n0` # | ||
219 | "; echo " | ||
220 | addl %edi, %e$e # += ((b | c) & d) | (b & c) | ||
221 | ";test $n0 -lt 8 && echo " | ||
222 | leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] | ||
223 | ";test $n0 -ge 8 && echo " | ||
224 | leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] | ||
225 | ";echo " | ||
226 | movl %e$a, %esi # | ||
227 | roll \$5, %esi # rotl32(a,5) | ||
228 | addl %esi, %e$e # e += rotl32(a,5) | ||
229 | rorl \$2, %e$b # b = rotl32(b,30) | ||
230 | " | ||
231 | } | ||
# Rounds 40..59, all generated by RD3. The constant 0x8F1BBCDC cannot be
# written as a positive leal displacement (see the assembler message quoted
# below), so it is given as -0x70E44324: the two differ by exactly 2^32 and
# the leal result is a 32-bit register, so the sum is the same.
# "grep -v '^$'" drops the empty lines that the echo templates produce.
232 | { | ||
233 | #RCONST=0x8F1BBCDC "out of range for signed 32bit displacement" | ||
234 | RCONST=-0x70E44324 | ||
235 | RD3 ax bx cx dx bp 40; RD3 bp ax bx cx dx 41; RD3 dx bp ax bx cx 42; RD3 cx dx bp ax bx 43; RD3 bx cx dx bp ax 44 | ||
236 | RD3 ax bx cx dx bp 45; RD3 bp ax bx cx dx 46; RD3 dx bp ax bx cx 47; RD3 cx dx bp ax bx 48; RD3 bx cx dx bp ax 49 | ||
237 | RD3 ax bx cx dx bp 50; RD3 bp ax bx cx dx 51; RD3 dx bp ax bx cx 52; RD3 cx dx bp ax bx 53; RD3 bx cx dx bp ax 54 | ||
238 | RD3 ax bx cx dx bp 55; RD3 bp ax bx cx dx 56; RD3 dx bp ax bx cx 57; RD3 cx dx bp ax bx 58; RD3 bx cx dx bp ax 59 | ||
239 | } | grep -v '^$' | ||
240 | |||
241 | # Round 4 has the same logic as round 2, only n and RCONST are different | ||
# Rounds 60..79, generated by RD2 with a different constant. 0xCA62C1D6
# cannot be written as a positive leal displacement (see the assembler
# message quoted below), so it is given as -0x359D3E2A: the two differ by
# exactly 2^32 and the leal result is a 32-bit register, so the sum is the
# same.
# "grep -v '^$'" drops the empty lines that the echo templates produce.
242 | { | ||
243 | #RCONST=0xCA62C1D6 "out of range for signed 32bit displacement" | ||
244 | RCONST=-0x359D3E2A | ||
245 | RD2 ax bx cx dx bp 60; RD2 bp ax bx cx dx 61; RD2 dx bp ax bx cx 62; RD2 cx dx bp ax bx 63; RD2 bx cx dx bp ax 64 | ||
246 | RD2 ax bx cx dx bp 65; RD2 bp ax bx cx dx 66; RD2 dx bp ax bx cx 67; RD2 cx dx bp ax bx 68; RD2 bx cx dx bp ax 69 | ||
247 | RD2 ax bx cx dx bp 70; RD2 bp ax bx cx dx 71; RD2 dx bp ax bx cx 72; RD2 cx dx bp ax bx 73; RD2 bx cx dx bp ax 74 | ||
248 | RD2 ax bx cx dx bp 75; RD2 bp ax bx cx dx 76; RD2 dx bp ax bx cx 77; RD2 cx dx bp ax bx 78; RD2 bx cx dx bp ax 79 | ||
249 | } | grep -v '^$' | ||
250 | |||
# Emit the function epilogue: pop ctx back into %rdi, add a..e to
# ctx->hash[0..4] (offsets 80..96), pop the saved registers in the reverse
# of the order the prologue pushed them, return, and close the symbol size
# directive and the "#if" opened at the top of the generated file.
# The text is double-quoted but contains no "$" or backquote, so the shell
# emits it unchanged.
251 | echo " | ||
252 | popq %rdi # | ||
253 | addl %eax, 80(%rdi) # ctx->hash[0] += a | ||
254 | addl %ebx, 84(%rdi) # ctx->hash[1] += b | ||
255 | addl %ecx, 88(%rdi) # ctx->hash[2] += c | ||
256 | addl %edx, 92(%rdi) # ctx->hash[3] += d | ||
257 | addl %ebp, 96(%rdi) # ctx->hash[4] += e | ||
258 | popq %rbx # | ||
259 | popq %rbp # | ||
260 | popq %r12 # | ||
261 | popq %r13 # | ||
262 | popq %r14 # | ||
263 | popq %r15 # | ||
264 | |||
265 | ret | ||
266 | .size sha1_process_block64, .-sha1_process_block64 | ||
267 | #endif" | ||