aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2022-01-03 13:00:07 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2022-01-03 13:10:30 +0100
commit 947bef0deaba7b2ce432d515379091dcd4cf747f (patch)
tree 15273e01ea7b42df47e49779fede62f7289a4178
parent 05fd13ebec869fc5e6f226481a2405a2685e8db1 (diff)
download busybox-w32-947bef0deaba7b2ce432d515379091dcd4cf747f.tar.gz
busybox-w32-947bef0deaba7b2ce432d515379091dcd4cf747f.tar.bz2
busybox-w32-947bef0deaba7b2ce432d515379091dcd4cf747f.zip
libbb/sha1: x86_64 version: generate from a script, optimize a bit
function                                             old     new   delta
sha1_process_block64                                3569    3502     -67

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- libbb/Config.src 2
-rw-r--r-- libbb/hash_md5_sha_x86-64.S 472
-rwxr-xr-x libbb/hash_md5_sha_x86-64.S.sh 267
3 files changed, 474 insertions, 267 deletions
diff --git a/libbb/Config.src b/libbb/Config.src
index 42a2283aa..c80bee286 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -59,7 +59,7 @@ config SHA1_SMALL
59 Trade binary size versus speed for the sha1 algorithm. 59 Trade binary size versus speed for the sha1 algorithm.
60 throughput MB/s size of sha1_process_block64 60 throughput MB/s size of sha1_process_block64
61 value 486 x86-64 486 x86-64 61 value 486 x86-64 486 x86-64
62 0 367 367 3657 3570 62 0 367 375 3657 3502
63 1 224 229 654 732 63 1 224 229 654 732
64 2,3 200 195 358 380 64 2,3 200 195 358 380
65 65
diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S
index 466cd9ae9..3e1c4b455 100644
--- a/libbb/hash_md5_sha_x86-64.S
+++ b/libbb/hash_md5_sha_x86-64.S
@@ -1,23 +1,27 @@
1### Generated by hash_md5_sha_x86-64.S.sh ### 1### Generated by hash_md5_sha_x86-64.S.sh ###
2#if defined(__GNUC__) && defined(__x86_64__) 2
3#if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__)
3 .section .text.sha1_process_block64,"ax",@progbits 4 .section .text.sha1_process_block64,"ax",@progbits
4 .globl sha1_process_block64 5 .globl sha1_process_block64
5 .hidden sha1_process_block64 6 .hidden sha1_process_block64
6 .type sha1_process_block64, @function 7 .type sha1_process_block64, @function
8
9 .balign 8 # allow decoders to fetch at least 4 first insns
7sha1_process_block64: 10sha1_process_block64:
8 pushq %r15 # 11 pushq %r15 #
9 pushq %r14 # 12 pushq %r14 #
10 pushq %r13 # 13 pushq %r13 #
11 pushq %r12 # 14 pushq %r12 #
12 pushq %rbp # 15 pushq %rbp #
13 pushq %rbx # 16 pushq %rbx #
14 pushq %rdi # we need ctx at the end 17 pushq %rdi # we need ctx at the end
15 18
16#Register and stack use: 19#Register and stack use:
17# eax..edx: a..d 20# eax..edx: a..d
18# ebp: e 21# ebp: e
19# esi,edi: temps 22# esi,edi: temps
20# -32+4*n(%rsp),r8...r15: W[0..7,8..15] 23# -32+4*n(%rsp),r8...r15: W[0..7,8..15]
24# (TODO: actually W[0..7] are used a bit more often, put _them_ into r8..r15?)
21 25
22 movq 4*8(%rdi), %r8 26 movq 4*8(%rdi), %r8
23 bswapq %r8 27 bswapq %r8
@@ -253,7 +257,7 @@ sha1_process_block64:
253 xorl %ecx, %edi # ^d 257 xorl %ecx, %edi # ^d
254 andl %eax, %edi # &b 258 andl %eax, %edi # &b
255 xorl %ecx, %edi # (((c ^ d) & b) ^ d) 259 xorl %ecx, %edi # (((c ^ d) & b) ^ d)
256 leal 0x5A827999(%rdx,%rsi),%edx # e += RCONST + W[n] 260 leal 0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
257 addl %edi, %edx # e += (((c ^ d) & b) ^ d) 261 addl %edi, %edx # e += (((c ^ d) & b) ^ d)
258 movl %ebp, %esi # 262 movl %ebp, %esi #
259 roll $5, %esi # rotl32(a,5) 263 roll $5, %esi # rotl32(a,5)
@@ -270,7 +274,7 @@ sha1_process_block64:
270 xorl %ebx, %edi # ^d 274 xorl %ebx, %edi # ^d
271 andl %ebp, %edi # &b 275 andl %ebp, %edi # &b
272 xorl %ebx, %edi # (((c ^ d) & b) ^ d) 276 xorl %ebx, %edi # (((c ^ d) & b) ^ d)
273 leal 0x5A827999(%rcx,%rsi),%ecx # e += RCONST + W[n] 277 leal 0x5A827999(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
274 addl %edi, %ecx # e += (((c ^ d) & b) ^ d) 278 addl %edi, %ecx # e += (((c ^ d) & b) ^ d)
275 movl %edx, %esi # 279 movl %edx, %esi #
276 roll $5, %esi # rotl32(a,5) 280 roll $5, %esi # rotl32(a,5)
@@ -287,7 +291,7 @@ sha1_process_block64:
287 xorl %eax, %edi # ^d 291 xorl %eax, %edi # ^d
288 andl %edx, %edi # &b 292 andl %edx, %edi # &b
289 xorl %eax, %edi # (((c ^ d) & b) ^ d) 293 xorl %eax, %edi # (((c ^ d) & b) ^ d)
290 leal 0x5A827999(%rbx,%rsi),%ebx # e += RCONST + W[n] 294 leal 0x5A827999(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
291 addl %edi, %ebx # e += (((c ^ d) & b) ^ d) 295 addl %edi, %ebx # e += (((c ^ d) & b) ^ d)
292 movl %ecx, %esi # 296 movl %ecx, %esi #
293 roll $5, %esi # rotl32(a,5) 297 roll $5, %esi # rotl32(a,5)
@@ -304,7 +308,7 @@ sha1_process_block64:
304 xorl %ebp, %edi # ^d 308 xorl %ebp, %edi # ^d
305 andl %ecx, %edi # &b 309 andl %ecx, %edi # &b
306 xorl %ebp, %edi # (((c ^ d) & b) ^ d) 310 xorl %ebp, %edi # (((c ^ d) & b) ^ d)
307 leal 0x5A827999(%rax,%rsi),%eax # e += RCONST + W[n] 311 leal 0x5A827999(%rax,%rsi), %eax # e += RCONST + W[n & 15]
308 addl %edi, %eax # e += (((c ^ d) & b) ^ d) 312 addl %edi, %eax # e += (((c ^ d) & b) ^ d)
309 movl %ebx, %esi # 313 movl %ebx, %esi #
310 roll $5, %esi # rotl32(a,5) 314 roll $5, %esi # rotl32(a,5)
@@ -320,7 +324,7 @@ sha1_process_block64:
320 movl %ecx, %edi # c 324 movl %ecx, %edi # c
321 xorl %edx, %edi # ^d 325 xorl %edx, %edi # ^d
322 xorl %ebx, %edi # ^b 326 xorl %ebx, %edi # ^b
323 leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W 327 leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
324 addl %edi, %ebp # e += (c ^ d ^ b) 328 addl %edi, %ebp # e += (c ^ d ^ b)
325 movl %eax, %esi # 329 movl %eax, %esi #
326 roll $5, %esi # rotl32(a,5) 330 roll $5, %esi # rotl32(a,5)
@@ -336,7 +340,7 @@ sha1_process_block64:
336 movl %ebx, %edi # c 340 movl %ebx, %edi # c
337 xorl %ecx, %edi # ^d 341 xorl %ecx, %edi # ^d
338 xorl %eax, %edi # ^b 342 xorl %eax, %edi # ^b
339 leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W 343 leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
340 addl %edi, %edx # e += (c ^ d ^ b) 344 addl %edi, %edx # e += (c ^ d ^ b)
341 movl %ebp, %esi # 345 movl %ebp, %esi #
342 roll $5, %esi # rotl32(a,5) 346 roll $5, %esi # rotl32(a,5)
@@ -352,7 +356,7 @@ sha1_process_block64:
352 movl %eax, %edi # c 356 movl %eax, %edi # c
353 xorl %ebx, %edi # ^d 357 xorl %ebx, %edi # ^d
354 xorl %ebp, %edi # ^b 358 xorl %ebp, %edi # ^b
355 leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W 359 leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
356 addl %edi, %ecx # e += (c ^ d ^ b) 360 addl %edi, %ecx # e += (c ^ d ^ b)
357 movl %edx, %esi # 361 movl %edx, %esi #
358 roll $5, %esi # rotl32(a,5) 362 roll $5, %esi # rotl32(a,5)
@@ -368,135 +372,119 @@ sha1_process_block64:
368 movl %ebp, %edi # c 372 movl %ebp, %edi # c
369 xorl %eax, %edi # ^d 373 xorl %eax, %edi # ^d
370 xorl %edx, %edi # ^b 374 xorl %edx, %edi # ^b
371 leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W 375 leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
372 addl %edi, %ebx # e += (c ^ d ^ b) 376 addl %edi, %ebx # e += (c ^ d ^ b)
373 movl %ecx, %esi # 377 movl %ecx, %esi #
374 roll $5, %esi # rotl32(a,5) 378 roll $5, %esi # rotl32(a,5)
375 addl %esi, %ebx # e += rotl32(a,5) 379 addl %esi, %ebx # e += rotl32(a,5)
376 rorl $2, %edx # b = rotl32(b,30) 380 rorl $2, %edx # b = rotl32(b,30)
377# 24 381# 24
378 movl -32+4*5(%rsp), %esi # W[(n+13) & 15] 382 xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15]
379 xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] 383 xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15]
380 xorl %r10d, %esi # ^W[(n+2) & 15] 384 xorl %r10d, %r8d # ^W[(n+2) & 15]
381 xorl %r8d, %esi # ^W[n & 15] 385 roll %r8d #
382 roll %esi #
383 movl %esi, %r8d # store to W[n & 15]
384 movl %edx, %edi # c 386 movl %edx, %edi # c
385 xorl %ebp, %edi # ^d 387 xorl %ebp, %edi # ^d
386 xorl %ecx, %edi # ^b 388 xorl %ecx, %edi # ^b
387 leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W 389 leal 0x6ED9EBA1(%rax,%r8), %eax # e += RCONST + W[n & 15]
388 addl %edi, %eax # e += (c ^ d ^ b) 390 addl %edi, %eax # e += (c ^ d ^ b)
389 movl %ebx, %esi # 391 movl %ebx, %esi #
390 roll $5, %esi # rotl32(a,5) 392 roll $5, %esi # rotl32(a,5)
391 addl %esi, %eax # e += rotl32(a,5) 393 addl %esi, %eax # e += rotl32(a,5)
392 rorl $2, %ecx # b = rotl32(b,30) 394 rorl $2, %ecx # b = rotl32(b,30)
393# 25 395# 25
394 movl -32+4*6(%rsp), %esi # W[(n+13) & 15] 396 xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15]
395 xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] 397 xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15]
396 xorl %r11d, %esi # ^W[(n+2) & 15] 398 xorl %r11d, %r9d # ^W[(n+2) & 15]
397 xorl %r9d, %esi # ^W[n & 15] 399 roll %r9d #
398 roll %esi #
399 movl %esi, %r9d # store to W[n & 15]
400 movl %ecx, %edi # c 400 movl %ecx, %edi # c
401 xorl %edx, %edi # ^d 401 xorl %edx, %edi # ^d
402 xorl %ebx, %edi # ^b 402 xorl %ebx, %edi # ^b
403 leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W 403 leal 0x6ED9EBA1(%rbp,%r9), %ebp # e += RCONST + W[n & 15]
404 addl %edi, %ebp # e += (c ^ d ^ b) 404 addl %edi, %ebp # e += (c ^ d ^ b)
405 movl %eax, %esi # 405 movl %eax, %esi #
406 roll $5, %esi # rotl32(a,5) 406 roll $5, %esi # rotl32(a,5)
407 addl %esi, %ebp # e += rotl32(a,5) 407 addl %esi, %ebp # e += rotl32(a,5)
408 rorl $2, %ebx # b = rotl32(b,30) 408 rorl $2, %ebx # b = rotl32(b,30)
409# 26 409# 26
410 movl -32+4*7(%rsp), %esi # W[(n+13) & 15] 410 xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15]
411 xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] 411 xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15]
412 xorl %r12d, %esi # ^W[(n+2) & 15] 412 xorl %r12d, %r10d # ^W[(n+2) & 15]
413 xorl %r10d, %esi # ^W[n & 15] 413 roll %r10d #
414 roll %esi #
415 movl %esi, %r10d # store to W[n & 15]
416 movl %ebx, %edi # c 414 movl %ebx, %edi # c
417 xorl %ecx, %edi # ^d 415 xorl %ecx, %edi # ^d
418 xorl %eax, %edi # ^b 416 xorl %eax, %edi # ^b
419 leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W 417 leal 0x6ED9EBA1(%rdx,%r10), %edx # e += RCONST + W[n & 15]
420 addl %edi, %edx # e += (c ^ d ^ b) 418 addl %edi, %edx # e += (c ^ d ^ b)
421 movl %ebp, %esi # 419 movl %ebp, %esi #
422 roll $5, %esi # rotl32(a,5) 420 roll $5, %esi # rotl32(a,5)
423 addl %esi, %edx # e += rotl32(a,5) 421 addl %esi, %edx # e += rotl32(a,5)
424 rorl $2, %eax # b = rotl32(b,30) 422 rorl $2, %eax # b = rotl32(b,30)
425# 27 423# 27
426 movl %r8d, %esi # W[(n+13) & 15] 424 xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15]
427 xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] 425 xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15]
428 xorl %r13d, %esi # ^W[(n+2) & 15] 426 xorl %r13d, %r11d # ^W[(n+2) & 15]
429 xorl %r11d, %esi # ^W[n & 15] 427 roll %r11d #
430 roll %esi #
431 movl %esi, %r11d # store to W[n & 15]
432 movl %eax, %edi # c 428 movl %eax, %edi # c
433 xorl %ebx, %edi # ^d 429 xorl %ebx, %edi # ^d
434 xorl %ebp, %edi # ^b 430 xorl %ebp, %edi # ^b
435 leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W 431 leal 0x6ED9EBA1(%rcx,%r11), %ecx # e += RCONST + W[n & 15]
436 addl %edi, %ecx # e += (c ^ d ^ b) 432 addl %edi, %ecx # e += (c ^ d ^ b)
437 movl %edx, %esi # 433 movl %edx, %esi #
438 roll $5, %esi # rotl32(a,5) 434 roll $5, %esi # rotl32(a,5)
439 addl %esi, %ecx # e += rotl32(a,5) 435 addl %esi, %ecx # e += rotl32(a,5)
440 rorl $2, %ebp # b = rotl32(b,30) 436 rorl $2, %ebp # b = rotl32(b,30)
441# 28 437# 28
442 movl %r9d, %esi # W[(n+13) & 15] 438 xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15]
443 xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] 439 xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15]
444 xorl %r14d, %esi # ^W[(n+2) & 15] 440 xorl %r14d, %r12d # ^W[(n+2) & 15]
445 xorl %r12d, %esi # ^W[n & 15] 441 roll %r12d #
446 roll %esi #
447 movl %esi, %r12d # store to W[n & 15]
448 movl %ebp, %edi # c 442 movl %ebp, %edi # c
449 xorl %eax, %edi # ^d 443 xorl %eax, %edi # ^d
450 xorl %edx, %edi # ^b 444 xorl %edx, %edi # ^b
451 leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W 445 leal 0x6ED9EBA1(%rbx,%r12), %ebx # e += RCONST + W[n & 15]
452 addl %edi, %ebx # e += (c ^ d ^ b) 446 addl %edi, %ebx # e += (c ^ d ^ b)
453 movl %ecx, %esi # 447 movl %ecx, %esi #
454 roll $5, %esi # rotl32(a,5) 448 roll $5, %esi # rotl32(a,5)
455 addl %esi, %ebx # e += rotl32(a,5) 449 addl %esi, %ebx # e += rotl32(a,5)
456 rorl $2, %edx # b = rotl32(b,30) 450 rorl $2, %edx # b = rotl32(b,30)
457# 29 451# 29
458 movl %r10d, %esi # W[(n+13) & 15] 452 xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15]
459 xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] 453 xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15]
460 xorl %r15d, %esi # ^W[(n+2) & 15] 454 xorl %r15d, %r13d # ^W[(n+2) & 15]
461 xorl %r13d, %esi # ^W[n & 15] 455 roll %r13d #
462 roll %esi #
463 movl %esi, %r13d # store to W[n & 15]
464 movl %edx, %edi # c 456 movl %edx, %edi # c
465 xorl %ebp, %edi # ^d 457 xorl %ebp, %edi # ^d
466 xorl %ecx, %edi # ^b 458 xorl %ecx, %edi # ^b
467 leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W 459 leal 0x6ED9EBA1(%rax,%r13), %eax # e += RCONST + W[n & 15]
468 addl %edi, %eax # e += (c ^ d ^ b) 460 addl %edi, %eax # e += (c ^ d ^ b)
469 movl %ebx, %esi # 461 movl %ebx, %esi #
470 roll $5, %esi # rotl32(a,5) 462 roll $5, %esi # rotl32(a,5)
471 addl %esi, %eax # e += rotl32(a,5) 463 addl %esi, %eax # e += rotl32(a,5)
472 rorl $2, %ecx # b = rotl32(b,30) 464 rorl $2, %ecx # b = rotl32(b,30)
473# 30 465# 30
474 movl %r11d, %esi # W[(n+13) & 15] 466 xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15]
475 xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] 467 xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15]
476 xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] 468 xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15]
477 xorl %r14d, %esi # ^W[n & 15] 469 roll %r14d #
478 roll %esi #
479 movl %esi, %r14d # store to W[n & 15]
480 movl %ecx, %edi # c 470 movl %ecx, %edi # c
481 xorl %edx, %edi # ^d 471 xorl %edx, %edi # ^d
482 xorl %ebx, %edi # ^b 472 xorl %ebx, %edi # ^b
483 leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W 473 leal 0x6ED9EBA1(%rbp,%r14), %ebp # e += RCONST + W[n & 15]
484 addl %edi, %ebp # e += (c ^ d ^ b) 474 addl %edi, %ebp # e += (c ^ d ^ b)
485 movl %eax, %esi # 475 movl %eax, %esi #
486 roll $5, %esi # rotl32(a,5) 476 roll $5, %esi # rotl32(a,5)
487 addl %esi, %ebp # e += rotl32(a,5) 477 addl %esi, %ebp # e += rotl32(a,5)
488 rorl $2, %ebx # b = rotl32(b,30) 478 rorl $2, %ebx # b = rotl32(b,30)
489# 31 479# 31
490 movl %r12d, %esi # W[(n+13) & 15] 480 xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15]
491 xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] 481 xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15]
492 xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] 482 xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15]
493 xorl %r15d, %esi # ^W[n & 15] 483 roll %r15d #
494 roll %esi #
495 movl %esi, %r15d # store to W[n & 15]
496 movl %ebx, %edi # c 484 movl %ebx, %edi # c
497 xorl %ecx, %edi # ^d 485 xorl %ecx, %edi # ^d
498 xorl %eax, %edi # ^b 486 xorl %eax, %edi # ^b
499 leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W 487 leal 0x6ED9EBA1(%rdx,%r15), %edx # e += RCONST + W[n & 15]
500 addl %edi, %edx # e += (c ^ d ^ b) 488 addl %edi, %edx # e += (c ^ d ^ b)
501 movl %ebp, %esi # 489 movl %ebp, %esi #
502 roll $5, %esi # rotl32(a,5) 490 roll $5, %esi # rotl32(a,5)
@@ -512,7 +500,7 @@ sha1_process_block64:
512 movl %eax, %edi # c 500 movl %eax, %edi # c
513 xorl %ebx, %edi # ^d 501 xorl %ebx, %edi # ^d
514 xorl %ebp, %edi # ^b 502 xorl %ebp, %edi # ^b
515 leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W 503 leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
516 addl %edi, %ecx # e += (c ^ d ^ b) 504 addl %edi, %ecx # e += (c ^ d ^ b)
517 movl %edx, %esi # 505 movl %edx, %esi #
518 roll $5, %esi # rotl32(a,5) 506 roll $5, %esi # rotl32(a,5)
@@ -528,7 +516,7 @@ sha1_process_block64:
528 movl %ebp, %edi # c 516 movl %ebp, %edi # c
529 xorl %eax, %edi # ^d 517 xorl %eax, %edi # ^d
530 xorl %edx, %edi # ^b 518 xorl %edx, %edi # ^b
531 leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W 519 leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
532 addl %edi, %ebx # e += (c ^ d ^ b) 520 addl %edi, %ebx # e += (c ^ d ^ b)
533 movl %ecx, %esi # 521 movl %ecx, %esi #
534 roll $5, %esi # rotl32(a,5) 522 roll $5, %esi # rotl32(a,5)
@@ -544,7 +532,7 @@ sha1_process_block64:
544 movl %edx, %edi # c 532 movl %edx, %edi # c
545 xorl %ebp, %edi # ^d 533 xorl %ebp, %edi # ^d
546 xorl %ecx, %edi # ^b 534 xorl %ecx, %edi # ^b
547 leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W 535 leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15]
548 addl %edi, %eax # e += (c ^ d ^ b) 536 addl %edi, %eax # e += (c ^ d ^ b)
549 movl %ebx, %esi # 537 movl %ebx, %esi #
550 roll $5, %esi # rotl32(a,5) 538 roll $5, %esi # rotl32(a,5)
@@ -560,7 +548,7 @@ sha1_process_block64:
560 movl %ecx, %edi # c 548 movl %ecx, %edi # c
561 xorl %edx, %edi # ^d 549 xorl %edx, %edi # ^d
562 xorl %ebx, %edi # ^b 550 xorl %ebx, %edi # ^b
563 leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W 551 leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
564 addl %edi, %ebp # e += (c ^ d ^ b) 552 addl %edi, %ebp # e += (c ^ d ^ b)
565 movl %eax, %esi # 553 movl %eax, %esi #
566 roll $5, %esi # rotl32(a,5) 554 roll $5, %esi # rotl32(a,5)
@@ -576,7 +564,7 @@ sha1_process_block64:
576 movl %ebx, %edi # c 564 movl %ebx, %edi # c
577 xorl %ecx, %edi # ^d 565 xorl %ecx, %edi # ^d
578 xorl %eax, %edi # ^b 566 xorl %eax, %edi # ^b
579 leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W 567 leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
580 addl %edi, %edx # e += (c ^ d ^ b) 568 addl %edi, %edx # e += (c ^ d ^ b)
581 movl %ebp, %esi # 569 movl %ebp, %esi #
582 roll $5, %esi # rotl32(a,5) 570 roll $5, %esi # rotl32(a,5)
@@ -592,7 +580,7 @@ sha1_process_block64:
592 movl %eax, %edi # c 580 movl %eax, %edi # c
593 xorl %ebx, %edi # ^d 581 xorl %ebx, %edi # ^d
594 xorl %ebp, %edi # ^b 582 xorl %ebp, %edi # ^b
595 leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W 583 leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
596 addl %edi, %ecx # e += (c ^ d ^ b) 584 addl %edi, %ecx # e += (c ^ d ^ b)
597 movl %edx, %esi # 585 movl %edx, %esi #
598 roll $5, %esi # rotl32(a,5) 586 roll $5, %esi # rotl32(a,5)
@@ -608,7 +596,7 @@ sha1_process_block64:
608 movl %ebp, %edi # c 596 movl %ebp, %edi # c
609 xorl %eax, %edi # ^d 597 xorl %eax, %edi # ^d
610 xorl %edx, %edi # ^b 598 xorl %edx, %edi # ^b
611 leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W 599 leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
612 addl %edi, %ebx # e += (c ^ d ^ b) 600 addl %edi, %ebx # e += (c ^ d ^ b)
613 movl %ecx, %esi # 601 movl %ecx, %esi #
614 roll $5, %esi # rotl32(a,5) 602 roll $5, %esi # rotl32(a,5)
@@ -624,7 +612,7 @@ sha1_process_block64:
624 movl %edx, %edi # c 612 movl %edx, %edi # c
625 xorl %ebp, %edi # ^d 613 xorl %ebp, %edi # ^d
626 xorl %ecx, %edi # ^b 614 xorl %ecx, %edi # ^b
627 leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W 615 leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15]
628 addl %edi, %eax # e += (c ^ d ^ b) 616 addl %edi, %eax # e += (c ^ d ^ b)
629 movl %ebx, %esi # 617 movl %ebx, %esi #
630 roll $5, %esi # rotl32(a,5) 618 roll $5, %esi # rotl32(a,5)
@@ -637,14 +625,12 @@ sha1_process_block64:
637 andl %ecx, %esi # si: b & c 625 andl %ecx, %esi # si: b & c
638 andl %edx, %edi # di: (b | c) & d 626 andl %edx, %edi # di: (b | c) & d
639 orl %esi, %edi # ((b | c) & d) | (b & c) 627 orl %esi, %edi # ((b | c) & d) | (b & c)
640 movl -32+4*5(%rsp), %esi # W[(n+13) & 15] 628 xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15]
641 xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] 629 xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15]
642 xorl %r10d, %esi # ^W[(n+2) & 15] 630 xorl %r10d, %r8d # ^W[(n+2) & 15]
643 xorl %r8d, %esi # ^W[n & 15] 631 roll %r8d #
644 roll %esi #
645 movl %esi, %r8d # store to W[n & 15]
646 addl %edi, %ebp # += ((b | c) & d) | (b & c) 632 addl %edi, %ebp # += ((b | c) & d) | (b & c)
647 leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W 633 leal -0x70E44324(%rbp,%r8), %ebp # e += RCONST + W[n & 15]
648 movl %eax, %esi # 634 movl %eax, %esi #
649 roll $5, %esi # rotl32(a,5) 635 roll $5, %esi # rotl32(a,5)
650 addl %esi, %ebp # e += rotl32(a,5) 636 addl %esi, %ebp # e += rotl32(a,5)
@@ -656,14 +642,12 @@ sha1_process_block64:
656 andl %ebx, %esi # si: b & c 642 andl %ebx, %esi # si: b & c
657 andl %ecx, %edi # di: (b | c) & d 643 andl %ecx, %edi # di: (b | c) & d
658 orl %esi, %edi # ((b | c) & d) | (b & c) 644 orl %esi, %edi # ((b | c) & d) | (b & c)
659 movl -32+4*6(%rsp), %esi # W[(n+13) & 15] 645 xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15]
660 xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] 646 xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15]
661 xorl %r11d, %esi # ^W[(n+2) & 15] 647 xorl %r11d, %r9d # ^W[(n+2) & 15]
662 xorl %r9d, %esi # ^W[n & 15] 648 roll %r9d #
663 roll %esi #
664 movl %esi, %r9d # store to W[n & 15]
665 addl %edi, %edx # += ((b | c) & d) | (b & c) 649 addl %edi, %edx # += ((b | c) & d) | (b & c)
666 leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W 650 leal -0x70E44324(%rdx,%r9), %edx # e += RCONST + W[n & 15]
667 movl %ebp, %esi # 651 movl %ebp, %esi #
668 roll $5, %esi # rotl32(a,5) 652 roll $5, %esi # rotl32(a,5)
669 addl %esi, %edx # e += rotl32(a,5) 653 addl %esi, %edx # e += rotl32(a,5)
@@ -675,14 +659,12 @@ sha1_process_block64:
675 andl %eax, %esi # si: b & c 659 andl %eax, %esi # si: b & c
676 andl %ebx, %edi # di: (b | c) & d 660 andl %ebx, %edi # di: (b | c) & d
677 orl %esi, %edi # ((b | c) & d) | (b & c) 661 orl %esi, %edi # ((b | c) & d) | (b & c)
678 movl -32+4*7(%rsp), %esi # W[(n+13) & 15] 662 xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15]
679 xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] 663 xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15]
680 xorl %r12d, %esi # ^W[(n+2) & 15] 664 xorl %r12d, %r10d # ^W[(n+2) & 15]
681 xorl %r10d, %esi # ^W[n & 15] 665 roll %r10d #
682 roll %esi #
683 movl %esi, %r10d # store to W[n & 15]
684 addl %edi, %ecx # += ((b | c) & d) | (b & c) 666 addl %edi, %ecx # += ((b | c) & d) | (b & c)
685 leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W 667 leal -0x70E44324(%rcx,%r10), %ecx # e += RCONST + W[n & 15]
686 movl %edx, %esi # 668 movl %edx, %esi #
687 roll $5, %esi # rotl32(a,5) 669 roll $5, %esi # rotl32(a,5)
688 addl %esi, %ecx # e += rotl32(a,5) 670 addl %esi, %ecx # e += rotl32(a,5)
@@ -694,14 +676,12 @@ sha1_process_block64:
694 andl %ebp, %esi # si: b & c 676 andl %ebp, %esi # si: b & c
695 andl %eax, %edi # di: (b | c) & d 677 andl %eax, %edi # di: (b | c) & d
696 orl %esi, %edi # ((b | c) & d) | (b & c) 678 orl %esi, %edi # ((b | c) & d) | (b & c)
697 movl %r8d, %esi # W[(n+13) & 15] 679 xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15]
698 xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] 680 xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15]
699 xorl %r13d, %esi # ^W[(n+2) & 15] 681 xorl %r13d, %r11d # ^W[(n+2) & 15]
700 xorl %r11d, %esi # ^W[n & 15] 682 roll %r11d #
701 roll %esi #
702 movl %esi, %r11d # store to W[n & 15]
703 addl %edi, %ebx # += ((b | c) & d) | (b & c) 683 addl %edi, %ebx # += ((b | c) & d) | (b & c)
704 leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W 684 leal -0x70E44324(%rbx,%r11), %ebx # e += RCONST + W[n & 15]
705 movl %ecx, %esi # 685 movl %ecx, %esi #
706 roll $5, %esi # rotl32(a,5) 686 roll $5, %esi # rotl32(a,5)
707 addl %esi, %ebx # e += rotl32(a,5) 687 addl %esi, %ebx # e += rotl32(a,5)
@@ -713,14 +693,12 @@ sha1_process_block64:
713 andl %edx, %esi # si: b & c 693 andl %edx, %esi # si: b & c
714 andl %ebp, %edi # di: (b | c) & d 694 andl %ebp, %edi # di: (b | c) & d
715 orl %esi, %edi # ((b | c) & d) | (b & c) 695 orl %esi, %edi # ((b | c) & d) | (b & c)
716 movl %r9d, %esi # W[(n+13) & 15] 696 xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15]
717 xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] 697 xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15]
718 xorl %r14d, %esi # ^W[(n+2) & 15] 698 xorl %r14d, %r12d # ^W[(n+2) & 15]
719 xorl %r12d, %esi # ^W[n & 15] 699 roll %r12d #
720 roll %esi #
721 movl %esi, %r12d # store to W[n & 15]
722 addl %edi, %eax # += ((b | c) & d) | (b & c) 700 addl %edi, %eax # += ((b | c) & d) | (b & c)
723 leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W 701 leal -0x70E44324(%rax,%r12), %eax # e += RCONST + W[n & 15]
724 movl %ebx, %esi # 702 movl %ebx, %esi #
725 roll $5, %esi # rotl32(a,5) 703 roll $5, %esi # rotl32(a,5)
726 addl %esi, %eax # e += rotl32(a,5) 704 addl %esi, %eax # e += rotl32(a,5)
@@ -732,14 +710,12 @@ sha1_process_block64:
732 andl %ecx, %esi # si: b & c 710 andl %ecx, %esi # si: b & c
733 andl %edx, %edi # di: (b | c) & d 711 andl %edx, %edi # di: (b | c) & d
734 orl %esi, %edi # ((b | c) & d) | (b & c) 712 orl %esi, %edi # ((b | c) & d) | (b & c)
735 movl %r10d, %esi # W[(n+13) & 15] 713 xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15]
736 xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] 714 xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15]
737 xorl %r15d, %esi # ^W[(n+2) & 15] 715 xorl %r15d, %r13d # ^W[(n+2) & 15]
738 xorl %r13d, %esi # ^W[n & 15] 716 roll %r13d #
739 roll %esi #
740 movl %esi, %r13d # store to W[n & 15]
741 addl %edi, %ebp # += ((b | c) & d) | (b & c) 717 addl %edi, %ebp # += ((b | c) & d) | (b & c)
742 leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W 718 leal -0x70E44324(%rbp,%r13), %ebp # e += RCONST + W[n & 15]
743 movl %eax, %esi # 719 movl %eax, %esi #
744 roll $5, %esi # rotl32(a,5) 720 roll $5, %esi # rotl32(a,5)
745 addl %esi, %ebp # e += rotl32(a,5) 721 addl %esi, %ebp # e += rotl32(a,5)
@@ -751,14 +727,12 @@ sha1_process_block64:
751 andl %ebx, %esi # si: b & c 727 andl %ebx, %esi # si: b & c
752 andl %ecx, %edi # di: (b | c) & d 728 andl %ecx, %edi # di: (b | c) & d
753 orl %esi, %edi # ((b | c) & d) | (b & c) 729 orl %esi, %edi # ((b | c) & d) | (b & c)
754 movl %r11d, %esi # W[(n+13) & 15] 730 xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15]
755 xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] 731 xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15]
756 xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] 732 xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15]
757 xorl %r14d, %esi # ^W[n & 15] 733 roll %r14d #
758 roll %esi #
759 movl %esi, %r14d # store to W[n & 15]
760 addl %edi, %edx # += ((b | c) & d) | (b & c) 734 addl %edi, %edx # += ((b | c) & d) | (b & c)
761 leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W 735 leal -0x70E44324(%rdx,%r14), %edx # e += RCONST + W[n & 15]
762 movl %ebp, %esi # 736 movl %ebp, %esi #
763 roll $5, %esi # rotl32(a,5) 737 roll $5, %esi # rotl32(a,5)
764 addl %esi, %edx # e += rotl32(a,5) 738 addl %esi, %edx # e += rotl32(a,5)
@@ -770,14 +744,12 @@ sha1_process_block64:
770 andl %eax, %esi # si: b & c 744 andl %eax, %esi # si: b & c
771 andl %ebx, %edi # di: (b | c) & d 745 andl %ebx, %edi # di: (b | c) & d
772 orl %esi, %edi # ((b | c) & d) | (b & c) 746 orl %esi, %edi # ((b | c) & d) | (b & c)
773 movl %r12d, %esi # W[(n+13) & 15] 747 xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15]
774 xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] 748 xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15]
775 xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] 749 xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15]
776 xorl %r15d, %esi # ^W[n & 15] 750 roll %r15d #
777 roll %esi #
778 movl %esi, %r15d # store to W[n & 15]
779 addl %edi, %ecx # += ((b | c) & d) | (b & c) 751 addl %edi, %ecx # += ((b | c) & d) | (b & c)
780 leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W 752 leal -0x70E44324(%rcx,%r15), %ecx # e += RCONST + W[n & 15]
781 movl %edx, %esi # 753 movl %edx, %esi #
782 roll $5, %esi # rotl32(a,5) 754 roll $5, %esi # rotl32(a,5)
783 addl %esi, %ecx # e += rotl32(a,5) 755 addl %esi, %ecx # e += rotl32(a,5)
@@ -796,7 +768,7 @@ sha1_process_block64:
796 roll %esi # 768 roll %esi #
797 movl %esi, -32+4*0(%rsp) # store to W[n & 15] 769 movl %esi, -32+4*0(%rsp) # store to W[n & 15]
798 addl %edi, %ebx # += ((b | c) & d) | (b & c) 770 addl %edi, %ebx # += ((b | c) & d) | (b & c)
799 leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W 771 leal -0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
800 movl %ecx, %esi # 772 movl %ecx, %esi #
801 roll $5, %esi # rotl32(a,5) 773 roll $5, %esi # rotl32(a,5)
802 addl %esi, %ebx # e += rotl32(a,5) 774 addl %esi, %ebx # e += rotl32(a,5)
@@ -815,7 +787,7 @@ sha1_process_block64:
815 roll %esi # 787 roll %esi #
816 movl %esi, -32+4*1(%rsp) # store to W[n & 15] 788 movl %esi, -32+4*1(%rsp) # store to W[n & 15]
817 addl %edi, %eax # += ((b | c) & d) | (b & c) 789 addl %edi, %eax # += ((b | c) & d) | (b & c)
818 leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W 790 leal -0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15]
819 movl %ebx, %esi # 791 movl %ebx, %esi #
820 roll $5, %esi # rotl32(a,5) 792 roll $5, %esi # rotl32(a,5)
821 addl %esi, %eax # e += rotl32(a,5) 793 addl %esi, %eax # e += rotl32(a,5)
@@ -834,7 +806,7 @@ sha1_process_block64:
834 roll %esi # 806 roll %esi #
835 movl %esi, -32+4*2(%rsp) # store to W[n & 15] 807 movl %esi, -32+4*2(%rsp) # store to W[n & 15]
836 addl %edi, %ebp # += ((b | c) & d) | (b & c) 808 addl %edi, %ebp # += ((b | c) & d) | (b & c)
837 leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W 809 leal -0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
838 movl %eax, %esi # 810 movl %eax, %esi #
839 roll $5, %esi # rotl32(a,5) 811 roll $5, %esi # rotl32(a,5)
840 addl %esi, %ebp # e += rotl32(a,5) 812 addl %esi, %ebp # e += rotl32(a,5)
@@ -853,7 +825,7 @@ sha1_process_block64:
853 roll %esi # 825 roll %esi #
854 movl %esi, -32+4*3(%rsp) # store to W[n & 15] 826 movl %esi, -32+4*3(%rsp) # store to W[n & 15]
855 addl %edi, %edx # += ((b | c) & d) | (b & c) 827 addl %edi, %edx # += ((b | c) & d) | (b & c)
856 leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W 828 leal -0x70E44324(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
857 movl %ebp, %esi # 829 movl %ebp, %esi #
858 roll $5, %esi # rotl32(a,5) 830 roll $5, %esi # rotl32(a,5)
859 addl %esi, %edx # e += rotl32(a,5) 831 addl %esi, %edx # e += rotl32(a,5)
@@ -872,7 +844,7 @@ sha1_process_block64:
872 roll %esi # 844 roll %esi #
873 movl %esi, -32+4*4(%rsp) # store to W[n & 15] 845 movl %esi, -32+4*4(%rsp) # store to W[n & 15]
874 addl %edi, %ecx # += ((b | c) & d) | (b & c) 846 addl %edi, %ecx # += ((b | c) & d) | (b & c)
875 leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W 847 leal -0x70E44324(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
876 movl %edx, %esi # 848 movl %edx, %esi #
877 roll $5, %esi # rotl32(a,5) 849 roll $5, %esi # rotl32(a,5)
878 addl %esi, %ecx # e += rotl32(a,5) 850 addl %esi, %ecx # e += rotl32(a,5)
@@ -891,7 +863,7 @@ sha1_process_block64:
891 roll %esi # 863 roll %esi #
892 movl %esi, -32+4*5(%rsp) # store to W[n & 15] 864 movl %esi, -32+4*5(%rsp) # store to W[n & 15]
893 addl %edi, %ebx # += ((b | c) & d) | (b & c) 865 addl %edi, %ebx # += ((b | c) & d) | (b & c)
894 leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W 866 leal -0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
895 movl %ecx, %esi # 867 movl %ecx, %esi #
896 roll $5, %esi # rotl32(a,5) 868 roll $5, %esi # rotl32(a,5)
897 addl %esi, %ebx # e += rotl32(a,5) 869 addl %esi, %ebx # e += rotl32(a,5)
@@ -910,7 +882,7 @@ sha1_process_block64:
910 roll %esi # 882 roll %esi #
911 movl %esi, -32+4*6(%rsp) # store to W[n & 15] 883 movl %esi, -32+4*6(%rsp) # store to W[n & 15]
912 addl %edi, %eax # += ((b | c) & d) | (b & c) 884 addl %edi, %eax # += ((b | c) & d) | (b & c)
913 leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W 885 leal -0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15]
914 movl %ebx, %esi # 886 movl %ebx, %esi #
915 roll $5, %esi # rotl32(a,5) 887 roll $5, %esi # rotl32(a,5)
916 addl %esi, %eax # e += rotl32(a,5) 888 addl %esi, %eax # e += rotl32(a,5)
@@ -929,7 +901,7 @@ sha1_process_block64:
929 roll %esi # 901 roll %esi #
930 movl %esi, -32+4*7(%rsp) # store to W[n & 15] 902 movl %esi, -32+4*7(%rsp) # store to W[n & 15]
931 addl %edi, %ebp # += ((b | c) & d) | (b & c) 903 addl %edi, %ebp # += ((b | c) & d) | (b & c)
932 leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W 904 leal -0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
933 movl %eax, %esi # 905 movl %eax, %esi #
934 roll $5, %esi # rotl32(a,5) 906 roll $5, %esi # rotl32(a,5)
935 addl %esi, %ebp # e += rotl32(a,5) 907 addl %esi, %ebp # e += rotl32(a,5)
@@ -941,14 +913,12 @@ sha1_process_block64:
941 andl %ebx, %esi # si: b & c 913 andl %ebx, %esi # si: b & c
942 andl %ecx, %edi # di: (b | c) & d 914 andl %ecx, %edi # di: (b | c) & d
943 orl %esi, %edi # ((b | c) & d) | (b & c) 915 orl %esi, %edi # ((b | c) & d) | (b & c)
944 movl -32+4*5(%rsp), %esi # W[(n+13) & 15] 916 xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15]
945 xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] 917 xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15]
946 xorl %r10d, %esi # ^W[(n+2) & 15] 918 xorl %r10d, %r8d # ^W[(n+2) & 15]
947 xorl %r8d, %esi # ^W[n & 15] 919 roll %r8d #
948 roll %esi #
949 movl %esi, %r8d # store to W[n & 15]
950 addl %edi, %edx # += ((b | c) & d) | (b & c) 920 addl %edi, %edx # += ((b | c) & d) | (b & c)
951 leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W 921 leal -0x70E44324(%rdx,%r8), %edx # e += RCONST + W[n & 15]
952 movl %ebp, %esi # 922 movl %ebp, %esi #
953 roll $5, %esi # rotl32(a,5) 923 roll $5, %esi # rotl32(a,5)
954 addl %esi, %edx # e += rotl32(a,5) 924 addl %esi, %edx # e += rotl32(a,5)
@@ -960,14 +930,12 @@ sha1_process_block64:
960 andl %eax, %esi # si: b & c 930 andl %eax, %esi # si: b & c
961 andl %ebx, %edi # di: (b | c) & d 931 andl %ebx, %edi # di: (b | c) & d
962 orl %esi, %edi # ((b | c) & d) | (b & c) 932 orl %esi, %edi # ((b | c) & d) | (b & c)
963 movl -32+4*6(%rsp), %esi # W[(n+13) & 15] 933 xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15]
964 xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] 934 xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15]
965 xorl %r11d, %esi # ^W[(n+2) & 15] 935 xorl %r11d, %r9d # ^W[(n+2) & 15]
966 xorl %r9d, %esi # ^W[n & 15] 936 roll %r9d #
967 roll %esi #
968 movl %esi, %r9d # store to W[n & 15]
969 addl %edi, %ecx # += ((b | c) & d) | (b & c) 937 addl %edi, %ecx # += ((b | c) & d) | (b & c)
970 leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W 938 leal -0x70E44324(%rcx,%r9), %ecx # e += RCONST + W[n & 15]
971 movl %edx, %esi # 939 movl %edx, %esi #
972 roll $5, %esi # rotl32(a,5) 940 roll $5, %esi # rotl32(a,5)
973 addl %esi, %ecx # e += rotl32(a,5) 941 addl %esi, %ecx # e += rotl32(a,5)
@@ -979,14 +947,12 @@ sha1_process_block64:
979 andl %ebp, %esi # si: b & c 947 andl %ebp, %esi # si: b & c
980 andl %eax, %edi # di: (b | c) & d 948 andl %eax, %edi # di: (b | c) & d
981 orl %esi, %edi # ((b | c) & d) | (b & c) 949 orl %esi, %edi # ((b | c) & d) | (b & c)
982 movl -32+4*7(%rsp), %esi # W[(n+13) & 15] 950 xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15]
983 xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] 951 xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15]
984 xorl %r12d, %esi # ^W[(n+2) & 15] 952 xorl %r12d, %r10d # ^W[(n+2) & 15]
985 xorl %r10d, %esi # ^W[n & 15] 953 roll %r10d #
986 roll %esi #
987 movl %esi, %r10d # store to W[n & 15]
988 addl %edi, %ebx # += ((b | c) & d) | (b & c) 954 addl %edi, %ebx # += ((b | c) & d) | (b & c)
989 leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W 955 leal -0x70E44324(%rbx,%r10), %ebx # e += RCONST + W[n & 15]
990 movl %ecx, %esi # 956 movl %ecx, %esi #
991 roll $5, %esi # rotl32(a,5) 957 roll $5, %esi # rotl32(a,5)
992 addl %esi, %ebx # e += rotl32(a,5) 958 addl %esi, %ebx # e += rotl32(a,5)
@@ -998,77 +964,67 @@ sha1_process_block64:
998 andl %edx, %esi # si: b & c 964 andl %edx, %esi # si: b & c
999 andl %ebp, %edi # di: (b | c) & d 965 andl %ebp, %edi # di: (b | c) & d
1000 orl %esi, %edi # ((b | c) & d) | (b & c) 966 orl %esi, %edi # ((b | c) & d) | (b & c)
1001 movl %r8d, %esi # W[(n+13) & 15] 967 xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15]
1002 xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] 968 xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15]
1003 xorl %r13d, %esi # ^W[(n+2) & 15] 969 xorl %r13d, %r11d # ^W[(n+2) & 15]
1004 xorl %r11d, %esi # ^W[n & 15] 970 roll %r11d #
1005 roll %esi #
1006 movl %esi, %r11d # store to W[n & 15]
1007 addl %edi, %eax # += ((b | c) & d) | (b & c) 971 addl %edi, %eax # += ((b | c) & d) | (b & c)
1008 leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W 972 leal -0x70E44324(%rax,%r11), %eax # e += RCONST + W[n & 15]
1009 movl %ebx, %esi # 973 movl %ebx, %esi #
1010 roll $5, %esi # rotl32(a,5) 974 roll $5, %esi # rotl32(a,5)
1011 addl %esi, %eax # e += rotl32(a,5) 975 addl %esi, %eax # e += rotl32(a,5)
1012 rorl $2, %ecx # b = rotl32(b,30) 976 rorl $2, %ecx # b = rotl32(b,30)
1013# 60 977# 60
1014 movl %r9d, %esi # W[(n+13) & 15] 978 xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15]
1015 xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] 979 xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15]
1016 xorl %r14d, %esi # ^W[(n+2) & 15] 980 xorl %r14d, %r12d # ^W[(n+2) & 15]
1017 xorl %r12d, %esi # ^W[n & 15] 981 roll %r12d #
1018 roll %esi #
1019 movl %esi, %r12d # store to W[n & 15]
1020 movl %ecx, %edi # c 982 movl %ecx, %edi # c
1021 xorl %edx, %edi # ^d 983 xorl %edx, %edi # ^d
1022 xorl %ebx, %edi # ^b 984 xorl %ebx, %edi # ^b
1023 leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W 985 leal -0x359D3E2A(%rbp,%r12), %ebp # e += RCONST + W[n & 15]
1024 addl %edi, %ebp # e += (c ^ d ^ b) 986 addl %edi, %ebp # e += (c ^ d ^ b)
1025 movl %eax, %esi # 987 movl %eax, %esi #
1026 roll $5, %esi # rotl32(a,5) 988 roll $5, %esi # rotl32(a,5)
1027 addl %esi, %ebp # e += rotl32(a,5) 989 addl %esi, %ebp # e += rotl32(a,5)
1028 rorl $2, %ebx # b = rotl32(b,30) 990 rorl $2, %ebx # b = rotl32(b,30)
1029# 61 991# 61
1030 movl %r10d, %esi # W[(n+13) & 15] 992 xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15]
1031 xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] 993 xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15]
1032 xorl %r15d, %esi # ^W[(n+2) & 15] 994 xorl %r15d, %r13d # ^W[(n+2) & 15]
1033 xorl %r13d, %esi # ^W[n & 15] 995 roll %r13d #
1034 roll %esi #
1035 movl %esi, %r13d # store to W[n & 15]
1036 movl %ebx, %edi # c 996 movl %ebx, %edi # c
1037 xorl %ecx, %edi # ^d 997 xorl %ecx, %edi # ^d
1038 xorl %eax, %edi # ^b 998 xorl %eax, %edi # ^b
1039 leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W 999 leal -0x359D3E2A(%rdx,%r13), %edx # e += RCONST + W[n & 15]
1040 addl %edi, %edx # e += (c ^ d ^ b) 1000 addl %edi, %edx # e += (c ^ d ^ b)
1041 movl %ebp, %esi # 1001 movl %ebp, %esi #
1042 roll $5, %esi # rotl32(a,5) 1002 roll $5, %esi # rotl32(a,5)
1043 addl %esi, %edx # e += rotl32(a,5) 1003 addl %esi, %edx # e += rotl32(a,5)
1044 rorl $2, %eax # b = rotl32(b,30) 1004 rorl $2, %eax # b = rotl32(b,30)
1045# 62 1005# 62
1046 movl %r11d, %esi # W[(n+13) & 15] 1006 xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15]
1047 xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] 1007 xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15]
1048 xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] 1008 xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15]
1049 xorl %r14d, %esi # ^W[n & 15] 1009 roll %r14d #
1050 roll %esi #
1051 movl %esi, %r14d # store to W[n & 15]
1052 movl %eax, %edi # c 1010 movl %eax, %edi # c
1053 xorl %ebx, %edi # ^d 1011 xorl %ebx, %edi # ^d
1054 xorl %ebp, %edi # ^b 1012 xorl %ebp, %edi # ^b
1055 leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W 1013 leal -0x359D3E2A(%rcx,%r14), %ecx # e += RCONST + W[n & 15]
1056 addl %edi, %ecx # e += (c ^ d ^ b) 1014 addl %edi, %ecx # e += (c ^ d ^ b)
1057 movl %edx, %esi # 1015 movl %edx, %esi #
1058 roll $5, %esi # rotl32(a,5) 1016 roll $5, %esi # rotl32(a,5)
1059 addl %esi, %ecx # e += rotl32(a,5) 1017 addl %esi, %ecx # e += rotl32(a,5)
1060 rorl $2, %ebp # b = rotl32(b,30) 1018 rorl $2, %ebp # b = rotl32(b,30)
1061# 63 1019# 63
1062 movl %r12d, %esi # W[(n+13) & 15] 1020 xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15]
1063 xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] 1021 xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15]
1064 xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] 1022 xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15]
1065 xorl %r15d, %esi # ^W[n & 15] 1023 roll %r15d #
1066 roll %esi #
1067 movl %esi, %r15d # store to W[n & 15]
1068 movl %ebp, %edi # c 1024 movl %ebp, %edi # c
1069 xorl %eax, %edi # ^d 1025 xorl %eax, %edi # ^d
1070 xorl %edx, %edi # ^b 1026 xorl %edx, %edi # ^b
1071 leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W 1027 leal -0x359D3E2A(%rbx,%r15), %ebx # e += RCONST + W[n & 15]
1072 addl %edi, %ebx # e += (c ^ d ^ b) 1028 addl %edi, %ebx # e += (c ^ d ^ b)
1073 movl %ecx, %esi # 1029 movl %ecx, %esi #
1074 roll $5, %esi # rotl32(a,5) 1030 roll $5, %esi # rotl32(a,5)
@@ -1084,7 +1040,7 @@ sha1_process_block64:
1084 movl %edx, %edi # c 1040 movl %edx, %edi # c
1085 xorl %ebp, %edi # ^d 1041 xorl %ebp, %edi # ^d
1086 xorl %ecx, %edi # ^b 1042 xorl %ecx, %edi # ^b
1087 leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W 1043 leal -0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15]
1088 addl %edi, %eax # e += (c ^ d ^ b) 1044 addl %edi, %eax # e += (c ^ d ^ b)
1089 movl %ebx, %esi # 1045 movl %ebx, %esi #
1090 roll $5, %esi # rotl32(a,5) 1046 roll $5, %esi # rotl32(a,5)
@@ -1100,7 +1056,7 @@ sha1_process_block64:
1100 movl %ecx, %edi # c 1056 movl %ecx, %edi # c
1101 xorl %edx, %edi # ^d 1057 xorl %edx, %edi # ^d
1102 xorl %ebx, %edi # ^b 1058 xorl %ebx, %edi # ^b
1103 leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W 1059 leal -0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
1104 addl %edi, %ebp # e += (c ^ d ^ b) 1060 addl %edi, %ebp # e += (c ^ d ^ b)
1105 movl %eax, %esi # 1061 movl %eax, %esi #
1106 roll $5, %esi # rotl32(a,5) 1062 roll $5, %esi # rotl32(a,5)
@@ -1116,7 +1072,7 @@ sha1_process_block64:
1116 movl %ebx, %edi # c 1072 movl %ebx, %edi # c
1117 xorl %ecx, %edi # ^d 1073 xorl %ecx, %edi # ^d
1118 xorl %eax, %edi # ^b 1074 xorl %eax, %edi # ^b
1119 leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W 1075 leal -0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
1120 addl %edi, %edx # e += (c ^ d ^ b) 1076 addl %edi, %edx # e += (c ^ d ^ b)
1121 movl %ebp, %esi # 1077 movl %ebp, %esi #
1122 roll $5, %esi # rotl32(a,5) 1078 roll $5, %esi # rotl32(a,5)
@@ -1132,7 +1088,7 @@ sha1_process_block64:
1132 movl %eax, %edi # c 1088 movl %eax, %edi # c
1133 xorl %ebx, %edi # ^d 1089 xorl %ebx, %edi # ^d
1134 xorl %ebp, %edi # ^b 1090 xorl %ebp, %edi # ^b
1135 leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W 1091 leal -0x359D3E2A(%rcx,%rsi), %ecx # e += RCONST + W[n & 15]
1136 addl %edi, %ecx # e += (c ^ d ^ b) 1092 addl %edi, %ecx # e += (c ^ d ^ b)
1137 movl %edx, %esi # 1093 movl %edx, %esi #
1138 roll $5, %esi # rotl32(a,5) 1094 roll $5, %esi # rotl32(a,5)
@@ -1148,7 +1104,7 @@ sha1_process_block64:
1148 movl %ebp, %edi # c 1104 movl %ebp, %edi # c
1149 xorl %eax, %edi # ^d 1105 xorl %eax, %edi # ^d
1150 xorl %edx, %edi # ^b 1106 xorl %edx, %edi # ^b
1151 leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W 1107 leal -0x359D3E2A(%rbx,%rsi), %ebx # e += RCONST + W[n & 15]
1152 addl %edi, %ebx # e += (c ^ d ^ b) 1108 addl %edi, %ebx # e += (c ^ d ^ b)
1153 movl %ecx, %esi # 1109 movl %ecx, %esi #
1154 roll $5, %esi # rotl32(a,5) 1110 roll $5, %esi # rotl32(a,5)
@@ -1164,7 +1120,7 @@ sha1_process_block64:
1164 movl %edx, %edi # c 1120 movl %edx, %edi # c
1165 xorl %ebp, %edi # ^d 1121 xorl %ebp, %edi # ^d
1166 xorl %ecx, %edi # ^b 1122 xorl %ecx, %edi # ^b
1167 leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W 1123 leal -0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15]
1168 addl %edi, %eax # e += (c ^ d ^ b) 1124 addl %edi, %eax # e += (c ^ d ^ b)
1169 movl %ebx, %esi # 1125 movl %ebx, %esi #
1170 roll $5, %esi # rotl32(a,5) 1126 roll $5, %esi # rotl32(a,5)
@@ -1180,7 +1136,7 @@ sha1_process_block64:
1180 movl %ecx, %edi # c 1136 movl %ecx, %edi # c
1181 xorl %edx, %edi # ^d 1137 xorl %edx, %edi # ^d
1182 xorl %ebx, %edi # ^b 1138 xorl %ebx, %edi # ^b
1183 leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W 1139 leal -0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15]
1184 addl %edi, %ebp # e += (c ^ d ^ b) 1140 addl %edi, %ebp # e += (c ^ d ^ b)
1185 movl %eax, %esi # 1141 movl %eax, %esi #
1186 roll $5, %esi # rotl32(a,5) 1142 roll $5, %esi # rotl32(a,5)
@@ -1196,135 +1152,119 @@ sha1_process_block64:
1196 movl %ebx, %edi # c 1152 movl %ebx, %edi # c
1197 xorl %ecx, %edi # ^d 1153 xorl %ecx, %edi # ^d
1198 xorl %eax, %edi # ^b 1154 xorl %eax, %edi # ^b
1199 leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W 1155 leal -0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15]
1200 addl %edi, %edx # e += (c ^ d ^ b) 1156 addl %edi, %edx # e += (c ^ d ^ b)
1201 movl %ebp, %esi # 1157 movl %ebp, %esi #
1202 roll $5, %esi # rotl32(a,5) 1158 roll $5, %esi # rotl32(a,5)
1203 addl %esi, %edx # e += rotl32(a,5) 1159 addl %esi, %edx # e += rotl32(a,5)
1204 rorl $2, %eax # b = rotl32(b,30) 1160 rorl $2, %eax # b = rotl32(b,30)
1205# 72 1161# 72
1206 movl -32+4*5(%rsp), %esi # W[(n+13) & 15] 1162 xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15]
1207 xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] 1163 xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15]
1208 xorl %r10d, %esi # ^W[(n+2) & 15] 1164 xorl %r10d, %r8d # ^W[(n+2) & 15]
1209 xorl %r8d, %esi # ^W[n & 15] 1165 roll %r8d #
1210 roll %esi #
1211 movl %esi, %r8d # store to W[n & 15]
1212 movl %eax, %edi # c 1166 movl %eax, %edi # c
1213 xorl %ebx, %edi # ^d 1167 xorl %ebx, %edi # ^d
1214 xorl %ebp, %edi # ^b 1168 xorl %ebp, %edi # ^b
1215 leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W 1169 leal -0x359D3E2A(%rcx,%r8), %ecx # e += RCONST + W[n & 15]
1216 addl %edi, %ecx # e += (c ^ d ^ b) 1170 addl %edi, %ecx # e += (c ^ d ^ b)
1217 movl %edx, %esi # 1171 movl %edx, %esi #
1218 roll $5, %esi # rotl32(a,5) 1172 roll $5, %esi # rotl32(a,5)
1219 addl %esi, %ecx # e += rotl32(a,5) 1173 addl %esi, %ecx # e += rotl32(a,5)
1220 rorl $2, %ebp # b = rotl32(b,30) 1174 rorl $2, %ebp # b = rotl32(b,30)
1221# 73 1175# 73
1222 movl -32+4*6(%rsp), %esi # W[(n+13) & 15] 1176 xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15]
1223 xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] 1177 xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15]
1224 xorl %r11d, %esi # ^W[(n+2) & 15] 1178 xorl %r11d, %r9d # ^W[(n+2) & 15]
1225 xorl %r9d, %esi # ^W[n & 15] 1179 roll %r9d #
1226 roll %esi #
1227 movl %esi, %r9d # store to W[n & 15]
1228 movl %ebp, %edi # c 1180 movl %ebp, %edi # c
1229 xorl %eax, %edi # ^d 1181 xorl %eax, %edi # ^d
1230 xorl %edx, %edi # ^b 1182 xorl %edx, %edi # ^b
1231 leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W 1183 leal -0x359D3E2A(%rbx,%r9), %ebx # e += RCONST + W[n & 15]
1232 addl %edi, %ebx # e += (c ^ d ^ b) 1184 addl %edi, %ebx # e += (c ^ d ^ b)
1233 movl %ecx, %esi # 1185 movl %ecx, %esi #
1234 roll $5, %esi # rotl32(a,5) 1186 roll $5, %esi # rotl32(a,5)
1235 addl %esi, %ebx # e += rotl32(a,5) 1187 addl %esi, %ebx # e += rotl32(a,5)
1236 rorl $2, %edx # b = rotl32(b,30) 1188 rorl $2, %edx # b = rotl32(b,30)
1237# 74 1189# 74
1238 movl -32+4*7(%rsp), %esi # W[(n+13) & 15] 1190 xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15]
1239 xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] 1191 xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15]
1240 xorl %r12d, %esi # ^W[(n+2) & 15] 1192 xorl %r12d, %r10d # ^W[(n+2) & 15]
1241 xorl %r10d, %esi # ^W[n & 15] 1193 roll %r10d #
1242 roll %esi #
1243 movl %esi, %r10d # store to W[n & 15]
1244 movl %edx, %edi # c 1194 movl %edx, %edi # c
1245 xorl %ebp, %edi # ^d 1195 xorl %ebp, %edi # ^d
1246 xorl %ecx, %edi # ^b 1196 xorl %ecx, %edi # ^b
1247 leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W 1197 leal -0x359D3E2A(%rax,%r10), %eax # e += RCONST + W[n & 15]
1248 addl %edi, %eax # e += (c ^ d ^ b) 1198 addl %edi, %eax # e += (c ^ d ^ b)
1249 movl %ebx, %esi # 1199 movl %ebx, %esi #
1250 roll $5, %esi # rotl32(a,5) 1200 roll $5, %esi # rotl32(a,5)
1251 addl %esi, %eax # e += rotl32(a,5) 1201 addl %esi, %eax # e += rotl32(a,5)
1252 rorl $2, %ecx # b = rotl32(b,30) 1202 rorl $2, %ecx # b = rotl32(b,30)
1253# 75 1203# 75
1254 movl %r8d, %esi # W[(n+13) & 15] 1204 xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15]
1255 xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] 1205 xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15]
1256 xorl %r13d, %esi # ^W[(n+2) & 15] 1206 xorl %r13d, %r11d # ^W[(n+2) & 15]
1257 xorl %r11d, %esi # ^W[n & 15] 1207 roll %r11d #
1258 roll %esi #
1259 movl %esi, %r11d # store to W[n & 15]
1260 movl %ecx, %edi # c 1208 movl %ecx, %edi # c
1261 xorl %edx, %edi # ^d 1209 xorl %edx, %edi # ^d
1262 xorl %ebx, %edi # ^b 1210 xorl %ebx, %edi # ^b
1263 leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W 1211 leal -0x359D3E2A(%rbp,%r11), %ebp # e += RCONST + W[n & 15]
1264 addl %edi, %ebp # e += (c ^ d ^ b) 1212 addl %edi, %ebp # e += (c ^ d ^ b)
1265 movl %eax, %esi # 1213 movl %eax, %esi #
1266 roll $5, %esi # rotl32(a,5) 1214 roll $5, %esi # rotl32(a,5)
1267 addl %esi, %ebp # e += rotl32(a,5) 1215 addl %esi, %ebp # e += rotl32(a,5)
1268 rorl $2, %ebx # b = rotl32(b,30) 1216 rorl $2, %ebx # b = rotl32(b,30)
1269# 76 1217# 76
1270 movl %r9d, %esi # W[(n+13) & 15] 1218 xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15]
1271 xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] 1219 xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15]
1272 xorl %r14d, %esi # ^W[(n+2) & 15] 1220 xorl %r14d, %r12d # ^W[(n+2) & 15]
1273 xorl %r12d, %esi # ^W[n & 15] 1221 roll %r12d #
1274 roll %esi #
1275 movl %esi, %r12d # store to W[n & 15]
1276 movl %ebx, %edi # c 1222 movl %ebx, %edi # c
1277 xorl %ecx, %edi # ^d 1223 xorl %ecx, %edi # ^d
1278 xorl %eax, %edi # ^b 1224 xorl %eax, %edi # ^b
1279 leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W 1225 leal -0x359D3E2A(%rdx,%r12), %edx # e += RCONST + W[n & 15]
1280 addl %edi, %edx # e += (c ^ d ^ b) 1226 addl %edi, %edx # e += (c ^ d ^ b)
1281 movl %ebp, %esi # 1227 movl %ebp, %esi #
1282 roll $5, %esi # rotl32(a,5) 1228 roll $5, %esi # rotl32(a,5)
1283 addl %esi, %edx # e += rotl32(a,5) 1229 addl %esi, %edx # e += rotl32(a,5)
1284 rorl $2, %eax # b = rotl32(b,30) 1230 rorl $2, %eax # b = rotl32(b,30)
1285# 77 1231# 77
1286 movl %r10d, %esi # W[(n+13) & 15] 1232 xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15]
1287 xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] 1233 xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15]
1288 xorl %r15d, %esi # ^W[(n+2) & 15] 1234 xorl %r15d, %r13d # ^W[(n+2) & 15]
1289 xorl %r13d, %esi # ^W[n & 15] 1235 roll %r13d #
1290 roll %esi #
1291 # store to W[n & 15] - unused, not done
1292 movl %eax, %edi # c 1236 movl %eax, %edi # c
1293 xorl %ebx, %edi # ^d 1237 xorl %ebx, %edi # ^d
1294 xorl %ebp, %edi # ^b 1238 xorl %ebp, %edi # ^b
1295 leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W 1239 leal -0x359D3E2A(%rcx,%r13), %ecx # e += RCONST + W[n & 15]
1296 addl %edi, %ecx # e += (c ^ d ^ b) 1240 addl %edi, %ecx # e += (c ^ d ^ b)
1297 movl %edx, %esi # 1241 movl %edx, %esi #
1298 roll $5, %esi # rotl32(a,5) 1242 roll $5, %esi # rotl32(a,5)
1299 addl %esi, %ecx # e += rotl32(a,5) 1243 addl %esi, %ecx # e += rotl32(a,5)
1300 rorl $2, %ebp # b = rotl32(b,30) 1244 rorl $2, %ebp # b = rotl32(b,30)
1301# 78 1245# 78
1302 movl %r11d, %esi # W[(n+13) & 15] 1246 xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15]
1303 xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] 1247 xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15]
1304 xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] 1248 xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15]
1305 xorl %r14d, %esi # ^W[n & 15] 1249 roll %r14d #
1306 roll %esi #
1307 # store to W[n & 15] - unused, not done
1308 movl %ebp, %edi # c 1250 movl %ebp, %edi # c
1309 xorl %eax, %edi # ^d 1251 xorl %eax, %edi # ^d
1310 xorl %edx, %edi # ^b 1252 xorl %edx, %edi # ^b
1311 leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W 1253 leal -0x359D3E2A(%rbx,%r14), %ebx # e += RCONST + W[n & 15]
1312 addl %edi, %ebx # e += (c ^ d ^ b) 1254 addl %edi, %ebx # e += (c ^ d ^ b)
1313 movl %ecx, %esi # 1255 movl %ecx, %esi #
1314 roll $5, %esi # rotl32(a,5) 1256 roll $5, %esi # rotl32(a,5)
1315 addl %esi, %ebx # e += rotl32(a,5) 1257 addl %esi, %ebx # e += rotl32(a,5)
1316 rorl $2, %edx # b = rotl32(b,30) 1258 rorl $2, %edx # b = rotl32(b,30)
1317# 79 1259# 79
1318 movl %r12d, %esi # W[(n+13) & 15] 1260 xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15]
1319 xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] 1261 xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15]
1320 xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] 1262 xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15]
1321 xorl %r15d, %esi # ^W[n & 15] 1263 roll %r15d #
1322 roll %esi #
1323 # store to W[n & 15] - unused, not done
1324 movl %edx, %edi # c 1264 movl %edx, %edi # c
1325 xorl %ebp, %edi # ^d 1265 xorl %ebp, %edi # ^d
1326 xorl %ecx, %edi # ^b 1266 xorl %ecx, %edi # ^b
1327 leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W 1267 leal -0x359D3E2A(%rax,%r15), %eax # e += RCONST + W[n & 15]
1328 addl %edi, %eax # e += (c ^ d ^ b) 1268 addl %edi, %eax # e += (c ^ d ^ b)
1329 movl %ebx, %esi # 1269 movl %ebx, %esi #
1330 roll $5, %esi # rotl32(a,5) 1270 roll $5, %esi # rotl32(a,5)
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh
new file mode 100755
index 000000000..931c0f0fd
--- /dev/null
+++ b/libbb/hash_md5_sha_x86-64.S.sh
@@ -0,0 +1,267 @@
1#!/bin/sh
2
3# We don't regenerate it on every "make" invocation - only by hand.
4# The reason is that the changes to generated code are difficult
5# to visualize by looking only at this script, it helps when the commit
6# also contains the diff of the generated file.
# From here on, everything written to stdout lands in the generated .S file
# (created in the current directory).
7exec >hash_md5_sha_x86-64.S
8
# Emit the fixed head of the generated file: the "Generated by" banner, the
# preprocessor guard, section/symbol directives and the function entry code.
# The entry code, as written below:
#  - pushes r15, r14, r13, r12, rbp, rbx, and finally rdi (ctx is needed
#    again after the rounds);
#  - loads input words 8..15 as four 64-bit loads, byte-swaps each and splits
#    it so that r8d..r15d hold W[8]..W[15];
#  - copies input words 0..7, byte-swapped, to -32(%rsp)..-4(%rsp) in a
#    4-iteration loop (storage below %rsp; %rsp itself is not adjusted).
#    After the last iteration (index 0) %esi still holds W[0], which the
#    first round relies on;
#  - loads a..e from offsets 80..96 of ctx.
# NOTE(review): the emitted "TODO" comment below spells "_thme_" (meant
# "_them_"). It is part of the emitted text, so fixing it also changes the
# generated file - regenerate and commit both together.
9echo \
10'### Generated by hash_md5_sha_x86-64.S.sh ###
11
12#if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__)
13 .section .text.sha1_process_block64,"ax",@progbits
14 .globl sha1_process_block64
15 .hidden sha1_process_block64
16 .type sha1_process_block64, @function
17
18 .balign 8 # allow decoders to fetch at least 4 first insns
19sha1_process_block64:
20 pushq %r15 #
21 pushq %r14 #
22 pushq %r13 #
23 pushq %r12 #
24 pushq %rbp #
25 pushq %rbx #
26 pushq %rdi # we need ctx at the end
27
28#Register and stack use:
29# eax..edx: a..d
30# ebp: e
31# esi,edi: temps
32# -32+4*n(%rsp),r8...r15: W[0..7,8..15]
33# (TODO: actually W[0..7] are used a bit more often, put _thme_ into r8..r15?)
34
35 movq 4*8(%rdi), %r8
36 bswapq %r8
37 movl %r8d, %r9d
38 shrq $32, %r8
39 movq 4*10(%rdi), %r10
40 bswapq %r10
41 movl %r10d, %r11d
42 shrq $32, %r10
43 movq 4*12(%rdi), %r12
44 bswapq %r12
45 movl %r12d, %r13d
46 shrq $32, %r12
47 movq 4*14(%rdi), %r14
48 bswapq %r14
49 movl %r14d, %r15d
50 shrq $32, %r14
51
52 movl $3, %eax
531:
54 movq (%rdi,%rax,8), %rsi
55 bswapq %rsi
56 rolq $32, %rsi
57 movq %rsi, -32(%rsp,%rax,8)
58 decl %eax
59 jns 1b
60 movl 80(%rdi), %eax # a = ctx->hash[0]
61 movl 84(%rdi), %ebx # b = ctx->hash[1]
62 movl 88(%rdi), %ecx # c = ctx->hash[2]
63 movl 92(%rdi), %edx # d = ctx->hash[3]
64 movl 96(%rdi), %ebp # e = ctx->hash[4]
65'
# W32 n - print the assembler operand that holds schedule word W[n], 0 <= n <= 15.
#   n = 0..7  : stack slot   -32+4*n(%rsp)
#   n = 8..15 : register     %r8d..%r15d
# An empty, non-numeric or out-of-range n is reported on stderr and ends with
# "exit 1". Callers invoke this as `W32 ...`, where that exit only terminates
# the command substitution; the stderr message is what makes a bad index
# visible instead of silently emitting an empty operand into the assembly.
# (stdout is redirected into the generated file, so diagnostics must use stderr.)
W32() {
	# The first test fails for empty/non-numeric input as well as for n < 0.
	if ! test "$1" -ge 0 2>/dev/null || test "$1" -gt 15; then
		echo "W32: bad W[] index '$1' (want 0..15)" >&2
		exit 1
	fi
	if test "$1" -lt 8; then
		echo "-32+4*$1(%rsp)"
	else
		echo "%r${1}d"
	fi
}
73
# RD1A a b c d e n - emit the assembly for one round that uses W[n] as loaded,
# without a schedule update (the driver calls it for n = 0..15).
#   $1..$5: register name suffixes (ax, bx, cx, dx, bp) currently playing the
#           roles a, b, c, d, e; they are pasted after "%e" / "%r"
#   $6:     round number n
# Reads the global RCONST.
# Emitted computation:
#   e += RCONST + W[n] + (((c ^ d) & b) ^ d) + rotl32(a,5);  b = rotl32(b,30)
# Where W[n] comes from:
#   n == 0  : taken from %esi (left there by the prologue's copy loop)
#   n 1..7  : loaded from its stack slot into %esi
#   n >= 8  : used directly from register %r<n>
# The echoed strings begin and end with newlines; the resulting empty lines
# are expected to be stripped by the caller (see the "grep -v" after each
# round group).
74RD1A() {
75local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
76local n=$(($6))
77echo "# $n"
78test $n = 0 && echo "
79 # W[0], already in %esi
80";test $n != 0 && test $n -lt 8 && echo "
81 movl `W32 $n`, %esi # W[n]
82";test $n -ge 8 && echo "
83 # W[n], in %r$n
84";echo "
85 movl %e$c, %edi # c
86 xorl %e$d, %edi # ^d
87 andl %e$b, %edi # &b
88 xorl %e$d, %edi # (((c ^ d) & b) ^ d)
89";test $n -lt 8 && echo "
90 leal $RCONST(%r$e,%rsi),%e$e # e += RCONST + W[n]
91";test $n -ge 8 && echo "
92 leal $RCONST(%r$e,%r$n),%e$e # e += RCONST + W[n]
93";echo "
94 addl %edi, %e$e # e += (((c ^ d) & b) ^ d)
95 movl %e$a, %esi #
96 roll \$5, %esi # rotl32(a,5)
97 addl %esi, %e$e # e += rotl32(a,5)
98 rorl \$2, %e$b # b = rotl32(b,30)
99"
100}
# RD1B a b c d e n - like RD1A (same (((c ^ d) & b) ^ d) function), but the
# schedule word is first recomputed in place (the driver calls it for
# n = 16..19):
#   W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1)
#   $1..$5: register name suffixes for a, b, c, d, e
#   $6:     round number n
# Reads the global RCONST.
# If W[n & 15] is a stack slot (index < 8) the new value is built in %esi and
# stored back; if it is a register (index >= 8) it is updated in place and
# that register is used directly in the "leal".
101RD1B() {
102local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
103local n=$(($6))
# Indices, modulo 16, of the four W[] entries combined by the update.
104local n13=$(((n+13) & 15))
105local n8=$(((n+8) & 15))
106local n2=$(((n+2) & 15))
107local n0=$(((n+0) & 15))
108echo "
109# $n
110";test $n0 -lt 8 && echo "
111 movl `W32 $n13`, %esi # W[(n+13) & 15]
112 xorl `W32 $n8`, %esi # ^W[(n+8) & 15]
113 xorl `W32 $n2`, %esi # ^W[(n+2) & 15]
114 xorl `W32 $n0`, %esi # ^W[n & 15]
115 roll %esi #
116 movl %esi, `W32 $n0` # store to W[n & 15]
117";test $n0 -ge 8 && echo "
118 xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15]
119 xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15]
120 xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15]
121 roll `W32 $n0` #
122"; echo "
123 movl %e$c, %edi # c
124 xorl %e$d, %edi # ^d
125 andl %e$b, %edi # &b
126 xorl %e$d, %edi # (((c ^ d) & b) ^ d)
127";test $n0 -lt 8 && echo "
128 leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15]
129";test $n0 -ge 8 && echo "
130 leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15]
131";echo "
132 addl %edi, %e$e # e += (((c ^ d) & b) ^ d)
133 movl %e$a, %esi #
134 roll \$5, %esi # rotl32(a,5)
135 addl %esi, %e$e # e += rotl32(a,5)
136 rorl \$2, %e$b # b = rotl32(b,30)
137"
138}
# Rounds 0..19, constant 0x5A827999: RD1A for n = 0..15, RD1B for n = 16..19.
# Values are never moved between registers; instead the five register names
# are rotated by one position from each call to the next (the register that
# was "e" becomes "a", "a" becomes "b", ...), so the pattern repeats every
# five rounds.
# "grep -v '^$'" drops the empty lines that the echo strings produce.
139{
140RCONST=0x5A827999
141RD1A ax bx cx dx bp 0; RD1A bp ax bx cx dx 1; RD1A dx bp ax bx cx 2; RD1A cx dx bp ax bx 3; RD1A bx cx dx bp ax 4
142RD1A ax bx cx dx bp 5; RD1A bp ax bx cx dx 6; RD1A dx bp ax bx cx 7; RD1A cx dx bp ax bx 8; RD1A bx cx dx bp ax 9
143RD1A ax bx cx dx bp 10; RD1A bp ax bx cx dx 11; RD1A dx bp ax bx cx 12; RD1A cx dx bp ax bx 13; RD1A bx cx dx bp ax 14
144RD1A ax bx cx dx bp 15; RD1B bp ax bx cx dx 16; RD1B dx bp ax bx cx 17; RD1B cx dx bp ax bx 18; RD1B bx cx dx bp ax 19
145} | grep -v '^$'
146
# RD2 a b c d e n - emit the assembly for one round with the parity function
# (c ^ d ^ b). The drivers use it for n = 20..39 and, with a different
# RCONST, for n = 60..79.
#   $1..$5: register name suffixes for a, b, c, d, e
#   $6:     round number n
# Reads the global RCONST.
# Emitted computation:
#   W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1)
#   e += RCONST + W[n & 15] + (c ^ d ^ b) + rotl32(a,5);  b = rotl32(b,30)
# A stack-resident W[n & 15] (index < 8) is built in %esi and stored back;
# a register-resident one (index >= 8) is updated in place and used directly.
147RD2() {
148local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
149local n=$(($6))
# Indices, modulo 16, of the four W[] entries combined by the update.
150local n13=$(((n+13) & 15))
151local n8=$(((n+8) & 15))
152local n2=$(((n+2) & 15))
153local n0=$(((n+0) & 15))
154echo "
155# $n
156";test $n0 -lt 8 && echo "
157 movl `W32 $n13`, %esi # W[(n+13) & 15]
158 xorl `W32 $n8`, %esi # ^W[(n+8) & 15]
159 xorl `W32 $n2`, %esi # ^W[(n+2) & 15]
160 xorl `W32 $n0`, %esi # ^W[n & 15]
161 roll %esi #
162 movl %esi, `W32 $n0` # store to W[n & 15]
163";test $n0 -ge 8 && echo "
164 xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15]
165 xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15]
166 xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15]
167 roll `W32 $n0` #
168"; echo "
169 movl %e$c, %edi # c
170 xorl %e$d, %edi # ^d
171 xorl %e$b, %edi # ^b
172";test $n0 -lt 8 && echo "
173 leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15]
174";test $n0 -ge 8 && echo "
175 leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15]
176";echo "
177 addl %edi, %e$e # e += (c ^ d ^ b)
178 movl %e$a, %esi #
179 roll \$5, %esi # rotl32(a,5)
180 addl %esi, %e$e # e += rotl32(a,5)
181 rorl \$2, %e$b # b = rotl32(b,30)
182"
183}
# Rounds 20..39, constant 0x6ED9EBA1, all emitted by RD2. The register names
# rotate by one position per call, as in the first round group.
# "grep -v '^$'" drops the empty lines that the echo strings produce.
184{
185RCONST=0x6ED9EBA1
186RD2 ax bx cx dx bp 20; RD2 bp ax bx cx dx 21; RD2 dx bp ax bx cx 22; RD2 cx dx bp ax bx 23; RD2 bx cx dx bp ax 24
187RD2 ax bx cx dx bp 25; RD2 bp ax bx cx dx 26; RD2 dx bp ax bx cx 27; RD2 cx dx bp ax bx 28; RD2 bx cx dx bp ax 29
188RD2 ax bx cx dx bp 30; RD2 bp ax bx cx dx 31; RD2 dx bp ax bx cx 32; RD2 cx dx bp ax bx 33; RD2 bx cx dx bp ax 34
189RD2 ax bx cx dx bp 35; RD2 bp ax bx cx dx 36; RD2 dx bp ax bx cx 37; RD2 cx dx bp ax bx 38; RD2 bx cx dx bp ax 39
190} | grep -v '^$'
191
# RD3 a b c d e n - emit the assembly for one round with the majority
# function ((b | c) & d) | (b & c) (the driver calls it for n = 40..59).
#   $1..$5: register name suffixes for a, b, c, d, e
#   $6:     round number n
# Reads the global RCONST.
# Emitted computation:
#   W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1)
#   e += ((b | c) & d) | (b & c);  e += RCONST + W[n & 15];
#   e += rotl32(a,5);  b = rotl32(b,30)
# Unlike RD1B/RD2, the round function is emitted first: it needs both temps
# (%esi and %edi), and its result is left in %edi so that %esi is free again
# for the schedule update that follows.
192RD3() {
193local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
194local n=$(($6))
# Indices, modulo 16, of the four W[] entries combined by the update.
195local n13=$(((n+13) & 15))
196local n8=$(((n+8) & 15))
197local n2=$(((n+2) & 15))
198local n0=$(((n+0) & 15))
199echo "
200# $n
201 movl %e$b, %edi # di: b
202 movl %e$b, %esi # si: b
203 orl %e$c, %edi # di: b | c
204 andl %e$c, %esi # si: b & c
205 andl %e$d, %edi # di: (b | c) & d
206 orl %esi, %edi # ((b | c) & d) | (b & c)
207";test $n0 -lt 8 && echo "
208 movl `W32 $n13`, %esi # W[(n+13) & 15]
209 xorl `W32 $n8`, %esi # ^W[(n+8) & 15]
210 xorl `W32 $n2`, %esi # ^W[(n+2) & 15]
211 xorl `W32 $n0`, %esi # ^W[n & 15]
212 roll %esi #
213 movl %esi, `W32 $n0` # store to W[n & 15]
214";test $n0 -ge 8 && echo "
215 xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15]
216 xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15]
217 xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15]
218 roll `W32 $n0` #
219"; echo "
220 addl %edi, %e$e # += ((b | c) & d) | (b & c)
221";test $n0 -lt 8 && echo "
222 leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15]
223";test $n0 -ge 8 && echo "
224 leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15]
225";echo "
226 movl %e$a, %esi #
227 roll \$5, %esi # rotl32(a,5)
228 addl %esi, %e$e # e += rotl32(a,5)
229 rorl \$2, %e$b # b = rotl32(b,30)
230"
231}
# Rounds 40..59, all emitted by RD3. The constant 0x8F1BBCDC does not fit a
# signed 32-bit "leal" displacement, so it is written as the equivalent
# negative value: 0x8F1BBCDC - 0x100000000 = -0x70E44324.
# "grep -v '^$'" drops the empty lines that the echo strings produce.
232{
233#RCONST=0x8F1BBCDC "out of range for signed 32bit displacement"
234RCONST=-0x70E44324
235RD3 ax bx cx dx bp 40; RD3 bp ax bx cx dx 41; RD3 dx bp ax bx cx 42; RD3 cx dx bp ax bx 43; RD3 bx cx dx bp ax 44
236RD3 ax bx cx dx bp 45; RD3 bp ax bx cx dx 46; RD3 dx bp ax bx cx 47; RD3 cx dx bp ax bx 48; RD3 bx cx dx bp ax 49
237RD3 ax bx cx dx bp 50; RD3 bp ax bx cx dx 51; RD3 dx bp ax bx cx 52; RD3 cx dx bp ax bx 53; RD3 bx cx dx bp ax 54
238RD3 ax bx cx dx bp 55; RD3 bp ax bx cx dx 56; RD3 dx bp ax bx cx 57; RD3 cx dx bp ax bx 58; RD3 bx cx dx bp ax 59
239} | grep -v '^$'
240
# Rounds 60..79, emitted by RD2 again. The constant 0xCA62C1D6 is written as
# the equivalent negative 32-bit value: 0xCA62C1D6 - 0x100000000 = -0x359D3E2A.
# "grep -v '^$'" drops the empty lines that the echo strings produce.
241# Round 4 has the same logic as round 2, only n and RCONST are different
242{
243#RCONST=0xCA62C1D6 "out of range for signed 32bit displacement"
244RCONST=-0x359D3E2A
245RD2 ax bx cx dx bp 60; RD2 bp ax bx cx dx 61; RD2 dx bp ax bx cx 62; RD2 cx dx bp ax bx 63; RD2 bx cx dx bp ax 64
246RD2 ax bx cx dx bp 65; RD2 bp ax bx cx dx 66; RD2 dx bp ax bx cx 67; RD2 cx dx bp ax bx 68; RD2 bx cx dx bp ax 69
247RD2 ax bx cx dx bp 70; RD2 bp ax bx cx dx 71; RD2 dx bp ax bx cx 72; RD2 cx dx bp ax bx 73; RD2 bx cx dx bp ax 74
248RD2 ax bx cx dx bp 75; RD2 bp ax bx cx dx 76; RD2 dx bp ax bx cx 77; RD2 cx dx bp ax bx 78; RD2 bx cx dx bp ax 79
249} | grep -v '^$'
250
# Emit the function tail: pop the ctx pointer saved by the prologue, add the
# working values a..e into the five words at ctx offsets 80..96, pop the
# remaining saved registers in the reverse of the prologue's push order,
# return, and close the symbol size directive and the "#if" guard.
# The string is double-quoted but contains no "$", so nothing is expanded.
251echo "
252 popq %rdi #
253 addl %eax, 80(%rdi) # ctx->hash[0] += a
254 addl %ebx, 84(%rdi) # ctx->hash[1] += b
255 addl %ecx, 88(%rdi) # ctx->hash[2] += c
256 addl %edx, 92(%rdi) # ctx->hash[3] += d
257 addl %ebp, 96(%rdi) # ctx->hash[4] += e
258 popq %rbx #
259 popq %rbp #
260 popq %r12 #
261 popq %r13 #
262 popq %r14 #
263 popq %r15 #
264
265 ret
266 .size sha1_process_block64, .-sha1_process_block64
267#endif"