diff options
| author | Denys Vlasenko <vda.linux@googlemail.com> | 2022-01-03 13:00:07 +0100 |
|---|---|---|
| committer | Denys Vlasenko <vda.linux@googlemail.com> | 2022-01-03 13:10:30 +0100 |
| commit | 947bef0deaba7b2ce432d515379091dcd4cf747f (patch) | |
| tree | 15273e01ea7b42df47e49779fede62f7289a4178 | |
| parent | 05fd13ebec869fc5e6f226481a2405a2685e8db1 (diff) | |
| download | busybox-w32-947bef0deaba7b2ce432d515379091dcd4cf747f.tar.gz busybox-w32-947bef0deaba7b2ce432d515379091dcd4cf747f.tar.bz2 busybox-w32-947bef0deaba7b2ce432d515379091dcd4cf747f.zip | |
libbb/sha1: x86_64 version: generate from a script, optimize a bit
function old new delta
sha1_process_block64 3569 3502 -67
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
| -rw-r--r-- | libbb/Config.src | 2 | ||||
| -rw-r--r-- | libbb/hash_md5_sha_x86-64.S | 472 | ||||
| -rwxr-xr-x | libbb/hash_md5_sha_x86-64.S.sh | 267 |
3 files changed, 474 insertions, 267 deletions
diff --git a/libbb/Config.src b/libbb/Config.src index 42a2283aa..c80bee286 100644 --- a/libbb/Config.src +++ b/libbb/Config.src | |||
| @@ -59,7 +59,7 @@ config SHA1_SMALL | |||
| 59 | Trade binary size versus speed for the sha1 algorithm. | 59 | Trade binary size versus speed for the sha1 algorithm. |
| 60 | throughput MB/s size of sha1_process_block64 | 60 | throughput MB/s size of sha1_process_block64 |
| 61 | value 486 x86-64 486 x86-64 | 61 | value 486 x86-64 486 x86-64 |
| 62 | 0 367 367 3657 3570 | 62 | 0 367 375 3657 3502 |
| 63 | 1 224 229 654 732 | 63 | 1 224 229 654 732 |
| 64 | 2,3 200 195 358 380 | 64 | 2,3 200 195 358 380 |
| 65 | 65 | ||
diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S index 466cd9ae9..3e1c4b455 100644 --- a/libbb/hash_md5_sha_x86-64.S +++ b/libbb/hash_md5_sha_x86-64.S | |||
| @@ -1,23 +1,27 @@ | |||
| 1 | ### Generated by hash_md5_sha_x86-64.S.sh ### | 1 | ### Generated by hash_md5_sha_x86-64.S.sh ### |
| 2 | #if defined(__GNUC__) && defined(__x86_64__) | 2 | |
| 3 | #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) | ||
| 3 | .section .text.sha1_process_block64,"ax",@progbits | 4 | .section .text.sha1_process_block64,"ax",@progbits |
| 4 | .globl sha1_process_block64 | 5 | .globl sha1_process_block64 |
| 5 | .hidden sha1_process_block64 | 6 | .hidden sha1_process_block64 |
| 6 | .type sha1_process_block64, @function | 7 | .type sha1_process_block64, @function |
| 8 | |||
| 9 | .balign 8 # allow decoders to fetch at least 4 first insns | ||
| 7 | sha1_process_block64: | 10 | sha1_process_block64: |
| 8 | pushq %r15 # | 11 | pushq %r15 # |
| 9 | pushq %r14 # | 12 | pushq %r14 # |
| 10 | pushq %r13 # | 13 | pushq %r13 # |
| 11 | pushq %r12 # | 14 | pushq %r12 # |
| 12 | pushq %rbp # | 15 | pushq %rbp # |
| 13 | pushq %rbx # | 16 | pushq %rbx # |
| 14 | pushq %rdi # we need ctx at the end | 17 | pushq %rdi # we need ctx at the end |
| 15 | 18 | ||
| 16 | #Register and stack use: | 19 | #Register and stack use: |
| 17 | # eax..edx: a..d | 20 | # eax..edx: a..d |
| 18 | # ebp: e | 21 | # ebp: e |
| 19 | # esi,edi: temps | 22 | # esi,edi: temps |
| 20 | # -32+4*n(%rsp),r8...r15: W[0..7,8..15] | 23 | # -32+4*n(%rsp),r8...r15: W[0..7,8..15] |
| 24 | # (TODO: actually W[0..7] are used a bit more often, put _thme_ into r8..r15?) | ||
| 21 | 25 | ||
| 22 | movq 4*8(%rdi), %r8 | 26 | movq 4*8(%rdi), %r8 |
| 23 | bswapq %r8 | 27 | bswapq %r8 |
| @@ -253,7 +257,7 @@ sha1_process_block64: | |||
| 253 | xorl %ecx, %edi # ^d | 257 | xorl %ecx, %edi # ^d |
| 254 | andl %eax, %edi # &b | 258 | andl %eax, %edi # &b |
| 255 | xorl %ecx, %edi # (((c ^ d) & b) ^ d) | 259 | xorl %ecx, %edi # (((c ^ d) & b) ^ d) |
| 256 | leal 0x5A827999(%rdx,%rsi),%edx # e += RCONST + W[n] | 260 | leal 0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
| 257 | addl %edi, %edx # e += (((c ^ d) & b) ^ d) | 261 | addl %edi, %edx # e += (((c ^ d) & b) ^ d) |
| 258 | movl %ebp, %esi # | 262 | movl %ebp, %esi # |
| 259 | roll $5, %esi # rotl32(a,5) | 263 | roll $5, %esi # rotl32(a,5) |
| @@ -270,7 +274,7 @@ sha1_process_block64: | |||
| 270 | xorl %ebx, %edi # ^d | 274 | xorl %ebx, %edi # ^d |
| 271 | andl %ebp, %edi # &b | 275 | andl %ebp, %edi # &b |
| 272 | xorl %ebx, %edi # (((c ^ d) & b) ^ d) | 276 | xorl %ebx, %edi # (((c ^ d) & b) ^ d) |
| 273 | leal 0x5A827999(%rcx,%rsi),%ecx # e += RCONST + W[n] | 277 | leal 0x5A827999(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
| 274 | addl %edi, %ecx # e += (((c ^ d) & b) ^ d) | 278 | addl %edi, %ecx # e += (((c ^ d) & b) ^ d) |
| 275 | movl %edx, %esi # | 279 | movl %edx, %esi # |
| 276 | roll $5, %esi # rotl32(a,5) | 280 | roll $5, %esi # rotl32(a,5) |
| @@ -287,7 +291,7 @@ sha1_process_block64: | |||
| 287 | xorl %eax, %edi # ^d | 291 | xorl %eax, %edi # ^d |
| 288 | andl %edx, %edi # &b | 292 | andl %edx, %edi # &b |
| 289 | xorl %eax, %edi # (((c ^ d) & b) ^ d) | 293 | xorl %eax, %edi # (((c ^ d) & b) ^ d) |
| 290 | leal 0x5A827999(%rbx,%rsi),%ebx # e += RCONST + W[n] | 294 | leal 0x5A827999(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
| 291 | addl %edi, %ebx # e += (((c ^ d) & b) ^ d) | 295 | addl %edi, %ebx # e += (((c ^ d) & b) ^ d) |
| 292 | movl %ecx, %esi # | 296 | movl %ecx, %esi # |
| 293 | roll $5, %esi # rotl32(a,5) | 297 | roll $5, %esi # rotl32(a,5) |
| @@ -304,7 +308,7 @@ sha1_process_block64: | |||
| 304 | xorl %ebp, %edi # ^d | 308 | xorl %ebp, %edi # ^d |
| 305 | andl %ecx, %edi # &b | 309 | andl %ecx, %edi # &b |
| 306 | xorl %ebp, %edi # (((c ^ d) & b) ^ d) | 310 | xorl %ebp, %edi # (((c ^ d) & b) ^ d) |
| 307 | leal 0x5A827999(%rax,%rsi),%eax # e += RCONST + W[n] | 311 | leal 0x5A827999(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
| 308 | addl %edi, %eax # e += (((c ^ d) & b) ^ d) | 312 | addl %edi, %eax # e += (((c ^ d) & b) ^ d) |
| 309 | movl %ebx, %esi # | 313 | movl %ebx, %esi # |
| 310 | roll $5, %esi # rotl32(a,5) | 314 | roll $5, %esi # rotl32(a,5) |
| @@ -320,7 +324,7 @@ sha1_process_block64: | |||
| 320 | movl %ecx, %edi # c | 324 | movl %ecx, %edi # c |
| 321 | xorl %edx, %edi # ^d | 325 | xorl %edx, %edi # ^d |
| 322 | xorl %ebx, %edi # ^b | 326 | xorl %ebx, %edi # ^b |
| 323 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 327 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
| 324 | addl %edi, %ebp # e += (c ^ d ^ b) | 328 | addl %edi, %ebp # e += (c ^ d ^ b) |
| 325 | movl %eax, %esi # | 329 | movl %eax, %esi # |
| 326 | roll $5, %esi # rotl32(a,5) | 330 | roll $5, %esi # rotl32(a,5) |
| @@ -336,7 +340,7 @@ sha1_process_block64: | |||
| 336 | movl %ebx, %edi # c | 340 | movl %ebx, %edi # c |
| 337 | xorl %ecx, %edi # ^d | 341 | xorl %ecx, %edi # ^d |
| 338 | xorl %eax, %edi # ^b | 342 | xorl %eax, %edi # ^b |
| 339 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | 343 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
| 340 | addl %edi, %edx # e += (c ^ d ^ b) | 344 | addl %edi, %edx # e += (c ^ d ^ b) |
| 341 | movl %ebp, %esi # | 345 | movl %ebp, %esi # |
| 342 | roll $5, %esi # rotl32(a,5) | 346 | roll $5, %esi # rotl32(a,5) |
| @@ -352,7 +356,7 @@ sha1_process_block64: | |||
| 352 | movl %eax, %edi # c | 356 | movl %eax, %edi # c |
| 353 | xorl %ebx, %edi # ^d | 357 | xorl %ebx, %edi # ^d |
| 354 | xorl %ebp, %edi # ^b | 358 | xorl %ebp, %edi # ^b |
| 355 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 359 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
| 356 | addl %edi, %ecx # e += (c ^ d ^ b) | 360 | addl %edi, %ecx # e += (c ^ d ^ b) |
| 357 | movl %edx, %esi # | 361 | movl %edx, %esi # |
| 358 | roll $5, %esi # rotl32(a,5) | 362 | roll $5, %esi # rotl32(a,5) |
| @@ -368,135 +372,119 @@ sha1_process_block64: | |||
| 368 | movl %ebp, %edi # c | 372 | movl %ebp, %edi # c |
| 369 | xorl %eax, %edi # ^d | 373 | xorl %eax, %edi # ^d |
| 370 | xorl %edx, %edi # ^b | 374 | xorl %edx, %edi # ^b |
| 371 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 375 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
| 372 | addl %edi, %ebx # e += (c ^ d ^ b) | 376 | addl %edi, %ebx # e += (c ^ d ^ b) |
| 373 | movl %ecx, %esi # | 377 | movl %ecx, %esi # |
| 374 | roll $5, %esi # rotl32(a,5) | 378 | roll $5, %esi # rotl32(a,5) |
| 375 | addl %esi, %ebx # e += rotl32(a,5) | 379 | addl %esi, %ebx # e += rotl32(a,5) |
| 376 | rorl $2, %edx # b = rotl32(b,30) | 380 | rorl $2, %edx # b = rotl32(b,30) |
| 377 | # 24 | 381 | # 24 |
| 378 | movl -32+4*5(%rsp), %esi # W[(n+13) & 15] | 382 | xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] |
| 379 | xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] | 383 | xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] |
| 380 | xorl %r10d, %esi # ^W[(n+2) & 15] | 384 | xorl %r10d, %r8d # ^W[(n+2) & 15] |
| 381 | xorl %r8d, %esi # ^W[n & 15] | 385 | roll %r8d # |
| 382 | roll %esi # | ||
| 383 | movl %esi, %r8d # store to W[n & 15] | ||
| 384 | movl %edx, %edi # c | 386 | movl %edx, %edi # c |
| 385 | xorl %ebp, %edi # ^d | 387 | xorl %ebp, %edi # ^d |
| 386 | xorl %ecx, %edi # ^b | 388 | xorl %ecx, %edi # ^b |
| 387 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | 389 | leal 0x6ED9EBA1(%rax,%r8), %eax # e += RCONST + W[n & 15] |
| 388 | addl %edi, %eax # e += (c ^ d ^ b) | 390 | addl %edi, %eax # e += (c ^ d ^ b) |
| 389 | movl %ebx, %esi # | 391 | movl %ebx, %esi # |
| 390 | roll $5, %esi # rotl32(a,5) | 392 | roll $5, %esi # rotl32(a,5) |
| 391 | addl %esi, %eax # e += rotl32(a,5) | 393 | addl %esi, %eax # e += rotl32(a,5) |
| 392 | rorl $2, %ecx # b = rotl32(b,30) | 394 | rorl $2, %ecx # b = rotl32(b,30) |
| 393 | # 25 | 395 | # 25 |
| 394 | movl -32+4*6(%rsp), %esi # W[(n+13) & 15] | 396 | xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] |
| 395 | xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] | 397 | xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] |
| 396 | xorl %r11d, %esi # ^W[(n+2) & 15] | 398 | xorl %r11d, %r9d # ^W[(n+2) & 15] |
| 397 | xorl %r9d, %esi # ^W[n & 15] | 399 | roll %r9d # |
| 398 | roll %esi # | ||
| 399 | movl %esi, %r9d # store to W[n & 15] | ||
| 400 | movl %ecx, %edi # c | 400 | movl %ecx, %edi # c |
| 401 | xorl %edx, %edi # ^d | 401 | xorl %edx, %edi # ^d |
| 402 | xorl %ebx, %edi # ^b | 402 | xorl %ebx, %edi # ^b |
| 403 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 403 | leal 0x6ED9EBA1(%rbp,%r9), %ebp # e += RCONST + W[n & 15] |
| 404 | addl %edi, %ebp # e += (c ^ d ^ b) | 404 | addl %edi, %ebp # e += (c ^ d ^ b) |
| 405 | movl %eax, %esi # | 405 | movl %eax, %esi # |
| 406 | roll $5, %esi # rotl32(a,5) | 406 | roll $5, %esi # rotl32(a,5) |
| 407 | addl %esi, %ebp # e += rotl32(a,5) | 407 | addl %esi, %ebp # e += rotl32(a,5) |
| 408 | rorl $2, %ebx # b = rotl32(b,30) | 408 | rorl $2, %ebx # b = rotl32(b,30) |
| 409 | # 26 | 409 | # 26 |
| 410 | movl -32+4*7(%rsp), %esi # W[(n+13) & 15] | 410 | xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] |
| 411 | xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] | 411 | xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] |
| 412 | xorl %r12d, %esi # ^W[(n+2) & 15] | 412 | xorl %r12d, %r10d # ^W[(n+2) & 15] |
| 413 | xorl %r10d, %esi # ^W[n & 15] | 413 | roll %r10d # |
| 414 | roll %esi # | ||
| 415 | movl %esi, %r10d # store to W[n & 15] | ||
| 416 | movl %ebx, %edi # c | 414 | movl %ebx, %edi # c |
| 417 | xorl %ecx, %edi # ^d | 415 | xorl %ecx, %edi # ^d |
| 418 | xorl %eax, %edi # ^b | 416 | xorl %eax, %edi # ^b |
| 419 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | 417 | leal 0x6ED9EBA1(%rdx,%r10), %edx # e += RCONST + W[n & 15] |
| 420 | addl %edi, %edx # e += (c ^ d ^ b) | 418 | addl %edi, %edx # e += (c ^ d ^ b) |
| 421 | movl %ebp, %esi # | 419 | movl %ebp, %esi # |
| 422 | roll $5, %esi # rotl32(a,5) | 420 | roll $5, %esi # rotl32(a,5) |
| 423 | addl %esi, %edx # e += rotl32(a,5) | 421 | addl %esi, %edx # e += rotl32(a,5) |
| 424 | rorl $2, %eax # b = rotl32(b,30) | 422 | rorl $2, %eax # b = rotl32(b,30) |
| 425 | # 27 | 423 | # 27 |
| 426 | movl %r8d, %esi # W[(n+13) & 15] | 424 | xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] |
| 427 | xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] | 425 | xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] |
| 428 | xorl %r13d, %esi # ^W[(n+2) & 15] | 426 | xorl %r13d, %r11d # ^W[(n+2) & 15] |
| 429 | xorl %r11d, %esi # ^W[n & 15] | 427 | roll %r11d # |
| 430 | roll %esi # | ||
| 431 | movl %esi, %r11d # store to W[n & 15] | ||
| 432 | movl %eax, %edi # c | 428 | movl %eax, %edi # c |
| 433 | xorl %ebx, %edi # ^d | 429 | xorl %ebx, %edi # ^d |
| 434 | xorl %ebp, %edi # ^b | 430 | xorl %ebp, %edi # ^b |
| 435 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 431 | leal 0x6ED9EBA1(%rcx,%r11), %ecx # e += RCONST + W[n & 15] |
| 436 | addl %edi, %ecx # e += (c ^ d ^ b) | 432 | addl %edi, %ecx # e += (c ^ d ^ b) |
| 437 | movl %edx, %esi # | 433 | movl %edx, %esi # |
| 438 | roll $5, %esi # rotl32(a,5) | 434 | roll $5, %esi # rotl32(a,5) |
| 439 | addl %esi, %ecx # e += rotl32(a,5) | 435 | addl %esi, %ecx # e += rotl32(a,5) |
| 440 | rorl $2, %ebp # b = rotl32(b,30) | 436 | rorl $2, %ebp # b = rotl32(b,30) |
| 441 | # 28 | 437 | # 28 |
| 442 | movl %r9d, %esi # W[(n+13) & 15] | 438 | xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] |
| 443 | xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] | 439 | xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] |
| 444 | xorl %r14d, %esi # ^W[(n+2) & 15] | 440 | xorl %r14d, %r12d # ^W[(n+2) & 15] |
| 445 | xorl %r12d, %esi # ^W[n & 15] | 441 | roll %r12d # |
| 446 | roll %esi # | ||
| 447 | movl %esi, %r12d # store to W[n & 15] | ||
| 448 | movl %ebp, %edi # c | 442 | movl %ebp, %edi # c |
| 449 | xorl %eax, %edi # ^d | 443 | xorl %eax, %edi # ^d |
| 450 | xorl %edx, %edi # ^b | 444 | xorl %edx, %edi # ^b |
| 451 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 445 | leal 0x6ED9EBA1(%rbx,%r12), %ebx # e += RCONST + W[n & 15] |
| 452 | addl %edi, %ebx # e += (c ^ d ^ b) | 446 | addl %edi, %ebx # e += (c ^ d ^ b) |
| 453 | movl %ecx, %esi # | 447 | movl %ecx, %esi # |
| 454 | roll $5, %esi # rotl32(a,5) | 448 | roll $5, %esi # rotl32(a,5) |
| 455 | addl %esi, %ebx # e += rotl32(a,5) | 449 | addl %esi, %ebx # e += rotl32(a,5) |
| 456 | rorl $2, %edx # b = rotl32(b,30) | 450 | rorl $2, %edx # b = rotl32(b,30) |
| 457 | # 29 | 451 | # 29 |
| 458 | movl %r10d, %esi # W[(n+13) & 15] | 452 | xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] |
| 459 | xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] | 453 | xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] |
| 460 | xorl %r15d, %esi # ^W[(n+2) & 15] | 454 | xorl %r15d, %r13d # ^W[(n+2) & 15] |
| 461 | xorl %r13d, %esi # ^W[n & 15] | 455 | roll %r13d # |
| 462 | roll %esi # | ||
| 463 | movl %esi, %r13d # store to W[n & 15] | ||
| 464 | movl %edx, %edi # c | 456 | movl %edx, %edi # c |
| 465 | xorl %ebp, %edi # ^d | 457 | xorl %ebp, %edi # ^d |
| 466 | xorl %ecx, %edi # ^b | 458 | xorl %ecx, %edi # ^b |
| 467 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | 459 | leal 0x6ED9EBA1(%rax,%r13), %eax # e += RCONST + W[n & 15] |
| 468 | addl %edi, %eax # e += (c ^ d ^ b) | 460 | addl %edi, %eax # e += (c ^ d ^ b) |
| 469 | movl %ebx, %esi # | 461 | movl %ebx, %esi # |
| 470 | roll $5, %esi # rotl32(a,5) | 462 | roll $5, %esi # rotl32(a,5) |
| 471 | addl %esi, %eax # e += rotl32(a,5) | 463 | addl %esi, %eax # e += rotl32(a,5) |
| 472 | rorl $2, %ecx # b = rotl32(b,30) | 464 | rorl $2, %ecx # b = rotl32(b,30) |
| 473 | # 30 | 465 | # 30 |
| 474 | movl %r11d, %esi # W[(n+13) & 15] | 466 | xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] |
| 475 | xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] | 467 | xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] |
| 476 | xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] | 468 | xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] |
| 477 | xorl %r14d, %esi # ^W[n & 15] | 469 | roll %r14d # |
| 478 | roll %esi # | ||
| 479 | movl %esi, %r14d # store to W[n & 15] | ||
| 480 | movl %ecx, %edi # c | 470 | movl %ecx, %edi # c |
| 481 | xorl %edx, %edi # ^d | 471 | xorl %edx, %edi # ^d |
| 482 | xorl %ebx, %edi # ^b | 472 | xorl %ebx, %edi # ^b |
| 483 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 473 | leal 0x6ED9EBA1(%rbp,%r14), %ebp # e += RCONST + W[n & 15] |
| 484 | addl %edi, %ebp # e += (c ^ d ^ b) | 474 | addl %edi, %ebp # e += (c ^ d ^ b) |
| 485 | movl %eax, %esi # | 475 | movl %eax, %esi # |
| 486 | roll $5, %esi # rotl32(a,5) | 476 | roll $5, %esi # rotl32(a,5) |
| 487 | addl %esi, %ebp # e += rotl32(a,5) | 477 | addl %esi, %ebp # e += rotl32(a,5) |
| 488 | rorl $2, %ebx # b = rotl32(b,30) | 478 | rorl $2, %ebx # b = rotl32(b,30) |
| 489 | # 31 | 479 | # 31 |
| 490 | movl %r12d, %esi # W[(n+13) & 15] | 480 | xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] |
| 491 | xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] | 481 | xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] |
| 492 | xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] | 482 | xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] |
| 493 | xorl %r15d, %esi # ^W[n & 15] | 483 | roll %r15d # |
| 494 | roll %esi # | ||
| 495 | movl %esi, %r15d # store to W[n & 15] | ||
| 496 | movl %ebx, %edi # c | 484 | movl %ebx, %edi # c |
| 497 | xorl %ecx, %edi # ^d | 485 | xorl %ecx, %edi # ^d |
| 498 | xorl %eax, %edi # ^b | 486 | xorl %eax, %edi # ^b |
| 499 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | 487 | leal 0x6ED9EBA1(%rdx,%r15), %edx # e += RCONST + W[n & 15] |
| 500 | addl %edi, %edx # e += (c ^ d ^ b) | 488 | addl %edi, %edx # e += (c ^ d ^ b) |
| 501 | movl %ebp, %esi # | 489 | movl %ebp, %esi # |
| 502 | roll $5, %esi # rotl32(a,5) | 490 | roll $5, %esi # rotl32(a,5) |
| @@ -512,7 +500,7 @@ sha1_process_block64: | |||
| 512 | movl %eax, %edi # c | 500 | movl %eax, %edi # c |
| 513 | xorl %ebx, %edi # ^d | 501 | xorl %ebx, %edi # ^d |
| 514 | xorl %ebp, %edi # ^b | 502 | xorl %ebp, %edi # ^b |
| 515 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 503 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
| 516 | addl %edi, %ecx # e += (c ^ d ^ b) | 504 | addl %edi, %ecx # e += (c ^ d ^ b) |
| 517 | movl %edx, %esi # | 505 | movl %edx, %esi # |
| 518 | roll $5, %esi # rotl32(a,5) | 506 | roll $5, %esi # rotl32(a,5) |
| @@ -528,7 +516,7 @@ sha1_process_block64: | |||
| 528 | movl %ebp, %edi # c | 516 | movl %ebp, %edi # c |
| 529 | xorl %eax, %edi # ^d | 517 | xorl %eax, %edi # ^d |
| 530 | xorl %edx, %edi # ^b | 518 | xorl %edx, %edi # ^b |
| 531 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 519 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
| 532 | addl %edi, %ebx # e += (c ^ d ^ b) | 520 | addl %edi, %ebx # e += (c ^ d ^ b) |
| 533 | movl %ecx, %esi # | 521 | movl %ecx, %esi # |
| 534 | roll $5, %esi # rotl32(a,5) | 522 | roll $5, %esi # rotl32(a,5) |
| @@ -544,7 +532,7 @@ sha1_process_block64: | |||
| 544 | movl %edx, %edi # c | 532 | movl %edx, %edi # c |
| 545 | xorl %ebp, %edi # ^d | 533 | xorl %ebp, %edi # ^d |
| 546 | xorl %ecx, %edi # ^b | 534 | xorl %ecx, %edi # ^b |
| 547 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | 535 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
| 548 | addl %edi, %eax # e += (c ^ d ^ b) | 536 | addl %edi, %eax # e += (c ^ d ^ b) |
| 549 | movl %ebx, %esi # | 537 | movl %ebx, %esi # |
| 550 | roll $5, %esi # rotl32(a,5) | 538 | roll $5, %esi # rotl32(a,5) |
| @@ -560,7 +548,7 @@ sha1_process_block64: | |||
| 560 | movl %ecx, %edi # c | 548 | movl %ecx, %edi # c |
| 561 | xorl %edx, %edi # ^d | 549 | xorl %edx, %edi # ^d |
| 562 | xorl %ebx, %edi # ^b | 550 | xorl %ebx, %edi # ^b |
| 563 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 551 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
| 564 | addl %edi, %ebp # e += (c ^ d ^ b) | 552 | addl %edi, %ebp # e += (c ^ d ^ b) |
| 565 | movl %eax, %esi # | 553 | movl %eax, %esi # |
| 566 | roll $5, %esi # rotl32(a,5) | 554 | roll $5, %esi # rotl32(a,5) |
| @@ -576,7 +564,7 @@ sha1_process_block64: | |||
| 576 | movl %ebx, %edi # c | 564 | movl %ebx, %edi # c |
| 577 | xorl %ecx, %edi # ^d | 565 | xorl %ecx, %edi # ^d |
| 578 | xorl %eax, %edi # ^b | 566 | xorl %eax, %edi # ^b |
| 579 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | 567 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
| 580 | addl %edi, %edx # e += (c ^ d ^ b) | 568 | addl %edi, %edx # e += (c ^ d ^ b) |
| 581 | movl %ebp, %esi # | 569 | movl %ebp, %esi # |
| 582 | roll $5, %esi # rotl32(a,5) | 570 | roll $5, %esi # rotl32(a,5) |
| @@ -592,7 +580,7 @@ sha1_process_block64: | |||
| 592 | movl %eax, %edi # c | 580 | movl %eax, %edi # c |
| 593 | xorl %ebx, %edi # ^d | 581 | xorl %ebx, %edi # ^d |
| 594 | xorl %ebp, %edi # ^b | 582 | xorl %ebp, %edi # ^b |
| 595 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 583 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
| 596 | addl %edi, %ecx # e += (c ^ d ^ b) | 584 | addl %edi, %ecx # e += (c ^ d ^ b) |
| 597 | movl %edx, %esi # | 585 | movl %edx, %esi # |
| 598 | roll $5, %esi # rotl32(a,5) | 586 | roll $5, %esi # rotl32(a,5) |
| @@ -608,7 +596,7 @@ sha1_process_block64: | |||
| 608 | movl %ebp, %edi # c | 596 | movl %ebp, %edi # c |
| 609 | xorl %eax, %edi # ^d | 597 | xorl %eax, %edi # ^d |
| 610 | xorl %edx, %edi # ^b | 598 | xorl %edx, %edi # ^b |
| 611 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 599 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
| 612 | addl %edi, %ebx # e += (c ^ d ^ b) | 600 | addl %edi, %ebx # e += (c ^ d ^ b) |
| 613 | movl %ecx, %esi # | 601 | movl %ecx, %esi # |
| 614 | roll $5, %esi # rotl32(a,5) | 602 | roll $5, %esi # rotl32(a,5) |
| @@ -624,7 +612,7 @@ sha1_process_block64: | |||
| 624 | movl %edx, %edi # c | 612 | movl %edx, %edi # c |
| 625 | xorl %ebp, %edi # ^d | 613 | xorl %ebp, %edi # ^d |
| 626 | xorl %ecx, %edi # ^b | 614 | xorl %ecx, %edi # ^b |
| 627 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | 615 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
| 628 | addl %edi, %eax # e += (c ^ d ^ b) | 616 | addl %edi, %eax # e += (c ^ d ^ b) |
| 629 | movl %ebx, %esi # | 617 | movl %ebx, %esi # |
| 630 | roll $5, %esi # rotl32(a,5) | 618 | roll $5, %esi # rotl32(a,5) |
| @@ -637,14 +625,12 @@ sha1_process_block64: | |||
| 637 | andl %ecx, %esi # si: b & c | 625 | andl %ecx, %esi # si: b & c |
| 638 | andl %edx, %edi # di: (b | c) & d | 626 | andl %edx, %edi # di: (b | c) & d |
| 639 | orl %esi, %edi # ((b | c) & d) | (b & c) | 627 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 640 | movl -32+4*5(%rsp), %esi # W[(n+13) & 15] | 628 | xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] |
| 641 | xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] | 629 | xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] |
| 642 | xorl %r10d, %esi # ^W[(n+2) & 15] | 630 | xorl %r10d, %r8d # ^W[(n+2) & 15] |
| 643 | xorl %r8d, %esi # ^W[n & 15] | 631 | roll %r8d # |
| 644 | roll %esi # | ||
| 645 | movl %esi, %r8d # store to W[n & 15] | ||
| 646 | addl %edi, %ebp # += ((b | c) & d) | (b & c) | 632 | addl %edi, %ebp # += ((b | c) & d) | (b & c) |
| 647 | leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 633 | leal -0x70E44324(%rbp,%r8), %ebp # e += RCONST + W[n & 15] |
| 648 | movl %eax, %esi # | 634 | movl %eax, %esi # |
| 649 | roll $5, %esi # rotl32(a,5) | 635 | roll $5, %esi # rotl32(a,5) |
| 650 | addl %esi, %ebp # e += rotl32(a,5) | 636 | addl %esi, %ebp # e += rotl32(a,5) |
| @@ -656,14 +642,12 @@ sha1_process_block64: | |||
| 656 | andl %ebx, %esi # si: b & c | 642 | andl %ebx, %esi # si: b & c |
| 657 | andl %ecx, %edi # di: (b | c) & d | 643 | andl %ecx, %edi # di: (b | c) & d |
| 658 | orl %esi, %edi # ((b | c) & d) | (b & c) | 644 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 659 | movl -32+4*6(%rsp), %esi # W[(n+13) & 15] | 645 | xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] |
| 660 | xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] | 646 | xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] |
| 661 | xorl %r11d, %esi # ^W[(n+2) & 15] | 647 | xorl %r11d, %r9d # ^W[(n+2) & 15] |
| 662 | xorl %r9d, %esi # ^W[n & 15] | 648 | roll %r9d # |
| 663 | roll %esi # | ||
| 664 | movl %esi, %r9d # store to W[n & 15] | ||
| 665 | addl %edi, %edx # += ((b | c) & d) | (b & c) | 649 | addl %edi, %edx # += ((b | c) & d) | (b & c) |
| 666 | leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | 650 | leal -0x70E44324(%rdx,%r9), %edx # e += RCONST + W[n & 15] |
| 667 | movl %ebp, %esi # | 651 | movl %ebp, %esi # |
| 668 | roll $5, %esi # rotl32(a,5) | 652 | roll $5, %esi # rotl32(a,5) |
| 669 | addl %esi, %edx # e += rotl32(a,5) | 653 | addl %esi, %edx # e += rotl32(a,5) |
| @@ -675,14 +659,12 @@ sha1_process_block64: | |||
| 675 | andl %eax, %esi # si: b & c | 659 | andl %eax, %esi # si: b & c |
| 676 | andl %ebx, %edi # di: (b | c) & d | 660 | andl %ebx, %edi # di: (b | c) & d |
| 677 | orl %esi, %edi # ((b | c) & d) | (b & c) | 661 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 678 | movl -32+4*7(%rsp), %esi # W[(n+13) & 15] | 662 | xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] |
| 679 | xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] | 663 | xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] |
| 680 | xorl %r12d, %esi # ^W[(n+2) & 15] | 664 | xorl %r12d, %r10d # ^W[(n+2) & 15] |
| 681 | xorl %r10d, %esi # ^W[n & 15] | 665 | roll %r10d # |
| 682 | roll %esi # | ||
| 683 | movl %esi, %r10d # store to W[n & 15] | ||
| 684 | addl %edi, %ecx # += ((b | c) & d) | (b & c) | 666 | addl %edi, %ecx # += ((b | c) & d) | (b & c) |
| 685 | leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 667 | leal -0x70E44324(%rcx,%r10), %ecx # e += RCONST + W[n & 15] |
| 686 | movl %edx, %esi # | 668 | movl %edx, %esi # |
| 687 | roll $5, %esi # rotl32(a,5) | 669 | roll $5, %esi # rotl32(a,5) |
| 688 | addl %esi, %ecx # e += rotl32(a,5) | 670 | addl %esi, %ecx # e += rotl32(a,5) |
| @@ -694,14 +676,12 @@ sha1_process_block64: | |||
| 694 | andl %ebp, %esi # si: b & c | 676 | andl %ebp, %esi # si: b & c |
| 695 | andl %eax, %edi # di: (b | c) & d | 677 | andl %eax, %edi # di: (b | c) & d |
| 696 | orl %esi, %edi # ((b | c) & d) | (b & c) | 678 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 697 | movl %r8d, %esi # W[(n+13) & 15] | 679 | xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] |
| 698 | xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] | 680 | xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] |
| 699 | xorl %r13d, %esi # ^W[(n+2) & 15] | 681 | xorl %r13d, %r11d # ^W[(n+2) & 15] |
| 700 | xorl %r11d, %esi # ^W[n & 15] | 682 | roll %r11d # |
| 701 | roll %esi # | ||
| 702 | movl %esi, %r11d # store to W[n & 15] | ||
| 703 | addl %edi, %ebx # += ((b | c) & d) | (b & c) | 683 | addl %edi, %ebx # += ((b | c) & d) | (b & c) |
| 704 | leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 684 | leal -0x70E44324(%rbx,%r11), %ebx # e += RCONST + W[n & 15] |
| 705 | movl %ecx, %esi # | 685 | movl %ecx, %esi # |
| 706 | roll $5, %esi # rotl32(a,5) | 686 | roll $5, %esi # rotl32(a,5) |
| 707 | addl %esi, %ebx # e += rotl32(a,5) | 687 | addl %esi, %ebx # e += rotl32(a,5) |
| @@ -713,14 +693,12 @@ sha1_process_block64: | |||
| 713 | andl %edx, %esi # si: b & c | 693 | andl %edx, %esi # si: b & c |
| 714 | andl %ebp, %edi # di: (b | c) & d | 694 | andl %ebp, %edi # di: (b | c) & d |
| 715 | orl %esi, %edi # ((b | c) & d) | (b & c) | 695 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 716 | movl %r9d, %esi # W[(n+13) & 15] | 696 | xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] |
| 717 | xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] | 697 | xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] |
| 718 | xorl %r14d, %esi # ^W[(n+2) & 15] | 698 | xorl %r14d, %r12d # ^W[(n+2) & 15] |
| 719 | xorl %r12d, %esi # ^W[n & 15] | 699 | roll %r12d # |
| 720 | roll %esi # | ||
| 721 | movl %esi, %r12d # store to W[n & 15] | ||
| 722 | addl %edi, %eax # += ((b | c) & d) | (b & c) | 700 | addl %edi, %eax # += ((b | c) & d) | (b & c) |
| 723 | leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | 701 | leal -0x70E44324(%rax,%r12), %eax # e += RCONST + W[n & 15] |
| 724 | movl %ebx, %esi # | 702 | movl %ebx, %esi # |
| 725 | roll $5, %esi # rotl32(a,5) | 703 | roll $5, %esi # rotl32(a,5) |
| 726 | addl %esi, %eax # e += rotl32(a,5) | 704 | addl %esi, %eax # e += rotl32(a,5) |
| @@ -732,14 +710,12 @@ sha1_process_block64: | |||
| 732 | andl %ecx, %esi # si: b & c | 710 | andl %ecx, %esi # si: b & c |
| 733 | andl %edx, %edi # di: (b | c) & d | 711 | andl %edx, %edi # di: (b | c) & d |
| 734 | orl %esi, %edi # ((b | c) & d) | (b & c) | 712 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 735 | movl %r10d, %esi # W[(n+13) & 15] | 713 | xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] |
| 736 | xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] | 714 | xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] |
| 737 | xorl %r15d, %esi # ^W[(n+2) & 15] | 715 | xorl %r15d, %r13d # ^W[(n+2) & 15] |
| 738 | xorl %r13d, %esi # ^W[n & 15] | 716 | roll %r13d # |
| 739 | roll %esi # | ||
| 740 | movl %esi, %r13d # store to W[n & 15] | ||
| 741 | addl %edi, %ebp # += ((b | c) & d) | (b & c) | 717 | addl %edi, %ebp # += ((b | c) & d) | (b & c) |
| 742 | leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 718 | leal -0x70E44324(%rbp,%r13), %ebp # e += RCONST + W[n & 15] |
| 743 | movl %eax, %esi # | 719 | movl %eax, %esi # |
| 744 | roll $5, %esi # rotl32(a,5) | 720 | roll $5, %esi # rotl32(a,5) |
| 745 | addl %esi, %ebp # e += rotl32(a,5) | 721 | addl %esi, %ebp # e += rotl32(a,5) |
| @@ -751,14 +727,12 @@ sha1_process_block64: | |||
| 751 | andl %ebx, %esi # si: b & c | 727 | andl %ebx, %esi # si: b & c |
| 752 | andl %ecx, %edi # di: (b | c) & d | 728 | andl %ecx, %edi # di: (b | c) & d |
| 753 | orl %esi, %edi # ((b | c) & d) | (b & c) | 729 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 754 | movl %r11d, %esi # W[(n+13) & 15] | 730 | xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] |
| 755 | xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] | 731 | xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] |
| 756 | xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] | 732 | xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] |
| 757 | xorl %r14d, %esi # ^W[n & 15] | 733 | roll %r14d # |
| 758 | roll %esi # | ||
| 759 | movl %esi, %r14d # store to W[n & 15] | ||
| 760 | addl %edi, %edx # += ((b | c) & d) | (b & c) | 734 | addl %edi, %edx # += ((b | c) & d) | (b & c) |
| 761 | leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | 735 | leal -0x70E44324(%rdx,%r14), %edx # e += RCONST + W[n & 15] |
| 762 | movl %ebp, %esi # | 736 | movl %ebp, %esi # |
| 763 | roll $5, %esi # rotl32(a,5) | 737 | roll $5, %esi # rotl32(a,5) |
| 764 | addl %esi, %edx # e += rotl32(a,5) | 738 | addl %esi, %edx # e += rotl32(a,5) |
| @@ -770,14 +744,12 @@ sha1_process_block64: | |||
| 770 | andl %eax, %esi # si: b & c | 744 | andl %eax, %esi # si: b & c |
| 771 | andl %ebx, %edi # di: (b | c) & d | 745 | andl %ebx, %edi # di: (b | c) & d |
| 772 | orl %esi, %edi # ((b | c) & d) | (b & c) | 746 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 773 | movl %r12d, %esi # W[(n+13) & 15] | 747 | xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] |
| 774 | xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] | 748 | xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] |
| 775 | xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] | 749 | xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] |
| 776 | xorl %r15d, %esi # ^W[n & 15] | 750 | roll %r15d # |
| 777 | roll %esi # | ||
| 778 | movl %esi, %r15d # store to W[n & 15] | ||
| 779 | addl %edi, %ecx # += ((b | c) & d) | (b & c) | 751 | addl %edi, %ecx # += ((b | c) & d) | (b & c) |
| 780 | leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 752 | leal -0x70E44324(%rcx,%r15), %ecx # e += RCONST + W[n & 15] |
| 781 | movl %edx, %esi # | 753 | movl %edx, %esi # |
| 782 | roll $5, %esi # rotl32(a,5) | 754 | roll $5, %esi # rotl32(a,5) |
| 783 | addl %esi, %ecx # e += rotl32(a,5) | 755 | addl %esi, %ecx # e += rotl32(a,5) |
| @@ -796,7 +768,7 @@ sha1_process_block64: | |||
| 796 | roll %esi # | 768 | roll %esi # |
| 797 | movl %esi, -32+4*0(%rsp) # store to W[n & 15] | 769 | movl %esi, -32+4*0(%rsp) # store to W[n & 15] |
| 798 | addl %edi, %ebx # += ((b | c) & d) | (b & c) | 770 | addl %edi, %ebx # += ((b | c) & d) | (b & c) |
| 799 | leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 771 | leal -0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
| 800 | movl %ecx, %esi # | 772 | movl %ecx, %esi # |
| 801 | roll $5, %esi # rotl32(a,5) | 773 | roll $5, %esi # rotl32(a,5) |
| 802 | addl %esi, %ebx # e += rotl32(a,5) | 774 | addl %esi, %ebx # e += rotl32(a,5) |
| @@ -815,7 +787,7 @@ sha1_process_block64: | |||
| 815 | roll %esi # | 787 | roll %esi # |
| 816 | movl %esi, -32+4*1(%rsp) # store to W[n & 15] | 788 | movl %esi, -32+4*1(%rsp) # store to W[n & 15] |
| 817 | addl %edi, %eax # += ((b | c) & d) | (b & c) | 789 | addl %edi, %eax # += ((b | c) & d) | (b & c) |
| 818 | leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | 790 | leal -0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
| 819 | movl %ebx, %esi # | 791 | movl %ebx, %esi # |
| 820 | roll $5, %esi # rotl32(a,5) | 792 | roll $5, %esi # rotl32(a,5) |
| 821 | addl %esi, %eax # e += rotl32(a,5) | 793 | addl %esi, %eax # e += rotl32(a,5) |
| @@ -834,7 +806,7 @@ sha1_process_block64: | |||
| 834 | roll %esi # | 806 | roll %esi # |
| 835 | movl %esi, -32+4*2(%rsp) # store to W[n & 15] | 807 | movl %esi, -32+4*2(%rsp) # store to W[n & 15] |
| 836 | addl %edi, %ebp # += ((b | c) & d) | (b & c) | 808 | addl %edi, %ebp # += ((b | c) & d) | (b & c) |
| 837 | leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 809 | leal -0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
| 838 | movl %eax, %esi # | 810 | movl %eax, %esi # |
| 839 | roll $5, %esi # rotl32(a,5) | 811 | roll $5, %esi # rotl32(a,5) |
| 840 | addl %esi, %ebp # e += rotl32(a,5) | 812 | addl %esi, %ebp # e += rotl32(a,5) |
| @@ -853,7 +825,7 @@ sha1_process_block64: | |||
| 853 | roll %esi # | 825 | roll %esi # |
| 854 | movl %esi, -32+4*3(%rsp) # store to W[n & 15] | 826 | movl %esi, -32+4*3(%rsp) # store to W[n & 15] |
| 855 | addl %edi, %edx # += ((b | c) & d) | (b & c) | 827 | addl %edi, %edx # += ((b | c) & d) | (b & c) |
| 856 | leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | 828 | leal -0x70E44324(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
| 857 | movl %ebp, %esi # | 829 | movl %ebp, %esi # |
| 858 | roll $5, %esi # rotl32(a,5) | 830 | roll $5, %esi # rotl32(a,5) |
| 859 | addl %esi, %edx # e += rotl32(a,5) | 831 | addl %esi, %edx # e += rotl32(a,5) |
| @@ -872,7 +844,7 @@ sha1_process_block64: | |||
| 872 | roll %esi # | 844 | roll %esi # |
| 873 | movl %esi, -32+4*4(%rsp) # store to W[n & 15] | 845 | movl %esi, -32+4*4(%rsp) # store to W[n & 15] |
| 874 | addl %edi, %ecx # += ((b | c) & d) | (b & c) | 846 | addl %edi, %ecx # += ((b | c) & d) | (b & c) |
| 875 | leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 847 | leal -0x70E44324(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
| 876 | movl %edx, %esi # | 848 | movl %edx, %esi # |
| 877 | roll $5, %esi # rotl32(a,5) | 849 | roll $5, %esi # rotl32(a,5) |
| 878 | addl %esi, %ecx # e += rotl32(a,5) | 850 | addl %esi, %ecx # e += rotl32(a,5) |
| @@ -891,7 +863,7 @@ sha1_process_block64: | |||
| 891 | roll %esi # | 863 | roll %esi # |
| 892 | movl %esi, -32+4*5(%rsp) # store to W[n & 15] | 864 | movl %esi, -32+4*5(%rsp) # store to W[n & 15] |
| 893 | addl %edi, %ebx # += ((b | c) & d) | (b & c) | 865 | addl %edi, %ebx # += ((b | c) & d) | (b & c) |
| 894 | leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 866 | leal -0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
| 895 | movl %ecx, %esi # | 867 | movl %ecx, %esi # |
| 896 | roll $5, %esi # rotl32(a,5) | 868 | roll $5, %esi # rotl32(a,5) |
| 897 | addl %esi, %ebx # e += rotl32(a,5) | 869 | addl %esi, %ebx # e += rotl32(a,5) |
| @@ -910,7 +882,7 @@ sha1_process_block64: | |||
| 910 | roll %esi # | 882 | roll %esi # |
| 911 | movl %esi, -32+4*6(%rsp) # store to W[n & 15] | 883 | movl %esi, -32+4*6(%rsp) # store to W[n & 15] |
| 912 | addl %edi, %eax # += ((b | c) & d) | (b & c) | 884 | addl %edi, %eax # += ((b | c) & d) | (b & c) |
| 913 | leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | 885 | leal -0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
| 914 | movl %ebx, %esi # | 886 | movl %ebx, %esi # |
| 915 | roll $5, %esi # rotl32(a,5) | 887 | roll $5, %esi # rotl32(a,5) |
| 916 | addl %esi, %eax # e += rotl32(a,5) | 888 | addl %esi, %eax # e += rotl32(a,5) |
| @@ -929,7 +901,7 @@ sha1_process_block64: | |||
| 929 | roll %esi # | 901 | roll %esi # |
| 930 | movl %esi, -32+4*7(%rsp) # store to W[n & 15] | 902 | movl %esi, -32+4*7(%rsp) # store to W[n & 15] |
| 931 | addl %edi, %ebp # += ((b | c) & d) | (b & c) | 903 | addl %edi, %ebp # += ((b | c) & d) | (b & c) |
| 932 | leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 904 | leal -0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
| 933 | movl %eax, %esi # | 905 | movl %eax, %esi # |
| 934 | roll $5, %esi # rotl32(a,5) | 906 | roll $5, %esi # rotl32(a,5) |
| 935 | addl %esi, %ebp # e += rotl32(a,5) | 907 | addl %esi, %ebp # e += rotl32(a,5) |
| @@ -941,14 +913,12 @@ sha1_process_block64: | |||
| 941 | andl %ebx, %esi # si: b & c | 913 | andl %ebx, %esi # si: b & c |
| 942 | andl %ecx, %edi # di: (b | c) & d | 914 | andl %ecx, %edi # di: (b | c) & d |
| 943 | orl %esi, %edi # ((b | c) & d) | (b & c) | 915 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 944 | movl -32+4*5(%rsp), %esi # W[(n+13) & 15] | 916 | xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] |
| 945 | xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] | 917 | xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] |
| 946 | xorl %r10d, %esi # ^W[(n+2) & 15] | 918 | xorl %r10d, %r8d # ^W[(n+2) & 15] |
| 947 | xorl %r8d, %esi # ^W[n & 15] | 919 | roll %r8d # |
| 948 | roll %esi # | ||
| 949 | movl %esi, %r8d # store to W[n & 15] | ||
| 950 | addl %edi, %edx # += ((b | c) & d) | (b & c) | 920 | addl %edi, %edx # += ((b | c) & d) | (b & c) |
| 951 | leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | 921 | leal -0x70E44324(%rdx,%r8), %edx # e += RCONST + W[n & 15] |
| 952 | movl %ebp, %esi # | 922 | movl %ebp, %esi # |
| 953 | roll $5, %esi # rotl32(a,5) | 923 | roll $5, %esi # rotl32(a,5) |
| 954 | addl %esi, %edx # e += rotl32(a,5) | 924 | addl %esi, %edx # e += rotl32(a,5) |
| @@ -960,14 +930,12 @@ sha1_process_block64: | |||
| 960 | andl %eax, %esi # si: b & c | 930 | andl %eax, %esi # si: b & c |
| 961 | andl %ebx, %edi # di: (b | c) & d | 931 | andl %ebx, %edi # di: (b | c) & d |
| 962 | orl %esi, %edi # ((b | c) & d) | (b & c) | 932 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 963 | movl -32+4*6(%rsp), %esi # W[(n+13) & 15] | 933 | xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] |
| 964 | xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] | 934 | xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] |
| 965 | xorl %r11d, %esi # ^W[(n+2) & 15] | 935 | xorl %r11d, %r9d # ^W[(n+2) & 15] |
| 966 | xorl %r9d, %esi # ^W[n & 15] | 936 | roll %r9d # |
| 967 | roll %esi # | ||
| 968 | movl %esi, %r9d # store to W[n & 15] | ||
| 969 | addl %edi, %ecx # += ((b | c) & d) | (b & c) | 937 | addl %edi, %ecx # += ((b | c) & d) | (b & c) |
| 970 | leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 938 | leal -0x70E44324(%rcx,%r9), %ecx # e += RCONST + W[n & 15] |
| 971 | movl %edx, %esi # | 939 | movl %edx, %esi # |
| 972 | roll $5, %esi # rotl32(a,5) | 940 | roll $5, %esi # rotl32(a,5) |
| 973 | addl %esi, %ecx # e += rotl32(a,5) | 941 | addl %esi, %ecx # e += rotl32(a,5) |
| @@ -979,14 +947,12 @@ sha1_process_block64: | |||
| 979 | andl %ebp, %esi # si: b & c | 947 | andl %ebp, %esi # si: b & c |
| 980 | andl %eax, %edi # di: (b | c) & d | 948 | andl %eax, %edi # di: (b | c) & d |
| 981 | orl %esi, %edi # ((b | c) & d) | (b & c) | 949 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 982 | movl -32+4*7(%rsp), %esi # W[(n+13) & 15] | 950 | xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] |
| 983 | xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] | 951 | xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] |
| 984 | xorl %r12d, %esi # ^W[(n+2) & 15] | 952 | xorl %r12d, %r10d # ^W[(n+2) & 15] |
| 985 | xorl %r10d, %esi # ^W[n & 15] | 953 | roll %r10d # |
| 986 | roll %esi # | ||
| 987 | movl %esi, %r10d # store to W[n & 15] | ||
| 988 | addl %edi, %ebx # += ((b | c) & d) | (b & c) | 954 | addl %edi, %ebx # += ((b | c) & d) | (b & c) |
| 989 | leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 955 | leal -0x70E44324(%rbx,%r10), %ebx # e += RCONST + W[n & 15] |
| 990 | movl %ecx, %esi # | 956 | movl %ecx, %esi # |
| 991 | roll $5, %esi # rotl32(a,5) | 957 | roll $5, %esi # rotl32(a,5) |
| 992 | addl %esi, %ebx # e += rotl32(a,5) | 958 | addl %esi, %ebx # e += rotl32(a,5) |
| @@ -998,77 +964,67 @@ sha1_process_block64: | |||
| 998 | andl %edx, %esi # si: b & c | 964 | andl %edx, %esi # si: b & c |
| 999 | andl %ebp, %edi # di: (b | c) & d | 965 | andl %ebp, %edi # di: (b | c) & d |
| 1000 | orl %esi, %edi # ((b | c) & d) | (b & c) | 966 | orl %esi, %edi # ((b | c) & d) | (b & c) |
| 1001 | movl %r8d, %esi # W[(n+13) & 15] | 967 | xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] |
| 1002 | xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] | 968 | xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] |
| 1003 | xorl %r13d, %esi # ^W[(n+2) & 15] | 969 | xorl %r13d, %r11d # ^W[(n+2) & 15] |
| 1004 | xorl %r11d, %esi # ^W[n & 15] | 970 | roll %r11d # |
| 1005 | roll %esi # | ||
| 1006 | movl %esi, %r11d # store to W[n & 15] | ||
| 1007 | addl %edi, %eax # += ((b | c) & d) | (b & c) | 971 | addl %edi, %eax # += ((b | c) & d) | (b & c) |
| 1008 | leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | 972 | leal -0x70E44324(%rax,%r11), %eax # e += RCONST + W[n & 15] |
| 1009 | movl %ebx, %esi # | 973 | movl %ebx, %esi # |
| 1010 | roll $5, %esi # rotl32(a,5) | 974 | roll $5, %esi # rotl32(a,5) |
| 1011 | addl %esi, %eax # e += rotl32(a,5) | 975 | addl %esi, %eax # e += rotl32(a,5) |
| 1012 | rorl $2, %ecx # b = rotl32(b,30) | 976 | rorl $2, %ecx # b = rotl32(b,30) |
| 1013 | # 60 | 977 | # 60 |
| 1014 | movl %r9d, %esi # W[(n+13) & 15] | 978 | xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] |
| 1015 | xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] | 979 | xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] |
| 1016 | xorl %r14d, %esi # ^W[(n+2) & 15] | 980 | xorl %r14d, %r12d # ^W[(n+2) & 15] |
| 1017 | xorl %r12d, %esi # ^W[n & 15] | 981 | roll %r12d # |
| 1018 | roll %esi # | ||
| 1019 | movl %esi, %r12d # store to W[n & 15] | ||
| 1020 | movl %ecx, %edi # c | 982 | movl %ecx, %edi # c |
| 1021 | xorl %edx, %edi # ^d | 983 | xorl %edx, %edi # ^d |
| 1022 | xorl %ebx, %edi # ^b | 984 | xorl %ebx, %edi # ^b |
| 1023 | leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 985 | leal -0x359D3E2A(%rbp,%r12), %ebp # e += RCONST + W[n & 15] |
| 1024 | addl %edi, %ebp # e += (c ^ d ^ b) | 986 | addl %edi, %ebp # e += (c ^ d ^ b) |
| 1025 | movl %eax, %esi # | 987 | movl %eax, %esi # |
| 1026 | roll $5, %esi # rotl32(a,5) | 988 | roll $5, %esi # rotl32(a,5) |
| 1027 | addl %esi, %ebp # e += rotl32(a,5) | 989 | addl %esi, %ebp # e += rotl32(a,5) |
| 1028 | rorl $2, %ebx # b = rotl32(b,30) | 990 | rorl $2, %ebx # b = rotl32(b,30) |
| 1029 | # 61 | 991 | # 61 |
| 1030 | movl %r10d, %esi # W[(n+13) & 15] | 992 | xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] |
| 1031 | xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] | 993 | xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] |
| 1032 | xorl %r15d, %esi # ^W[(n+2) & 15] | 994 | xorl %r15d, %r13d # ^W[(n+2) & 15] |
| 1033 | xorl %r13d, %esi # ^W[n & 15] | 995 | roll %r13d # |
| 1034 | roll %esi # | ||
| 1035 | movl %esi, %r13d # store to W[n & 15] | ||
| 1036 | movl %ebx, %edi # c | 996 | movl %ebx, %edi # c |
| 1037 | xorl %ecx, %edi # ^d | 997 | xorl %ecx, %edi # ^d |
| 1038 | xorl %eax, %edi # ^b | 998 | xorl %eax, %edi # ^b |
| 1039 | leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | 999 | leal -0x359D3E2A(%rdx,%r13), %edx # e += RCONST + W[n & 15] |
| 1040 | addl %edi, %edx # e += (c ^ d ^ b) | 1000 | addl %edi, %edx # e += (c ^ d ^ b) |
| 1041 | movl %ebp, %esi # | 1001 | movl %ebp, %esi # |
| 1042 | roll $5, %esi # rotl32(a,5) | 1002 | roll $5, %esi # rotl32(a,5) |
| 1043 | addl %esi, %edx # e += rotl32(a,5) | 1003 | addl %esi, %edx # e += rotl32(a,5) |
| 1044 | rorl $2, %eax # b = rotl32(b,30) | 1004 | rorl $2, %eax # b = rotl32(b,30) |
| 1045 | # 62 | 1005 | # 62 |
| 1046 | movl %r11d, %esi # W[(n+13) & 15] | 1006 | xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] |
| 1047 | xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] | 1007 | xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] |
| 1048 | xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] | 1008 | xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] |
| 1049 | xorl %r14d, %esi # ^W[n & 15] | 1009 | roll %r14d # |
| 1050 | roll %esi # | ||
| 1051 | movl %esi, %r14d # store to W[n & 15] | ||
| 1052 | movl %eax, %edi # c | 1010 | movl %eax, %edi # c |
| 1053 | xorl %ebx, %edi # ^d | 1011 | xorl %ebx, %edi # ^d |
| 1054 | xorl %ebp, %edi # ^b | 1012 | xorl %ebp, %edi # ^b |
| 1055 | leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 1013 | leal -0x359D3E2A(%rcx,%r14), %ecx # e += RCONST + W[n & 15] |
| 1056 | addl %edi, %ecx # e += (c ^ d ^ b) | 1014 | addl %edi, %ecx # e += (c ^ d ^ b) |
| 1057 | movl %edx, %esi # | 1015 | movl %edx, %esi # |
| 1058 | roll $5, %esi # rotl32(a,5) | 1016 | roll $5, %esi # rotl32(a,5) |
| 1059 | addl %esi, %ecx # e += rotl32(a,5) | 1017 | addl %esi, %ecx # e += rotl32(a,5) |
| 1060 | rorl $2, %ebp # b = rotl32(b,30) | 1018 | rorl $2, %ebp # b = rotl32(b,30) |
| 1061 | # 63 | 1019 | # 63 |
| 1062 | movl %r12d, %esi # W[(n+13) & 15] | 1020 | xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] |
| 1063 | xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] | 1021 | xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] |
| 1064 | xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] | 1022 | xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] |
| 1065 | xorl %r15d, %esi # ^W[n & 15] | 1023 | roll %r15d # |
| 1066 | roll %esi # | ||
| 1067 | movl %esi, %r15d # store to W[n & 15] | ||
| 1068 | movl %ebp, %edi # c | 1024 | movl %ebp, %edi # c |
| 1069 | xorl %eax, %edi # ^d | 1025 | xorl %eax, %edi # ^d |
| 1070 | xorl %edx, %edi # ^b | 1026 | xorl %edx, %edi # ^b |
| 1071 | leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 1027 | leal -0x359D3E2A(%rbx,%r15), %ebx # e += RCONST + W[n & 15] |
| 1072 | addl %edi, %ebx # e += (c ^ d ^ b) | 1028 | addl %edi, %ebx # e += (c ^ d ^ b) |
| 1073 | movl %ecx, %esi # | 1029 | movl %ecx, %esi # |
| 1074 | roll $5, %esi # rotl32(a,5) | 1030 | roll $5, %esi # rotl32(a,5) |
| @@ -1084,7 +1040,7 @@ sha1_process_block64: | |||
| 1084 | movl %edx, %edi # c | 1040 | movl %edx, %edi # c |
| 1085 | xorl %ebp, %edi # ^d | 1041 | xorl %ebp, %edi # ^d |
| 1086 | xorl %ecx, %edi # ^b | 1042 | xorl %ecx, %edi # ^b |
| 1087 | leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | 1043 | leal -0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
| 1088 | addl %edi, %eax # e += (c ^ d ^ b) | 1044 | addl %edi, %eax # e += (c ^ d ^ b) |
| 1089 | movl %ebx, %esi # | 1045 | movl %ebx, %esi # |
| 1090 | roll $5, %esi # rotl32(a,5) | 1046 | roll $5, %esi # rotl32(a,5) |
| @@ -1100,7 +1056,7 @@ sha1_process_block64: | |||
| 1100 | movl %ecx, %edi # c | 1056 | movl %ecx, %edi # c |
| 1101 | xorl %edx, %edi # ^d | 1057 | xorl %edx, %edi # ^d |
| 1102 | xorl %ebx, %edi # ^b | 1058 | xorl %ebx, %edi # ^b |
| 1103 | leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 1059 | leal -0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
| 1104 | addl %edi, %ebp # e += (c ^ d ^ b) | 1060 | addl %edi, %ebp # e += (c ^ d ^ b) |
| 1105 | movl %eax, %esi # | 1061 | movl %eax, %esi # |
| 1106 | roll $5, %esi # rotl32(a,5) | 1062 | roll $5, %esi # rotl32(a,5) |
| @@ -1116,7 +1072,7 @@ sha1_process_block64: | |||
| 1116 | movl %ebx, %edi # c | 1072 | movl %ebx, %edi # c |
| 1117 | xorl %ecx, %edi # ^d | 1073 | xorl %ecx, %edi # ^d |
| 1118 | xorl %eax, %edi # ^b | 1074 | xorl %eax, %edi # ^b |
| 1119 | leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | 1075 | leal -0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
| 1120 | addl %edi, %edx # e += (c ^ d ^ b) | 1076 | addl %edi, %edx # e += (c ^ d ^ b) |
| 1121 | movl %ebp, %esi # | 1077 | movl %ebp, %esi # |
| 1122 | roll $5, %esi # rotl32(a,5) | 1078 | roll $5, %esi # rotl32(a,5) |
| @@ -1132,7 +1088,7 @@ sha1_process_block64: | |||
| 1132 | movl %eax, %edi # c | 1088 | movl %eax, %edi # c |
| 1133 | xorl %ebx, %edi # ^d | 1089 | xorl %ebx, %edi # ^d |
| 1134 | xorl %ebp, %edi # ^b | 1090 | xorl %ebp, %edi # ^b |
| 1135 | leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 1091 | leal -0x359D3E2A(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
| 1136 | addl %edi, %ecx # e += (c ^ d ^ b) | 1092 | addl %edi, %ecx # e += (c ^ d ^ b) |
| 1137 | movl %edx, %esi # | 1093 | movl %edx, %esi # |
| 1138 | roll $5, %esi # rotl32(a,5) | 1094 | roll $5, %esi # rotl32(a,5) |
| @@ -1148,7 +1104,7 @@ sha1_process_block64: | |||
| 1148 | movl %ebp, %edi # c | 1104 | movl %ebp, %edi # c |
| 1149 | xorl %eax, %edi # ^d | 1105 | xorl %eax, %edi # ^d |
| 1150 | xorl %edx, %edi # ^b | 1106 | xorl %edx, %edi # ^b |
| 1151 | leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 1107 | leal -0x359D3E2A(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
| 1152 | addl %edi, %ebx # e += (c ^ d ^ b) | 1108 | addl %edi, %ebx # e += (c ^ d ^ b) |
| 1153 | movl %ecx, %esi # | 1109 | movl %ecx, %esi # |
| 1154 | roll $5, %esi # rotl32(a,5) | 1110 | roll $5, %esi # rotl32(a,5) |
| @@ -1164,7 +1120,7 @@ sha1_process_block64: | |||
| 1164 | movl %edx, %edi # c | 1120 | movl %edx, %edi # c |
| 1165 | xorl %ebp, %edi # ^d | 1121 | xorl %ebp, %edi # ^d |
| 1166 | xorl %ecx, %edi # ^b | 1122 | xorl %ecx, %edi # ^b |
| 1167 | leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | 1123 | leal -0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
| 1168 | addl %edi, %eax # e += (c ^ d ^ b) | 1124 | addl %edi, %eax # e += (c ^ d ^ b) |
| 1169 | movl %ebx, %esi # | 1125 | movl %ebx, %esi # |
| 1170 | roll $5, %esi # rotl32(a,5) | 1126 | roll $5, %esi # rotl32(a,5) |
| @@ -1180,7 +1136,7 @@ sha1_process_block64: | |||
| 1180 | movl %ecx, %edi # c | 1136 | movl %ecx, %edi # c |
| 1181 | xorl %edx, %edi # ^d | 1137 | xorl %edx, %edi # ^d |
| 1182 | xorl %ebx, %edi # ^b | 1138 | xorl %ebx, %edi # ^b |
| 1183 | leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 1139 | leal -0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
| 1184 | addl %edi, %ebp # e += (c ^ d ^ b) | 1140 | addl %edi, %ebp # e += (c ^ d ^ b) |
| 1185 | movl %eax, %esi # | 1141 | movl %eax, %esi # |
| 1186 | roll $5, %esi # rotl32(a,5) | 1142 | roll $5, %esi # rotl32(a,5) |
| @@ -1196,135 +1152,119 @@ sha1_process_block64: | |||
| 1196 | movl %ebx, %edi # c | 1152 | movl %ebx, %edi # c |
| 1197 | xorl %ecx, %edi # ^d | 1153 | xorl %ecx, %edi # ^d |
| 1198 | xorl %eax, %edi # ^b | 1154 | xorl %eax, %edi # ^b |
| 1199 | leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | 1155 | leal -0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
| 1200 | addl %edi, %edx # e += (c ^ d ^ b) | 1156 | addl %edi, %edx # e += (c ^ d ^ b) |
| 1201 | movl %ebp, %esi # | 1157 | movl %ebp, %esi # |
| 1202 | roll $5, %esi # rotl32(a,5) | 1158 | roll $5, %esi # rotl32(a,5) |
| 1203 | addl %esi, %edx # e += rotl32(a,5) | 1159 | addl %esi, %edx # e += rotl32(a,5) |
| 1204 | rorl $2, %eax # b = rotl32(b,30) | 1160 | rorl $2, %eax # b = rotl32(b,30) |
| 1205 | # 72 | 1161 | # 72 |
| 1206 | movl -32+4*5(%rsp), %esi # W[(n+13) & 15] | 1162 | xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] |
| 1207 | xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] | 1163 | xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] |
| 1208 | xorl %r10d, %esi # ^W[(n+2) & 15] | 1164 | xorl %r10d, %r8d # ^W[(n+2) & 15] |
| 1209 | xorl %r8d, %esi # ^W[n & 15] | 1165 | roll %r8d # |
| 1210 | roll %esi # | ||
| 1211 | movl %esi, %r8d # store to W[n & 15] | ||
| 1212 | movl %eax, %edi # c | 1166 | movl %eax, %edi # c |
| 1213 | xorl %ebx, %edi # ^d | 1167 | xorl %ebx, %edi # ^d |
| 1214 | xorl %ebp, %edi # ^b | 1168 | xorl %ebp, %edi # ^b |
| 1215 | leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 1169 | leal -0x359D3E2A(%rcx,%r8), %ecx # e += RCONST + W[n & 15] |
| 1216 | addl %edi, %ecx # e += (c ^ d ^ b) | 1170 | addl %edi, %ecx # e += (c ^ d ^ b) |
| 1217 | movl %edx, %esi # | 1171 | movl %edx, %esi # |
| 1218 | roll $5, %esi # rotl32(a,5) | 1172 | roll $5, %esi # rotl32(a,5) |
| 1219 | addl %esi, %ecx # e += rotl32(a,5) | 1173 | addl %esi, %ecx # e += rotl32(a,5) |
| 1220 | rorl $2, %ebp # b = rotl32(b,30) | 1174 | rorl $2, %ebp # b = rotl32(b,30) |
| 1221 | # 73 | 1175 | # 73 |
| 1222 | movl -32+4*6(%rsp), %esi # W[(n+13) & 15] | 1176 | xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] |
| 1223 | xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] | 1177 | xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] |
| 1224 | xorl %r11d, %esi # ^W[(n+2) & 15] | 1178 | xorl %r11d, %r9d # ^W[(n+2) & 15] |
| 1225 | xorl %r9d, %esi # ^W[n & 15] | 1179 | roll %r9d # |
| 1226 | roll %esi # | ||
| 1227 | movl %esi, %r9d # store to W[n & 15] | ||
| 1228 | movl %ebp, %edi # c | 1180 | movl %ebp, %edi # c |
| 1229 | xorl %eax, %edi # ^d | 1181 | xorl %eax, %edi # ^d |
| 1230 | xorl %edx, %edi # ^b | 1182 | xorl %edx, %edi # ^b |
| 1231 | leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 1183 | leal -0x359D3E2A(%rbx,%r9), %ebx # e += RCONST + W[n & 15] |
| 1232 | addl %edi, %ebx # e += (c ^ d ^ b) | 1184 | addl %edi, %ebx # e += (c ^ d ^ b) |
| 1233 | movl %ecx, %esi # | 1185 | movl %ecx, %esi # |
| 1234 | roll $5, %esi # rotl32(a,5) | 1186 | roll $5, %esi # rotl32(a,5) |
| 1235 | addl %esi, %ebx # e += rotl32(a,5) | 1187 | addl %esi, %ebx # e += rotl32(a,5) |
| 1236 | rorl $2, %edx # b = rotl32(b,30) | 1188 | rorl $2, %edx # b = rotl32(b,30) |
| 1237 | # 74 | 1189 | # 74 |
| 1238 | movl -32+4*7(%rsp), %esi # W[(n+13) & 15] | 1190 | xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] |
| 1239 | xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] | 1191 | xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] |
| 1240 | xorl %r12d, %esi # ^W[(n+2) & 15] | 1192 | xorl %r12d, %r10d # ^W[(n+2) & 15] |
| 1241 | xorl %r10d, %esi # ^W[n & 15] | 1193 | roll %r10d # |
| 1242 | roll %esi # | ||
| 1243 | movl %esi, %r10d # store to W[n & 15] | ||
| 1244 | movl %edx, %edi # c | 1194 | movl %edx, %edi # c |
| 1245 | xorl %ebp, %edi # ^d | 1195 | xorl %ebp, %edi # ^d |
| 1246 | xorl %ecx, %edi # ^b | 1196 | xorl %ecx, %edi # ^b |
| 1247 | leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | 1197 | leal -0x359D3E2A(%rax,%r10), %eax # e += RCONST + W[n & 15] |
| 1248 | addl %edi, %eax # e += (c ^ d ^ b) | 1198 | addl %edi, %eax # e += (c ^ d ^ b) |
| 1249 | movl %ebx, %esi # | 1199 | movl %ebx, %esi # |
| 1250 | roll $5, %esi # rotl32(a,5) | 1200 | roll $5, %esi # rotl32(a,5) |
| 1251 | addl %esi, %eax # e += rotl32(a,5) | 1201 | addl %esi, %eax # e += rotl32(a,5) |
| 1252 | rorl $2, %ecx # b = rotl32(b,30) | 1202 | rorl $2, %ecx # b = rotl32(b,30) |
| 1253 | # 75 | 1203 | # 75 |
| 1254 | movl %r8d, %esi # W[(n+13) & 15] | 1204 | xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] |
| 1255 | xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] | 1205 | xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] |
| 1256 | xorl %r13d, %esi # ^W[(n+2) & 15] | 1206 | xorl %r13d, %r11d # ^W[(n+2) & 15] |
| 1257 | xorl %r11d, %esi # ^W[n & 15] | 1207 | roll %r11d # |
| 1258 | roll %esi # | ||
| 1259 | movl %esi, %r11d # store to W[n & 15] | ||
| 1260 | movl %ecx, %edi # c | 1208 | movl %ecx, %edi # c |
| 1261 | xorl %edx, %edi # ^d | 1209 | xorl %edx, %edi # ^d |
| 1262 | xorl %ebx, %edi # ^b | 1210 | xorl %ebx, %edi # ^b |
| 1263 | leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 1211 | leal -0x359D3E2A(%rbp,%r11), %ebp # e += RCONST + W[n & 15] |
| 1264 | addl %edi, %ebp # e += (c ^ d ^ b) | 1212 | addl %edi, %ebp # e += (c ^ d ^ b) |
| 1265 | movl %eax, %esi # | 1213 | movl %eax, %esi # |
| 1266 | roll $5, %esi # rotl32(a,5) | 1214 | roll $5, %esi # rotl32(a,5) |
| 1267 | addl %esi, %ebp # e += rotl32(a,5) | 1215 | addl %esi, %ebp # e += rotl32(a,5) |
| 1268 | rorl $2, %ebx # b = rotl32(b,30) | 1216 | rorl $2, %ebx # b = rotl32(b,30) |
| 1269 | # 76 | 1217 | # 76 |
| 1270 | movl %r9d, %esi # W[(n+13) & 15] | 1218 | xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] |
| 1271 | xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] | 1219 | xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] |
| 1272 | xorl %r14d, %esi # ^W[(n+2) & 15] | 1220 | xorl %r14d, %r12d # ^W[(n+2) & 15] |
| 1273 | xorl %r12d, %esi # ^W[n & 15] | 1221 | roll %r12d # |
| 1274 | roll %esi # | ||
| 1275 | movl %esi, %r12d # store to W[n & 15] | ||
| 1276 | movl %ebx, %edi # c | 1222 | movl %ebx, %edi # c |
| 1277 | xorl %ecx, %edi # ^d | 1223 | xorl %ecx, %edi # ^d |
| 1278 | xorl %eax, %edi # ^b | 1224 | xorl %eax, %edi # ^b |
| 1279 | leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | 1225 | leal -0x359D3E2A(%rdx,%r12), %edx # e += RCONST + W[n & 15] |
| 1280 | addl %edi, %edx # e += (c ^ d ^ b) | 1226 | addl %edi, %edx # e += (c ^ d ^ b) |
| 1281 | movl %ebp, %esi # | 1227 | movl %ebp, %esi # |
| 1282 | roll $5, %esi # rotl32(a,5) | 1228 | roll $5, %esi # rotl32(a,5) |
| 1283 | addl %esi, %edx # e += rotl32(a,5) | 1229 | addl %esi, %edx # e += rotl32(a,5) |
| 1284 | rorl $2, %eax # b = rotl32(b,30) | 1230 | rorl $2, %eax # b = rotl32(b,30) |
| 1285 | # 77 | 1231 | # 77 |
| 1286 | movl %r10d, %esi # W[(n+13) & 15] | 1232 | xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] |
| 1287 | xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] | 1233 | xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] |
| 1288 | xorl %r15d, %esi # ^W[(n+2) & 15] | 1234 | xorl %r15d, %r13d # ^W[(n+2) & 15] |
| 1289 | xorl %r13d, %esi # ^W[n & 15] | 1235 | roll %r13d # |
| 1290 | roll %esi # | ||
| 1291 | # store to W[n & 15] - unused, not done | ||
| 1292 | movl %eax, %edi # c | 1236 | movl %eax, %edi # c |
| 1293 | xorl %ebx, %edi # ^d | 1237 | xorl %ebx, %edi # ^d |
| 1294 | xorl %ebp, %edi # ^b | 1238 | xorl %ebp, %edi # ^b |
| 1295 | leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 1239 | leal -0x359D3E2A(%rcx,%r13), %ecx # e += RCONST + W[n & 15] |
| 1296 | addl %edi, %ecx # e += (c ^ d ^ b) | 1240 | addl %edi, %ecx # e += (c ^ d ^ b) |
| 1297 | movl %edx, %esi # | 1241 | movl %edx, %esi # |
| 1298 | roll $5, %esi # rotl32(a,5) | 1242 | roll $5, %esi # rotl32(a,5) |
| 1299 | addl %esi, %ecx # e += rotl32(a,5) | 1243 | addl %esi, %ecx # e += rotl32(a,5) |
| 1300 | rorl $2, %ebp # b = rotl32(b,30) | 1244 | rorl $2, %ebp # b = rotl32(b,30) |
| 1301 | # 78 | 1245 | # 78 |
| 1302 | movl %r11d, %esi # W[(n+13) & 15] | 1246 | xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] |
| 1303 | xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] | 1247 | xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] |
| 1304 | xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] | 1248 | xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] |
| 1305 | xorl %r14d, %esi # ^W[n & 15] | 1249 | roll %r14d # |
| 1306 | roll %esi # | ||
| 1307 | # store to W[n & 15] - unused, not done | ||
| 1308 | movl %ebp, %edi # c | 1250 | movl %ebp, %edi # c |
| 1309 | xorl %eax, %edi # ^d | 1251 | xorl %eax, %edi # ^d |
| 1310 | xorl %edx, %edi # ^b | 1252 | xorl %edx, %edi # ^b |
| 1311 | leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 1253 | leal -0x359D3E2A(%rbx,%r14), %ebx # e += RCONST + W[n & 15] |
| 1312 | addl %edi, %ebx # e += (c ^ d ^ b) | 1254 | addl %edi, %ebx # e += (c ^ d ^ b) |
| 1313 | movl %ecx, %esi # | 1255 | movl %ecx, %esi # |
| 1314 | roll $5, %esi # rotl32(a,5) | 1256 | roll $5, %esi # rotl32(a,5) |
| 1315 | addl %esi, %ebx # e += rotl32(a,5) | 1257 | addl %esi, %ebx # e += rotl32(a,5) |
| 1316 | rorl $2, %edx # b = rotl32(b,30) | 1258 | rorl $2, %edx # b = rotl32(b,30) |
| 1317 | # 79 | 1259 | # 79 |
| 1318 | movl %r12d, %esi # W[(n+13) & 15] | 1260 | xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] |
| 1319 | xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] | 1261 | xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] |
| 1320 | xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] | 1262 | xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] |
| 1321 | xorl %r15d, %esi # ^W[n & 15] | 1263 | roll %r15d # |
| 1322 | roll %esi # | ||
| 1323 | # store to W[n & 15] - unused, not done | ||
| 1324 | movl %edx, %edi # c | 1264 | movl %edx, %edi # c |
| 1325 | xorl %ebp, %edi # ^d | 1265 | xorl %ebp, %edi # ^d |
| 1326 | xorl %ecx, %edi # ^b | 1266 | xorl %ecx, %edi # ^b |
| 1327 | leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | 1267 | leal -0x359D3E2A(%rax,%r15), %eax # e += RCONST + W[n & 15] |
| 1328 | addl %edi, %eax # e += (c ^ d ^ b) | 1268 | addl %edi, %eax # e += (c ^ d ^ b) |
| 1329 | movl %ebx, %esi # | 1269 | movl %ebx, %esi # |
| 1330 | roll $5, %esi # rotl32(a,5) | 1270 | roll $5, %esi # rotl32(a,5) |
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh new file mode 100755 index 000000000..931c0f0fd --- /dev/null +++ b/libbb/hash_md5_sha_x86-64.S.sh | |||
| @@ -0,0 +1,267 @@ | |||
| 1 | #!/bin/sh | ||
| 2 | |||
| 3 | # We don't regenerate it on every "make" invocation - only by hand. | ||
| 4 | # The reason is that the changes to generated code are difficult | ||
| 5 | # to visualize by looking only at this script, it helps when the commit | ||
| 6 | # also contains the diff of the generated file. | ||
| 7 | exec >hash_md5_sha_x86-64.S | ||
| 8 | |||
| 9 | echo \ | ||
| 10 | '### Generated by hash_md5_sha_x86-64.S.sh ### | ||
| 11 | |||
| 12 | #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) | ||
| 13 | .section .text.sha1_process_block64,"ax",@progbits | ||
| 14 | .globl sha1_process_block64 | ||
| 15 | .hidden sha1_process_block64 | ||
| 16 | .type sha1_process_block64, @function | ||
| 17 | |||
| 18 | .balign 8 # allow decoders to fetch at least 4 first insns | ||
| 19 | sha1_process_block64: | ||
| 20 | pushq %r15 # | ||
| 21 | pushq %r14 # | ||
| 22 | pushq %r13 # | ||
| 23 | pushq %r12 # | ||
| 24 | pushq %rbp # | ||
| 25 | pushq %rbx # | ||
| 26 | pushq %rdi # we need ctx at the end | ||
| 27 | |||
| 28 | #Register and stack use: | ||
| 29 | # eax..edx: a..d | ||
| 30 | # ebp: e | ||
| 31 | # esi,edi: temps | ||
| 32 | # -32+4*n(%rsp),r8...r15: W[0..7,8..15] | ||
| 33 | # (TODO: actually W[0..7] are used a bit more often, put _thme_ into r8..r15?) | ||
| 34 | |||
| 35 | movq 4*8(%rdi), %r8 | ||
| 36 | bswapq %r8 | ||
| 37 | movl %r8d, %r9d | ||
| 38 | shrq $32, %r8 | ||
| 39 | movq 4*10(%rdi), %r10 | ||
| 40 | bswapq %r10 | ||
| 41 | movl %r10d, %r11d | ||
| 42 | shrq $32, %r10 | ||
| 43 | movq 4*12(%rdi), %r12 | ||
| 44 | bswapq %r12 | ||
| 45 | movl %r12d, %r13d | ||
| 46 | shrq $32, %r12 | ||
| 47 | movq 4*14(%rdi), %r14 | ||
| 48 | bswapq %r14 | ||
| 49 | movl %r14d, %r15d | ||
| 50 | shrq $32, %r14 | ||
| 51 | |||
| 52 | movl $3, %eax | ||
| 53 | 1: | ||
| 54 | movq (%rdi,%rax,8), %rsi | ||
| 55 | bswapq %rsi | ||
| 56 | rolq $32, %rsi | ||
| 57 | movq %rsi, -32(%rsp,%rax,8) | ||
| 58 | decl %eax | ||
| 59 | jns 1b | ||
| 60 | movl 80(%rdi), %eax # a = ctx->hash[0] | ||
| 61 | movl 84(%rdi), %ebx # b = ctx->hash[1] | ||
| 62 | movl 88(%rdi), %ecx # c = ctx->hash[2] | ||
| 63 | movl 92(%rdi), %edx # d = ctx->hash[3] | ||
| 64 | movl 96(%rdi), %ebp # e = ctx->hash[4] | ||
| 65 | ' | ||
| 66 | W32() { | ||
| 67 | test "$1" || exit 1 | ||
| 68 | test "$1" -lt 0 && exit 1 | ||
| 69 | test "$1" -gt 15 && exit 1 | ||
| 70 | test "$1" -lt 8 && echo "-32+4*$1(%rsp)" | ||
| 71 | test "$1" -ge 8 && echo "%r${1}d" | ||
| 72 | } | ||
| 73 | |||
# RD1A a b c d e n: emit asm for one SHA-1 round-1 step, n = 0..15
# (message words are used directly, no W[] recurrence yet).
# $1..$5 = register name suffixes (e.g. "ax") currently holding a..e;
# $6     = step number.
# F(b,c,d) = (((c ^ d) & b) ^ d)  -- the "choice" function.
# Reads $RCONST from the calling block; for n == 0 the emitted code
# relies on W[0] being in %esi already (see "# W[0], already in %esi").
RD1A() {
local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
local n=$(($6))
echo "# $n"
# Get W[n] into %esi -- unless it already lives in a register (n >= 8)
test $n = 0 && echo "
# W[0], already in %esi
";test $n != 0 && test $n -lt 8 && echo "
movl `W32 $n`, %esi # W[n]
";test $n -ge 8 && echo "
# W[n], in %r$n
";echo "
movl %e$c, %edi # c
xorl %e$d, %edi # ^d
andl %e$b, %edi # &b
xorl %e$d, %edi # (((c ^ d) & b) ^ d)
";test $n -lt 8 && echo "
leal $RCONST(%r$e,%rsi),%e$e # e += RCONST + W[n]
";test $n -ge 8 && echo "
leal $RCONST(%r$e,%r$n),%e$e # e += RCONST + W[n]
";echo "
addl %edi, %e$e # e += (((c ^ d) & b) ^ d)
movl %e$a, %esi #
roll \$5, %esi # rotl32(a,5)
addl %esi, %e$e # e += rotl32(a,5)
rorl \$2, %e$b # b = rotl32(b,30)
"
}
# RD1B a b c d e n: emit asm for one SHA-1 round-1 step that also performs
# the W[] recurrence (n = 16..19).  Same F as RD1A, but first recomputes
#   W[n&15] = rotl32(W[(n+13)&15] ^ W[(n+8)&15] ^ W[(n+2)&15] ^ W[n&15], 1)
# The 16-entry W[] ring is split between stack slots (indices 0..7) and
# r8d..r15d (indices 8..15); W32() picks the operand, and the two emitted
# variants below differ in whether W[n&15] can be updated in place.
RD1B() {
local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
local n=$(($6))
# Ring-buffer indices of the four W[] words in the recurrence
local n13=$(((n+13) & 15))
local n8=$(((n+8) & 15))
local n2=$(((n+2) & 15))
local n0=$(((n+0) & 15))
echo "
# $n
";test $n0 -lt 8 && echo "
movl `W32 $n13`, %esi # W[(n+13) & 15]
xorl `W32 $n8`, %esi # ^W[(n+8) & 15]
xorl `W32 $n2`, %esi # ^W[(n+2) & 15]
xorl `W32 $n0`, %esi # ^W[n & 15]
roll %esi #
movl %esi, `W32 $n0` # store to W[n & 15]
";test $n0 -ge 8 && echo "
xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15]
xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15]
xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15]
roll `W32 $n0` #
"; echo "
movl %e$c, %edi # c
xorl %e$d, %edi # ^d
andl %e$b, %edi # &b
xorl %e$d, %edi # (((c ^ d) & b) ^ d)
";test $n0 -lt 8 && echo "
leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15]
";test $n0 -ge 8 && echo "
leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15]
";echo "
addl %edi, %e$e # e += (((c ^ d) & b) ^ d)
movl %e$a, %esi #
roll \$5, %esi # rotl32(a,5)
addl %esi, %e$e # e += rotl32(a,5)
rorl \$2, %e$b # b = rotl32(b,30)
"
}
# Round 1: steps 0..19.  Steps 0..15 consume the input words directly
# (RD1A); steps 16..19 already need the W[] recurrence (RD1B).
# Each round block sets its own RCONST; blank lines produced by the
# generators are stripped by grep.
{
	RCONST=0x5A827999
	RD1A ax bx cx dx bp  0
	RD1A bp ax bx cx dx  1
	RD1A dx bp ax bx cx  2
	RD1A cx dx bp ax bx  3
	RD1A bx cx dx bp ax  4
	RD1A ax bx cx dx bp  5
	RD1A bp ax bx cx dx  6
	RD1A dx bp ax bx cx  7
	RD1A cx dx bp ax bx  8
	RD1A bx cx dx bp ax  9
	RD1A ax bx cx dx bp 10
	RD1A bp ax bx cx dx 11
	RD1A dx bp ax bx cx 12
	RD1A cx dx bp ax bx 13
	RD1A bx cx dx bp ax 14
	RD1A ax bx cx dx bp 15
	RD1B bp ax bx cx dx 16
	RD1B dx bp ax bx cx 17
	RD1B cx dx bp ax bx 18
	RD1B bx cx dx bp ax 19
} | grep -v '^$'
| 146 | |||
# RD2 a b c d e n: emit asm for one SHA-1 round-2 step (n = 20..39).
# F(b,c,d) = b ^ c ^ d (parity); the W[] recurrence is identical to RD1B.
# Also reused for round 4 (n = 60..79), which differs only in $RCONST --
# see the "Round 4" driver block below.
RD2() {
local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
local n=$(($6))
# Ring-buffer indices of the four W[] words in the recurrence
local n13=$(((n+13) & 15))
local n8=$(((n+8) & 15))
local n2=$(((n+2) & 15))
local n0=$(((n+0) & 15))
echo "
# $n
";test $n0 -lt 8 && echo "
movl `W32 $n13`, %esi # W[(n+13) & 15]
xorl `W32 $n8`, %esi # ^W[(n+8) & 15]
xorl `W32 $n2`, %esi # ^W[(n+2) & 15]
xorl `W32 $n0`, %esi # ^W[n & 15]
roll %esi #
movl %esi, `W32 $n0` # store to W[n & 15]
";test $n0 -ge 8 && echo "
xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15]
xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15]
xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15]
roll `W32 $n0` #
"; echo "
movl %e$c, %edi # c
xorl %e$d, %edi # ^d
xorl %e$b, %edi # ^b
";test $n0 -lt 8 && echo "
leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15]
";test $n0 -ge 8 && echo "
leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15]
";echo "
addl %edi, %e$e # e += (c ^ d ^ b)
movl %e$a, %esi #
roll \$5, %esi # rotl32(a,5)
addl %esi, %e$e # e += rotl32(a,5)
rorl \$2, %e$b # b = rotl32(b,30)
"
}
# Round 2: steps 20..39, F = parity (RD2).
{
	RCONST=0x6ED9EBA1
	RD2 ax bx cx dx bp 20
	RD2 bp ax bx cx dx 21
	RD2 dx bp ax bx cx 22
	RD2 cx dx bp ax bx 23
	RD2 bx cx dx bp ax 24
	RD2 ax bx cx dx bp 25
	RD2 bp ax bx cx dx 26
	RD2 dx bp ax bx cx 27
	RD2 cx dx bp ax bx 28
	RD2 bx cx dx bp ax 29
	RD2 ax bx cx dx bp 30
	RD2 bp ax bx cx dx 31
	RD2 dx bp ax bx cx 32
	RD2 cx dx bp ax bx 33
	RD2 bx cx dx bp ax 34
	RD2 ax bx cx dx bp 35
	RD2 bp ax bx cx dx 36
	RD2 dx bp ax bx cx 37
	RD2 cx dx bp ax bx 38
	RD2 bx cx dx bp ax 39
} | grep -v '^$'
| 191 | |||
# RD3 a b c d e n: emit asm for one SHA-1 round-3 step (n = 40..59).
# F(b,c,d) = ((b | c) & d) | (b & c) -- the "majority" function.
# W[] recurrence is the same as in RD1B/RD2.  Note the ordering: F is
# emitted first (it needs both %esi and %edi as temporaries), leaving
# %esi free for the subsequent W[] update.
RD3() {
local a=$1;local b=$2;local c=$3;local d=$4;local e=$5
local n=$(($6))
# Ring-buffer indices of the four W[] words in the recurrence
local n13=$(((n+13) & 15))
local n8=$(((n+8) & 15))
local n2=$(((n+2) & 15))
local n0=$(((n+0) & 15))
echo "
# $n
movl %e$b, %edi # di: b
movl %e$b, %esi # si: b
orl %e$c, %edi # di: b | c
andl %e$c, %esi # si: b & c
andl %e$d, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
";test $n0 -lt 8 && echo "
movl `W32 $n13`, %esi # W[(n+13) & 15]
xorl `W32 $n8`, %esi # ^W[(n+8) & 15]
xorl `W32 $n2`, %esi # ^W[(n+2) & 15]
xorl `W32 $n0`, %esi # ^W[n & 15]
roll %esi #
movl %esi, `W32 $n0` # store to W[n & 15]
";test $n0 -ge 8 && echo "
xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15]
xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15]
xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15]
roll `W32 $n0` #
"; echo "
addl %edi, %e$e # += ((b | c) & d) | (b & c)
";test $n0 -lt 8 && echo "
leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15]
";test $n0 -ge 8 && echo "
leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15]
";echo "
movl %e$a, %esi #
roll \$5, %esi # rotl32(a,5)
addl %esi, %e$e # e += rotl32(a,5)
rorl \$2, %e$b # b = rotl32(b,30)
"
}
# Round 3: steps 40..59, F = majority (RD3).
# 0x8F1BBCDC is "out of range for signed 32bit displacement" in leal,
# so the equivalent negative constant (same value mod 2^32) is used.
{
	RCONST=-0x70E44324
	RD3 ax bx cx dx bp 40
	RD3 bp ax bx cx dx 41
	RD3 dx bp ax bx cx 42
	RD3 cx dx bp ax bx 43
	RD3 bx cx dx bp ax 44
	RD3 ax bx cx dx bp 45
	RD3 bp ax bx cx dx 46
	RD3 dx bp ax bx cx 47
	RD3 cx dx bp ax bx 48
	RD3 bx cx dx bp ax 49
	RD3 ax bx cx dx bp 50
	RD3 bp ax bx cx dx 51
	RD3 dx bp ax bx cx 52
	RD3 cx dx bp ax bx 53
	RD3 bx cx dx bp ax 54
	RD3 ax bx cx dx bp 55
	RD3 bp ax bx cx dx 56
	RD3 dx bp ax bx cx 57
	RD3 cx dx bp ax bx 58
	RD3 bx cx dx bp ax 59
} | grep -v '^$'
| 240 | |||
# Round 4: steps 60..79 -- same logic as round 2, only n and RCONST differ.
# 0xCA62C1D6 is "out of range for signed 32bit displacement" in leal,
# so the equivalent negative constant (same value mod 2^32) is used.
{
	RCONST=-0x359D3E2A
	RD2 ax bx cx dx bp 60
	RD2 bp ax bx cx dx 61
	RD2 dx bp ax bx cx 62
	RD2 cx dx bp ax bx 63
	RD2 bx cx dx bp ax 64
	RD2 ax bx cx dx bp 65
	RD2 bp ax bx cx dx 66
	RD2 dx bp ax bx cx 67
	RD2 cx dx bp ax bx 68
	RD2 bx cx dx bp ax 69
	RD2 ax bx cx dx bp 70
	RD2 bp ax bx cx dx 71
	RD2 dx bp ax bx cx 72
	RD2 cx dx bp ax bx 73
	RD2 bx cx dx bp ax 74
	RD2 ax bx cx dx bp 75
	RD2 bp ax bx cx dx 76
	RD2 dx bp ax bx cx 77
	RD2 cx dx bp ax bx 78
	RD2 bx cx dx bp ax 79
} | grep -v '^$'
| 250 | |||
| 251 | echo " | ||
| 252 | popq %rdi # | ||
| 253 | addl %eax, 80(%rdi) # ctx->hash[0] += a | ||
| 254 | addl %ebx, 84(%rdi) # ctx->hash[1] += b | ||
| 255 | addl %ecx, 88(%rdi) # ctx->hash[2] += c | ||
| 256 | addl %edx, 92(%rdi) # ctx->hash[3] += d | ||
| 257 | addl %ebp, 96(%rdi) # ctx->hash[4] += e | ||
| 258 | popq %rbx # | ||
| 259 | popq %rbp # | ||
| 260 | popq %r12 # | ||
| 261 | popq %r13 # | ||
| 262 | popq %r14 # | ||
| 263 | popq %r15 # | ||
| 264 | |||
| 265 | ret | ||
| 266 | .size sha1_process_block64, .-sha1_process_block64 | ||
| 267 | #endif" | ||
