author    | Denys Vlasenko <vda.linux@googlemail.com> | 2022-01-03 13:00:07 +0100
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2022-01-03 13:10:30 +0100
commit    | 947bef0deaba7b2ce432d515379091dcd4cf747f (patch)
tree      | 15273e01ea7b42df47e49779fede62f7289a4178
parent    | 05fd13ebec869fc5e6f226481a2405a2685e8db1 (diff)
download  | busybox-w32-947bef0deaba7b2ce432d515379091dcd4cf747f.tar.gz
          | busybox-w32-947bef0deaba7b2ce432d515379091dcd4cf747f.tar.bz2
          | busybox-w32-947bef0deaba7b2ce432d515379091dcd4cf747f.zip
libbb/sha1: x86_64 version: generate from a script, optimize a bit
function                 old     new   delta
sha1_process_block64    3569    3502     -67
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/Config.src               |   2
-rw-r--r-- | libbb/hash_md5_sha_x86-64.S    | 472
-rwxr-xr-x | libbb/hash_md5_sha_x86-64.S.sh | 267
3 files changed, 474 insertions, 267 deletions
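The diff below is easier to follow with the message-schedule layout in mind: per the register-use comment in the code, W[0..7] live on the stack at -32+4*n(%rsp) and W[8..15] live in r8d..r15d. For rounds 16..79, SHA-1 updates the 16-word schedule as a ring buffer, and the central optimization in this commit is doing that update in place: where the old code assembled the new word in the scratch register %esi and then copied it into its destination, the new code XORs the three other taps directly into the register that already holds W[n & 15], saving two `movl` instructions per register-resident round. A minimal C sketch of the recurrence the assembly implements (names here are mine, not from the commit):

```c
#include <stdint.h>

static inline uint32_t rotl32(uint32_t x, unsigned n)
{
	return (x << n) | (x >> (32 - n));
}

/* Rounds 16..79: the 16-entry ring buffer is updated in place,
 * which is what lets the assembly xor straight into r8d..r15d
 * (or the stack slot) without a temporary:
 *   W[n & 15] = rotl32(W[n&15] ^ W[(n+13)&15] ^ W[(n+8)&15] ^ W[(n+2)&15], 1)
 */
static uint32_t sched_update(uint32_t W[16], unsigned n)
{
	W[n & 15] = rotl32(W[n & 15]
			 ^ W[(n + 13) & 15]
			 ^ W[(n + 8) & 15]
			 ^ W[(n + 2) & 15], 1);
	return W[n & 15];
}
```

The dropped `movl` pairs are presumably where most of the 67-byte shrink reported above comes from; the commit also adds a `.balign 8` before the entry point and guards the whole file with `#if CONFIG_SHA1_SMALL == 0`, both visible in the first hunk.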
diff --git a/libbb/Config.src b/libbb/Config.src
index 42a2283aa..c80bee286 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -59,7 +59,7 @@ config SHA1_SMALL
59 | Trade binary size versus speed for the sha1 algorithm. | 59 | Trade binary size versus speed for the sha1 algorithm. |
60 | throughput MB/s size of sha1_process_block64 | 60 | throughput MB/s size of sha1_process_block64 |
61 | value 486 x86-64 486 x86-64 | 61 | value 486 x86-64 486 x86-64 |
62 | 0 367 367 3657 3570 | 62 | 0 367 375 3657 3502 |
63 | 1 224 229 654 732 | 63 | 1 224 229 654 732 |
64 | 2,3 200 195 358 380 | 64 | 2,3 200 195 358 380 |
65 | 65 | ||
diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S
index 466cd9ae9..3e1c4b455 100644
--- a/libbb/hash_md5_sha_x86-64.S
+++ b/libbb/hash_md5_sha_x86-64.S
@@ -1,23 +1,27 @@
1 | ### Generated by hash_md5_sha_x86-64.S.sh ### | 1 | ### Generated by hash_md5_sha_x86-64.S.sh ### |
2 | #if defined(__GNUC__) && defined(__x86_64__) | 2 | |
3 | #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) | ||
3 | .section .text.sha1_process_block64,"ax",@progbits | 4 | .section .text.sha1_process_block64,"ax",@progbits |
4 | .globl sha1_process_block64 | 5 | .globl sha1_process_block64 |
5 | .hidden sha1_process_block64 | 6 | .hidden sha1_process_block64 |
6 | .type sha1_process_block64, @function | 7 | .type sha1_process_block64, @function |
8 | |||
9 | .balign 8 # allow decoders to fetch at least 4 first insns | ||
7 | sha1_process_block64: | 10 | sha1_process_block64: |
8 | pushq %r15 # | 11 | pushq %r15 # |
9 | pushq %r14 # | 12 | pushq %r14 # |
10 | pushq %r13 # | 13 | pushq %r13 # |
11 | pushq %r12 # | 14 | pushq %r12 # |
12 | pushq %rbp # | 15 | pushq %rbp # |
13 | pushq %rbx # | 16 | pushq %rbx # |
14 | pushq %rdi # we need ctx at the end | 17 | pushq %rdi # we need ctx at the end |
15 | 18 | ||
16 | #Register and stack use: | 19 | #Register and stack use: |
17 | # eax..edx: a..d | 20 | # eax..edx: a..d |
18 | # ebp: e | 21 | # ebp: e |
19 | # esi,edi: temps | 22 | # esi,edi: temps |
20 | # -32+4*n(%rsp),r8...r15: W[0..7,8..15] | 23 | # -32+4*n(%rsp),r8...r15: W[0..7,8..15] |
24 | # (TODO: actually W[0..7] are used a bit more often, put _thme_ into r8..r15?) | ||
21 | 25 | ||
22 | movq 4*8(%rdi), %r8 | 26 | movq 4*8(%rdi), %r8 |
23 | bswapq %r8 | 27 | bswapq %r8 |
@@ -253,7 +257,7 @@ sha1_process_block64:
253 | xorl %ecx, %edi # ^d | 257 | xorl %ecx, %edi # ^d |
254 | andl %eax, %edi # &b | 258 | andl %eax, %edi # &b |
255 | xorl %ecx, %edi # (((c ^ d) & b) ^ d) | 259 | xorl %ecx, %edi # (((c ^ d) & b) ^ d) |
256 | leal 0x5A827999(%rdx,%rsi),%edx # e += RCONST + W[n] | 260 | leal 0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
257 | addl %edi, %edx # e += (((c ^ d) & b) ^ d) | 261 | addl %edi, %edx # e += (((c ^ d) & b) ^ d) |
258 | movl %ebp, %esi # | 262 | movl %ebp, %esi # |
259 | roll $5, %esi # rotl32(a,5) | 263 | roll $5, %esi # rotl32(a,5) |
@@ -270,7 +274,7 @@ sha1_process_block64:
270 | xorl %ebx, %edi # ^d | 274 | xorl %ebx, %edi # ^d |
271 | andl %ebp, %edi # &b | 275 | andl %ebp, %edi # &b |
272 | xorl %ebx, %edi # (((c ^ d) & b) ^ d) | 276 | xorl %ebx, %edi # (((c ^ d) & b) ^ d) |
273 | leal 0x5A827999(%rcx,%rsi),%ecx # e += RCONST + W[n] | 277 | leal 0x5A827999(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
274 | addl %edi, %ecx # e += (((c ^ d) & b) ^ d) | 278 | addl %edi, %ecx # e += (((c ^ d) & b) ^ d) |
275 | movl %edx, %esi # | 279 | movl %edx, %esi # |
276 | roll $5, %esi # rotl32(a,5) | 280 | roll $5, %esi # rotl32(a,5) |
@@ -287,7 +291,7 @@ sha1_process_block64:
287 | xorl %eax, %edi # ^d | 291 | xorl %eax, %edi # ^d |
288 | andl %edx, %edi # &b | 292 | andl %edx, %edi # &b |
289 | xorl %eax, %edi # (((c ^ d) & b) ^ d) | 293 | xorl %eax, %edi # (((c ^ d) & b) ^ d) |
290 | leal 0x5A827999(%rbx,%rsi),%ebx # e += RCONST + W[n] | 294 | leal 0x5A827999(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
291 | addl %edi, %ebx # e += (((c ^ d) & b) ^ d) | 295 | addl %edi, %ebx # e += (((c ^ d) & b) ^ d) |
292 | movl %ecx, %esi # | 296 | movl %ecx, %esi # |
293 | roll $5, %esi # rotl32(a,5) | 297 | roll $5, %esi # rotl32(a,5) |
@@ -304,7 +308,7 @@ sha1_process_block64:
304 | xorl %ebp, %edi # ^d | 308 | xorl %ebp, %edi # ^d |
305 | andl %ecx, %edi # &b | 309 | andl %ecx, %edi # &b |
306 | xorl %ebp, %edi # (((c ^ d) & b) ^ d) | 310 | xorl %ebp, %edi # (((c ^ d) & b) ^ d) |
307 | leal 0x5A827999(%rax,%rsi),%eax # e += RCONST + W[n] | 311 | leal 0x5A827999(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
308 | addl %edi, %eax # e += (((c ^ d) & b) ^ d) | 312 | addl %edi, %eax # e += (((c ^ d) & b) ^ d) |
309 | movl %ebx, %esi # | 313 | movl %ebx, %esi # |
310 | roll $5, %esi # rotl32(a,5) | 314 | roll $5, %esi # rotl32(a,5) |
@@ -320,7 +324,7 @@ sha1_process_block64:
320 | movl %ecx, %edi # c | 324 | movl %ecx, %edi # c |
321 | xorl %edx, %edi # ^d | 325 | xorl %edx, %edi # ^d |
322 | xorl %ebx, %edi # ^b | 326 | xorl %ebx, %edi # ^b |
323 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 327 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
324 | addl %edi, %ebp # e += (c ^ d ^ b) | 328 | addl %edi, %ebp # e += (c ^ d ^ b) |
325 | movl %eax, %esi # | 329 | movl %eax, %esi # |
326 | roll $5, %esi # rotl32(a,5) | 330 | roll $5, %esi # rotl32(a,5) |
@@ -336,7 +340,7 @@ sha1_process_block64:
336 | movl %ebx, %edi # c | 340 | movl %ebx, %edi # c |
337 | xorl %ecx, %edi # ^d | 341 | xorl %ecx, %edi # ^d |
338 | xorl %eax, %edi # ^b | 342 | xorl %eax, %edi # ^b |
339 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | 343 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
340 | addl %edi, %edx # e += (c ^ d ^ b) | 344 | addl %edi, %edx # e += (c ^ d ^ b) |
341 | movl %ebp, %esi # | 345 | movl %ebp, %esi # |
342 | roll $5, %esi # rotl32(a,5) | 346 | roll $5, %esi # rotl32(a,5) |
@@ -352,7 +356,7 @@ sha1_process_block64:
352 | movl %eax, %edi # c | 356 | movl %eax, %edi # c |
353 | xorl %ebx, %edi # ^d | 357 | xorl %ebx, %edi # ^d |
354 | xorl %ebp, %edi # ^b | 358 | xorl %ebp, %edi # ^b |
355 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 359 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
356 | addl %edi, %ecx # e += (c ^ d ^ b) | 360 | addl %edi, %ecx # e += (c ^ d ^ b) |
357 | movl %edx, %esi # | 361 | movl %edx, %esi # |
358 | roll $5, %esi # rotl32(a,5) | 362 | roll $5, %esi # rotl32(a,5) |
@@ -368,135 +372,119 @@ sha1_process_block64:
368 | movl %ebp, %edi # c | 372 | movl %ebp, %edi # c |
369 | xorl %eax, %edi # ^d | 373 | xorl %eax, %edi # ^d |
370 | xorl %edx, %edi # ^b | 374 | xorl %edx, %edi # ^b |
371 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 375 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
372 | addl %edi, %ebx # e += (c ^ d ^ b) | 376 | addl %edi, %ebx # e += (c ^ d ^ b) |
373 | movl %ecx, %esi # | 377 | movl %ecx, %esi # |
374 | roll $5, %esi # rotl32(a,5) | 378 | roll $5, %esi # rotl32(a,5) |
375 | addl %esi, %ebx # e += rotl32(a,5) | 379 | addl %esi, %ebx # e += rotl32(a,5) |
376 | rorl $2, %edx # b = rotl32(b,30) | 380 | rorl $2, %edx # b = rotl32(b,30) |
377 | # 24 | 381 | # 24 |
378 | movl -32+4*5(%rsp), %esi # W[(n+13) & 15] | 382 | xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] |
379 | xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] | 383 | xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] |
380 | xorl %r10d, %esi # ^W[(n+2) & 15] | 384 | xorl %r10d, %r8d # ^W[(n+2) & 15] |
381 | xorl %r8d, %esi # ^W[n & 15] | 385 | roll %r8d # |
382 | roll %esi # | ||
383 | movl %esi, %r8d # store to W[n & 15] | ||
384 | movl %edx, %edi # c | 386 | movl %edx, %edi # c |
385 | xorl %ebp, %edi # ^d | 387 | xorl %ebp, %edi # ^d |
386 | xorl %ecx, %edi # ^b | 388 | xorl %ecx, %edi # ^b |
387 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | 389 | leal 0x6ED9EBA1(%rax,%r8), %eax # e += RCONST + W[n & 15] |
388 | addl %edi, %eax # e += (c ^ d ^ b) | 390 | addl %edi, %eax # e += (c ^ d ^ b) |
389 | movl %ebx, %esi # | 391 | movl %ebx, %esi # |
390 | roll $5, %esi # rotl32(a,5) | 392 | roll $5, %esi # rotl32(a,5) |
391 | addl %esi, %eax # e += rotl32(a,5) | 393 | addl %esi, %eax # e += rotl32(a,5) |
392 | rorl $2, %ecx # b = rotl32(b,30) | 394 | rorl $2, %ecx # b = rotl32(b,30) |
393 | # 25 | 395 | # 25 |
394 | movl -32+4*6(%rsp), %esi # W[(n+13) & 15] | 396 | xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] |
395 | xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] | 397 | xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] |
396 | xorl %r11d, %esi # ^W[(n+2) & 15] | 398 | xorl %r11d, %r9d # ^W[(n+2) & 15] |
397 | xorl %r9d, %esi # ^W[n & 15] | 399 | roll %r9d # |
398 | roll %esi # | ||
399 | movl %esi, %r9d # store to W[n & 15] | ||
400 | movl %ecx, %edi # c | 400 | movl %ecx, %edi # c |
401 | xorl %edx, %edi # ^d | 401 | xorl %edx, %edi # ^d |
402 | xorl %ebx, %edi # ^b | 402 | xorl %ebx, %edi # ^b |
403 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 403 | leal 0x6ED9EBA1(%rbp,%r9), %ebp # e += RCONST + W[n & 15] |
404 | addl %edi, %ebp # e += (c ^ d ^ b) | 404 | addl %edi, %ebp # e += (c ^ d ^ b) |
405 | movl %eax, %esi # | 405 | movl %eax, %esi # |
406 | roll $5, %esi # rotl32(a,5) | 406 | roll $5, %esi # rotl32(a,5) |
407 | addl %esi, %ebp # e += rotl32(a,5) | 407 | addl %esi, %ebp # e += rotl32(a,5) |
408 | rorl $2, %ebx # b = rotl32(b,30) | 408 | rorl $2, %ebx # b = rotl32(b,30) |
409 | # 26 | 409 | # 26 |
410 | movl -32+4*7(%rsp), %esi # W[(n+13) & 15] | 410 | xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] |
411 | xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] | 411 | xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] |
412 | xorl %r12d, %esi # ^W[(n+2) & 15] | 412 | xorl %r12d, %r10d # ^W[(n+2) & 15] |
413 | xorl %r10d, %esi # ^W[n & 15] | 413 | roll %r10d # |
414 | roll %esi # | ||
415 | movl %esi, %r10d # store to W[n & 15] | ||
416 | movl %ebx, %edi # c | 414 | movl %ebx, %edi # c |
417 | xorl %ecx, %edi # ^d | 415 | xorl %ecx, %edi # ^d |
418 | xorl %eax, %edi # ^b | 416 | xorl %eax, %edi # ^b |
419 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | 417 | leal 0x6ED9EBA1(%rdx,%r10), %edx # e += RCONST + W[n & 15] |
420 | addl %edi, %edx # e += (c ^ d ^ b) | 418 | addl %edi, %edx # e += (c ^ d ^ b) |
421 | movl %ebp, %esi # | 419 | movl %ebp, %esi # |
422 | roll $5, %esi # rotl32(a,5) | 420 | roll $5, %esi # rotl32(a,5) |
423 | addl %esi, %edx # e += rotl32(a,5) | 421 | addl %esi, %edx # e += rotl32(a,5) |
424 | rorl $2, %eax # b = rotl32(b,30) | 422 | rorl $2, %eax # b = rotl32(b,30) |
425 | # 27 | 423 | # 27 |
426 | movl %r8d, %esi # W[(n+13) & 15] | 424 | xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] |
427 | xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] | 425 | xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] |
428 | xorl %r13d, %esi # ^W[(n+2) & 15] | 426 | xorl %r13d, %r11d # ^W[(n+2) & 15] |
429 | xorl %r11d, %esi # ^W[n & 15] | 427 | roll %r11d # |
430 | roll %esi # | ||
431 | movl %esi, %r11d # store to W[n & 15] | ||
432 | movl %eax, %edi # c | 428 | movl %eax, %edi # c |
433 | xorl %ebx, %edi # ^d | 429 | xorl %ebx, %edi # ^d |
434 | xorl %ebp, %edi # ^b | 430 | xorl %ebp, %edi # ^b |
435 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 431 | leal 0x6ED9EBA1(%rcx,%r11), %ecx # e += RCONST + W[n & 15] |
436 | addl %edi, %ecx # e += (c ^ d ^ b) | 432 | addl %edi, %ecx # e += (c ^ d ^ b) |
437 | movl %edx, %esi # | 433 | movl %edx, %esi # |
438 | roll $5, %esi # rotl32(a,5) | 434 | roll $5, %esi # rotl32(a,5) |
439 | addl %esi, %ecx # e += rotl32(a,5) | 435 | addl %esi, %ecx # e += rotl32(a,5) |
440 | rorl $2, %ebp # b = rotl32(b,30) | 436 | rorl $2, %ebp # b = rotl32(b,30) |
441 | # 28 | 437 | # 28 |
442 | movl %r9d, %esi # W[(n+13) & 15] | 438 | xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] |
443 | xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] | 439 | xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] |
444 | xorl %r14d, %esi # ^W[(n+2) & 15] | 440 | xorl %r14d, %r12d # ^W[(n+2) & 15] |
445 | xorl %r12d, %esi # ^W[n & 15] | 441 | roll %r12d # |
446 | roll %esi # | ||
447 | movl %esi, %r12d # store to W[n & 15] | ||
448 | movl %ebp, %edi # c | 442 | movl %ebp, %edi # c |
449 | xorl %eax, %edi # ^d | 443 | xorl %eax, %edi # ^d |
450 | xorl %edx, %edi # ^b | 444 | xorl %edx, %edi # ^b |
451 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 445 | leal 0x6ED9EBA1(%rbx,%r12), %ebx # e += RCONST + W[n & 15] |
452 | addl %edi, %ebx # e += (c ^ d ^ b) | 446 | addl %edi, %ebx # e += (c ^ d ^ b) |
453 | movl %ecx, %esi # | 447 | movl %ecx, %esi # |
454 | roll $5, %esi # rotl32(a,5) | 448 | roll $5, %esi # rotl32(a,5) |
455 | addl %esi, %ebx # e += rotl32(a,5) | 449 | addl %esi, %ebx # e += rotl32(a,5) |
456 | rorl $2, %edx # b = rotl32(b,30) | 450 | rorl $2, %edx # b = rotl32(b,30) |
457 | # 29 | 451 | # 29 |
458 | movl %r10d, %esi # W[(n+13) & 15] | 452 | xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] |
459 | xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] | 453 | xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] |
460 | xorl %r15d, %esi # ^W[(n+2) & 15] | 454 | xorl %r15d, %r13d # ^W[(n+2) & 15] |
461 | xorl %r13d, %esi # ^W[n & 15] | 455 | roll %r13d # |
462 | roll %esi # | ||
463 | movl %esi, %r13d # store to W[n & 15] | ||
464 | movl %edx, %edi # c | 456 | movl %edx, %edi # c |
465 | xorl %ebp, %edi # ^d | 457 | xorl %ebp, %edi # ^d |
466 | xorl %ecx, %edi # ^b | 458 | xorl %ecx, %edi # ^b |
467 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | 459 | leal 0x6ED9EBA1(%rax,%r13), %eax # e += RCONST + W[n & 15] |
468 | addl %edi, %eax # e += (c ^ d ^ b) | 460 | addl %edi, %eax # e += (c ^ d ^ b) |
469 | movl %ebx, %esi # | 461 | movl %ebx, %esi # |
470 | roll $5, %esi # rotl32(a,5) | 462 | roll $5, %esi # rotl32(a,5) |
471 | addl %esi, %eax # e += rotl32(a,5) | 463 | addl %esi, %eax # e += rotl32(a,5) |
472 | rorl $2, %ecx # b = rotl32(b,30) | 464 | rorl $2, %ecx # b = rotl32(b,30) |
473 | # 30 | 465 | # 30 |
474 | movl %r11d, %esi # W[(n+13) & 15] | 466 | xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] |
475 | xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] | 467 | xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] |
476 | xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] | 468 | xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] |
477 | xorl %r14d, %esi # ^W[n & 15] | 469 | roll %r14d # |
478 | roll %esi # | ||
479 | movl %esi, %r14d # store to W[n & 15] | ||
480 | movl %ecx, %edi # c | 470 | movl %ecx, %edi # c |
481 | xorl %edx, %edi # ^d | 471 | xorl %edx, %edi # ^d |
482 | xorl %ebx, %edi # ^b | 472 | xorl %ebx, %edi # ^b |
483 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 473 | leal 0x6ED9EBA1(%rbp,%r14), %ebp # e += RCONST + W[n & 15] |
484 | addl %edi, %ebp # e += (c ^ d ^ b) | 474 | addl %edi, %ebp # e += (c ^ d ^ b) |
485 | movl %eax, %esi # | 475 | movl %eax, %esi # |
486 | roll $5, %esi # rotl32(a,5) | 476 | roll $5, %esi # rotl32(a,5) |
487 | addl %esi, %ebp # e += rotl32(a,5) | 477 | addl %esi, %ebp # e += rotl32(a,5) |
488 | rorl $2, %ebx # b = rotl32(b,30) | 478 | rorl $2, %ebx # b = rotl32(b,30) |
489 | # 31 | 479 | # 31 |
490 | movl %r12d, %esi # W[(n+13) & 15] | 480 | xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] |
491 | xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] | 481 | xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] |
492 | xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] | 482 | xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] |
493 | xorl %r15d, %esi # ^W[n & 15] | 483 | roll %r15d # |
494 | roll %esi # | ||
495 | movl %esi, %r15d # store to W[n & 15] | ||
496 | movl %ebx, %edi # c | 484 | movl %ebx, %edi # c |
497 | xorl %ecx, %edi # ^d | 485 | xorl %ecx, %edi # ^d |
498 | xorl %eax, %edi # ^b | 486 | xorl %eax, %edi # ^b |
499 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | 487 | leal 0x6ED9EBA1(%rdx,%r15), %edx # e += RCONST + W[n & 15] |
500 | addl %edi, %edx # e += (c ^ d ^ b) | 488 | addl %edi, %edx # e += (c ^ d ^ b) |
501 | movl %ebp, %esi # | 489 | movl %ebp, %esi # |
502 | roll $5, %esi # rotl32(a,5) | 490 | roll $5, %esi # rotl32(a,5) |
@@ -512,7 +500,7 @@ sha1_process_block64:
512 | movl %eax, %edi # c | 500 | movl %eax, %edi # c |
513 | xorl %ebx, %edi # ^d | 501 | xorl %ebx, %edi # ^d |
514 | xorl %ebp, %edi # ^b | 502 | xorl %ebp, %edi # ^b |
515 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 503 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
516 | addl %edi, %ecx # e += (c ^ d ^ b) | 504 | addl %edi, %ecx # e += (c ^ d ^ b) |
517 | movl %edx, %esi # | 505 | movl %edx, %esi # |
518 | roll $5, %esi # rotl32(a,5) | 506 | roll $5, %esi # rotl32(a,5) |
@@ -528,7 +516,7 @@ sha1_process_block64:
528 | movl %ebp, %edi # c | 516 | movl %ebp, %edi # c |
529 | xorl %eax, %edi # ^d | 517 | xorl %eax, %edi # ^d |
530 | xorl %edx, %edi # ^b | 518 | xorl %edx, %edi # ^b |
531 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 519 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
532 | addl %edi, %ebx # e += (c ^ d ^ b) | 520 | addl %edi, %ebx # e += (c ^ d ^ b) |
533 | movl %ecx, %esi # | 521 | movl %ecx, %esi # |
534 | roll $5, %esi # rotl32(a,5) | 522 | roll $5, %esi # rotl32(a,5) |
@@ -544,7 +532,7 @@ sha1_process_block64:
544 | movl %edx, %edi # c | 532 | movl %edx, %edi # c |
545 | xorl %ebp, %edi # ^d | 533 | xorl %ebp, %edi # ^d |
546 | xorl %ecx, %edi # ^b | 534 | xorl %ecx, %edi # ^b |
547 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | 535 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
548 | addl %edi, %eax # e += (c ^ d ^ b) | 536 | addl %edi, %eax # e += (c ^ d ^ b) |
549 | movl %ebx, %esi # | 537 | movl %ebx, %esi # |
550 | roll $5, %esi # rotl32(a,5) | 538 | roll $5, %esi # rotl32(a,5) |
@@ -560,7 +548,7 @@ sha1_process_block64:
560 | movl %ecx, %edi # c | 548 | movl %ecx, %edi # c |
561 | xorl %edx, %edi # ^d | 549 | xorl %edx, %edi # ^d |
562 | xorl %ebx, %edi # ^b | 550 | xorl %ebx, %edi # ^b |
563 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 551 | leal 0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
564 | addl %edi, %ebp # e += (c ^ d ^ b) | 552 | addl %edi, %ebp # e += (c ^ d ^ b) |
565 | movl %eax, %esi # | 553 | movl %eax, %esi # |
566 | roll $5, %esi # rotl32(a,5) | 554 | roll $5, %esi # rotl32(a,5) |
@@ -576,7 +564,7 @@ sha1_process_block64:
576 | movl %ebx, %edi # c | 564 | movl %ebx, %edi # c |
577 | xorl %ecx, %edi # ^d | 565 | xorl %ecx, %edi # ^d |
578 | xorl %eax, %edi # ^b | 566 | xorl %eax, %edi # ^b |
579 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | 567 | leal 0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
580 | addl %edi, %edx # e += (c ^ d ^ b) | 568 | addl %edi, %edx # e += (c ^ d ^ b) |
581 | movl %ebp, %esi # | 569 | movl %ebp, %esi # |
582 | roll $5, %esi # rotl32(a,5) | 570 | roll $5, %esi # rotl32(a,5) |
@@ -592,7 +580,7 @@ sha1_process_block64:
592 | movl %eax, %edi # c | 580 | movl %eax, %edi # c |
593 | xorl %ebx, %edi # ^d | 581 | xorl %ebx, %edi # ^d |
594 | xorl %ebp, %edi # ^b | 582 | xorl %ebp, %edi # ^b |
595 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 583 | leal 0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
596 | addl %edi, %ecx # e += (c ^ d ^ b) | 584 | addl %edi, %ecx # e += (c ^ d ^ b) |
597 | movl %edx, %esi # | 585 | movl %edx, %esi # |
598 | roll $5, %esi # rotl32(a,5) | 586 | roll $5, %esi # rotl32(a,5) |
@@ -608,7 +596,7 @@ sha1_process_block64:
608 | movl %ebp, %edi # c | 596 | movl %ebp, %edi # c |
609 | xorl %eax, %edi # ^d | 597 | xorl %eax, %edi # ^d |
610 | xorl %edx, %edi # ^b | 598 | xorl %edx, %edi # ^b |
611 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 599 | leal 0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
612 | addl %edi, %ebx # e += (c ^ d ^ b) | 600 | addl %edi, %ebx # e += (c ^ d ^ b) |
613 | movl %ecx, %esi # | 601 | movl %ecx, %esi # |
614 | roll $5, %esi # rotl32(a,5) | 602 | roll $5, %esi # rotl32(a,5) |
@@ -624,7 +612,7 @@ sha1_process_block64:
624 | movl %edx, %edi # c | 612 | movl %edx, %edi # c |
625 | xorl %ebp, %edi # ^d | 613 | xorl %ebp, %edi # ^d |
626 | xorl %ecx, %edi # ^b | 614 | xorl %ecx, %edi # ^b |
627 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | 615 | leal 0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
628 | addl %edi, %eax # e += (c ^ d ^ b) | 616 | addl %edi, %eax # e += (c ^ d ^ b) |
629 | movl %ebx, %esi # | 617 | movl %ebx, %esi # |
630 | roll $5, %esi # rotl32(a,5) | 618 | roll $5, %esi # rotl32(a,5) |
@@ -637,14 +625,12 @@ sha1_process_block64:
637 | andl %ecx, %esi # si: b & c | 625 | andl %ecx, %esi # si: b & c |
638 | andl %edx, %edi # di: (b | c) & d | 626 | andl %edx, %edi # di: (b | c) & d |
639 | orl %esi, %edi # ((b | c) & d) | (b & c) | 627 | orl %esi, %edi # ((b | c) & d) | (b & c) |
640 | movl -32+4*5(%rsp), %esi # W[(n+13) & 15] | 628 | xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] |
641 | xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] | 629 | xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] |
642 | xorl %r10d, %esi # ^W[(n+2) & 15] | 630 | xorl %r10d, %r8d # ^W[(n+2) & 15] |
643 | xorl %r8d, %esi # ^W[n & 15] | 631 | roll %r8d # |
644 | roll %esi # | ||
645 | movl %esi, %r8d # store to W[n & 15] | ||
646 | addl %edi, %ebp # += ((b | c) & d) | (b & c) | 632 | addl %edi, %ebp # += ((b | c) & d) | (b & c) |
647 | leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 633 | leal -0x70E44324(%rbp,%r8), %ebp # e += RCONST + W[n & 15] |
648 | movl %eax, %esi # | 634 | movl %eax, %esi # |
649 | roll $5, %esi # rotl32(a,5) | 635 | roll $5, %esi # rotl32(a,5) |
650 | addl %esi, %ebp # e += rotl32(a,5) | 636 | addl %esi, %ebp # e += rotl32(a,5) |
@@ -656,14 +642,12 @@ sha1_process_block64:
656 | andl %ebx, %esi # si: b & c | 642 | andl %ebx, %esi # si: b & c |
657 | andl %ecx, %edi # di: (b | c) & d | 643 | andl %ecx, %edi # di: (b | c) & d |
658 | orl %esi, %edi # ((b | c) & d) | (b & c) | 644 | orl %esi, %edi # ((b | c) & d) | (b & c) |
659 | movl -32+4*6(%rsp), %esi # W[(n+13) & 15] | 645 | xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] |
660 | xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] | 646 | xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] |
661 | xorl %r11d, %esi # ^W[(n+2) & 15] | 647 | xorl %r11d, %r9d # ^W[(n+2) & 15] |
662 | xorl %r9d, %esi # ^W[n & 15] | 648 | roll %r9d # |
663 | roll %esi # | ||
664 | movl %esi, %r9d # store to W[n & 15] | ||
665 | addl %edi, %edx # += ((b | c) & d) | (b & c) | 649 | addl %edi, %edx # += ((b | c) & d) | (b & c) |
666 | leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | 650 | leal -0x70E44324(%rdx,%r9), %edx # e += RCONST + W[n & 15] |
667 | movl %ebp, %esi # | 651 | movl %ebp, %esi # |
668 | roll $5, %esi # rotl32(a,5) | 652 | roll $5, %esi # rotl32(a,5) |
669 | addl %esi, %edx # e += rotl32(a,5) | 653 | addl %esi, %edx # e += rotl32(a,5) |
@@ -675,14 +659,12 @@ sha1_process_block64:
675 | andl %eax, %esi # si: b & c | 659 | andl %eax, %esi # si: b & c |
676 | andl %ebx, %edi # di: (b | c) & d | 660 | andl %ebx, %edi # di: (b | c) & d |
677 | orl %esi, %edi # ((b | c) & d) | (b & c) | 661 | orl %esi, %edi # ((b | c) & d) | (b & c) |
678 | movl -32+4*7(%rsp), %esi # W[(n+13) & 15] | 662 | xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] |
679 | xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] | 663 | xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] |
680 | xorl %r12d, %esi # ^W[(n+2) & 15] | 664 | xorl %r12d, %r10d # ^W[(n+2) & 15] |
681 | xorl %r10d, %esi # ^W[n & 15] | 665 | roll %r10d # |
682 | roll %esi # | ||
683 | movl %esi, %r10d # store to W[n & 15] | ||
684 | addl %edi, %ecx # += ((b | c) & d) | (b & c) | 666 | addl %edi, %ecx # += ((b | c) & d) | (b & c) |
685 | leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 667 | leal -0x70E44324(%rcx,%r10), %ecx # e += RCONST + W[n & 15] |
686 | movl %edx, %esi # | 668 | movl %edx, %esi # |
687 | roll $5, %esi # rotl32(a,5) | 669 | roll $5, %esi # rotl32(a,5) |
688 | addl %esi, %ecx # e += rotl32(a,5) | 670 | addl %esi, %ecx # e += rotl32(a,5) |
@@ -694,14 +676,12 @@ sha1_process_block64:
694 | andl %ebp, %esi # si: b & c | 676 | andl %ebp, %esi # si: b & c |
695 | andl %eax, %edi # di: (b | c) & d | 677 | andl %eax, %edi # di: (b | c) & d |
696 | orl %esi, %edi # ((b | c) & d) | (b & c) | 678 | orl %esi, %edi # ((b | c) & d) | (b & c) |
697 | movl %r8d, %esi # W[(n+13) & 15] | 679 | xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] |
698 | xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] | 680 | xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] |
699 | xorl %r13d, %esi # ^W[(n+2) & 15] | 681 | xorl %r13d, %r11d # ^W[(n+2) & 15] |
700 | xorl %r11d, %esi # ^W[n & 15] | 682 | roll %r11d # |
701 | roll %esi # | ||
702 | movl %esi, %r11d # store to W[n & 15] | ||
703 | addl %edi, %ebx # += ((b | c) & d) | (b & c) | 683 | addl %edi, %ebx # += ((b | c) & d) | (b & c) |
704 | leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 684 | leal -0x70E44324(%rbx,%r11), %ebx # e += RCONST + W[n & 15] |
705 | movl %ecx, %esi # | 685 | movl %ecx, %esi # |
706 | roll $5, %esi # rotl32(a,5) | 686 | roll $5, %esi # rotl32(a,5) |
707 | addl %esi, %ebx # e += rotl32(a,5) | 687 | addl %esi, %ebx # e += rotl32(a,5) |
@@ -713,14 +693,12 @@ sha1_process_block64:
713 | andl %edx, %esi # si: b & c | 693 | andl %edx, %esi # si: b & c |
714 | andl %ebp, %edi # di: (b | c) & d | 694 | andl %ebp, %edi # di: (b | c) & d |
715 | orl %esi, %edi # ((b | c) & d) | (b & c) | 695 | orl %esi, %edi # ((b | c) & d) | (b & c) |
716 | movl %r9d, %esi # W[(n+13) & 15] | 696 | xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] |
717 | xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] | 697 | xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] |
718 | xorl %r14d, %esi # ^W[(n+2) & 15] | 698 | xorl %r14d, %r12d # ^W[(n+2) & 15] |
719 | xorl %r12d, %esi # ^W[n & 15] | 699 | roll %r12d # |
720 | roll %esi # | ||
721 | movl %esi, %r12d # store to W[n & 15] | ||
722 | addl %edi, %eax # += ((b | c) & d) | (b & c) | 700 | addl %edi, %eax # += ((b | c) & d) | (b & c) |
723 | leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | 701 | leal -0x70E44324(%rax,%r12), %eax # e += RCONST + W[n & 15] |
724 | movl %ebx, %esi # | 702 | movl %ebx, %esi # |
725 | roll $5, %esi # rotl32(a,5) | 703 | roll $5, %esi # rotl32(a,5) |
726 | addl %esi, %eax # e += rotl32(a,5) | 704 | addl %esi, %eax # e += rotl32(a,5) |
@@ -732,14 +710,12 @@ sha1_process_block64:
732 | andl %ecx, %esi # si: b & c | 710 | andl %ecx, %esi # si: b & c |
733 | andl %edx, %edi # di: (b | c) & d | 711 | andl %edx, %edi # di: (b | c) & d |
734 | orl %esi, %edi # ((b | c) & d) | (b & c) | 712 | orl %esi, %edi # ((b | c) & d) | (b & c) |
735 | movl %r10d, %esi # W[(n+13) & 15] | 713 | xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] |
736 | xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] | 714 | xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] |
737 | xorl %r15d, %esi # ^W[(n+2) & 15] | 715 | xorl %r15d, %r13d # ^W[(n+2) & 15] |
738 | xorl %r13d, %esi # ^W[n & 15] | 716 | roll %r13d # |
739 | roll %esi # | ||
740 | movl %esi, %r13d # store to W[n & 15] | ||
741 | addl %edi, %ebp # += ((b | c) & d) | (b & c) | 717 | addl %edi, %ebp # += ((b | c) & d) | (b & c) |
742 | leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 718 | leal -0x70E44324(%rbp,%r13), %ebp # e += RCONST + W[n & 15] |
743 | movl %eax, %esi # | 719 | movl %eax, %esi # |
744 | roll $5, %esi # rotl32(a,5) | 720 | roll $5, %esi # rotl32(a,5) |
745 | addl %esi, %ebp # e += rotl32(a,5) | 721 | addl %esi, %ebp # e += rotl32(a,5) |
@@ -751,14 +727,12 @@ sha1_process_block64:
751 | andl %ebx, %esi # si: b & c | 727 | andl %ebx, %esi # si: b & c |
752 | andl %ecx, %edi # di: (b | c) & d | 728 | andl %ecx, %edi # di: (b | c) & d |
753 | orl %esi, %edi # ((b | c) & d) | (b & c) | 729 | orl %esi, %edi # ((b | c) & d) | (b & c) |
754 | movl %r11d, %esi # W[(n+13) & 15] | 730 | xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] |
755 | xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] | 731 | xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] |
756 | xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] | 732 | xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] |
757 | xorl %r14d, %esi # ^W[n & 15] | 733 | roll %r14d # |
758 | roll %esi # | ||
759 | movl %esi, %r14d # store to W[n & 15] | ||
760 | addl %edi, %edx # += ((b | c) & d) | (b & c) | 734 | addl %edi, %edx # += ((b | c) & d) | (b & c) |
761 | leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | 735 | leal -0x70E44324(%rdx,%r14), %edx # e += RCONST + W[n & 15] |
762 | movl %ebp, %esi # | 736 | movl %ebp, %esi # |
763 | roll $5, %esi # rotl32(a,5) | 737 | roll $5, %esi # rotl32(a,5) |
764 | addl %esi, %edx # e += rotl32(a,5) | 738 | addl %esi, %edx # e += rotl32(a,5) |
@@ -770,14 +744,12 @@ sha1_process_block64:
770 | andl %eax, %esi # si: b & c | 744 | andl %eax, %esi # si: b & c |
771 | andl %ebx, %edi # di: (b | c) & d | 745 | andl %ebx, %edi # di: (b | c) & d |
772 | orl %esi, %edi # ((b | c) & d) | (b & c) | 746 | orl %esi, %edi # ((b | c) & d) | (b & c) |
773 | movl %r12d, %esi # W[(n+13) & 15] | 747 | xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] |
774 | xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] | 748 | xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] |
775 | xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] | 749 | xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] |
776 | xorl %r15d, %esi # ^W[n & 15] | 750 | roll %r15d # |
777 | roll %esi # | ||
778 | movl %esi, %r15d # store to W[n & 15] | ||
779 | addl %edi, %ecx # += ((b | c) & d) | (b & c) | 751 | addl %edi, %ecx # += ((b | c) & d) | (b & c) |
780 | leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 752 | leal -0x70E44324(%rcx,%r15), %ecx # e += RCONST + W[n & 15] |
781 | movl %edx, %esi # | 753 | movl %edx, %esi # |
782 | roll $5, %esi # rotl32(a,5) | 754 | roll $5, %esi # rotl32(a,5) |
783 | addl %esi, %ecx # e += rotl32(a,5) | 755 | addl %esi, %ecx # e += rotl32(a,5) |
@@ -796,7 +768,7 @@ sha1_process_block64:
796 | roll %esi # | 768 | roll %esi # |
797 | movl %esi, -32+4*0(%rsp) # store to W[n & 15] | 769 | movl %esi, -32+4*0(%rsp) # store to W[n & 15] |
798 | addl %edi, %ebx # += ((b | c) & d) | (b & c) | 770 | addl %edi, %ebx # += ((b | c) & d) | (b & c) |
799 | leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 771 | leal -0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
800 | movl %ecx, %esi # | 772 | movl %ecx, %esi # |
801 | roll $5, %esi # rotl32(a,5) | 773 | roll $5, %esi # rotl32(a,5) |
802 | addl %esi, %ebx # e += rotl32(a,5) | 774 | addl %esi, %ebx # e += rotl32(a,5) |
@@ -815,7 +787,7 @@ sha1_process_block64:
815 | roll %esi # | 787 | roll %esi # |
816 | movl %esi, -32+4*1(%rsp) # store to W[n & 15] | 788 | movl %esi, -32+4*1(%rsp) # store to W[n & 15] |
817 | addl %edi, %eax # += ((b | c) & d) | (b & c) | 789 | addl %edi, %eax # += ((b | c) & d) | (b & c) |
818 | leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | 790 | leal -0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
819 | movl %ebx, %esi # | 791 | movl %ebx, %esi # |
820 | roll $5, %esi # rotl32(a,5) | 792 | roll $5, %esi # rotl32(a,5) |
821 | addl %esi, %eax # e += rotl32(a,5) | 793 | addl %esi, %eax # e += rotl32(a,5) |
@@ -834,7 +806,7 @@ sha1_process_block64:
834 | roll %esi # | 806 | roll %esi # |
835 | movl %esi, -32+4*2(%rsp) # store to W[n & 15] | 807 | movl %esi, -32+4*2(%rsp) # store to W[n & 15] |
836 | addl %edi, %ebp # += ((b | c) & d) | (b & c) | 808 | addl %edi, %ebp # += ((b | c) & d) | (b & c) |
837 | leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 809 | leal -0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
838 | movl %eax, %esi # | 810 | movl %eax, %esi # |
839 | roll $5, %esi # rotl32(a,5) | 811 | roll $5, %esi # rotl32(a,5) |
840 | addl %esi, %ebp # e += rotl32(a,5) | 812 | addl %esi, %ebp # e += rotl32(a,5) |
@@ -853,7 +825,7 @@ sha1_process_block64:
853 | roll %esi # | 825 | roll %esi # |
854 | movl %esi, -32+4*3(%rsp) # store to W[n & 15] | 826 | movl %esi, -32+4*3(%rsp) # store to W[n & 15] |
855 | addl %edi, %edx # += ((b | c) & d) | (b & c) | 827 | addl %edi, %edx # += ((b | c) & d) | (b & c) |
856 | leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | 828 | leal -0x70E44324(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
857 | movl %ebp, %esi # | 829 | movl %ebp, %esi # |
858 | roll $5, %esi # rotl32(a,5) | 830 | roll $5, %esi # rotl32(a,5) |
859 | addl %esi, %edx # e += rotl32(a,5) | 831 | addl %esi, %edx # e += rotl32(a,5) |
@@ -872,7 +844,7 @@ sha1_process_block64:
872 | roll %esi # | 844 | roll %esi # |
873 | movl %esi, -32+4*4(%rsp) # store to W[n & 15] | 845 | movl %esi, -32+4*4(%rsp) # store to W[n & 15] |
874 | addl %edi, %ecx # += ((b | c) & d) | (b & c) | 846 | addl %edi, %ecx # += ((b | c) & d) | (b & c) |
875 | leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 847 | leal -0x70E44324(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
876 | movl %edx, %esi # | 848 | movl %edx, %esi # |
877 | roll $5, %esi # rotl32(a,5) | 849 | roll $5, %esi # rotl32(a,5) |
878 | addl %esi, %ecx # e += rotl32(a,5) | 850 | addl %esi, %ecx # e += rotl32(a,5) |
@@ -891,7 +863,7 @@ sha1_process_block64:
891 | roll %esi # | 863 | roll %esi # |
892 | movl %esi, -32+4*5(%rsp) # store to W[n & 15] | 864 | movl %esi, -32+4*5(%rsp) # store to W[n & 15] |
893 | addl %edi, %ebx # += ((b | c) & d) | (b & c) | 865 | addl %edi, %ebx # += ((b | c) & d) | (b & c) |
894 | leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 866 | leal -0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
895 | movl %ecx, %esi # | 867 | movl %ecx, %esi # |
896 | roll $5, %esi # rotl32(a,5) | 868 | roll $5, %esi # rotl32(a,5) |
897 | addl %esi, %ebx # e += rotl32(a,5) | 869 | addl %esi, %ebx # e += rotl32(a,5) |
@@ -910,7 +882,7 @@ sha1_process_block64:
910 | roll %esi # | 882 | roll %esi # |
911 | movl %esi, -32+4*6(%rsp) # store to W[n & 15] | 883 | movl %esi, -32+4*6(%rsp) # store to W[n & 15] |
912 | addl %edi, %eax # += ((b | c) & d) | (b & c) | 884 | addl %edi, %eax # += ((b | c) & d) | (b & c) |
913 | leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | 885 | leal -0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
914 | movl %ebx, %esi # | 886 | movl %ebx, %esi # |
915 | roll $5, %esi # rotl32(a,5) | 887 | roll $5, %esi # rotl32(a,5) |
916 | addl %esi, %eax # e += rotl32(a,5) | 888 | addl %esi, %eax # e += rotl32(a,5) |
@@ -929,7 +901,7 @@ sha1_process_block64:
929 | roll %esi # | 901 | roll %esi # |
930 | movl %esi, -32+4*7(%rsp) # store to W[n & 15] | 902 | movl %esi, -32+4*7(%rsp) # store to W[n & 15] |
931 | addl %edi, %ebp # += ((b | c) & d) | (b & c) | 903 | addl %edi, %ebp # += ((b | c) & d) | (b & c) |
932 | leal -0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 904 | leal -0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
933 | movl %eax, %esi # | 905 | movl %eax, %esi # |
934 | roll $5, %esi # rotl32(a,5) | 906 | roll $5, %esi # rotl32(a,5) |
935 | addl %esi, %ebp # e += rotl32(a,5) | 907 | addl %esi, %ebp # e += rotl32(a,5) |
@@ -941,14 +913,12 @@ sha1_process_block64:
941 | andl %ebx, %esi # si: b & c | 913 | andl %ebx, %esi # si: b & c |
942 | andl %ecx, %edi # di: (b | c) & d | 914 | andl %ecx, %edi # di: (b | c) & d |
943 | orl %esi, %edi # ((b | c) & d) | (b & c) | 915 | orl %esi, %edi # ((b | c) & d) | (b & c) |
944 | movl -32+4*5(%rsp), %esi # W[(n+13) & 15] | 916 | xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] |
945 | xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] | 917 | xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] |
946 | xorl %r10d, %esi # ^W[(n+2) & 15] | 918 | xorl %r10d, %r8d # ^W[(n+2) & 15] |
947 | xorl %r8d, %esi # ^W[n & 15] | 919 | roll %r8d # |
948 | roll %esi # | ||
949 | movl %esi, %r8d # store to W[n & 15] | ||
950 | addl %edi, %edx # += ((b | c) & d) | (b & c) | 920 | addl %edi, %edx # += ((b | c) & d) | (b & c) |
951 | leal -0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | 921 | leal -0x70E44324(%rdx,%r8), %edx # e += RCONST + W[n & 15] |
952 | movl %ebp, %esi # | 922 | movl %ebp, %esi # |
953 | roll $5, %esi # rotl32(a,5) | 923 | roll $5, %esi # rotl32(a,5) |
954 | addl %esi, %edx # e += rotl32(a,5) | 924 | addl %esi, %edx # e += rotl32(a,5) |
@@ -960,14 +930,12 @@ sha1_process_block64:
960 | andl %eax, %esi # si: b & c | 930 | andl %eax, %esi # si: b & c |
961 | andl %ebx, %edi # di: (b | c) & d | 931 | andl %ebx, %edi # di: (b | c) & d |
962 | orl %esi, %edi # ((b | c) & d) | (b & c) | 932 | orl %esi, %edi # ((b | c) & d) | (b & c) |
963 | movl -32+4*6(%rsp), %esi # W[(n+13) & 15] | 933 | xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] |
964 | xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] | 934 | xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] |
965 | xorl %r11d, %esi # ^W[(n+2) & 15] | 935 | xorl %r11d, %r9d # ^W[(n+2) & 15] |
966 | xorl %r9d, %esi # ^W[n & 15] | 936 | roll %r9d # |
967 | roll %esi # | ||
968 | movl %esi, %r9d # store to W[n & 15] | ||
969 | addl %edi, %ecx # += ((b | c) & d) | (b & c) | 937 | addl %edi, %ecx # += ((b | c) & d) | (b & c) |
970 | leal -0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 938 | leal -0x70E44324(%rcx,%r9), %ecx # e += RCONST + W[n & 15] |
971 | movl %edx, %esi # | 939 | movl %edx, %esi # |
972 | roll $5, %esi # rotl32(a,5) | 940 | roll $5, %esi # rotl32(a,5) |
973 | addl %esi, %ecx # e += rotl32(a,5) | 941 | addl %esi, %ecx # e += rotl32(a,5) |
@@ -979,14 +947,12 @@ sha1_process_block64:
979 | andl %ebp, %esi # si: b & c | 947 | andl %ebp, %esi # si: b & c |
980 | andl %eax, %edi # di: (b | c) & d | 948 | andl %eax, %edi # di: (b | c) & d |
981 | orl %esi, %edi # ((b | c) & d) | (b & c) | 949 | orl %esi, %edi # ((b | c) & d) | (b & c) |
982 | movl -32+4*7(%rsp), %esi # W[(n+13) & 15] | 950 | xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] |
983 | xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] | 951 | xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] |
984 | xorl %r12d, %esi # ^W[(n+2) & 15] | 952 | xorl %r12d, %r10d # ^W[(n+2) & 15] |
985 | xorl %r10d, %esi # ^W[n & 15] | 953 | roll %r10d # |
986 | roll %esi # | ||
987 | movl %esi, %r10d # store to W[n & 15] | ||
988 | addl %edi, %ebx # += ((b | c) & d) | (b & c) | 954 | addl %edi, %ebx # += ((b | c) & d) | (b & c) |
989 | leal -0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 955 | leal -0x70E44324(%rbx,%r10), %ebx # e += RCONST + W[n & 15] |
990 | movl %ecx, %esi # | 956 | movl %ecx, %esi # |
991 | roll $5, %esi # rotl32(a,5) | 957 | roll $5, %esi # rotl32(a,5) |
992 | addl %esi, %ebx # e += rotl32(a,5) | 958 | addl %esi, %ebx # e += rotl32(a,5) |
@@ -998,77 +964,67 @@ sha1_process_block64:
998 | andl %edx, %esi # si: b & c | 964 | andl %edx, %esi # si: b & c |
999 | andl %ebp, %edi # di: (b | c) & d | 965 | andl %ebp, %edi # di: (b | c) & d |
1000 | orl %esi, %edi # ((b | c) & d) | (b & c) | 966 | orl %esi, %edi # ((b | c) & d) | (b & c) |
1001 | movl %r8d, %esi # W[(n+13) & 15] | 967 | xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] |
1002 | xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] | 968 | xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] |
1003 | xorl %r13d, %esi # ^W[(n+2) & 15] | 969 | xorl %r13d, %r11d # ^W[(n+2) & 15] |
1004 | xorl %r11d, %esi # ^W[n & 15] | 970 | roll %r11d # |
1005 | roll %esi # | ||
1006 | movl %esi, %r11d # store to W[n & 15] | ||
1007 | addl %edi, %eax # += ((b | c) & d) | (b & c) | 971 | addl %edi, %eax # += ((b | c) & d) | (b & c) |
1008 | leal -0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | 972 | leal -0x70E44324(%rax,%r11), %eax # e += RCONST + W[n & 15] |
1009 | movl %ebx, %esi # | 973 | movl %ebx, %esi # |
1010 | roll $5, %esi # rotl32(a,5) | 974 | roll $5, %esi # rotl32(a,5) |
1011 | addl %esi, %eax # e += rotl32(a,5) | 975 | addl %esi, %eax # e += rotl32(a,5) |
1012 | rorl $2, %ecx # b = rotl32(b,30) | 976 | rorl $2, %ecx # b = rotl32(b,30) |
1013 | # 60 | 977 | # 60 |
1014 | movl %r9d, %esi # W[(n+13) & 15] | 978 | xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] |
1015 | xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] | 979 | xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] |
1016 | xorl %r14d, %esi # ^W[(n+2) & 15] | 980 | xorl %r14d, %r12d # ^W[(n+2) & 15] |
1017 | xorl %r12d, %esi # ^W[n & 15] | 981 | roll %r12d # |
1018 | roll %esi # | ||
1019 | movl %esi, %r12d # store to W[n & 15] | ||
1020 | movl %ecx, %edi # c | 982 | movl %ecx, %edi # c |
1021 | xorl %edx, %edi # ^d | 983 | xorl %edx, %edi # ^d |
1022 | xorl %ebx, %edi # ^b | 984 | xorl %ebx, %edi # ^b |
1023 | leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 985 | leal -0x359D3E2A(%rbp,%r12), %ebp # e += RCONST + W[n & 15] |
1024 | addl %edi, %ebp # e += (c ^ d ^ b) | 986 | addl %edi, %ebp # e += (c ^ d ^ b) |
1025 | movl %eax, %esi # | 987 | movl %eax, %esi # |
1026 | roll $5, %esi # rotl32(a,5) | 988 | roll $5, %esi # rotl32(a,5) |
1027 | addl %esi, %ebp # e += rotl32(a,5) | 989 | addl %esi, %ebp # e += rotl32(a,5) |
1028 | rorl $2, %ebx # b = rotl32(b,30) | 990 | rorl $2, %ebx # b = rotl32(b,30) |
1029 | # 61 | 991 | # 61 |
1030 | movl %r10d, %esi # W[(n+13) & 15] | 992 | xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] |
1031 | xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] | 993 | xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] |
1032 | xorl %r15d, %esi # ^W[(n+2) & 15] | 994 | xorl %r15d, %r13d # ^W[(n+2) & 15] |
1033 | xorl %r13d, %esi # ^W[n & 15] | 995 | roll %r13d # |
1034 | roll %esi # | ||
1035 | movl %esi, %r13d # store to W[n & 15] | ||
1036 | movl %ebx, %edi # c | 996 | movl %ebx, %edi # c |
1037 | xorl %ecx, %edi # ^d | 997 | xorl %ecx, %edi # ^d |
1038 | xorl %eax, %edi # ^b | 998 | xorl %eax, %edi # ^b |
1039 | leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | 999 | leal -0x359D3E2A(%rdx,%r13), %edx # e += RCONST + W[n & 15] |
1040 | addl %edi, %edx # e += (c ^ d ^ b) | 1000 | addl %edi, %edx # e += (c ^ d ^ b) |
1041 | movl %ebp, %esi # | 1001 | movl %ebp, %esi # |
1042 | roll $5, %esi # rotl32(a,5) | 1002 | roll $5, %esi # rotl32(a,5) |
1043 | addl %esi, %edx # e += rotl32(a,5) | 1003 | addl %esi, %edx # e += rotl32(a,5) |
1044 | rorl $2, %eax # b = rotl32(b,30) | 1004 | rorl $2, %eax # b = rotl32(b,30) |
1045 | # 62 | 1005 | # 62 |
1046 | movl %r11d, %esi # W[(n+13) & 15] | 1006 | xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] |
1047 | xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] | 1007 | xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] |
1048 | xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] | 1008 | xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] |
1049 | xorl %r14d, %esi # ^W[n & 15] | 1009 | roll %r14d # |
1050 | roll %esi # | ||
1051 | movl %esi, %r14d # store to W[n & 15] | ||
1052 | movl %eax, %edi # c | 1010 | movl %eax, %edi # c |
1053 | xorl %ebx, %edi # ^d | 1011 | xorl %ebx, %edi # ^d |
1054 | xorl %ebp, %edi # ^b | 1012 | xorl %ebp, %edi # ^b |
1055 | leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 1013 | leal -0x359D3E2A(%rcx,%r14), %ecx # e += RCONST + W[n & 15] |
1056 | addl %edi, %ecx # e += (c ^ d ^ b) | 1014 | addl %edi, %ecx # e += (c ^ d ^ b) |
1057 | movl %edx, %esi # | 1015 | movl %edx, %esi # |
1058 | roll $5, %esi # rotl32(a,5) | 1016 | roll $5, %esi # rotl32(a,5) |
1059 | addl %esi, %ecx # e += rotl32(a,5) | 1017 | addl %esi, %ecx # e += rotl32(a,5) |
1060 | rorl $2, %ebp # b = rotl32(b,30) | 1018 | rorl $2, %ebp # b = rotl32(b,30) |
1061 | # 63 | 1019 | # 63 |
1062 | movl %r12d, %esi # W[(n+13) & 15] | 1020 | xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] |
1063 | xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] | 1021 | xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] |
1064 | xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] | 1022 | xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] |
1065 | xorl %r15d, %esi # ^W[n & 15] | 1023 | roll %r15d # |
1066 | roll %esi # | ||
1067 | movl %esi, %r15d # store to W[n & 15] | ||
1068 | movl %ebp, %edi # c | 1024 | movl %ebp, %edi # c |
1069 | xorl %eax, %edi # ^d | 1025 | xorl %eax, %edi # ^d |
1070 | xorl %edx, %edi # ^b | 1026 | xorl %edx, %edi # ^b |
1071 | leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 1027 | leal -0x359D3E2A(%rbx,%r15), %ebx # e += RCONST + W[n & 15] |
1072 | addl %edi, %ebx # e += (c ^ d ^ b) | 1028 | addl %edi, %ebx # e += (c ^ d ^ b) |
1073 | movl %ecx, %esi # | 1029 | movl %ecx, %esi # |
1074 | roll $5, %esi # rotl32(a,5) | 1030 | roll $5, %esi # rotl32(a,5) |
@@ -1084,7 +1040,7 @@ sha1_process_block64:
1084 | movl %edx, %edi # c | 1040 | movl %edx, %edi # c |
1085 | xorl %ebp, %edi # ^d | 1041 | xorl %ebp, %edi # ^d |
1086 | xorl %ecx, %edi # ^b | 1042 | xorl %ecx, %edi # ^b |
1087 | leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | 1043 | leal -0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
1088 | addl %edi, %eax # e += (c ^ d ^ b) | 1044 | addl %edi, %eax # e += (c ^ d ^ b) |
1089 | movl %ebx, %esi # | 1045 | movl %ebx, %esi # |
1090 | roll $5, %esi # rotl32(a,5) | 1046 | roll $5, %esi # rotl32(a,5) |
@@ -1100,7 +1056,7 @@ sha1_process_block64:
1100 | movl %ecx, %edi # c | 1056 | movl %ecx, %edi # c |
1101 | xorl %edx, %edi # ^d | 1057 | xorl %edx, %edi # ^d |
1102 | xorl %ebx, %edi # ^b | 1058 | xorl %ebx, %edi # ^b |
1103 | leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 1059 | leal -0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
1104 | addl %edi, %ebp # e += (c ^ d ^ b) | 1060 | addl %edi, %ebp # e += (c ^ d ^ b) |
1105 | movl %eax, %esi # | 1061 | movl %eax, %esi # |
1106 | roll $5, %esi # rotl32(a,5) | 1062 | roll $5, %esi # rotl32(a,5) |
@@ -1116,7 +1072,7 @@ sha1_process_block64:
1116 | movl %ebx, %edi # c | 1072 | movl %ebx, %edi # c |
1117 | xorl %ecx, %edi # ^d | 1073 | xorl %ecx, %edi # ^d |
1118 | xorl %eax, %edi # ^b | 1074 | xorl %eax, %edi # ^b |
1119 | leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | 1075 | leal -0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
1120 | addl %edi, %edx # e += (c ^ d ^ b) | 1076 | addl %edi, %edx # e += (c ^ d ^ b) |
1121 | movl %ebp, %esi # | 1077 | movl %ebp, %esi # |
1122 | roll $5, %esi # rotl32(a,5) | 1078 | roll $5, %esi # rotl32(a,5) |
@@ -1132,7 +1088,7 @@ sha1_process_block64:
1132 | movl %eax, %edi # c | 1088 | movl %eax, %edi # c |
1133 | xorl %ebx, %edi # ^d | 1089 | xorl %ebx, %edi # ^d |
1134 | xorl %ebp, %edi # ^b | 1090 | xorl %ebp, %edi # ^b |
1135 | leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 1091 | leal -0x359D3E2A(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] |
1136 | addl %edi, %ecx # e += (c ^ d ^ b) | 1092 | addl %edi, %ecx # e += (c ^ d ^ b) |
1137 | movl %edx, %esi # | 1093 | movl %edx, %esi # |
1138 | roll $5, %esi # rotl32(a,5) | 1094 | roll $5, %esi # rotl32(a,5) |
@@ -1148,7 +1104,7 @@ sha1_process_block64:
1148 | movl %ebp, %edi # c | 1104 | movl %ebp, %edi # c |
1149 | xorl %eax, %edi # ^d | 1105 | xorl %eax, %edi # ^d |
1150 | xorl %edx, %edi # ^b | 1106 | xorl %edx, %edi # ^b |
1151 | leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 1107 | leal -0x359D3E2A(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] |
1152 | addl %edi, %ebx # e += (c ^ d ^ b) | 1108 | addl %edi, %ebx # e += (c ^ d ^ b) |
1153 | movl %ecx, %esi # | 1109 | movl %ecx, %esi # |
1154 | roll $5, %esi # rotl32(a,5) | 1110 | roll $5, %esi # rotl32(a,5) |
@@ -1164,7 +1120,7 @@ sha1_process_block64:
1164 | movl %edx, %edi # c | 1120 | movl %edx, %edi # c |
1165 | xorl %ebp, %edi # ^d | 1121 | xorl %ebp, %edi # ^d |
1166 | xorl %ecx, %edi # ^b | 1122 | xorl %ecx, %edi # ^b |
1167 | leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | 1123 | leal -0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15] |
1168 | addl %edi, %eax # e += (c ^ d ^ b) | 1124 | addl %edi, %eax # e += (c ^ d ^ b) |
1169 | movl %ebx, %esi # | 1125 | movl %ebx, %esi # |
1170 | roll $5, %esi # rotl32(a,5) | 1126 | roll $5, %esi # rotl32(a,5) |
@@ -1180,7 +1136,7 @@ sha1_process_block64:
1180 | movl %ecx, %edi # c | 1136 | movl %ecx, %edi # c |
1181 | xorl %edx, %edi # ^d | 1137 | xorl %edx, %edi # ^d |
1182 | xorl %ebx, %edi # ^b | 1138 | xorl %ebx, %edi # ^b |
1183 | leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 1139 | leal -0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] |
1184 | addl %edi, %ebp # e += (c ^ d ^ b) | 1140 | addl %edi, %ebp # e += (c ^ d ^ b) |
1185 | movl %eax, %esi # | 1141 | movl %eax, %esi # |
1186 | roll $5, %esi # rotl32(a,5) | 1142 | roll $5, %esi # rotl32(a,5) |
@@ -1196,135 +1152,119 @@ sha1_process_block64:
1196 | movl %ebx, %edi # c | 1152 | movl %ebx, %edi # c |
1197 | xorl %ecx, %edi # ^d | 1153 | xorl %ecx, %edi # ^d |
1198 | xorl %eax, %edi # ^b | 1154 | xorl %eax, %edi # ^b |
1199 | leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | 1155 | leal -0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15] |
1200 | addl %edi, %edx # e += (c ^ d ^ b) | 1156 | addl %edi, %edx # e += (c ^ d ^ b) |
1201 | movl %ebp, %esi # | 1157 | movl %ebp, %esi # |
1202 | roll $5, %esi # rotl32(a,5) | 1158 | roll $5, %esi # rotl32(a,5) |
1203 | addl %esi, %edx # e += rotl32(a,5) | 1159 | addl %esi, %edx # e += rotl32(a,5) |
1204 | rorl $2, %eax # b = rotl32(b,30) | 1160 | rorl $2, %eax # b = rotl32(b,30) |
1205 | # 72 | 1161 | # 72 |
1206 | movl -32+4*5(%rsp), %esi # W[(n+13) & 15] | 1162 | xorl -32+4*5(%rsp), %r8d # W[n & 15] ^= W[(n+13) & 15] |
1207 | xorl -32+4*0(%rsp), %esi # ^W[(n+8) & 15] | 1163 | xorl -32+4*0(%rsp), %r8d # ^W[(n+8) & 15] |
1208 | xorl %r10d, %esi # ^W[(n+2) & 15] | 1164 | xorl %r10d, %r8d # ^W[(n+2) & 15] |
1209 | xorl %r8d, %esi # ^W[n & 15] | 1165 | roll %r8d # |
1210 | roll %esi # | ||
1211 | movl %esi, %r8d # store to W[n & 15] | ||
1212 | movl %eax, %edi # c | 1166 | movl %eax, %edi # c |
1213 | xorl %ebx, %edi # ^d | 1167 | xorl %ebx, %edi # ^d |
1214 | xorl %ebp, %edi # ^b | 1168 | xorl %ebp, %edi # ^b |
1215 | leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 1169 | leal -0x359D3E2A(%rcx,%r8), %ecx # e += RCONST + W[n & 15] |
1216 | addl %edi, %ecx # e += (c ^ d ^ b) | 1170 | addl %edi, %ecx # e += (c ^ d ^ b) |
1217 | movl %edx, %esi # | 1171 | movl %edx, %esi # |
1218 | roll $5, %esi # rotl32(a,5) | 1172 | roll $5, %esi # rotl32(a,5) |
1219 | addl %esi, %ecx # e += rotl32(a,5) | 1173 | addl %esi, %ecx # e += rotl32(a,5) |
1220 | rorl $2, %ebp # b = rotl32(b,30) | 1174 | rorl $2, %ebp # b = rotl32(b,30) |
1221 | # 73 | 1175 | # 73 |
1222 | movl -32+4*6(%rsp), %esi # W[(n+13) & 15] | 1176 | xorl -32+4*6(%rsp), %r9d # W[n & 15] ^= W[(n+13) & 15] |
1223 | xorl -32+4*1(%rsp), %esi # ^W[(n+8) & 15] | 1177 | xorl -32+4*1(%rsp), %r9d # ^W[(n+8) & 15] |
1224 | xorl %r11d, %esi # ^W[(n+2) & 15] | 1178 | xorl %r11d, %r9d # ^W[(n+2) & 15] |
1225 | xorl %r9d, %esi # ^W[n & 15] | 1179 | roll %r9d # |
1226 | roll %esi # | ||
1227 | movl %esi, %r9d # store to W[n & 15] | ||
1228 | movl %ebp, %edi # c | 1180 | movl %ebp, %edi # c |
1229 | xorl %eax, %edi # ^d | 1181 | xorl %eax, %edi # ^d |
1230 | xorl %edx, %edi # ^b | 1182 | xorl %edx, %edi # ^b |
1231 | leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 1183 | leal -0x359D3E2A(%rbx,%r9), %ebx # e += RCONST + W[n & 15] |
1232 | addl %edi, %ebx # e += (c ^ d ^ b) | 1184 | addl %edi, %ebx # e += (c ^ d ^ b) |
1233 | movl %ecx, %esi # | 1185 | movl %ecx, %esi # |
1234 | roll $5, %esi # rotl32(a,5) | 1186 | roll $5, %esi # rotl32(a,5) |
1235 | addl %esi, %ebx # e += rotl32(a,5) | 1187 | addl %esi, %ebx # e += rotl32(a,5) |
1236 | rorl $2, %edx # b = rotl32(b,30) | 1188 | rorl $2, %edx # b = rotl32(b,30) |
1237 | # 74 | 1189 | # 74 |
1238 | movl -32+4*7(%rsp), %esi # W[(n+13) & 15] | 1190 | xorl -32+4*7(%rsp), %r10d # W[n & 15] ^= W[(n+13) & 15] |
1239 | xorl -32+4*2(%rsp), %esi # ^W[(n+8) & 15] | 1191 | xorl -32+4*2(%rsp), %r10d # ^W[(n+8) & 15] |
1240 | xorl %r12d, %esi # ^W[(n+2) & 15] | 1192 | xorl %r12d, %r10d # ^W[(n+2) & 15] |
1241 | xorl %r10d, %esi # ^W[n & 15] | 1193 | roll %r10d # |
1242 | roll %esi # | ||
1243 | movl %esi, %r10d # store to W[n & 15] | ||
1244 | movl %edx, %edi # c | 1194 | movl %edx, %edi # c |
1245 | xorl %ebp, %edi # ^d | 1195 | xorl %ebp, %edi # ^d |
1246 | xorl %ecx, %edi # ^b | 1196 | xorl %ecx, %edi # ^b |
1247 | leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | 1197 | leal -0x359D3E2A(%rax,%r10), %eax # e += RCONST + W[n & 15] |
1248 | addl %edi, %eax # e += (c ^ d ^ b) | 1198 | addl %edi, %eax # e += (c ^ d ^ b) |
1249 | movl %ebx, %esi # | 1199 | movl %ebx, %esi # |
1250 | roll $5, %esi # rotl32(a,5) | 1200 | roll $5, %esi # rotl32(a,5) |
1251 | addl %esi, %eax # e += rotl32(a,5) | 1201 | addl %esi, %eax # e += rotl32(a,5) |
1252 | rorl $2, %ecx # b = rotl32(b,30) | 1202 | rorl $2, %ecx # b = rotl32(b,30) |
1253 | # 75 | 1203 | # 75 |
1254 | movl %r8d, %esi # W[(n+13) & 15] | 1204 | xorl %r8d, %r11d # W[n & 15] ^= W[(n+13) & 15] |
1255 | xorl -32+4*3(%rsp), %esi # ^W[(n+8) & 15] | 1205 | xorl -32+4*3(%rsp), %r11d # ^W[(n+8) & 15] |
1256 | xorl %r13d, %esi # ^W[(n+2) & 15] | 1206 | xorl %r13d, %r11d # ^W[(n+2) & 15] |
1257 | xorl %r11d, %esi # ^W[n & 15] | 1207 | roll %r11d # |
1258 | roll %esi # | ||
1259 | movl %esi, %r11d # store to W[n & 15] | ||
1260 | movl %ecx, %edi # c | 1208 | movl %ecx, %edi # c |
1261 | xorl %edx, %edi # ^d | 1209 | xorl %edx, %edi # ^d |
1262 | xorl %ebx, %edi # ^b | 1210 | xorl %ebx, %edi # ^b |
1263 | leal -0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | 1211 | leal -0x359D3E2A(%rbp,%r11), %ebp # e += RCONST + W[n & 15] |
1264 | addl %edi, %ebp # e += (c ^ d ^ b) | 1212 | addl %edi, %ebp # e += (c ^ d ^ b) |
1265 | movl %eax, %esi # | 1213 | movl %eax, %esi # |
1266 | roll $5, %esi # rotl32(a,5) | 1214 | roll $5, %esi # rotl32(a,5) |
1267 | addl %esi, %ebp # e += rotl32(a,5) | 1215 | addl %esi, %ebp # e += rotl32(a,5) |
1268 | rorl $2, %ebx # b = rotl32(b,30) | 1216 | rorl $2, %ebx # b = rotl32(b,30) |
1269 | # 76 | 1217 | # 76 |
1270 | movl %r9d, %esi # W[(n+13) & 15] | 1218 | xorl %r9d, %r12d # W[n & 15] ^= W[(n+13) & 15] |
1271 | xorl -32+4*4(%rsp), %esi # ^W[(n+8) & 15] | 1219 | xorl -32+4*4(%rsp), %r12d # ^W[(n+8) & 15] |
1272 | xorl %r14d, %esi # ^W[(n+2) & 15] | 1220 | xorl %r14d, %r12d # ^W[(n+2) & 15] |
1273 | xorl %r12d, %esi # ^W[n & 15] | 1221 | roll %r12d # |
1274 | roll %esi # | ||
1275 | movl %esi, %r12d # store to W[n & 15] | ||
1276 | movl %ebx, %edi # c | 1222 | movl %ebx, %edi # c |
1277 | xorl %ecx, %edi # ^d | 1223 | xorl %ecx, %edi # ^d |
1278 | xorl %eax, %edi # ^b | 1224 | xorl %eax, %edi # ^b |
1279 | leal -0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | 1225 | leal -0x359D3E2A(%rdx,%r12), %edx # e += RCONST + W[n & 15] |
1280 | addl %edi, %edx # e += (c ^ d ^ b) | 1226 | addl %edi, %edx # e += (c ^ d ^ b) |
1281 | movl %ebp, %esi # | 1227 | movl %ebp, %esi # |
1282 | roll $5, %esi # rotl32(a,5) | 1228 | roll $5, %esi # rotl32(a,5) |
1283 | addl %esi, %edx # e += rotl32(a,5) | 1229 | addl %esi, %edx # e += rotl32(a,5) |
1284 | rorl $2, %eax # b = rotl32(b,30) | 1230 | rorl $2, %eax # b = rotl32(b,30) |
1285 | # 77 | 1231 | # 77 |
1286 | movl %r10d, %esi # W[(n+13) & 15] | 1232 | xorl %r10d, %r13d # W[n & 15] ^= W[(n+13) & 15] |
1287 | xorl -32+4*5(%rsp), %esi # ^W[(n+8) & 15] | 1233 | xorl -32+4*5(%rsp), %r13d # ^W[(n+8) & 15] |
1288 | xorl %r15d, %esi # ^W[(n+2) & 15] | 1234 | xorl %r15d, %r13d # ^W[(n+2) & 15] |
1289 | xorl %r13d, %esi # ^W[n & 15] | 1235 | roll %r13d # |
1290 | roll %esi # | ||
1291 | # store to W[n & 15] - unused, not done | ||
1292 | movl %eax, %edi # c | 1236 | movl %eax, %edi # c |
1293 | xorl %ebx, %edi # ^d | 1237 | xorl %ebx, %edi # ^d |
1294 | xorl %ebp, %edi # ^b | 1238 | xorl %ebp, %edi # ^b |
1295 | leal -0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | 1239 | leal -0x359D3E2A(%rcx,%r13), %ecx # e += RCONST + W[n & 15] |
1296 | addl %edi, %ecx # e += (c ^ d ^ b) | 1240 | addl %edi, %ecx # e += (c ^ d ^ b) |
1297 | movl %edx, %esi # | 1241 | movl %edx, %esi # |
1298 | roll $5, %esi # rotl32(a,5) | 1242 | roll $5, %esi # rotl32(a,5) |
1299 | addl %esi, %ecx # e += rotl32(a,5) | 1243 | addl %esi, %ecx # e += rotl32(a,5) |
1300 | rorl $2, %ebp # b = rotl32(b,30) | 1244 | rorl $2, %ebp # b = rotl32(b,30) |
1301 | # 78 | 1245 | # 78 |
1302 | movl %r11d, %esi # W[(n+13) & 15] | 1246 | xorl %r11d, %r14d # W[n & 15] ^= W[(n+13) & 15] |
1303 | xorl -32+4*6(%rsp), %esi # ^W[(n+8) & 15] | 1247 | xorl -32+4*6(%rsp), %r14d # ^W[(n+8) & 15] |
1304 | xorl -32+4*0(%rsp), %esi # ^W[(n+2) & 15] | 1248 | xorl -32+4*0(%rsp), %r14d # ^W[(n+2) & 15] |
1305 | xorl %r14d, %esi # ^W[n & 15] | 1249 | roll %r14d # |
1306 | roll %esi # | ||
1307 | # store to W[n & 15] - unused, not done | ||
1308 | movl %ebp, %edi # c | 1250 | movl %ebp, %edi # c |
1309 | xorl %eax, %edi # ^d | 1251 | xorl %eax, %edi # ^d |
1310 | xorl %edx, %edi # ^b | 1252 | xorl %edx, %edi # ^b |
1311 | leal -0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | 1253 | leal -0x359D3E2A(%rbx,%r14), %ebx # e += RCONST + W[n & 15] |
1312 | addl %edi, %ebx # e += (c ^ d ^ b) | 1254 | addl %edi, %ebx # e += (c ^ d ^ b) |
1313 | movl %ecx, %esi # | 1255 | movl %ecx, %esi # |
1314 | roll $5, %esi # rotl32(a,5) | 1256 | roll $5, %esi # rotl32(a,5) |
1315 | addl %esi, %ebx # e += rotl32(a,5) | 1257 | addl %esi, %ebx # e += rotl32(a,5) |
1316 | rorl $2, %edx # b = rotl32(b,30) | 1258 | rorl $2, %edx # b = rotl32(b,30) |
1317 | # 79 | 1259 | # 79 |
1318 | movl %r12d, %esi # W[(n+13) & 15] | 1260 | xorl %r12d, %r15d # W[n & 15] ^= W[(n+13) & 15] |
1319 | xorl -32+4*7(%rsp), %esi # ^W[(n+8) & 15] | 1261 | xorl -32+4*7(%rsp), %r15d # ^W[(n+8) & 15] |
1320 | xorl -32+4*1(%rsp), %esi # ^W[(n+2) & 15] | 1262 | xorl -32+4*1(%rsp), %r15d # ^W[(n+2) & 15] |
1321 | xorl %r15d, %esi # ^W[n & 15] | 1263 | roll %r15d # |
1322 | roll %esi # | ||
1323 | # store to W[n & 15] - unused, not done | ||
1324 | movl %edx, %edi # c | 1264 | movl %edx, %edi # c |
1325 | xorl %ebp, %edi # ^d | 1265 | xorl %ebp, %edi # ^d |
1326 | xorl %ecx, %edi # ^b | 1266 | xorl %ecx, %edi # ^b |
1327 | leal -0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | 1267 | leal -0x359D3E2A(%rax,%r15), %eax # e += RCONST + W[n & 15] |
1328 | addl %edi, %eax # e += (c ^ d ^ b) | 1268 | addl %edi, %eax # e += (c ^ d ^ b) |
1329 | movl %ebx, %esi # | 1269 | movl %ebx, %esi # |
1330 | roll $5, %esi # rotl32(a,5) | 1270 | roll $5, %esi # rotl32(a,5) |
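For readers tracing the side-by-side hunk above: rounds 60..79 each perform one SHA1 parity-round step, and the right-hand column's only change is keeping W[n & 15] in a register where one is available. Below is a minimal C sketch of a single such step, using the names from the instruction comments (a..e, W[], RCONST, rotl32); rd4_step and the placeholder values in main are illustrative, not part of the commit.

    #include <stdint.h>

    uint32_t rotl32(uint32_t x, unsigned n)
    {
        return (x << n) | (x >> (32 - n));
    }

    /* One round-4 step (rounds 60..79) as the generated comments describe
     * it: W[] is the 16-word circular message schedule, updated in place. */
    void rd4_step(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d,
                  uint32_t *e, uint32_t W[16], int n)
    {
        const uint32_t RCONST = 0xCA62C1D6; /* == (uint32_t)-0x359D3E2A */
        W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15]
                         ^ W[(n+2) & 15] ^ W[n & 15], 1);
        *e += rotl32(*a, 5) + (*b ^ *c ^ *d) + RCONST + W[n & 15];
        *b = rotl32(*b, 30);
    }

    int main(void)
    {
        uint32_t a = 1, b = 2, c = 3, d = 4, e = 5, W[16] = { 0 };
        rd4_step(&a, &b, &c, &d, &e, W, 75); /* placeholder state */
        return 0;
    }

Note that the generated code never rotates the five state variables between rounds; as the script below shows, it instead permutes the register argument lists (RD2 ax bx cx dx bp 75; RD2 bp ax bx cx dx 76; ...), so each round's a..e land in different registers.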
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh new file mode 100755 index 000000000..931c0f0fd --- /dev/null +++ b/libbb/hash_md5_sha_x86-64.S.sh | |||
@@ -0,0 +1,267 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | # We don't regenerate it on every "make" invocation - only by hand. | ||
4 | # The reason is that changes to the generated code are difficult | ||
5 | # to visualize by looking only at this script; it helps when the commit | ||
6 | # also contains the diff of the generated file. | ||
7 | exec >hash_md5_sha_x86-64.S | ||
8 | |||
9 | echo \ | ||
10 | '### Generated by hash_md5_sha_x86-64.S.sh ### | ||
11 | |||
12 | #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) | ||
13 | .section .text.sha1_process_block64,"ax",@progbits | ||
14 | .globl sha1_process_block64 | ||
15 | .hidden sha1_process_block64 | ||
16 | .type sha1_process_block64, @function | ||
17 | |||
18 | .balign 8 # allow decoders to fetch at least 4 first insns | ||
19 | sha1_process_block64: | ||
20 | pushq %r15 # | ||
21 | pushq %r14 # | ||
22 | pushq %r13 # | ||
23 | pushq %r12 # | ||
24 | pushq %rbp # | ||
25 | pushq %rbx # | ||
26 | pushq %rdi # we need ctx at the end | ||
27 | |||
28 | #Register and stack use: | ||
29 | # eax..edx: a..d | ||
30 | # ebp: e | ||
31 | # esi,edi: temps | ||
32 | # -32+4*n(%rsp),r8...r15: W[0..7,8..15] | ||
33 | # (TODO: actually W[0..7] are used a bit more often, put _them_ into r8..r15?) | ||
34 | |||
35 | movq 4*8(%rdi), %r8 | ||
36 | bswapq %r8 | ||
37 | movl %r8d, %r9d | ||
38 | shrq $32, %r8 | ||
39 | movq 4*10(%rdi), %r10 | ||
40 | bswapq %r10 | ||
41 | movl %r10d, %r11d | ||
42 | shrq $32, %r10 | ||
43 | movq 4*12(%rdi), %r12 | ||
44 | bswapq %r12 | ||
45 | movl %r12d, %r13d | ||
46 | shrq $32, %r12 | ||
47 | movq 4*14(%rdi), %r14 | ||
48 | bswapq %r14 | ||
49 | movl %r14d, %r15d | ||
50 | shrq $32, %r14 | ||
51 | |||
52 | movl $3, %eax | ||
53 | 1: | ||
54 | movq (%rdi,%rax,8), %rsi | ||
55 | bswapq %rsi | ||
56 | rolq $32, %rsi | ||
57 | movq %rsi, -32(%rsp,%rax,8) | ||
58 | decl %eax | ||
59 | jns 1b | ||
60 | movl 80(%rdi), %eax # a = ctx->hash[0] | ||
61 | movl 84(%rdi), %ebx # b = ctx->hash[1] | ||
62 | movl 88(%rdi), %ecx # c = ctx->hash[2] | ||
63 | movl 92(%rdi), %edx # d = ctx->hash[3] | ||
64 | movl 96(%rdi), %ebp # e = ctx->hash[4] | ||
65 | ' | ||
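The load loop just emitted (movq/bswapq/rolq) leans on an endianness identity: two big-endian 32-bit words fetched as one little-endian 64-bit load, byte-swapped as a quadword and rotated by 32 bits, come out as the two words byte-swapped but still in index order. Here is a small C check of that identity, assuming a little-endian host (the code is x86-64 only) and GCC's __builtin_bswap64; load_two_be32 is an illustrative name.

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    uint64_t rotl64(uint64_t x, unsigned n)
    {
        return (x << n) | (x >> (64 - n));
    }

    /* mirrors: movq (mem),%rsi; bswapq %rsi; rolq $32,%rsi */
    uint64_t load_two_be32(const unsigned char *p)
    {
        uint64_t v;
        memcpy(&v, p, 8); /* little-endian 64-bit load */
        return rotl64(__builtin_bswap64(v), 32);
    }

    int main(void)
    {
        const unsigned char p[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
        uint64_t v = load_two_be32(p);
        assert((uint32_t)v == 0x00010203);         /* W[2i], low half  */
        assert((uint32_t)(v >> 32) == 0x04050607); /* W[2i+1], high half */
        return 0;
    }

The register path above the loop (bswapq; movl %r8d,%r9d; shrq $32,%r8) splits the same byte-swapped quadword without the rotate, leaving W[8] in %r8d and W[9] in %r9d.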
66 | W32() { | ||
67 | test "$1" || exit 1 | ||
68 | test "$1" -lt 0 && exit 1 | ||
69 | test "$1" -gt 15 && exit 1 | ||
70 | test "$1" -lt 8 && echo "-32+4*$1(%rsp)" | ||
71 | test "$1" -ge 8 && echo "%r${1}d" | ||
72 | } | ||
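For reference, W32 maps schedule indices to operands: 0..7 name stack slots and 8..15 name registers, with the register number matching the index. An illustrative C mirror of the same mapping follows; w32_operand is a hypothetical name, not part of the script.

    #include <stdio.h>

    /* Illustrative C mirror of the script's W32(): W[0..7] live at
     * -32+4*n(%rsp), W[8..15] in %r8d..%r15d. */
    const char *w32_operand(int n)
    {
        static char buf[32];
        if (n < 0 || n > 15)
            return NULL;
        if (n < 8)
            snprintf(buf, sizeof(buf), "-32+4*%d(%%rsp)", n);
        else
            snprintf(buf, sizeof(buf), "%%r%dd", n);
        return buf;
    }

    int main(void)
    {
        printf("%s\n", w32_operand(3));  /* -32+4*3(%rsp) */
        printf("%s\n", w32_operand(12)); /* %r12d */
        return 0;
    }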
73 | |||
74 | RD1A() { | ||
75 | local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||
76 | local n=$(($6)) | ||
77 | echo "# $n" | ||
78 | test $n = 0 && echo " | ||
79 | # W[0], already in %esi | ||
80 | ";test $n != 0 && test $n -lt 8 && echo " | ||
81 | movl `W32 $n`, %esi # W[n] | ||
82 | ";test $n -ge 8 && echo " | ||
83 | # W[n], in %r$n | ||
84 | ";echo " | ||
85 | movl %e$c, %edi # c | ||
86 | xorl %e$d, %edi # ^d | ||
87 | andl %e$b, %edi # &b | ||
88 | xorl %e$d, %edi # (((c ^ d) & b) ^ d) | ||
89 | ";test $n -lt 8 && echo " | ||
90 | leal $RCONST(%r$e,%rsi),%e$e # e += RCONST + W[n] | ||
91 | ";test $n -ge 8 && echo " | ||
92 | leal $RCONST(%r$e,%r$n),%e$e # e += RCONST + W[n] | ||
93 | ";echo " | ||
94 | addl %edi, %e$e # e += (((c ^ d) & b) ^ d) | ||
95 | movl %e$a, %esi # | ||
96 | roll \$5, %esi # rotl32(a,5) | ||
97 | addl %esi, %e$e # e += rotl32(a,5) | ||
98 | rorl \$2, %e$b # b = rotl32(b,30) | ||
99 | " | ||
100 | } | ||
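RD1A's xorl/andl/xorl sequence computes the round-1 "choice" function without needing a NOT: (((c ^ d) & b) ^ d) picks c's bits where b is 1 and d's bits where b is 0, which equals the textbook (b & c) | (~b & d). Since the identity is bitwise, a brute-force check over 8-bit values covers the 32-bit case; a quick C self-check:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* the identity is bitwise, so all 8-bit triples suffice */
        for (unsigned b = 0; b < 256; b++)
            for (unsigned c = 0; c < 256; c++)
                for (unsigned d = 0; d < 256; d++) {
                    uint8_t lhs = ((c ^ d) & b) ^ d;
                    uint8_t rhs = (b & c) | (~b & d);
                    assert(lhs == rhs);
                }
        return 0;
    }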
101 | RD1B() { | ||
102 | local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||
103 | local n=$(($6)) | ||
104 | local n13=$(((n+13) & 15)) | ||
105 | local n8=$(((n+8) & 15)) | ||
106 | local n2=$(((n+2) & 15)) | ||
107 | local n0=$(((n+0) & 15)) | ||
108 | echo " | ||
109 | # $n | ||
110 | ";test $n0 -lt 8 && echo " | ||
111 | movl `W32 $n13`, %esi # W[(n+13) & 15] | ||
112 | xorl `W32 $n8`, %esi # ^W[(n+8) & 15] | ||
113 | xorl `W32 $n2`, %esi # ^W[(n+2) & 15] | ||
114 | xorl `W32 $n0`, %esi # ^W[n & 15] | ||
115 | roll %esi # | ||
116 | movl %esi, `W32 $n0` # store to W[n & 15] | ||
117 | ";test $n0 -ge 8 && echo " | ||
118 | xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15] | ||
119 | xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] | ||
120 | xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] | ||
121 | roll `W32 $n0` # | ||
122 | "; echo " | ||
123 | movl %e$c, %edi # c | ||
124 | xorl %e$d, %edi # ^d | ||
125 | andl %e$b, %edi # &b | ||
126 | xorl %e$d, %edi # (((c ^ d) & b) ^ d) | ||
127 | ";test $n0 -lt 8 && echo " | ||
128 | leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] | ||
129 | ";test $n0 -ge 8 && echo " | ||
130 | leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] | ||
131 | ";echo " | ||
132 | addl %edi, %e$e # e += (((c ^ d) & b) ^ d) | ||
133 | movl %e$a, %esi # | ||
134 | roll \$5, %esi # rotl32(a,5) | ||
135 | addl %esi, %e$e # e += rotl32(a,5) | ||
136 | rorl \$2, %e$b # b = rotl32(b,30) | ||
137 | " | ||
138 | } | ||
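The $n0 branch above is where this commit's byte savings come from: when W[n & 15] lives in a register (n0 >= 8), the xor chain updates it in place and the movl in/out pair of the stack-slot path disappears. The same index arithmetic recurs in RD2 and RD3 below; as a worked example tied to the hunk shown earlier, round n = 75 gives:

    #include <stdio.h>

    /* round 75 (visible in the diff above): n0=11 -> %r11d updated in
     * place; n2=13 -> %r13d; n8=3 -> -32+4*3(%rsp); n13=8 -> %r8d */
    int main(void)
    {
        int n = 75;
        printf("n0=%d n2=%d n8=%d n13=%d\n",
               n & 15, (n + 2) & 15, (n + 8) & 15, (n + 13) & 15);
        return 0;
    }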
139 | { | ||
140 | RCONST=0x5A827999 | ||
141 | RD1A ax bx cx dx bp 0; RD1A bp ax bx cx dx 1; RD1A dx bp ax bx cx 2; RD1A cx dx bp ax bx 3; RD1A bx cx dx bp ax 4 | ||
142 | RD1A ax bx cx dx bp 5; RD1A bp ax bx cx dx 6; RD1A dx bp ax bx cx 7; RD1A cx dx bp ax bx 8; RD1A bx cx dx bp ax 9 | ||
143 | RD1A ax bx cx dx bp 10; RD1A bp ax bx cx dx 11; RD1A dx bp ax bx cx 12; RD1A cx dx bp ax bx 13; RD1A bx cx dx bp ax 14 | ||
144 | RD1A ax bx cx dx bp 15; RD1B bp ax bx cx dx 16; RD1B dx bp ax bx cx 17; RD1B cx dx bp ax bx 18; RD1B bx cx dx bp ax 19 | ||
145 | } | grep -v '^$' | ||
146 | |||
147 | RD2() { | ||
148 | local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||
149 | local n=$(($6)) | ||
150 | local n13=$(((n+13) & 15)) | ||
151 | local n8=$(((n+8) & 15)) | ||
152 | local n2=$(((n+2) & 15)) | ||
153 | local n0=$(((n+0) & 15)) | ||
154 | echo " | ||
155 | # $n | ||
156 | ";test $n0 -lt 8 && echo " | ||
157 | movl `W32 $n13`, %esi # W[(n+13) & 15] | ||
158 | xorl `W32 $n8`, %esi # ^W[(n+8) & 15] | ||
159 | xorl `W32 $n2`, %esi # ^W[(n+2) & 15] | ||
160 | xorl `W32 $n0`, %esi # ^W[n & 15] | ||
161 | roll %esi # | ||
162 | movl %esi, `W32 $n0` # store to W[n & 15] | ||
163 | ";test $n0 -ge 8 && echo " | ||
164 | xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15] | ||
165 | xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] | ||
166 | xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] | ||
167 | roll `W32 $n0` # | ||
168 | "; echo " | ||
169 | movl %e$c, %edi # c | ||
170 | xorl %e$d, %edi # ^d | ||
171 | xorl %e$b, %edi # ^b | ||
172 | ";test $n0 -lt 8 && echo " | ||
173 | leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] | ||
174 | ";test $n0 -ge 8 && echo " | ||
175 | leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] | ||
176 | ";echo " | ||
177 | addl %edi, %e$e # e += (c ^ d ^ b) | ||
178 | movl %e$a, %esi # | ||
179 | roll \$5, %esi # rotl32(a,5) | ||
180 | addl %esi, %e$e # e += rotl32(a,5) | ||
181 | rorl \$2, %e$b # b = rotl32(b,30) | ||
182 | " | ||
183 | } | ||
184 | { | ||
185 | RCONST=0x6ED9EBA1 | ||
186 | RD2 ax bx cx dx bp 20; RD2 bp ax bx cx dx 21; RD2 dx bp ax bx cx 22; RD2 cx dx bp ax bx 23; RD2 bx cx dx bp ax 24 | ||
187 | RD2 ax bx cx dx bp 25; RD2 bp ax bx cx dx 26; RD2 dx bp ax bx cx 27; RD2 cx dx bp ax bx 28; RD2 bx cx dx bp ax 29 | ||
188 | RD2 ax bx cx dx bp 30; RD2 bp ax bx cx dx 31; RD2 dx bp ax bx cx 32; RD2 cx dx bp ax bx 33; RD2 bx cx dx bp ax 34 | ||
189 | RD2 ax bx cx dx bp 35; RD2 bp ax bx cx dx 36; RD2 dx bp ax bx cx 37; RD2 cx dx bp ax bx 38; RD2 bx cx dx bp ax 39 | ||
190 | } | grep -v '^$' | ||
191 | |||
192 | RD3() { | ||
193 | local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||
194 | local n=$(($6)) | ||
195 | local n13=$(((n+13) & 15)) | ||
196 | local n8=$(((n+8) & 15)) | ||
197 | local n2=$(((n+2) & 15)) | ||
198 | local n0=$(((n+0) & 15)) | ||
199 | echo " | ||
200 | # $n | ||
201 | movl %e$b, %edi # di: b | ||
202 | movl %e$b, %esi # si: b | ||
203 | orl %e$c, %edi # di: b | c | ||
204 | andl %e$c, %esi # si: b & c | ||
205 | andl %e$d, %edi # di: (b | c) & d | ||
206 | orl %esi, %edi # ((b | c) & d) | (b & c) | ||
207 | ";test $n0 -lt 8 && echo " | ||
208 | movl `W32 $n13`, %esi # W[(n+13) & 15] | ||
209 | xorl `W32 $n8`, %esi # ^W[(n+8) & 15] | ||
210 | xorl `W32 $n2`, %esi # ^W[(n+2) & 15] | ||
211 | xorl `W32 $n0`, %esi # ^W[n & 15] | ||
212 | roll %esi # | ||
213 | movl %esi, `W32 $n0` # store to W[n & 15] | ||
214 | ";test $n0 -ge 8 && echo " | ||
215 | xorl `W32 $n13`, `W32 $n0` # W[n & 15] ^= W[(n+13) & 15] | ||
216 | xorl `W32 $n8`, `W32 $n0` # ^W[(n+8) & 15] | ||
217 | xorl `W32 $n2`, `W32 $n0` # ^W[(n+2) & 15] | ||
218 | roll `W32 $n0` # | ||
219 | "; echo " | ||
220 | addl %edi, %e$e # += ((b | c) & d) | (b & c) | ||
221 | ";test $n0 -lt 8 && echo " | ||
222 | leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] | ||
223 | ";test $n0 -ge 8 && echo " | ||
224 | leal $RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] | ||
225 | ";echo " | ||
226 | movl %e$a, %esi # | ||
227 | roll \$5, %esi # rotl32(a,5) | ||
228 | addl %esi, %e$e # e += rotl32(a,5) | ||
229 | rorl \$2, %e$b # b = rotl32(b,30) | ||
230 | " | ||
231 | } | ||
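RD3's opening six instructions compute the round-3 "majority" function in the form ((b | c) & d) | (b & c), an equivalent rearrangement of the textbook (b & c) | (b & d) | (c & d) that fits in the two available temps. As with the choice function, a bitwise brute-force check in C confirms the equivalence:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (unsigned b = 0; b < 256; b++)
            for (unsigned c = 0; c < 256; c++)
                for (unsigned d = 0; d < 256; d++) {
                    uint8_t lhs = ((b | c) & d) | (b & c);
                    uint8_t rhs = (b & c) | (b & d) | (c & d);
                    assert(lhs == rhs);
                }
        return 0;
    }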
232 | { | ||
233 | #RCONST=0x8F1BBCDC "out of range for signed 32bit displacement" | ||
234 | RCONST=-0x70E44324 | ||
235 | RD3 ax bx cx dx bp 40; RD3 bp ax bx cx dx 41; RD3 dx bp ax bx cx 42; RD3 cx dx bp ax bx 43; RD3 bx cx dx bp ax 44 | ||
236 | RD3 ax bx cx dx bp 45; RD3 bp ax bx cx dx 46; RD3 dx bp ax bx cx 47; RD3 cx dx bp ax bx 48; RD3 bx cx dx bp ax 49 | ||
237 | RD3 ax bx cx dx bp 50; RD3 bp ax bx cx dx 51; RD3 dx bp ax bx cx 52; RD3 cx dx bp ax bx 53; RD3 bx cx dx bp ax 54 | ||
238 | RD3 ax bx cx dx bp 55; RD3 bp ax bx cx dx 56; RD3 dx bp ax bx cx 57; RD3 cx dx bp ax bx 58; RD3 bx cx dx bp ax 59 | ||
239 | } | grep -v '^$' | ||
240 | |||
241 | # Round 4 has the same logic as round 2, only n and RCONST are different | ||
242 | { | ||
243 | #RCONST=0xCA62C1D6 "out of range for signed 32bit displacement" | ||
244 | RCONST=-0x359D3E2A | ||
245 | RD2 ax bx cx dx bp 60; RD2 bp ax bx cx dx 61; RD2 dx bp ax bx cx 62; RD2 cx dx bp ax bx 63; RD2 bx cx dx bp ax 64 | ||
246 | RD2 ax bx cx dx bp 65; RD2 bp ax bx cx dx 66; RD2 dx bp ax bx cx 67; RD2 cx dx bp ax bx 68; RD2 bx cx dx bp ax 69 | ||
247 | RD2 ax bx cx dx bp 70; RD2 bp ax bx cx dx 71; RD2 dx bp ax bx cx 72; RD2 cx dx bp ax bx 73; RD2 bx cx dx bp ax 74 | ||
248 | RD2 ax bx cx dx bp 75; RD2 bp ax bx cx dx 76; RD2 dx bp ax bx cx 77; RD2 cx dx bp ax bx 78; RD2 bx cx dx bp ax 79 | ||
249 | } | grep -v '^$' | ||
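The two commented-out constants document a real encoding constraint: leal's displacement field is a 32-bit immediate that the CPU sign-extends, so 0x8F1BBCDC and 0xCA62C1D6 (bit 31 set) must instead be written as the negative values that wrap to the same 32-bit result. A quick C check that the substitution is exact modulo 2^32:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* same 32-bit value, expressed as a negative signed displacement */
        assert((uint32_t)-0x70E44324 == 0x8F1BBCDC);
        assert((uint32_t)-0x359D3E2A == 0xCA62C1D6);
        return 0;
    }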
250 | |||
251 | echo " | ||
252 | popq %rdi # | ||
253 | addl %eax, 80(%rdi) # ctx->hash[0] += a | ||
254 | addl %ebx, 84(%rdi) # ctx->hash[1] += b | ||
255 | addl %ecx, 88(%rdi) # ctx->hash[2] += c | ||
256 | addl %edx, 92(%rdi) # ctx->hash[3] += d | ||
257 | addl %ebp, 96(%rdi) # ctx->hash[4] += e | ||
258 | popq %rbx # | ||
259 | popq %rbp # | ||
260 | popq %r12 # | ||
261 | popq %r13 # | ||
262 | popq %r14 # | ||
263 | popq %r15 # | ||
264 | |||
265 | ret | ||
266 | .size sha1_process_block64, .-sha1_process_block64 | ||
267 | #endif" | ||