diff options
Diffstat (limited to 'src/lib/libcrypto/aes/asm/aes-ppc.pl')
-rw-r--r-- | src/lib/libcrypto/aes/asm/aes-ppc.pl | 269 |
1 files changed, 141 insertions, 128 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-ppc.pl b/src/lib/libcrypto/aes/asm/aes-ppc.pl index ce427655ef..f82c5e1814 100644 --- a/src/lib/libcrypto/aes/asm/aes-ppc.pl +++ b/src/lib/libcrypto/aes/asm/aes-ppc.pl | |||
@@ -16,6 +16,19 @@ | |||
16 | # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - | 16 | # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - |
17 | # at 1/3 of ppc_AES_decrypt. | 17 | # at 1/3 of ppc_AES_decrypt. |
18 | 18 | ||
19 | # February 2010 | ||
20 | # | ||
21 | # Rescheduling instructions to favour Power6 pipeline gives 10% | ||
22 | # performance improvement on the platform in question (and marginal | ||
23 | # improvement even on others). It should be noted that Power6 fails | ||
24 | # to process a byte in 18 cycles, only in 23, because it fails to issue | ||
25 | # 4 load instructions in two cycles, only in 3. As a result, non-compact | ||
26 | # block subroutines are 25% slower than one would expect. Compact | ||
27 | # functions scale better, because they have a pure computational part, | ||
28 | # which scales perfectly with clock frequency. To be specific | ||
29 | # ppc_AES_encrypt_compact operates at 42 cycles per byte, while | ||
30 | # ppc_AES_decrypt_compact - at 55 (in 64-bit build). | ||
31 | |||
19 | $flavour = shift; | 32 | $flavour = shift; |
20 | 33 | ||
21 | if ($flavour =~ /64/) { | 34 | if ($flavour =~ /64/) { |
@@ -376,7 +389,7 @@ $code.=<<___; | |||
376 | addi $sp,$sp,$FRAME | 389 | addi $sp,$sp,$FRAME |
377 | blr | 390 | blr |
378 | 391 | ||
379 | .align 4 | 392 | .align 5 |
380 | Lppc_AES_encrypt: | 393 | Lppc_AES_encrypt: |
381 | lwz $acc00,240($key) | 394 | lwz $acc00,240($key) |
382 | lwz $t0,0($key) | 395 | lwz $t0,0($key) |
@@ -397,46 +410,46 @@ Lppc_AES_encrypt: | |||
397 | Lenc_loop: | 410 | Lenc_loop: |
398 | rlwinm $acc00,$s0,`32-24+3`,21,28 | 411 | rlwinm $acc00,$s0,`32-24+3`,21,28 |
399 | rlwinm $acc01,$s1,`32-24+3`,21,28 | 412 | rlwinm $acc01,$s1,`32-24+3`,21,28 |
400 | lwz $t0,0($key) | ||
401 | lwz $t1,4($key) | ||
402 | rlwinm $acc02,$s2,`32-24+3`,21,28 | 413 | rlwinm $acc02,$s2,`32-24+3`,21,28 |
403 | rlwinm $acc03,$s3,`32-24+3`,21,28 | 414 | rlwinm $acc03,$s3,`32-24+3`,21,28 |
404 | lwz $t2,8($key) | 415 | lwz $t0,0($key) |
405 | lwz $t3,12($key) | 416 | lwz $t1,4($key) |
406 | rlwinm $acc04,$s1,`32-16+3`,21,28 | 417 | rlwinm $acc04,$s1,`32-16+3`,21,28 |
407 | rlwinm $acc05,$s2,`32-16+3`,21,28 | 418 | rlwinm $acc05,$s2,`32-16+3`,21,28 |
408 | lwzx $acc00,$Tbl0,$acc00 | 419 | lwz $t2,8($key) |
409 | lwzx $acc01,$Tbl0,$acc01 | 420 | lwz $t3,12($key) |
410 | rlwinm $acc06,$s3,`32-16+3`,21,28 | 421 | rlwinm $acc06,$s3,`32-16+3`,21,28 |
411 | rlwinm $acc07,$s0,`32-16+3`,21,28 | 422 | rlwinm $acc07,$s0,`32-16+3`,21,28 |
412 | lwzx $acc02,$Tbl0,$acc02 | 423 | lwzx $acc00,$Tbl0,$acc00 |
413 | lwzx $acc03,$Tbl0,$acc03 | 424 | lwzx $acc01,$Tbl0,$acc01 |
414 | rlwinm $acc08,$s2,`32-8+3`,21,28 | 425 | rlwinm $acc08,$s2,`32-8+3`,21,28 |
415 | rlwinm $acc09,$s3,`32-8+3`,21,28 | 426 | rlwinm $acc09,$s3,`32-8+3`,21,28 |
416 | lwzx $acc04,$Tbl1,$acc04 | 427 | lwzx $acc02,$Tbl0,$acc02 |
417 | lwzx $acc05,$Tbl1,$acc05 | 428 | lwzx $acc03,$Tbl0,$acc03 |
418 | rlwinm $acc10,$s0,`32-8+3`,21,28 | 429 | rlwinm $acc10,$s0,`32-8+3`,21,28 |
419 | rlwinm $acc11,$s1,`32-8+3`,21,28 | 430 | rlwinm $acc11,$s1,`32-8+3`,21,28 |
420 | lwzx $acc06,$Tbl1,$acc06 | 431 | lwzx $acc04,$Tbl1,$acc04 |
421 | lwzx $acc07,$Tbl1,$acc07 | 432 | lwzx $acc05,$Tbl1,$acc05 |
422 | rlwinm $acc12,$s3,`0+3`,21,28 | 433 | rlwinm $acc12,$s3,`0+3`,21,28 |
423 | rlwinm $acc13,$s0,`0+3`,21,28 | 434 | rlwinm $acc13,$s0,`0+3`,21,28 |
424 | lwzx $acc08,$Tbl2,$acc08 | 435 | lwzx $acc06,$Tbl1,$acc06 |
425 | lwzx $acc09,$Tbl2,$acc09 | 436 | lwzx $acc07,$Tbl1,$acc07 |
426 | rlwinm $acc14,$s1,`0+3`,21,28 | 437 | rlwinm $acc14,$s1,`0+3`,21,28 |
427 | rlwinm $acc15,$s2,`0+3`,21,28 | 438 | rlwinm $acc15,$s2,`0+3`,21,28 |
428 | lwzx $acc10,$Tbl2,$acc10 | 439 | lwzx $acc08,$Tbl2,$acc08 |
429 | lwzx $acc11,$Tbl2,$acc11 | 440 | lwzx $acc09,$Tbl2,$acc09 |
430 | xor $t0,$t0,$acc00 | 441 | xor $t0,$t0,$acc00 |
431 | xor $t1,$t1,$acc01 | 442 | xor $t1,$t1,$acc01 |
432 | lwzx $acc12,$Tbl3,$acc12 | 443 | lwzx $acc10,$Tbl2,$acc10 |
433 | lwzx $acc13,$Tbl3,$acc13 | 444 | lwzx $acc11,$Tbl2,$acc11 |
434 | xor $t2,$t2,$acc02 | 445 | xor $t2,$t2,$acc02 |
435 | xor $t3,$t3,$acc03 | 446 | xor $t3,$t3,$acc03 |
436 | lwzx $acc14,$Tbl3,$acc14 | 447 | lwzx $acc12,$Tbl3,$acc12 |
437 | lwzx $acc15,$Tbl3,$acc15 | 448 | lwzx $acc13,$Tbl3,$acc13 |
438 | xor $t0,$t0,$acc04 | 449 | xor $t0,$t0,$acc04 |
439 | xor $t1,$t1,$acc05 | 450 | xor $t1,$t1,$acc05 |
451 | lwzx $acc14,$Tbl3,$acc14 | ||
452 | lwzx $acc15,$Tbl3,$acc15 | ||
440 | xor $t2,$t2,$acc06 | 453 | xor $t2,$t2,$acc06 |
441 | xor $t3,$t3,$acc07 | 454 | xor $t3,$t3,$acc07 |
442 | xor $t0,$t0,$acc08 | 455 | xor $t0,$t0,$acc08 |
@@ -452,60 +465,60 @@ Lenc_loop: | |||
452 | 465 | ||
453 | addi $Tbl2,$Tbl0,2048 | 466 | addi $Tbl2,$Tbl0,2048 |
454 | nop | 467 | nop |
455 | lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 | ||
456 | lwz $acc09,`2048+32`($Tbl0) | ||
457 | lwz $acc10,`2048+64`($Tbl0) | ||
458 | lwz $acc11,`2048+96`($Tbl0) | ||
459 | lwz $acc08,`2048+128`($Tbl0) | ||
460 | lwz $acc09,`2048+160`($Tbl0) | ||
461 | lwz $acc10,`2048+192`($Tbl0) | ||
462 | lwz $acc11,`2048+224`($Tbl0) | ||
463 | rlwinm $acc00,$s0,`32-24`,24,31 | ||
464 | rlwinm $acc01,$s1,`32-24`,24,31 | ||
465 | lwz $t0,0($key) | 468 | lwz $t0,0($key) |
466 | lwz $t1,4($key) | 469 | lwz $t1,4($key) |
467 | rlwinm $acc02,$s2,`32-24`,24,31 | 470 | rlwinm $acc00,$s0,`32-24`,24,31 |
468 | rlwinm $acc03,$s3,`32-24`,24,31 | 471 | rlwinm $acc01,$s1,`32-24`,24,31 |
469 | lwz $t2,8($key) | 472 | lwz $t2,8($key) |
470 | lwz $t3,12($key) | 473 | lwz $t3,12($key) |
474 | rlwinm $acc02,$s2,`32-24`,24,31 | ||
475 | rlwinm $acc03,$s3,`32-24`,24,31 | ||
476 | lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 | ||
477 | lwz $acc09,`2048+32`($Tbl0) | ||
471 | rlwinm $acc04,$s1,`32-16`,24,31 | 478 | rlwinm $acc04,$s1,`32-16`,24,31 |
472 | rlwinm $acc05,$s2,`32-16`,24,31 | 479 | rlwinm $acc05,$s2,`32-16`,24,31 |
473 | lbzx $acc00,$Tbl2,$acc00 | 480 | lwz $acc10,`2048+64`($Tbl0) |
474 | lbzx $acc01,$Tbl2,$acc01 | 481 | lwz $acc11,`2048+96`($Tbl0) |
475 | rlwinm $acc06,$s3,`32-16`,24,31 | 482 | rlwinm $acc06,$s3,`32-16`,24,31 |
476 | rlwinm $acc07,$s0,`32-16`,24,31 | 483 | rlwinm $acc07,$s0,`32-16`,24,31 |
477 | lbzx $acc02,$Tbl2,$acc02 | 484 | lwz $acc12,`2048+128`($Tbl0) |
478 | lbzx $acc03,$Tbl2,$acc03 | 485 | lwz $acc13,`2048+160`($Tbl0) |
479 | rlwinm $acc08,$s2,`32-8`,24,31 | 486 | rlwinm $acc08,$s2,`32-8`,24,31 |
480 | rlwinm $acc09,$s3,`32-8`,24,31 | 487 | rlwinm $acc09,$s3,`32-8`,24,31 |
481 | lbzx $acc04,$Tbl2,$acc04 | 488 | lwz $acc14,`2048+192`($Tbl0) |
482 | lbzx $acc05,$Tbl2,$acc05 | 489 | lwz $acc15,`2048+224`($Tbl0) |
483 | rlwinm $acc10,$s0,`32-8`,24,31 | 490 | rlwinm $acc10,$s0,`32-8`,24,31 |
484 | rlwinm $acc11,$s1,`32-8`,24,31 | 491 | rlwinm $acc11,$s1,`32-8`,24,31 |
485 | lbzx $acc06,$Tbl2,$acc06 | 492 | lbzx $acc00,$Tbl2,$acc00 |
486 | lbzx $acc07,$Tbl2,$acc07 | 493 | lbzx $acc01,$Tbl2,$acc01 |
487 | rlwinm $acc12,$s3,`0`,24,31 | 494 | rlwinm $acc12,$s3,`0`,24,31 |
488 | rlwinm $acc13,$s0,`0`,24,31 | 495 | rlwinm $acc13,$s0,`0`,24,31 |
489 | lbzx $acc08,$Tbl2,$acc08 | 496 | lbzx $acc02,$Tbl2,$acc02 |
490 | lbzx $acc09,$Tbl2,$acc09 | 497 | lbzx $acc03,$Tbl2,$acc03 |
491 | rlwinm $acc14,$s1,`0`,24,31 | 498 | rlwinm $acc14,$s1,`0`,24,31 |
492 | rlwinm $acc15,$s2,`0`,24,31 | 499 | rlwinm $acc15,$s2,`0`,24,31 |
493 | lbzx $acc10,$Tbl2,$acc10 | 500 | lbzx $acc04,$Tbl2,$acc04 |
494 | lbzx $acc11,$Tbl2,$acc11 | 501 | lbzx $acc05,$Tbl2,$acc05 |
495 | rlwinm $s0,$acc00,24,0,7 | 502 | rlwinm $s0,$acc00,24,0,7 |
496 | rlwinm $s1,$acc01,24,0,7 | 503 | rlwinm $s1,$acc01,24,0,7 |
497 | lbzx $acc12,$Tbl2,$acc12 | 504 | lbzx $acc06,$Tbl2,$acc06 |
498 | lbzx $acc13,$Tbl2,$acc13 | 505 | lbzx $acc07,$Tbl2,$acc07 |
499 | rlwinm $s2,$acc02,24,0,7 | 506 | rlwinm $s2,$acc02,24,0,7 |
500 | rlwinm $s3,$acc03,24,0,7 | 507 | rlwinm $s3,$acc03,24,0,7 |
501 | lbzx $acc14,$Tbl2,$acc14 | 508 | lbzx $acc08,$Tbl2,$acc08 |
502 | lbzx $acc15,$Tbl2,$acc15 | 509 | lbzx $acc09,$Tbl2,$acc09 |
503 | rlwimi $s0,$acc04,16,8,15 | 510 | rlwimi $s0,$acc04,16,8,15 |
504 | rlwimi $s1,$acc05,16,8,15 | 511 | rlwimi $s1,$acc05,16,8,15 |
512 | lbzx $acc10,$Tbl2,$acc10 | ||
513 | lbzx $acc11,$Tbl2,$acc11 | ||
505 | rlwimi $s2,$acc06,16,8,15 | 514 | rlwimi $s2,$acc06,16,8,15 |
506 | rlwimi $s3,$acc07,16,8,15 | 515 | rlwimi $s3,$acc07,16,8,15 |
516 | lbzx $acc12,$Tbl2,$acc12 | ||
517 | lbzx $acc13,$Tbl2,$acc13 | ||
507 | rlwimi $s0,$acc08,8,16,23 | 518 | rlwimi $s0,$acc08,8,16,23 |
508 | rlwimi $s1,$acc09,8,16,23 | 519 | rlwimi $s1,$acc09,8,16,23 |
520 | lbzx $acc14,$Tbl2,$acc14 | ||
521 | lbzx $acc15,$Tbl2,$acc15 | ||
509 | rlwimi $s2,$acc10,8,16,23 | 522 | rlwimi $s2,$acc10,8,16,23 |
510 | rlwimi $s3,$acc11,8,16,23 | 523 | rlwimi $s3,$acc11,8,16,23 |
511 | or $s0,$s0,$acc12 | 524 | or $s0,$s0,$acc12 |
@@ -542,40 +555,40 @@ Lenc_compact_loop: | |||
542 | rlwinm $acc01,$s1,`32-24`,24,31 | 555 | rlwinm $acc01,$s1,`32-24`,24,31 |
543 | rlwinm $acc02,$s2,`32-24`,24,31 | 556 | rlwinm $acc02,$s2,`32-24`,24,31 |
544 | rlwinm $acc03,$s3,`32-24`,24,31 | 557 | rlwinm $acc03,$s3,`32-24`,24,31 |
545 | lbzx $acc00,$Tbl1,$acc00 | ||
546 | lbzx $acc01,$Tbl1,$acc01 | ||
547 | rlwinm $acc04,$s1,`32-16`,24,31 | 558 | rlwinm $acc04,$s1,`32-16`,24,31 |
548 | rlwinm $acc05,$s2,`32-16`,24,31 | 559 | rlwinm $acc05,$s2,`32-16`,24,31 |
549 | lbzx $acc02,$Tbl1,$acc02 | ||
550 | lbzx $acc03,$Tbl1,$acc03 | ||
551 | rlwinm $acc06,$s3,`32-16`,24,31 | 560 | rlwinm $acc06,$s3,`32-16`,24,31 |
552 | rlwinm $acc07,$s0,`32-16`,24,31 | 561 | rlwinm $acc07,$s0,`32-16`,24,31 |
553 | lbzx $acc04,$Tbl1,$acc04 | 562 | lbzx $acc00,$Tbl1,$acc00 |
554 | lbzx $acc05,$Tbl1,$acc05 | 563 | lbzx $acc01,$Tbl1,$acc01 |
555 | rlwinm $acc08,$s2,`32-8`,24,31 | 564 | rlwinm $acc08,$s2,`32-8`,24,31 |
556 | rlwinm $acc09,$s3,`32-8`,24,31 | 565 | rlwinm $acc09,$s3,`32-8`,24,31 |
557 | lbzx $acc06,$Tbl1,$acc06 | 566 | lbzx $acc02,$Tbl1,$acc02 |
558 | lbzx $acc07,$Tbl1,$acc07 | 567 | lbzx $acc03,$Tbl1,$acc03 |
559 | rlwinm $acc10,$s0,`32-8`,24,31 | 568 | rlwinm $acc10,$s0,`32-8`,24,31 |
560 | rlwinm $acc11,$s1,`32-8`,24,31 | 569 | rlwinm $acc11,$s1,`32-8`,24,31 |
561 | lbzx $acc08,$Tbl1,$acc08 | 570 | lbzx $acc04,$Tbl1,$acc04 |
562 | lbzx $acc09,$Tbl1,$acc09 | 571 | lbzx $acc05,$Tbl1,$acc05 |
563 | rlwinm $acc12,$s3,`0`,24,31 | 572 | rlwinm $acc12,$s3,`0`,24,31 |
564 | rlwinm $acc13,$s0,`0`,24,31 | 573 | rlwinm $acc13,$s0,`0`,24,31 |
565 | lbzx $acc10,$Tbl1,$acc10 | 574 | lbzx $acc06,$Tbl1,$acc06 |
566 | lbzx $acc11,$Tbl1,$acc11 | 575 | lbzx $acc07,$Tbl1,$acc07 |
567 | rlwinm $acc14,$s1,`0`,24,31 | 576 | rlwinm $acc14,$s1,`0`,24,31 |
568 | rlwinm $acc15,$s2,`0`,24,31 | 577 | rlwinm $acc15,$s2,`0`,24,31 |
569 | lbzx $acc12,$Tbl1,$acc12 | 578 | lbzx $acc08,$Tbl1,$acc08 |
570 | lbzx $acc13,$Tbl1,$acc13 | 579 | lbzx $acc09,$Tbl1,$acc09 |
571 | rlwinm $s0,$acc00,24,0,7 | 580 | rlwinm $s0,$acc00,24,0,7 |
572 | rlwinm $s1,$acc01,24,0,7 | 581 | rlwinm $s1,$acc01,24,0,7 |
573 | lbzx $acc14,$Tbl1,$acc14 | 582 | lbzx $acc10,$Tbl1,$acc10 |
574 | lbzx $acc15,$Tbl1,$acc15 | 583 | lbzx $acc11,$Tbl1,$acc11 |
575 | rlwinm $s2,$acc02,24,0,7 | 584 | rlwinm $s2,$acc02,24,0,7 |
576 | rlwinm $s3,$acc03,24,0,7 | 585 | rlwinm $s3,$acc03,24,0,7 |
586 | lbzx $acc12,$Tbl1,$acc12 | ||
587 | lbzx $acc13,$Tbl1,$acc13 | ||
577 | rlwimi $s0,$acc04,16,8,15 | 588 | rlwimi $s0,$acc04,16,8,15 |
578 | rlwimi $s1,$acc05,16,8,15 | 589 | rlwimi $s1,$acc05,16,8,15 |
590 | lbzx $acc14,$Tbl1,$acc14 | ||
591 | lbzx $acc15,$Tbl1,$acc15 | ||
579 | rlwimi $s2,$acc06,16,8,15 | 592 | rlwimi $s2,$acc06,16,8,15 |
580 | rlwimi $s3,$acc07,16,8,15 | 593 | rlwimi $s3,$acc07,16,8,15 |
581 | rlwimi $s0,$acc08,8,16,23 | 594 | rlwimi $s0,$acc08,8,16,23 |
@@ -725,7 +738,7 @@ Lenc_compact_done: | |||
725 | addi $sp,$sp,$FRAME | 738 | addi $sp,$sp,$FRAME |
726 | blr | 739 | blr |
727 | 740 | ||
728 | .align 4 | 741 | .align 5 |
729 | Lppc_AES_decrypt: | 742 | Lppc_AES_decrypt: |
730 | lwz $acc00,240($key) | 743 | lwz $acc00,240($key) |
731 | lwz $t0,0($key) | 744 | lwz $t0,0($key) |
@@ -746,46 +759,46 @@ Lppc_AES_decrypt: | |||
746 | Ldec_loop: | 759 | Ldec_loop: |
747 | rlwinm $acc00,$s0,`32-24+3`,21,28 | 760 | rlwinm $acc00,$s0,`32-24+3`,21,28 |
748 | rlwinm $acc01,$s1,`32-24+3`,21,28 | 761 | rlwinm $acc01,$s1,`32-24+3`,21,28 |
749 | lwz $t0,0($key) | ||
750 | lwz $t1,4($key) | ||
751 | rlwinm $acc02,$s2,`32-24+3`,21,28 | 762 | rlwinm $acc02,$s2,`32-24+3`,21,28 |
752 | rlwinm $acc03,$s3,`32-24+3`,21,28 | 763 | rlwinm $acc03,$s3,`32-24+3`,21,28 |
753 | lwz $t2,8($key) | 764 | lwz $t0,0($key) |
754 | lwz $t3,12($key) | 765 | lwz $t1,4($key) |
755 | rlwinm $acc04,$s3,`32-16+3`,21,28 | 766 | rlwinm $acc04,$s3,`32-16+3`,21,28 |
756 | rlwinm $acc05,$s0,`32-16+3`,21,28 | 767 | rlwinm $acc05,$s0,`32-16+3`,21,28 |
757 | lwzx $acc00,$Tbl0,$acc00 | 768 | lwz $t2,8($key) |
758 | lwzx $acc01,$Tbl0,$acc01 | 769 | lwz $t3,12($key) |
759 | rlwinm $acc06,$s1,`32-16+3`,21,28 | 770 | rlwinm $acc06,$s1,`32-16+3`,21,28 |
760 | rlwinm $acc07,$s2,`32-16+3`,21,28 | 771 | rlwinm $acc07,$s2,`32-16+3`,21,28 |
761 | lwzx $acc02,$Tbl0,$acc02 | 772 | lwzx $acc00,$Tbl0,$acc00 |
762 | lwzx $acc03,$Tbl0,$acc03 | 773 | lwzx $acc01,$Tbl0,$acc01 |
763 | rlwinm $acc08,$s2,`32-8+3`,21,28 | 774 | rlwinm $acc08,$s2,`32-8+3`,21,28 |
764 | rlwinm $acc09,$s3,`32-8+3`,21,28 | 775 | rlwinm $acc09,$s3,`32-8+3`,21,28 |
765 | lwzx $acc04,$Tbl1,$acc04 | 776 | lwzx $acc02,$Tbl0,$acc02 |
766 | lwzx $acc05,$Tbl1,$acc05 | 777 | lwzx $acc03,$Tbl0,$acc03 |
767 | rlwinm $acc10,$s0,`32-8+3`,21,28 | 778 | rlwinm $acc10,$s0,`32-8+3`,21,28 |
768 | rlwinm $acc11,$s1,`32-8+3`,21,28 | 779 | rlwinm $acc11,$s1,`32-8+3`,21,28 |
769 | lwzx $acc06,$Tbl1,$acc06 | 780 | lwzx $acc04,$Tbl1,$acc04 |
770 | lwzx $acc07,$Tbl1,$acc07 | 781 | lwzx $acc05,$Tbl1,$acc05 |
771 | rlwinm $acc12,$s1,`0+3`,21,28 | 782 | rlwinm $acc12,$s1,`0+3`,21,28 |
772 | rlwinm $acc13,$s2,`0+3`,21,28 | 783 | rlwinm $acc13,$s2,`0+3`,21,28 |
773 | lwzx $acc08,$Tbl2,$acc08 | 784 | lwzx $acc06,$Tbl1,$acc06 |
774 | lwzx $acc09,$Tbl2,$acc09 | 785 | lwzx $acc07,$Tbl1,$acc07 |
775 | rlwinm $acc14,$s3,`0+3`,21,28 | 786 | rlwinm $acc14,$s3,`0+3`,21,28 |
776 | rlwinm $acc15,$s0,`0+3`,21,28 | 787 | rlwinm $acc15,$s0,`0+3`,21,28 |
777 | lwzx $acc10,$Tbl2,$acc10 | 788 | lwzx $acc08,$Tbl2,$acc08 |
778 | lwzx $acc11,$Tbl2,$acc11 | 789 | lwzx $acc09,$Tbl2,$acc09 |
779 | xor $t0,$t0,$acc00 | 790 | xor $t0,$t0,$acc00 |
780 | xor $t1,$t1,$acc01 | 791 | xor $t1,$t1,$acc01 |
781 | lwzx $acc12,$Tbl3,$acc12 | 792 | lwzx $acc10,$Tbl2,$acc10 |
782 | lwzx $acc13,$Tbl3,$acc13 | 793 | lwzx $acc11,$Tbl2,$acc11 |
783 | xor $t2,$t2,$acc02 | 794 | xor $t2,$t2,$acc02 |
784 | xor $t3,$t3,$acc03 | 795 | xor $t3,$t3,$acc03 |
785 | lwzx $acc14,$Tbl3,$acc14 | 796 | lwzx $acc12,$Tbl3,$acc12 |
786 | lwzx $acc15,$Tbl3,$acc15 | 797 | lwzx $acc13,$Tbl3,$acc13 |
787 | xor $t0,$t0,$acc04 | 798 | xor $t0,$t0,$acc04 |
788 | xor $t1,$t1,$acc05 | 799 | xor $t1,$t1,$acc05 |
800 | lwzx $acc14,$Tbl3,$acc14 | ||
801 | lwzx $acc15,$Tbl3,$acc15 | ||
789 | xor $t2,$t2,$acc06 | 802 | xor $t2,$t2,$acc06 |
790 | xor $t3,$t3,$acc07 | 803 | xor $t3,$t3,$acc07 |
791 | xor $t0,$t0,$acc08 | 804 | xor $t0,$t0,$acc08 |
@@ -801,56 +814,56 @@ Ldec_loop: | |||
801 | 814 | ||
802 | addi $Tbl2,$Tbl0,2048 | 815 | addi $Tbl2,$Tbl0,2048 |
803 | nop | 816 | nop |
804 | lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4 | ||
805 | lwz $acc09,`2048+32`($Tbl0) | ||
806 | lwz $acc10,`2048+64`($Tbl0) | ||
807 | lwz $acc11,`2048+96`($Tbl0) | ||
808 | lwz $acc08,`2048+128`($Tbl0) | ||
809 | lwz $acc09,`2048+160`($Tbl0) | ||
810 | lwz $acc10,`2048+192`($Tbl0) | ||
811 | lwz $acc11,`2048+224`($Tbl0) | ||
812 | rlwinm $acc00,$s0,`32-24`,24,31 | ||
813 | rlwinm $acc01,$s1,`32-24`,24,31 | ||
814 | lwz $t0,0($key) | 817 | lwz $t0,0($key) |
815 | lwz $t1,4($key) | 818 | lwz $t1,4($key) |
816 | rlwinm $acc02,$s2,`32-24`,24,31 | 819 | rlwinm $acc00,$s0,`32-24`,24,31 |
817 | rlwinm $acc03,$s3,`32-24`,24,31 | 820 | rlwinm $acc01,$s1,`32-24`,24,31 |
818 | lwz $t2,8($key) | 821 | lwz $t2,8($key) |
819 | lwz $t3,12($key) | 822 | lwz $t3,12($key) |
823 | rlwinm $acc02,$s2,`32-24`,24,31 | ||
824 | rlwinm $acc03,$s3,`32-24`,24,31 | ||
825 | lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4 | ||
826 | lwz $acc09,`2048+32`($Tbl0) | ||
820 | rlwinm $acc04,$s3,`32-16`,24,31 | 827 | rlwinm $acc04,$s3,`32-16`,24,31 |
821 | rlwinm $acc05,$s0,`32-16`,24,31 | 828 | rlwinm $acc05,$s0,`32-16`,24,31 |
829 | lwz $acc10,`2048+64`($Tbl0) | ||
830 | lwz $acc11,`2048+96`($Tbl0) | ||
822 | lbzx $acc00,$Tbl2,$acc00 | 831 | lbzx $acc00,$Tbl2,$acc00 |
823 | lbzx $acc01,$Tbl2,$acc01 | 832 | lbzx $acc01,$Tbl2,$acc01 |
833 | lwz $acc12,`2048+128`($Tbl0) | ||
834 | lwz $acc13,`2048+160`($Tbl0) | ||
824 | rlwinm $acc06,$s1,`32-16`,24,31 | 835 | rlwinm $acc06,$s1,`32-16`,24,31 |
825 | rlwinm $acc07,$s2,`32-16`,24,31 | 836 | rlwinm $acc07,$s2,`32-16`,24,31 |
826 | lbzx $acc02,$Tbl2,$acc02 | 837 | lwz $acc14,`2048+192`($Tbl0) |
827 | lbzx $acc03,$Tbl2,$acc03 | 838 | lwz $acc15,`2048+224`($Tbl0) |
828 | rlwinm $acc08,$s2,`32-8`,24,31 | 839 | rlwinm $acc08,$s2,`32-8`,24,31 |
829 | rlwinm $acc09,$s3,`32-8`,24,31 | 840 | rlwinm $acc09,$s3,`32-8`,24,31 |
830 | lbzx $acc04,$Tbl2,$acc04 | 841 | lbzx $acc02,$Tbl2,$acc02 |
831 | lbzx $acc05,$Tbl2,$acc05 | 842 | lbzx $acc03,$Tbl2,$acc03 |
832 | rlwinm $acc10,$s0,`32-8`,24,31 | 843 | rlwinm $acc10,$s0,`32-8`,24,31 |
833 | rlwinm $acc11,$s1,`32-8`,24,31 | 844 | rlwinm $acc11,$s1,`32-8`,24,31 |
834 | lbzx $acc06,$Tbl2,$acc06 | 845 | lbzx $acc04,$Tbl2,$acc04 |
835 | lbzx $acc07,$Tbl2,$acc07 | 846 | lbzx $acc05,$Tbl2,$acc05 |
836 | rlwinm $acc12,$s1,`0`,24,31 | 847 | rlwinm $acc12,$s1,`0`,24,31 |
837 | rlwinm $acc13,$s2,`0`,24,31 | 848 | rlwinm $acc13,$s2,`0`,24,31 |
838 | lbzx $acc08,$Tbl2,$acc08 | 849 | lbzx $acc06,$Tbl2,$acc06 |
839 | lbzx $acc09,$Tbl2,$acc09 | 850 | lbzx $acc07,$Tbl2,$acc07 |
840 | rlwinm $acc14,$s3,`0`,24,31 | 851 | rlwinm $acc14,$s3,`0`,24,31 |
841 | rlwinm $acc15,$s0,`0`,24,31 | 852 | rlwinm $acc15,$s0,`0`,24,31 |
842 | lbzx $acc10,$Tbl2,$acc10 | 853 | lbzx $acc08,$Tbl2,$acc08 |
843 | lbzx $acc11,$Tbl2,$acc11 | 854 | lbzx $acc09,$Tbl2,$acc09 |
844 | rlwinm $s0,$acc00,24,0,7 | 855 | rlwinm $s0,$acc00,24,0,7 |
845 | rlwinm $s1,$acc01,24,0,7 | 856 | rlwinm $s1,$acc01,24,0,7 |
846 | lbzx $acc12,$Tbl2,$acc12 | 857 | lbzx $acc10,$Tbl2,$acc10 |
847 | lbzx $acc13,$Tbl2,$acc13 | 858 | lbzx $acc11,$Tbl2,$acc11 |
848 | rlwinm $s2,$acc02,24,0,7 | 859 | rlwinm $s2,$acc02,24,0,7 |
849 | rlwinm $s3,$acc03,24,0,7 | 860 | rlwinm $s3,$acc03,24,0,7 |
850 | lbzx $acc14,$Tbl2,$acc14 | 861 | lbzx $acc12,$Tbl2,$acc12 |
851 | lbzx $acc15,$Tbl2,$acc15 | 862 | lbzx $acc13,$Tbl2,$acc13 |
852 | rlwimi $s0,$acc04,16,8,15 | 863 | rlwimi $s0,$acc04,16,8,15 |
853 | rlwimi $s1,$acc05,16,8,15 | 864 | rlwimi $s1,$acc05,16,8,15 |
865 | lbzx $acc14,$Tbl2,$acc14 | ||
866 | lbzx $acc15,$Tbl2,$acc15 | ||
854 | rlwimi $s2,$acc06,16,8,15 | 867 | rlwimi $s2,$acc06,16,8,15 |
855 | rlwimi $s3,$acc07,16,8,15 | 868 | rlwimi $s3,$acc07,16,8,15 |
856 | rlwimi $s0,$acc08,8,16,23 | 869 | rlwimi $s0,$acc08,8,16,23 |
@@ -897,40 +910,40 @@ Ldec_compact_loop: | |||
897 | rlwinm $acc01,$s1,`32-24`,24,31 | 910 | rlwinm $acc01,$s1,`32-24`,24,31 |
898 | rlwinm $acc02,$s2,`32-24`,24,31 | 911 | rlwinm $acc02,$s2,`32-24`,24,31 |
899 | rlwinm $acc03,$s3,`32-24`,24,31 | 912 | rlwinm $acc03,$s3,`32-24`,24,31 |
900 | lbzx $acc00,$Tbl1,$acc00 | ||
901 | lbzx $acc01,$Tbl1,$acc01 | ||
902 | rlwinm $acc04,$s3,`32-16`,24,31 | 913 | rlwinm $acc04,$s3,`32-16`,24,31 |
903 | rlwinm $acc05,$s0,`32-16`,24,31 | 914 | rlwinm $acc05,$s0,`32-16`,24,31 |
904 | lbzx $acc02,$Tbl1,$acc02 | ||
905 | lbzx $acc03,$Tbl1,$acc03 | ||
906 | rlwinm $acc06,$s1,`32-16`,24,31 | 915 | rlwinm $acc06,$s1,`32-16`,24,31 |
907 | rlwinm $acc07,$s2,`32-16`,24,31 | 916 | rlwinm $acc07,$s2,`32-16`,24,31 |
908 | lbzx $acc04,$Tbl1,$acc04 | 917 | lbzx $acc00,$Tbl1,$acc00 |
909 | lbzx $acc05,$Tbl1,$acc05 | 918 | lbzx $acc01,$Tbl1,$acc01 |
910 | rlwinm $acc08,$s2,`32-8`,24,31 | 919 | rlwinm $acc08,$s2,`32-8`,24,31 |
911 | rlwinm $acc09,$s3,`32-8`,24,31 | 920 | rlwinm $acc09,$s3,`32-8`,24,31 |
912 | lbzx $acc06,$Tbl1,$acc06 | 921 | lbzx $acc02,$Tbl1,$acc02 |
913 | lbzx $acc07,$Tbl1,$acc07 | 922 | lbzx $acc03,$Tbl1,$acc03 |
914 | rlwinm $acc10,$s0,`32-8`,24,31 | 923 | rlwinm $acc10,$s0,`32-8`,24,31 |
915 | rlwinm $acc11,$s1,`32-8`,24,31 | 924 | rlwinm $acc11,$s1,`32-8`,24,31 |
916 | lbzx $acc08,$Tbl1,$acc08 | 925 | lbzx $acc04,$Tbl1,$acc04 |
917 | lbzx $acc09,$Tbl1,$acc09 | 926 | lbzx $acc05,$Tbl1,$acc05 |
918 | rlwinm $acc12,$s1,`0`,24,31 | 927 | rlwinm $acc12,$s1,`0`,24,31 |
919 | rlwinm $acc13,$s2,`0`,24,31 | 928 | rlwinm $acc13,$s2,`0`,24,31 |
920 | lbzx $acc10,$Tbl1,$acc10 | 929 | lbzx $acc06,$Tbl1,$acc06 |
921 | lbzx $acc11,$Tbl1,$acc11 | 930 | lbzx $acc07,$Tbl1,$acc07 |
922 | rlwinm $acc14,$s3,`0`,24,31 | 931 | rlwinm $acc14,$s3,`0`,24,31 |
923 | rlwinm $acc15,$s0,`0`,24,31 | 932 | rlwinm $acc15,$s0,`0`,24,31 |
924 | lbzx $acc12,$Tbl1,$acc12 | 933 | lbzx $acc08,$Tbl1,$acc08 |
925 | lbzx $acc13,$Tbl1,$acc13 | 934 | lbzx $acc09,$Tbl1,$acc09 |
926 | rlwinm $s0,$acc00,24,0,7 | 935 | rlwinm $s0,$acc00,24,0,7 |
927 | rlwinm $s1,$acc01,24,0,7 | 936 | rlwinm $s1,$acc01,24,0,7 |
928 | lbzx $acc14,$Tbl1,$acc14 | 937 | lbzx $acc10,$Tbl1,$acc10 |
929 | lbzx $acc15,$Tbl1,$acc15 | 938 | lbzx $acc11,$Tbl1,$acc11 |
930 | rlwinm $s2,$acc02,24,0,7 | 939 | rlwinm $s2,$acc02,24,0,7 |
931 | rlwinm $s3,$acc03,24,0,7 | 940 | rlwinm $s3,$acc03,24,0,7 |
941 | lbzx $acc12,$Tbl1,$acc12 | ||
942 | lbzx $acc13,$Tbl1,$acc13 | ||
932 | rlwimi $s0,$acc04,16,8,15 | 943 | rlwimi $s0,$acc04,16,8,15 |
933 | rlwimi $s1,$acc05,16,8,15 | 944 | rlwimi $s1,$acc05,16,8,15 |
945 | lbzx $acc14,$Tbl1,$acc14 | ||
946 | lbzx $acc15,$Tbl1,$acc15 | ||
934 | rlwimi $s2,$acc06,16,8,15 | 947 | rlwimi $s2,$acc06,16,8,15 |
935 | rlwimi $s3,$acc07,16,8,15 | 948 | rlwimi $s3,$acc07,16,8,15 |
936 | rlwimi $s0,$acc08,8,16,23 | 949 | rlwimi $s0,$acc08,8,16,23 |