summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/aes/asm/aes-ppc.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/aes/asm/aes-ppc.pl')
-rw-r--r--src/lib/libcrypto/aes/asm/aes-ppc.pl269
1 files changed, 141 insertions, 128 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-ppc.pl b/src/lib/libcrypto/aes/asm/aes-ppc.pl
index ce427655ef..f82c5e1814 100644
--- a/src/lib/libcrypto/aes/asm/aes-ppc.pl
+++ b/src/lib/libcrypto/aes/asm/aes-ppc.pl
@@ -16,6 +16,19 @@
16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - 16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17# at 1/3 of ppc_AES_decrypt. 17# at 1/3 of ppc_AES_decrypt.
18 18
19# February 2010
20#
21# Rescheduling instructions to favour Power6 pipeline gives 10%
22# performance improvement on the platform in question (and marginal
23# improvement even on others). It should be noted that Power6 fails
24# to process a byte in 18 cycles, only in 23, because it fails to issue
25# 4 load instructions in two cycles, only in 3. As a result, non-compact
26# block subroutines are 25% slower than one would expect. Compact
27# functions scale better, because they have pure computational part,
28# which scales perfectly with clock frequency. To be specific
29# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
19$flavour = shift; 32$flavour = shift;
20 33
21if ($flavour =~ /64/) { 34if ($flavour =~ /64/) {
@@ -376,7 +389,7 @@ $code.=<<___;
376 addi $sp,$sp,$FRAME 389 addi $sp,$sp,$FRAME
377 blr 390 blr
378 391
379.align 4 392.align 5
380Lppc_AES_encrypt: 393Lppc_AES_encrypt:
381 lwz $acc00,240($key) 394 lwz $acc00,240($key)
382 lwz $t0,0($key) 395 lwz $t0,0($key)
@@ -397,46 +410,46 @@ Lppc_AES_encrypt:
397Lenc_loop: 410Lenc_loop:
398 rlwinm $acc00,$s0,`32-24+3`,21,28 411 rlwinm $acc00,$s0,`32-24+3`,21,28
399 rlwinm $acc01,$s1,`32-24+3`,21,28 412 rlwinm $acc01,$s1,`32-24+3`,21,28
400 lwz $t0,0($key)
401 lwz $t1,4($key)
402 rlwinm $acc02,$s2,`32-24+3`,21,28 413 rlwinm $acc02,$s2,`32-24+3`,21,28
403 rlwinm $acc03,$s3,`32-24+3`,21,28 414 rlwinm $acc03,$s3,`32-24+3`,21,28
404 lwz $t2,8($key) 415 lwz $t0,0($key)
405 lwz $t3,12($key) 416 lwz $t1,4($key)
406 rlwinm $acc04,$s1,`32-16+3`,21,28 417 rlwinm $acc04,$s1,`32-16+3`,21,28
407 rlwinm $acc05,$s2,`32-16+3`,21,28 418 rlwinm $acc05,$s2,`32-16+3`,21,28
408 lwzx $acc00,$Tbl0,$acc00 419 lwz $t2,8($key)
409 lwzx $acc01,$Tbl0,$acc01 420 lwz $t3,12($key)
410 rlwinm $acc06,$s3,`32-16+3`,21,28 421 rlwinm $acc06,$s3,`32-16+3`,21,28
411 rlwinm $acc07,$s0,`32-16+3`,21,28 422 rlwinm $acc07,$s0,`32-16+3`,21,28
412 lwzx $acc02,$Tbl0,$acc02 423 lwzx $acc00,$Tbl0,$acc00
413 lwzx $acc03,$Tbl0,$acc03 424 lwzx $acc01,$Tbl0,$acc01
414 rlwinm $acc08,$s2,`32-8+3`,21,28 425 rlwinm $acc08,$s2,`32-8+3`,21,28
415 rlwinm $acc09,$s3,`32-8+3`,21,28 426 rlwinm $acc09,$s3,`32-8+3`,21,28
416 lwzx $acc04,$Tbl1,$acc04 427 lwzx $acc02,$Tbl0,$acc02
417 lwzx $acc05,$Tbl1,$acc05 428 lwzx $acc03,$Tbl0,$acc03
418 rlwinm $acc10,$s0,`32-8+3`,21,28 429 rlwinm $acc10,$s0,`32-8+3`,21,28
419 rlwinm $acc11,$s1,`32-8+3`,21,28 430 rlwinm $acc11,$s1,`32-8+3`,21,28
420 lwzx $acc06,$Tbl1,$acc06 431 lwzx $acc04,$Tbl1,$acc04
421 lwzx $acc07,$Tbl1,$acc07 432 lwzx $acc05,$Tbl1,$acc05
422 rlwinm $acc12,$s3,`0+3`,21,28 433 rlwinm $acc12,$s3,`0+3`,21,28
423 rlwinm $acc13,$s0,`0+3`,21,28 434 rlwinm $acc13,$s0,`0+3`,21,28
424 lwzx $acc08,$Tbl2,$acc08 435 lwzx $acc06,$Tbl1,$acc06
425 lwzx $acc09,$Tbl2,$acc09 436 lwzx $acc07,$Tbl1,$acc07
426 rlwinm $acc14,$s1,`0+3`,21,28 437 rlwinm $acc14,$s1,`0+3`,21,28
427 rlwinm $acc15,$s2,`0+3`,21,28 438 rlwinm $acc15,$s2,`0+3`,21,28
428 lwzx $acc10,$Tbl2,$acc10 439 lwzx $acc08,$Tbl2,$acc08
429 lwzx $acc11,$Tbl2,$acc11 440 lwzx $acc09,$Tbl2,$acc09
430 xor $t0,$t0,$acc00 441 xor $t0,$t0,$acc00
431 xor $t1,$t1,$acc01 442 xor $t1,$t1,$acc01
432 lwzx $acc12,$Tbl3,$acc12 443 lwzx $acc10,$Tbl2,$acc10
433 lwzx $acc13,$Tbl3,$acc13 444 lwzx $acc11,$Tbl2,$acc11
434 xor $t2,$t2,$acc02 445 xor $t2,$t2,$acc02
435 xor $t3,$t3,$acc03 446 xor $t3,$t3,$acc03
436 lwzx $acc14,$Tbl3,$acc14 447 lwzx $acc12,$Tbl3,$acc12
437 lwzx $acc15,$Tbl3,$acc15 448 lwzx $acc13,$Tbl3,$acc13
438 xor $t0,$t0,$acc04 449 xor $t0,$t0,$acc04
439 xor $t1,$t1,$acc05 450 xor $t1,$t1,$acc05
451 lwzx $acc14,$Tbl3,$acc14
452 lwzx $acc15,$Tbl3,$acc15
440 xor $t2,$t2,$acc06 453 xor $t2,$t2,$acc06
441 xor $t3,$t3,$acc07 454 xor $t3,$t3,$acc07
442 xor $t0,$t0,$acc08 455 xor $t0,$t0,$acc08
@@ -452,60 +465,60 @@ Lenc_loop:
452 465
453 addi $Tbl2,$Tbl0,2048 466 addi $Tbl2,$Tbl0,2048
454 nop 467 nop
455 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
456 lwz $acc09,`2048+32`($Tbl0)
457 lwz $acc10,`2048+64`($Tbl0)
458 lwz $acc11,`2048+96`($Tbl0)
459 lwz $acc08,`2048+128`($Tbl0)
460 lwz $acc09,`2048+160`($Tbl0)
461 lwz $acc10,`2048+192`($Tbl0)
462 lwz $acc11,`2048+224`($Tbl0)
463 rlwinm $acc00,$s0,`32-24`,24,31
464 rlwinm $acc01,$s1,`32-24`,24,31
465 lwz $t0,0($key) 468 lwz $t0,0($key)
466 lwz $t1,4($key) 469 lwz $t1,4($key)
467 rlwinm $acc02,$s2,`32-24`,24,31 470 rlwinm $acc00,$s0,`32-24`,24,31
468 rlwinm $acc03,$s3,`32-24`,24,31 471 rlwinm $acc01,$s1,`32-24`,24,31
469 lwz $t2,8($key) 472 lwz $t2,8($key)
470 lwz $t3,12($key) 473 lwz $t3,12($key)
474 rlwinm $acc02,$s2,`32-24`,24,31
475 rlwinm $acc03,$s3,`32-24`,24,31
476 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
477 lwz $acc09,`2048+32`($Tbl0)
471 rlwinm $acc04,$s1,`32-16`,24,31 478 rlwinm $acc04,$s1,`32-16`,24,31
472 rlwinm $acc05,$s2,`32-16`,24,31 479 rlwinm $acc05,$s2,`32-16`,24,31
473 lbzx $acc00,$Tbl2,$acc00 480 lwz $acc10,`2048+64`($Tbl0)
474 lbzx $acc01,$Tbl2,$acc01 481 lwz $acc11,`2048+96`($Tbl0)
475 rlwinm $acc06,$s3,`32-16`,24,31 482 rlwinm $acc06,$s3,`32-16`,24,31
476 rlwinm $acc07,$s0,`32-16`,24,31 483 rlwinm $acc07,$s0,`32-16`,24,31
477 lbzx $acc02,$Tbl2,$acc02 484 lwz $acc12,`2048+128`($Tbl0)
478 lbzx $acc03,$Tbl2,$acc03 485 lwz $acc13,`2048+160`($Tbl0)
479 rlwinm $acc08,$s2,`32-8`,24,31 486 rlwinm $acc08,$s2,`32-8`,24,31
480 rlwinm $acc09,$s3,`32-8`,24,31 487 rlwinm $acc09,$s3,`32-8`,24,31
481 lbzx $acc04,$Tbl2,$acc04 488 lwz $acc14,`2048+192`($Tbl0)
482 lbzx $acc05,$Tbl2,$acc05 489 lwz $acc15,`2048+224`($Tbl0)
483 rlwinm $acc10,$s0,`32-8`,24,31 490 rlwinm $acc10,$s0,`32-8`,24,31
484 rlwinm $acc11,$s1,`32-8`,24,31 491 rlwinm $acc11,$s1,`32-8`,24,31
485 lbzx $acc06,$Tbl2,$acc06 492 lbzx $acc00,$Tbl2,$acc00
486 lbzx $acc07,$Tbl2,$acc07 493 lbzx $acc01,$Tbl2,$acc01
487 rlwinm $acc12,$s3,`0`,24,31 494 rlwinm $acc12,$s3,`0`,24,31
488 rlwinm $acc13,$s0,`0`,24,31 495 rlwinm $acc13,$s0,`0`,24,31
489 lbzx $acc08,$Tbl2,$acc08 496 lbzx $acc02,$Tbl2,$acc02
490 lbzx $acc09,$Tbl2,$acc09 497 lbzx $acc03,$Tbl2,$acc03
491 rlwinm $acc14,$s1,`0`,24,31 498 rlwinm $acc14,$s1,`0`,24,31
492 rlwinm $acc15,$s2,`0`,24,31 499 rlwinm $acc15,$s2,`0`,24,31
493 lbzx $acc10,$Tbl2,$acc10 500 lbzx $acc04,$Tbl2,$acc04
494 lbzx $acc11,$Tbl2,$acc11 501 lbzx $acc05,$Tbl2,$acc05
495 rlwinm $s0,$acc00,24,0,7 502 rlwinm $s0,$acc00,24,0,7
496 rlwinm $s1,$acc01,24,0,7 503 rlwinm $s1,$acc01,24,0,7
497 lbzx $acc12,$Tbl2,$acc12 504 lbzx $acc06,$Tbl2,$acc06
498 lbzx $acc13,$Tbl2,$acc13 505 lbzx $acc07,$Tbl2,$acc07
499 rlwinm $s2,$acc02,24,0,7 506 rlwinm $s2,$acc02,24,0,7
500 rlwinm $s3,$acc03,24,0,7 507 rlwinm $s3,$acc03,24,0,7
501 lbzx $acc14,$Tbl2,$acc14 508 lbzx $acc08,$Tbl2,$acc08
502 lbzx $acc15,$Tbl2,$acc15 509 lbzx $acc09,$Tbl2,$acc09
503 rlwimi $s0,$acc04,16,8,15 510 rlwimi $s0,$acc04,16,8,15
504 rlwimi $s1,$acc05,16,8,15 511 rlwimi $s1,$acc05,16,8,15
512 lbzx $acc10,$Tbl2,$acc10
513 lbzx $acc11,$Tbl2,$acc11
505 rlwimi $s2,$acc06,16,8,15 514 rlwimi $s2,$acc06,16,8,15
506 rlwimi $s3,$acc07,16,8,15 515 rlwimi $s3,$acc07,16,8,15
516 lbzx $acc12,$Tbl2,$acc12
517 lbzx $acc13,$Tbl2,$acc13
507 rlwimi $s0,$acc08,8,16,23 518 rlwimi $s0,$acc08,8,16,23
508 rlwimi $s1,$acc09,8,16,23 519 rlwimi $s1,$acc09,8,16,23
520 lbzx $acc14,$Tbl2,$acc14
521 lbzx $acc15,$Tbl2,$acc15
509 rlwimi $s2,$acc10,8,16,23 522 rlwimi $s2,$acc10,8,16,23
510 rlwimi $s3,$acc11,8,16,23 523 rlwimi $s3,$acc11,8,16,23
511 or $s0,$s0,$acc12 524 or $s0,$s0,$acc12
@@ -542,40 +555,40 @@ Lenc_compact_loop:
542 rlwinm $acc01,$s1,`32-24`,24,31 555 rlwinm $acc01,$s1,`32-24`,24,31
543 rlwinm $acc02,$s2,`32-24`,24,31 556 rlwinm $acc02,$s2,`32-24`,24,31
544 rlwinm $acc03,$s3,`32-24`,24,31 557 rlwinm $acc03,$s3,`32-24`,24,31
545 lbzx $acc00,$Tbl1,$acc00
546 lbzx $acc01,$Tbl1,$acc01
547 rlwinm $acc04,$s1,`32-16`,24,31 558 rlwinm $acc04,$s1,`32-16`,24,31
548 rlwinm $acc05,$s2,`32-16`,24,31 559 rlwinm $acc05,$s2,`32-16`,24,31
549 lbzx $acc02,$Tbl1,$acc02
550 lbzx $acc03,$Tbl1,$acc03
551 rlwinm $acc06,$s3,`32-16`,24,31 560 rlwinm $acc06,$s3,`32-16`,24,31
552 rlwinm $acc07,$s0,`32-16`,24,31 561 rlwinm $acc07,$s0,`32-16`,24,31
553 lbzx $acc04,$Tbl1,$acc04 562 lbzx $acc00,$Tbl1,$acc00
554 lbzx $acc05,$Tbl1,$acc05 563 lbzx $acc01,$Tbl1,$acc01
555 rlwinm $acc08,$s2,`32-8`,24,31 564 rlwinm $acc08,$s2,`32-8`,24,31
556 rlwinm $acc09,$s3,`32-8`,24,31 565 rlwinm $acc09,$s3,`32-8`,24,31
557 lbzx $acc06,$Tbl1,$acc06 566 lbzx $acc02,$Tbl1,$acc02
558 lbzx $acc07,$Tbl1,$acc07 567 lbzx $acc03,$Tbl1,$acc03
559 rlwinm $acc10,$s0,`32-8`,24,31 568 rlwinm $acc10,$s0,`32-8`,24,31
560 rlwinm $acc11,$s1,`32-8`,24,31 569 rlwinm $acc11,$s1,`32-8`,24,31
561 lbzx $acc08,$Tbl1,$acc08 570 lbzx $acc04,$Tbl1,$acc04
562 lbzx $acc09,$Tbl1,$acc09 571 lbzx $acc05,$Tbl1,$acc05
563 rlwinm $acc12,$s3,`0`,24,31 572 rlwinm $acc12,$s3,`0`,24,31
564 rlwinm $acc13,$s0,`0`,24,31 573 rlwinm $acc13,$s0,`0`,24,31
565 lbzx $acc10,$Tbl1,$acc10 574 lbzx $acc06,$Tbl1,$acc06
566 lbzx $acc11,$Tbl1,$acc11 575 lbzx $acc07,$Tbl1,$acc07
567 rlwinm $acc14,$s1,`0`,24,31 576 rlwinm $acc14,$s1,`0`,24,31
568 rlwinm $acc15,$s2,`0`,24,31 577 rlwinm $acc15,$s2,`0`,24,31
569 lbzx $acc12,$Tbl1,$acc12 578 lbzx $acc08,$Tbl1,$acc08
570 lbzx $acc13,$Tbl1,$acc13 579 lbzx $acc09,$Tbl1,$acc09
571 rlwinm $s0,$acc00,24,0,7 580 rlwinm $s0,$acc00,24,0,7
572 rlwinm $s1,$acc01,24,0,7 581 rlwinm $s1,$acc01,24,0,7
573 lbzx $acc14,$Tbl1,$acc14 582 lbzx $acc10,$Tbl1,$acc10
574 lbzx $acc15,$Tbl1,$acc15 583 lbzx $acc11,$Tbl1,$acc11
575 rlwinm $s2,$acc02,24,0,7 584 rlwinm $s2,$acc02,24,0,7
576 rlwinm $s3,$acc03,24,0,7 585 rlwinm $s3,$acc03,24,0,7
586 lbzx $acc12,$Tbl1,$acc12
587 lbzx $acc13,$Tbl1,$acc13
577 rlwimi $s0,$acc04,16,8,15 588 rlwimi $s0,$acc04,16,8,15
578 rlwimi $s1,$acc05,16,8,15 589 rlwimi $s1,$acc05,16,8,15
590 lbzx $acc14,$Tbl1,$acc14
591 lbzx $acc15,$Tbl1,$acc15
579 rlwimi $s2,$acc06,16,8,15 592 rlwimi $s2,$acc06,16,8,15
580 rlwimi $s3,$acc07,16,8,15 593 rlwimi $s3,$acc07,16,8,15
581 rlwimi $s0,$acc08,8,16,23 594 rlwimi $s0,$acc08,8,16,23
@@ -725,7 +738,7 @@ Lenc_compact_done:
725 addi $sp,$sp,$FRAME 738 addi $sp,$sp,$FRAME
726 blr 739 blr
727 740
728.align 4 741.align 5
729Lppc_AES_decrypt: 742Lppc_AES_decrypt:
730 lwz $acc00,240($key) 743 lwz $acc00,240($key)
731 lwz $t0,0($key) 744 lwz $t0,0($key)
@@ -746,46 +759,46 @@ Lppc_AES_decrypt:
746Ldec_loop: 759Ldec_loop:
747 rlwinm $acc00,$s0,`32-24+3`,21,28 760 rlwinm $acc00,$s0,`32-24+3`,21,28
748 rlwinm $acc01,$s1,`32-24+3`,21,28 761 rlwinm $acc01,$s1,`32-24+3`,21,28
749 lwz $t0,0($key)
750 lwz $t1,4($key)
751 rlwinm $acc02,$s2,`32-24+3`,21,28 762 rlwinm $acc02,$s2,`32-24+3`,21,28
752 rlwinm $acc03,$s3,`32-24+3`,21,28 763 rlwinm $acc03,$s3,`32-24+3`,21,28
753 lwz $t2,8($key) 764 lwz $t0,0($key)
754 lwz $t3,12($key) 765 lwz $t1,4($key)
755 rlwinm $acc04,$s3,`32-16+3`,21,28 766 rlwinm $acc04,$s3,`32-16+3`,21,28
756 rlwinm $acc05,$s0,`32-16+3`,21,28 767 rlwinm $acc05,$s0,`32-16+3`,21,28
757 lwzx $acc00,$Tbl0,$acc00 768 lwz $t2,8($key)
758 lwzx $acc01,$Tbl0,$acc01 769 lwz $t3,12($key)
759 rlwinm $acc06,$s1,`32-16+3`,21,28 770 rlwinm $acc06,$s1,`32-16+3`,21,28
760 rlwinm $acc07,$s2,`32-16+3`,21,28 771 rlwinm $acc07,$s2,`32-16+3`,21,28
761 lwzx $acc02,$Tbl0,$acc02 772 lwzx $acc00,$Tbl0,$acc00
762 lwzx $acc03,$Tbl0,$acc03 773 lwzx $acc01,$Tbl0,$acc01
763 rlwinm $acc08,$s2,`32-8+3`,21,28 774 rlwinm $acc08,$s2,`32-8+3`,21,28
764 rlwinm $acc09,$s3,`32-8+3`,21,28 775 rlwinm $acc09,$s3,`32-8+3`,21,28
765 lwzx $acc04,$Tbl1,$acc04 776 lwzx $acc02,$Tbl0,$acc02
766 lwzx $acc05,$Tbl1,$acc05 777 lwzx $acc03,$Tbl0,$acc03
767 rlwinm $acc10,$s0,`32-8+3`,21,28 778 rlwinm $acc10,$s0,`32-8+3`,21,28
768 rlwinm $acc11,$s1,`32-8+3`,21,28 779 rlwinm $acc11,$s1,`32-8+3`,21,28
769 lwzx $acc06,$Tbl1,$acc06 780 lwzx $acc04,$Tbl1,$acc04
770 lwzx $acc07,$Tbl1,$acc07 781 lwzx $acc05,$Tbl1,$acc05
771 rlwinm $acc12,$s1,`0+3`,21,28 782 rlwinm $acc12,$s1,`0+3`,21,28
772 rlwinm $acc13,$s2,`0+3`,21,28 783 rlwinm $acc13,$s2,`0+3`,21,28
773 lwzx $acc08,$Tbl2,$acc08 784 lwzx $acc06,$Tbl1,$acc06
774 lwzx $acc09,$Tbl2,$acc09 785 lwzx $acc07,$Tbl1,$acc07
775 rlwinm $acc14,$s3,`0+3`,21,28 786 rlwinm $acc14,$s3,`0+3`,21,28
776 rlwinm $acc15,$s0,`0+3`,21,28 787 rlwinm $acc15,$s0,`0+3`,21,28
777 lwzx $acc10,$Tbl2,$acc10 788 lwzx $acc08,$Tbl2,$acc08
778 lwzx $acc11,$Tbl2,$acc11 789 lwzx $acc09,$Tbl2,$acc09
779 xor $t0,$t0,$acc00 790 xor $t0,$t0,$acc00
780 xor $t1,$t1,$acc01 791 xor $t1,$t1,$acc01
781 lwzx $acc12,$Tbl3,$acc12 792 lwzx $acc10,$Tbl2,$acc10
782 lwzx $acc13,$Tbl3,$acc13 793 lwzx $acc11,$Tbl2,$acc11
783 xor $t2,$t2,$acc02 794 xor $t2,$t2,$acc02
784 xor $t3,$t3,$acc03 795 xor $t3,$t3,$acc03
785 lwzx $acc14,$Tbl3,$acc14 796 lwzx $acc12,$Tbl3,$acc12
786 lwzx $acc15,$Tbl3,$acc15 797 lwzx $acc13,$Tbl3,$acc13
787 xor $t0,$t0,$acc04 798 xor $t0,$t0,$acc04
788 xor $t1,$t1,$acc05 799 xor $t1,$t1,$acc05
800 lwzx $acc14,$Tbl3,$acc14
801 lwzx $acc15,$Tbl3,$acc15
789 xor $t2,$t2,$acc06 802 xor $t2,$t2,$acc06
790 xor $t3,$t3,$acc07 803 xor $t3,$t3,$acc07
791 xor $t0,$t0,$acc08 804 xor $t0,$t0,$acc08
@@ -801,56 +814,56 @@ Ldec_loop:
801 814
802 addi $Tbl2,$Tbl0,2048 815 addi $Tbl2,$Tbl0,2048
803 nop 816 nop
804 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
805 lwz $acc09,`2048+32`($Tbl0)
806 lwz $acc10,`2048+64`($Tbl0)
807 lwz $acc11,`2048+96`($Tbl0)
808 lwz $acc08,`2048+128`($Tbl0)
809 lwz $acc09,`2048+160`($Tbl0)
810 lwz $acc10,`2048+192`($Tbl0)
811 lwz $acc11,`2048+224`($Tbl0)
812 rlwinm $acc00,$s0,`32-24`,24,31
813 rlwinm $acc01,$s1,`32-24`,24,31
814 lwz $t0,0($key) 817 lwz $t0,0($key)
815 lwz $t1,4($key) 818 lwz $t1,4($key)
816 rlwinm $acc02,$s2,`32-24`,24,31 819 rlwinm $acc00,$s0,`32-24`,24,31
817 rlwinm $acc03,$s3,`32-24`,24,31 820 rlwinm $acc01,$s1,`32-24`,24,31
818 lwz $t2,8($key) 821 lwz $t2,8($key)
819 lwz $t3,12($key) 822 lwz $t3,12($key)
823 rlwinm $acc02,$s2,`32-24`,24,31
824 rlwinm $acc03,$s3,`32-24`,24,31
825 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
826 lwz $acc09,`2048+32`($Tbl0)
820 rlwinm $acc04,$s3,`32-16`,24,31 827 rlwinm $acc04,$s3,`32-16`,24,31
821 rlwinm $acc05,$s0,`32-16`,24,31 828 rlwinm $acc05,$s0,`32-16`,24,31
829 lwz $acc10,`2048+64`($Tbl0)
830 lwz $acc11,`2048+96`($Tbl0)
822 lbzx $acc00,$Tbl2,$acc00 831 lbzx $acc00,$Tbl2,$acc00
823 lbzx $acc01,$Tbl2,$acc01 832 lbzx $acc01,$Tbl2,$acc01
833 lwz $acc12,`2048+128`($Tbl0)
834 lwz $acc13,`2048+160`($Tbl0)
824 rlwinm $acc06,$s1,`32-16`,24,31 835 rlwinm $acc06,$s1,`32-16`,24,31
825 rlwinm $acc07,$s2,`32-16`,24,31 836 rlwinm $acc07,$s2,`32-16`,24,31
826 lbzx $acc02,$Tbl2,$acc02 837 lwz $acc14,`2048+192`($Tbl0)
827 lbzx $acc03,$Tbl2,$acc03 838 lwz $acc15,`2048+224`($Tbl0)
828 rlwinm $acc08,$s2,`32-8`,24,31 839 rlwinm $acc08,$s2,`32-8`,24,31
829 rlwinm $acc09,$s3,`32-8`,24,31 840 rlwinm $acc09,$s3,`32-8`,24,31
830 lbzx $acc04,$Tbl2,$acc04 841 lbzx $acc02,$Tbl2,$acc02
831 lbzx $acc05,$Tbl2,$acc05 842 lbzx $acc03,$Tbl2,$acc03
832 rlwinm $acc10,$s0,`32-8`,24,31 843 rlwinm $acc10,$s0,`32-8`,24,31
833 rlwinm $acc11,$s1,`32-8`,24,31 844 rlwinm $acc11,$s1,`32-8`,24,31
834 lbzx $acc06,$Tbl2,$acc06 845 lbzx $acc04,$Tbl2,$acc04
835 lbzx $acc07,$Tbl2,$acc07 846 lbzx $acc05,$Tbl2,$acc05
836 rlwinm $acc12,$s1,`0`,24,31 847 rlwinm $acc12,$s1,`0`,24,31
837 rlwinm $acc13,$s2,`0`,24,31 848 rlwinm $acc13,$s2,`0`,24,31
838 lbzx $acc08,$Tbl2,$acc08 849 lbzx $acc06,$Tbl2,$acc06
839 lbzx $acc09,$Tbl2,$acc09 850 lbzx $acc07,$Tbl2,$acc07
840 rlwinm $acc14,$s3,`0`,24,31 851 rlwinm $acc14,$s3,`0`,24,31
841 rlwinm $acc15,$s0,`0`,24,31 852 rlwinm $acc15,$s0,`0`,24,31
842 lbzx $acc10,$Tbl2,$acc10 853 lbzx $acc08,$Tbl2,$acc08
843 lbzx $acc11,$Tbl2,$acc11 854 lbzx $acc09,$Tbl2,$acc09
844 rlwinm $s0,$acc00,24,0,7 855 rlwinm $s0,$acc00,24,0,7
845 rlwinm $s1,$acc01,24,0,7 856 rlwinm $s1,$acc01,24,0,7
846 lbzx $acc12,$Tbl2,$acc12 857 lbzx $acc10,$Tbl2,$acc10
847 lbzx $acc13,$Tbl2,$acc13 858 lbzx $acc11,$Tbl2,$acc11
848 rlwinm $s2,$acc02,24,0,7 859 rlwinm $s2,$acc02,24,0,7
849 rlwinm $s3,$acc03,24,0,7 860 rlwinm $s3,$acc03,24,0,7
850 lbzx $acc14,$Tbl2,$acc14 861 lbzx $acc12,$Tbl2,$acc12
851 lbzx $acc15,$Tbl2,$acc15 862 lbzx $acc13,$Tbl2,$acc13
852 rlwimi $s0,$acc04,16,8,15 863 rlwimi $s0,$acc04,16,8,15
853 rlwimi $s1,$acc05,16,8,15 864 rlwimi $s1,$acc05,16,8,15
865 lbzx $acc14,$Tbl2,$acc14
866 lbzx $acc15,$Tbl2,$acc15
854 rlwimi $s2,$acc06,16,8,15 867 rlwimi $s2,$acc06,16,8,15
855 rlwimi $s3,$acc07,16,8,15 868 rlwimi $s3,$acc07,16,8,15
856 rlwimi $s0,$acc08,8,16,23 869 rlwimi $s0,$acc08,8,16,23
@@ -897,40 +910,40 @@ Ldec_compact_loop:
897 rlwinm $acc01,$s1,`32-24`,24,31 910 rlwinm $acc01,$s1,`32-24`,24,31
898 rlwinm $acc02,$s2,`32-24`,24,31 911 rlwinm $acc02,$s2,`32-24`,24,31
899 rlwinm $acc03,$s3,`32-24`,24,31 912 rlwinm $acc03,$s3,`32-24`,24,31
900 lbzx $acc00,$Tbl1,$acc00
901 lbzx $acc01,$Tbl1,$acc01
902 rlwinm $acc04,$s3,`32-16`,24,31 913 rlwinm $acc04,$s3,`32-16`,24,31
903 rlwinm $acc05,$s0,`32-16`,24,31 914 rlwinm $acc05,$s0,`32-16`,24,31
904 lbzx $acc02,$Tbl1,$acc02
905 lbzx $acc03,$Tbl1,$acc03
906 rlwinm $acc06,$s1,`32-16`,24,31 915 rlwinm $acc06,$s1,`32-16`,24,31
907 rlwinm $acc07,$s2,`32-16`,24,31 916 rlwinm $acc07,$s2,`32-16`,24,31
908 lbzx $acc04,$Tbl1,$acc04 917 lbzx $acc00,$Tbl1,$acc00
909 lbzx $acc05,$Tbl1,$acc05 918 lbzx $acc01,$Tbl1,$acc01
910 rlwinm $acc08,$s2,`32-8`,24,31 919 rlwinm $acc08,$s2,`32-8`,24,31
911 rlwinm $acc09,$s3,`32-8`,24,31 920 rlwinm $acc09,$s3,`32-8`,24,31
912 lbzx $acc06,$Tbl1,$acc06 921 lbzx $acc02,$Tbl1,$acc02
913 lbzx $acc07,$Tbl1,$acc07 922 lbzx $acc03,$Tbl1,$acc03
914 rlwinm $acc10,$s0,`32-8`,24,31 923 rlwinm $acc10,$s0,`32-8`,24,31
915 rlwinm $acc11,$s1,`32-8`,24,31 924 rlwinm $acc11,$s1,`32-8`,24,31
916 lbzx $acc08,$Tbl1,$acc08 925 lbzx $acc04,$Tbl1,$acc04
917 lbzx $acc09,$Tbl1,$acc09 926 lbzx $acc05,$Tbl1,$acc05
918 rlwinm $acc12,$s1,`0`,24,31 927 rlwinm $acc12,$s1,`0`,24,31
919 rlwinm $acc13,$s2,`0`,24,31 928 rlwinm $acc13,$s2,`0`,24,31
920 lbzx $acc10,$Tbl1,$acc10 929 lbzx $acc06,$Tbl1,$acc06
921 lbzx $acc11,$Tbl1,$acc11 930 lbzx $acc07,$Tbl1,$acc07
922 rlwinm $acc14,$s3,`0`,24,31 931 rlwinm $acc14,$s3,`0`,24,31
923 rlwinm $acc15,$s0,`0`,24,31 932 rlwinm $acc15,$s0,`0`,24,31
924 lbzx $acc12,$Tbl1,$acc12 933 lbzx $acc08,$Tbl1,$acc08
925 lbzx $acc13,$Tbl1,$acc13 934 lbzx $acc09,$Tbl1,$acc09
926 rlwinm $s0,$acc00,24,0,7 935 rlwinm $s0,$acc00,24,0,7
927 rlwinm $s1,$acc01,24,0,7 936 rlwinm $s1,$acc01,24,0,7
928 lbzx $acc14,$Tbl1,$acc14 937 lbzx $acc10,$Tbl1,$acc10
929 lbzx $acc15,$Tbl1,$acc15 938 lbzx $acc11,$Tbl1,$acc11
930 rlwinm $s2,$acc02,24,0,7 939 rlwinm $s2,$acc02,24,0,7
931 rlwinm $s3,$acc03,24,0,7 940 rlwinm $s3,$acc03,24,0,7
941 lbzx $acc12,$Tbl1,$acc12
942 lbzx $acc13,$Tbl1,$acc13
932 rlwimi $s0,$acc04,16,8,15 943 rlwimi $s0,$acc04,16,8,15
933 rlwimi $s1,$acc05,16,8,15 944 rlwimi $s1,$acc05,16,8,15
945 lbzx $acc14,$Tbl1,$acc14
946 lbzx $acc15,$Tbl1,$acc15
934 rlwimi $s2,$acc06,16,8,15 947 rlwimi $s2,$acc06,16,8,15
935 rlwimi $s3,$acc07,16,8,15 948 rlwimi $s3,$acc07,16,8,15
936 rlwimi $s0,$acc08,8,16,23 949 rlwimi $s0,$acc08,8,16,23