diff options
Diffstat (limited to 'src/lib/libcrypto/modes/asm/ghash-x86.pl')
-rw-r--r-- | src/lib/libcrypto/modes/asm/ghash-x86.pl | 6 |
1 file changed, 3 insertions, 3 deletions
diff --git a/src/lib/libcrypto/modes/asm/ghash-x86.pl b/src/lib/libcrypto/modes/asm/ghash-x86.pl index 83c727e07f..27492597ad 100644 --- a/src/lib/libcrypto/modes/asm/ghash-x86.pl +++ b/src/lib/libcrypto/modes/asm/ghash-x86.pl | |||
@@ -86,7 +86,7 @@ | |||
86 | # where Tproc is time required for Karatsuba pre- and post-processing, | 86 | # where Tproc is time required for Karatsuba pre- and post-processing, |
87 | # is more realistic estimate. In this case it gives ... 1.91 cycles. | 87 | # is more realistic estimate. In this case it gives ... 1.91 cycles. |
88 | # Or in other words, depending on how well we can interleave reduction | 88 | # Or in other words, depending on how well we can interleave reduction |
89 | # and one of the two multiplications the performance should be betwen | 89 | # and one of the two multiplications the performance should be between |
90 | # 1.91 and 2.16. As already mentioned, this implementation processes | 90 | # 1.91 and 2.16. As already mentioned, this implementation processes |
91 | # one byte out of 8KB buffer in 2.10 cycles, while x86_64 counterpart | 91 | # one byte out of 8KB buffer in 2.10 cycles, while x86_64 counterpart |
92 | # - in 2.02. x86_64 performance is better, because larger register | 92 | # - in 2.02. x86_64 performance is better, because larger register |
@@ -700,7 +700,7 @@ sub mmx_loop() { | |||
700 | &pxor ($red[1],$red[1]); | 700 | &pxor ($red[1],$red[1]); |
701 | &pxor ($red[2],$red[2]); | 701 | &pxor ($red[2],$red[2]); |
702 | 702 | ||
703 | # Just like in "May" verson modulo-schedule for critical path in | 703 | # Just like in "May" version modulo-schedule for critical path in |
704 | # 'Z.hi ^= rem_8bit[Z.lo&0xff^((u8)H[nhi]<<4)]<<48'. Final 'pxor' | 704 | # 'Z.hi ^= rem_8bit[Z.lo&0xff^((u8)H[nhi]<<4)]<<48'. Final 'pxor' |
705 | # is scheduled so late that rem_8bit[] has to be shifted *right* | 705 | # is scheduled so late that rem_8bit[] has to be shifted *right* |
706 | # by 16, which is why last argument to pinsrw is 2, which | 706 | # by 16, which is why last argument to pinsrw is 2, which |
@@ -1087,7 +1087,7 @@ my ($Xhi,$Xi) = @_; | |||
1087 | &movdqu (&QWP(0,$Xip),$Xi); | 1087 | &movdqu (&QWP(0,$Xip),$Xi); |
1088 | &function_end("gcm_ghash_clmul"); | 1088 | &function_end("gcm_ghash_clmul"); |
1089 | 1089 | ||
1090 | } else { # Algorith 5. Kept for reference purposes. | 1090 | } else { # Algorithm 5. Kept for reference purposes. |
1091 | 1091 | ||
1092 | sub reduction_alg5 { # 19/16 times faster than Intel version | 1092 | sub reduction_alg5 { # 19/16 times faster than Intel version |
1093 | my ($Xhi,$Xi)=@_; | 1093 | my ($Xhi,$Xi)=@_; |