summary | refs | log | tree | commit | diff
path: root/src/lib/libcrypto/modes/asm/ghash-x86.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/modes/asm/ghash-x86.pl')
-rw-r--r-- src/lib/libcrypto/modes/asm/ghash-x86.pl 6
1 files changed, 3 insertions, 3 deletions
diff --git a/src/lib/libcrypto/modes/asm/ghash-x86.pl b/src/lib/libcrypto/modes/asm/ghash-x86.pl
index 83c727e07f..27492597ad 100644
--- a/src/lib/libcrypto/modes/asm/ghash-x86.pl
+++ b/src/lib/libcrypto/modes/asm/ghash-x86.pl
@@ -86,7 +86,7 @@
86 # where Tproc is time required for Karatsuba pre- and post-processing, 86 # where Tproc is time required for Karatsuba pre- and post-processing,
87 # is more realistic estimate. In this case it gives ... 1.91 cycles. 87 # is more realistic estimate. In this case it gives ... 1.91 cycles.
88 # Or in other words, depending on how well we can interleave reduction 88 # Or in other words, depending on how well we can interleave reduction
89 # and one of the two multiplications the performance should be betwen 89 # and one of the two multiplications the performance should be between
90 # 1.91 and 2.16. As already mentioned, this implementation processes 90 # 1.91 and 2.16. As already mentioned, this implementation processes
91 # one byte out of 8KB buffer in 2.10 cycles, while x86_64 counterpart 91 # one byte out of 8KB buffer in 2.10 cycles, while x86_64 counterpart
92 # - in 2.02. x86_64 performance is better, because larger register 92 # - in 2.02. x86_64 performance is better, because larger register
@@ -700,7 +700,7 @@ sub mmx_loop() {
700 &pxor ($red[1],$red[1]); 700 &pxor ($red[1],$red[1]);
701 &pxor ($red[2],$red[2]); 701 &pxor ($red[2],$red[2]);
702 702
703 # Just like in "May" verson modulo-schedule for critical path in 703 # Just like in "May" version modulo-schedule for critical path in
704 # 'Z.hi ^= rem_8bit[Z.lo&0xff^((u8)H[nhi]<<4)]<<48'. Final 'pxor' 704 # 'Z.hi ^= rem_8bit[Z.lo&0xff^((u8)H[nhi]<<4)]<<48'. Final 'pxor'
705 # is scheduled so late that rem_8bit[] has to be shifted *right* 705 # is scheduled so late that rem_8bit[] has to be shifted *right*
706 # by 16, which is why last argument to pinsrw is 2, which 706 # by 16, which is why last argument to pinsrw is 2, which
@@ -1087,7 +1087,7 @@ my ($Xhi,$Xi) = @_;
1087 &movdqu (&QWP(0,$Xip),$Xi); 1087 &movdqu (&QWP(0,$Xip),$Xi);
1088 &function_end("gcm_ghash_clmul"); 1088 &function_end("gcm_ghash_clmul");
1089 1089
1090 } else { # Algorith 5. Kept for reference purposes. 1090 } else { # Algorithm 5. Kept for reference purposes.
1091 1091
1092 sub reduction_alg5 { # 19/16 times faster than Intel version 1092 sub reduction_alg5 { # 19/16 times faster than Intel version
1093 my ($Xhi,$Xi)=@_; 1093 my ($Xhi,$Xi)=@_;