summary refs log tree commit diff
path: root/src/lib/libcrypto/sha/asm/sha512-armv4.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/sha/asm/sha512-armv4.pl')
-rw-r--r-- src/lib/libcrypto/sha/asm/sha512-armv4.pl 32
1 files changed, 18 insertions, 14 deletions
diff --git a/src/lib/libcrypto/sha/asm/sha512-armv4.pl b/src/lib/libcrypto/sha/asm/sha512-armv4.pl
index 4fbb94a914..3a35861ac6 100644
--- a/src/lib/libcrypto/sha/asm/sha512-armv4.pl
+++ b/src/lib/libcrypto/sha/asm/sha512-armv4.pl
@@ -10,7 +10,13 @@
10# SHA512 block procedure for ARMv4. September 2007. 10# SHA512 block procedure for ARMv4. September 2007.
11 11
12# This code is ~4.5 (four and a half) times faster than code generated 12# This code is ~4.5 (four and a half) times faster than code generated
13# by gcc 3.4 and it spends ~72 clock cycles per byte. 13# by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
14# Xscale PXA250 core].
15#
16# July 2010.
17#
18# Rescheduling for dual-issue pipeline resulted in 6% improvement on
19# Cortex A8 core and ~40 cycles per processed byte.
14 20
15# Byte order [in]dependence. ========================================= 21# Byte order [in]dependence. =========================================
16# 22#
@@ -22,7 +28,7 @@ $hi=0;
22$lo=4; 28$lo=4;
23# ==================================================================== 29# ====================================================================
24 30
25$output=shift; 31while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
26open STDOUT,">$output"; 32open STDOUT,">$output";
27 33
28$ctx="r0"; 34$ctx="r0";
@@ -73,33 +79,31 @@ $code.=<<___;
73 eor $t0,$t0,$Elo,lsl#23 79 eor $t0,$t0,$Elo,lsl#23
74 eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e) 80 eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e)
75 adds $Tlo,$Tlo,$t0 81 adds $Tlo,$Tlo,$t0
76 adc $Thi,$Thi,$t1 @ T += Sigma1(e)
77 adds $Tlo,$Tlo,$t2
78 adc $Thi,$Thi,$t3 @ T += h
79
80 ldr $t0,[sp,#$Foff+0] @ f.lo 82 ldr $t0,[sp,#$Foff+0] @ f.lo
83 adc $Thi,$Thi,$t1 @ T += Sigma1(e)
81 ldr $t1,[sp,#$Foff+4] @ f.hi 84 ldr $t1,[sp,#$Foff+4] @ f.hi
85 adds $Tlo,$Tlo,$t2
82 ldr $t2,[sp,#$Goff+0] @ g.lo 86 ldr $t2,[sp,#$Goff+0] @ g.lo
87 adc $Thi,$Thi,$t3 @ T += h
83 ldr $t3,[sp,#$Goff+4] @ g.hi 88 ldr $t3,[sp,#$Goff+4] @ g.hi
84 str $Elo,[sp,#$Eoff+0]
85 str $Ehi,[sp,#$Eoff+4]
86 str $Alo,[sp,#$Aoff+0]
87 str $Ahi,[sp,#$Aoff+4]
88 89
89 eor $t0,$t0,$t2 90 eor $t0,$t0,$t2
91 str $Elo,[sp,#$Eoff+0]
90 eor $t1,$t1,$t3 92 eor $t1,$t1,$t3
93 str $Ehi,[sp,#$Eoff+4]
91 and $t0,$t0,$Elo 94 and $t0,$t0,$Elo
95 str $Alo,[sp,#$Aoff+0]
92 and $t1,$t1,$Ehi 96 and $t1,$t1,$Ehi
97 str $Ahi,[sp,#$Aoff+4]
93 eor $t0,$t0,$t2 98 eor $t0,$t0,$t2
94 eor $t1,$t1,$t3 @ Ch(e,f,g)
95
96 ldr $t2,[$Ktbl,#4] @ K[i].lo 99 ldr $t2,[$Ktbl,#4] @ K[i].lo
100 eor $t1,$t1,$t3 @ Ch(e,f,g)
97 ldr $t3,[$Ktbl,#0] @ K[i].hi 101 ldr $t3,[$Ktbl,#0] @ K[i].hi
98 ldr $Elo,[sp,#$Doff+0] @ d.lo
99 ldr $Ehi,[sp,#$Doff+4] @ d.hi
100 102
101 adds $Tlo,$Tlo,$t0 103 adds $Tlo,$Tlo,$t0
104 ldr $Elo,[sp,#$Doff+0] @ d.lo
102 adc $Thi,$Thi,$t1 @ T += Ch(e,f,g) 105 adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
106 ldr $Ehi,[sp,#$Doff+4] @ d.hi
103 adds $Tlo,$Tlo,$t2 107 adds $Tlo,$Tlo,$t2
104 adc $Thi,$Thi,$t3 @ T += K[i] 108 adc $Thi,$Thi,$t3 @ T += K[i]
105 adds $Elo,$Elo,$Tlo 109 adds $Elo,$Elo,$Tlo