Diffstat:
 src/lib/libcrypto/sha/asm/sha256-armv4.pl | 55 ++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 15 deletions(-)
diff --git a/src/lib/libcrypto/sha/asm/sha256-armv4.pl b/src/lib/libcrypto/sha/asm/sha256-armv4.pl
index 492cb62bc0..9c84e8d93c 100644
--- a/src/lib/libcrypto/sha/asm/sha256-armv4.pl
+++ b/src/lib/libcrypto/sha/asm/sha256-armv4.pl
@@ -18,11 +18,16 @@
 # Rescheduling for dual-issue pipeline resulted in 22% improvement on
 # Cortex A8 core and ~20 cycles per processed byte.
 
+# February 2011.
+#
+# Profiler-assisted and platform-specific optimization resulted in 16%
+# improvement on Cortex A8 core and ~17 cycles per processed byte.
+
 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
 $ctx="r0";	$t0="r0";
-$inp="r1";
+$inp="r1";	$t3="r1";
 $len="r2";	$t1="r2";
 $T1="r3";
 $A="r4";
@@ -46,6 +51,9 @@ sub BODY_00_15 {
 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
 
 $code.=<<___ if ($i<16);
+#if __ARM_ARCH__>=7
+	ldr	$T1,[$inp],#4
+#else
 	ldrb	$T1,[$inp,#3]	@ $i
 	ldrb	$t2,[$inp,#2]
 	ldrb	$t1,[$inp,#1]
@@ -53,16 +61,24 @@ $code.=<<___ if ($i<16);
 	orr	$T1,$T1,$t2,lsl#8
 	orr	$T1,$T1,$t1,lsl#16
 	orr	$T1,$T1,$t0,lsl#24
-	`"str $inp,[sp,#17*4]" if ($i==15)`
+#endif
 ___
 $code.=<<___;
-	ldr	$t2,[$Ktbl],#4			@ *K256++
 	mov	$t0,$e,ror#$Sigma1[0]
-	str	$T1,[sp,#`$i%16`*4]
+	ldr	$t2,[$Ktbl],#4			@ *K256++
 	eor	$t0,$t0,$e,ror#$Sigma1[1]
 	eor	$t1,$f,$g
+#if $i>=16
+	add	$T1,$T1,$t3			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
+	rev	$T1,$T1
+#endif
+#if $i==15
+	str	$inp,[sp,#17*4]			@ leave room for $t3
+#endif
 	eor	$t0,$t0,$e,ror#$Sigma1[2]	@ Sigma1(e)
 	and	$t1,$t1,$e
+	str	$T1,[sp,#`$i%16`*4]
 	add	$T1,$T1,$t0
 	eor	$t1,$t1,$g			@ Ch(e,f,g)
 	add	$T1,$T1,$h
@@ -71,6 +87,9 @@ $code.=<<___;
 	eor	$h,$h,$a,ror#$Sigma0[1]
 	add	$T1,$T1,$t2
 	eor	$h,$h,$a,ror#$Sigma0[2]		@ Sigma0(a)
+#if $i>=15
+	ldr	$t3,[sp,#`($i+2)%16`*4]		@ from BODY_16_xx
+#endif
 	orr	$t0,$a,$b
 	and	$t1,$a,$b
 	and	$t0,$t0,$c
@@ -85,24 +104,26 @@ sub BODY_16_XX {
 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
 
 $code.=<<___;
-	ldr	$t1,[sp,#`($i+1)%16`*4]	@ $i
+	@ ldr	$t3,[sp,#`($i+1)%16`*4]	@ $i
 	ldr	$t2,[sp,#`($i+14)%16`*4]
+	mov	$t0,$t3,ror#$sigma0[0]
 	ldr	$T1,[sp,#`($i+0)%16`*4]
-	mov	$t0,$t1,ror#$sigma0[0]
-	ldr	$inp,[sp,#`($i+9)%16`*4]
-	eor	$t0,$t0,$t1,ror#$sigma0[1]
-	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
-	mov	$t1,$t2,ror#$sigma1[0]
+	eor	$t0,$t0,$t3,ror#$sigma0[1]
+	ldr	$t1,[sp,#`($i+9)%16`*4]
+	eor	$t0,$t0,$t3,lsr#$sigma0[2]	@ sigma0(X[i+1])
+	mov	$t3,$t2,ror#$sigma1[0]
 	add	$T1,$T1,$t0
-	eor	$t1,$t1,$t2,ror#$sigma1[1]
-	add	$T1,$T1,$inp
-	eor	$t1,$t1,$t2,lsr#$sigma1[2]	@ sigma1(X[i+14])
+	eor	$t3,$t3,$t2,ror#$sigma1[1]
 	add	$T1,$T1,$t1
+	eor	$t3,$t3,$t2,lsr#$sigma1[2]	@ sigma1(X[i+14])
+	@ add	$T1,$T1,$t3
 ___
 	&BODY_00_15(@_);
 }
 
 $code=<<___;
+#include "arm_arch.h"
+
 .text
 .code 32
 
@@ -132,7 +153,7 @@ K256:
 sha256_block_data_order:
 	sub	r3,pc,#8		@ sha256_block_data_order
 	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
-	stmdb	sp!,{$ctx,$inp,$len,r4-r12,lr}
+	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
 	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
 	sub	$Ktbl,r3,#256		@ K256
 	sub	sp,sp,#16*4		@ alloca(X[16])
@@ -171,10 +192,14 @@ $code.=<<___;
 	bne	.Loop
 
 	add	sp,sp,#`16+3`*4		@ destroy frame
-	ldmia	sp!,{r4-r12,lr}
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r11,pc}
+#else
+	ldmia	sp!,{r4-r11,lr}
 	tst	lr,#1
 	moveq	pc,lr			@ be binary compatible with V4, yet
 	bx	lr			@ interoperable with Thumb ISA:-)
+#endif
 .size	sha256_block_data_order,.-sha256_block_data_order
 .asciz	"SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
 .align	2