diff options
Diffstat (limited to 'src/lib/libcrypto/sha/asm/sha1-s390x.pl')
-rw-r--r-- | src/lib/libcrypto/sha/asm/sha1-s390x.pl | 246 |
1 files changed, 0 insertions, 246 deletions
diff --git a/src/lib/libcrypto/sha/asm/sha1-s390x.pl b/src/lib/libcrypto/sha/asm/sha1-s390x.pl deleted file mode 100644 index 9193dda45e..0000000000 --- a/src/lib/libcrypto/sha/asm/sha1-s390x.pl +++ /dev/null | |||
@@ -1,246 +0,0 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | |||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | |||
10 | # SHA1 block procedure for s390x. | ||
11 | |||
12 | # April 2007. | ||
13 | # | ||
14 | # Performance is >30% better than gcc 3.3 generated code. But the real | ||
15 | # twist is that SHA1 hardware support is detected and utilized. In | ||
16 | # which case performance can reach further >4.5x for larger chunks. | ||
17 | |||
18 | # January 2009. | ||
19 | # | ||
20 | # Optimize Xupdate for amount of memory references and reschedule | ||
21 | # instructions to favour dual-issue z10 pipeline. On z10 hardware is | ||
22 | # "only" ~2.3x faster than software. | ||
23 | |||
24 | # November 2010. | ||
25 | # | ||
26 | # Adapt for -m31 build. If kernel supports what's called "highgprs" | ||
27 | # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit | ||
28 | # instructions and achieve "64-bit" performance even in 31-bit legacy | ||
29 | # application context. The feature is not specific to any particular | ||
30 | # processor, as long as it's "z-CPU". Latter implies that the code | ||
31 | # remains z/Architecture specific. | ||
32 | |||
33 | $kimdfunc=1; # magic function code for kimd instruction | ||
34 | |||
35 | $flavour = shift; | ||
36 | |||
37 | if ($flavour =~ /3[12]/) { | ||
38 | $SIZE_T=4; | ||
39 | $g=""; | ||
40 | } else { | ||
41 | $SIZE_T=8; | ||
42 | $g="g"; | ||
43 | } | ||
44 | |||
45 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} | ||
46 | open STDOUT,">$output"; | ||
47 | |||
48 | $K_00_39="%r0"; $K=$K_00_39; | ||
49 | $K_40_79="%r1"; | ||
50 | $ctx="%r2"; $prefetch="%r2"; | ||
51 | $inp="%r3"; | ||
52 | $len="%r4"; | ||
53 | |||
54 | $A="%r5"; | ||
55 | $B="%r6"; | ||
56 | $C="%r7"; | ||
57 | $D="%r8"; | ||
58 | $E="%r9"; @V=($A,$B,$C,$D,$E); | ||
59 | $t0="%r10"; | ||
60 | $t1="%r11"; | ||
61 | @X=("%r12","%r13","%r14"); | ||
62 | $sp="%r15"; | ||
63 | |||
64 | $stdframe=16*$SIZE_T+4*8; | ||
65 | $frame=$stdframe+16*4; | ||
66 | |||
67 | sub Xupdate { | ||
68 | my $i=shift; | ||
69 | |||
70 | $code.=<<___ if ($i==15); | ||
71 | lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up | ||
72 | lr $X[0],$X[2] | ||
73 | ___ | ||
74 | return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle | ||
75 | $code.=<<___ if ($i<16); | ||
76 | lg $X[0],`$i*4`($inp) ### Xload($i) | ||
77 | rllg $X[1],$X[0],32 | ||
78 | ___ | ||
79 | $code.=<<___ if ($i>=16); | ||
80 | xgr $X[0],$prefetch ### Xupdate($i) | ||
81 | lg $prefetch,`$stdframe+4*(($i+2)%16)`($sp) | ||
82 | xg $X[0],`$stdframe+4*(($i+8)%16)`($sp) | ||
83 | xgr $X[0],$prefetch | ||
84 | rll $X[0],$X[0],1 | ||
85 | rllg $X[1],$X[0],32 | ||
86 | rll $X[1],$X[1],1 | ||
87 | rllg $X[0],$X[1],32 | ||
88 | lr $X[2],$X[1] # feedback | ||
89 | ___ | ||
90 | $code.=<<___ if ($i<=70); | ||
91 | stg $X[0],`$stdframe+4*($i%16)`($sp) | ||
92 | ___ | ||
93 | unshift(@X,pop(@X)); | ||
94 | } | ||
95 | |||
96 | sub BODY_00_19 { | ||
97 | my ($i,$a,$b,$c,$d,$e)=@_; | ||
98 | my $xi=$X[1]; | ||
99 | |||
100 | &Xupdate($i); | ||
101 | $code.=<<___; | ||
102 | alr $e,$K ### $i | ||
103 | rll $t1,$a,5 | ||
104 | lr $t0,$d | ||
105 | xr $t0,$c | ||
106 | alr $e,$t1 | ||
107 | nr $t0,$b | ||
108 | alr $e,$xi | ||
109 | xr $t0,$d | ||
110 | rll $b,$b,30 | ||
111 | alr $e,$t0 | ||
112 | ___ | ||
113 | } | ||
114 | |||
115 | sub BODY_20_39 { | ||
116 | my ($i,$a,$b,$c,$d,$e)=@_; | ||
117 | my $xi=$X[1]; | ||
118 | |||
119 | &Xupdate($i); | ||
120 | $code.=<<___; | ||
121 | alr $e,$K ### $i | ||
122 | rll $t1,$a,5 | ||
123 | lr $t0,$b | ||
124 | alr $e,$t1 | ||
125 | xr $t0,$c | ||
126 | alr $e,$xi | ||
127 | xr $t0,$d | ||
128 | rll $b,$b,30 | ||
129 | alr $e,$t0 | ||
130 | ___ | ||
131 | } | ||
132 | |||
133 | sub BODY_40_59 { | ||
134 | my ($i,$a,$b,$c,$d,$e)=@_; | ||
135 | my $xi=$X[1]; | ||
136 | |||
137 | &Xupdate($i); | ||
138 | $code.=<<___; | ||
139 | alr $e,$K ### $i | ||
140 | rll $t1,$a,5 | ||
141 | lr $t0,$b | ||
142 | alr $e,$t1 | ||
143 | or $t0,$c | ||
144 | lr $t1,$b | ||
145 | nr $t0,$d | ||
146 | nr $t1,$c | ||
147 | alr $e,$xi | ||
148 | or $t0,$t1 | ||
149 | rll $b,$b,30 | ||
150 | alr $e,$t0 | ||
151 | ___ | ||
152 | } | ||
153 | |||
154 | $code.=<<___; | ||
155 | .text | ||
156 | .align 64 | ||
157 | .type Ktable,\@object | ||
158 | Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6 | ||
159 | .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0 | ||
160 | .size Ktable,.-Ktable | ||
161 | .globl sha1_block_data_order | ||
162 | .type sha1_block_data_order,\@function | ||
163 | sha1_block_data_order: | ||
164 | ___ | ||
165 | $code.=<<___ if ($kimdfunc); | ||
166 | larl %r1,OPENSSL_s390xcap_P | ||
167 | lg %r0,0(%r1) | ||
168 | tmhl %r0,0x4000 # check for message-security assist | ||
169 | jz .Lsoftware | ||
170 | lghi %r0,0 | ||
171 | la %r1,`2*$SIZE_T`($sp) | ||
172 | .long 0xb93e0002 # kimd %r0,%r2 | ||
173 | lg %r0,`2*$SIZE_T`($sp) | ||
174 | tmhh %r0,`0x8000>>$kimdfunc` | ||
175 | jz .Lsoftware | ||
176 | lghi %r0,$kimdfunc | ||
177 | lgr %r1,$ctx | ||
178 | lgr %r2,$inp | ||
179 | sllg %r3,$len,6 | ||
180 | .long 0xb93e0002 # kimd %r0,%r2 | ||
181 | brc 1,.-4 # pay attention to "partial completion" | ||
182 | br %r14 | ||
183 | .align 16 | ||
184 | .Lsoftware: | ||
185 | ___ | ||
186 | $code.=<<___; | ||
187 | lghi %r1,-$frame | ||
188 | st${g} $ctx,`2*$SIZE_T`($sp) | ||
189 | stm${g} %r6,%r15,`6*$SIZE_T`($sp) | ||
190 | lgr %r0,$sp | ||
191 | la $sp,0(%r1,$sp) | ||
192 | st${g} %r0,0($sp) | ||
193 | |||
194 | larl $t0,Ktable | ||
195 | llgf $A,0($ctx) | ||
196 | llgf $B,4($ctx) | ||
197 | llgf $C,8($ctx) | ||
198 | llgf $D,12($ctx) | ||
199 | llgf $E,16($ctx) | ||
200 | |||
201 | lg $K_00_39,0($t0) | ||
202 | lg $K_40_79,8($t0) | ||
203 | |||
204 | .Lloop: | ||
205 | rllg $K_00_39,$K_00_39,32 | ||
206 | ___ | ||
207 | for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } | ||
208 | $code.=<<___; | ||
209 | rllg $K_00_39,$K_00_39,32 | ||
210 | ___ | ||
211 | for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } | ||
212 | $code.=<<___; $K=$K_40_79; | ||
213 | rllg $K_40_79,$K_40_79,32 | ||
214 | ___ | ||
215 | for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } | ||
216 | $code.=<<___; | ||
217 | rllg $K_40_79,$K_40_79,32 | ||
218 | ___ | ||
219 | for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } | ||
220 | $code.=<<___; | ||
221 | |||
222 | l${g} $ctx,`$frame+2*$SIZE_T`($sp) | ||
223 | la $inp,64($inp) | ||
224 | al $A,0($ctx) | ||
225 | al $B,4($ctx) | ||
226 | al $C,8($ctx) | ||
227 | al $D,12($ctx) | ||
228 | al $E,16($ctx) | ||
229 | st $A,0($ctx) | ||
230 | st $B,4($ctx) | ||
231 | st $C,8($ctx) | ||
232 | st $D,12($ctx) | ||
233 | st $E,16($ctx) | ||
234 | brct${g} $len,.Lloop | ||
235 | |||
236 | lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) | ||
237 | br %r14 | ||
238 | .size sha1_block_data_order,.-sha1_block_data_order | ||
239 | .string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>" | ||
240 | .comm OPENSSL_s390xcap_P,16,8 | ||
241 | ___ | ||
242 | |||
243 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | ||
244 | |||
245 | print $code; | ||
246 | close STDOUT; | ||