diff options
Diffstat (limited to 'src/lib/libcrypto/sha/asm/sha1-s390x.pl')
-rw-r--r-- | src/lib/libcrypto/sha/asm/sha1-s390x.pl | 226 |
1 files changed, 0 insertions, 226 deletions
diff --git a/src/lib/libcrypto/sha/asm/sha1-s390x.pl b/src/lib/libcrypto/sha/asm/sha1-s390x.pl deleted file mode 100644 index 4b17848287..0000000000 --- a/src/lib/libcrypto/sha/asm/sha1-s390x.pl +++ /dev/null | |||
@@ -1,226 +0,0 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | |||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | |||
10 | # SHA1 block procedure for s390x. | ||
11 | |||
12 | # April 2007. | ||
13 | # | ||
14 | # Performance is >30% better than gcc 3.3 generated code. But the real | ||
15 | # twist is that SHA1 hardware support is detected and utilized. In | ||
16 | # which case performance can reach further >4.5x for larger chunks. | ||
17 | |||
18 | # January 2009. | ||
19 | # | ||
20 | # Optimize Xupdate for amount of memory references and reschedule | ||
21 | # instructions to favour dual-issue z10 pipeline. On z10 hardware is | ||
22 | # "only" ~2.3x faster than software. | ||
23 | |||
24 | $kimdfunc=1; # magic function code for kimd instruction | ||
25 | |||
26 | $output=shift; | ||
27 | open STDOUT,">$output"; | ||
28 | |||
29 | $K_00_39="%r0"; $K=$K_00_39; | ||
30 | $K_40_79="%r1"; | ||
31 | $ctx="%r2"; $prefetch="%r2"; | ||
32 | $inp="%r3"; | ||
33 | $len="%r4"; | ||
34 | |||
35 | $A="%r5"; | ||
36 | $B="%r6"; | ||
37 | $C="%r7"; | ||
38 | $D="%r8"; | ||
39 | $E="%r9"; @V=($A,$B,$C,$D,$E); | ||
40 | $t0="%r10"; | ||
41 | $t1="%r11"; | ||
42 | @X=("%r12","%r13","%r14"); | ||
43 | $sp="%r15"; | ||
44 | |||
45 | $frame=160+16*4; | ||
46 | |||
47 | sub Xupdate { | ||
48 | my $i=shift; | ||
49 | |||
50 | $code.=<<___ if ($i==15); | ||
51 | lg $prefetch,160($sp) ### Xupdate(16) warm-up | ||
52 | lr $X[0],$X[2] | ||
53 | ___ | ||
54 | return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle | ||
55 | $code.=<<___ if ($i<16); | ||
56 | lg $X[0],`$i*4`($inp) ### Xload($i) | ||
57 | rllg $X[1],$X[0],32 | ||
58 | ___ | ||
59 | $code.=<<___ if ($i>=16); | ||
60 | xgr $X[0],$prefetch ### Xupdate($i) | ||
61 | lg $prefetch,`160+4*(($i+2)%16)`($sp) | ||
62 | xg $X[0],`160+4*(($i+8)%16)`($sp) | ||
63 | xgr $X[0],$prefetch | ||
64 | rll $X[0],$X[0],1 | ||
65 | rllg $X[1],$X[0],32 | ||
66 | rll $X[1],$X[1],1 | ||
67 | rllg $X[0],$X[1],32 | ||
68 | lr $X[2],$X[1] # feedback | ||
69 | ___ | ||
70 | $code.=<<___ if ($i<=70); | ||
71 | stg $X[0],`160+4*($i%16)`($sp) | ||
72 | ___ | ||
73 | unshift(@X,pop(@X)); | ||
74 | } | ||
75 | |||
76 | sub BODY_00_19 { | ||
77 | my ($i,$a,$b,$c,$d,$e)=@_; | ||
78 | my $xi=$X[1]; | ||
79 | |||
80 | &Xupdate($i); | ||
81 | $code.=<<___; | ||
82 | alr $e,$K ### $i | ||
83 | rll $t1,$a,5 | ||
84 | lr $t0,$d | ||
85 | xr $t0,$c | ||
86 | alr $e,$t1 | ||
87 | nr $t0,$b | ||
88 | alr $e,$xi | ||
89 | xr $t0,$d | ||
90 | rll $b,$b,30 | ||
91 | alr $e,$t0 | ||
92 | ___ | ||
93 | } | ||
94 | |||
95 | sub BODY_20_39 { | ||
96 | my ($i,$a,$b,$c,$d,$e)=@_; | ||
97 | my $xi=$X[1]; | ||
98 | |||
99 | &Xupdate($i); | ||
100 | $code.=<<___; | ||
101 | alr $e,$K ### $i | ||
102 | rll $t1,$a,5 | ||
103 | lr $t0,$b | ||
104 | alr $e,$t1 | ||
105 | xr $t0,$c | ||
106 | alr $e,$xi | ||
107 | xr $t0,$d | ||
108 | rll $b,$b,30 | ||
109 | alr $e,$t0 | ||
110 | ___ | ||
111 | } | ||
112 | |||
113 | sub BODY_40_59 { | ||
114 | my ($i,$a,$b,$c,$d,$e)=@_; | ||
115 | my $xi=$X[1]; | ||
116 | |||
117 | &Xupdate($i); | ||
118 | $code.=<<___; | ||
119 | alr $e,$K ### $i | ||
120 | rll $t1,$a,5 | ||
121 | lr $t0,$b | ||
122 | alr $e,$t1 | ||
123 | or $t0,$c | ||
124 | lr $t1,$b | ||
125 | nr $t0,$d | ||
126 | nr $t1,$c | ||
127 | alr $e,$xi | ||
128 | or $t0,$t1 | ||
129 | rll $b,$b,30 | ||
130 | alr $e,$t0 | ||
131 | ___ | ||
132 | } | ||
133 | |||
134 | $code.=<<___; | ||
135 | .text | ||
136 | .align 64 | ||
137 | .type Ktable,\@object | ||
138 | Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6 | ||
139 | .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0 | ||
140 | .size Ktable,.-Ktable | ||
141 | .globl sha1_block_data_order | ||
142 | .type sha1_block_data_order,\@function | ||
143 | sha1_block_data_order: | ||
144 | ___ | ||
145 | $code.=<<___ if ($kimdfunc); | ||
146 | larl %r1,OPENSSL_s390xcap_P | ||
147 | lg %r0,0(%r1) | ||
148 | tmhl %r0,0x4000 # check for message-security assist | ||
149 | jz .Lsoftware | ||
150 | lghi %r0,0 | ||
151 | la %r1,16($sp) | ||
152 | .long 0xb93e0002 # kimd %r0,%r2 | ||
153 | lg %r0,16($sp) | ||
154 | tmhh %r0,`0x8000>>$kimdfunc` | ||
155 | jz .Lsoftware | ||
156 | lghi %r0,$kimdfunc | ||
157 | lgr %r1,$ctx | ||
158 | lgr %r2,$inp | ||
159 | sllg %r3,$len,6 | ||
160 | .long 0xb93e0002 # kimd %r0,%r2 | ||
161 | brc 1,.-4 # pay attention to "partial completion" | ||
162 | br %r14 | ||
163 | .align 16 | ||
164 | .Lsoftware: | ||
165 | ___ | ||
166 | $code.=<<___; | ||
167 | lghi %r1,-$frame | ||
168 | stg $ctx,16($sp) | ||
169 | stmg %r6,%r15,48($sp) | ||
170 | lgr %r0,$sp | ||
171 | la $sp,0(%r1,$sp) | ||
172 | stg %r0,0($sp) | ||
173 | |||
174 | larl $t0,Ktable | ||
175 | llgf $A,0($ctx) | ||
176 | llgf $B,4($ctx) | ||
177 | llgf $C,8($ctx) | ||
178 | llgf $D,12($ctx) | ||
179 | llgf $E,16($ctx) | ||
180 | |||
181 | lg $K_00_39,0($t0) | ||
182 | lg $K_40_79,8($t0) | ||
183 | |||
184 | .Lloop: | ||
185 | rllg $K_00_39,$K_00_39,32 | ||
186 | ___ | ||
187 | for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } | ||
188 | $code.=<<___; | ||
189 | rllg $K_00_39,$K_00_39,32 | ||
190 | ___ | ||
191 | for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } | ||
192 | $code.=<<___; $K=$K_40_79; | ||
193 | rllg $K_40_79,$K_40_79,32 | ||
194 | ___ | ||
195 | for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } | ||
196 | $code.=<<___; | ||
197 | rllg $K_40_79,$K_40_79,32 | ||
198 | ___ | ||
199 | for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } | ||
200 | $code.=<<___; | ||
201 | |||
202 | lg $ctx,`$frame+16`($sp) | ||
203 | la $inp,64($inp) | ||
204 | al $A,0($ctx) | ||
205 | al $B,4($ctx) | ||
206 | al $C,8($ctx) | ||
207 | al $D,12($ctx) | ||
208 | al $E,16($ctx) | ||
209 | st $A,0($ctx) | ||
210 | st $B,4($ctx) | ||
211 | st $C,8($ctx) | ||
212 | st $D,12($ctx) | ||
213 | st $E,16($ctx) | ||
214 | brct $len,.Lloop | ||
215 | |||
216 | lmg %r6,%r15,`$frame+48`($sp) | ||
217 | br %r14 | ||
218 | .size sha1_block_data_order,.-sha1_block_data_order | ||
219 | .string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>" | ||
220 | .comm OPENSSL_s390xcap_P,8,8 | ||
221 | ___ | ||
222 | |||
223 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | ||
224 | |||
225 | print $code; | ||
226 | close STDOUT; | ||