diff options
author | jsing <> | 2016-09-04 14:31:29 +0000 |
---|---|---|
committer | jsing <> | 2016-09-04 14:31:29 +0000 |
commit | e38c58272a121e2bc9a785ec4001bbc802d68f66 (patch) | |
tree | 492fd2a4355d8592de425463d194374bdc85aa0a /src/lib/libcrypto/modes/asm | |
parent | a9cbed3be03a99c87e2b07b16b511e65a90bf800 (diff) | |
download | openbsd-e38c58272a121e2bc9a785ec4001bbc802d68f66.tar.gz openbsd-e38c58272a121e2bc9a785ec4001bbc802d68f66.tar.bz2 openbsd-e38c58272a121e2bc9a785ec4001bbc802d68f66.zip |
Less S390.
ok deraadt@
Diffstat (limited to 'src/lib/libcrypto/modes/asm')
-rw-r--r-- | src/lib/libcrypto/modes/asm/ghash-s390x.pl | 262 |
1 files changed, 0 insertions, 262 deletions
diff --git a/src/lib/libcrypto/modes/asm/ghash-s390x.pl b/src/lib/libcrypto/modes/asm/ghash-s390x.pl deleted file mode 100644 index 6a40d5d89c..0000000000 --- a/src/lib/libcrypto/modes/asm/ghash-s390x.pl +++ /dev/null | |||
@@ -1,262 +0,0 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | |||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | |||
10 | # September 2010. | ||
11 | # | ||
12 | # The module implements "4-bit" GCM GHASH function and underlying | ||
13 | # single multiplication operation in GF(2^128). "4-bit" means that it | ||
14 | # uses 256 bytes per-key table [+128 bytes shared table]. Performance | ||
15 | # was measured to be ~18 cycles per processed byte on z10, which is | ||
16 | # almost 40% better than gcc-generated code. It should be noted that | ||
17 | # 18 cycles is a worse result than expected: the loop is scheduled for 12 | ||
18 | # and the result should be close to 12. In the absence of instruction- | ||
19 | # level profiling data it's impossible to tell why... | ||
20 | |||
21 | # November 2010. | ||
22 | # | ||
23 | # Adapt for -m31 build. If kernel supports what's called "highgprs" | ||
24 | # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit | ||
25 | # instructions and achieve "64-bit" performance even in 31-bit legacy | ||
26 | # application context. The feature is not specific to any particular | ||
27 | # processor, as long as it's "z-CPU". The latter implies that the code | ||
28 | # remains z/Architecture specific. On z990 it was measured to perform | ||
29 | # 2.8x better than 32-bit code generated by gcc 4.3. | ||
30 | |||
31 | # March 2011. | ||
32 | # | ||
33 | # Support for hardware KIMD-GHASH is verified to produce correct | ||
34 | # result and therefore is engaged. On z196 it was measured to process | ||
35 | # 8KB buffer ~7 times faster than software implementation. It's not as | ||
36 | # impressive for smaller buffer sizes and for the smallest 16-byte buffer | ||
37 | # it's actually almost 2 times slower, which is the reason why | ||
38 | # KIMD-GHASH is not used in gcm_gmult_4bit. | ||
39 | |||
40 | $flavour = shift; # first command-line argument selects the ABI flavour | ||
41 | |||
42 | if ($flavour =~ /3[12]/) { # 31/32-bit flavour: 4-byte save slots, plain stm/lm | ||
43 | $SIZE_T=4; | ||
44 | $g=""; | ||
45 | } else { # any other flavour: 8-byte save slots, stmg/lmg | ||
46 | $SIZE_T=8; | ||
47 | $g="g"; | ||
48 | } | ||
49 | |||
50 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} # skip arguments until one looks like a file name | ||
51 | open(STDOUT,">",$output) || die "can't open $output: $!" if $output; # no file name given: keep writing to stdout | ||
52 | |||
53 | $softonly=0; # nonzero omits the KIMD hardware path from gcm_ghash_4bit | ||
54 | |||
55 | $Zhi="%r0"; # running value Z, bytes 0-7 of Xi | ||
56 | $Zlo="%r1"; # running value Z, bytes 8-15 of Xi | ||
57 | |||
58 | $Xi="%r2"; # argument block | ||
59 | $Htbl="%r3"; # table indexed by nibble*16, 16 bytes per entry | ||
60 | $inp="%r4"; # input pointer, advanced by 16 per block | ||
61 | $len="%r5"; # byte count on entry, shifted right by 4 into a block count | ||
62 | |||
63 | $rem0="%r6"; # variables | ||
64 | $rem1="%r7"; # $rem0/$rem1 are byte offsets into rem_4bit | ||
65 | $nlo="%r8"; # Htbl offset for the low nibble of the current byte | ||
66 | $nhi="%r9"; # Htbl offset for the high nibble of the current byte | ||
67 | $xi="%r10"; # current byte fetched from Xi | ||
68 | $cnt="%r11"; # inner loop counter, also the byte index into Xi | ||
69 | $tmp="%r12"; # scratch | ||
70 | $x78="%r13"; # constant mask 0xf<<3 | ||
71 | $rem_4bit="%r14"; # address of the rem_4bit table; caller's %r14 is saved by stm and restored by lm | ||
72 | |||
73 | $sp="%r15"; # base register for the save area at 6*$SIZE_T($sp) | ||
74 | |||
75 | $code.=<<___; # start the text section and emit the global, 32-byte aligned gcm_gmult_4bit label | ||
76 | .text | ||
77 | |||
78 | .globl gcm_gmult_4bit | ||
79 | .align 32 | ||
80 | gcm_gmult_4bit: | ||
81 | ___ | ||
82 | $code.=<<___ if(!$softonly && 0); # never emitted because of "&& 0": hardware is slow for single block... | ||
83 | larl %r1,OPENSSL_s390xcap_P | ||
84 | lg %r0,0(%r1) | ||
85 | tmhl %r0,0x4000 # check for message-security-assist | ||
86 | jz .Lsoft_gmult | ||
87 | lghi %r0,0 | ||
88 | la %r1,16($sp) | ||
89 | .long 0xb93e0004 # kimd %r0,%r4 | ||
90 | lg %r1,24($sp) | ||
91 | tmhh %r1,0x4000 # check for function 65 | ||
92 | jz .Lsoft_gmult | ||
93 | stg %r0,16($sp) # arrange 16 bytes of zero input | ||
94 | stg %r0,24($sp) | ||
95 | lghi %r0,65 # function 65 | ||
96 | la %r1,0($Xi) # H lies right after Xi in gcm128_context | ||
97 | la $inp,16($sp) | ||
98 | lghi $len,16 | ||
99 | .long 0xb93e0004 # kimd %r0,$inp | ||
100 | brc 1,.-4 # pay attention to "partial completion" | ||
101 | br %r14 | ||
102 | .align 32 | ||
103 | .Lsoft_gmult: | ||
104 | ___ | ||
105 | $code.=<<___; # software gmult: save %r6-%r14, set len=1, load Xi bytes 8-15 and join gcm_ghash_4bit at .Lgmult_shortcut; also emits the gcm_ghash_4bit label | ||
106 | stm${g} %r6,%r14,6*$SIZE_T($sp) | ||
107 | |||
108 | aghi $Xi,-1 | ||
109 | lghi $len,1 | ||
110 | lghi $x78,`0xf<<3` | ||
111 | larl $rem_4bit,rem_4bit | ||
112 | |||
113 | lg $Zlo,8+1($Xi) # Xi | ||
114 | j .Lgmult_shortcut | ||
115 | .type gcm_gmult_4bit,\@function | ||
116 | .size gcm_gmult_4bit,(.-gcm_gmult_4bit) | ||
117 | |||
118 | .globl gcm_ghash_4bit | ||
119 | .align 32 | ||
120 | gcm_ghash_4bit: | ||
121 | ___ | ||
122 | $code.=<<___ if(!$softonly); # KIMD function 65 path, emitted unless $softonly; branches to .Lsoft_ghash when either capability check fails | ||
123 | larl %r1,OPENSSL_s390xcap_P | ||
124 | lg %r0,0(%r1) | ||
125 | tmhl %r0,0x4000 # check for message-security-assist | ||
126 | jz .Lsoft_ghash | ||
127 | lghi %r0,0 | ||
128 | la %r1,16($sp) | ||
129 | .long 0xb93e0004 # kimd %r0,%r4 | ||
130 | lg %r1,24($sp) | ||
131 | tmhh %r1,0x4000 # check for function 65 | ||
132 | jz .Lsoft_ghash | ||
133 | lghi %r0,65 # function 65 | ||
134 | la %r1,0($Xi) # H lies right after Xi in gcm128_context | ||
135 | .long 0xb93e0004 # kimd %r0,$inp | ||
136 | brc 1,.-4 # pay attention to "partial completion" | ||
137 | br %r14 | ||
138 | .align 32 | ||
139 | .Lsoft_ghash: | ||
140 | ___ | ||
141 | $code.=<<___ if ($flavour =~ /3[12]/); # 31/32-bit flavour only: zero-extend the 32-bit len to 64 bits | ||
142 | llgfr $len,$len | ||
143 | ___ | ||
144 | $code.=<<___; # software GHASH: save %r6-%r14, run one .Louter pass per 16 input bytes (len>>4 passes), store Xi, restore and return; then the rem_4bit table | ||
145 | stm${g} %r6,%r14,6*$SIZE_T($sp) | ||
146 | |||
147 | aghi $Xi,-1 | ||
148 | srlg $len,$len,4 | ||
149 | lghi $x78,`0xf<<3` | ||
150 | larl $rem_4bit,rem_4bit | ||
151 | |||
152 | lg $Zlo,8+1($Xi) # Xi | ||
153 | lg $Zhi,0+1($Xi) | ||
154 | lghi $tmp,0 | ||
155 | .Louter: | ||
156 | xg $Zhi,0($inp) # Xi ^= inp | ||
157 | xg $Zlo,8($inp) | ||
158 | xgr $Zhi,$tmp | ||
159 | stg $Zlo,8+1($Xi) | ||
160 | stg $Zhi,0+1($Xi) | ||
161 | |||
162 | .Lgmult_shortcut: | ||
163 | lghi $tmp,0xf0 | ||
164 | sllg $nlo,$Zlo,4 | ||
165 | srlg $xi,$Zlo,8 # extract second byte | ||
166 | ngr $nlo,$tmp | ||
167 | lgr $nhi,$Zlo | ||
168 | lghi $cnt,14 | ||
169 | ngr $nhi,$tmp | ||
170 | |||
171 | lg $Zlo,8($nlo,$Htbl) | ||
172 | lg $Zhi,0($nlo,$Htbl) | ||
173 | |||
174 | sllg $nlo,$xi,4 | ||
175 | sllg $rem0,$Zlo,3 | ||
176 | ngr $nlo,$tmp | ||
177 | ngr $rem0,$x78 | ||
178 | ngr $xi,$tmp | ||
179 | |||
180 | sllg $tmp,$Zhi,60 | ||
181 | srlg $Zlo,$Zlo,4 | ||
182 | srlg $Zhi,$Zhi,4 | ||
183 | xg $Zlo,8($nhi,$Htbl) | ||
184 | xg $Zhi,0($nhi,$Htbl) | ||
185 | lgr $nhi,$xi | ||
186 | sllg $rem1,$Zlo,3 | ||
187 | xgr $Zlo,$tmp | ||
188 | ngr $rem1,$x78 | ||
189 | j .Lghash_inner | ||
190 | .align 16 | ||
191 | .Lghash_inner: | ||
192 | srlg $Zlo,$Zlo,4 | ||
193 | sllg $tmp,$Zhi,60 | ||
194 | xg $Zlo,8($nlo,$Htbl) | ||
195 | srlg $Zhi,$Zhi,4 | ||
196 | llgc $xi,0($cnt,$Xi) | ||
197 | xg $Zhi,0($nlo,$Htbl) | ||
198 | sllg $nlo,$xi,4 | ||
199 | xg $Zhi,0($rem0,$rem_4bit) | ||
200 | nill $nlo,0xf0 | ||
201 | sllg $rem0,$Zlo,3 | ||
202 | xgr $Zlo,$tmp | ||
203 | ngr $rem0,$x78 | ||
204 | nill $xi,0xf0 | ||
205 | |||
206 | sllg $tmp,$Zhi,60 | ||
207 | srlg $Zlo,$Zlo,4 | ||
208 | srlg $Zhi,$Zhi,4 | ||
209 | xg $Zlo,8($nhi,$Htbl) | ||
210 | xg $Zhi,0($nhi,$Htbl) | ||
211 | lgr $nhi,$xi | ||
212 | xg $Zhi,0($rem1,$rem_4bit) | ||
213 | sllg $rem1,$Zlo,3 | ||
214 | xgr $Zlo,$tmp | ||
215 | ngr $rem1,$x78 | ||
216 | brct $cnt,.Lghash_inner | ||
217 | |||
218 | sllg $tmp,$Zhi,60 | ||
219 | srlg $Zlo,$Zlo,4 | ||
220 | srlg $Zhi,$Zhi,4 | ||
221 | xg $Zlo,8($nlo,$Htbl) | ||
222 | xg $Zhi,0($nlo,$Htbl) | ||
223 | sllg $xi,$Zlo,3 | ||
224 | xg $Zhi,0($rem0,$rem_4bit) | ||
225 | xgr $Zlo,$tmp | ||
226 | ngr $xi,$x78 | ||
227 | |||
228 | sllg $tmp,$Zhi,60 | ||
229 | srlg $Zlo,$Zlo,4 | ||
230 | srlg $Zhi,$Zhi,4 | ||
231 | xg $Zlo,8($nhi,$Htbl) | ||
232 | xg $Zhi,0($nhi,$Htbl) | ||
233 | xgr $Zlo,$tmp | ||
234 | xg $Zhi,0($rem1,$rem_4bit) | ||
235 | |||
236 | lg $tmp,0($xi,$rem_4bit) | ||
237 | la $inp,16($inp) | ||
238 | sllg $tmp,$tmp,4 # correct last rem_4bit[rem] | ||
239 | brctg $len,.Louter | ||
240 | |||
241 | xgr $Zhi,$tmp | ||
242 | stg $Zlo,8+1($Xi) | ||
243 | stg $Zhi,0+1($Xi) | ||
244 | lm${g} %r6,%r14,6*$SIZE_T($sp) | ||
245 | br %r14 | ||
246 | .type gcm_ghash_4bit,\@function | ||
247 | .size gcm_ghash_4bit,(.-gcm_ghash_4bit) | ||
248 | |||
249 | .align 64 | ||
250 | rem_4bit: | ||
251 | .long `0x0000<<12`,0,`0x1C20<<12`,0,`0x3840<<12`,0,`0x2460<<12`,0 | ||
252 | .long `0x7080<<12`,0,`0x6CA0<<12`,0,`0x48C0<<12`,0,`0x54E0<<12`,0 | ||
253 | .long `0xE100<<12`,0,`0xFD20<<12`,0,`0xD940<<12`,0,`0xC560<<12`,0 | ||
254 | .long `0x9180<<12`,0,`0x8DA0<<12`,0,`0xA9C0<<12`,0,`0xB5E0<<12`,0 | ||
255 | .type rem_4bit,\@object | ||
256 | .size rem_4bit,(.-rem_4bit) | ||
257 | .string "GHASH for s390x, CRYPTOGAMS by <appro\@openssl.org>" | ||
258 | ___ | ||
259 | |||
260 | $code =~ s/\`([^\`]*)\`/eval $1/gem; # replace every backtick-quoted expression in the template with its evaluated value | ||
261 | print $code; | ||
262 | close STDOUT or die "error closing STDOUT: $!"; # buffered write errors only surface at close; fail rather than leave truncated output | ||