diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib/libcrypto/sha/asm/sha512-s390x.pl | 63 |
1 files changed, 42 insertions, 21 deletions
diff --git a/src/lib/libcrypto/sha/asm/sha512-s390x.pl b/src/lib/libcrypto/sha/asm/sha512-s390x.pl index e7ef2d5a9f..079a3fc78a 100644 --- a/src/lib/libcrypto/sha/asm/sha512-s390x.pl +++ b/src/lib/libcrypto/sha/asm/sha512-s390x.pl | |||
@@ -26,6 +26,26 @@ | |||
26 | # favour dual-issue z10 pipeline. Hardware SHA256/512 is ~4.7x faster | 26 | # favour dual-issue z10 pipeline. Hardware SHA256/512 is ~4.7x faster |
27 | # than software. | 27 | # than software. |
28 | 28 | ||
29 | # November 2010. | ||
30 | # | ||
31 | # Adapt for -m31 build. If the kernel supports the so-called "highgprs" | |
32 | # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit | |
33 | # instructions and achieve "64-bit" performance even in 31-bit legacy | ||
34 | # application context. The feature is not specific to any particular | ||
35 | # processor, as long as it's "z-CPU". The latter implies that the code | |
36 | # remains z/Architecture specific. On z900, SHA256 was measured to | |
37 | # perform 2.4x, and SHA512 13x, better than code generated by gcc 4.3. | |
38 | |||
39 | $flavour = shift; | ||
40 | |||
41 | if ($flavour =~ /3[12]/) { | ||
42 | $SIZE_T=4; | ||
43 | $g=""; | ||
44 | } else { | ||
45 | $SIZE_T=8; | ||
46 | $g="g"; | ||
47 | } | ||
48 | |||
29 | $t0="%r0"; | 49 | $t0="%r0"; |
30 | $t1="%r1"; | 50 | $t1="%r1"; |
31 | $ctx="%r2"; $t2="%r2"; | 51 | $ctx="%r2"; $t2="%r2"; |
@@ -44,7 +64,7 @@ $tbl="%r13"; | |||
44 | $T1="%r14"; | 64 | $T1="%r14"; |
45 | $sp="%r15"; | 65 | $sp="%r15"; |
46 | 66 | ||
47 | $output=shift; | 67 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} |
48 | open STDOUT,">$output"; | 68 | open STDOUT,">$output"; |
49 | 69 | ||
50 | if ($output =~ /512/) { | 70 | if ($output =~ /512/) { |
@@ -78,7 +98,8 @@ if ($output =~ /512/) { | |||
78 | } | 98 | } |
79 | $Func="sha${label}_block_data_order"; | 99 | $Func="sha${label}_block_data_order"; |
80 | $Table="K${label}"; | 100 | $Table="K${label}"; |
81 | $frame=160+16*$SZ; | 101 | $stdframe=16*$SIZE_T+4*8; |
102 | $frame=$stdframe+16*$SZ; | ||
82 | 103 | ||
83 | sub BODY_00_15 { | 104 | sub BODY_00_15 { |
84 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; | 105 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; |
@@ -93,9 +114,9 @@ $code.=<<___; | |||
93 | xgr $t0,$t1 | 114 | xgr $t0,$t1 |
94 | $ROT $t1,$t1,`$Sigma1[2]-$Sigma1[1]` | 115 | $ROT $t1,$t1,`$Sigma1[2]-$Sigma1[1]` |
95 | xgr $t2,$g | 116 | xgr $t2,$g |
96 | $ST $T1,`160+$SZ*($i%16)`($sp) | 117 | $ST $T1,`$stdframe+$SZ*($i%16)`($sp) |
97 | xgr $t0,$t1 # Sigma1(e) | 118 | xgr $t0,$t1 # Sigma1(e) |
98 | la $T1,0($T1,$h) # T1+=h | 119 | algr $T1,$h # T1+=h |
99 | ngr $t2,$e | 120 | ngr $t2,$e |
100 | lgr $t1,$a | 121 | lgr $t1,$a |
101 | algr $T1,$t0 # T1+=Sigma1(e) | 122 | algr $T1,$t0 # T1+=Sigma1(e) |
@@ -113,7 +134,7 @@ $code.=<<___; | |||
113 | ngr $t2,$b | 134 | ngr $t2,$b |
114 | algr $h,$T1 # h+=T1 | 135 | algr $h,$T1 # h+=T1 |
115 | ogr $t2,$t1 # Maj(a,b,c) | 136 | ogr $t2,$t1 # Maj(a,b,c) |
116 | la $d,0($d,$T1) # d+=T1 | 137 | algr $d,$T1 # d+=T1 |
117 | algr $h,$t2 # h+=Maj(a,b,c) | 138 | algr $h,$t2 # h+=Maj(a,b,c) |
118 | ___ | 139 | ___ |
119 | } | 140 | } |
@@ -122,19 +143,19 @@ sub BODY_16_XX { | |||
122 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; | 143 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; |
123 | 144 | ||
124 | $code.=<<___; | 145 | $code.=<<___; |
125 | $LD $T1,`160+$SZ*(($i+1)%16)`($sp) ### $i | 146 | $LD $T1,`$stdframe+$SZ*(($i+1)%16)`($sp) ### $i |
126 | $LD $t1,`160+$SZ*(($i+14)%16)`($sp) | 147 | $LD $t1,`$stdframe+$SZ*(($i+14)%16)`($sp) |
127 | $ROT $t0,$T1,$sigma0[0] | 148 | $ROT $t0,$T1,$sigma0[0] |
128 | $SHR $T1,$sigma0[2] | 149 | $SHR $T1,$sigma0[2] |
129 | $ROT $t2,$t0,`$sigma0[1]-$sigma0[0]` | 150 | $ROT $t2,$t0,`$sigma0[1]-$sigma0[0]` |
130 | xgr $T1,$t0 | 151 | xgr $T1,$t0 |
131 | $ROT $t0,$t1,$sigma1[0] | 152 | $ROT $t0,$t1,$sigma1[0] |
132 | xgr $T1,$t2 # sigma0(X[i+1]) | 153 | xgr $T1,$t2 # sigma0(X[i+1]) |
133 | $SHR $t1,$sigma1[2] | 154 | $SHR $t1,$sigma1[2] |
134 | $ADD $T1,`160+$SZ*($i%16)`($sp) # +=X[i] | 155 | $ADD $T1,`$stdframe+$SZ*($i%16)`($sp) # +=X[i] |
135 | xgr $t1,$t0 | 156 | xgr $t1,$t0 |
136 | $ROT $t0,$t0,`$sigma1[1]-$sigma1[0]` | 157 | $ROT $t0,$t0,`$sigma1[1]-$sigma1[0]` |
137 | $ADD $T1,`160+$SZ*(($i+9)%16)`($sp) # +=X[i+9] | 158 | $ADD $T1,`$stdframe+$SZ*(($i+9)%16)`($sp) # +=X[i+9] |
138 | xgr $t1,$t0 # sigma1(X[i+14]) | 159 | xgr $t1,$t0 # sigma1(X[i+14]) |
139 | algr $T1,$t1 # +=sigma1(X[i+14]) | 160 | algr $T1,$t1 # +=sigma1(X[i+14]) |
140 | ___ | 161 | ___ |
@@ -212,6 +233,7 @@ $code.=<<___; | |||
212 | .globl $Func | 233 | .globl $Func |
213 | .type $Func,\@function | 234 | .type $Func,\@function |
214 | $Func: | 235 | $Func: |
236 | sllg $len,$len,`log(16*$SZ)/log(2)` | ||
215 | ___ | 237 | ___ |
216 | $code.=<<___ if ($kimdfunc); | 238 | $code.=<<___ if ($kimdfunc); |
217 | larl %r1,OPENSSL_s390xcap_P | 239 | larl %r1,OPENSSL_s390xcap_P |
@@ -219,15 +241,15 @@ $code.=<<___ if ($kimdfunc); | |||
219 | tmhl %r0,0x4000 # check for message-security assist | 241 | tmhl %r0,0x4000 # check for message-security assist |
220 | jz .Lsoftware | 242 | jz .Lsoftware |
221 | lghi %r0,0 | 243 | lghi %r0,0 |
222 | la %r1,16($sp) | 244 | la %r1,`2*$SIZE_T`($sp) |
223 | .long 0xb93e0002 # kimd %r0,%r2 | 245 | .long 0xb93e0002 # kimd %r0,%r2 |
224 | lg %r0,16($sp) | 246 | lg %r0,`2*$SIZE_T`($sp) |
225 | tmhh %r0,`0x8000>>$kimdfunc` | 247 | tmhh %r0,`0x8000>>$kimdfunc` |
226 | jz .Lsoftware | 248 | jz .Lsoftware |
227 | lghi %r0,$kimdfunc | 249 | lghi %r0,$kimdfunc |
228 | lgr %r1,$ctx | 250 | lgr %r1,$ctx |
229 | lgr %r2,$inp | 251 | lgr %r2,$inp |
230 | sllg %r3,$len,`log(16*$SZ)/log(2)` | 252 | lgr %r3,$len |
231 | .long 0xb93e0002 # kimd %r0,%r2 | 253 | .long 0xb93e0002 # kimd %r0,%r2 |
232 | brc 1,.-4 # pay attention to "partial completion" | 254 | brc 1,.-4 # pay attention to "partial completion" |
233 | br %r14 | 255 | br %r14 |
@@ -235,13 +257,12 @@ $code.=<<___ if ($kimdfunc); | |||
235 | .Lsoftware: | 257 | .Lsoftware: |
236 | ___ | 258 | ___ |
237 | $code.=<<___; | 259 | $code.=<<___; |
238 | sllg $len,$len,`log(16*$SZ)/log(2)` | ||
239 | lghi %r1,-$frame | 260 | lghi %r1,-$frame |
240 | agr $len,$inp | 261 | la $len,0($len,$inp) |
241 | stmg $ctx,%r15,16($sp) | 262 | stm${g} $ctx,%r15,`2*$SIZE_T`($sp) |
242 | lgr %r0,$sp | 263 | lgr %r0,$sp |
243 | la $sp,0(%r1,$sp) | 264 | la $sp,0(%r1,$sp) |
244 | stg %r0,0($sp) | 265 | st${g} %r0,0($sp) |
245 | 266 | ||
246 | larl $tbl,$Table | 267 | larl $tbl,$Table |
247 | $LD $A,`0*$SZ`($ctx) | 268 | $LD $A,`0*$SZ`($ctx) |
@@ -265,7 +286,7 @@ $code.=<<___; | |||
265 | clgr $len,$t0 | 286 | clgr $len,$t0 |
266 | jne .Lrounds_16_xx | 287 | jne .Lrounds_16_xx |
267 | 288 | ||
268 | lg $ctx,`$frame+16`($sp) | 289 | l${g} $ctx,`$frame+2*$SIZE_T`($sp) |
269 | la $inp,`16*$SZ`($inp) | 290 | la $inp,`16*$SZ`($inp) |
270 | $ADD $A,`0*$SZ`($ctx) | 291 | $ADD $A,`0*$SZ`($ctx) |
271 | $ADD $B,`1*$SZ`($ctx) | 292 | $ADD $B,`1*$SZ`($ctx) |
@@ -283,14 +304,14 @@ $code.=<<___; | |||
283 | $ST $F,`5*$SZ`($ctx) | 304 | $ST $F,`5*$SZ`($ctx) |
284 | $ST $G,`6*$SZ`($ctx) | 305 | $ST $G,`6*$SZ`($ctx) |
285 | $ST $H,`7*$SZ`($ctx) | 306 | $ST $H,`7*$SZ`($ctx) |
286 | clg $inp,`$frame+32`($sp) | 307 | cl${g} $inp,`$frame+4*$SIZE_T`($sp) |
287 | jne .Lloop | 308 | jne .Lloop |
288 | 309 | ||
289 | lmg %r6,%r15,`$frame+48`($sp) | 310 | lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) |
290 | br %r14 | 311 | br %r14 |
291 | .size $Func,.-$Func | 312 | .size $Func,.-$Func |
292 | .string "SHA${label} block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>" | 313 | .string "SHA${label} block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>" |
293 | .comm OPENSSL_s390xcap_P,8,8 | 314 | .comm OPENSSL_s390xcap_P,16,8 |
294 | ___ | 315 | ___ |
295 | 316 | ||
296 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | 317 | $code =~ s/\`([^\`]*)\`/eval $1/gem; |