Diffstat
-rw-r--r--	src/lib/libcrypto/sha/asm/sha512-s390x.pl	63
1 file changed, 42 insertions(+), 21 deletions(-)
diff --git a/src/lib/libcrypto/sha/asm/sha512-s390x.pl b/src/lib/libcrypto/sha/asm/sha512-s390x.pl
index e7ef2d5a9f..079a3fc78a 100644
--- a/src/lib/libcrypto/sha/asm/sha512-s390x.pl
+++ b/src/lib/libcrypto/sha/asm/sha512-s390x.pl
@@ -26,6 +26,26 @@
 # favour dual-issue z10 pipeline. Hardware SHA256/512 is ~4.7x faster
 # than software.
 
+# November 2010.
+#
+# Adapt for -m31 build. If kernel supports what's called "highgprs"
+# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
+# instructions and achieve "64-bit" performance even in 31-bit legacy
+# application context. The feature is not specific to any particular
+# processor, as long as it's "z-CPU". Latter implies that the code
+# remains z/Architecture specific. On z900 SHA256 was measured to
+# perform 2.4x and SHA512 - 13x better than code generated by gcc 4.3.
+
+$flavour = shift;
+
+if ($flavour =~ /3[12]/) {
+	$SIZE_T=4;
+	$g="";
+} else {
+	$SIZE_T=8;
+	$g="g";
+}
+
 $t0="%r0";
 $t1="%r1";
 $ctx="%r2"; $t2="%r2";
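Note: this $flavour switch is the core of the -m31 adaptation. $SIZE_T carries the ABI pointer size, and $g is spliced into mnemonics later in the patch, so that e.g. st${g}/stm${g}/l${g} expand to st/stm/l (4-byte operations) in a 31-bit build and to stg/stmg/lg (8-byte operations) otherwise. Below is a minimal stand-alone sketch of that mechanism in the script's own Perl; the demo() helper and its choice of register and offset are illustrative only, not part of the patch:

	#!/usr/bin/perl
	# Illustrative sketch: select ABI-dependent parameters the way the
	# patch does, then interpolate them into generated assembly.
	use strict;
	use warnings;

	my $flavour = shift || "64";
	my ($SIZE_T, $g);
	if ($flavour =~ /3[12]/) { ($SIZE_T, $g) = (4, "");  }	# 31-bit ABI
	else                     { ($SIZE_T, $g) = (8, "g"); }	# 64-bit ABI

	sub demo {
		my $off = 2*$SIZE_T;	# hypothetical save-slot offset: 8 or 16
		# "st${g}" interpolates to "st" (4-byte) or "stg" (8-byte)
		return "\tst${g}\t%r2,$off(%r15)\n";
	}
	print demo();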
@@ -44,7 +64,7 @@ $tbl="%r13";
 $T1="%r14";
 $sp="%r15";
 
-$output=shift;
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
 if ($output =~ /512/) {
@@ -78,7 +98,8 @@ if ($output =~ /512/) {
 }
 $Func="sha${label}_block_data_order";
 $Table="K${label}";
-$frame=160+16*$SZ;
+$stdframe=16*$SIZE_T+4*8;
+$frame=$stdframe+16*$SZ;
 
 sub BODY_00_15 {
 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
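Note: the hard-coded 160 was the 64-bit ABI's standard stack frame (16 pointer-sized slots for back chain, reserved word and %r2..%r15, plus four 8-byte FPR slots). Parameterized as $stdframe = 16*$SIZE_T + 4*8, the same expression yields the standard frame of either ABI, and $frame adds 16*$SZ bytes for the message schedule X[0..15]. Worked out:

	64-bit build: $stdframe = 16*8 + 32 = 160;  $frame = 224 (SHA-256, $SZ=4) or 288 (SHA-512, $SZ=8)
	31-bit build: $stdframe = 16*4 + 32 =  96;  $frame = 160 (SHA-256) or 224 (SHA-512)

The `160+...` offsets inside BODY_00_15 and BODY_16_XX below become `$stdframe+...` for the same reason: the X[] scratch area starts right above whichever standard frame is in use.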
@@ -93,9 +114,9 @@ $code.=<<___;
 	xgr	$t0,$t1
 	$ROT	$t1,$t1,`$Sigma1[2]-$Sigma1[1]`
 	xgr	$t2,$g
-	$ST	$T1,`160+$SZ*($i%16)`($sp)
+	$ST	$T1,`$stdframe+$SZ*($i%16)`($sp)
 	xgr	$t0,$t1			# Sigma1(e)
-	la	$T1,0($T1,$h)		# T1+=h
+	algr	$T1,$h			# T1+=h
 	ngr	$t2,$e
 	lgr	$t1,$a
 	algr	$T1,$t0			# T1+=Sigma1(e)
@@ -113,7 +134,7 @@ $code.=<<___;
 	ngr	$t2,$b
 	algr	$h,$T1			# h+=T1
 	ogr	$t2,$t1			# Maj(a,b,c)
-	la	$d,0($d,$T1)		# d+=T1
+	algr	$d,$T1			# d+=T1
 	algr	$h,$t2			# h+=Maj(a,b,c)
 ___
 }
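Note on the two la -> algr substitutions above: la only computes an address, and its result is subject to the current addressing mode, so outside 64-bit mode it truncates above bit 31 (or 24). Using it as a cheap add was fine while the code only ever ran in 64-bit mode, but it would corrupt 64-bit data arithmetic under the -m31/highgprs setup; hence T1+=h and d+=T1 switch to algr, a true 64-bit add. Genuine address arithmetic keeps la (and gains one use further down, where the end-of-input pointer is formed).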
@@ -122,19 +143,19 @@ sub BODY_16_XX {
 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
 
 $code.=<<___;
-	$LD	$T1,`160+$SZ*(($i+1)%16)`($sp)	### $i
-	$LD	$t1,`160+$SZ*(($i+14)%16)`($sp)
+	$LD	$T1,`$stdframe+$SZ*(($i+1)%16)`($sp)	### $i
+	$LD	$t1,`$stdframe+$SZ*(($i+14)%16)`($sp)
 	$ROT	$t0,$T1,$sigma0[0]
 	$SHR	$T1,$sigma0[2]
 	$ROT	$t2,$t0,`$sigma0[1]-$sigma0[0]`
 	xgr	$T1,$t0
 	$ROT	$t0,$t1,$sigma1[0]
 	xgr	$T1,$t2				# sigma0(X[i+1])
 	$SHR	$t1,$sigma1[2]
-	$ADD	$T1,`160+$SZ*($i%16)`($sp)	# +=X[i]
+	$ADD	$T1,`$stdframe+$SZ*($i%16)`($sp)	# +=X[i]
 	xgr	$t1,$t0
 	$ROT	$t0,$t0,`$sigma1[1]-$sigma1[0]`
-	$ADD	$T1,`160+$SZ*(($i+9)%16)`($sp)	# +=X[i+9]
+	$ADD	$T1,`$stdframe+$SZ*(($i+9)%16)`($sp)	# +=X[i+9]
 	xgr	$t1,$t0			# sigma1(X[i+14])
 	algr	$T1,$t1			# +=sigma1(X[i+14])
 ___
@@ -212,6 +233,7 @@ $code.=<<___;
 .globl	$Func
 .type	$Func,\@function
 $Func:
+	sllg	$len,$len,`log(16*$SZ)/log(2)`
 ___
 $code.=<<___ if ($kimdfunc);
 	larl	%r1,OPENSSL_s390xcap_P
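Note: $len arrives as a count of 16*$SZ-byte blocks; the hoisted sllg converts it to a byte count once, at the very top of the function, instead of only on the software path. That lets the hardware path hand it straight to kimd (lgr %r3,$len below) and the software path add it to $inp to form an end-of-input pointer. The following stand-alone reduction, in the script's own Perl, shows how the backtick expression becomes a literal shift count via the `...`-eval pass the script runs last; %r4 is used here purely for illustration:

	#!/usr/bin/perl
	use strict;
	use warnings;

	my $SZ   = 8;			# SHA-512: 16*$SZ = 128-byte blocks (SHA-256: $SZ=4, 64-byte blocks)
	my $len  = "%r4";		# illustrative register choice
	my $code = "	sllg	$len,$len,`log(16*$SZ)/log(2)`\n";
	$code =~ s/\`([^\`]*)\`/eval $1/gem;	# same substitution pass as at the end of the script
	print $code;			# prints:	sllg	%r4,%r4,7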
@@ -219,15 +241,15 @@ $code.=<<___ if ($kimdfunc);
 	tmhl	%r0,0x4000	# check for message-security assist
 	jz	.Lsoftware
 	lghi	%r0,0
-	la	%r1,16($sp)
+	la	%r1,`2*$SIZE_T`($sp)
 	.long	0xb93e0002	# kimd %r0,%r2
-	lg	%r0,16($sp)
+	lg	%r0,`2*$SIZE_T`($sp)
 	tmhh	%r0,`0x8000>>$kimdfunc`
 	jz	.Lsoftware
 	lghi	%r0,$kimdfunc
 	lgr	%r1,$ctx
 	lgr	%r2,$inp
-	sllg	%r3,$len,`log(16*$SZ)/log(2)`
+	lgr	%r3,$len
 	.long	0xb93e0002	# kimd %r0,%r2
 	brc	1,.-4		# pay attention to "partial completion"
 	br	%r14
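Note: in the hardware path %r1 must point at scratch space for the KIMD query (function code 0), which stores a 16-byte status block there; the old 16($sp) silently assumed 8-byte save slots. 2*$SIZE_T($sp) is the %r2 slot of the caller-provided register save area in either ABI (offset 8 in a 31-bit build, 16 in a 64-bit one), so the status block still lands in space the function is entitled to scribble on. The last change follows from the hoisted sllg: $len is already a byte count here, so it is copied into %r3 with a plain lgr rather than shifted on the way.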
@@ -235,13 +257,12 @@ $code.=<<___ if ($kimdfunc);
 .Lsoftware:
 ___
 $code.=<<___;
-	sllg	$len,$len,`log(16*$SZ)/log(2)`
 	lghi	%r1,-$frame
-	agr	$len,$inp
-	stmg	$ctx,%r15,16($sp)
+	la	$len,0($len,$inp)
+	stm${g}	$ctx,%r15,`2*$SIZE_T`($sp)
 	lgr	%r0,$sp
 	la	$sp,0(%r1,$sp)
-	stg	%r0,0($sp)
+	st${g}	%r0,0($sp)
 
 	larl	$tbl,$Table
 	$LD	$A,`0*$SZ`($ctx)
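Note: in both ABIs register %rN has its save slot at offset N*$SIZE_T within the caller-provided save area, which is why the hard-coded 16/32/48 offsets (valid only for 8-byte slots) become 2*, 4* and 6*$SIZE_T throughout: after the local frame of $frame bytes is allocated, $ctx (%r2) is reloaded from $frame+2*$SIZE_T, the end-of-input pointer saved in %r4 is compared against at $frame+4*$SIZE_T further down, and %r6..%r15 are restored from $frame+6*$SIZE_T. The stm${g}/st${g}/l${g}/cl${g}/lm${g} spellings pick the 4- or 8-byte form of each instruction through the $g suffix chosen at the top.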
@@ -265,7 +286,7 @@ $code.=<<___;
 	clgr	$len,$t0
 	jne	.Lrounds_16_xx
 
-	lg	$ctx,`$frame+16`($sp)
+	l${g}	$ctx,`$frame+2*$SIZE_T`($sp)
 	la	$inp,`16*$SZ`($inp)
 	$ADD	$A,`0*$SZ`($ctx)
 	$ADD	$B,`1*$SZ`($ctx)
@@ -283,14 +304,14 @@ $code.=<<___;
 	$ST	$F,`5*$SZ`($ctx)
 	$ST	$G,`6*$SZ`($ctx)
 	$ST	$H,`7*$SZ`($ctx)
-	clg	$inp,`$frame+32`($sp)
+	cl${g}	$inp,`$frame+4*$SIZE_T`($sp)
 	jne	.Lloop
 
-	lmg	%r6,%r15,`$frame+48`($sp)
+	lm${g}	%r6,%r15,`$frame+6*$SIZE_T`($sp)
 	br	%r14
 .size	$Func,.-$Func
 .string	"SHA${label} block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-.comm	OPENSSL_s390xcap_P,8,8
+.comm	OPENSSL_s390xcap_P,16,8
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;