diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib/libcrypto/sha/asm/sha1-s390x.pl | 50 |
1 files changed, 35 insertions, 15 deletions
diff --git a/src/lib/libcrypto/sha/asm/sha1-s390x.pl b/src/lib/libcrypto/sha/asm/sha1-s390x.pl index 4b17848287..9193dda45e 100644 --- a/src/lib/libcrypto/sha/asm/sha1-s390x.pl +++ b/src/lib/libcrypto/sha/asm/sha1-s390x.pl | |||
@@ -21,9 +21,28 @@ | |||
21 | # instructions to favour dual-issue z10 pipeline. On z10 hardware is | 21 | # instructions to favour dual-issue z10 pipeline. On z10 hardware is |
22 | # "only" ~2.3x faster than software. | 22 | # "only" ~2.3x faster than software. |
23 | 23 | ||
24 | # November 2010. | ||
25 | # | ||
26 | # Adapt for -m31 build. If kernel supports what's called "highgprs" | ||
27 | # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit | ||
28 | # instructions and achieve "64-bit" performance even in 31-bit legacy | ||
29 | # application context. The feature is not specific to any particular | ||
30 | # processor, as long as it's "z-CPU". Latter implies that the code | ||
31 | # remains z/Architecture specific. | ||
32 | |||
24 | $kimdfunc=1; # magic function code for kimd instruction | 33 | $kimdfunc=1; # magic function code for kimd instruction |
25 | 34 | ||
26 | $output=shift; | 35 | $flavour = shift; |
36 | |||
37 | if ($flavour =~ /3[12]/) { | ||
38 | $SIZE_T=4; | ||
39 | $g=""; | ||
40 | } else { | ||
41 | $SIZE_T=8; | ||
42 | $g="g"; | ||
43 | } | ||
44 | |||
45 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} | ||
27 | open STDOUT,">$output"; | 46 | open STDOUT,">$output"; |
28 | 47 | ||
29 | $K_00_39="%r0"; $K=$K_00_39; | 48 | $K_00_39="%r0"; $K=$K_00_39; |
@@ -42,13 +61,14 @@ $t1="%r11"; | |||
42 | @X=("%r12","%r13","%r14"); | 61 | @X=("%r12","%r13","%r14"); |
43 | $sp="%r15"; | 62 | $sp="%r15"; |
44 | 63 | ||
45 | $frame=160+16*4; | 64 | $stdframe=16*$SIZE_T+4*8; |
65 | $frame=$stdframe+16*4; | ||
46 | 66 | ||
47 | sub Xupdate { | 67 | sub Xupdate { |
48 | my $i=shift; | 68 | my $i=shift; |
49 | 69 | ||
50 | $code.=<<___ if ($i==15); | 70 | $code.=<<___ if ($i==15); |
51 | lg $prefetch,160($sp) ### Xupdate(16) warm-up | 71 | lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up |
52 | lr $X[0],$X[2] | 72 | lr $X[0],$X[2] |
53 | ___ | 73 | ___ |
54 | return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle | 74 | return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle |
@@ -58,8 +78,8 @@ $code.=<<___ if ($i<16); | |||
58 | ___ | 78 | ___ |
59 | $code.=<<___ if ($i>=16); | 79 | $code.=<<___ if ($i>=16); |
60 | xgr $X[0],$prefetch ### Xupdate($i) | 80 | xgr $X[0],$prefetch ### Xupdate($i) |
61 | lg $prefetch,`160+4*(($i+2)%16)`($sp) | 81 | lg $prefetch,`$stdframe+4*(($i+2)%16)`($sp) |
62 | xg $X[0],`160+4*(($i+8)%16)`($sp) | 82 | xg $X[0],`$stdframe+4*(($i+8)%16)`($sp) |
63 | xgr $X[0],$prefetch | 83 | xgr $X[0],$prefetch |
64 | rll $X[0],$X[0],1 | 84 | rll $X[0],$X[0],1 |
65 | rllg $X[1],$X[0],32 | 85 | rllg $X[1],$X[0],32 |
@@ -68,7 +88,7 @@ $code.=<<___ if ($i>=16); | |||
68 | lr $X[2],$X[1] # feedback | 88 | lr $X[2],$X[1] # feedback |
69 | ___ | 89 | ___ |
70 | $code.=<<___ if ($i<=70); | 90 | $code.=<<___ if ($i<=70); |
71 | stg $X[0],`160+4*($i%16)`($sp) | 91 | stg $X[0],`$stdframe+4*($i%16)`($sp) |
72 | ___ | 92 | ___ |
73 | unshift(@X,pop(@X)); | 93 | unshift(@X,pop(@X)); |
74 | } | 94 | } |
@@ -148,9 +168,9 @@ $code.=<<___ if ($kimdfunc); | |||
148 | tmhl %r0,0x4000 # check for message-security assist | 168 | tmhl %r0,0x4000 # check for message-security assist |
149 | jz .Lsoftware | 169 | jz .Lsoftware |
150 | lghi %r0,0 | 170 | lghi %r0,0 |
151 | la %r1,16($sp) | 171 | la %r1,`2*$SIZE_T`($sp) |
152 | .long 0xb93e0002 # kimd %r0,%r2 | 172 | .long 0xb93e0002 # kimd %r0,%r2 |
153 | lg %r0,16($sp) | 173 | lg %r0,`2*$SIZE_T`($sp) |
154 | tmhh %r0,`0x8000>>$kimdfunc` | 174 | tmhh %r0,`0x8000>>$kimdfunc` |
155 | jz .Lsoftware | 175 | jz .Lsoftware |
156 | lghi %r0,$kimdfunc | 176 | lghi %r0,$kimdfunc |
@@ -165,11 +185,11 @@ $code.=<<___ if ($kimdfunc); | |||
165 | ___ | 185 | ___ |
166 | $code.=<<___; | 186 | $code.=<<___; |
167 | lghi %r1,-$frame | 187 | lghi %r1,-$frame |
168 | stg $ctx,16($sp) | 188 | st${g} $ctx,`2*$SIZE_T`($sp) |
169 | stmg %r6,%r15,48($sp) | 189 | stm${g} %r6,%r15,`6*$SIZE_T`($sp) |
170 | lgr %r0,$sp | 190 | lgr %r0,$sp |
171 | la $sp,0(%r1,$sp) | 191 | la $sp,0(%r1,$sp) |
172 | stg %r0,0($sp) | 192 | st${g} %r0,0($sp) |
173 | 193 | ||
174 | larl $t0,Ktable | 194 | larl $t0,Ktable |
175 | llgf $A,0($ctx) | 195 | llgf $A,0($ctx) |
@@ -199,7 +219,7 @@ ___ | |||
199 | for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } | 219 | for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } |
200 | $code.=<<___; | 220 | $code.=<<___; |
201 | 221 | ||
202 | lg $ctx,`$frame+16`($sp) | 222 | l${g} $ctx,`$frame+2*$SIZE_T`($sp) |
203 | la $inp,64($inp) | 223 | la $inp,64($inp) |
204 | al $A,0($ctx) | 224 | al $A,0($ctx) |
205 | al $B,4($ctx) | 225 | al $B,4($ctx) |
@@ -211,13 +231,13 @@ $code.=<<___; | |||
211 | st $C,8($ctx) | 231 | st $C,8($ctx) |
212 | st $D,12($ctx) | 232 | st $D,12($ctx) |
213 | st $E,16($ctx) | 233 | st $E,16($ctx) |
214 | brct $len,.Lloop | 234 | brct${g} $len,.Lloop |
215 | 235 | ||
216 | lmg %r6,%r15,`$frame+48`($sp) | 236 | lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) |
217 | br %r14 | 237 | br %r14 |
218 | .size sha1_block_data_order,.-sha1_block_data_order | 238 | .size sha1_block_data_order,.-sha1_block_data_order |
219 | .string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>" | 239 | .string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>" |
220 | .comm OPENSSL_s390xcap_P,8,8 | 240 | .comm OPENSSL_s390xcap_P,16,8 |
221 | ___ | 241 | ___ |
222 | 242 | ||
223 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | 243 | $code =~ s/\`([^\`]*)\`/eval $1/gem; |