summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/aes/asm/aes-s390x.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/aes/asm/aes-s390x.pl')
-rw-r--r--src/lib/libcrypto/aes/asm/aes-s390x.pl1333
1 files changed, 1333 insertions, 0 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-s390x.pl b/src/lib/libcrypto/aes/asm/aes-s390x.pl
new file mode 100644
index 0000000000..4b27afd92f
--- /dev/null
+++ b/src/lib/libcrypto/aes/asm/aes-s390x.pl
@@ -0,0 +1,1333 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for s390x.
11
12# April 2007.
13#
14# Software performance improvement over gcc-generated code is ~70% and
15# in absolute terms is ~73 cycles per byte processed with 128-bit key.
16# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17# *strictly* in-order execution and issued instruction [in this case
18# load value from memory is critical] has to complete before execution
19# flow proceeds. S-boxes are compressed to 2KB[+256B].
20#
21# As for hardware acceleration support. It's basically a "teaser," as
22# it can and should be improved in several ways. Most notably support
23# for CBC is not utilized, nor multiple blocks are ever processed.
24# Then software key schedule can be postponed till hardware support
25# detection... Performance improvement over assembler is reportedly
26# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27# support is implemented.
28
29# May 2007.
30#
31# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32# for 128-bit keys, if hardware support is detected.
33
34# January 2009.
35#
36# Add support for hardware AES192/256 and reschedule instructions to
37# minimize/avoid Address Generation Interlock hazard and to favour
38# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39# almost 50% on z9. The gain is smaller on z10, because being dual-
40# issue z10 makes it impossible to eliminate the interlock condition:
41# critical path is not long enough. Yet it spends ~24 cycles per byte
42# processed with 128-bit key.
43#
44# Unlike previous version hardware support detection takes place only
45# at the moment of key schedule setup, which is denoted in key->rounds.
46# This is done, because deferred key setup can't be made MT-safe, not
47# for key lengths longer than 128 bits.
48#
49# Add AES_cbc_encrypt, which gives incredible performance improvement,
50# it was measured to be ~6.6x. It's less than previously mentioned 8x,
51# because software implementation was optimized.
52
# Build-time switch: while $softonly is 0 the heredocs guarded by
# "if (!$softonly)" further down are appended to $code, which adds the
# km/kmc hardware paths. Any true value leaves those fragments out.
53$softonly=0; # allow hardware support
54
# Symbolic names for the s390x registers used by the assembly templates.
# Several names deliberately share one register: $t0/$mask are both %r0,
# $t2/$inp are both %r2, and $t3/$out/$bits are all %r3. Code that uses one
# alias therefore clobbers the others.
55$t0="%r0"; $mask="%r0";
56$t1="%r1";
57$t2="%r2"; $inp="%r2";
58$t3="%r3"; $out="%r3"; $bits="%r3";
59$key="%r4";
60$i1="%r5";
61$i2="%r6";
62$i3="%r7";
63$s0="%r8";
64$s1="%r9";
65$s2="%r10";
66$s3="%r11";
67$tbl="%r12";
68$rounds="%r13";
69$ra="%r14";
70$sp="%r15";
71
# _data_word(@words)
#
# Appends one ".long" directive per argument to the global $code buffer.
# Each 32-bit word is written twice on its line ("0xXXXXXXXX,0xXXXXXXXX"),
# so every table entry occupies 8 bytes. The round loops rely on this:
# they scale the index by 8 and load 4 bytes at byte offset 0..3 of the
# entry, which yields the word rotated by 0..3 bytes.
#
# The sub used to be declared with an empty prototype "()" although it
# consumes its arguments, which forced callers to use the "&" call form
# to bypass the prototype. It is now declared without one; the existing
# "&_data_word(...)" call sites keep working unchanged.
sub _data_word
{
    # Stop at the first undefined argument, as the original loop did.
    while (defined(my $word = shift)) {
        $code .= sprintf ".long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}
76
# Start the output buffer ($code is assigned here, appended to everywhere
# else): switch to .text and open the AES_Te object on a 256-byte boundary.
# The table contents are appended by the _data_word() call that follows.
77$code=<<___;
78.text
79
80.type AES_Te,\@object
81.align 256
82AES_Te:
83___
# The 256 32-bit words of the AES_Te table. _data_word() writes each word
# twice, so an entry is 8 bytes and the table is 2048 bytes; the Te0..Te3
# lookups in .Lenc_loop read it at byte offsets 0..3 of an entry.
84&_data_word(
85 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
86 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
87 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
88 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
89 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
90 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
91 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
92 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
93 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
94 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
95 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
96 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
97 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
98 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
99 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
100 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
101 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
102 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
103 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
104 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
105 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
106 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
107 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
108 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
109 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
110 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
111 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
112 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
113 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
114 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
115 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
116 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
117 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
118 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
119 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
120 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
121 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
122 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
123 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
124 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
125 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
126 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
127 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
128 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
129 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
130 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
131 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
132 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
133 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
134 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
135 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
136 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
137 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
138 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
139 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
140 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
141 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
142 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
143 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
144 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
145 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
146 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
147 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
148 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# Appends what follows the 2048-byte word table inside AES_Te: the 256-byte
# Te4 table, then rcon[] (ten round constants padded with zero words), then
# closes the AES_Te object. The key-schedule code addresses Te4 as
# AES_Te+2048 and rcon at 256 bytes past that.
# The heredoc ends by opening the global AES_encrypt symbol; the function
# body is supplied by the heredocs that follow.
149$code.=<<___;
150# Te4[256]
151.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
152.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
153.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
154.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
155.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
156.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
157.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
158.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
159.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
160.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
161.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
162.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
163.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
164.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
165.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
166.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
167.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
168.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
169.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
170.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
171.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
172.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
173.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
174.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
175.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
176.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
177.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
178.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
179.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
180.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
181.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
182.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
183# rcon[]
184.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
185.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
186.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
187.align 256
188.size AES_Te,.-AES_Te
189
190# void AES_encrypt(const unsigned char *inp, unsigned char *out,
191# const AES_KEY *key) {
192.globl AES_encrypt
193.type AES_encrypt,\@function
194AES_encrypt:
195___
# Hardware fast path of AES_encrypt, emitted only when $softonly is false.
# It loads the word stored at 240($key); a value below 16 branches to
# .Lesoft (the software path that follows). Otherwise that value stays in
# %r0 as the function code for the KM instruction, which is emitted as a
# raw .long: %r1 points at the key, %r4 at the output, %r2 at the input and
# %r3 holds the 16-byte block length. "brc 1,.-4" re-issues KM on condition
# code 3.
196$code.=<<___ if (!$softonly);
197 l %r0,240($key)
198 lhi %r1,16
199 clr %r0,%r1
200 jl .Lesoft
201
202 la %r1,0($key)
203 #la %r2,0($inp)
204 la %r4,0($out)
205 lghi %r3,16 # single block length
206 .long 0xb92e0042 # km %r4,%r2
207 brc 1,.-4 # can this happen?
208 br %r14
209.align 64
210.Lesoft:
211___
# Software path of AES_encrypt plus the internal _s390x_AES_encrypt routine.
#
# AES_encrypt: saves %r3..$ra at 24($sp), loads the four 32-bit state words
# from $inp into $s0..$s3, points $tbl at AES_Te and calls the internal
# routine. $out shares %r3 with a scratch register, so it is reloaded from
# 24($sp) before the result is stored. Only %r6..$ra are restored on exit.
#
# _s390x_AES_encrypt: expects the state in $s0..$s3, the key schedule in
# $key and the table base in $tbl. It keeps the return address at 152($sp)
# because $ra is used as scratch inside the round. The round count comes
# from 240($key); the loop runs rounds-1 times and advances $key by 16 each
# time. $mask is 0xff<<3, so each extracted byte becomes an index already
# scaled by the 8-byte entry size; loads at byte offsets 0..3 give the
# Te0..Te3 values named in the comments. The last round reads the single
# byte at offset 2 of an entry (labelled Te4 in the comments) and finishes
# with the round key at 16..28($key).
# Clobbers $t0..$t3, $i1..$i3, $rounds and $key.
212$code.=<<___;
213 stmg %r3,$ra,24($sp)
214
215 llgf $s0,0($inp)
216 llgf $s1,4($inp)
217 llgf $s2,8($inp)
218 llgf $s3,12($inp)
219
220 larl $tbl,AES_Te
221 bras $ra,_s390x_AES_encrypt
222
223 lg $out,24($sp)
224 st $s0,0($out)
225 st $s1,4($out)
226 st $s2,8($out)
227 st $s3,12($out)
228
229 lmg %r6,$ra,48($sp)
230 br $ra
231.size AES_encrypt,.-AES_encrypt
232
233.type _s390x_AES_encrypt,\@function
234.align 16
235_s390x_AES_encrypt:
236 stg $ra,152($sp)
237 x $s0,0($key)
238 x $s1,4($key)
239 x $s2,8($key)
240 x $s3,12($key)
241 l $rounds,240($key)
242 llill $mask,`0xff<<3`
243 aghi $rounds,-1
244 j .Lenc_loop
245.align 16
246.Lenc_loop:
247 sllg $t1,$s0,`0+3`
248 srlg $t2,$s0,`8-3`
249 srlg $t3,$s0,`16-3`
250 srl $s0,`24-3`
251 nr $s0,$mask
252 ngr $t1,$mask
253 nr $t2,$mask
254 nr $t3,$mask
255
256 srlg $i1,$s1,`16-3` # i0
257 sllg $i2,$s1,`0+3`
258 srlg $i3,$s1,`8-3`
259 srl $s1,`24-3`
260 nr $i1,$mask
261 nr $s1,$mask
262 ngr $i2,$mask
263 nr $i3,$mask
264
265 l $s0,0($s0,$tbl) # Te0[s0>>24]
266 l $t1,1($t1,$tbl) # Te3[s0>>0]
267 l $t2,2($t2,$tbl) # Te2[s0>>8]
268 l $t3,3($t3,$tbl) # Te1[s0>>16]
269
270 x $s0,3($i1,$tbl) # Te1[s1>>16]
271 l $s1,0($s1,$tbl) # Te0[s1>>24]
272 x $t2,1($i2,$tbl) # Te3[s1>>0]
273 x $t3,2($i3,$tbl) # Te2[s1>>8]
274
275 srlg $i1,$s2,`8-3` # i0
276 srlg $i2,$s2,`16-3` # i1
277 nr $i1,$mask
278 nr $i2,$mask
279 sllg $i3,$s2,`0+3`
280 srl $s2,`24-3`
281 nr $s2,$mask
282 ngr $i3,$mask
283
284 xr $s1,$t1
285 srlg $ra,$s3,`8-3` # i1
286 sllg $t1,$s3,`0+3` # i0
287 nr $ra,$mask
288 la $key,16($key)
289 ngr $t1,$mask
290
291 x $s0,2($i1,$tbl) # Te2[s2>>8]
292 x $s1,3($i2,$tbl) # Te1[s2>>16]
293 l $s2,0($s2,$tbl) # Te0[s2>>24]
294 x $t3,1($i3,$tbl) # Te3[s2>>0]
295
296 srlg $i3,$s3,`16-3` # i2
297 xr $s2,$t2
298 srl $s3,`24-3`
299 nr $i3,$mask
300 nr $s3,$mask
301
302 x $s0,0($key)
303 x $s1,4($key)
304 x $s2,8($key)
305 x $t3,12($key)
306
307 x $s0,1($t1,$tbl) # Te3[s3>>0]
308 x $s1,2($ra,$tbl) # Te2[s3>>8]
309 x $s2,3($i3,$tbl) # Te1[s3>>16]
310 l $s3,0($s3,$tbl) # Te0[s3>>24]
311 xr $s3,$t3
312
313 brct $rounds,.Lenc_loop
314 .align 16
315
316 sllg $t1,$s0,`0+3`
317 srlg $t2,$s0,`8-3`
318 ngr $t1,$mask
319 srlg $t3,$s0,`16-3`
320 srl $s0,`24-3`
321 nr $s0,$mask
322 nr $t2,$mask
323 nr $t3,$mask
324
325 srlg $i1,$s1,`16-3` # i0
326 sllg $i2,$s1,`0+3`
327 ngr $i2,$mask
328 srlg $i3,$s1,`8-3`
329 srl $s1,`24-3`
330 nr $i1,$mask
331 nr $s1,$mask
332 nr $i3,$mask
333
334 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
335 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
336 sll $s0,24
337 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
338 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
339 sll $t2,8
340 sll $t3,16
341
342 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
343 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
344 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
345 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
346 sll $i1,16
347 sll $s1,24
348 sll $i3,8
349 or $s0,$i1
350 or $s1,$t1
351 or $t2,$i2
352 or $t3,$i3
353
354 srlg $i1,$s2,`8-3` # i0
355 srlg $i2,$s2,`16-3` # i1
356 nr $i1,$mask
357 nr $i2,$mask
358 sllg $i3,$s2,`0+3`
359 srl $s2,`24-3`
360 ngr $i3,$mask
361 nr $s2,$mask
362
363 sllg $t1,$s3,`0+3` # i0
364 srlg $ra,$s3,`8-3` # i1
365 ngr $t1,$mask
366
367 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
368 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
369 sll $i1,8
370 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
371 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
372 sll $i2,16
373 nr $ra,$mask
374 sll $s2,24
375 or $s0,$i1
376 or $s1,$i2
377 or $s2,$t2
378 or $t3,$i3
379
380 srlg $i3,$s3,`16-3` # i2
381 srl $s3,`24-3`
382 nr $i3,$mask
383 nr $s3,$mask
384
385 l $t0,16($key)
386 l $t2,20($key)
387
388 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
389 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
390 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
391 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
392 sll $i2,8
393 sll $i3,16
394 sll $s3,24
395 or $s0,$i1
396 or $s1,$i2
397 or $s2,$i3
398 or $s3,$t3
399
400 lg $ra,152($sp)
401 xr $s0,$t0
402 xr $s1,$t2
403 x $s2,24($key)
404 x $s3,28($key)
405
406 br $ra
407.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
408___
409
# Open the AES_Td object on a 256-byte boundary. Its word table is appended
# by the _data_word() call that follows.
410$code.=<<___;
411.type AES_Td,\@object
412.align 256
413AES_Td:
414___
# The 256 32-bit words of the AES_Td table. _data_word() writes each word
# twice, so an entry is 8 bytes and the table is 2048 bytes; the Td0..Td3
# lookups in .Ldec_loop read it at byte offsets 0..3 of an entry.
415&_data_word(
416 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
417 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
418 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
419 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
420 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
421 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
422 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
423 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
424 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
425 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
426 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
427 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
428 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
429 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
430 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
431 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
432 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
433 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
434 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
435 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
436 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
437 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
438 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
439 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
440 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
441 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
442 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
443 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
444 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
445 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
446 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
447 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
448 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
449 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
450 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
451 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
452 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
453 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
454 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
455 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
456 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
457 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
458 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
459 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
460 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
461 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
462 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
463 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
464 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
465 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
466 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
467 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
468 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
469 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
470 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
471 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
472 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
473 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
474 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
475 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
476 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
477 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
478 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
479 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Appends the 256-byte Td4 table directly after the 2048-byte word table
# (the final decryption round reads it at 2048($tbl)) and closes the AES_Td
# object. The heredoc ends by opening the global AES_decrypt symbol; the
# function body is supplied by the heredocs that follow.
480$code.=<<___;
481# Td4[256]
482.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
483.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
484.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
485.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
486.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
487.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
488.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
489.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
490.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
491.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
492.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
493.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
494.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
495.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
496.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
497.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
498.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
499.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
500.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
501.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
502.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
503.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
504.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
505.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
506.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
507.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
508.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
509.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
510.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
511.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
512.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
513.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
514.size AES_Td,.-AES_Td
515
516# void AES_decrypt(const unsigned char *inp, unsigned char *out,
517# const AES_KEY *key) {
518.globl AES_decrypt
519.type AES_decrypt,\@function
520AES_decrypt:
521___
# Hardware fast path of AES_decrypt, emitted only when $softonly is false.
# Same shape as the AES_encrypt fast path: a value below 16 at 240($key)
# branches to .Ldsoft (software); otherwise the stored value is the KM
# function code in %r0, %r1 points at the key, %r4 at the output, %r2 at
# the input and %r3 holds the 16-byte block length. "brc 1,.-4" re-issues
# KM on condition code 3.
522$code.=<<___ if (!$softonly);
523 l %r0,240($key)
524 lhi %r1,16
525 clr %r0,%r1
526 jl .Ldsoft
527
528 la %r1,0($key)
529 #la %r2,0($inp)
530 la %r4,0($out)
531 lghi %r3,16 # single block length
532 .long 0xb92e0042 # km %r4,%r2
533 brc 1,.-4 # can this happen?
534 br %r14
535.align 64
536.Ldsoft:
537___
# Software path of AES_decrypt plus the internal _s390x_AES_decrypt routine.
#
# AES_decrypt: saves %r3..$ra at 24($sp), loads the four 32-bit state words
# from $inp into $s0..$s3, points $tbl at AES_Td and calls the internal
# routine. $out shares %r3 with a scratch register, so it is reloaded from
# 24($sp) before the result is stored. Only %r6..$ra are restored on exit.
#
# _s390x_AES_decrypt: expects the state in $s0..$s3, the key schedule in
# $key and the table base in $tbl. It keeps the return address at 152($sp)
# because $ra is used as scratch. The loop runs rounds-1 times (round count
# from 240($key)) with $mask = 0xff<<3, reading Td0..Td3 at byte offsets
# 0..3 of the 8-byte entries. For the last round $mask is reset to 0xff and
# bytes are looked up in Td4 at 2048($tbl); four loads spaced 64 bytes
# apart touch that table first (commented "prefetch Td4").
# Clobbers $t0..$t3, $i1..$i3, $rounds and $key.
538$code.=<<___;
539 stmg %r3,$ra,24($sp)
540
541 llgf $s0,0($inp)
542 llgf $s1,4($inp)
543 llgf $s2,8($inp)
544 llgf $s3,12($inp)
545
546 larl $tbl,AES_Td
547 bras $ra,_s390x_AES_decrypt
548
549 lg $out,24($sp)
550 st $s0,0($out)
551 st $s1,4($out)
552 st $s2,8($out)
553 st $s3,12($out)
554
555 lmg %r6,$ra,48($sp)
556 br $ra
557.size AES_decrypt,.-AES_decrypt
558
559.type _s390x_AES_decrypt,\@function
560.align 16
561_s390x_AES_decrypt:
562 stg $ra,152($sp)
563 x $s0,0($key)
564 x $s1,4($key)
565 x $s2,8($key)
566 x $s3,12($key)
567 l $rounds,240($key)
568 llill $mask,`0xff<<3`
569 aghi $rounds,-1
570 j .Ldec_loop
571.align 16
572.Ldec_loop:
573 srlg $t1,$s0,`16-3`
574 srlg $t2,$s0,`8-3`
575 sllg $t3,$s0,`0+3`
576 srl $s0,`24-3`
577 nr $s0,$mask
578 nr $t1,$mask
579 nr $t2,$mask
580 ngr $t3,$mask
581
582 sllg $i1,$s1,`0+3` # i0
583 srlg $i2,$s1,`16-3`
584 srlg $i3,$s1,`8-3`
585 srl $s1,`24-3`
586 ngr $i1,$mask
587 nr $s1,$mask
588 nr $i2,$mask
589 nr $i3,$mask
590
591 l $s0,0($s0,$tbl) # Td0[s0>>24]
592 l $t1,3($t1,$tbl) # Td1[s0>>16]
593 l $t2,2($t2,$tbl) # Td2[s0>>8]
594 l $t3,1($t3,$tbl) # Td3[s0>>0]
595
596 x $s0,1($i1,$tbl) # Td3[s1>>0]
597 l $s1,0($s1,$tbl) # Td0[s1>>24]
598 x $t2,3($i2,$tbl) # Td1[s1>>16]
599 x $t3,2($i3,$tbl) # Td2[s1>>8]
600
601 srlg $i1,$s2,`8-3` # i0
602 sllg $i2,$s2,`0+3` # i1
603 srlg $i3,$s2,`16-3`
604 srl $s2,`24-3`
605 nr $i1,$mask
606 ngr $i2,$mask
607 nr $s2,$mask
608 nr $i3,$mask
609
610 xr $s1,$t1
611 srlg $ra,$s3,`8-3` # i1
612 srlg $t1,$s3,`16-3` # i0
613 nr $ra,$mask
614 la $key,16($key)
615 nr $t1,$mask
616
617 x $s0,2($i1,$tbl) # Td2[s2>>8]
618 x $s1,1($i2,$tbl) # Td3[s2>>0]
619 l $s2,0($s2,$tbl) # Td0[s2>>24]
620 x $t3,3($i3,$tbl) # Td1[s2>>16]
621
622 sllg $i3,$s3,`0+3` # i2
623 srl $s3,`24-3`
624 ngr $i3,$mask
625 nr $s3,$mask
626
627 xr $s2,$t2
628 x $s0,0($key)
629 x $s1,4($key)
630 x $s2,8($key)
631 x $t3,12($key)
632
633 x $s0,3($t1,$tbl) # Td1[s3>>16]
634 x $s1,2($ra,$tbl) # Td2[s3>>8]
635 x $s2,1($i3,$tbl) # Td3[s3>>0]
636 l $s3,0($s3,$tbl) # Td0[s3>>24]
637 xr $s3,$t3
638
639 brct $rounds,.Ldec_loop
640 .align 16
641
642 l $t1,`2048+0`($tbl) # prefetch Td4
643 l $t2,`2048+64`($tbl)
644 l $t3,`2048+128`($tbl)
645 l $i1,`2048+192`($tbl)
646 llill $mask,0xff
647
648 srlg $i3,$s0,24 # i0
649 srlg $t1,$s0,16
650 srlg $t2,$s0,8
651 nr $s0,$mask # i3
652 nr $t1,$mask
653
654 srlg $i1,$s1,24
655 nr $t2,$mask
656 srlg $i2,$s1,16
657 srlg $ra,$s1,8
658 nr $s1,$mask # i0
659 nr $i2,$mask
660 nr $ra,$mask
661
662 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
663 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
664 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
665 sll $t1,16
666 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
667 sllg $s0,$i3,24
668 sll $t2,8
669
670 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
671 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
672 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
673 sll $i1,24
674 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
675 sll $i2,16
676 sll $i3,8
677 or $s0,$s1
678 or $t1,$i1
679 or $t2,$i2
680 or $t3,$i3
681
682 srlg $i1,$s2,8 # i0
683 srlg $i2,$s2,24
684 srlg $i3,$s2,16
685 nr $s2,$mask # i1
686 nr $i1,$mask
687 nr $i3,$mask
688 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
689 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
690 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
691 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
692 sll $i1,8
693 sll $i2,24
694 or $s0,$i1
695 sll $i3,16
696 or $t2,$i2
697 or $t3,$i3
698
699 srlg $i1,$s3,16 # i0
700 srlg $i2,$s3,8 # i1
701 srlg $i3,$s3,24
702 nr $s3,$mask # i2
703 nr $i1,$mask
704 nr $i2,$mask
705
706 lg $ra,152($sp)
707 or $s1,$t1
708 l $t0,16($key)
709 l $t1,20($key)
710
711 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
712 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
713 sll $i1,16
714 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
715 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
716 sll $i2,8
717 sll $s3,24
718 or $s0,$i1
719 or $s1,$i2
720 or $s2,$t2
721 or $s3,$t3
722
723 xr $s0,$t0
724 xr $s1,$t1
725 x $s2,24($key)
726 x $s3,28($key)
727
728 br $ra
729.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
730___
731
# Entry and argument validation of AES_set_encrypt_key.
# Returns -1 in %r2 (via .Lminus1) when $inp or $key is a null pointer, and
# -2 when $bits is not 128, 192 or 256; otherwise execution continues at
# .Lproceed. The embedded prototype comment says "void", but the routine
# returns a status value in %r2.
732$code.=<<___;
733# void AES_set_encrypt_key(const unsigned char *in, int bits,
734# AES_KEY *key) {
735.globl AES_set_encrypt_key
736.type AES_set_encrypt_key,\@function
737.align 16
738AES_set_encrypt_key:
739 lghi $t0,0
740 clgr $inp,$t0
741 je .Lminus1
742 clgr $key,$t0
743 je .Lminus1
744
745 lghi $t0,128
746 clr $bits,$t0
747 je .Lproceed
748 lghi $t0,192
749 clr $bits,$t0
750 je .Lproceed
751 lghi $t0,256
752 clr $bits,$t0
753 je .Lproceed
754 lghi %r2,-2
755 br %r14
756
757.align 16
758.Lproceed:
759___
# Hardware key setup, emitted only when $softonly is false.
# Maps $bits 128/192/256 to function code 18/19/20 in %r5, then issues the
# KMC query (function code 0 in %r0) so the capability vector lands at
# 16($sp). If the bit for the wanted code is clear, control goes to
# .Lekey_internal (software key expansion). Otherwise the raw key (16, 24
# or 32 bytes) is copied into the key structure unexpanded, $bits is stored
# at 236($key), the function code at 240($key), and 0 is returned.
# The stored code (18..20) is what the encrypt/decrypt/CBC entry points
# compare against 16 to choose the hardware path.
760$code.=<<___ if (!$softonly);
761 # convert bits to km code, [128,192,256]->[18,19,20]
762 lhi %r5,-128
763 lhi %r0,18
764 ar %r5,$bits
765 srl %r5,6
766 ar %r5,%r0
767
768 lghi %r0,0 # query capability vector
769 la %r1,16($sp)
770 .long 0xb92f0042 # kmc %r4,%r2
771
772 llihh %r1,0x8000
773 srlg %r1,%r1,0(%r5)
774 ng %r1,16($sp)
775 jz .Lekey_internal
776
777 lmg %r0,%r1,0($inp) # just copy 128 bits...
778 stmg %r0,%r1,0($key)
779 lhi %r0,192
780 cr $bits,%r0
781 jl 1f
782 lg %r1,16($inp)
783 stg %r1,16($key)
784 je 1f
785 lg %r1,24($inp)
786 stg %r1,24($key)
7871: st $bits,236($key) # save bits
788 st %r5,240($key) # save km code
789 lghi %r2,0
790 br %r14
791___
# Software key expansion (.Lekey_internal) and the entry of
# AES_set_decrypt_key.
#
# .Lekey_internal saves %r6..%r13 at 48($sp), points $tbl at Te4
# (AES_Te+2048; rcon sits 256 bytes further) and copies the user key into
# the schedule. It then runs one of three loops:
#   .L128_loop - 10 iterations, 4 words each, rounds=10 stored at 240($key)
#   .L192_loop - 8 iterations, 6 words each (the last one stops after 4),
#                rounds=12
#   .L256_loop - 7 iterations, 8 words each (the last one stops after 4),
#                rounds=14
# Each loop advances $key while it writes, so the round count is stored at
# 240($key) before the loop starts. Every exit restores %r6..%r13 and
# returns 0 in %r2. .Lminus1 is the shared "null argument" exit returning
# -1.
#
# AES_set_decrypt_key starts at the bottom of this heredoc: it saves $key
# and $ra at 32($sp)/112($sp), calls AES_set_encrypt_key and returns
# straight away if that reported a non-zero status. The embedded prototype
# comment says "void", but a status is returned in %r2.
792$code.=<<___;
793.align 16
794.Lekey_internal:
795 stmg %r6,%r13,48($sp) # all non-volatile regs
796
797 larl $tbl,AES_Te+2048
798
799 llgf $s0,0($inp)
800 llgf $s1,4($inp)
801 llgf $s2,8($inp)
802 llgf $s3,12($inp)
803 st $s0,0($key)
804 st $s1,4($key)
805 st $s2,8($key)
806 st $s3,12($key)
807 lghi $t0,128
808 cr $bits,$t0
809 jne .Lnot128
810
811 llill $mask,0xff
812 lghi $t3,0 # i=0
813 lghi $rounds,10
814 st $rounds,240($key)
815
816 llgfr $t2,$s3 # temp=rk[3]
817 srlg $i1,$s3,8
818 srlg $i2,$s3,16
819 srlg $i3,$s3,24
820 nr $t2,$mask
821 nr $i1,$mask
822 nr $i2,$mask
823
824.align 16
825.L128_loop:
826 la $t2,0($t2,$tbl)
827 la $i1,0($i1,$tbl)
828 la $i2,0($i2,$tbl)
829 la $i3,0($i3,$tbl)
830 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
831 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
832 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
833 icm $t2,1,0($i3) # Te4[rk[3]>>24]
834 x $t2,256($t3,$tbl) # rcon[i]
835 xr $s0,$t2 # rk[4]=rk[0]^...
836 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
837 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
838 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
839
840 llgfr $t2,$s3 # temp=rk[3]
841 srlg $i1,$s3,8
842 srlg $i2,$s3,16
843 nr $t2,$mask
844 nr $i1,$mask
845 srlg $i3,$s3,24
846 nr $i2,$mask
847
848 st $s0,16($key)
849 st $s1,20($key)
850 st $s2,24($key)
851 st $s3,28($key)
852 la $key,16($key) # key+=4
853 la $t3,4($t3) # i++
854 brct $rounds,.L128_loop
855 lghi %r2,0
856 lmg %r6,%r13,48($sp)
857 br $ra
858
859.align 16
860.Lnot128:
861 llgf $t0,16($inp)
862 llgf $t1,20($inp)
863 st $t0,16($key)
864 st $t1,20($key)
865 lghi $t0,192
866 cr $bits,$t0
867 jne .Lnot192
868
869 llill $mask,0xff
870 lghi $t3,0 # i=0
871 lghi $rounds,12
872 st $rounds,240($key)
873 lghi $rounds,8
874
875 srlg $i1,$t1,8
876 srlg $i2,$t1,16
877 srlg $i3,$t1,24
878 nr $t1,$mask
879 nr $i1,$mask
880 nr $i2,$mask
881
882.align 16
883.L192_loop:
884 la $t1,0($t1,$tbl)
885 la $i1,0($i1,$tbl)
886 la $i2,0($i2,$tbl)
887 la $i3,0($i3,$tbl)
888 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
889 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
890 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
891 icm $t1,1,0($i3) # Te4[rk[5]>>24]
892 x $t1,256($t3,$tbl) # rcon[i]
893 xr $s0,$t1 # rk[6]=rk[0]^...
894 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
895 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
896 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
897
898 st $s0,24($key)
899 st $s1,28($key)
900 st $s2,32($key)
901 st $s3,36($key)
902 brct $rounds,.L192_continue
903 lghi %r2,0
904 lmg %r6,%r13,48($sp)
905 br $ra
906
907.align 16
908.L192_continue:
909 lgr $t1,$s3
910 x $t1,16($key) # rk[10]=rk[4]^rk[9]
911 st $t1,40($key)
912 x $t1,20($key) # rk[11]=rk[5]^rk[10]
913 st $t1,44($key)
914
915 srlg $i1,$t1,8
916 srlg $i2,$t1,16
917 srlg $i3,$t1,24
918 nr $t1,$mask
919 nr $i1,$mask
920 nr $i2,$mask
921
922 la $key,24($key) # key+=6
923 la $t3,4($t3) # i++
924 j .L192_loop
925
926.align 16
927.Lnot192:
928 llgf $t0,24($inp)
929 llgf $t1,28($inp)
930 st $t0,24($key)
931 st $t1,28($key)
932 llill $mask,0xff
933 lghi $t3,0 # i=0
934 lghi $rounds,14
935 st $rounds,240($key)
936 lghi $rounds,7
937
938 srlg $i1,$t1,8
939 srlg $i2,$t1,16
940 srlg $i3,$t1,24
941 nr $t1,$mask
942 nr $i1,$mask
943 nr $i2,$mask
944
945.align 16
946.L256_loop:
947 la $t1,0($t1,$tbl)
948 la $i1,0($i1,$tbl)
949 la $i2,0($i2,$tbl)
950 la $i3,0($i3,$tbl)
951 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
952 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
953 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
954 icm $t1,1,0($i3) # Te4[rk[7]>>24]
955 x $t1,256($t3,$tbl) # rcon[i]
956 xr $s0,$t1 # rk[8]=rk[0]^...
957 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
958 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
959 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
960 st $s0,32($key)
961 st $s1,36($key)
962 st $s2,40($key)
963 st $s3,44($key)
964 brct $rounds,.L256_continue
965 lghi %r2,0
966 lmg %r6,%r13,48($sp)
967 br $ra
968
969.align 16
970.L256_continue:
971 lgr $t1,$s3 # temp=rk[11]
972 srlg $i1,$s3,8
973 srlg $i2,$s3,16
974 srlg $i3,$s3,24
975 nr $t1,$mask
976 nr $i1,$mask
977 nr $i2,$mask
978 la $t1,0($t1,$tbl)
979 la $i1,0($i1,$tbl)
980 la $i2,0($i2,$tbl)
981 la $i3,0($i3,$tbl)
982 llgc $t1,0($t1) # Te4[rk[11]>>0]
983 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
984 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
985 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
986 x $t1,16($key) # rk[12]=rk[4]^...
987 st $t1,48($key)
988 x $t1,20($key) # rk[13]=rk[5]^rk[12]
989 st $t1,52($key)
990 x $t1,24($key) # rk[14]=rk[6]^rk[13]
991 st $t1,56($key)
992 x $t1,28($key) # rk[15]=rk[7]^rk[14]
993 st $t1,60($key)
994
995 srlg $i1,$t1,8
996 srlg $i2,$t1,16
997 srlg $i3,$t1,24
998 nr $t1,$mask
999 nr $i1,$mask
1000 nr $i2,$mask
1001
1002 la $key,32($key) # key+=8
1003 la $t3,4($t3) # i++
1004 j .L256_loop
1005
1006.Lminus1:
1007 lghi %r2,-1
1008 br $ra
1009.size AES_set_encrypt_key,.-AES_set_encrypt_key
1010
1011# void AES_set_decrypt_key(const unsigned char *in, int bits,
1012# AES_KEY *key) {
1013.globl AES_set_decrypt_key
1014.type AES_set_decrypt_key,\@function
1015.align 16
1016AES_set_decrypt_key:
1017 stg $key,32($sp) # I rely on AES_set_encrypt_key to
1018 stg $ra,112($sp) # save non-volatile registers!
1019 bras $ra,AES_set_encrypt_key
1020 lg $key,32($sp)
1021 lg $ra,112($sp)
1022 ltgr %r2,%r2
1023 bnzr $ra
1024___
# Hardware branch of AES_set_decrypt_key, emitted only when $softonly is
# false. If the value AES_set_encrypt_key left at 240($key) is below 16 the
# schedule was expanded in software and control continues at .Lgo.
# Otherwise it is a km/kmc function code: bit 0x80 ("decrypt") is OR-ed in,
# the code is stored back and the routine returns with the raw key
# untouched.
# NOTE(review): no branch to .Ldkey_internal is visible in this file, so
# that stub looks unreachable here - confirm before relying on it.
1025$code.=<<___ if (!$softonly);
1026 l $t0,240($key)
1027 lhi $t1,16
1028 cr $t0,$t1
1029 jl .Lgo
1030 oill $t0,0x80 # set "decrypt" bit
1031 st $t0,240($key)
1032 br $ra
1033
1034.align 16
1035.Ldkey_internal:
1036 stg $key,32($sp)
1037 stg $ra,40($sp)
1038 bras $ra,.Lekey_internal
1039 lg $key,32($sp)
1040 lg $ra,40($sp)
1041___
# .Lgo/.Linv: reverse the order of the round keys in place.
# $i1 starts at the first 16-byte round key and $i2 at the last one
# (key + rounds*16). Each iteration swaps the two, then moves $i1 up and
# $i2 down by 16 ($t1 holds -16). The loop runs rounds/2 times.
1042$code.=<<___;
1043
1044.Lgo: llgf $rounds,240($key)
1045 la $i1,0($key)
1046 sllg $i2,$rounds,4
1047 la $i2,0($i2,$key)
1048 srl $rounds,1
1049 lghi $t1,-16
1050
1051.align 16
1052.Linv: lmg $s0,$s1,0($i1)
1053 lmg $s2,$s3,0($i2)
1054 stmg $s0,$s1,0($i2)
1055 stmg $s2,$s3,0($i1)
1056 la $i1,16($i1)
1057 la $i2,0($t1,$i2)
1058 brct $rounds,.Linv
1059___
# Extra register names for the .Lmix loop below. They reuse the $i1..$i3
# registers; the following heredoc loads them with the constants
# 0x80808080, 0x1b1b1b1b and 0xfefefefe.
1060$mask80=$i1;
1061$mask1b=$i2;
1062$maskfe=$i3;
# .Lmix: transform the inner round keys of the (already reversed) schedule.
# The loop handles (rounds-1)*4 words starting at 16($key), so the first
# and last round keys are left alone. For each word tp1 it derives tp2, tp4
# and tp8 by three successive byte-wise doublings using the 0x80/0x1b/0xfe
# masks, then combines them with rotations as the trailing comments spell
# out, and stores the result back in place.
# On exit %r6..%r13 are restored from 48($sp), where AES_set_encrypt_key's
# software path saved them, and 0 is returned in %r2.
1063$code.=<<___;
1064 llgf $rounds,240($key)
1065 aghi $rounds,-1
1066 sll $rounds,2 # (rounds-1)*4
1067 llilh $mask80,0x8080
1068 llilh $mask1b,0x1b1b
1069 llilh $maskfe,0xfefe
1070 oill $mask80,0x8080
1071 oill $mask1b,0x1b1b
1072 oill $maskfe,0xfefe
1073
1074.align 16
1075.Lmix: l $s0,16($key) # tp1
1076 lr $s1,$s0
1077 ngr $s1,$mask80
1078 srlg $t1,$s1,7
1079 slr $s1,$t1
1080 nr $s1,$mask1b
1081 sllg $t1,$s0,1
1082 nr $t1,$maskfe
1083 xr $s1,$t1 # tp2
1084
1085 lr $s2,$s1
1086 ngr $s2,$mask80
1087 srlg $t1,$s2,7
1088 slr $s2,$t1
1089 nr $s2,$mask1b
1090 sllg $t1,$s1,1
1091 nr $t1,$maskfe
1092 xr $s2,$t1 # tp4
1093
1094 lr $s3,$s2
1095 ngr $s3,$mask80
1096 srlg $t1,$s3,7
1097 slr $s3,$t1
1098 nr $s3,$mask1b
1099 sllg $t1,$s2,1
1100 nr $t1,$maskfe
1101 xr $s3,$t1 # tp8
1102
1103 xr $s1,$s0 # tp2^tp1
1104 xr $s2,$s0 # tp4^tp1
1105 rll $s0,$s0,24 # = ROTATE(tp1,8)
1106 xr $s2,$s3 # ^=tp8
1107 xr $s0,$s1 # ^=tp2^tp1
1108 xr $s1,$s3 # tp2^tp1^tp8
1109 xr $s0,$s2 # ^=tp4^tp1^tp8
1110 rll $s1,$s1,8
1111 rll $s2,$s2,16
1112 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1113 rll $s3,$s3,24
1114 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1115 xr $s0,$s3 # ^= ROTATE(tp8,8)
1116
1117 st $s0,16($key)
1118 la $key,4($key)
1119 brct $rounds,.Lmix
1120
1121 lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
1122 lghi %r2,0
1123 br $ra
1124.size AES_set_decrypt_key,.-AES_set_decrypt_key
1125___
1126
1127#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1128# size_t length, const AES_KEY *key,
1129# unsigned char *ivec, const int enc)
# AES_cbc_encrypt generator. The lexical names below shadow the file-level
# register map for the length of this block: the prologue swaps %r3 and
# %r4, so inside the routine $len is %r3 and $out is %r4, while $key is
# %r5 and $ivp is %r6.
1130{
1131my $inp="%r2";
1132my $out="%r4"; # length and out are swapped
1133my $len="%r3";
1134my $key="%r5";
1135my $ivp="%r6";
1136
1137$code.=<<___;
1138.globl AES_cbc_encrypt
1139.type AES_cbc_encrypt,\@function
1140.align 16
1141AES_cbc_encrypt:
1142 xgr %r3,%r4 # flip %r3 and %r4, out and len
1143 xgr %r4,%r3
1144 xgr %r3,%r4
1145___
# Hardware (KMC) path, emitted only when $softonly is false. A value below
# 16 at 240($key) branches to .Lcbc_software. Otherwise a parameter block
# "ivec || key" is built at 16($sp), the stored function code goes to %r0,
# and $key is reused for the residue len%16, which is subtracted from $len.
# After the whole blocks are processed the updated ivec is copied back to
# the caller at .Lkmc_done.
# .Lkmc_truncated handles a partial last block. When encrypting, the
# residue bytes are copied (EX of the MVC template) into a zeroed 16-byte
# buffer at 128($sp) and that buffer is processed as one block. When the
# 0x80 "decrypt" bit is set in the code, one block is decrypted into the
# buffer and only the residue bytes are copied to $out.
1146$code.=<<___ if (!$softonly);
1147 lhi %r0,16
1148 cl %r0,240($key)
1149 jh .Lcbc_software
1150
1151 lg %r0,0($ivp) # copy ivec
1152 lg %r1,8($ivp)
1153 stmg %r0,%r1,16($sp)
1154 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1155 stmg %r0,%r1,32($sp)
1156 lmg %r0,%r1,16($key)
1157 stmg %r0,%r1,48($sp)
1158 l %r0,240($key) # load kmc code
1159 lghi $key,15 # res=len%16, len-=res;
1160 ngr $key,$len
1161 slgr $len,$key
1162 la %r1,16($sp) # parameter block - ivec || key
1163 jz .Lkmc_truncated
1164 .long 0xb92f0042 # kmc %r4,%r2
1165 brc 1,.-4 # pay attention to "partial completion"
1166 ltr $key,$key
1167 jnz .Lkmc_truncated
1168.Lkmc_done:
1169 lmg %r0,%r1,16($sp) # copy ivec to caller
1170 stg %r0,0($ivp)
1171 stg %r1,8($ivp)
1172 br $ra
1173.align 16
1174.Lkmc_truncated:
1175 ahi $key,-1 # it's the way it's encoded in mvc
1176 tmll %r0,0x80
1177 jnz .Lkmc_truncated_dec
1178 lghi %r1,0
1179 stg %r1,128($sp)
1180 stg %r1,136($sp)
1181 bras %r1,1f
1182 mvc 128(1,$sp),0($inp)
11831: ex $key,0(%r1)
1184 la %r1,16($sp) # restore parameter block
1185 la $inp,128($sp)
1186 lghi $len,16
1187 .long 0xb92f0042 # kmc %r4,%r2
1188 j .Lkmc_done
1189.align 16
1190.Lkmc_truncated_dec:
1191 stg $out,64($sp)
1192 la $out,128($sp)
1193 lghi $len,16
1194 .long 0xb92f0042 # kmc %r4,%r2
1195 lg $out,64($sp)
1196 bras %r1,2f
1197 mvc 0(1,$out),128($sp)
11982: ex $key,0(%r1)
1199 j .Lkmc_done
1200.align 16
1201.Lcbc_software:
1202___
# Software path. Saves $key..$ra at 40($sp) and reads the "enc" argument
# from 164($sp); zero selects .Lcbc_decrypt.
# Encrypt: the running ivec lives in $s0..$s3, is XOR-ed with each input
# block and run through _s390x_AES_encrypt (which takes the key in %r4,
# hence "lgr %r4,$key"). A short final block is zero-padded into the
# buffer at 128($sp) by .Lcbc_enc_tail and encrypted as a full block. The
# last ciphertext block is written back through $ivp.
# Decrypt: the previous ciphertext block is kept at 128($sp); each
# decrypted block is XOR-ed with it, and the current ciphertext ($t0/$t1)
# becomes the next ivec. .Lcbc_dec_tail copies only the remaining bytes of
# a short final block to $out.
1203$code.=<<___;
1204 stmg $key,$ra,40($sp)
1205 lhi %r0,0
1206 cl %r0,164($sp)
1207 je .Lcbc_decrypt
1208
1209 larl $tbl,AES_Te
1210
1211 llgf $s0,0($ivp)
1212 llgf $s1,4($ivp)
1213 llgf $s2,8($ivp)
1214 llgf $s3,12($ivp)
1215
1216 lghi $t0,16
1217 slgr $len,$t0
1218 brc 4,.Lcbc_enc_tail # if borrow
1219.Lcbc_enc_loop:
1220 stmg $inp,$out,16($sp)
1221 x $s0,0($inp)
1222 x $s1,4($inp)
1223 x $s2,8($inp)
1224 x $s3,12($inp)
1225 lgr %r4,$key
1226
1227 bras $ra,_s390x_AES_encrypt
1228
1229 lmg $inp,$key,16($sp)
1230 st $s0,0($out)
1231 st $s1,4($out)
1232 st $s2,8($out)
1233 st $s3,12($out)
1234
1235 la $inp,16($inp)
1236 la $out,16($out)
1237 lghi $t0,16
1238 ltgr $len,$len
1239 jz .Lcbc_enc_done
1240 slgr $len,$t0
1241 brc 4,.Lcbc_enc_tail # if borrow
1242 j .Lcbc_enc_loop
1243.align 16
1244.Lcbc_enc_done:
1245 lg $ivp,48($sp)
1246 st $s0,0($ivp)
1247 st $s1,4($ivp)
1248 st $s2,8($ivp)
1249 st $s3,12($ivp)
1250
1251 lmg %r7,$ra,56($sp)
1252 br $ra
1253
1254.align 16
1255.Lcbc_enc_tail:
1256 aghi $len,15
1257 lghi $t0,0
1258 stg $t0,128($sp)
1259 stg $t0,136($sp)
1260 bras $t1,3f
1261 mvc 128(1,$sp),0($inp)
12623: ex $len,0($t1)
1263 lghi $len,0
1264 la $inp,128($sp)
1265 j .Lcbc_enc_loop
1266
1267.align 16
1268.Lcbc_decrypt:
1269 larl $tbl,AES_Td
1270
1271 lg $t0,0($ivp)
1272 lg $t1,8($ivp)
1273 stmg $t0,$t1,128($sp)
1274
1275.Lcbc_dec_loop:
1276 stmg $inp,$out,16($sp)
1277 llgf $s0,0($inp)
1278 llgf $s1,4($inp)
1279 llgf $s2,8($inp)
1280 llgf $s3,12($inp)
1281 lgr %r4,$key
1282
1283 bras $ra,_s390x_AES_decrypt
1284
1285 lmg $inp,$key,16($sp)
1286 sllg $s0,$s0,32
1287 sllg $s2,$s2,32
1288 lr $s0,$s1
1289 lr $s2,$s3
1290
1291 lg $t0,0($inp)
1292 lg $t1,8($inp)
1293 xg $s0,128($sp)
1294 xg $s2,136($sp)
1295 lghi $s1,16
1296 slgr $len,$s1
1297 brc 4,.Lcbc_dec_tail # if borrow
1298 brc 2,.Lcbc_dec_done # if zero
1299 stg $s0,0($out)
1300 stg $s2,8($out)
1301 stmg $t0,$t1,128($sp)
1302
1303 la $inp,16($inp)
1304 la $out,16($out)
1305 j .Lcbc_dec_loop
1306
1307.Lcbc_dec_done:
1308 stg $s0,0($out)
1309 stg $s2,8($out)
1310.Lcbc_dec_exit:
1311 lmg $ivp,$ra,48($sp)
1312 stmg $t0,$t1,0($ivp)
1313
1314 br $ra
1315
1316.align 16
1317.Lcbc_dec_tail:
1318 aghi $len,15
1319 stg $s0,128($sp)
1320 stg $s2,136($sp)
1321 bras $s1,4f
1322 mvc 0(1,$out),128($sp)
13234: ex $len,0($s1)
1324 j .Lcbc_dec_exit
1325.size AES_cbc_encrypt,.-AES_cbc_encrypt
1326___
1327}
# Append an identification string to the generated assembly.
1328$code.=<<___;
1329.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
1330___
1331
# Post-process and emit: every `...` span in the accumulated assembly is
# replaced by the result of evaluating its contents as a Perl expression
# (this is how constants such as `0xff<<3` or `2048+64` become plain
# numbers), then the finished text is written to standard output.
1332$code =~ s/\`([^\`]*)\`/eval $1/gem;
1333print $code;